In [209]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import re
def get_soup(url, timeout=30):
    """Fetch a URL and parse the response body as HTML.

    Args:
        url: Address to request with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests.get` can block forever on a stalled connection,
            which would hang the whole crawl.

    Returns:
        A BeautifulSoup object built from the response text with the
        'html.parser' backend.

    Raises:
        requests.exceptions.RequestException: On connection errors or when the
            timeout expires.
    """
    # The HTTP status is deliberately not checked: an error page is parsed
    # like any other page, which matches the original best-effort behaviour.
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')

base_url = "https://static.case.law/"

# Level 1: the site index. Every link is resolved against the base URL.
soup = get_soup(base_url)
hrefs = [urljoin(base_url, link.get('href')) for link in soup.find_all('a') if link.get('href')]

all_links = []
for href in hrefs[:100]:  # only the first 100 index links are crawled
    listing_soup = get_soup(href)
    # Level 2: keep links whose href ends in an all-digit path segment
    # (presumably volume directories -- confirm against the site layout).
    # The filter still runs on the raw href; the kept links are then resolved
    # against the page they came from so relative hrefs become fetchable URLs.
    urls = [
        urljoin(href, a['href'])
        for a in listing_soup.find_all('a', href=True)
        if re.search(r'/\d+/$', a['href'])
    ]
    for url in urls:
        # `url` always ends in "/", so this equals url + "html/".
        html_dir = urljoin(url, "html/")
        html_soup = get_soup(html_dir)
        # Level 3: individual .html pages, resolved to absolute URLs so they
        # can be handed directly to a web loader later on.
        links = [
            urljoin(html_dir, link['href'])
            for link in html_soup.find_all('a')
            if link.get('href') and link.get('href').endswith('.html')
        ]
        all_links += links


In [223]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

from dotenv import load_dotenv
from os import getenv

# Read variables from the .env file three directories up, then pick out the
# OpenAI key so it can be passed explicitly to the embedding client.
load_dotenv("../../../.env")
my_key = getenv("OPENAI_API_KEY")

# Download the first 100 collected links and turn them into documents.
loader = WebBaseLoader(web_paths=all_links[:100])
docs = loader.load()



In [220]:
chroma_name = "./../../output/chroma_db"
text_splitter = CharacterTextSplitter(chunk_size=7000, chunk_overlap=200)

# Split the pages loaded into `docs`. The original read `documents` before it
# was ever assigned (NameError on a fresh kernel) and overwrote its own input,
# so re-running the cell would have re-split already-split chunks.
documents = text_splitter.split_documents(docs)

# Deterministic ids (source + position) so that re-running this cell against
# the persisted directory reuses the same ids instead of appending a second
# copy of every chunk under fresh random ids.
doc_ids = [
    f"{doc.metadata.get('source', 'unknown')}#{position}"
    for position, doc in enumerate(documents)
]
db = Chroma.from_documents(
    documents,
    OpenAIEmbeddings(api_key=my_key),
    ids=doc_ids,
    persist_directory=chroma_name,
)



In [207]:
## ADD TO CHROMA DB (disabled: alternative that writes through the chromadb client directly)

# import chromadb
# import uuid
# chroma_name = "./../../output/chroma_db"
# persistent_client = chromadb.PersistentClient(chroma_name)
# db = persistent_client.get_or_create_collection("chroma.sqlite3")
# text_docs = [doc.page_content for doc in documents]
# ids = [str(uuid.uuid4()) for _ in documents]
# db.add(ids=ids, documents=text_docs)




In [224]:
query = "Brinkley vs Monetery"
# Search hits get their own name: rebinding `docs` here would replace the
# pages returned by the loader with search results, so re-running the
# splitting step afterwards would operate on the wrong data.
matching_docs = db.similarity_search(query)
matching_docs

[Document(page_content='Tiffany BRINKLEY, Plaintiff and Appellant, v. MONTEREY FINANCIAL SERVICES, INC., Defendant and Respondent.\nD066059\nCourt of Appeal, Fourth District, Division 1, California.\nFiled 11/19/2015\nNiddrie Fish & Addams, Niddrie Addams, David A. Niddrie, John S. Addams, San Diego; Keegan & Baker, Patrick N. Keegan, Carlsbad; Wickman & Wickman, Steven A. Wickman and Christina E. Wickman, Escondido, for Plaintiff and Appellant.\nCall & Jensen, Matthew R. Orr and Melinda Evans, Newport Beach, for Defendant and Respondent.', metadata={'source': 'https://static.case.law/cal-rptr-3d/196/html/0001-01.html'}),
 Document(page_content='Tiffany BRINKLEY, Plaintiff and Appellant, v. MONTEREY FINANCIAL SERVICES, INC., Defendant and Respondent.\nD066059\nCourt of Appeal, Fourth District, Division 1, California.\nFiled 11/19/2015\nNiddrie Fish & Addams, Niddrie Addams, David A. Niddrie, John S. Addams, San Diego; Keegan & Baker, Patrick N. Keegan, Carlsbad; Wickman & Wickman, Stev

In [175]:
## HTML LOADER (disabled: loads local HTML files with UnstructuredHTMLLoader)

# from langchain_community.document_loaders import UnstructuredHTMLLoader
# urls = '../../../lawyer_data/html'
# documents = []
# for url in os.listdir(urls)[:20]:
#     url = urls+"/"+url
#     print(url)
#     loader = UnstructuredHTMLLoader(url)
#     document = loader.load()
#     text = document[0].page_content
#     documents.append(text)
    

In [240]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub


# Chat model used for generation.
llm = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.7,
    max_tokens=2000,
)

# Retriever view over the Chroma store `db`, using its default search settings.
retriever = db.as_retriever()
# prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Join the `page_content` of each document into one string.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        The contents in input order, separated by a blank line; an empty
        string when `docs` is empty.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)


from langchain_core.prompts import ChatPromptTemplate

# Chat template for the RAG chain. It has its own name so that the plain-string
# `prompt` assigned below does not rebind the template within the same cell.
# NOTE(review): the system message describes a technical documentation writer
# while the request below asks for a legal briefing -- confirm this is intended.
rag_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a world class technical documentation writer."),
    ("user", "{context} {question}")
])

# Configure the RAG (Retrieval-Augmented Generation) chain:
#   - "context": the input is sent to the retriever and the hits are joined
#     into one string by format_docs;
#   - "question": the input is passed through unchanged;
#   - the filled template goes to the chat model and the reply is reduced to
#     plain text by StrOutputParser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

# Plain string: there are no placeholders, so no f-prefix is needed.
prompt = "Create a detailed legal briefing about AT&T vs Apple. Please ensure the briefing is comprehensive and approximately 1000 words in length."
print(rag_chain.invoke(prompt))





Title: Legal Briefing: AT&T vs Apple

I. INTRODUCTION

The case of AT&T vs Apple involves a dispute between two major corporations: AT&T, one of the world's largest telecommunication companies, and Apple, a renowned technology company. This briefing provides a comprehensive understanding of the case, its background, legal arguments, and implications.

II. BACKGROUND OF THE CASE

The dispute between AT&T and Apple began when AT&T accused Apple of patent infringement. AT&T claimed that Apple had violated patents related to technologies used in iPhones, particularly those that enhance the efficiency of data transmission and reduce power consumption. Apple denied these allegations and counter-sued AT&T, alleging that AT&T had infringed upon Apple's patents.

III. LEGAL ISSUES

The legal issues in this case revolve around patent laws and the rights of patent holders. Specifically, the core issues include:

1. Whether Apple infringed upon AT&T's patents as alleged.
2. Whether AT&T infringed 

In [241]:


# Send `prompt` straight to the chat model instead of through rag_chain, so no
# retrieved context is attached to the request.
llm.invoke(prompt)


AIMessage(content="Subject: Legal Briefing on AT&T vs Apple\n\nI. Introduction\n\nThis brief presents a comprehensive review of the legal issues and consequences surrounding the AT&T versus Apple case. The case, which is often referred to as the 'Monopoly' case, focuses on exclusive agreements and their impact on market competition. It emerged from the exclusive partnership between Apple and AT&T for the distribution of the first iPhone in the United States in 2007, which led to an antitrust lawsuit against the companies.\n\nII. Factual Background\n\nIn 2007, Apple Inc. and AT&T entered into a five-year exclusivity agreement, making AT&T the only authorized carrier for the iPhone in the United States. The exclusive agreement was not disclosed to the public. Consumers who purchased iPhones were compelled to sign a two-year service contract with AT&T, which was renewed automatically unless consumers expressly canceled it. In 2010, Apple released the iPhone 4, which was initially offered 

In [69]:
## RETRIEVAL Q&A (disabled: RetrievalQA "stuff" chain over the Chroma retriever)
# chain = RetrievalQA.from_chain_type(llm=llm,
#                                     chain_type="stuff",
#                                     retriever=db.as_retriever())

# chain(query)


{'query': 'What did the president say about Ketanji Brown Jackson',
 'result': "I don't have any information about what the president said about Ketanji Brown Jackson."}