In [1]:
!pip install langchain
!pip install langchain-openai
!pip install python-dotenv
!pip install pypdf
!pip install PyPDF2
!pip install chromadb
!pip install streamlit

Collecting langchain
  Downloading langchain-0.2.6-py3-none-any.whl (975 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m975.5/975.5 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.3.0,>=0.2.10 (from langchain)
  Downloading langchain_core-0.2.11-py3-none-any.whl (337 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.4/337.4 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl (25 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.83-py3-none-any.whl (127 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.5/127.5 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.10->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmi

In [4]:
# Load (or reload) IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: re-import edited modules automatically before each cell executes,
# so changes to local .py files are picked up without restarting the kernel.
%autoreload 2


In [5]:
from dotenv import load_dotenv
# Read key/value pairs from a .env file into the process environment
# (presumably including OPENAI_API_KEY for the OpenAI cells below; verify).
# The call is the cell's last expression, so its boolean result is displayed;
# NOTE(review): a displayed False suggests no .env file was found or no
# variable was set; confirm the file location.
load_dotenv()  # take environment variables from .env.

False

In [14]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.2.6-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.2/49.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensi

In [20]:

import os

# `langchain.llms.OpenAI` is a deprecated import path; the maintained class
# lives in the dedicated `langchain-openai` package installed above.
from langchain_openai import OpenAI

# Never hardcode credentials in a notebook: read the key from the environment
# (populated by load_dotenv() or by the shell) and fail early with a clear
# message instead of a 401 from the API on the first request.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

# Completion-style OpenAI LLM used by every chain in this notebook.
llm = OpenAI(openai_api_key=openai_api_key)

In [21]:
# Send a question directly to the LLM, with no prompt template and no
# contract context; the returned completion is displayed as the cell output.
llm.invoke("What is a legally binding contract and its termination period?")

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************6WCu. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

**Working with the LangChain Expression Language (LCEL) to create a chain**

In [2]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat prompt: a fixed system role plus the user's question,
# which is substituted into the {input} placeholder at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an expert in reviewing a contract between Cloud Investments and Mr Jack Robinson"),
        ("user", "{input}"),
    ]
)

# String parser used as the last step of the chain so the model output is
# returned as text.
output_parser = StrOutputParser()

In [3]:
# LCEL pipeline: the formatted prompt is piped into the LLM, and the LLM
# output is piped into the string output parser.
# Depends on `llm`, `prompt` and `output_parser` existing in the kernel; run
# the cells that define them first (a NameError here means they were skipped).
chain = prompt | llm | output_parser

NameError: name 'llm' is not defined


**Asking our LLM a question regarding the contract**

Since the LLM has no access to the actual contract between the two parties, it cannot give us the direct, correct answer we want.


In [None]:
# Run the chain; the "input" key fills the {input} placeholder of the prompt.
# No contract text is supplied here, so the model answers without context.
chain.invoke({"input": "Who owns the IP"})

**Working on the retriever**

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Location of the contract PDF, relative to the notebook's working directory.
contract_pdf_path = "../data/Raptor Contract.pdf"

# Parse the PDF into a list of LangChain Document objects.
loader = PyPDFLoader(contract_pdf_path)
pages = loader.load()


In [None]:
# Number of Document objects returned by the loader
# (presumably one per PDF page; verify against the source file).
len(pages)

In [None]:
# Break the loaded documents into overlapping chunks for embedding.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks of at most 1000 characters (measured with len) that share 200
# characters with their neighbour.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

document_chunks = text_splitter.split_documents(pages)

# Display how many chunks were produced.
len(document_chunks)


In [None]:
# Embed the chunks and store them in a persistent Chroma vector store.
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# OpenAI embedding model; credentials are taken from the environment.
embeddings = OpenAIEmbeddings()

# Index the split chunks rather than the raw `pages`: the previous cell
# produced `document_chunks` for exactly this purpose, and embedding whole
# pages would make the chunking a no-op and retrieval much coarser.
# NOTE(review): re-running this cell likely adds the same chunks to the
# persisted collection again; confirm, and clear ../data/chroma_db first if so.
db = Chroma.from_documents(
    document_chunks,
    embeddings,
    persist_directory="../data/chroma_db",
)

In [None]:
# Re-open the persisted Chroma collection using the same embedding function.
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="../data/chroma_db",
)

# Sanity check: fetch the stored documents closest to a sample question and
# print the text of the best match.
query = "What does Closing Debt Amount mean?"
docs_retrieved = vector_store.similarity_search(query)
best_match = docs_retrieved[0]
print(best_match.page_content)


**Create a RAG Chain**

In [None]:
# Build the chain that stuffs retrieved documents into a single prompt.
from langchain.chains.combine_documents import create_stuff_documents_chain

# The template opens with exactly three quotes; a fourth would put a stray
# '"' character at the start of every prompt sent to the model.
# {context} receives the retrieved documents, {input} the user's question.
template = """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}
"""
# Note: this rebinds `prompt`, replacing the chat prompt defined earlier.
prompt = ChatPromptTemplate.from_template(template)
document_chain = create_stuff_documents_chain(llm, prompt)



In [None]:
# Combine the vector-store retriever with the document chain into one RAG chain.
from langchain.chains import create_retrieval_chain

# Expose the vector store through the retriever interface with its default
# search settings.
retriever = vector_store.as_retriever()

# The retriever fetches documents for the question; the document chain then
# answers using them as context.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [None]:
# Ask the RAG chain a question about the contract; the "input" key is passed
# to the retriever and fills the prompt's {input} placeholder.
rag_request = {"input": "What does Closing Debt Amount mean?"}
response = retrieval_chain.invoke(rag_request)

# Display the full response returned by the chain.
response
