In [25]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_ollama import OllamaEmbeddings
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain_chroma import Chroma
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import HumanMessage, AIMessage

In [2]:
file_path = r"C:\Users\Ansh Lulla\VS-Code\Full_Stack_Frontend\summarization-workflow\SampleContract-Shuttle.pdf"
loader = PyPDFLoader(file_path=file_path)
loader

<langchain_community.document_loaders.pdf.PyPDFLoader at 0x1867e94b0b0>

In [3]:
docs = loader.load()
docs[0]

Document(metadata={'producer': 'Acrobat Distiller 9.3.2 (Windows)', 'creator': 'PScript5.dll Version 5.2.2', 'creationdate': '2012-07-11T18:52:58-07:00', 'author': 'kshultz', 'moddate': '2012-07-11T18:52:58-07:00', 'title': 'Microsoft Word - SampleContract-Shuttle', 'source': 'C:\\Users\\Ansh Lulla\\VS-Code\\Full_Stack_Frontend\\summarization-workflow\\SampleContract-Shuttle.pdf', 'total_pages': 10, 'page': 0, 'page_label': '1'}, page_content='Page 1 \nSample Contract \n \nContract No.___________ \nPROFESSIONAL SERVICES AGREEMENT \n \n \nTHIS AGREEMENT made and entered into this _______day of                      , 20     by and between the SANTA \nCRUZ COUNTY REGIONAL TRANSPORTATION COMMISSION, hereinafter called COMMISSION, and \n________    ____, hereinafter called CONSULTANT for __________________ (services/project name).   \n \n1. DUTIES.  \nA. CONSULTANT agrees to exercise special skill to accomplish the following results in a manner \nreasonably satisfactory to COMMISSION: _____

In [4]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
chunks = splitter.split_documents(docs)
print(len(chunks))

43


In [6]:
embedding_model = OllamaEmbeddings(model="llama3.2:latest")
embedding_model

OllamaEmbeddings(model='llama3.2:latest', base_url=None, client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)

In [8]:
vectorstore = Chroma.from_documents(documents=chunks, embedding=embedding_model)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x1867ebe2e70>

In [9]:
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001867EBE2E70>, search_kwargs={})

In [16]:
system_prompt = """
    You are an expert at analysing Legal Documents and Contracts. Answer the questions regarding the user input (pdf or a docs file) and respond in a clear manner with considering the legal terms mentioned in the document.
    Here's the context:
    {context}
"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

In [17]:
llm = ChatOllama(model="llama3.2:latest", temperature=0.4)
llm

ChatOllama(model='llama3.2:latest', temperature=0.4)

In [18]:
qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
qa_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\n    You are an expert at analysing Legal Documents and Contracts. Answer the questions regarding the user input (pdf or a docs file) and respond in a clear manner with considering the legal terms mentioned in the document.\n    Here's the context:\n    {context}\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])
| ChatOllama(model='llama3.2:latest', temperature=0.4)
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, 

In [19]:
rag_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=qa_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001867EBE2E70>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\n    You are an expert at analysing Legal Documents and Contracts. Answer the questions regarding the user input (pdf or a docs file) and respond

In [20]:
response = rag_chain.invoke({"input": "What is the document about?"})
print(response["answer"])

This appears to be a contract between a Commission (likely a government agency or organization) and a Consultant (a private company or individual providing services). The contract outlines the terms and conditions of their agreement, including:

1. The scope of work and responsibilities of both parties.
2. Payment terms and conditions, including the requirement for prior approval of work before payment.
3. Authority to act on behalf of the Commission.
4. Restrictions on changes to the project or Consultant's role without Commission consent.
5. Requirements for progress reports and meetings between the Consultant and Commission.

The contract also includes specific details about invoicing procedures, such as timing and required information for reimbursement.

Overall, this document appears to be a standard contract for consulting services, with provisions that balance the needs of both parties to ensure successful project execution.


In [21]:
contextualize_q_system_prompt = ("""
    Given a chat history and the latest user query, which might be a reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, reformulate it if needed otherwise return it as it is.    
""")

contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}")
])

contextualize_q_prompt

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[l

In [22]:
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001867EBE2E70>, search_kwargs={}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChun

In [23]:
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}")
])

In [24]:
qa_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, qa_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001867EBE2E70>, search_kwargs={}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotat

In [26]:
chat_history = []
question = "What is the scope of work and responsibilities of the Commission party"
response1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=response1["answer"])
])

question2 = "Tell me more about it."
response2 = rag_chain.invoke({"input": question2, "chat_history": chat_history})
print(response1["answer"])
print("---------------------")
print(response2["answer"])

Based on the provided contract, the Commission party (COMMISSION) is responsible for:

1. **Payment**: The Commission agrees to make payments to the Consultant in accordance with the terms and conditions of this Agreement.

2. **Authorization**: The Executive Director of COMMISSION or their designee has the authority to act for and exercise any rights of COMMISSION as set forth in this Agreement, subsequent to, and in accordance with the authorization granted by COMMISSION.

3. **Review and Approval Process**: The Commission requires the Consultant to furnish all necessary copies of data needed to complete the review and approval process for deliverables.

4. **Ownership and Title**: Upon completion of all work under this contract, ownership and title to custom letters, reports, documents, plans, specifications, and estimates (deliverables) will automatically be vested in COMMISSION; no further agreement is necessary to transfer ownership.

5. **Confidentiality**: Information derived f