In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import google.generativeai as genai
import os
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [5]:
! pip install -q --upgrade google-generativeai langchain-google-genai python-dotenv

In [6]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0.3)

In [7]:
pdf_loader = PyPDFLoader("/Users/pateld/Documents/Multi-Language-Legal-Document-Extractor/Hariom_Agrawal_vs_Prakash_Chand_Malviya_on_8_October_2007.PDF")
pages = pdf_loader.load_and_split()
# print(pages[3].page_content)

In [8]:
prompt_template = """You are a legal expert in understanding legal documents.
                    Answer the question as precise as possible using the provided context. \n\n
                    Context: \n {context}?\n
                    Question: \n {question} \n
                    Answer:
                  """

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [9]:
stuff_chain = load_qa_chain(
    model,
    chain_type="stuff",
    prompt=prompt
)

In [10]:
question = "What is the name of PETITIONER?"
stuff_answer = stuff_chain(
    {
        "input_documents": pages,
        "question": question
    },
    return_only_outputs=True
)

In [11]:
print(stuff_answer)

{'output_text': 'Hariom Agrawal'}


In [12]:
question = "Give the Summary of the Judgement"
stuff_answer = stuff_chain(
    {
        "input_documents": pages,
        "question": question
    },
    return_only_outputs=True
)

In [13]:
print(stuff_answer)

{'output_text': "- The case involved a dispute between a landlord and a tenant over the validity of a rental agreement.\n- The original agreement was lost, and the tenant sought to introduce a photocopy as secondary evidence.\n- The High Court held that the photocopy could not be admitted as evidence because it was not properly stamped.\n- The Supreme Court agreed with the High Court's decision, holding that a photocopy of an improperly stamped original document cannot be admitted as secondary evidence under the Indian Stamp Act, 1899.\n- The Court also held that Section 37 of the Act, which allows for the correction of improperly stamped instruments, does not apply to copies of documents.\n- Finally, the Court held that Section 48-B of the Act, which allows the Collector to impound an original instrument based on a copy, does not authorize the Collector to impound a copy of an instrument."}


#RAG Pipeline: Embedding + LLM

In [34]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=0
)
context = "\n\n".join(str(p) for p in pages)
texts = text_splitter.split_text(context)

In [35]:
print(texts)

["page_content='Supreme Court of India\\nHariom Agrawal vs Prakash Chand Malviya on 8 October, 2007\\nAuthor: P Naolekar\\nBench: B.N. Agrawal, P.P. Naolekar, P. Sathasivam\\n           CASE NO.:\\nAppeal", '(civil)  4696 of 2007\\nPETITIONER:\\nHariom Agrawal\\nRESPONDENT:\\nPrakash Chand Malviya\\nDATE OF JUDGMENT: 08/10/2007\\nBENCH:\\nB.N. AGRAWAL,P.P. NAOLEKAR & P. SATHASIVAM\\nJUDGMENT:\\nJ U D G M E N T', '(arising out of Special Leave Petition (Civil)No.12573 of 2006) P.P. NAOLEKAR,\\nJ.:\\n1. Leave granted.\\n2. The facts necessary for deciding the question involved in the case are that one Maganlal', 'Jain was\\nthe original tenant of Prakash Chand Malviya, the respondent- landlord. Maganlal Jain had given\\nthe shop to the appellant for carrying out the business. On a dispute being arisen between', 'the\\nrespondent-landlord, the original tenant Maganlal Jain and the appellant herein, an agreement was\\nexecuted on 28.3.1988 by the respondent (landlord) and the appellant (su

In [36]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_index = Chroma.from_texts(texts,embeddings).as_retriever()

In [46]:
question = "What is the date of Judgement?"
docs = vector_index.get_relevant_documents(question)
stuff_answer = stuff_chain(
    {
        "input_documents": docs,
        "question": question
    },
    return_only_outputs=True
)

In [47]:
print(stuff_answer)

{'input_documents': [Document(page_content='(civil)  4696 of 2007\\nPETITIONER:\\nHariom Agrawal\\nRESPONDENT:\\nPrakash Chand Malviya\\nDATE OF JUDGMENT: 08/10/2007\\nBENCH:\\nB.N. AGRAWAL,P.P. NAOLEKAR & P. SATHASIVAM\\nJUDGMENT:\\nJ U D G M E N T'), Document(page_content='(civil)  4696 of 2007\\nPETITIONER:\\nHariom Agrawal\\nRESPONDENT:\\nPrakash Chand Malviya\\nDATE OF JUDGMENT: 08/10/2007\\nBENCH:\\nB.N. AGRAWAL,P.P. NAOLEKAR & P. SATHASIVAM\\nJUDGMENT:\\nJ U D G M E N T'), Document(page_content='(civil)  4696 of 2007\\nPETITIONER:\\nHariom Agrawal\\nRESPONDENT:\\nPrakash Chand Malviya\\nDATE OF JUDGMENT: 08/10/2007\\nBENCH:\\nB.N. AGRAWAL,P.P. NAOLEKAR & P. SATHASIVAM\\nJUDGMENT:\\nJ U D G M E N T'), Document(page_content='Indian Kanoon - http://indiankanoon.org/doc/1515290/ 6')], 'question': 'What is the date of Judgement?', 'output_text': '08/10/2007'}
