In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Read the IPC reference PDF from the working directory.
loader = PyPDFLoader("train-00000-of-00001.pdf")
data = loader.load() # returns one Document per PDF page, not a single Document for the whole file
# data

In [2]:
# Number of Documents the loader produced (one per page of the PDF).
len(data)

145

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the page-level Documents into chunks of at most 1000 characters so
# each piece is small enough to embed and retrieve individually.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)

# Report how many chunks the split produced.
print(f"Total number of documents:  {len(docs)}")

Total number of documents:  570


In [4]:
# Spot-check a single chunk (content and metadata) to sanity-check the split.
docs[3]

Document(metadata={'source': 'train-00000-of-00001.pdf', 'page': 0}, page_content='war, intentionally allows the prisoner to escape from \nconfinement, they can be punished with life imprisonment or \nimprisonment of up to ten years, and may also be fined.\nPublic servant \nvoluntarily \nallowing prisoner \nof State or war in \nhis custody to \nescape\nImprisonment \nfor Life or 10 \nYears + Fine\nIPC_128\nDescription of IPC Section 129 According to section 129 of \nIndian penal code, Whoever, being a public servant and having \nthe custody of any State prisoner or prisoner of war, \nnegligently suffers such prisoner to escape from any place of \nconfinement in which such prisoner is confined, shall be \npunished with simple imprisonment for a term which may \nextend to three years, and shall also be liable to fine. IPC 129 \nin Simple Words If a public servant, responsible for the \ncustody of a State prisoner or prisoner of war, carelessly allows \nthe prisoner to escape from confine

In [5]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from ipywidgets import IntProgress

from dotenv import load_dotenv
# Load environment variables from a local .env file so the Google client can
# authenticate (presumably via GOOGLE_API_KEY -- confirm against your .env).
load_dotenv()

# Smoke-test the embedding model on a short string before embedding the corpus.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector = embeddings.embed_query("hello, world!")
vector[:5]  # show only the first few components rather than the whole vector

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

In [6]:
# Embed every chunk and index it in a Chroma collection. Reuse the
# `embeddings` client created in the previous cell instead of constructing a
# second identical one, so the model name is configured in exactly one place.
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)

In [7]:
# Expose the vector store as a retriever that returns the 10 most similar chunks.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

# Try the retriever on a plain-language scenario (no legal terms in the query).
retrieved_docs = retriever.invoke(
    "Imagine two people arguing on a busy street, and suddenly one person "
    "throws a punch, causing a public disturbance. What would this kind of "
    "altercation be classified as, and what penalties could they face?"
)

In [9]:
# Number of chunks returned; the retriever was configured with k=10.
len(retrieved_docs)

10

In [10]:
# Print the text of one lower-ranked hit to eyeball how relevant it is to the query.
print(retrieved_docs[5].page_content)

Page 9  of 145
Description Offense Punishment Section
Description of IPC Section 153AA According to section 
153AA of Indian penal code, Whoever by words, either spoken 
or written, or by signs or by visible representations or 
otherwise, promotes or attempts to promote, on grounds of 
religion, race, place of birth, residence, language, caste or 
community or any other ground whatsoever, disharmony or 
feelings of enmity, hatred or ill-will between different 
religious, racials, language or regional groups or castes or 
communities, or commits any act which is prejudicial to the 
maintenance of harmony between different religious, racial, 
language or regional groups or castes or communities, and 
which disturbs or is likely to disturb the public tranquility, or 
organizes any exercise, movement, drill or other similar 
activity intending that the participants in such activity shall use 
or be trained to use criminal force or violence or knowing it to


In [11]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used to generate the final answer: low temperature and a
# 500-token cap on the response.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.3,
    max_tokens=500,
)

In [12]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model. Python joins adjacent string
# literals with nothing in between, so every fragment must carry its own
# trailing space and sentence-ending punctuation; otherwise the sentences run
# together in the prompt the model actually receives.
# `{context}` is the template placeholder for the retrieved chunks.
system_prompt = (
    "You are a legal expert specializing in Indian criminal law. "
    "Use the following pieces of retrieved context to answer the question. "
    "You have to identify the crime type, IPC sections, and legal outcome "
    "based on the description given to you along with court references if "
    "that particular crime has one. "
    "If the given description is not a crime mention that it is not a crime "
    "and why it is not a crime. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions (with the retrieved
# context filled in) followed by the user's description as `{input}`.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [13]:
# Chain that feeds a list of documents plus the question to the LLM through `prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve chunks for the input, then hand them to the chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [15]:
# Run the pipeline end to end on a sample complaint and show only the
# generated answer from the result.
response = rag_chain.invoke(
    {"input": "Someone broke into my house and stole my properties"}
)
print(response["answer"])

This is housebreaking and theft.  Relevant IPC sections are 445 (house-breaking) and 379 (theft).  Punishment depends on the value of stolen property and other factors but can include imprisonment and fines.

