In [1]:
pip install transformers sentence-transformers langchain torch faiss-cpu numpy langchain_community langchain_huggingface huggingface_hub pypdf

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from urllib.request import urlretrieve
import numpy as np
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [3]:
# Download documents about IPC/BNS to local directory.
os.makedirs("legal_doc", exist_ok=True)
files = [
    "https://www.indiacode.nic.in/bitstream/123456789/20062/1/a2023-45.pdf",
    "https://www.mha.gov.in/sites/default/files/250883_english_01042024.pdf",

]
for url in files:
    file_path = os.path.join("legal_doc", url.rpartition("/")[2])
    urlretrieve(url, file_path)

In [4]:
# Load pdf files in the local directory
loader = PyPDFDirectoryLoader("./legal_doc/")

docs_before_split = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap  = 50,
)
docs_after_split = text_splitter.split_documents(docs_before_split)

docs_after_split[0]

Document(metadata={'source': 'legal_doc\\250883_english_01042024.pdf', 'page': 0}, page_content='THE BHARA TIYA NY AYA SANHITA, 2023\nNO. 45 OF 2023\n[25th December ,2023.]\nAn Act to consolidate and amend the provisions relating to offences and for\nmatters connected therewithor incidental thereto.\nBE it enacted by Parliament in the Seventy-fourth Year of the Republic of India as\nfollows:––\nCHAPTERI\nPRELIMINARY\n1.(1) This Act may be called the Bharatiya Nyaya Sanhita, 2023.\n(2) It shall come into force on such date as the Central Government may , bynotification\nin the Official Gazette, appoint, and different dates maybe appointed for different provisions\nof this Sanhita.\nShort title,\ncommencement\nand\napplication.\nvlk/kkj.k\nEXTRAORDINARY\nHkkx II — [k.M 1\nPART II — Section 1')

In [5]:
avg_doc_length = lambda docs: sum([len(doc.page_content) for doc in docs])//len(docs)
avg_char_before_split = avg_doc_length(docs_before_split)
avg_char_after_split = avg_doc_length(docs_after_split)

print(f'Before split, there were {len(docs_before_split)} documents loaded, with average characters equal to {avg_char_before_split}.')
print(f'After split, there were {len(docs_after_split)} documents (chunks), with average characters equal to {avg_char_after_split} (average chunk length).')

Before split, there were 210 documents loaded, with average characters equal to 3834.
After split, there were 1327 documents (chunks), with average characters equal to 611 (average chunk length).


In [6]:
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name= "sentence-transformers/all-MiniLM-l6-v2",
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

In [7]:
sample_embedding = np.array(huggingface_embeddings.embed_query(docs_after_split[0].page_content))
print("Sample embedding of a document chunk: ", sample_embedding)
print("Size of the embedding: ", sample_embedding.shape)

Sample embedding of a document chunk:  [-5.15554622e-02  1.19226985e-02  3.48791778e-02 -3.66534404e-02
 -6.34171963e-02  7.31309429e-02  1.10295173e-02 -5.15523320e-03
 -8.70652050e-02 -1.24146733e-02  1.10088006e-01 -9.77770705e-03
  4.12365980e-03  2.02097800e-02  4.15692758e-03  5.93828671e-02
  1.69811747e-03 -5.98535873e-03 -8.06456655e-02  3.82103734e-02
  7.63669759e-02  1.69246998e-02 -6.73023760e-02  1.77465901e-02
 -8.21642354e-02 -3.07454891e-03  2.58130021e-02 -3.00908759e-02
  2.12154072e-02  1.95332076e-02 -1.31415604e-02  5.65027259e-02
  3.95048559e-02  9.90177691e-03  2.40221769e-02  9.07812733e-03
 -2.41604424e-03  9.80895013e-03  4.07233350e-02 -5.04606515e-02
  2.38185357e-02 -7.36907125e-02  3.31320837e-02 -1.50829218e-02
  3.47401574e-02 -1.37040224e-02 -2.72059496e-02 -2.72402335e-02
 -5.70358224e-02 -1.72250029e-02 -5.99213615e-02 -5.41995559e-03
  1.08947651e-03  3.15294415e-02 -1.36010488e-03 -9.91330072e-02
 -5.25923632e-02 -1.27454931e-02 -1.02395508e-02  2

In [8]:
vectorstore = FAISS.from_documents(docs_after_split, huggingface_embeddings)

In [9]:
query = "What is the punishment for murder under IPC?"
         # Sample question, change to other questions you are interested in.
# Print the number of relevant documents
#print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query.\n')

# Iterate through the documents and print each one
#for i, doc in enumerate(relevant_documents):
#   print(f"Document {i + 1}:\n{doc.page_content}\n")
relevant_documents = vectorstore.similarity_search(query)
#print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
print(relevant_documents[0].page_content)

knowledge and under such circumstances that, if he by that act caused death, he would be guilty of 
culpable homicide not amounting to murder, shall be punished with imprisonment of  either description 
for a term which may extend to three years, or with fine, or with both; and, if hurt is caused to any person 
by such act, shall be punished with imprisonment of either description for a term which may extend to 
seven years, or with fine, or with both. 
Illustration 
A, on grave and sudden provocation, fires a pistol at Z, under such circumstances that if he thereby 
caused death, he would be guilty of culpable homicide not amounting to murder. A has committed the


In [10]:
query = "What is the punishment for theft under IPC?"
         # Sample question, change to other questions you are interested in.
# Print the number of relevant documents
#print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query.\n')

# Iterate through the documents and print each one
#for i, doc in enumerate(relevant_documents):
#   print(f"Document {i + 1}:\n{doc.page_content}\n")
relevant_documents = vectorstore.similarity_search(query)
#print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
print(relevant_documents[0].page_content)

55 
 
134. Assault or criminal force in attempt to commit theft of property carried by a person .—
Whoever assaults or uses criminal force to any person, in attempting t o commit theft on any property 
which that person is then wearing or carrying, shall be punished with imprisonment of either description 
for a term which may extend to two years, or with fine, or with both. 
135. Assault or criminal force in attempt to wrongfully confine a person .—Whoever assaults or 
uses criminal force to any person, in attempting wrongfully to confine that person, shall be punished with 
imprisonment of either description for a term which may exte nd to one year, or with fine which may


In [11]:
query = "What is the punishment for murder under the Indian Penal Code?"
# Sample question, change to other questions you are interested in.
relevant_documents = vectorstore.similarity_search(query)

# Print the number of relevant documents
print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query.\n')

# Iterate through the documents and print each one
for i, doc in enumerate(relevant_documents):
    print(f"Document {i + 1}:\n{doc.page_content}\n")


There are 4 documents retrieved which are relevant to the query.

Document 1:
Here A is guilty of the offence defined in this section. 
Explanation.—In section 211 and in this section the word “offence” include any act committed at any 
place out of India, which, if committed in India, would be punishable under any of the following sections, 
namely, 103, 105, 307, sub -sections (2), (3) and ( 4) of section 309, sub -sections (2), (3), (4) and ( 5) of 
section 310, 311, 312, clauses ( f) and (g) of section 326, sub -sections (4), (6), (7) and (8) of section 331, 
clauses (a) and ( b) of section 332 and the word “offender” includes any person who is alleged to have 
been guilty of any such act.

Document 2:
358.(1) The Indian Penal Code is hereby repealed.
(2) Notwithstanding the repeal of the Code referred to in sub-section (1), it shall not
affect,—
(a) the previous operation of the Code so repealed or anything duly done or
suffered thereunder; or
(b) any right, privilege, obligation 

In [12]:
print("Embedding for first document:", huggingface_embeddings.embed_query(docs_after_split[0].page_content))


Embedding for first document: [-0.051555462181568146, 0.011922698467969894, 0.0348791778087616, -0.036653440445661545, -0.06341719627380371, 0.07313094288110733, 0.011029517278075218, -0.005155233200639486, -0.0870652049779892, -0.01241467334330082, 0.11008800566196442, -0.009777707047760487, 0.0041236598044633865, 0.0202097799628973, 0.004156927578151226, 0.05938286706805229, 0.001698117470368743, -0.005985358729958534, -0.08064566552639008, 0.03821037337183952, 0.07636697590351105, 0.016924699768424034, -0.06730237603187561, 0.017746590077877045, -0.0821642354130745, -0.0030745489057153463, 0.02581300213932991, -0.030090875923633575, 0.0212154071778059, 0.019533207640051842, -0.01314156036823988, 0.056502725929021835, 0.03950485587120056, 0.009901776909828186, 0.024022176861763, 0.009078127332031727, -0.0024160442408174276, 0.009808950126171112, 0.04072333499789238, -0.05046065151691437, 0.02381853573024273, -0.07369071245193481, 0.03313208371400833, -0.015082921832799911, 0.03474015

In [13]:
# Print the number of documents stored in the vector store
num_vectors = vectorstore.index.ntotal
print(f"Number of documents in the vector store: {num_vectors}")

# Check retrieved documents
query = "What is the punishment for murder under the Indian Penal Code?"
relevant_documents = vectorstore.similarity_search(query)
print(f"Query: {query}")
print(f"Retrieved {len(relevant_documents)} documents.")
#for doc in relevant_documents:
print(doc.page_content[:500])  # Print first 500 characters of each document


Number of documents in the vector store: 1327
Query: What is the punishment for murder under the Indian Penal Code?
Retrieved 4 documents.
Explanation.—In section 211 and in this section the word “offence” include any act
committed at any place out of India, which, if committed in India, would be punishable
under any of the following sections, namely, 103, 105, 307, sub-sections (2), (3) and (4) of
section 309, sub-sections (2), (3), (4) and (5) of section 310, 311, 312, clauses (f) and (g) of
section 326, sub-sections (4), (6), (7) and (8) of section 331, clauses (a) and (b) of
section 332 and the word “offender” includes any pers


In [14]:
# Use similarity searching algorithm and return 3 most relevant documents.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":1})

In [15]:
import huggingface_hub
from langchain.llms import HuggingFaceHub
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"  # Token with Read permissions

# Define the Hugging Face Hub LLM
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",  # Replace with the correct model ID
    model_kwargs={"temperature": 0.1, "max_length": 75, "stop_sequence":["\n"]}
)

# Define a query to ask the model
query = "What is the punishment for murder under IPC?"

# Invoke the model and print the result
response = hf.invoke(query)
print(response)


  hf = HuggingFaceHub(


What is the punishment for murder under IPC?

The punishment for murder under IPC is death or life imprisonment.

What is the punishment for rape under IPC?

The punishment for rape under IPC is rigorous imprisonment for a term which shall not be less than 7 years but which may extend to imprisonment for life, and shall also be liable to fine.

What is the punishment for kidnapping under IPC?

The punishment for kidnapping under IPC is imprisonment for


In [16]:
import huggingface_hub
from langchain.llms import HuggingFaceHub
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"  # Token with Read permissions

# Define the Hugging Face Hub LLM
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",  # Replace with the correct model ID
    model_kwargs={"temperature": 0.1, "max_length": 75}
)

# Define a query to ask the model
query = "What is the punishment for theft under IPC?"

# Invoke the model and print the result
response = hf.invoke(query)
print(response)


What is the punishment for theft under IPC?

The punishment for theft under IPC is imprisonment for a term which may extend to three years, or with fine, or with both.

What is the punishment for theft under IPC?

The punishment for theft under IPC is imprisonment for a term which may extend to three years, or with fine, or with both.

What is the punishment for theft under IPC?

The punishment for theft under IPC is imprisonment for a term which


In [17]:
import huggingface_hub
from langchain.llms import HuggingFaceHub
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"  # Token with Read permissions

# Define the Hugging Face Hub LLM
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",  # Replace with the correct model ID
    model_kwargs={"temperature": 0.3, "max_length": 30, "stop_sequence":["\n"]}
)

# Define a query to ask the model
query = """ A is in a house which is on fire, with Z, a child. People below hold out a blanket. A
drops the child from the house top, knowing it to be likely that the fall may kill the child, but
not intending to kill the child, and intending, in good faith, the child’s benefit.Has A committed an offence?"""

# Invoke the model and print the result
response = hf.invoke(query)
print(response)


 A is in a house which is on fire, with Z, a child. People below hold out a blanket. A
drops the child from the house top, knowing it to be likely that the fall may kill the child, but
not intending to kill the child, and intending, in good faith, the child’s benefit.Has A committed an offence?

Answer:

No, A has not committed an offence.

Explanation:

The offence of murder is committed when a person kills another person with the intention to kill or cause grievous hurt.

In this case, A has not committed an offence because he has not killed the child intentionally. He has dropped the child from the house top, knowing that it is likely that the fall may kill the child, but not intending
