# Import Libraries

In [None]:
import os
import warnings

# Kept ahead of the third-party imports, as in the original cell order.
warnings.filterwarnings("ignore")

from dotenv import load_dotenv
from langchain_classic.chains import ConversationalRetrievalChain
from langchain_classic.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load IPC PDF

In [5]:
# Location of the source PDF, relative to the notebook's working directory.
IPC_PDF_PATH = "THE-INDIAN-PENAL-CODE-1860.pdf"

# Parse the PDF into LangChain documents. In the recorded run this returned
# 227 entries, matching the `total_pages` value in the PDF metadata.
loader = PyPDFLoader(IPC_PDF_PATH)
docs = loader.load()

print(f"Loaded {len(docs)} pages from the IPC PDF")

Loaded 227 pages from the IPC PDF


In [6]:
# Sanity check: display the first five loaded pages (indices 0-4), showing
# both the per-page metadata and the extracted text.
docs[0:5]

[Document(metadata={'producer': 'GPL Ghostscript 8.15; modified using iText® 7.0.4 ©2000-2017 iText Group NV (AGPL-version)', 'creator': 'PScript5.dll Version 5.2.2', 'creationdate': '2006-08-30T15:58:35+00:00', 'author': 'Administrator', 'keywords': '1501764525-THE-INDIAN-PENAL-CODE-1860', 'moddate': '2018-06-04T13:24:44+05:30', 'title': '1501764525-THE-INDIAN-PENAL-CODE-1860', 'source': 'THE-INDIAN-PENAL-CODE-1860.pdf', 'total_pages': 227, 'page': 0, 'page_label': '1'}, page_content='THE INDIAN PENAL CODE, 1860 \nACT NO. 45 OF 1860 1* \n[6th October, 1860.] \nCHAPTER I \nINTRODUCTION \nCHAPTER I \nINTRODUCTION \nPreamble.-WHEREAS it is expedient to provide a general Penal Code \nfor 2*[India]; \nIt is enacted as follows:--\n1. \nTitle and extent of operation of the Code. \n1. Title and extent of operation of the Code.--This Act shall be \ncalled the Indian Penal Code, and shall 3*[extend to the whole of \nIndia 4*[except the State of Jammu and Kashmir]]. \n2. \nPunishment of offences

# Document Splitting

In [9]:
# Chunking parameters, named so they can be tuned in one place.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
# Candidate split points, from coarsest to finest: blank line, newline,
# sentence end, space, and finally any character. The ". " entry is the
# addition aimed at legal text.
SEPARATORS = ["\n\n", "\n", ". ", " ", ""]

splitter = RecursiveCharacterTextSplitter(
    separators=SEPARATORS,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split every loaded page; each resulting chunk is embedded separately below.
chunks = splitter.split_documents(docs)
print(f"Created {len(chunks)} chunks")

Created 610 chunks


# Create embeddings and vector store

In [10]:
# Sentence-embedding model used to vectorise the chunks. The recorded output
# shows the model files being downloaded (weights are ~90.9 MB).
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# NOTE(review): the saved cell output echoes this constructor line, which
# usually means Python emitted a warning here - check whether this import
# path for HuggingFaceEmbeddings is deprecated in the installed version.
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed all chunks and index them in an in-memory FAISS store.
vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding)

print("Vector store created successfully")

  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vector store created successfully


# Initialize Groq LLM

In [None]:
# Pull variables from a local .env file (if one exists) into the process
# environment so the API key never has to be hardcoded in the notebook.
# `os` and `load_dotenv` are imported in the notebook's top import cell.
load_dotenv()

# Fail fast with an actionable message instead of handing `None` to the
# client and getting a less obvious error from inside the library.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in the environment before running this cell."
    )

# temperature=0 keeps answers as repeatable as the model allows.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="llama-3.3-70b-versatile",
    temperature=0,
)


In [12]:
# Retriever settings
# Number of chunks handed to the LLM for each question.
RETRIEVER_TOP_K = 3

# NOTE: the variable name `retriver` (sic) is what the chain-configuration
# cell references, so the spelling is kept as-is here.
retriver = vectorstore.as_retriever(
    # Max Marginal Relevance trades a little similarity for more diverse results.
    search_type="mmr",
    search_kwargs={"k": RETRIEVER_TOP_K},
)

In [15]:
# Conversation memory: keeps the running chat history under "chat_history"
# as message objects rather than one concatenated string.
# NOTE(review): output_key="answer" presumably tells the memory which chain
# output to store, since the chain below also returns source documents -
# confirm against the LangChain memory docs.
memory = ConversationBufferMemory(
    output_key='answer',
    return_messages=True,
    memory_key="chat_history",
)

# Conversational RAG chain wiring the LLM, the retriever and the memory together.
chain_options = {
    "memory": memory,
    "return_source_documents": True,  # expose the retrieved chunks alongside the answer
    "verbose": False,  # flip to True when debugging
}
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriver,
    **chain_options,
)