In [1]:
## pip uninstall --yes langchain langchain-core langchain-community langchain-text-splitters langchain-google-genai google-generativeai google-ai-generativelanguage langchain-huggingface
## pip install --upgrade langchain-core langchain-community langchain-google-genai langchain-huggingface langchain-text-splitters faiss-cpu sentence-transformers


import json
from langchain_core.documents import Document

# --- UPDATE THIS PATH IF NEEDED ---
# This path points to the same folder as your notebook.
# Change it if your JSON file is located elsewhere.
file_path = "Contextualized_Bangladesh_Legal_Acts.json"
# -------------------------------

all_docs = []

print(f"Loading data from {file_path}...")

# Open and load the single JSON file
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Iterate through the 'acts' list in the JSON
for act in data.get('acts', []):
    # Get metadata for the whole act
    act_title = act.get('act_title', 'Unknown Title')
    act_no = act.get('act_no', 'N/A')
    act_year = act.get('act_year', 'N/A')
    source_url = act.get('source_url', 'N/A')
    
    # Create common metadata for all sections in this act
    common_metadata = {
        "source": source_url,
        "act_title": act_title,
        "act_no": act_no,
        "act_year": act_year
    }
    
    # Iterate through each 'section' in the act
    for section in act.get('sections', []):
        section_content = section.get('section_content')
        
        # Create a Document for each section and add it to our list
        if section_content:
            doc = Document(page_content=section_content, metadata=common_metadata.copy())
            all_docs.append(doc)

# This 'docs' variable will be used in the next steps
docs = all_docs
print(f"Successfully loaded and processed {len(docs)} document sections.")

Loading data from Contextualized_Bangladesh_Legal_Acts.json...
Successfully loaded and processed 35630 document sections.


In [2]:
# Check the first loaded document
print(docs[0])

page_content='2. In all cases of Hindu, Mussalman or other person subject to the jurisdiction of the Zila Courts, having at his death left a will and appointed an executor or executors to carry the same into effect, and in which the heir to the deceased may not be a disqualified landholder subject to the superintendence of the Courts of wards the executors so appointed are to take charge of the estate of the deceased, and proceed in the execution of their trust according to the will of the deceased and the laws and usages of the country, without any application to the Judge of the Diwani Adalat or any other officer of the Government for his sanction; and the Courts of Justice are prohibited to interfere in such cases, except on a regular complaint against the executors for a breach of trust or otherwise, when they are to take cognizance of such complaint in common with all others of a civil nature.' metadata={'source': 'http://bdlaws.minlaw.gov.bd/act-print-1315.html', 'act_title': 'TH

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, 
    chunk_overlap=200
)
splits = text_splitter.split_documents(docs)

print(f"Split {len(docs)} documents into {len(splits)} chunks.")

Split 35630 documents into 48602 chunks.


In [4]:
%pip install -U langchain-huggingface

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [5]:

from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Default to 'cpu'. Change to 'cuda' if you have an NVIDIA GPU and installed faiss-gpu
model_kwargs = {'device': 'cpu'} 
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

print("Embeddings model loaded.")

  embeddings = HuggingFaceEmbeddings(


Embeddings model loaded.


In [6]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# --- This is how you would load the index in a new session ---

# 1. Define the same folder path
folder_path = "faiss_index"

# 2. Re-initialize the *same* embeddings model
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# 3. Load the vector store
print(f"Loading vector store from {folder_path}...")
# Add allow_dangerous_deserialization=True
loaded_vectorstore = FAISS.load_local(
    folder_path, 
    embeddings,
    allow_dangerous_deserialization=True 
)
print("Vector store loaded successfully.")

# Now you can use 'loaded_vectorstore' to run queries
# Example:
# results = loaded_vectorstore.similarity_search("What is the penalty for...")
# print(results)

Loading vector store from faiss_index...
Vector store loaded successfully.


In [7]:
import os
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

# --- 1. Setup the LLM with system message ---
llm = ChatOllama(
    model="mistral:7b-instruct-v0.2-q4_0",
    temperature=0.1,
    system="You are a helpful assistant that ALWAYS responds in Bengali (Bangla) language. Never respond in English. Use proper Bengali grammar and natural language."
)

# --- 2. Define the Prompt Template with stronger Bengali instructions ---
template = """[SYSTEM]: আপনি একজন সহায়ক সহকারী যিনি সর্বদা বাংলা ভাষায় উত্তর দিবেন।

নিম্নলিখিত প্রসঙ্গ ব্যবহার করে প্রশ্নের উত্তর দিন:

{context}

যদি আপনি উত্তর না জানেন, তবে শুধু বাংলায় বলুন "আমি এই প্রশ্নের উত্তর জানি না"।

নির্দেশাবলী:
- অবশ্যই শুধুমাত্র বাংলা ভাষায় উত্তর দিন
- সর্বোচ্চ তিনটি বাক্য ব্যবহার করুন
- সংক্ষিপ্ত এবং সুনির্দিষ্ট উত্তর দিন
- প্রদত্ত তথ্যের বাইরে যাবেন না

প্রশ্ন: {question}

উত্তর:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# --- 3. Define the Retriever ---
retriever = loaded_vectorstore.as_retriever(
    search_kwargs={"k": 4}  # Increase the number of retrieved documents for better context
)

# --- 4. Create a helper function to format documents ---
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# --- 5. Build the RAG chain using LCEL ---
print("Defining LCEL RAG chain...")

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | QA_CHAIN_PROMPT
    | llm
    | StrOutputParser()
)

print("✅ LCEL RAG chain is ready.")
print("এখন আপনি প্রশ্ন করতে পারেন।")

Defining LCEL RAG chain...
✅ LCEL RAG chain is ready.
এখন আপনি প্রশ্ন করতে পারেন।


  llm = ChatOllama(


In [8]:
# Query 1: About general act information
query = "বাংলাদেশ জাতীয় গ্রন্থকেন্দ্র আইন ২০২১ এর মূল উদ্দেশ্য কি?"

print(f"Query: {query}\n")
print("Thinking...")

result = rag_chain.invoke(query)

print("\nAnswer:")
print(result)

Query: বাংলাদেশ জাতীয় গ্রন্থকেন্দ্র আইন ২০২১ এর মূল উদ্দেশ্য কি?

Thinking...


ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000019FC02A9D30>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [9]:
# Query 2: About specific responsibilities
query = "এই আইনে উল্লিখিত অপরাধের জন্য কি শাস্তির বিধান রয়েছে?"

print(f"Query: {query}\n")
print("Thinking...")

result = rag_chain.invoke(query)

print("\nAnswer:")
print(result)

Query: এই আইনে উল্লিখিত অপরাধের জন্য কি শাস্তির বিধান রয়েছে?

Thinking...

Answer:
 This law establishes the role of the Health Department's chief licensing officer under the given act. (10\.) It also mentions the Computing and Research Academy of Law, established in 2005. (38\.) The Penal Code and Regulations on Offenses under this Act will be effective. (1\.)

Answer:
 This law establishes the role of the Health Department's chief licensing officer under the given act. (10\.) It also mentions the Computing and Research Academy of Law, established in 2005. (38\.) The Penal Code and Regulations on Offenses under this Act will be effective. (1\.)


In [10]:
# Query 3: About organizational structure
query = "জাতীয় গ্রন্থকেন্দ্রের পরিচালনা বোর্ডের গঠন কিভাবে হবে?"

print(f"Query: {query}\n")
print("Thinking...")

result = rag_chain.invoke(query)

print("\nAnswer:")
print(result)

Query: জাতীয় গ্রন্থকেন্দ্রের পরিচালনা বোর্ডের গঠন কিভাবে হবে?

Thinking...

Answer:
 The woman will work under the government's service rules. The government will implement and enforce the National Residency Control Plan. The father of the nation's behavior and conduct law, Act No. 2 of 2001, restricts her employment. She will receive a monthly salary of eighty-six thousand takas from the prime minister.

Answer:
 The woman will work under the government's service rules. The government will implement and enforce the National Residency Control Plan. The father of the nation's behavior and conduct law, Act No. 2 of 2001, restricts her employment. She will receive a monthly salary of eighty-six thousand takas from the prime minister.


In [9]:
# Query 4: About financial matters
query = "জাতীয় গ্রন্থকেন্দ্রের তহবিল কিভাবে গঠিত হবে এবং এর ব্যবহার কিভাবে হবে?"

print(f"Query: {query}\n")
print("Thinking...")

result = rag_chain.invoke(query)

print("\nAnswer:")
print(result)

Query: জাতীয় গ্রন্থকেন্দ্রের তহবিল কিভাবে গঠিত হবে এবং এর ব্যবহার কিভাবে হবে?

Thinking...


ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000019FCA769BD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))