In [1]:
!pip install langchain-community
!pip install sentence_transformers
!pip install faiss-cpu
!pip install -U transformers
!pip install accelerate

Collecting langchain-community
  Downloading langchain_community-0.3.23-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

In [2]:
import pandas as pd
# Location of the source CSV inside the Colab runtime; change this when running elsewhere.
CONSTITUTION_CSV = '/content/constitution.csv'

df = pd.read_csv(CONSTITUTION_CSV)

# Fail fast if the file lacks the two columns the later cells index by name.
missing_columns = {"article_id", "article_desc"} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{CONSTITUTION_CSV} is missing expected column(s): {sorted(missing_columns)}"
    )

df.head()

Unnamed: 0,article_id,article_desc
0,Article 1 of Indian Constitution,"Name and territory of the Union\n(1) India, th..."
1,Article 2 of Indian Constitution,Admission !!!!!!&& or establishment of new Sta...
2,Article 2A of Indian Constitution,Sikkim to be associated with the Union Rep by ...
3,Article 3 of Indian Constitution,Formation of new States and alteration of area...
4,Article 4 of Indian Constitution,Laws made under Articles 2 and 3 to provide fo...


In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd

# Split every article into overlapping windows of at most 512 characters
# (128 characters shared between neighbouring windows); the splitter's default
# length function is used, as in the original call.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=128, chunk_size=512)


def _article_chunks(article_id, article_desc):
    """Yield one record per chunk of a single article.

    The article id is prefixed to the text before splitting, so the first chunk
    starts with "<article_id>: ". Chunk ids are 1-based within the article.
    """
    # str() keeps the handling of non-string cells unchanged
    # (e.g. a missing description would become the literal text "nan").
    prefixed_text = f"{article_id}: " + str(article_desc)
    pieces = text_splitter.split_text(prefixed_text)
    for position, piece in enumerate(pieces, start=1):
        yield {
            "article_id": article_id,
            "chunk_id": position,
            "chunk_text": piece,
        }


chunked_data = [
    record
    for _, article in df.iterrows()
    for record in _article_chunks(article["article_id"], article["article_desc"])
]

chunked_df = pd.DataFrame(chunked_data)
print(len(chunked_df))
chunked_df.head()

1331


Unnamed: 0,article_id,chunk_id,chunk_text
0,Article 1 of Indian Constitution,1,Article 1 of Indian Constitution: Name and ter...
1,Article 2 of Indian Constitution,1,Article 2 of Indian Constitution: Admission !!...
2,Article 2A of Indian Constitution,1,Article 2A of Indian Constitution: Sikkim to b...
3,Article 3 of Indian Constitution,1,Article 3 of Indian Constitution: Formation of...
4,Article 3 of Indian Constitution,2,(e) alter the name of any State; Provided that...


In [4]:
import re
from langchain.docstore.document import Document

def preprocess_text(text):
    """Normalise text before embedding.

    Lowercases the input, deletes every character that is neither a word
    character nor whitespace, collapses each whitespace run to one space and
    trims both ends.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    lowered = text.lower()
    # Punctuation is deleted outright, not replaced by a space.
    without_punctuation = re.sub(r"[^\w\s]", "", lowered)
    single_spaced = re.sub(r"\s+", " ", without_punctuation)
    return single_spaced.strip()

def _to_document(chunk_row):
    """Wrap one row of chunked_df in a Document.

    The page content is the cleaned chunk text; the article id and the chunk id
    are carried along as metadata.
    """
    chunk_metadata = {
        "article_id": chunk_row["article_id"],
        "chunk_id": chunk_row["chunk_id"],
    }
    return Document(
        page_content=preprocess_text(chunk_row["chunk_text"]),
        metadata=chunk_metadata,
    )


# One Document per chunk, in the same order as the rows of chunked_df.
documents = [_to_document(chunk_row) for _, chunk_row in chunked_df.iterrows()]

In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
import pandas as pd

# Sentence-embedding model used for both the stored chunks and incoming queries.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed the cleaned text of every chunk; `embeddings` keeps the order of `documents`.
texts = [document.page_content for document in documents]
embeddings = embedding_model.embed_documents(texts)

# Text and vector side by side for a quick visual check.
embed_df = pd.DataFrame({"text": texts, "embedding": embeddings})
embed_df.head()

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Unnamed: 0,text,embedding
0,article 1 of indian constitution name and terr...,"[0.03161894157528877, -0.011712305247783661, -..."
1,article 2 of indian constitution admission or ...,"[-0.002397556323558092, 0.00045230131945572793..."
2,article 2a of indian constitution sikkim to be...,"[-0.023230405524373055, 0.06714336574077606, 0..."
3,article 3 of indian constitution formation of ...,"[-0.00433116452768445, 0.03810510039329529, -0..."
4,e alter the name of any state provided that no...,"[-0.03659382089972496, 0.10697875171899796, 0...."


In [6]:
import os
from getpass import getpass

# Never commit an access token to the notebook. The token is taken from the
# HF_TOKEN environment variable when it is already set; otherwise it is asked
# for interactively without echoing it.
# Leaving the prompt empty skips authentication.
# SECURITY: the token that used to be hard-coded in this cell is exposed in the
# notebook's history and should be revoked in the Hugging Face account settings.
if not os.environ.get("HF_TOKEN"):
    hf_token = getpass("Hugging Face token (press Enter to skip): ").strip()
    if hf_token:
        os.environ["HF_TOKEN"] = hf_token

In [None]:
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import numpy as np
# import faiss
# import torch

# model_name = "Qwen/Qwen3-4B"

# tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     torch_dtype=torch.float16,
#     trust_remote_code=True
# )

# embeddings_np = np.array(embeddings).astype("float32")
# faiss.normalize_L2(embeddings_np)
# index = faiss.IndexFlatIP(embeddings_np.shape[1])
# index.add(embeddings_np)

# while True:
#     user_query = input("\nAsk your legal question (or type 'exit'): ")
#     if user_query.lower() == "exit":
#         break

#     cleaned_query = preprocess_text(user_query)
#     query_embedding = embedding_model.embed_query(cleaned_query)
#     query_embedding_np = np.array([query_embedding]).astype("float32")
#     faiss.normalize_L2(query_embedding_np)

#     D, I = index.search(query_embedding_np, k=5)

#     relevant_chunks = []
#     for score, idx in zip(D[0], I[0]):
#         if score >= 0.4:
#             doc = documents[idx]
#             relevant_chunks.append(doc.page_content)

#     if not relevant_chunks:
#         print("\nNo, this does not come under legal queries.")
#         continue

#     context = "\n".join(relevant_chunks)
#     system_prompt = (
#     "You are a legal assistant. Answer the user's question based on the following context from the Constitution of India. "
#     "Do not add any introductory or filler phrases. Respond only with the direct legal answer, in a formal and concise manner.\n\n"
#     f"Context:\n{context}"
#   )
#     messages = [
#         {"role": "system", "content": system_prompt},
#         {"role": "user", "content": user_query}
#     ]

#     prompt_text = tokenizer.apply_chat_template(
#         messages,
#         tokenize=False,
#         add_generation_prompt=True,
#         enable_thinking=False
#     )

#     model_inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

#     output_ids = model.generate(
#         **model_inputs,
#         max_new_tokens=200,
#         do_sample=True,
#         temperature=0.4,
#         repetition_penalty=1.2,
#         top_p=0.9,
#         eos_token_id=tokenizer.eos_token_id
#     )

#     # Get only newly generated tokens (excluding input prompt)
#     generated_text = tokenizer.decode(output_ids[0][model_inputs["input_ids"].shape[1]:], skip_special_tokens=True)

#     print("\nLLM's Answer:\n", generated_text.strip())


In [7]:
!pip install flask flask-cors pyngrok nest_asyncio
!pip install --upgrade --ignore-installed blinker


Collecting flask-cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting pyngrok
  Downloading pyngrok-7.2.7-py3-none-any.whl.metadata (9.4 kB)
Downloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Downloading pyngrok-7.2.7-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok, flask-cors
Successfully installed flask-cors-5.0.1 pyngrok-7.2.7
Collecting blinker
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Downloading blinker-1.9.0-py3-none-any.whl (8.5 kB)
Installing collected packages: blinker
Successfully installed blinker-1.9.0


In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np
import faiss
import torch
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import nest_asyncio
from pyngrok import ngrok

# Load tokenizer and model
model_name = "Qwen/Qwen3-4B"

# Tokenizer and weights come from the same Hub repository.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Options for loading the weights: half precision, automatic device placement.
# NOTE(review): trust_remote_code=True allows the model repository to run its own
# Python at load time — confirm it is still required for this model.
model_load_kwargs = dict(
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)



# Build FAISS index
# One float32 row per chunk embedding (always a fresh array, so the in-place
# normalisation below never touches `embeddings` itself).
embeddings_np = np.array(embeddings, dtype="float32")

# Scale every row to unit length so that the inner product used by the index
# equals cosine similarity.
faiss.normalize_L2(embeddings_np)

embedding_dim = embeddings_np.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(embeddings_np)

# RAG processing function
def process_rag_query(user_query, top_k=5, min_score=0.4, max_new_tokens=200):
    """Answer a question from the indexed constitution chunks (retrieve, then generate).

    The query is cleaned with preprocess_text, embedded, L2-normalised and searched
    against the inner-product FAISS index. Every hit whose score reaches
    ``min_score`` is added to the context placed in the system prompt; the chat
    model then generates the reply.

    Args:
        user_query: The question as typed by the user (str).
        top_k: Number of nearest chunks requested from the index.
        min_score: Minimum similarity score a chunk needs to be used as context.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        A dict with the single key ``"answer"``: the generated reply, or a fixed
        refusal message when no chunk reaches ``min_score``.
    """
    cleaned_query = preprocess_text(user_query)
    query_vector = np.array([embedding_model.embed_query(cleaned_query)]).astype("float32")
    # Normalise the query in place so its inner-product scores are on the same
    # scale as the scores of the normalised vectors stored in the index.
    faiss.normalize_L2(query_vector)

    scores, chunk_ids = index.search(query_vector, k=top_k)

    # FAISS pads the result with id -1 when fewer than top_k vectors are available;
    # skip those explicitly so a negative id can never index `documents` from the end.
    relevant_chunks = [
        documents[chunk_id].page_content
        for score, chunk_id in zip(scores[0], chunk_ids[0])
        if chunk_id >= 0 and score >= min_score
    ]

    if not relevant_chunks:
        return {"answer": "No, this does not come under legal queries."}

    context = "\n".join(relevant_chunks)
    system_prompt = (
        "You are a legal assistant. Answer the user's question based on the following context from the Constitution of India. "
        "Do not add any introductory or filler phrases. Respond only with the direct legal answer, in a formal and concise manner.\n\n"
        f"Context:\n{context}"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        # The model sees the question exactly as typed, not the cleaned version.
        {"role": "user", "content": user_query},
    ]

    # NOTE(review): enable_thinking is forwarded to the chat template; for Qwen3 it
    # is expected to switch off the "thinking" preamble — confirm on the model card.
    prompt_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    model_inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    output_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.4,
        repetition_penalty=1.2,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
    )

    # The output starts with the prompt tokens; decode only what was generated after them.
    prompt_length = model_inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
    return {"answer": generated_text.strip()}

# --- Flask API setup for frontend integration ---
# `app` is the application object the /generate route below is registered on.
app = Flask(__name__)
# Enable CORS for the whole app with flask-cors defaults so that a browser frontend
# served from a different origin can call the API.
# NOTE(review): the defaults are permissive and this server is published through
# ngrok — consider restricting the allowed origins; confirm the frontend's origin.
CORS(app)

@app.route("/generate", methods=["POST"])
def generate():
    """Handle POST /generate: run the RAG pipeline on the posted question.

    Expects a JSON object body with a non-empty string field ``query``.

    Returns:
        The JSON produced by process_rag_query on success, or a JSON error
        message with HTTP status 400 when the body is missing, is not a JSON
        object, or has no usable ``query``.
    """
    # silent=True yields None instead of raising when the body is absent or not JSON.
    payload = request.get_json(silent=True)
    query = payload.get("query") if isinstance(payload, dict) else None

    # Reject bad input here; a non-string query would otherwise fail inside the
    # pipeline and surface to the client as an opaque 500.
    if not isinstance(query, str) or not query.strip():
        error_body = {"error": "Request body must be JSON with a non-empty string field 'query'."}
        return jsonify(error_body), 400

    return jsonify(process_rag_query(query))

# --- Colab async fix ---
# Presumably applied because the notebook kernel already runs an asyncio event loop
# and nest_asyncio patches asyncio to tolerate nested use of it.
# NOTE(review): nothing in this cell awaits or starts an event loop (Flask is served
# from a plain thread), so this call may be unnecessary — confirm before removing.
nest_asyncio.apply()

def run_server():
    """Serve the Flask app on port 8000, listening on all network interfaces.

    The call does not return while the server is running, which is why it is
    used as the target of a background thread.
    """
    listen_options = {"host": "0.0.0.0", "port": 8000}
    app.run(**listen_options)

def start_ngrok():
    """Open an ngrok tunnel to local port 8000 and return its public URL.

    The auth token is read from the NGROK_AUTH_TOKEN environment variable; when
    that is unset, it is asked for interactively without echoing it.

    SECURITY: the token that used to be hard-coded here is exposed in the
    notebook's history and should be revoked in the ngrok dashboard.

    Returns:
        The public base URL of the tunnel (str).

    Raises:
        RuntimeError: If no auth token is supplied.
    """
    import os
    from getpass import getpass

    auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ").strip()
    if not auth_token:
        raise RuntimeError(
            "An ngrok auth token is required: set NGROK_AUTH_TOKEN or enter it when prompted."
        )

    ngrok.set_auth_token(auth_token)
    public_url = ngrok.connect(8000).public_url
    print(f"✅ Public API: {public_url}/generate")
    return public_url

# --- Start everything ---
if __name__ == "__main__":
    import time

    # daemon=True: the server thread must not keep the interpreter alive on exit.
    threading.Thread(target=run_server, daemon=True).start()

    try:
        public_url = start_ngrok()
        print("Paste this URL in your frontend to connect to the backend.")

        # Keep the cell running until it is interrupted so the request log
        # stays attached to this cell's output.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("Server stopped")
    finally:
        # Shut down ngrok so the public URL stops working once the cell ends,
        # whether it was interrupted or tunnel setup failed part-way.
        # NOTE(review): inside a live notebook kernel the Flask thread itself keeps
        # listening on port 8000 until the runtime is restarted — verify before re-running.
        ngrok.kill()


tokenizer_config.json:   0%|          | 0.00/9.68k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00001-of-00003.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/99.6M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading ngrok ... * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8000
 * Running on http://172.28.0.12:8000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


✅ Public API: https://1485-34-106-102-80.ngrok-free.app/generate
Paste this URL in your frontend to connect to the backend.


INFO:werkzeug:127.0.0.1 - - [09/May/2025 07:15:38] "OPTIONS /generate HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [09/May/2025 07:16:54] "POST /generate HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [09/May/2025 07:17:16] "OPTIONS /generate HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [09/May/2025 07:17:17] "POST /generate HTTP/1.1" 200 -


Server stopped
