In [None]:
!pip install gradio langchain openai sentence-transformers faiss-cpu datasets pandas nltk pymongo
!pip install langchain-community

Collecting gradio
  Downloading gradio-5.26.0-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting pymongo
  Downloading pymongo-4.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.wh

In [None]:
import os
import gradio as gr
import pandas as pd
import numpy as np
import faiss
import nltk
import re
from datasets import load_dataset
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from pymongo import MongoClient

# Fetch the tokenizer resources, in this order: "punkt" first, then "punkt_tab".
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# Manual dataset upload (Google Colab only): pick ipc_sections.csv in the file dialog.
# The dialog is skipped when the file is already in the working directory, so a
# "Restart & Run All" does not block on user interaction and a repeated upload
# cannot end up stored under a renamed copy that the later
# pd.read_csv("ipc_sections.csv") would never read (assumed Colab behaviour).
# Delete the existing file first if you want to upload a newer version.
from google.colab import files

if os.path.exists("ipc_sections.csv"):
    uploaded = {}  # nothing uploaded in this run; same (empty) mapping type as files.upload()
else:
    uploaded = files.upload()

Saving ipc_sections.csv to ipc_sections.csv


In [None]:
def connect_mongo():
    """Connect to MongoDB and return the ``argulex.prompt_history`` collection.

    The connection string is read from the ``MONGODB_URI`` environment variable so
    real credentials do not have to be written into the notebook. When the variable
    is not set, the placeholder URI below is used and must be edited by hand.

    Returns:
        The ``prompt_history`` collection on success, or ``None`` when the client
        cannot be created or the server does not answer a ping.
    """
    uri = os.environ.get(
        "MONGODB_URI",
        # Replace with your actual MongoDB user, password and cluster name.
        "mongodb+srv://ProjectNameOfYourDB:Password@ClusterName.mvncwzt.mongodb.net/?retryWrites=true&w=majority&appName=ClusterName",
    )
    try:
        # Bound server selection so an unreachable cluster fails fast instead of hanging.
        client = MongoClient(uri, serverSelectionTimeoutMS=10000)
        # MongoClient connects lazily: without a round trip the success message
        # below would be printed even when the server is unreachable.
        client.admin.command("ping")
        db = client["argulex"]
        collection = db["prompt_history"]
        print("✅ MongoDB connected.")
        return collection
    except Exception as e:
        print("❌ MongoDB Connection Failed:", e)
        return None


# legal_chatbot reads this module-level name when it stores the chat history;
# it is None when the connection could not be established.
collection = connect_mongo()


In [None]:
# IPC sections: read the uploaded CSV and keep only rows that have a "Description".
ipc_df = pd.read_csv("ipc_sections.csv").dropna(subset=["Description"])

# Indian Constitution articles: "train" split of the Hugging Face dataset.
constitution_ds = load_dataset(
    "Sharathhebbar24/Indian-Constitution",
    split="train",
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/244 [00:00<?, ?B/s]

Final_IC.csv:   0%|          | 0.00/443k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/454 [00:00<?, ? examples/s]

In [None]:
def _long_sentences(passage):
    """Yield the stripped sentences of `passage` that are longer than 20 characters."""
    for raw_sentence in sent_tokenize(passage):
        cleaned = raw_sentence.strip()
        if len(cleaned) > 20:
            yield cleaned


# One record per sentence of every constitution article that has a non-empty description.
constitution_data = []
for article in constitution_ds:
    if "article_desc" not in article or not article["article_desc"]:
        continue
    constitution_data.extend(
        {"source": article["article_id"], "text": sentence}
        for sentence in _long_sentences(article["article_desc"])
    )

# One record per sentence of every IPC section description.
ipc_data = []
for _, section_row in ipc_df.iterrows():
    ipc_data.extend(
        {"source": f"Section {section_row['Section']}", "text": sentence}
        for sentence in _long_sentences(str(section_row["Description"]))
    )

# Parallel lists: texts[i] is the sentence whose origin is sources[i].
all_data = constitution_data + ipc_data
texts = [entry["text"] for entry in all_data]
sources = [entry["source"] for entry in all_data]


In [None]:
# Embed every sentence of the corpus with the MiniLM sentence encoder.
model = SentenceTransformer("all-MiniLM-L6-v2")
sentence_vectors = model.encode(
    texts,
    show_progress_bar=True,
    convert_to_tensor=False,
)
embeddings = np.array(sentence_vectors).astype("float32")

# Flat L2 index sized to the embedding width; row i of the index corresponds to texts[i].
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)


Batches:   0%|          | 0/46 [00:00<?, ?it/s]

In [None]:
# Keep an OPENAI_API_KEY that is already exported in the environment; only fall back
# to the placeholder (which must be replaced by hand) when no key is set. Assigning
# unconditionally would overwrite a real key with the placeholder.
os.environ.setdefault("OPENAI_API_KEY", "Replace-With-Your-OpenAI-API")

# temperature=0 is passed to the chat model used for answering.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Prompt with two slots: the retrieved legal text ({context}) and the user's {question}.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a legal expert. Given the legal text below and the user's question, provide a precise and accurate legal answer:

Legal Text:
{context}

Question:
{question}

Answer:"""
)

# Chain that fills the prompt and sends it to the chat model.
qa_chain = LLMChain(llm=llm, prompt=prompt_template)

In [None]:
def get_best_match(question):
    """Return the corpus sentence closest to `question` together with its source.

    The question is embedded with the module-level `model`, the single nearest
    neighbour is looked up in the module-level `index`, and the matching entries
    of `texts` and `sources` are returned as a ``(text, source)`` tuple.
    """
    query_vector = np.array(
        model.encode([question], convert_to_tensor=False)
    ).astype("float32")
    _distances, neighbour_ids = index.search(query_vector, k=1)
    top_hit = neighbour_ids[0][0]
    return texts[top_hit], sources[top_hit]

def legal_chatbot(question):
    """Answer a legal question and, when possible, log the exchange to MongoDB.

    Steps:
      1. Retrieve the best matching corpus sentence and its source via get_best_match.
      2. Ask the LLM chain for an answer based on that context; a generation failure
         is turned into an error string instead of being raised.
      3. Store question, context, source and response in the module-level
         `collection` if one is available; storage failures are only printed.

    Args:
        question: The user's question as plain text.

    Returns:
        A markdown string containing the answer (or the error text) and the source.
    """
    context, source = get_best_match(question)
    print("=== Retrieved Context ===\n", context)
    print("=== Source ===", source)

    try:
        response = qa_chain.run({"context": context, "question": question})
        print("=== Response ===", response)
    except Exception as e:
        response = f"Error during generation: {e}"
        print("❌ LLM Error:", e)

    # Look the collection up defensively: the global may never have been assigned.
    history = globals().get("collection")
    try:
        # pymongo collections raise NotImplementedError on truth-value testing,
        # so the check has to be an explicit comparison with None.
        if history is not None:
            result = history.insert_one({
                "question": question,
                "context": context,
                "source": source,
                "response": response
            })
            print("✅ MongoDB Inserted ID:", result.inserted_id)
    except Exception as e:
        print("❌ MongoDB insertion error:", e)

    return f"✅ **Answer:** {response}\n\n📚 **Source:** {source}"

In [None]:
# Expose legal_chatbot through a Gradio UI: free-text question in, markdown answer out.
demo = gr.Interface(
    fn=legal_chatbot,
    title="⚖️ ArguLex: Legal AI Chatbot",
    description="Ask legal questions from the Indian Constitution or IPC sections.",
    inputs="text",
    outputs="markdown",
)
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c148276319848393c3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


