### Install the latest LangChain Community package (for accessing community-maintained tools like local LLMs, retrievers, loaders, etc.)

In [None]:
!pip install -U langchain-community

### Install core libraries for building the Legal AI Chatbot:
- langchain: for chaining LLMs with tools and retrievers
- pypdf: to read and extract text from PDF files
- chromadb: to store and search document embeddings (vector DB)
- sentence-transformers: to generate semantic embeddings from text

In [None]:
!pip install langchain pypdf chromadb sentence-transformers

### Install supporting libraries for PDF handling and model management:
- pypdf: for parsing and splitting PDF documents
- python-dotenv: to manage environment variables securely (e.g., API keys, paths)
- transformers: to use HuggingFace transformer models for embeddings or LLMs

In [None]:
!pip install -q pypdf
!pip install -q python-dotenv
!pip install -q transformers

In [None]:
!pip install -q llama-index

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m267.3/267.3 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/41.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.3/129.3 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip show llama-index

Name: llama-index
Version: 0.12.40
Summary: Interface between LLMs and your data
Home-page: https://llamaindex.ai
Author: 
Author-email: Jerry Liu <jerry@llamaindex.ai>
License: 
Location: /usr/local/lib/python3.11/dist-packages
Requires: llama-index-agent-openai, llama-index-cli, llama-index-core, llama-index-embeddings-openai, llama-index-indices-managed-llama-cloud, llama-index-llms-openai, llama-index-multi-modal-llms-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index-readers-file, llama-index-readers-llama-parse, nltk
Required-by: 


### Install the LlamaIndex llama.cpp integration (this also pulls in `llama-cpp-python`, the backend that LangChain's `LlamaCpp` wrapper used below relies on)

In [None]:
!pip install llama-index-llms-llama-cpp

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma  # Or FAISS
from langchain.chains import RetrievalQA  # Import RetrievalQA here
import os

## Build the knowledge base from the 4 source PDFs

### Install ChromaDB for storing and searching vector embeddings locally

In [None]:
!pip install chromadb

### Loading and creating vector embeddings

In [None]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Folder that holds the source PDFs (Colab path).
pdf_folder = "/content/PDFS"

# Prefer splitting on "Section" headings, then paragraphs, then single lines.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    separators=["\nSection ", "\n\n", "\n"]
)

# Only real PDFs are processed and counted; sorting makes the chunk order
# independent of the arbitrary order returned by os.listdir().
pdf_filenames = sorted(
    name for name in os.listdir(pdf_folder) if name.lower().endswith(".pdf")
)

all_chunks = []
for filename in pdf_filenames:
    pdf_path = os.path.join(pdf_folder, filename)
    loader = PyPDFLoader(pdf_path)
    # load() returns the raw documents; load_and_split() would already chunk
    # them with a default splitter, so the text would be split twice.
    pages = loader.load()
    chunks = splitter.split_documents(pages)
    all_chunks.extend(chunks)

if not all_chunks:
    # Fail early with a readable message instead of an obscure error from Chroma.
    raise ValueError(f"No text chunks were produced from PDFs in {pdf_folder!r}.")

print(f"✅ Loaded and split {len(all_chunks)} chunks from {len(pdf_filenames)} PDFs.")

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk and write the collection to ./cyberlaw_vectorstore.
vectordb = Chroma.from_documents(
    documents=all_chunks,
    embedding=embedding_model,
    persist_directory="./cyberlaw_vectorstore"
)

# NOTE(review): newer Chroma releases are reported to persist automatically,
# which would make this call redundant — confirm against the installed version.
vectordb.persist()
print(f"✅ Vector store saved with {len(all_chunks)} chunks.")

### Load the previously persisted vector store

In [None]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# The embedding model passed here is the same "all-MiniLM-L6-v2" model name
# used by the cell that builds the store.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Open the Chroma collection stored under ./cyberlaw_vectorstore.
vectorstore_settings = {
    "persist_directory": "./cyberlaw_vectorstore",
    "embedding_function": embedding_model,
}
vectordb = Chroma(**vectorstore_settings)

print("✅ Vector store loaded.")

✅ Vector store loaded.


### Create a retriever from the Chroma vector store to fetch top 2 most similar document chunks for a query

In [None]:
# Similarity search over the vector store, returning the 2 closest chunks per query.
retriever_settings = {"search_type": "similarity", "search_kwargs": {"k": 2}}
retriever = vectordb.as_retriever(**retriever_settings)

### Download and load the TinyLlama Model

In [None]:
from langchain.chains import RetrievalQA
from langchain_community.llms import LlamaCpp
import os

import shutil
import urllib.request

# Local location of the TinyLlama GGUF file and the URL it is fetched from.
model_directory = "./llm_models_tiny"
model_filename = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
local_model_path = os.path.join(model_directory, model_filename)
model_url = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

# Create the directory if it doesn't exist
os.makedirs(model_directory, exist_ok=True)

# Download the model if it doesn't exist locally
if not os.path.exists(local_model_path):
    print(f"Downloading model from {model_url} to {local_model_path}")
    # Download to a temporary ".part" file and rename only on success, so an
    # interrupted download is never mistaken for a complete model next run.
    partial_model_path = local_model_path + ".part"
    try:
        with urllib.request.urlopen(model_url) as remote_file, \
                open(partial_model_path, "wb") as partial_file:
            shutil.copyfileobj(remote_file, partial_file)
        os.replace(partial_model_path, local_model_path)
    finally:
        # Only present here if the download failed part-way through.
        if os.path.exists(partial_model_path):
            os.remove(partial_model_path)
    print("Download complete.")
else:
    print(f"Model already exists at {local_model_path}")


# Local llama.cpp-backed LLM with no layers offloaded to GPU (n_gpu_layers=0).
# NOTE(review): TinyLlama-1.1B is usually documented with a 2048-token context
# window — confirm that n_ctx=3900 is intended.
llm = LlamaCpp(
    model_path=local_model_path,
    temperature=0.1,
    max_tokens=256,
    n_ctx=3900,
    n_gpu_layers=0,
    # Stop as soon as the model starts writing a new question/turn.
    stop=["\nQuestion:", "\nQ:", "\nUser:"],
    verbose=True,
)

# Default-prompt chain; a later cell rebinds `qa_chain` with a custom prompt.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

### Define a clear prompt template to guide the LLM to answer concisely using provided context

### Create a RetrievalQA chain that uses the retriever and LLM with the custom prompt

In [None]:
from langchain.prompts import PromptTemplate

# Prompt text: {context} and {question} are the two placeholders the
# PromptTemplate below declares as its input variables.
template = """
You are a helpful assistant specialized in answering questions concisely.
Use the provided context to answer the question below.
Only provide a direct answer. Do not generate additional questions or follow-ups.

Context:
{context}

Question:
{question}

Answer:
"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Rebuild the QA chain as a "stuff" chain that uses the custom prompt above
# ('map_reduce' is the alternative chain type, depending on your setup).
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
)

### Gradio

In [None]:
!pip install gradio

In [None]:
import gradio as gr

# In-memory cache: whitespace-trimmed question -> generated answer.
cache = {}

def chatbot_response(user_input, history=None):
    """Answer one chat message, reusing a cached answer when available.

    Args:
        user_input: The question text typed (or clicked) by the user.
        history: Accepted for the chat callback signature; not used here.

    Returns:
        The answer as a string, a prompt to enter a question when the input
        is blank, or a generic apology if anything raises.
    """
    try:
        # Trim so that "question" and " question \n" share one cache entry.
        question = (user_input or "").strip()
        if not question:
            # Do not spend an LLM call on an empty message.
            return "Please enter a question."

        if question in cache:
            response = cache[question]
        else:
            response = qa_chain.run(question)
            cache[question] = response

        print("Response from QA chain:", response)
        return str(response) if response else "I couldn't generate a response."

    except Exception as e:
        # Keep the UI alive; the details go to the notebook output.
        print("Error:", e)
        return "Sorry, an error occurred. Please try again."

# Sample questions passed to gr.ChatInterface as `examples`.
example_questions = [
    "What is the Indian Penal Code (IPC), 1860, and what does it cover?",
    "What are the employer’s obligations under the Gratuity Act, 1972?",
    "Who can become a certifying authority?",
    "What is a digital signature?",
    "When was the IT Act, 2000 amended?",
    "What is the history of banking laws in India?"
]

# Custom CSS for the Clear button, passed to gr.Blocks(css=...) below instead
# of being assigned to `demo.css` after the Blocks object already exists.
CLEAR_BUTTON_CSS = """
    #clear-button {
        background-color: #AFA939 !important;
        color: white !important;
        border: 1px solid #ccc !important;
        font-weight: bold;
        width: 100%;
        padding: 8px 0;
        margin-top: 10px;
        cursor: pointer;
    }
    #clear-button:hover {
        background-color: #999932 !important;
    }
    """

with gr.Blocks(css=CLEAR_BUTTON_CSS) as demo:
    # Title at the top
    gr.Markdown("## Legal AI Chatbot")

    with gr.Row():
        # Left panel with description and clear button
        with gr.Column(scale=2, min_width=250):
            gr.Markdown(
                """
                **Description:**
                TinyLlama, a compact yet powerful AI model trained extensively on Indian legal PDFs covering IPC, Labour Laws, Cyber Laws, and Banking Laws. Try clicking one of the sample questions below!

                **How to use:**
                1. Type your question or click one of the example questions below.
                2. Wait for the AI to generate an answer.
                3. Use the **Clear** button to reset the chat history anytime.

                **⚠️Disclaimer:**
                This chatbot can make mistakes & hallucinate. It provides information for educational purposes only and should not be considered legal advice.
                """
            )
            clear_btn = gr.Button("Clear Chat", elem_id="clear-button")

        # Main chat interface
        with gr.Column(scale=5):
            chatbot = gr.ChatInterface(
                chatbot_response,
                examples=example_questions,
            )

    # Empty the visible chat window when the Clear button is clicked.
    # NOTE(review): this resets only the Chatbot component; ChatInterface may
    # keep its own history state — confirm old messages do not reappear.
    clear_btn.click(lambda: None, None, chatbot.chatbot, queue=False)

# share=True requests a temporary public URL for the app.
demo.launch(share=True)

  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://acee35c0ef3ed12304.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




### 🔗 IMPORTANT: Gradio Link for Website

After running this notebook, Gradio will generate a **new public link** (e.g., `https://xyz123.gradio.live`).

📌 Please follow these steps:

1. **Copy the public Gradio link** shown in the output after `launch()` (usually something like `https://abc123.gradio.live`)
2. **Open your `index.html` file** inside the `Verdict AI UI\templates` folder
3. Replace the old link for this feature with the new one using the following format:

```html
<!-- Example -->
<a href="https://new-chatbot-link.gradio.live" target="_blank">Open Legal AI Chatbot</a>
```


## EVALUATION CODE

In [None]:
import shutil
from google.colab import files

# Zip the contents of ./cyberlaw_vectorstore into cyberlaw_vectorstore.zip
# in the current working directory.
shutil.make_archive(
    base_name="cyberlaw_vectorstore",
    format='zip',
    root_dir="./cyberlaw_vectorstore",
)

# Hand the archive to Colab's file-download helper.
files.download("cyberlaw_vectorstore.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

ref = """Labour law also known as employment law is the body of laws, administrative rulings, and
precedents which address the legal rights of, and restrictions on, working people and their
organizations. As such, it mediates many aspects of the relationship between trade unions,
employers and employees. In other words, Labour law defines the rights and obligations as workers,
union members and employers in the workplace. Generally, labour law covers:
 Industrial relations – certification of unions, labour-management relations, collective
bargaining and unfair labour practices;
 Workplace health and safety;
 Employment standards, including general holidays, annual leave, working hours, unfair
dismissals, minimum wage, layoff procedures and severance pay.
There are two broad categories of labour law. First, collective labour law relates to the tripartite
relationship between employee, employer and union. Second, individual labour law concerns
employees' rights at work and through the contract for work.
The labour movement has been instrumental in the enacting of laws protecting labour rights in the
19th and 20th centuries. Labour rights have been integral to the social and economic development
since the industrial revolution.
1.1. History of Labour laws
Labour law arose due to the demands of workers for better conditions, the right to organize, and the
simultaneous demands of employers to restrict the powers of workers in many organizations and to
keep labour costs low. Employers' costs can increase due to workers organizing to win higher wages,
or by laws imposing costly requirements, such as health and safety or equal opportunities
conditions. Workers' organizations, such as trade unions, can also transcend purely industrial
disputes, and gain political power - which some employers may oppose. The state of labour law at
any one time is therefore both the product of, and a component of, struggles between different
interests in society."""
res = """Labour Law is the body of laws, administrative rules, and precedents that address the legal rights and
restrictions on working people and their organizations. It also includes the relationship between trade union
organizations, employers, and employees. The history of labour law can be traced back to the industrial
revolution in the 19th century when workers demanded better conditions, the right to organize, and the
simultaneous demand for employers to restrict their powers. Employers' costs can increase due to workers
organizing to win higher wages or by laws imposing costly requirements such as health and safety or equal
opportunities conditions. Workers' organizations, such as trade unions, can also transcend purely industrial
relationships, such as the right to organize and collectively bargain for better working conditions. The
history of labour law includes the development of collective labour laws that relate to the relationship between
employer and employee, while individual labour laws concern specific aspects of workers' rights and restrictions
on employers."""

# Embed the reference text and the chatbot answer, then compare them with
# cosine similarity.
embeddings = model.encode([ref, res], convert_to_tensor=True)
reference_embedding, answer_embedding = embeddings[0], embeddings[1]
similarity_score = util.cos_sim(reference_embedding, answer_embedding).item()
print(
    "Question: Explain the history of Labour Laws",
    f"Semantic Similarity: {similarity_score:.4f}",
    sep="\n",
)


Question: Explain the history of Labour Laws
Semantic Similarity: 0.9122


In [None]:
# NOTE(review): looks like a leftover scratch/debug cell with no role in the
# evaluation — consider deleting it.
print("a")

a


In [None]:
from sentence_transformers import SentenceTransformer, util
import pandas as pd

# Sample data - Replace this with your actual reference texts and chatbot answers
data = [
    {
        "query": "What is the origin of word 'Cyber'?",
        "reference": "'Cyber' is a prefix used to describe a person, thing, or idea as part of the computer and information age. Taken from kybernetes, Greek word for 'steersman' or 'governor', it was first used in cybernetics, a word coined by Norbert Wiener and his colleagus.",
        "chatbot_answer": "The prefix 'cyber' is derived from the Greek word 'kybernetes', which means a 'steersman' or 'governo'. It was first used in cybernetic s, a word coined by Nobert Wiener and his colleagues."
    },
    {
        "query": "Explain Section 66A of the IT Act.",
        "reference": """Any person who sends, by means of a computer resource or a communication
device,—
(a) any information that is grossly offensive or has menacing character; or
(b) any information which he knows to be false, but for the purpose of causing
annoyance, inconvenience, danger, obstruction, insult, injury, criminal
intimidation, enmity, hatred or ill will, persistently by making use of such
computer resource or a communication device,
(c) any electronic mail or electronic mail message for the purpose of causing
annoyance or inconvenience or to deceive or to mislead the addressee or
recipient about the origin of such messages, shall be punishable with
imprisonment for a term which may extend to three years and with fine.
Explanation.— For the purpose of this section, terms “electronic mail” and
“electronic mail message” means a message or information created or
transmitted or received on a computer, computer system, computer resource or
communication device including attachments in text, images, audio, video and
any other electronic record, which may be transmitted with the message.
Sec.66B - Whoever dishonestly received or retains any stolen computer
resource or communication device knowing or having reason to believe the
same to be stolen computer resource or communication device, shall be
punished with imprisonment of either description for a term which may extend
to three years or with fine which may extend to rupees one lakh or with both.""",
        "chatbot_answer": """Section 66A of the Information Technology Act, 2000 provides for the penalty for
violation of the provisions of section 66B. The penalty is a fine of Rs 50,000 or
imprisonment for a term which may extend to three years, or both. The penalty is
increased by an additional amount of Rs 25,000 for every day during which the
violation continues. The penalty is payable by the person who violates the provisions
of section 66B."""
    },
    {
        "query": "What is the Minimum Wages Act about?",
        "reference": """The enactment of the Minimum Wages Act in 1948 is a landmark in the
labour history of India. The Act provides for fixation of minimum wages for notified scheduled
employment.
As per Government of India, for all the States, the minimum wages have been fixed at about Rs 40 to
60 per day per person, average about Rs 50 per day for 25 days per month.
There are 45 scheduled employments in the Central sphere and 1232 in the state sphere for which
minimum wages have been fixed.""",
        "chatbot_answer": """The Minimum Waages Act is a landmark in the labour history of India. It provides for fixation of
minimum wages for notified scheduled employments. The minimum wages have been fixed at about Rs 40 to
60 per day per person, average about Rs 50 per day for 25 days per month. There are 45 scheduled
employments in the Central sphere and 1232 in the state sphere for which minimum wages have been
fixed. The enactment of the Act is a landmark in the labour history of India."""
    },
    {
        "query": "What is the Origin of 'Banking'?",
        "reference": """Origin of ‘Banking’ the first bank was probably the religious temples of the
ancient world wherein gold was stored in the form of easy-to-carry compressed
plates. Their owners justly felt that temples were the safest places to store their
gold as they were constantly attended, well built and were sacred, thus deterring
would-be thieves.""",
        "chatbot_answer": """Origin of 'Banking' is the religious temple in ancient world wherein gold was stored in the form of easy-to-carry compressed plates. Their owner, justly felt that temples were the safeest places to store their gold as they were constantly attended, well built and well-secured, thus deterring would-be thieves."""
    }
]

def _verdict_for(similarity):
    """Map a similarity score to the label shown in the results table."""
    if similarity > 0.85:
        return "Excellent"
    if similarity > 0.7:
        return "Good"
    return "Needs Improvement"


# Sentence-embedding model used to compare reference text with chatbot answers.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Score each (reference, chatbot answer) pair on its own.
results = []
for entry in data:
    text_pair = [entry['reference'], entry['chatbot_answer']]
    embeddings = model.encode(text_pair, convert_to_tensor=True)
    reference_vec, answer_vec = embeddings[0], embeddings[1]
    score = util.cos_sim(reference_vec, answer_vec).item()
    verdict = _verdict_for(score)
    row = {
        "Query": entry['query'],
        "Similarity Score": round(score, 4),
        "Verdict": verdict,
    }
    results.append(row)

# Build the table once from all rows; the bare name displays it in the notebook.
df_results = pd.DataFrame(results)
df_results


Unnamed: 0,Query,Similarity Score,Verdict
0,What is the origin of word 'Cyber'?,0.9216,Excellent
1,Explain Section 66A of the IT Act.,0.4882,Needs Improvement
2,What is the Minimum Wages Act about?,0.8495,Good
3,What is the Origin of 'Banking'?,0.964,Excellent


In [None]:
import time

# Time one end-to-end QA call. perf_counter() is a monotonic high-resolution
# clock, so the interval cannot be distorted by system clock adjustments.
start = time.perf_counter()
response = qa_chain.run("What is the punishment for theft under IPC?")
end = time.perf_counter()

print("Response Time:", end - start, "seconds")


Llama.generate: 50 prefix-match hit, remaining 576 prompt tokens to eval
llama_perf_context_print:        load time =   51283.43 ms
llama_perf_context_print: prompt eval time =   33079.86 ms /   576 tokens (   57.43 ms per token,    17.41 tokens per second)
llama_perf_context_print:        eval time =    6560.83 ms /    50 runs   (  131.22 ms per token,     7.62 tokens per second)
llama_perf_context_print:       total time =   39706.40 ms /   626 tokens


Response Time: 39.842101097106934 seconds


In [None]:
import time

def measure_latency(chatbot_func, inputs, num_runs=5):
    """Measure the mean wall-clock latency of ``chatbot_func`` for each query.

    Args:
        chatbot_func: Callable taking a single query; its result is discarded.
        inputs: Iterable of queries. Each query is used as a dictionary key,
            so a repeated query overwrites its earlier entry.
        num_runs: Number of timed calls per query; must be at least 1.

    Returns:
        dict mapping each query to its average latency in seconds.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        # Avoids a ZeroDivisionError (0) or a meaningless average (negative).
        raise ValueError("num_runs must be at least 1")

    latency_results = {}

    for query in inputs:
        total_time = 0.0
        for _ in range(num_runs):
            # perf_counter() is monotonic, unlike time.time().
            start_time = time.perf_counter()
            chatbot_func(query)
            total_time += time.perf_counter() - start_time
        latency_results[query] = total_time / num_runs

    return latency_results

# Adapter that lets the latency helper call the QA chain with a bare query.
def get_chatbot_response(query):
    """Run ``query`` through ``qa_chain`` and return the chain's answer."""
    return qa_chain.run(query)

# Queries used for the latency measurement.
test_queries = [
    "What is the punishment for cheating under IPC?",
    "What is the history of banking laws in india?",
    "Define Section 375 of IPC.",
    "What are the labor laws related to maternity benefits?",
]

# Average each query over 5 runs.
latency_results = measure_latency(get_chatbot_response, test_queries, num_runs=5)

# Report one block per query.
for query in latency_results:
    avg_time = latency_results[query]
    print(f"Query: {query}\nAverage Latency: {avg_time:.3f} seconds\n")


Llama.generate: 53 prefix-match hit, remaining 518 prompt tokens to eval
llama_perf_context_print:        load time =   51283.43 ms
llama_perf_context_print: prompt eval time =   30402.68 ms /   518 tokens (   58.69 ms per token,    17.04 tokens per second)
llama_perf_context_print:        eval time =    6290.56 ms /    30 runs   (  209.69 ms per token,     4.77 tokens per second)
llama_perf_context_print:       total time =   36770.97 ms /   548 tokens
Llama.generate: 570 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_context_print:        load time =   51283.43 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =    6282.89 ms /    54 runs   (  116.35 ms per token,     8.59 tokens per second)
llama_perf_context_print:       total time =    6347.52 ms /    55 tokens
Llama.generate: 570 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_co

Query: What is the punishment for cheating under IPC?
Average Latency: 11.490 seconds

Query: What is the history of banking laws in india?
Average Latency: 26.250 seconds

Query: Define Section 375 of IPC.
Average Latency: 11.208 seconds

Query: What are the labor laws related to maternity benefits?
Average Latency: 34.740 seconds

