Installing packages

In [12]:
!pip install requests faiss-cpu transformers sentence-transformers beautifulsoup4



In [14]:
import json
from urllib.parse import urljoin

import faiss
import numpy as np
import requests
import torch
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from transformers import LlamaForCausalLM, LlamaTokenizer

# Embedding model shared by indexing and querying (Sentence-BERT for efficiency).
model = SentenceTransformer("all-MiniLM-L6-v2")

# Llama chat model and tokenizer used to write the final procedure.
# NOTE: this is a gated Hugging Face repo. Without granted access and an
# authenticated session (e.g. `huggingface-cli login`), from_pretrained raises
# "OSError: You are trying to access a gated repo" (401 Client Error).
LLAMA_MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"
llama_model = LlamaForCausalLM.from_pretrained(LLAMA_MODEL_ID)
llama_tokenizer = LlamaTokenizer.from_pretrained(LLAMA_MODEL_ID)

BASE_URL = "https://devgan.in"
LAW_TYPES = ["ipc", "bns"]  # Both IPC and BNS will be stored together

# FAISS setup
# Read the embedding width from the model instead of hard-coding it, so that
# swapping the embedding model cannot silently desynchronise the index
# dimension (384 for all-MiniLM-L6-v2).
d = model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(d)
# Metadata records; position i in this list corresponds to vector id i in `index`.
documents = []

# Scrape every chapter of each law code, embed its text and add it to the
# FAISS index. `documents` and `index` are always appended to together so that
# a FAISS vector id can be used directly as an index into `documents`.
REQUEST_TIMEOUT_S = 30  # never let one stalled HTTP request hang the notebook

for LAW_TYPE in LAW_TYPES:
    MAIN_URL = f"{BASE_URL}/{LAW_TYPE}/"
    # The chapter list is required; fail loudly instead of parsing an error page.
    response = requests.get(MAIN_URL, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    chapters = []
    for row in soup.select("table.menu tr"):
        columns = row.find_all("td")
        if len(columns) != 2:
            continue
        # Rows without a link (e.g. headers) would otherwise raise TypeError.
        anchor = columns[1].find("a")
        if anchor is None or not anchor.get("href"):
            continue
        chapter_number = columns[0].text.strip()
        chapter_title = columns[1].text.strip()
        # urljoin resolves root-relative, page-relative and absolute hrefs alike.
        chapter_link = urljoin(MAIN_URL, anchor["href"])
        chapters.append((chapter_number, chapter_title, chapter_link))

    # Scrape each chapter's content
    for chapter_number, chapter_title, chapter_link in chapters:
        try:
            chapter_response = requests.get(chapter_link, timeout=REQUEST_TIMEOUT_S)
            chapter_response.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: one unreachable chapter should not abort the whole scrape.
            print(f"Skipping {chapter_link}: {exc}")
            continue

        chapter_soup = BeautifulSoup(chapter_response.text, "html.parser")
        content_div = chapter_soup.find("div", id="content")
        chapter_content = (
            content_div.get_text(separator="\n", strip=True) if content_div else ""
        )
        if not chapter_content:
            # Do not index a placeholder: it would be retrievable as if it were legal text.
            print(f"Skipping {chapter_link}: content not found.")
            continue

        # Generate embeddings and store in FAISS
        # NOTE(review): the embedding model presumably truncates long inputs, so
        # only the beginning of a chapter may influence its vector -- consider
        # chunking chapters into sections; confirm against the model's max length.
        embedding = model.encode(chapter_content).astype(np.float32)
        index.add(np.array([embedding]))

        # Store metadata
        documents.append({
            "law_type": LAW_TYPE.upper(),
            "chapter_number": chapter_number,
            "chapter_title": chapter_title,
            "content": chapter_content,
            "source_url": chapter_link
        })

# Persist the FAISS index. The result of write_index must NOT be assigned back
# to `index`: the call is used only for its side effect, and rebinding would
# leave `index` as None and break the later index.search(...) call.
faiss.write_index(index, "law_faiss.index")

# Save metadata (same ordering as the vectors in the index)
with open("law_metadata.json", "w", encoding="utf-8") as f:
    json.dump(documents, f, ensure_ascii=False, indent=4)

print("Scraping and FAISS indexing complete!")

# Function to retrieve sections and generate procedure
def get_legal_procedure(grievance, k=3, max_section_chars=1500):
    """Retrieve the most relevant law chapters for a grievance and draft a procedure.

    The grievance is embedded with the module-level ``model``, the nearest
    chapters are looked up in the module-level FAISS ``index`` (whose ids map
    onto ``documents``), and the module-level Llama model is prompted with
    those chapters to produce a step-by-step procedure.

    Args:
        grievance: Free-text description of the grievance.
        k: Number of nearest chapters to retrieve (default 3).
        max_section_chars: Maximum number of characters of each chapter's
            content copied into the prompt. Chapters can be long; without a
            cap the tokenizer's right-truncation could cut off the trailing
            instruction. The returned ``retrieved_sections`` are not truncated.

    Returns:
        dict with:
            "procedure": the generated text only (the prompt is not echoed).
            "retrieved_sections": the metadata dicts of the retrieved chapters.
    """
    # Convert grievance to embedding
    query_embedding = model.encode(grievance).astype(np.float32)

    # Search FAISS index; ids of -1 mark "no result" when fewer than k vectors exist.
    _, neighbor_ids = index.search(np.array([query_embedding]), k=k)
    retrieved_sections = [documents[int(i)] for i in neighbor_ids[0] if i != -1]

    # Cap each chapter so that all sections and the final instruction fit in the prompt.
    section_blocks = [
        f"Section from {s['law_type']}, Chapter {s['chapter_number']}: {s['chapter_title']}\n{s['content'][:max_section_chars]}"
        for s in retrieved_sections
    ]

    # Construct prompt for Llama
    prompt = """
    A legal grievance has been reported: {grievance}.
    Based on the legal code, the following sections are relevant:

    {sections}

    Please generate a step-by-step procedure explaining what actions should be taken according to these legal sections.

    Your response should be clear, actionable, and reference the sections where necessary.
    """.format(
        grievance=grievance,
        sections="\n\n".join(section_blocks)
    )

    # Generate response using Llama
    inputs = llama_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    # Keep the inputs on the same device as the model (no-op on CPU).
    inputs = inputs.to(llama_model.device)
    prompt_length = inputs["input_ids"].shape[1]
    output = llama_model.generate(**inputs, max_length=4096, do_sample=True, temperature=0.7)
    # The generated sequence starts with the prompt tokens; decode only the continuation.
    response = llama_tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()

    return {
        "procedure": response,
        "retrieved_sections": retrieved_sections
    }

# Demo: run the retrieval + generation pipeline on a sample grievance and
# pretty-print the resulting dict as JSON (non-ASCII characters kept as-is).
grievance = "A person was assaulted in a public place. What should be done?"
result = get_legal_procedure(grievance)
print(
    json.dumps(
        result,
        ensure_ascii=False,
        indent=4,
    )
)


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-chat-hf.
401 Client Error. (Request ID: Root=1-67c94a8c-76a2ad7d5adae3f3224295e4;ef4f498f-32a1-4436-a19d-0c5c8f4a5406)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-chat-hf is restricted. You must have access to it and be authenticated to access it. Please log in.