Installing packages

In [1]:
!pip install requests faiss-cpu transformers sentence-transformers beautifulsoup4

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [7]:
# Authenticate with the Hugging Face Hub; the command prompts interactively for an access token.
# NOTE(review): presumably required because the meta-llama checkpoint loaded later is access-restricted — confirm.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
The token `Gen AI Project` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `Gen AI Pr

In [None]:
import json
from urllib.parse import urljoin

import faiss
import numpy as np
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Sentence-BERT encoder, used for both the scraped chapters and incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Llama-2 chat model, driven through the Hugging Face text-generation pipeline.
llama_pipeline = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")

BASE_URL = "https://devgan.in"
LAW_TYPES = ["ipc", "bns"]  # Both IPC and BNS will be stored together

# FAISS setup. The vector width is read from the encoder instead of being
# hard-coded, so the index cannot drift out of sync if the encoder is swapped.
d = model.get_sentence_embedding_dimension()  # 384 for all-MiniLM-L6-v2
index = faiss.IndexFlatL2(d)
documents = []  # Metadata records; documents[i] describes row i of `index`

REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the cell

for LAW_TYPE in LAW_TYPES:
    MAIN_URL = f"{BASE_URL}/{LAW_TYPE}/"
    response = requests.get(MAIN_URL, timeout=REQUEST_TIMEOUT)
    # Without the chapter listing nothing can be scraped, so fail loudly on 4xx/5xx
    # rather than parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Collect (number, title, url) for every two-column row of the menu table.
    chapters = []
    for row in soup.select("table.menu tr"):
        columns = row.find_all("td")
        if len(columns) != 2:
            continue
        anchor = columns[1].find("a")
        if anchor is None or not anchor.get("href"):
            # A row without a link has nothing to fetch; indexing into None would raise.
            continue
        chapter_number = columns[0].text.strip()
        chapter_title = columns[1].text.strip()
        # urljoin copes with absolute, root-relative and page-relative hrefs alike.
        chapter_link = urljoin(MAIN_URL, anchor["href"])
        chapters.append((chapter_number, chapter_title, chapter_link))

    # Scrape each chapter's content
    for chapter_number, chapter_title, chapter_link in chapters:
        chapter_response = requests.get(chapter_link, timeout=REQUEST_TIMEOUT)
        if not chapter_response.ok:
            # Best effort: one bad chapter page should not abort the whole crawl.
            print(f"Skipping {chapter_link}: HTTP {chapter_response.status_code}")
            continue

        chapter_soup = BeautifulSoup(chapter_response.text, "html.parser")
        content_div = chapter_soup.find("div", id="content")
        chapter_content = content_div.get_text(separator="\n", strip=True) if content_div else ""

        if not chapter_content:
            # Embedding a placeholder string would make it retrievable as if it
            # were real legal text, so such chapters are left out of the index.
            print(f"Skipping {chapter_link}: no content found")
            continue

        # Generate embeddings and store in FAISS.
        # NOTE(review): the encoder presumably truncates long inputs, so a
        # whole-chapter vector may reflect only its opening — consider chunking.
        embedding = model.encode(chapter_content).astype(np.float32)
        index.add(np.array([embedding]))

        # Store metadata in lockstep with index.add so list positions match FAISS row ids.
        documents.append({
            "law_type": LAW_TYPE.upper(),
            "chapter_number": chapter_number,
            "chapter_title": chapter_title,
            "content": chapter_content,
            "source_url": chapter_link
        })

# Save FAISS index (the call had been fused into this comment line, so the
# index file was never written).
faiss.write_index(index, "law_faiss.index")

# Save metadata alongside it; list order matches the row order of the index.
with open("law_metadata.json", "w", encoding="utf-8") as f:
    json.dump(documents, f, ensure_ascii=False, indent=4)

print("Scraping and FAISS indexing complete!")

# Function to retrieve sections and generate procedure
def get_legal_procedure(grievance, k=3, max_section_chars=3000, max_new_tokens=512):
    """Retrieve the chapters closest to a grievance and ask Llama for a procedure.

    Relies on the module-level ``model``, ``index``, ``documents`` and
    ``llama_pipeline`` objects.

    Args:
        grievance: Free-text description of the complaint.
        k: Number of nearest chapters to pull from the FAISS index.
        max_section_chars: Per-chapter character budget inside the prompt.
            Longer chapter text is cut so the prompt leaves room for the
            answer (a heuristic: characters are only a rough proxy for
            tokens). Pass ``None`` to disable truncation.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        dict with two keys: ``"procedure"`` (the generated text only, without
        the prompt echoed back) and ``"retrieved_sections"`` (the full,
        untruncated metadata records that were retrieved).
    """
    # Convert grievance to embedding
    query_embedding = model.encode(grievance).astype(np.float32)

    # Search FAISS index; slots FAISS could not fill come back as -1.
    D, I = index.search(np.array([query_embedding]), k=k)
    retrieved_sections = [documents[i] for i in I[0] if i != -1]

    # Render each hit for the prompt, clipping very long chapter text.
    section_blocks = []
    for s in retrieved_sections:
        content = s["content"]
        if max_section_chars is not None and len(content) > max_section_chars:
            content = content[:max_section_chars] + "\n[... truncated ...]"
        section_blocks.append(
            f"Section from {s['law_type']}, Chapter {s['chapter_number']}: {s['chapter_title']}\n{content}"
        )

    # Construct prompt for Llama
    prompt = """
    A legal grievance has been reported: {grievance}.
    Based on the legal code, the following sections are relevant:

    {sections}

    Please generate a step-by-step procedure explaining what actions should be taken according to these legal sections.

    Your response should be clear, actionable, and reference the sections where necessary.
    """.format(
        grievance=grievance,
        sections="\n\n".join(section_blocks)
    )

    # Generate response using Hugging Face pipeline.
    # max_new_tokens bounds only the answer; max_length would also count the
    # prompt tokens, which a long prompt can use up or exceed.
    # return_full_text=False stops the prompt being echoed into the result.
    response = llama_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"]

    return {
        "procedure": response,
        "retrieved_sections": retrieved_sections
    }

# Demo: run one sample grievance through retrieval + generation and pretty-print the result.
grievance = "A person was assaulted in a public place. What should be done?"
result = get_legal_procedure(grievance)
result_json = json.dumps(result, ensure_ascii=False, indent=4)
print(result_json)

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]