In [None]:
# Install necessary libraries
!pip install pandas faiss-cpu sentence-transformers transformers --quiet

import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from google.colab import files



# Read the IPC dataset from its fixed Colab location; the CSV must already
# exist at this path before the cell runs.
df = pd.read_csv("/content/ipc_sections.csv")

# Pull the four columns used downstream into parallel lists: position i in
# every list comes from the same CSV row.
descriptions, sections, offenses, punishments = (
    df[column].tolist()
    for column in ("Description", "Section", "Offense", "Punishment")
)

# Sentence-embedding model used for both the dataset and the user query
print("Loading sentence transformer model...")
embedder = SentenceTransformer(
    "sentence-transformers/paraphrase-mpnet-base-v2"
)

# Encode every section description into one row of a NumPy array
print("Creating embeddings...")
embeddings = embedder.encode(descriptions, convert_to_numpy=True)

# Build an L2-distance FAISS index sized to the embedding width and add
# all description vectors to it
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Load the FLAN-T5 checkpoint and wrap it in a text2text-generation pipeline
print("Loading legal text generation model...")
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
generator = pipeline(
    "text2text-generation",
    model=llm_model,
    tokenizer=tokenizer,
)

# Function to search IPC Sections
def find_ipc_section(case_description, k=3):
    """Return the IPC sections whose descriptions best match a case description.

    The case text is embedded with the module-level ``embedder`` and searched
    against the module-level FAISS ``index``. Each hit is mapped back to the
    parallel ``sections``, ``offenses``, ``punishments`` and ``descriptions``
    lists.

    Args:
        case_description: Free-text description of the case.
        k: Maximum number of sections to return.

    Returns:
        A list of ``(section, offense, punishment, description, distance)``
        tuples in the order returned by the index search. The list holds
        fewer than ``k`` entries when the index contains fewer than ``k``
        vectors, and is empty when ``k`` is not positive or the index is
        empty.
    """
    # Never request more neighbours than the index actually holds.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    query_embedding = embedder.encode([case_description], convert_to_numpy=True)
    distances, indices = index.search(query_embedding, k)

    results = []
    for distance, row in zip(distances[0], indices[0]):
        # FAISS reports a missing neighbour as -1; used as a Python list
        # index that would silently select the last row, so skip it.
        if row < 0:
            continue
        results.append(
            (sections[row], offenses[row], punishments[row], descriptions[row], distance)
        )
    return results

# Helper: render section numbers as an English list for the conclusion
def _join_section_refs(section_numbers):
    """Format section numbers as a readable English list.

    One item gives "Section A", two give "Section A and Section B", and
    three or more give "Section A, Section B, and Section C".
    """
    refs = [f"Section {number}" for number in section_numbers]
    if len(refs) <= 2:
        return " and ".join(refs)
    return ", ".join(refs[:-1]) + f", and {refs[-1]}"


# Function to Generate Legal Document with formatted IPC sections
def generate_legal_doc(case_input, retrieved_sections):
    """Build a plain-text legal analysis report for a case.

    The report contains the case description, the retrieved IPC sections, an
    analysis produced by the module-level ``generator`` pipeline, and a
    one-sentence conclusion naming the sections.

    Args:
        case_input: Free-text description of the case.
        retrieved_sections: Sequence of
            ``(section, offense, punishment, description, distance)`` tuples,
            as returned by ``find_ipc_section``. May be empty.

    Returns:
        The complete report as a single string. The analysis is generated
        with sampling enabled, so its text can differ between calls. When
        ``retrieved_sections`` is empty, the report states that no section
        was found and the text generator is not called.
    """
    divider = "=" * 50 + "\n\n"
    section_numbers = [entry[0] for entry in retrieved_sections]

    # Collect the pieces and join once instead of repeated string +=.
    parts = [
        "LEGAL CASE ANALYSIS REPORT\n\n",
        divider,
        "CASE DESCRIPTION:\n",
        f"{case_input}\n\n",
        divider,
        "APPLICABLE IPC SECTIONS:\n\n",
    ]

    # With nothing retrieved there is nothing to analyse; return a short
    # report rather than prompting the model with an empty list.
    if not retrieved_sections:
        parts += [
            "No matching IPC sections were found.\n\n",
            divider,
            "CONCLUSION:\n",
            "No applicable section of the Indian Penal Code could be "
            "identified for this case.\n\n",
        ]
        return "".join(parts)

    for i, (sec, off, pun, desc, _) in enumerate(retrieved_sections, 1):
        parts.append(
            f"{i}. Section {sec}\n"
            f"   - Offense: {off}\n"
            f"   - Punishment: {pun}\n"
            f"   - Description: {desc}\n\n"
        )
    parts.append(divider)

    # Legal Analysis prompt (only the section numbers are passed to the model)
    prompt = f"""
    Based on the case description: "{case_input}"

    And the following applicable IPC sections:
    {section_numbers}

    Provide a detailed legal analysis that:
    1. Explains how each IPC section applies to the case facts
    2. Discusses potential defenses
    3. Analyzes the severity of the offenses
    4. Estimates likely legal outcomes

    Write in professional legal language suitable for court documentation.
    """

    # Generate analysis
    analysis = generator(prompt, max_length=1024, do_sample=True, temperature=0.7)[0]['generated_text']

    parts += [
        "LEGAL ANALYSIS:\n\n",
        analysis + "\n\n",
        divider,
        "CONCLUSION:\n",
        "Based on the above analysis, this case appears to involve violations of ",
        _join_section_refs(section_numbers),
        " of the Indian Penal Code.\n\n",
    ]
    return "".join(parts)

# Main execution: read a case from the user, retrieve the closest IPC
# sections, generate the report, and optionally save it to a text file.
print("\n" + "="*50)
print("IPC SECTION FINDER AND LEGAL DOCUMENT GENERATOR")
print("="*50 + "\n")

case_input = input("Enter the case details: ")

print("\nSearching for relevant IPC sections...")
retrieved = find_ipc_section(case_input, k=3)

print("\n" + "="*50)
print("RETRIEVED IPC SECTIONS:")
print("="*50)
# The distance in each tuple is not shown to the user.
for i, (sec, off, pun, desc, _) in enumerate(retrieved, 1):
    print(f"\n{i}. Section {sec}")
    print(f"   Offense: {off}")
    print(f"   Punishment: {pun}")
    print(f"   Description: {desc}")

print("\n" + "="*50)
print("GENERATING LEGAL DOCUMENT...")
print("="*50 + "\n")

legal_document = generate_legal_doc(case_input, retrieved)
print(legal_document)

# Option to save the document
save_option = input("\nWould you like to save this document? (yes/no): ")
# Tolerate surrounding whitespace and the common "y" shorthand.
if save_option.strip().lower() in ("y", "yes"):
    # Fall back to a default name so an empty answer does not create ".txt".
    base_name = input("Enter filename (without extension): ").strip() or "legal_document"
    filename = base_name + ".txt"
    # Explicit UTF-8 so non-ASCII characters in the case text or generated
    # analysis are written the same way on every platform.
    with open(filename, 'w', encoding="utf-8") as f:
        f.write(legal_document)
    print(f"Document saved as {filename}")