In [None]:
!pip install faiss-cpu

In [None]:
!pip install openai==0.28


In [None]:
# Install Google Generative AI library
!pip install google-generativeai

# Import the library
import google.generativeai as genai

# Configure with your Google API key
GOOGLE_API_KEY = ""
genai.configure(api_key=GOOGLE_API_KEY)

# Create the model
model = genai.GenerativeModel("gemini-1.5-flash")

# Generate text
prompt = "Write a short poem about AI and web development."
response = model.generate_content(prompt)

# Print the output
print(response.text)

In [None]:
import os
import json
from typing import List, Dict, Any
from dataclasses import dataclass

from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from tqdm import tqdm


# Optional OpenAI import (used if OPENAI_API_KEY is set)
try:
    import openai
    OPENAI_AVAILABLE = True
except Exception:
    OPENAI_AVAILABLE = False

# CELL: Simple precedent dataset
# Keep the dataset small for speed. In real usage, you'd index many case texts.
# Each precedent is a plain dict with these keys:
#   id      - identifier string; Retriever collects these into `ids`, and
#             LegalAgent copies it into Evidence.precedent_id
#   title   - case title (not read by the code visible in this file)
#   summary - free text; this is the field Retriever embeds and indexes
#   year    - integer year
#   court   - name of the court
PRECEDENTS = [
    {
        "id": "SC-2017-RightToPrivacy",
        "title": "Justice X v. Union of India (2017) - Right to Privacy",
        "summary": (
            "In 2017, the Supreme Court held that the Right to Privacy is a fundamental right under Article 21. "
            "The judgment emphasized dignity, informational privacy, and the need to balance state interests against personal liberties. "
            "The Court outlined a proportionality test and warned against unchecked state surveillance."
        ),
        "year": 2017,
        "court": "Supreme Court"
    },
    {
        "id": "HC-2019-DataRetention",
        "title": "A. v. State (2019) - Data Retention Limits",
        "summary": (
            "High Court struck down a broad data retention law that required indefinite storage of telecom records. "
            "Court held that retention must be limited in scope, time and purpose; safeguards and oversight required."
        ),
        "year": 2019,
        "court": "High Court"
    },
    {
        "id": "SC-2022-BiometricGuidelines",
        "title": "R. v. Union (2022) - Biometric Guidelines clarified",
        "summary": (
            "Supreme Court clarified that biometric collection for targeted welfare schemes may be permissible with consent and safeguards. "
            "However, mandatory universal biometric databases without judicial oversight were viewed skeptically."
        ),
        "year": 2022,
        "court": "Supreme Court"
    }
]

# CELL: Embeddings + FAISS index builder
EMBED_MODEL_NAME = 'all-MiniLM-L6-v2'  # compact and fast


class Retriever:
    """Embed precedent summaries and serve nearest-neighbour lookups over them.

    Every precedent's ``summary`` is encoded with a SentenceTransformer model,
    L2-normalised, and stored in a flat inner-product FAISS index, so the
    search score is cosine similarity.
    """

    def __init__(self, precedents: List[Dict[str, Any]], model_name: str = EMBED_MODEL_NAME):
        """Encode ``precedents`` and build the index.

        Args:
            precedents: dicts that each provide at least ``'id'`` and ``'summary'``.
            model_name: SentenceTransformer model to load.

        Raises:
            ValueError: if ``precedents`` is empty; there is nothing to embed
                and the embedding dimension could not be derived.
        """
        if not precedents:
            raise ValueError('precedents must contain at least one entry to build the index')
        self.precedents = precedents
        self.model = SentenceTransformer(model_name)
        self.ids = [p['id'] for p in precedents]
        # Build embeddings
        texts = [p['summary'] for p in precedents]
        print('Computing embeddings for precedents...')
        raw = self.model.encode(texts, show_progress_bar=True, convert_to_numpy=True)
        # normalize_L2 rewrites the buffer in place, so hand FAISS a
        # C-contiguous float32 matrix regardless of what the encoder produced.
        self.embeddings = np.ascontiguousarray(raw, dtype=np.float32)
        # Normalize
        faiss.normalize_L2(self.embeddings)
        d = self.embeddings.shape[1]
        self.index = faiss.IndexFlatIP(d)  # inner product -> cosine when normalized
        self.index.add(self.embeddings)

    def retrieve(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
        """Return up to ``k`` precedent dicts most similar to ``query``.

        Results come back in the order the index returns them. A non-positive
        ``k`` yields an empty list without touching the model or the index.
        """
        if k <= 0:
            return []
        q_emb = np.ascontiguousarray(
            self.model.encode([query], convert_to_numpy=True), dtype=np.float32
        )
        faiss.normalize_L2(q_emb)
        _, neighbours = self.index.search(q_emb, k)
        total = len(self.precedents)
        # Skip negative or out-of-range positions (e.g. when k exceeds the
        # number of indexed precedents) instead of indexing with them.
        return [self.precedents[i] for i in neighbours[0] if 0 <= i < total]

# CELL: Simple agentic pipeline
@dataclass
class Evidence:
    """One retrieved precedent, reduced to the fields the prompt builder formats."""
    # Filled from the precedent dict's 'id'; printed as the bracketed tag in the prompt.
    precedent_id: str
    # Filled from the precedent dict's 'summary'.
    text: str
    # Filled from the precedent dict's 'year'.
    year: int
    # Filled from the precedent dict's 'court'.
    court: str

class LegalAgent:
    """Retrieve-then-reason pipeline over a precedent retriever.

    ``run`` retrieves evidence for a case, builds a prompt from it, and sends
    the prompt either to OpenAI (when the optional import succeeded and
    ``OPENAI_API_KEY`` is set) or to the offline ``simulated_reasoner``.
    """

    def __init__(self, retriever: "Retriever"):
        self.retriever = retriever
        # Chat model name; resolved on the first OpenAI call and reused after
        # that so the model listing is not fetched on every request.
        self._chat_model = None

    @staticmethod
    def _select_model(listing: Any) -> str:
        """Pick 'gpt-4o-mini' when the listing offers it, otherwise 'gpt-4o'.

        ``listing`` is the mapping returned by ``openai.Model.list()``; the
        model ids live in the ``'id'`` field of each entry under ``'data'``.
        """
        available = {entry['id'] for entry in listing['data']}
        return 'gpt-4o-mini' if 'gpt-4o-mini' in available else 'gpt-4o'

    def evidence_check(self, query: str) -> "List[Evidence]":
        """Retrieve the top 3 precedents for ``query`` as Evidence objects."""
        docs = self.retriever.retrieve(query, k=3)
        return [
            Evidence(precedent_id=d['id'], text=d['summary'], year=d['year'], court=d['court'])
            for d in docs
        ]

    def call_llm(self, prompt: str, max_tokens: int = 512) -> str:
        """Return the model's answer to ``prompt``.

        Uses the OpenAI chat completion API when the library is importable and
        ``OPENAI_API_KEY`` is set; otherwise returns the templated output of
        ``simulated_reasoner``. Errors raised by the OpenAI call propagate.
        """
        key = os.environ.get('OPENAI_API_KEY')
        if not (key and OPENAI_AVAILABLE):
            # Simulated LLM: produce a structured output using templates (fast & offline)
            return simulated_reasoner(prompt)

        openai.api_key = key
        if getattr(self, '_chat_model', None) is None:
            # `'name' in openai.Model.list()` would test the keys of the
            # response object rather than the model ids, so the ids are
            # compared explicitly in _select_model.
            self._chat_model = self._select_model(openai.Model.list())
        resp = openai.ChatCompletion.create(
            model=self._chat_model,
            messages=[{"role": "system", "content": "You are a legal reasoning assistant."},
                      {"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=0.2
        )
        return resp['choices'][0]['message']['content']

    def run(self, case_description: str) -> Dict[str, Any]:
        """Run the full pipeline for one case.

        Returns a dict with the retrieved ``'evidence'`` (as plain dicts), the
        ``'prompt'`` that was sent, and the raw ``'llm_output'`` text.
        """
        # Agent steps:
        # 1. Retrieve precedents
        evidence = self.evidence_check(case_description)
        # 2. Build prompt with retrieved evidence and instructions for multi-step reasoning
        prompt = build_prompt(case_description, evidence)
        # 3. Call LLM
        llm_out = self.call_llm(prompt)
        # 4. Parse output (we'll keep it simple and return raw)
        return {
            'evidence': [vars(e) for e in evidence],
            'prompt': prompt,
            'llm_output': llm_out
        }

# CELL: Prompt builder and simulated reasoner

def build_prompt(case_description: str, evidence: List[Evidence]) -> str:
    """Assemble the instruction prompt for one case.

    The prompt contains the case description, one entry per evidence item
    formatted as ``[id] text (court, year)`` and separated by blank lines, and
    a fixed five-point task list followed by formatting instructions.
    """
    citations = []
    for item in evidence:
        citations.append(f"[{item.precedent_id}] {item.text} ({item.court}, {item.year})")
    precedents_block = "\n\n".join(citations)

    # Fragments are concatenated verbatim, in order, with no separator.
    fragments = [
        "You are asked to reason about a legal case.\n",
        "Case description:\n",
        case_description,
        "\n\n",
        "Relevant precedents:\n",
        precedents_block,
        "\n\n",
        "Task:\n",
        "1) Summarize the most relevant points of law from the precedents.\n",
        "2) Provide legal arguments in favor of the petitioner (privacy breach).\n",
        "3) Provide legal arguments in favor of the government (state interest).\n",
        "4) Assess the likely verdict and explain the reasoning, using a proportionality test if appropriate.\n",
        "5) List any recommended safeguards or modifications to the policy to make it constitutional.\n\n",
        "Answer in numbered sections. Be concise and cite precedents by their id when used.",
    ]
    return "".join(fragments)


def simulated_reasoner(prompt: str) -> str:
    """Return a canned five-section legal analysis.

    Offline stand-in for the LLM so the notebook runs without an API key.
    The text is fixed: ``prompt`` is accepted for interface parity but is not
    inspected, so every call returns the same string.
    """
    sections = (
        "1) Precedent summary:\n- 2017 Right to Privacy: Privacy is fundamental under Article 21; proportionality required.\n- 2019 Data Retention: retention limited by scope/time/purpose.\n- 2022 Biometric Guidelines: biometric collection may be permissible with consent & safeguards, but universal mandatory databases are risky.",
        "\n2) Arguments for petitioner:\n- Mandatory biometric submission is an intrusion into informational privacy (cite SC-2017-RightToPrivacy).\n- Policy fails proportionality: it's overbroad and not the least restrictive means.\n- Lack of sufficient safeguards and oversight (cite HC-2019-DataRetention).",
        "\n3) Arguments for government:\n- Biometric data improves efficient delivery of services and reduces fraud.\n- If policy contains strong safeguards, oversight and limited retention, it could be constitutional in part (cite SC-2022-BiometricGuidelines).\n- State interest in public welfare and efficient administration is legitimate.",
        "\n4) Likely verdict:\n- Court may strike down or read-down the policy as applied universally and mandatorily. A permissible variant would be: limited scope, explicit safeguards, independent oversight, and retention limits.\n- The proportionality test likely favors privacy unless government demonstrates strict necessity and narrow tailoring.",
        "\n5) Suggested safeguards:\n- Purpose limitation, data minimization, retention schedules, independent oversight, encryption and breach notifications, opt-out/consent where feasible, judicial review.",
    )
    # Sections are separated by a blank line; sections 2-5 also start with a newline.
    return "\n\n".join(sections)

# CELL: Example usage
# Demo: build the retriever over PRECEDENTS, run the agent on one sample case,
# print what was retrieved and what the (real or simulated) LLM answered, and
# write the full result dict to a JSON file.
if __name__ == '__main__':
    print('Initializing retriever and agent...')
    # Constructing the Retriever loads the embedding model and indexes PRECEDENTS.
    retr = Retriever(PRECEDENTS)
    agent = LegalAgent(retr)

    # Sample case description used as both the retrieval query and the prompt body.
    case_2025 = (
        "In 2025, the government issues a policy mandating that all citizens must submit biometric data (fingerprints and iris scans) "
        "to access public services such as welfare disbursement and government IDs. The petitioner claims this violates their fundamental right to privacy under Article 21."
    )

    # result has the keys 'evidence', 'prompt' and 'llm_output'.
    result = agent.run(case_2025)

    print('\n===== Evidence retrieved =====')
    for e in result['evidence']:
        # Show only the first 140 characters of each summary.
        print(f"- {e['precedent_id']}: {e['text'][:140]}...")

    print('\n===== LLM Output =====')
    print(result['llm_output'])

    print('\n===== Notes =====')
    print('If you want more thorough legal-style outputs, set your OPENAI_API_KEY and re-run.\n')
    print('To save outputs to files or push to GitHub, you can write result to JSON and commit to your repo.')

    # Example: save result
    # Opened in 'w' mode with a relative path: the file is created in the
    # current working directory and overwritten if it already exists.
    with open('hannoworks_result_example.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)

    print('Saved example output to hannoworks_result_example.json')