In [None]:
# System-level instructions for the model. process_stage_1 sends this string
# as the first content part, ahead of the per-case prompt built by
# generate_extraction_prompt. The three top-level keys named on the last line
# of the prompt ('summary', 'classification', 'checklist_evaluation') are the
# same keys process_stage_1 reads back from the parsed reply, so keep the two
# in sync when editing.
SYSTEM_PROMPT = """
You are an expert Indian Legal Assistant. Your task is to analyze Hindi Police Case Diaries (Chargesheets).
You must provide a structured JSON response based strictly on the provided text.

OUTPUT A: Extract FIR details, Names, Facts, and Legal Sections.
OUTPUT B: Classify the crime into 'theft_robbery', 'assault_hurt', 'cyber_fraud', 'ndps', or 'UNKNOWN'.
OUTPUT C: Evaluate specific checklist items from the static config provided.

Return ONLY a JSON object with the keys: 'summary', 'classification', and 'checklist_evaluation'.
"""

def generate_extraction_prompt(clean_text, checklist_json):
    """Build the per-case prompt containing the diary text and the checklist.

    Args:
        clean_text: Case diary text to analyse; inserted into the prompt
            verbatim.
        checklist_json: Checklist configuration. A ``str`` is inserted
            verbatim. A ``dict`` or ``list`` (e.g. the result of
            ``json.load``) is serialised to JSON text first.

    Returns:
        The prompt string.

    Raises:
        TypeError, ValueError: If a dict/list ``checklist_json`` cannot be
            serialised by ``json.dumps``.
    """
    import json  # local import so this definition does not depend on a later cell

    if isinstance(checklist_json, (dict, list)):
        # Formatting a dict straight into the f-string would embed its Python
        # repr (single quotes, True/None), which is not the JSON the prompt
        # label promises. ensure_ascii=False keeps non-ASCII text readable
        # instead of turning it into \uXXXX escapes.
        checklist_json = json.dumps(checklist_json, ensure_ascii=False)

    prompt = f"""
    Case Diary Text: {clean_text}
    
    Checklist Configuration (JSON): {checklist_json}
    
    Instructions:
    1. Identify the Crime Type by matching legal sections (e.g., IPC 379/461 matches theft_robbery).
    2. Summarize the incident in a comprehensive manner.
    3. For Output C, evaluate every 'required_item' in the checklist for the identified crime type. 
    4. Provide 'status' as 'PRESENT', 'MISSING', or 'PARTIAL' and include a brief 'detail' or 'reason'.
    """
    return prompt

In [None]:
import google.generativeai as genai
import json

def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    cleaned = text.strip()
    if len(cleaned) >= 6 and cleaned.startswith("```") and cleaned.endswith("```"):
        cleaned = cleaned[3:-3].strip()
        # The opening fence may carry a language tag, e.g. ```json
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    return cleaned.strip()


def process_stage_1(clean_text, *, checklist_path='checklists.json',
                    model_name='gemini-1.5-flash'):
    """Run the extraction prompt for one case diary and unpack the reply.

    Loads the static checklist configuration, sends SYSTEM_PROMPT plus the
    per-case prompt to the model, and parses the reply as JSON.

    Args:
        clean_text: Case diary text to analyse.
        checklist_path: Path of the checklist configuration JSON file.
        model_name: Name passed to ``genai.GenerativeModel``.

    Returns:
        A ``(summary, classification, checklist)`` tuple taken from the
        'summary', 'classification' and 'checklist_evaluation' keys of the
        reply; a missing key yields ``None`` in that position.

    Raises:
        json.JSONDecodeError: If the checklist file or the model reply is
            not valid JSON.
    """
    # Read explicitly as UTF-8 so non-ASCII checklist text does not depend on
    # the platform's default encoding.
    with open(checklist_path, 'r', encoding='utf-8') as f:
        checklists = json.load(f)

    # Hand the prompt builder real JSON text rather than the dict, whose
    # str() form would be a Python repr instead of JSON.
    checklist_json = json.dumps(checklists, ensure_ascii=False)

    model = genai.GenerativeModel(model_name)
    response = model.generate_content(
        [SYSTEM_PROMPT, generate_extraction_prompt(clean_text, checklist_json)]
    )

    # The reply may arrive wrapped in a ```json ... ``` fence even though the
    # prompt asks for JSON only; remove it before parsing.
    data = json.loads(_strip_code_fence(response.text))

    # Output A: structured case summary
    summary = data.get('summary')
    # Output B: crime classification
    classification = data.get('classification')
    # Output C: checklist evaluation
    checklist = data.get('checklist_evaluation')

    return summary, classification, checklist