In [None]:
import google.generativeai as genai
import json
import os

# Hand the Gemini SDK the API key read from the GOOGLE_API_KEY environment
# variable (the lookup yields None when the variable is unset).
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

def _extract_json_object(raw_text):
    """Return the substring of *raw_text* spanning its outermost ``{...}``.

    The model is asked for bare JSON but may still wrap it in a Markdown
    fence or add a sentence around it. Slicing from the first ``{`` to the
    last ``}`` drops such wrappers without touching backticks that are part
    of the JSON string values themselves.

    Args:
        raw_text: The raw reply text returned by the model.

    Returns:
        The candidate JSON object text, or the stripped reply unchanged when
        it contains no ``{...}`` pair (so that ``json.loads`` reports the
        real parsing problem).
    """
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end < start:
        return raw_text.strip()
    return raw_text[start:end + 1]


def generate_final_outputs(english_file, checklist_file, output_json_path):
    """Build the three-part structured report for one case diary.

    Reads the English case diary and the checklist configuration, asks the
    Gemini model to produce a JSON object with the keys ``Output_A``
    (summary), ``Output_B`` (classification) and ``Output_C`` (checklist),
    parses the reply, and writes it to *output_json_path*.

    Args:
        english_file: Path of the UTF-8 text file holding the case diary.
        checklist_file: Path of the UTF-8 JSON file holding the checklist
            schema that is embedded in the prompt.
        output_json_path: Path the parsed report is written to as JSON.

    Returns:
        The parsed report, or ``None`` when the case diary is blank or when
        the model call, the JSON parsing, or the file write fails (the
        failure is printed).

    Raises:
        OSError: If either input file cannot be opened.
        json.JSONDecodeError: If *checklist_file* is not valid JSON.
    """
    # Load the English content
    with open(english_file, "r", encoding="utf-8") as f:
        case_content = f.read()

    # Load static checklist config (Requirement 19 & 20)
    with open(checklist_file, "r", encoding="utf-8") as f:
        checklists = json.load(f)

    # A blank diary gives the model nothing to analyse; any report it
    # returned would be invented, so stop before calling the API.
    if not case_content.strip():
        print(f"Error in Stage 1 pipeline: case diary file '{english_file}' is empty")
        return None

    model = genai.GenerativeModel('gemini-2.5-flash')

    # Structured Output Prompting (Requirement 31)
    prompt = f"""
    You are a Legal AI Assistant. Analyze the provided English Case Diary text and map it to the Checklist Schema.
    
    Case Diary Text:
    {case_content}
    
    Checklist Schema:
    {json.dumps(checklists)}
    
    TASK:
    Generate a JSON response with exactly three keys: 'Output_A', 'Output_B', and 'Output_C'.
    
    Output_A (Summary): Extract FIR number, date, Police Station, Accused/Victim names, incident facts, and Legal Sections.
    Output_B (Classification): Identify the crime type from the schema. If no sections match, return {{"crime_type": "UNKNOWN", "reason": "No matching sections found"}}.
    Output_C (Checklist): For the detected crime type, evaluate every 'required_item'. 
    - Use '✅ PRESENT' with a brief detail if found.
    - Use '❌ MISSING' if not found.
    - Use '⚠ PARTIAL' if referenced but incomplete, stating what is missing.
    
    STRICT REQUIREMENT: Return ONLY valid JSON.
    """

    # Broad catch on purpose: this is the pipeline's outer boundary, and API,
    # parsing and write failures are all reported the same way (print + None).
    try:
        response = model.generate_content(prompt)
        # Keep only the JSON object; fences or prose around it are dropped
        # while the content of the JSON strings is left intact.
        json_str = _extract_json_object(response.text)
        final_output = json.loads(json_str)

        # Store as JSON file (Requirement 10)
        with open(output_json_path, "w", encoding="utf-8") as f:
            json.dump(final_output, f, indent=4, ensure_ascii=False)

        print(f"Success! Final structured report saved to {output_json_path}")
        return final_output

    except Exception as e:
        print(f"Error in Stage 1 pipeline: {e}")
        return None

# Run Stage 1 on the sample case diary and checklist config, writing the
# report to stage1_output.json.
generate_final_outputs(
    english_file="full_case_distinct_en.txt",
    checklist_file="checklists.json",
    output_json_path="stage1_output.json",
)

Success! Final structured report saved to final_hackathon_report.json


{'Output_A': {'FIR_Number': '999/2020',
  'Date': '12/07/2020',
  'Police_Station': 'GHI',
  'Accused_Details': 'Unknown thieves',
  'Victim_Details': 'XYZ, aged 28 years, son of ABC, resident of village DEF, Police Station GHI, District JKL',
  'Incident_Facts': "On 12/07/2020, complainant XYZ discovered a theft in his mobile shop 'Maa Saraswati Telecom' located at LC 14 Railway Gate, Near ZZ Market. Thieves entered by opening the asbestos roof. Stolen items include Rs. 8000 cash, two new mobile phones (IMEI 000000000000000, 111111111111111, 222222222222222, 333333333333333), and five old mobile phones given for repair.",
  'Legal_Sections': 'Sections 379/461 IPC'},
 'Output_B': {'crime_type': 'theft_robbery',
  'reason': "Sections 379/461 IPC directly match 'theft_robbery' typical sections (IPC 379)."},
 'Output_C': {'theft_robbery': {'FIR number and date': '✅ PRESENT - FIR No. 999/2020, dated 12/07/2020.',
   'Police station name': '✅ PRESENT - GHI Police Station.',
   'Place and ti