In [None]:
# CELL 1 — Install (run once)
!pip install -q sentence-transformers faiss-cpu huggingface-hub transformers accelerate bitsandbytes
print("✓ Install complete")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m78.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25h✓ Install complete


In [None]:
# CELL 1: Imports and device
import os, re, json, random, math
from datetime import datetime
from collections import defaultdict
from typing import List, Dict, Any, Tuple


import numpy as np
import pandas as pd


import faiss
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM


DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'✓ Using device: {DEVICE}')

✓ Using device: cuda


In [None]:
# %%
# CELL 2: Configurable settings
EMBED_MODEL = 'bhavyagiri/InLegal-Sbert'
ZERO_SHOT_MODEL = 'law-ai/InCaseLawBERT'
NER_MODEL = 'law-ai/InLegalBERT'


LLM_PREFERRED = [
    "soketlabs/pragna-1b",
    "google/flan-t5-large",
]

USE_LLM = True

In [None]:
# CELL 3 — Load Models (Legal-LLaMA-1B Enabled)

from transformers import AutoTokenizer, AutoModelForCausalLM

print('Loading embedding model...')
embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
print('✓ Embedder loaded.')

# ZERO-SHOT CLASSIFIER
try:
    print('Loading zero-shot classifier...')
    zsc = pipeline('zero-shot-classification', model=ZERO_SHOT_MODEL, device=0 if DEVICE=='cuda' else -1)
    print('✓ Zero-shot classifier ready.')
except:
    zsc = None
    print('⚠ Zero-shot classifier unavailable.')

# NER
try:
    print('Loading NER...')
    ner_pipe = pipeline('ner', model=NER_MODEL, aggregation_strategy='simple', device=0 if DEVICE=='cuda' else -1)
    print('✓ NER ready.')
except:
    ner_pipe = None
    print('⚠ NER unavailable, regex fallback enabled.')

# LLM (Legal Reasoning Model)
HAS_LLM = False
LLM_MODEL, LLM_TOKENIZER, LLM_NAME = None, None, None

if USE_LLM:
    for mname in LLM_PREFERRED:
        try:
            print(f'Attempting LLM load: {mname}')
            LLM_TOKENIZER = AutoTokenizer.from_pretrained(mname)
            LLM_MODEL = AutoModelForCausalLM.from_pretrained(
                mname,
                torch_dtype=torch.float32,
                device_map=None
            ).to(DEVICE)
            HAS_LLM = True
            LLM_NAME = mname
            print(f'✅ Loaded Legal Reasoning LLM: {mname}')
            break
        except Exception as e:
            print(f'❌ Failed: {mname} — {str(e)[:200]}')

if not HAS_LLM:
    print("⚠ No legal reasoning LLM loaded — using deterministic analysis fallback.")


Loading embedding model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/341 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/114 [00:00<?, ?B/s]

2_Dense/pytorch_model.bin:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

✓ Embedder loaded.
Loading zero-shot classifier...


config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/534M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at law-ai/InCaseLawBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/343 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/534M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


✓ Zero-shot classifier ready.
Loading NER...


config.json:   0%|          | 0.00/671 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/534M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/534M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at law-ai/InLegalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/516 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0


✓ NER ready.
Attempting LLM load: soketlabs/pragna-1b


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.51G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/209 [00:00<?, ?B/s]

✅ Loaded Legal Reasoning LLM: soketlabs/pragna-1b


In [None]:
# %%
# CELL 4 — Load Legal Q&A Dataset (Kaggle-Optimized, Always Fresh)

from huggingface_hub import snapshot_download
import os, pandas as pd, json, glob

# 🔧 Change this if you use your own dataset repo
REPO_ID = "Techmaestro369/indian-legal-texts-finetuning"

# Create cache dir (Kaggle: /kaggle/working/ is writable)
DATA_DIR = "./legal_data_cache"
os.makedirs(DATA_DIR, exist_ok=True)

print(f"⬇️  Downloading dataset snapshot from {REPO_ID} ...")
DATA_PATH = snapshot_download(
    repo_id=REPO_ID,
    repo_type="dataset",
    local_dir=DATA_DIR,
    local_dir_use_symlinks=False,
    revision="main"   # ensure latest
)
print("✓ Dataset snapshot downloaded")

# Detect .json files automatically
json_files = glob.glob(os.path.join(DATA_PATH, "*.json"))
if not json_files:
    raise FileNotFoundError("No .json files found in the downloaded dataset.")

documents = []
for path in json_files:
    fname = os.path.basename(path)
    try:
        df = pd.read_json(path, lines=False)
        df["source"] = fname.replace(".json", "")
        for _, row in df.iterrows():
            documents.append({
                "id": len(documents),
                "question": str(row.get("question", ""))[:1500],
                "answer": str(row.get("answer", ""))[:8000],
                "text": f"Q: {row.get('question', '')}\nA: {row.get('answer', '')}",
                "source": row.get("source", fname.replace(".json", ""))
            })
        print(f"✓ Loaded {fname}: {len(df)} pairs")
    except Exception as e:
        print(f"⚠️  Error reading {fname}: {e}")

print(f"✅ Total documents prepared: {len(documents)}")

# Optional quick check
if len(documents) > 0:
    print("Sample document:", documents[0])
else:
    print("⚠️ No documents loaded — check repo or format.")


⬇️  Downloading dataset snapshot from Techmaestro369/indian-legal-texts-finetuning ...


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

constitution_qa.json: 0.00B [00:00, ?B/s]

ipc_qa.json: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

crpc_qa.json: 0.00B [00:00, ?B/s]

dataset_card.json:   0%|          | 0.00/873 [00:00<?, ?B/s]

.gitattributes: 0.00B [00:00, ?B/s]

✓ Dataset snapshot downloaded
✓ Loaded constitution_qa.json: 4082 pairs
⚠️  Error reading dataset_card.json: All arrays must be of the same length
✓ Loaded ipc_qa.json: 2267 pairs
✓ Loaded crpc_qa.json: 8194 pairs
✅ Total documents prepared: 14543
Sample document: {'id': 0, 'question': 'What is India according to the Union and its Territory?', 'answer': 'India, that is Bharat, shall be a Union of States.', 'text': 'Q: What is India according to the Union and its Territory?\nA: India, that is Bharat, shall be a Union of States.', 'source': 'constitution_qa'}


In [None]:
# CELL 5 — FAISS

if len(documents) > 0:
    print('Building FAISS index...')
    texts = [d['text'][:256] for d in documents]   # ✅ RAM Safe
    embeddings = embedder.encode(texts, show_progress_bar=True, convert_to_numpy=True).astype('float32')

    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    print(f'✓ Vector DB ready. {len(documents)} docs indexed (dim={dim})')

else:
    index = None


Building FAISS index...


Batches:   0%|          | 0/455 [00:00<?, ?it/s]

✓ Vector DB ready. 14543 docs indexed (dim=768)


In [None]:
# %%
# CELL 6: KnowledgeGraph
from collections import defaultdict

class KnowledgeGraph:
    def __init__(self):
        self.entities = defaultdict(list)
        self.relations = []
        self.context = {}
    def add_entity(self, entity_type, value):
        if value and value.strip() and value not in self.entities[entity_type]:
            self.entities[entity_type].append(value)
    def add_relation(self, subject, relation, obj):
        self.relations.append({'subject': subject, 'relation': relation, 'object': obj})
    def set_context(self, key, value):
        self.context[key] = value
    def to_dict(self):
        return {'entities': dict(self.entities), 'relations': self.relations, 'context': self.context}
    def get_summary(self):
        parts = []
        for etype, vals in self.entities.items():
            if vals:
                parts.append(f"{etype.upper()}: {', '.join(vals[:3])}")
        if self.context.get('situation'):
            parts.append(f"SITUATION: {self.context['situation'][:200]}")
        return ' | '.join(parts) if parts else 'Knowledge graph empty'

print('✓ KnowledgeGraph ready')

✓ KnowledgeGraph ready


In [None]:
# %%
# CELL 8: EntityExtractionAgent
class EntityExtractionAgent:
    def extract(self, text: str):
        out = {'dates': [], 'locations': [], 'values': [], 'items': [], 'parties': []}
        if ner_pipe is not None:
            try:
                ents = ner_pipe(text)
                for e in ents:
                    lab = e.get('entity_group') or e.get('entity')
                    tok = e.get('word') or e.get('entity')
                    if not tok: continue
                    tok = tok.strip()
                    if lab in ['PER']:
                        out['parties'].append(tok)
                    elif lab in ['ORG','LOC']:
                        out['locations'].append(tok)
                    elif lab in ['MISC']:
                        out['items'].append(tok)
            except Exception as e:
                print('NER pipeline error, regex fallback:', e)

        out['dates'] = list(dict.fromkeys(out['dates'] + re.findall(r'\b\d{1,2}[\-/]\d{1,2}[\-/]\d{2,4}\b', text)))[:3]
        months = re.findall(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}(?:,\s*\d{4})?', text, flags=re.I)
        out['dates'] += months
        out['values'] = re.findall(r'\b(?:Rs\.?|₹)\s*[\d,]+\b', text)
        caps = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,2}\b', text)
        out['locations'] += caps
        keywords = ['phone','laptop','car','house','land','jewelry','money','document','agreement','FIR','complaint']
        out['items'] += [kw for kw in keywords if kw.lower() in text.lower()]
        return {k:list(dict.fromkeys(v))[:6] for k,v in out.items()}

print('✓ EntityExtractionAgent ready')

✓ EntityExtractionAgent ready


In [None]:
# %%
# CELL 9: ResearchAgent
class ResearchAgent:
    def __init__(self, documents, index, embedder):
        self.documents = documents
        self.index = index
        self.embedder = embedder
    def research(self, query: str, top_k=6):
        if self.index is None or len(self.documents)==0:
            return [], []
        qvec = self.embedder.encode([query], convert_to_numpy=True).astype('float32')
        distances, indices = self.index.search(qvec, top_k)
        results, dists = [], []
        for pos, idx in enumerate(indices[0]):
            if idx<0 or idx>=len(self.documents): continue
            doc = self.documents[idx].copy()
            doc['excerpt'] = (doc.get('answer') or doc.get('text') or '')[:800].replace('\n',' ')
            results.append(doc)
            dists.append(float(distances[0][pos]))
        return results, dists

print('✓ ResearchAgent ready')

✓ ResearchAgent ready


In [35]:
# ============================================================
# ANALYSIS AGENT v4 — DETAILED + ANTI-REPETITION VERSION
# ============================================================

import torch, re

class AnalysisAgent:
    def __init__(self, has_llm: bool, max_tokens: int = 950):
        self.has_llm = has_llm and HAS_LLM and LLM_MODEL is not None
        self.max_tokens = max_tokens

    def analyze(self, case_type: str, kg: KnowledgeGraph, docs, dists):
        situation = kg.context.get('situation', '')
        subtype = kg.context.get('criminal_subtype', None)
        facts = kg.get_summary()

        fact_text = "\n".join(
            [f"{k.upper()}: {', '.join(v)}" for k, v in kg.entities.items() if len(v)]
        )

        law_text = "\n".join([
            re.sub(r"\s+", " ", f"[{i+1}] {d['excerpt'][:350]}") for i, d in enumerate(docs[:3])
        ])

        # Structured prompt with stop markers
        prompt = f"""
You are a Senior Indian Criminal Lawyer with 25 years of trial and appellate experience.
Analyze the situation below with step-by-step reasoning.
Provide a detailed professional legal analysis as if writing an internal brief.

---
### CLIENT STATEMENT:
{situation}

### FACTS EXTRACTED:
{fact_text or 'None'}

### RELEVANT LEGAL MATERIAL:
{law_text or 'None'}

### CASE TYPE: {case_type.upper()}
### SUBTYPE: {subtype or 'UNKNOWN'}
---

Write the analysis using the following explicit structure and stop after "END OF REPORT":

<SECTION 1: SUMMARY OF INCIDENT>
Describe exactly what happened — who, what, when, where, and how — in clear terms.

<SECTION 2: LEGAL CHARACTERIZATION>
Explain which offences apply, mention relevant IPC/CrPC sections, and justify each.

<SECTION 3: LEGAL REASONING>
Explain logically how the facts satisfy the elements of the law (actus reus + mens rea if criminal).

<SECTION 4: PROSECUTION & DEFENCE>
Summarize the likely arguments from both sides and weaknesses in each.

<SECTION 5: EVIDENCE & STRATEGY>
Advise on evidence needed, investigative steps, and risks.

<SECTION 6: CONCLUSION>
Provide the senior lawyer’s final opinion and recommendations.

END OF REPORT
"""

        if not self.has_llm:
            return {"report": f"[NO LLM AVAILABLE]\nFacts:\n{facts}"}

        inputs = LLM_TOKENIZER(prompt, return_tensors="pt", truncation=True).to(DEVICE)
        with torch.no_grad():
            output = LLM_MODEL.generate(
                **inputs,
                max_new_tokens=self.max_tokens,
                temperature=0.55,
                top_p=0.9,
                do_sample=True,
                eos_token_id=LLM_TOKENIZER.eos_token_id,
                pad_token_id=LLM_TOKENIZER.eos_token_id,
            )

        final = LLM_TOKENIZER.decode(output[0], skip_special_tokens=True)
        final = final.replace(prompt.strip(), "").strip()

        # --- CLEANUP PHASE ---
        # Stop at END OF REPORT
        if "END OF REPORT" in final:
            final = final.split("END OF REPORT")[0]

        # Remove repeated paragraphs
        lines = final.splitlines()
        seen, cleaned = set(), []
        for line in lines:
            text = line.strip()
            if text and text not in seen:
                cleaned.append(text)
                seen.add(text)
        final = "\n".join(cleaned)

        # Remove stray model echoes
        final = re.sub(r"(<SECTION.*?>)", r"\n\1", final)
        final = re.sub(r"You are a Senior.*?experience\.", "", final, flags=re.I)

        return {"report": final.strip()}

print("✓ AnalysisAgent (v4) — detailed + anti-repetition loaded successfully.")


✓ AnalysisAgent (v4) — detailed + anti-repetition loaded successfully.


In [33]:
# AGENTIC SELF-CORRECTING CLASSIFIER
class SmartClassifierAgent:
    """Corrects itself based on user responses and knowledge graph"""
    def __init__(self):
        self.case_keywords = {
            'criminal': ['theft','stolen','robbery','assault','murder','rape','dacoity','fir','police','crime','burglar','pickpocket','extortion','blackmail'],
            'family': ['divorce','marriage','custody','alimony','maintenance','dowry','wife','husband','domestic','separation','child','guardian'],
            'property': ['land','boundary','inheritance','encroachment','tenant','landlord','eviction','deed','title','plot','mutation'],
            'contract': ['agreement','breach','contract','payment','outstanding','invoice','debt','loan','delivery','default']
        }

    def initial_classify(self, query: str) -> tuple:
        """First pass classification"""
        q = query.lower()
        scores = {}
        for case_type, keywords in self.case_keywords.items():
            match_count = sum(1 for kw in keywords if kw in q)
            scores[case_type] = match_count

        if max(scores.values()) == 0:
            return 'general', 0.5

        best_type = max(scores, key=scores.get)
        confidence = min(scores[best_type] / 5.0, 1.0)  # Max 5 keywords per category
        return best_type, confidence

    def reconsider(self, initial_type: str, kg: KnowledgeGraph) -> tuple:
        """Reconsider classification based on accumulated facts"""
        entities_text = ' '.join([str(v) for vals in kg.entities.values() for v in vals]).lower()
        situation = kg.context.get('situation', '').lower()
        combined = (entities_text + ' ' + situation).lower()

        scores = {}
        for case_type, keywords in self.case_keywords.items():
            match_count = sum(1 for kw in keywords if kw in combined)
            scores[case_type] = match_count

        best_type = max(scores, key=scores.get) if max(scores.values()) > 0 else initial_type

        # If we found a better match, flag it
        if best_type != initial_type and scores[best_type] > scores[initial_type]:
            return best_type, True  # Returns (case_type, was_corrected)

        return initial_type, False


# --- CRIMINAL-AWARE QUESTION GENERATOR (Corrected) ---

class AdaptiveQuestionGenerator:

    OFFENSE_KEYWORDS = {
        'murder':  ['murder','killed','homicide','stabbed','shot','strangled','burned','ipc 302','302'],
        'theft':   ['theft','stolen','pickpocket','burglary','phone was stolen','ipc 379','379'],
        'robbery': ['robbery','dacoity','snatched','armed','weapon','force','ipc 392','392','395','396'],
        'assault': ['assault','beat','injury','attack','fight','ipc 323','324','325','326'],
    }

    QUESTION_TEMPLATES = {
        'murder': {
            'time':       ['When did the incident occur?', 'Approximate time of death known?'],
            'location':   ['Where did the incident happen?', 'Where was the body found?'],
            'relationship':['What was the relationship between accused and victim?', 'Any prior dispute?'],
            'weapon':     ['What weapon was used? (knife/firearm/blunt object)', 'Was the weapon recovered?'],
            'evidence':   ['Post-mortem report available?', 'Any CCTV or eyewitnesses?'],
            'police':     ['FIR filed? Which police station?', 'Any arrests made?'],
        },
        'robbery': {
            'time':     ['When did the robbery occur?'],
            'location': ['Where exactly did it happen? (street / shop / home)'],
            'force':    ['Was a weapon or threat used?', 'Any injuries?'],
            'property': ['What items or money were taken?'],
            'evidence': ['Any CCTV or witnesses?', 'Police informed? FIR filed?']
        },
        'theft': {
            'time':     ['When was the item last seen?', 'When did you notice it missing?'],
            'location': ['Where was the theft location?'],
            'property': ['What item was stolen? (model/serial/IMEI)'],
            'evidence': ['CCTV or witnesses?', 'Proof of ownership available?'],
            'police':   ['FIR filed? Which station?']
        },
        'assault': {
            'time':     ['When did the assault occur?'],
            'location': ['Where did it happen?'],
            'injury':   ['What injuries occurred? Medical report available?'],
            'cause':    ['Was there a dispute or trigger?'],
            'evidence': ['CCTV or witnesses?', 'Any hospital or police report?']
        }
    }

    def detect_crime_subtype(self, text: str) -> str:
        t = text.lower()
        best = 'theft'
        best_score = 0
        for subtype, keywords in self.OFFENSE_KEYWORDS.items():
            score = sum(1 for kw in keywords if kw in t)
            if score > best_score:
                best = subtype; best_score = score
        return best

    def generate_next(self, case_type: str, kg, asked):
        situation = kg.context.get('situation', '')
        subtype = kg.context.get('criminal_subtype') or self.detect_crime_subtype(situation)
        templates = self.QUESTION_TEMPLATES.get(subtype, self.QUESTION_TEMPLATES['theft'])

        for category, questions in templates.items():
          missing_questions = [q for q in questions if q not in asked]
          if missing_questions:
            return missing_questions[0];

        return None





In [30]:
# --- FULL AGENTIC LEGAL SYSTEM WITH SUBTYPE FIX ---

class AgenticLegalSystem:

    def __init__(self):
        self.classifier = SmartClassifierAgent()
        self.extractor  = EntityExtractionAgent()
        self.qgen       = AdaptiveQuestionGenerator()
        self.researcher = ResearchAgent(documents, index, embedder)
        self.analyzer   = AnalysisAgent(has_llm=USE_LLM)

    def run_consultation(self, initial_query, interactive=True, max_turns=3):

        print("\n" + "="*70)
        print("AGENTIC LEGAL CONSULTATION")
        print("="*70 + "\n")

        case_type, conf = self.classifier.initial_classify(initial_query)
        print(f"Initial Classification: {case_type} (confidence: {conf:.2f})")

        kg = KnowledgeGraph()
        kg.set_context('situation', initial_query)
        kg.set_context('case_type', case_type)

        # ✅ NEW — detect subtype immediately
        kg.set_context('criminal_subtype', self.qgen.detect_crime_subtype(initial_query))

        asked = []

        for turn in range(max_turns):
            print("\nKG:", kg.get_summary(), "\n")

            q = self.qgen.generate_next(case_type, kg, asked)
            if not q:
                break

            print(f"Agent: {q}")
            asked.append(q)

            if not interactive:
                break

            reply = input("You: ").strip()
            if not reply:
                continue

            ents = self.extractor.extract(reply)
            for et, vals in ents.items():
                for v in vals:
                    kg.add_entity(et, v)

            # ✅ UPDATE SUBTYPE AS FACTS GROW
            kg.set_context('criminal_subtype', self.qgen.detect_crime_subtype(
                kg.context.get('situation', '') + " " +
                " ".join([x for vals in kg.entities.values() for x in vals])
            ))

        print("\nInformation gathering complete.\n")

        summary = kg.get_summary()
        docs, dists = self.researcher.research(summary)

        print(f"Retrieved {len(docs)} documents.\n")

        result = self.analyzer.analyze(case_type, kg, docs, dists)
        print(result["report"])
        return result

legal_system = AgenticLegalSystem()
print("✓ System updated with criminal subtype intelligence.")


✓ System updated with criminal subtype intelligence.


In [36]:
print('='*70)
print(f'CHATLAW – AGENTIC LEGAL SYSTEM (LLM: {LLM_NAME if HAS_LLM else "DETERMINISTIC"})')
print('='*70+'\n')

while True:
    query = input("Your legal query (or 'quit'): ").strip()
    if not query:
        continue
    if query.lower() in ['quit','exit']:
        print('Goodbye.')
        break

    try:
        result = legal_system.run_consultation(query, interactive=True, max_turns=7)
        print('\\n' + result['report'] + '\\n')

        save = input('Save report? (y/n): ').strip().lower()
        if save == 'y':
            fname = f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            with open(fname, 'w') as f:
                f.write(result['report'])
            print(f'✓ Saved to {fname}')
    except Exception as e:
        import traceback
        print(f'\\nError during analysis: {type(e).__name__}: {str(e)[:100]}')
        print('Traceback:')
        traceback.print_exc()
        print('Please try again with a clearer query.\\n')

CHATLAW – AGENTIC LEGAL SYSTEM (LLM: soketlabs/pragna-1b)

Your legal query (or 'quit'): robbery

AGENTIC LEGAL CONSULTATION

Initial Classification: criminal (confidence: 0.20)

KG: SITUATION: robbery 

Agent: When did the robbery occur?
You: yesterday

KG: SITUATION: robbery 

Agent: Where exactly did it happen? (street / shop / home)
You: home

KG: SITUATION: robbery 

Agent: Was a weapon or threat used?
You: yes

KG: SITUATION: robbery 

Agent: Any injuries?
You: yes

KG: SITUATION: robbery 

Agent: What items or money were taken?
You: money

KG: ITEMS: money | SITUATION: robbery 

Agent: Any CCTV or witnesses?
You: no

KG: ITEMS: money | SITUATION: robbery 

Agent: Police informed? FIR filed?
You: no

Information gathering complete.

Retrieved 6 documents.

**Summary of Incident:**

The accused robbed the victim of a sum of money. The victim's statement states that the accused approached him and demanded money. He refused to give the money and threatened to shoot him if he did not