In [None]:
# Install the notebook's dependencies into the runtime (shell commands via `!`).
# Only transformers is version-pinned; every other package resolves to whatever
# pip selects at install time. The captured output below shows that the pin
# replaces the preinstalled tokenizers 0.21.1 with tokenizers 0.13.3.
!pip install transformers==4.30.0
!pip install torch torchvision torchaudio
!pip install sentence-transformers
!pip install PyPDF2
!pip install numpy pandas
!pip install scikit-learn
!pip install spacy
!pip install nltk

Collecting transformers==4.30.0
  Downloading transformers-4.30.0-py3-none-any.whl.metadata (113 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m113.6/113.6 kB[0m [31m737.4 kB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.30.0)
  Downloading tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.30.0-py3-none-any.whl (7.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.21.1
    Uninstalling tokenizers-0.21.1:
      Successful

In [None]:
import torch
import numpy as np
import pandas as pd
import json
import re
import warnings
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from collections import defaultdict

# Transformers and NLP
from transformers import AutoTokenizer, AutoModel, pipeline
from sentence_transformers import SentenceTransformer
import nltk
import spacy

# File processing
import PyPDF2
import io
from google.colab import files

# ML and similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

warnings.filterwarnings('ignore')

In [None]:
# Load the legal-domain encoder; fall back to generic BERT if it cannot be fetched.
try:
    legal_tokenizer = AutoTokenizer.from_pretrained("law-ai/InLegalBERT")
    legal_model = AutoModel.from_pretrained("law-ai/InLegalBERT")
    print("✅ InLegalBERT loaded successfully")
except Exception as e:
    print(f"⚠️ InLegalBERT failed: {e}")
    # Fallback to BERT
    legal_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    legal_model = AutoModel.from_pretrained("bert-base-uncased")
    print("✅ Fallback BERT loaded")

# Load sentence transformer for semantic similarity.
# The name is bound to None first so later cells can test `sentence_model`
# without hitting a NameError when loading failed.
sentence_model = None
try:
    sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
    print("✅ Sentence transformer loaded")
except Exception as e:
    # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still propagate.
    print(f"⚠️ Sentence transformer failed: {e}")

# Load Q&A pipeline (same None-on-failure contract as sentence_model).
qa_pipeline = None
try:
    qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
    print("✅ Q&A pipeline loaded")
except Exception as e:
    print(f"⚠️ Q&A pipeline failed: {e}")

tokenizer_config.json:   0%|          | 0.00/516 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/222k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/671 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/534M [00:00<?, ?B/s]

✅ InLegalBERT loaded successfully


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/534M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Sentence transformer loaded


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Device set to use cpu


✅ Q&A pipeline loaded


In [None]:

@dataclass
class QuestionType:
    """Question classification types.

    A namespace of string constants naming the categories a question can be
    routed to. The values are plain strings, so they can be compared with and
    stored as ordinary `str` objects.
    """
    # None of the attributes below carries a type annotation, so @dataclass
    # registers no fields for them; they are ordinary class-level constants
    # accessed as QuestionType.<NAME>.
    DOCUMENT_SPECIFIC = "document_specific"  # questions about the uploaded document
    LEGAL_KNOWLEDGE = "legal_knowledge"      # statute / article / legal-provision lookups
    PROCEDURAL = "procedural"                # filing, appeal and other court-process questions
    COMPARATIVE = "comparative"              # comparisons between cases or principles
    DEFINITION = "definition"                # meaning of a legal term

@dataclass
class AnswerResult:
    """Structure for Q&A results.

    All six fields are required; the generated constructor accepts them
    positionally in the order declared below or by keyword.
    """
    answer: str            # text shown to the user
    confidence: float      # score attached by the answering routine (values in this file range from 0.0 to 1.0)
    source: str            # short tag naming where the answer came from, e.g. "document" or "ipc"
    citations: List[str]   # human-readable references backing the answer; may be empty
    question_type: str     # one of the QuestionType string constants, or "unknown"
    follow_ups: List[str]  # suggested follow-up prompts for the user

class LegalKnowledgeBase:
    """Hard-coded reference data on Indian law.

    On construction four dictionaries are built and stored as attributes:
    `statutes`, `landmark_cases`, `legal_terms` and `procedures`. Every loader
    returns a freshly created dict, so instances do not share mutable state.
    """

    def __init__(self):
        # Populate each knowledge area, then announce readiness on stdout.
        self.statutes = self._load_indian_statutes()
        self.landmark_cases = self._load_landmark_cases()
        self.legal_terms = self._load_legal_terms()
        self.procedures = self._load_court_procedures()
        print("✅ Legal Knowledge Base initialized")

    def _load_indian_statutes(self) -> Dict:
        """Return statute data keyed by short code ("ipc", "constitution", "crpc").

        Each entry holds a display "name" plus a provision table stored under
        "sections" (IPC, CrPC) or "articles" (Constitution), keyed by number
        given as a string.
        """
        ipc_sections = {
            "302": "Punishment for murder - Whoever commits murder shall be punished with death, or imprisonment for life, and shall also be liable to fine.",
            "307": "Attempt to murder - Whoever does any act with such intention or knowledge, and under such circumstances that, if he by that act caused death, he would be guilty of murder.",
            "376": "Punishment for rape - Whoever commits rape, shall be punished with rigorous imprisonment of either description for a term which shall not be less than ten years.",
            "420": "Cheating and dishonestly inducing delivery of property - Whoever cheats and thereby dishonestly induces the person deceived to deliver any property.",
        }
        constitution_articles = {
            "14": "Equality before law - The State shall not deny to any person equality before the law or the equal protection of the laws within the territory of India.",
            "19": "Protection of certain rights regarding freedom of speech, etc.",
            "21": "Protection of life and personal liberty - No person shall be deprived of his life or personal liberty except according to procedure established by law.",
            "32": "Right to Constitutional Remedies - The right to move the Supreme Court by appropriate proceedings for the enforcement of fundamental rights is guaranteed.",
        }
        crpc_sections = {
            "154": "Information in cognizable cases - Every information relating to the commission of a cognizable offence, if given orally to an officer in charge of a police station, shall be reduced to writing by him or under his direction.",
            "161": "Examination of witnesses by police - Any police officer making an investigation may examine orally any person supposed to be acquainted with the facts and circumstances of the case.",
            "482": "Saving of inherent powers of High Court - Nothing in this Code shall be deemed to limit or affect the inherent powers of the High Court to make such orders as may be necessary.",
        }

        # Insertion order (ipc, constitution, crpc) is kept deliberately.
        return {
            "ipc": {
                "name": "Indian Penal Code, 1860",
                "sections": ipc_sections,
            },
            "constitution": {
                "name": "Constitution of India, 1950",
                "articles": constitution_articles,
            },
            "crpc": {
                "name": "Code of Criminal Procedure, 1973",
                "sections": crpc_sections,
            },
        }

    def _load_landmark_cases(self) -> Dict:
        """Return landmark cases keyed by slug; each has name, principle and significance."""
        # (slug, name, principle, significance)
        case_records = (
            (
                "kesavananda_bharati",
                "Kesavananda Bharati v. State of Kerala (1973)",
                "Basic Structure Doctrine - Parliament cannot alter the basic structure of the Constitution",
                "Landmark case establishing limits on constitutional amendments",
            ),
            (
                "maneka_gandhi",
                "Maneka Gandhi v. Union of India (1978)",
                "Article 21 includes right to travel abroad and expanded interpretation of personal liberty",
                "Broadened scope of fundamental rights",
            ),
            (
                "vishaka",
                "Vishaka v. State of Rajasthan (1997)",
                "Guidelines for prevention of sexual harassment at workplace",
                "Created legal framework for workplace harassment before specific legislation",
            ),
            (
                "minerva_mills",
                "Minerva Mills v. Union of India (1980)",
                "Balance between fundamental rights and directive principles",
                "Reinforced basic structure doctrine",
            ),
        )
        return {
            slug: {"name": name, "principle": principle, "significance": significance}
            for slug, name, principle, significance in case_records
        }

    def _load_legal_terms(self) -> Dict:
        """Return the glossary: snake_case term -> one-line definition."""
        return dict(
            mens_rea="Mental element or guilty mind required for criminal liability",
            actus_reus="Physical element or guilty act in criminal law",
            ratio_decidendi="The legal principle or rule that forms the basis of a judicial decision",
            obiter_dicta="Remarks or observations made by a judge that are not essential to the decision",
            stare_decisis="Legal principle of determining points in litigation according to precedent",
            habeas_corpus="Writ requiring person under arrest to be brought before a judge",
            certiorari="Writ issued by superior court to review decisions of lower court",
            mandamus="Writ compelling performance of public duty",
            quo_warranto="Writ questioning person's right to hold public office",
            ultra_vires="Beyond the legal power or authority",
            prima_facie="Based on first impression; accepted as correct until proved otherwise",
            bona_fide="In good faith; genuine",
            caveat="Legal notice to court or public officer to suspend proceeding",
            injunction="Court order requiring party to do or refrain from specific acts",
        )

    def _load_court_procedures(self) -> Dict:
        """Return court hierarchy, procedure steps and typical timelines."""
        court_hierarchy = {
            "supreme_court": "Highest court of appeal in India",
            "high_courts": "Principal civil courts of original jurisdiction in states",
            "district_courts": "Courts at district level handling civil and criminal cases",
            "magistrate_courts": "Lower courts handling minor criminal cases",
        }
        procedure_steps = {
            "filing_petition": "Process of submitting formal request to court",
            "service_of_notice": "Formal delivery of legal documents to parties",
            "evidence_recording": "Process of documenting testimony and exhibits",
            "final_arguments": "Concluding submissions by advocates",
            "judgment_delivery": "Court's final decision on the matter",
        }
        typical_timelines = {
            "criminal_trial": "Generally 6 months to 2 years depending on complexity",
            "civil_suit": "Usually 1-3 years for resolution",
            "constitutional_matters": "Can extend several years due to complexity",
            "bail_applications": "Decided within few days to weeks",
        }
        return {
            "hierarchy": court_hierarchy,
            "procedures": procedure_steps,
            "timelines": typical_timelines,
        }

# Build the shared knowledge base once, then print a short inventory of it.
legal_kb = LegalKnowledgeBase()
print(
    "🎯 Legal Knowledge Base loaded with:",
    f"   • {len(legal_kb.statutes)} statute categories",
    f"   • {len(legal_kb.landmark_cases)} landmark cases",
    f"   • {len(legal_kb.legal_terms)} legal terms",
    "   • Court procedures and hierarchy",
    sep="\n",
)

✅ Legal Knowledge Base initialized
🎯 Legal Knowledge Base loaded with:
   • 3 statute categories
   • 4 landmark cases
   • 14 legal terms
   • Court procedures and hierarchy


In [None]:

class QuestionClassifier:
    """Keyword/regex router that assigns a question to one QuestionType value."""

    def __init__(self):
        # Each category maps to regexes that are searched in the lowercased
        # question. Dict order matters: on a tied score the earliest category wins.
        self.patterns = {
            QuestionType.DOCUMENT_SPECIFIC: [
                r'what.*held.*case', r'court.*decided', r'judgment.*this',
                r'ratio.*case', r'facts.*case', r'what.*appellant',
                r'what.*respondent', r'this.*case', r'above.*case'
            ],
            QuestionType.LEGAL_KNOWLEDGE: [
                r'section.*\d+', r'article.*\d+', r'what.*ipc',
                r'constitution.*says', r'law.*says', r'according.*law',
                r'legal.*provision', r'statute.*provides'
            ],
            QuestionType.PROCEDURAL: [
                r'how.*file', r'procedure.*for', r'steps.*to',
                r'court.*process', r'how.*appeal', r'timeline.*for',
                r'jurisdiction.*of', r'which.*court'
            ],
            QuestionType.COMPARATIVE: [
                r'similar.*to', r'like.*case', r'precedent.*for',
                r'compare.*with', r'difference.*between', r'relate.*to'
            ],
            QuestionType.DEFINITION: [
                r'what.*is', r'define', r'meaning.*of',
                r'explain', r'concept.*of', r'term.*means'
            ]
        }
        print("✅ Question Classifier initialized")

    def classify(self, question: str) -> str:
        """Return the category whose patterns match the question most often.

        The score of a category is the number of its regexes found in the
        lowercased question. If no regex matches at all, the result is
        QuestionType.LEGAL_KNOWLEDGE.
        """
        lowered = question.lower()

        hit_counts = {
            category: sum(1 for regex in regexes if re.search(regex, lowered))
            for category, regexes in self.patterns.items()
        }

        # max() returns the first key holding the top score, i.e. ties follow dict order.
        best_category = max(hit_counts, key=hit_counts.get)
        if hit_counts[best_category] > 0:
            return best_category
        return QuestionType.LEGAL_KNOWLEDGE

class LegalEntityRecognizer:
    """Recognizes legal entities in questions"""

    def __init__(self):
        self.entity_patterns = {
            'SECTION': r'section\s+(\d+[a-z]*)',
            'ARTICLE': r'article\s+(\d+[a-z]*)',
            'CASE_NAME': r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+v[s]?\.\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)',
            'ACT': r'(indian\s+penal\s+code|constitution|cr\.?p\.?c|evidence\s+act)',
            'COURT': r'(supreme\s+court|high\s+court|district\s+court|magistrate)',
            'LEGAL_TERM': r'\b(mens\s+rea|actus\s+reus|ratio|obiter|stare\s+decisis|habeas\s+corpus)\b'
        }
        print("✅ Legal Entity Recognizer initialized")

    def extract_entities(self, text: str) -> Dict[str, List[str]]:
        """Extract legal entities from text"""
        entities = defaultdict(list)
        text_lower = text.lower()

        for entity_type, pattern in self.entity_patterns.items():
            matches = re.findall(pattern, text_lower, re.IGNORECASE)
            if matches:
                if entity_type == 'CASE_NAME':
                    # Handle case names specially (they return tuples)
                    entities[entity_type].extend([f"{m[0]} v. {m[1]}" for m in matches])
                else:
                    entities[entity_type].extend(matches if isinstance(matches[0], str) else [m[0] for m in matches])

        return dict(entities)

# Initialize classification components.
# These module-level instances are reused by later cells; each constructor
# prints a status line when it runs.
question_classifier = QuestionClassifier()
entity_recognizer = LegalEntityRecognizer()


✅ Question Classifier initialized
✅ Legal Entity Recognizer initialized


In [None]:

class DocumentProcessor:
    """Handles document upload, PDF text extraction and sentence-level retrieval.

    Attributes:
        current_document: dict with 'filename', 'content' and 'length' for the
            last successfully processed upload, or None.
        document_embeddings: dict with 'sentences' and their 'embeddings' for
            the current document, or None when no embeddings are available.
    """

    def __init__(self):
        self.current_document = None
        self.document_embeddings = None
        print("✅ Document Processor initialized")

    def upload_document(self):
        """Prompt for a PDF upload (Colab widget), extract its text and index it.

        Only the first uploaded file is processed.

        Returns:
            The new `current_document` dict, or None when nothing was uploaded
            or no text could be extracted (the previous document is then kept).
        """
        print("📤 UPLOAD YOUR LEGAL DOCUMENT (PDF)")
        print("=" * 50)

        uploaded = files.upload()
        if not uploaded:
            print("❌ No file uploaded")
            return None

        filename = list(uploaded.keys())[0]
        file_content = uploaded[filename]

        print(f"📄 Processing: {filename}")
        text = self.extract_pdf_text(file_content)

        if text:
            self.current_document = {
                'filename': filename,
                'content': text,
                'length': len(text)
            }
            self._create_document_embeddings(text)
            print(f"✅ Document processed: {len(text)} characters")
            return self.current_document
        else:
            print("❌ Failed to extract text")
            return None

    def extract_pdf_text(self, file_content):
        """Extract and clean the text of a PDF.

        Args:
            file_content: Raw PDF bytes.

        Returns:
            The text with whitespace runs collapsed to single spaces and
            "Page <n>" markers removed, or None if the PDF could not be read.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))
            # A page that yields no text (None or "") must not abort the whole
            # document, so it is treated as empty.
            page_texts = [(page.extract_text() or "") for page in pdf_reader.pages]
            text = "\n".join(page_texts)

            # Clean text
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'Page \d+', '', text)
            return text.strip()
        except Exception as e:
            print(f"❌ PDF extraction failed: {e}")
            return None

    def _create_document_embeddings(self, text):
        """Split `text` into sentences and embed them with the global sentence model.

        Any embeddings from a previously processed document are discarded
        first, so a failure here can never leave retrieval pointing at the
        wrong document. On failure `document_embeddings` stays None.
        """
        self.document_embeddings = None
        try:
            # Split into sentences for better granularity; fragments of 20
            # characters or fewer are dropped.
            sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 20]
            # Looked up at call time: the model-loading cell may have failed or
            # not been run, in which case the name is missing or None.
            model = globals().get("sentence_model")
            if sentences and model is not None:
                self.document_embeddings = {
                    'sentences': sentences,
                    'embeddings': model.encode(sentences)
                }
                print(f"✅ Created embeddings for {len(sentences)} sentences")
        except Exception as e:
            self.document_embeddings = None
            print(f"⚠️ Embedding creation failed: {e}")

    def find_relevant_context(self, question: str, top_k: int = 3) -> List[str]:
        """Return up to `top_k` document sentences most similar to `question`.

        Sentences are ranked by cosine similarity, best first. An empty list is
        returned when no embeddings exist, when `top_k` is not positive, or
        when the lookup fails.
        """
        # top_k <= 0 must be rejected explicitly: a slice of [-0:] would return
        # every sentence instead of none.
        if not self.document_embeddings or top_k <= 0:
            return []

        try:
            model = globals().get("sentence_model")
            if model is None:
                raise RuntimeError("sentence model is not loaded")

            question_embedding = model.encode([question])
            similarities = cosine_similarity(question_embedding, self.document_embeddings['embeddings'])[0]

            # Get top-k most similar sentences
            top_indices = np.argsort(similarities)[-top_k:][::-1]
            return [self.document_embeddings['sentences'][i] for i in top_indices]
        except Exception as e:
            print(f"⚠️ Context finding failed: {e}")
            return []

# Initialize document processor.
# Module-level instance; it starts with no document and no embeddings.
doc_processor = DocumentProcessor()



✅ Document Processor initialized


In [None]:

class AnswerGenerator:
    """Generates answers using multiple knowledge sources.

    Routes a classified question to a handler that consults either the
    uploaded document (through the document processor) or the in-memory legal
    knowledge base, and wraps the outcome in an AnswerResult.
    """

    # Display abbreviations for statutes that carry a "sections" table.
    # Statutes not listed here fall back to their upper-cased key.
    _STATUTE_LABELS = {"ipc": "IPC", "crpc": "CrPC"}

    # Normalised act mention (letters only, lowercase) -> statute key.
    _ACT_ALIASES = {"indianpenalcode": "ipc", "ipc": "ipc", "crpc": "crpc"}

    def __init__(self, knowledge_base, doc_processor):
        self.kb = knowledge_base
        self.doc_processor = doc_processor
        print("✅ Answer Generator initialized")

    def generate_answer(self, question: str, question_type: str, entities: Dict) -> "AnswerResult":
        """Dispatch the question to the handler for its type.

        Args:
            question: The user's question as typed.
            question_type: One of the QuestionType string constants.
            entities: Entity type -> list of matches, as produced by the
                entity recognizer.

        Returns:
            The handler's AnswerResult, or the generic fallback answer when
            `question_type` is not a known type.
        """
        handlers = {
            QuestionType.DOCUMENT_SPECIFIC: self._answer_document_question,
            QuestionType.LEGAL_KNOWLEDGE: self._answer_legal_knowledge,
            QuestionType.PROCEDURAL: self._answer_procedural_question,
            QuestionType.COMPARATIVE: self._answer_comparative_question,
            QuestionType.DEFINITION: self._answer_definition_question,
        }
        handler = handlers.get(question_type)
        if handler is None:
            return self._fallback_answer(question)
        return handler(question, entities)

    def _answer_document_question(self, question: str, entities: Dict) -> "AnswerResult":
        """Answer questions about the uploaded document.

        Retrieves the most relevant sentences, then asks the extractive Q&A
        pipeline when one is loaded. If the pipeline is missing or fails, the
        best-matching sentence itself is returned.
        """
        if not self.doc_processor.current_document:
            return AnswerResult(
                answer="No document has been uploaded. Please upload a legal document first.",
                confidence=0.0,
                source="system",
                citations=[],
                question_type=QuestionType.DOCUMENT_SPECIFIC,
                follow_ups=["Would you like to upload a document?"]
            )

        # Find relevant context from document
        context_sentences = self.doc_processor.find_relevant_context(question)
        context = " ".join(context_sentences)
        document_citation = f"Document: {self.doc_processor.current_document['filename']}"

        # Use Q&A pipeline if available. It is looked up at call time because
        # the loading cell may have failed, leaving the name unset or None.
        qa = globals().get("qa_pipeline")
        if qa is not None and context:
            try:
                result = qa(question=question, context=context)
                confidence = min(result['score'] * 1.2, 1.0)  # Boost confidence slightly

                return AnswerResult(
                    answer=result['answer'],
                    confidence=confidence,
                    source="document",
                    citations=[document_citation],
                    question_type=QuestionType.DOCUMENT_SPECIFIC,
                    follow_ups=[
                        "Would you like me to explain any legal terms?",
                        "Do you want to compare this with similar cases?"
                    ]
                )
            except Exception as e:
                print(f"⚠️ Q&A pipeline failed: {e}")

        # Fallback: return relevant context
        if context_sentences:
            return AnswerResult(
                answer=f"Based on the document: {context_sentences[0]}",
                confidence=0.6,
                source="document_context",
                citations=[document_citation],
                question_type=QuestionType.DOCUMENT_SPECIFIC,
                follow_ups=["Would you like more details about this?"]
            )

        return AnswerResult(
            answer="I couldn't find relevant information in the uploaded document.",
            confidence=0.3,
            source="document",
            citations=[],
            question_type=QuestionType.DOCUMENT_SPECIFIC,
            follow_ups=["Could you rephrase your question?"]
        )

    def _answer_legal_knowledge(self, question: str, entities: Dict) -> "AnswerResult":
        """Answer questions using the legal knowledge base.

        Precedence: section reference, then article reference, then a
        recognised legal term, then the generic knowledge-base prompt.
        """
        # Check for section references; pass along the act mentioned in the
        # question (if any) so the right statute is consulted.
        sections = entities.get('SECTION')
        if sections:
            acts = entities.get('ACT') or [None]
            return self._lookup_section(sections[0], acts[0])

        # Check for article references
        articles = entities.get('ARTICLE')
        if articles:
            return self._lookup_article(articles[0])

        # Check for legal terms
        terms = entities.get('LEGAL_TERM')
        if terms:
            term_key = self._resolve_term_key(terms[0])
            if term_key is not None:
                return AnswerResult(
                    answer=f"{term_key.replace('_', ' ').title()}: {self.kb.legal_terms[term_key]}",
                    confidence=0.9,
                    source="legal_knowledge",
                    citations=["Legal terminology database"],
                    question_type=QuestionType.DEFINITION,
                    follow_ups=[
                        "Would you like examples of this concept?",
                        "Do you want related legal principles?"
                    ]
                )

        # Fallback: search in all knowledge
        return self._search_knowledge_base(question)

    def _resolve_term_key(self, term: str) -> Optional[str]:
        """Map a matched term to its glossary key, or None if unknown.

        Accepts an exact key ("mens rea" -> "mens_rea") or the leading word of
        a multi-word key ("ratio" -> "ratio_decidendi").
        """
        normalized = re.sub(r'\s+', '_', term.strip().lower())
        if normalized in self.kb.legal_terms:
            return normalized
        prefix = normalized + "_"
        for key in self.kb.legal_terms:
            if key.startswith(prefix):
                return key
        return None

    def _section_statutes(self, act: Optional[str] = None) -> List[str]:
        """Return the statute keys to search for a section.

        When `act` names a statute that has a "sections" table, only that
        statute is returned; otherwise every statute with a "sections" table
        is returned in knowledge-base order.
        """
        keys = [key for key, statute in self.kb.statutes.items() if "sections" in statute]
        if act:
            normalized = re.sub(r'[^a-z]', '', act.lower())
            preferred = self._ACT_ALIASES.get(normalized)
            if preferred in keys:
                return [preferred]
        return keys

    def _lookup_section(self, section: str, act: Optional[str] = None) -> "AnswerResult":
        """Look up a statutory section in the knowledge base.

        Args:
            section: Section identifier such as "302" or "498a".
            act: Optional act mention from the question (e.g. "crpc",
                "indian penal code"). When it names a known statute, only
                that statute is searched.

        Returns:
            The matching section, or a low-confidence "not found" result. An
            exact identifier match is tried first; if a lettered identifier is
            absent, its numeric base section is tried as a fallback.
        """
        requested = str(section).strip().lower()

        candidates = [requested]
        number = re.match(r'\d+', requested)
        if number and number.group(0) != requested:
            candidates.append(number.group(0))

        statute_keys = self._section_statutes(act)
        for candidate in candidates:
            for key in statute_keys:
                statute = self.kb.statutes[key]
                for section_id, section_text in statute["sections"].items():
                    if section_id.lower() != candidate:
                        continue
                    label = self._STATUTE_LABELS.get(key, key.upper())
                    return AnswerResult(
                        answer=f"Section {section_id} {label}: {section_text}",
                        confidence=0.95,
                        source=key,
                        citations=[f"{statute['name']}, Section {section_id}"],
                        question_type=QuestionType.LEGAL_KNOWLEDGE,
                        follow_ups=[
                            f"Would you like case law on Section {section_id}?",
                            "Do you want related sections?"
                        ]
                    )

        return AnswerResult(
            answer=f"Section {requested.upper()} not found in knowledge base.",
            confidence=0.2,
            source="legal_knowledge",
            citations=[],
            question_type=QuestionType.LEGAL_KNOWLEDGE,
            follow_ups=["Could you check the section number?"]
        )

    def _lookup_article(self, article: str) -> "AnswerResult":
        """Look up a Constitutional article by its number (given as a string)."""
        articles = self.kb.statutes["constitution"]["articles"]
        if article in articles:
            return AnswerResult(
                answer=f"Article {article}: {articles[article]}",
                confidence=0.95,
                source="constitution",
                citations=[f"Constitution of India, 1950, Article {article}"],
                question_type=QuestionType.LEGAL_KNOWLEDGE,
                follow_ups=[
                    f"Would you like landmark cases on Article {article}?",
                    "Do you want related fundamental rights?"
                ]
            )

        return AnswerResult(
            answer=f"Article {article} not found in knowledge base.",
            confidence=0.2,
            source="legal_knowledge",
            citations=[],
            question_type=QuestionType.LEGAL_KNOWLEDGE,
            follow_ups=["Could you check the article number?"]
        )

    def _answer_procedural_question(self, question: str, entities: Dict) -> "AnswerResult":
        """Answer procedural questions (filing and appeal checklists).

        Keywords are matched as substrings of the lowercased question; filing
        is checked before appeal.
        """
        question_lower = question.lower()

        if any(word in question_lower for word in ['file', 'filing']):
            return AnswerResult(
                answer="Filing a petition involves: 1) Draft petition with proper format, 2) Pay court fees, 3) Attach required documents, 4) Submit to appropriate court registry, 5) Serve notice to opposite parties.",
                confidence=0.8,
                source="procedures",
                citations=["Court procedures handbook"],
                question_type=QuestionType.PROCEDURAL,
                follow_ups=[
                    "Would you like specific forms required?",
                    "Do you need court fee details?"
                ]
            )

        if any(word in question_lower for word in ['appeal', 'appeals']):
            return AnswerResult(
                answer="Appeal process: 1) File appeal within limitation period, 2) Pay court fees and tribunal fees, 3) Prepare appeal memorandum, 4) File paper book, 5) Argue before appellate court.",
                confidence=0.8,
                source="procedures",
                citations=["Appellate procedure rules"],
                question_type=QuestionType.PROCEDURAL,
                follow_ups=[
                    "What type of appeal are you considering?",
                    "Do you need limitation period details?"
                ]
            )

        return AnswerResult(
            answer="I can help with court procedures. Could you be more specific about what procedure you need information about?",
            confidence=0.5,
            source="procedures",
            citations=[],
            question_type=QuestionType.PROCEDURAL,
            follow_ups=[
                "Are you asking about filing procedures?",
                "Do you need appeal process information?"
            ]
        )

    def _answer_comparative_question(self, question: str, entities: Dict) -> "AnswerResult":
        """Answer comparative questions (currently always asks for more context)."""
        return AnswerResult(
            answer="For comparative analysis, I would need more context about what cases or legal principles you want to compare.",
            confidence=0.4,
            source="comparative",
            citations=[],
            question_type=QuestionType.COMPARATIVE,
            follow_ups=[
                "Which specific cases do you want to compare?",
                "Are you comparing legal principles or factual situations?"
            ]
        )

    def _answer_definition_question(self, question: str, entities: Dict) -> "AnswerResult":
        """Answer definition questions from the glossary.

        Returns the first glossary term whose spaced form occurs in the
        lowercased question.
        """
        question_lower = question.lower()

        # Search for terms in our legal dictionary
        for term, definition in self.kb.legal_terms.items():
            if term.replace('_', ' ') in question_lower:
                return AnswerResult(
                    answer=f"{term.replace('_', ' ').title()}: {definition}",
                    confidence=0.9,
                    source="legal_definitions",
                    citations=["Legal terminology database"],
                    question_type=QuestionType.DEFINITION,
                    follow_ups=[
                        "Would you like examples of this concept?",
                        "Do you want related legal terms?"
                    ]
                )

        return AnswerResult(
            answer="I don't have a definition for that term in my knowledge base. Could you provide more context?",
            confidence=0.3,
            source="legal_definitions",
            citations=[],
            question_type=QuestionType.DEFINITION,
            follow_ups=["Could you rephrase or provide more context?"]
        )

    def _search_knowledge_base(self, question: str) -> "AnswerResult":
        """General knowledge-base fallback: returns a prompt listing what can be asked."""
        return AnswerResult(
            answer="I can help with Indian legal questions. Try asking about specific sections, articles, legal terms, or court procedures.",
            confidence=0.4,
            source="general",
            citations=[],
            question_type=QuestionType.LEGAL_KNOWLEDGE,
            follow_ups=[
                "Ask about specific IPC sections",
                "Ask about Constitutional articles",
                "Ask about legal terms or procedures"
            ]
        )

    def _fallback_answer(self, question: str) -> "AnswerResult":
        """Fallback for unclassified questions."""
        return AnswerResult(
            answer="I'm a legal research assistant. I can help with document analysis, legal knowledge, and court procedures related to Indian law.",
            confidence=0.3,
            source="fallback",
            citations=[],
            question_type="unknown",
            follow_ups=[
                "Upload a legal document for analysis",
                "Ask about Indian legal provisions",
                "Ask about court procedures"
            ]
        )

# Initialize answer generator.
# Wires the shared knowledge base and document processor instances together.
answer_generator = AnswerGenerator(legal_kb, doc_processor)


✅ Answer Generator initialized


In [None]:

class LegalQASystem:
    """Main interface for legal Q&A system.

    Ties together the module-level ``question_classifier``,
    ``entity_recognizer``, ``answer_generator`` and ``doc_processor`` objects
    and offers two entry points: ``process_question`` for a single question
    and ``start_interactive_session`` for a console loop.
    """

    # Inputs (compared case-insensitively) that end the interactive session.
    EXIT_COMMANDS = ('quit', 'exit', 'bye')

    def __init__(self):
        """Bind the shared notebook components and start with empty history."""
        self.classifier = question_classifier
        self.entity_recognizer = entity_recognizer
        self.answer_generator = answer_generator
        self.doc_processor = doc_processor
        # One dict per processed question; see process_question for the keys.
        self.conversation_history = []
        print("✅ Legal Q&A System initialized")

    def process_question(self, question: str) -> "AnswerResult":
        """Process a single question and return answer.

        Args:
            question: The user's question text.

        Returns:
            Whatever ``self.answer_generator.generate_answer`` returns for the
            question, its classified type and its extracted entities.

        Side effects:
            Appends a dict with keys ``'question'``, ``'answer'``,
            ``'entities'`` and ``'type'`` to ``self.conversation_history``.
        """
        # Step 1: Classify question
        question_type = self.classifier.classify(question)

        # Step 2: Extract entities
        entities = self.entity_recognizer.extract_entities(question)

        # Step 3: Generate answer
        result = self.answer_generator.generate_answer(question, question_type, entities)

        # Step 4: Store in conversation history
        self.conversation_history.append({
            'question': question,
            'answer': result,
            'entities': entities,
            'type': question_type
        })

        return result

    def start_interactive_session(self):
        """Start interactive Q&A session.

        Prints a banner, then repeatedly prompts for input until an exit
        command is entered, the user interrupts, or the input stream ends.
        The commands ``upload``, ``history`` and ``help`` are handled locally;
        any other non-empty input is answered via ``process_question``.
        Errors raised while handling an input are reported and the loop
        continues.
        """
        print("\n" + "="*60)
        print("🏛️  LEGAL Q&A ASSISTANT")
        print("="*60)
        print("I can help you with:")
        print("• Document analysis and questions")
        print("• Indian legal provisions (IPC, Constitution, CrPC)")
        print("• Legal terminology and definitions")
        print("• Court procedures and processes")
        print("\nCommands:")
        print("• 'upload' - Upload a legal document")
        print("• 'history' - View conversation history")
        print("• 'help' - Show help information")
        print("• 'quit' or 'exit' - End session")
        print("-" * 60)

        while True:
            # The prompt is read outside the best-effort handler below: if
            # input() itself fails, retrying cannot help and would re-prompt
            # forever. End-of-input is treated like an interrupt; any other
            # prompt failure propagates to the caller.
            try:
                question = input("\n🤔 Your question: ").strip()
            except (KeyboardInterrupt, EOFError):
                print("\n\n👋 Session ended by user")
                break

            command = question.lower()

            try:
                if command in self.EXIT_COMMANDS:
                    print("\n👋 Thank you for using Legal Q&A Assistant!")
                    break

                if command == 'upload':
                    self.doc_processor.upload_document()
                    continue

                if command == 'history':
                    self._show_history()
                    continue

                if command == 'help':
                    self._show_help()
                    continue

                if not question:
                    print("⚠️ Please enter a question")
                    continue

                print("\n🤖 Processing your question...")
                result = self.process_question(question)

                self._display_answer(result)

            except KeyboardInterrupt:
                print("\n\n👋 Session ended by user")
                break
            except Exception as e:
                # Best effort: a failure on one question must not end the session.
                print(f"\n❌ Error: {e}")
                print("Please try again or rephrase your question")

    def _display_answer(self, result: "AnswerResult"):
        """Display formatted answer.

        Prints the answer text, the citations (only when non-empty), the
        confidence as a percentage with one decimal, the question type, and
        the follow-up suggestions numbered from 1 (only when non-empty).
        """
        print("\n" + "="*50)
        print("💡 ANSWER:")
        print("-" * 15)
        print(result.answer)

        if result.citations:
            print(f"\n📚 Sources: {', '.join(result.citations)}")

        print(f"\n🎯 Confidence: {result.confidence:.1%}")
        print(f"📋 Question Type: {result.question_type}")

        if result.follow_ups:
            print("\n💭 Follow-up suggestions:")
            for i, follow_up in enumerate(result.follow_ups, 1):
                print(f"   {i}. {follow_up}")

    def _show_history(self):
        """Display conversation history.

        Lists at most the five most recent exchanges, numbered from 1 within
        that window, with each answer cut to its first 100 characters.
        """
        if not self.conversation_history:
            print("📝 No conversation history yet")
            return

        print("\n📚 CONVERSATION HISTORY:")
        print("-" * 30)
        for i, entry in enumerate(self.conversation_history[-5:], 1):  # Show last 5
            print(f"\n{i}. Q: {entry['question']}")
            # "..." is appended whether or not the answer was actually cut.
            print(f"   A: {entry['answer'].answer[:100]}...")
            print(f"   Type: {entry['type']}")

    def _show_help(self):
        """Display help information: example questions grouped by category."""
        print("\n💡 HELP - Question Examples:")
        print("-" * 35)
        print("📄 Document Questions:")
        print("   • What was held in this case?")
        print("   • What are the facts of this case?")
        print("   • What was the court's decision?")

        print("\n⚖️  Legal Knowledge:")
        print("   • What is Section 302 IPC?")
        print("   • Explain Article 21")
        print("   • What does mens rea mean?")

        print("\n🏛️  Procedures:")
        print("   • How to file an appeal?")
        print("   • What is the procedure for filing a petition?")
        print("   • Which court has jurisdiction?")

# Initialize the main Q&A system: a single notebook-wide instance. Its
# constructor reads the module-level question_classifier, entity_recognizer,
# answer_generator and doc_processor objects, so those must already be bound.
legal_qa = LegalQASystem()

print("\n🎉 Legal Q&A System ready!")
print("📝 To start interactive session, run: legal_qa.start_interactive_session()")
print("📄 To upload document first, run: doc_processor.upload_document()")

# Test the system with sample questions
# Smoke test only: results are printed, nothing is asserted. Because the calls
# go through process_question, each sample is also appended to
# legal_qa.conversation_history.
print("\n🧪 Testing with sample questions...")

test_questions = [
    "What is Section 302 IPC?",
    "Define mens rea",
    "What is Article 21?",
    # NOTE(review): the recorded output for this question shows the
    # petition-filing answer - confirm the appeal procedure is matched upstream.
    "How to file an appeal?"
]

for q in test_questions:
    print(f"\n❓ Test: {q}")
    result = legal_qa.process_question(q)
    # Preview only: first 100 characters, with "..." appended unconditionally.
    print(f"💡 Answer: {result.answer[:100]}...")
    print(f"🎯 Confidence: {result.confidence:.1%}")



✅ Legal Q&A System initialized

🎉 Legal Q&A System ready!
📝 To start interactive session, run: legal_qa.start_interactive_session()
📄 To upload document first, run: doc_processor.upload_document()

🧪 Testing with sample questions...

❓ Test: What is Section 302 IPC?
💡 Answer: Section 302 IPC: Punishment for murder - Whoever commits murder shall be punished with death, or imp...
🎯 Confidence: 95.0%

❓ Test: Define mens rea
💡 Answer: Mens Rea: Mental element or guilty mind required for criminal liability...
🎯 Confidence: 90.0%

❓ Test: What is Article 21?
💡 Answer: Article 21: Protection of life and personal liberty - No person shall be deprived of his life or per...
🎯 Confidence: 95.0%

❓ Test: How to file an appeal?
💡 Answer: Filing a petition involves: 1) Draft petition with proper format, 2) Pay court fees, 3) Attach requi...
🎯 Confidence: 80.0%

✅ All blocks 5-8 completed successfully!
