In [None]:
"""
SEER-NLP
Semantic Explainable Extraction & Reasoning
for Pension Applications in Disaster Response

Author: (Your Name)
Purpose: Research-grade NLP framework
"""

import re
import math
import pickle
from typing import List, Dict, Tuple
from collections import defaultdict

# ============================================================
# 1. TEXT NORMALIZATION ENGINE
# ============================================================

class TextNormalizer:
    """
    Responsible for linguistic normalization.
    This is intentionally separated for extensibility.
    """

    def __init__(self):
        self.replacements = {
            "yrs": "years",
            "yr": "year",
            "govt": "government"
        }

    def normalize(self, text: str) -> str:
        text = text.lower()
        for k, v in self.replacements.items():
            text = text.replace(k, v)
        text = re.sub(r"\s+", " ", text)
        return text.strip()

# ============================================================
# 2. SENTENCE & CLAUSE INTELLIGENCE
# ============================================================

class LinguisticSegmenter:
    """
    Performs sentence and clause-level segmentation.
    Clause segmentation adds novelty and depth.
    """

    SENTENCE_SPLIT_REGEX = r"[.!?]"
    CLAUSE_SPLIT_REGEX = r",|;| and | but | because | since "

    def split_sentences(self, text: str) -> List[str]:
        return [s.strip() for s in re.split(self.SENTENCE_SPLIT_REGEX, text) if s.strip()]

    def split_clauses(self, sentence: str) -> List[str]:
        return [c.strip() for c in re.split(self.CLAUSE_SPLIT_REGEX, sentence) if c.strip()]

# ============================================================
# 3. SEMANTIC SIGNAL PRIMITIVE (CORE CONCEPT)
# ============================================================

class SemanticSignal:
    """
    Atomic unit of meaning extracted from text.
    Signals are later connected into a graph.
    """

    def __init__(
        self,
        signal_type: str,
        surface_form: str,
        sentence: str,
        clause: str
    ):
        self.signal_type = signal_type
        self.surface_form = surface_form
        self.sentence = sentence
        self.clause = clause
        self.base_weight = self.assign_weight()

    def assign_weight(self) -> float:
        """
        Base semantic importance by signal type.
        """
        weights = {
            "AGE": 0.9,
            "LOCATION": 0.7,
            "DISASTER": 0.9,
            "WIDOW": 1.0,
            "DISABILITY": 1.0,
            "OLD_AGE": 0.8,
            "DISTRESS": 0.6
        }
        return weights.get(self.signal_type, 0.5)

# ============================================================
# 4. CONTEXT & NEGATION ANALYZER
# ============================================================

class ContextAnalyzer:
    """
    Handles negation and contextual weakening.
    """

    NEGATION_TERMS = [
        "not", "never", "no longer", "without", "none"
    ]

    def is_negated(self, text: str, phrase: str) -> bool:
        idx = text.find(phrase)
        if idx == -1:
            return False
        window = text[max(0, idx - 35):idx]
        return any(n in window for n in self.NEGATION_TERMS)

# ============================================================
# 5. SEMANTIC EVIDENCE GRAPH (KEY NOVELTY)
# ============================================================

class SemanticEvidenceGraph:
    """
    Graph-based semantic reasoning structure.

    Nodes   → Semantic Signals
    Edges   → Contextual co-occurrence
    """

    def __init__(self):
        self.nodes: List[SemanticSignal] = []
        self.edges: Dict[int, List[int]] = defaultdict(list)

    def add_signal(self, signal: SemanticSignal) -> int:
        idx = len(self.nodes)
        self.nodes.append(signal)
        return idx

    def connect(self, i: int, j: int):
        if j not in self.edges[i]:
            self.edges[i].append(j)
        if i not in self.edges[j]:
            self.edges[j].append(i)

    def auto_connect(self):
        """
        Connect signals appearing in same sentence or clause.
        """
        for i in range(len(self.nodes)):
            for j in range(i + 1, len(self.nodes)):
                if (
                    self.nodes[i].sentence == self.nodes[j].sentence
                    or self.nodes[i].clause == self.nodes[j].clause
                ):
                    self.connect(i, j)

    def score_signal_type(self, signal_type: str) -> float:
        score = 0.0
        for node in self.nodes:
            if node.signal_type == signal_type:
                score += node.base_weight
        return score

    def collect_evidence(self, signal_type: str) -> List[str]:
        return [
            n.surface_form for n in self.nodes
            if n.signal_type == signal_type
        ]
# ============================================================
# PART 2 — ADAPTIVE SEMANTIC LEXICON SYSTEM (RESEARCH CORE)
# ============================================================

import re
import itertools
from collections import defaultdict, Counter
from typing import List

# ============================================================
# 6. SEMANTIC ROLE DEFINITIONS (EXPLICIT ONTOLOGY)
# ============================================================

SEMANTIC_ROLES = [
    "AGE",
    "LOCATION",
    "DISASTER",
    "WIDOW",
    "DISABILITY",
    "OLD_AGE",
    "DISTRESS"
]

# ============================================================
# 7. SEED SEMANTIC LEXICON (HIGH PRECISION, LOW RECALL)
# ============================================================

class SeedSemanticLexicon:
    """
    Seed lexicon = semantic anchors.
    These phrases are trusted and NEVER auto-removed.
    """

    AGE_PATTERNS = [
        r"\b(\d{2})\s+year[s]?\s+old\b",
        r"\bat\s+the\s+age\s+of\s+(\d{2})\b",
        r"\baged\s+(\d{2})\b",
        r"\bi\s+am\s+(\d{2})\s+years\b",
        r"\bmy\s+age\s+is\s+(\d{2})\b"
    ]

    LOCATION_PATTERN = r"(district|city|village)\s+([a-z ]+)"

    DISASTER_SEEDS = {
        "Flood": ["flood", "floods", "flooding"],
        "Cyclone": ["cyclone", "storm", "cyclonic"],
        "Fire": ["fire", "burnt", "burned"],
        "Earthquake": ["earthquake", "tremor", "seismic"]
    }

    WIDOW_SEEDS = [
        "widow", "widowed",
        "husband passed away",
        "lost my husband",
        "after my husband died"
    ]

    DISABILITY_SEEDS = [
        "disabled",
        "unable to work",
        "physically impaired",
        "medically unfit",
        "chronic illness"
    ]

    OLD_AGE_SEEDS = [
        "too old to work",
        "because of my age",
        "elderly",
        "advanced age",
        "no strength to work"
    ]

    DISTRESS_SEEDS = [
        "no income",
        "lost everything",
        "nowhere to go",
        "urgent help",
        "dependent on support"
    ]

# ============================================================
# 8. LEXICON MEMORY (PERSISTENT & SERIALIZABLE)
# ============================================================

class LexiconMemory:
    """
    Stores learned lexicon expansions.
    This object is saved inside the pickle model.
    """

    def __init__(self):
        self.role_phrases = {
            "WIDOW": set(),
            "DISABILITY": set(),
            "OLD_AGE": set(),
            "DISTRESS": set()
        }

    def add(self, role: str, phrase: str):
        self.role_phrases[role].add(phrase)

    def get(self, role: str):
        return self.role_phrases.get(role, set())

# ============================================================
# 9. SEMANTIC STABILITY–AWARE LEXICON EXPANSION ENGINE (NOVEL)
# ============================================================

class StableLexiconExpansionEngine:
    """
    Discovers new semantic phrases using:
    - frequency
    - role purity
    - contextual stability
    """

    def __init__(self, min_freq=3, min_role_purity=0.7):
        self.min_freq = min_freq
        self.min_role_purity = min_role_purity

    # --------------------------------------------------------
    # N-GRAM CANDIDATE MINING
    # --------------------------------------------------------

    def mine_candidates(self, clauses: List[str], seed_phrases: List[str]):
        candidates = defaultdict(list)

        for clause in clauses:
            if any(seed in clause for seed in seed_phrases):
                tokens = clause.split()
                for n in [2, 3, 4]:
                    for gram in zip(*[tokens[i:] for i in range(n)]):
                        phrase = " ".join(gram)
                        if phrase not in seed_phrases:
                            candidates[phrase].append(clause)

        return candidates

    # --------------------------------------------------------
    # ROLE PURITY EVALUATION
    # --------------------------------------------------------

    def role_purity(self, contexts, role_seeds):
        hits = sum(
            1 for c in contexts
            if any(seed in c for seed in role_seeds)
        )
        return hits / len(contexts)

    # --------------------------------------------------------
    # PROMOTION DECISION
    # --------------------------------------------------------

    def promote(self, candidates, role, role_seeds, memory: LexiconMemory):
        for phrase, contexts in candidates.items():
            if len(contexts) < self.min_freq:
                continue

            purity = self.role_purity(contexts, role_seeds)
            if purity >= self.min_role_purity:
                memory.add(role, phrase)

    # --------------------------------------------------------
    # FULL UPDATE PIPELINE
    # --------------------------------------------------------

    def update_from_text(self, text, segmenter, memory: LexiconMemory):
        sentences = segmenter.split_sentences(text)
        clauses = []
        for s in sentences:
            clauses.extend(segmenter.split_clauses(s))

        self.promote(
            self.mine_candidates(clauses, SeedSemanticLexicon.WIDOW_SEEDS),
            "WIDOW", SeedSemanticLexicon.WIDOW_SEEDS, memory
        )

        self.promote(
            self.mine_candidates(clauses, SeedSemanticLexicon.DISABILITY_SEEDS),
            "DISABILITY", SeedSemanticLexicon.DISABILITY_SEEDS, memory
        )

        self.promote(
            self.mine_candidates(clauses, SeedSemanticLexicon.OLD_AGE_SEEDS),
            "OLD_AGE", SeedSemanticLexicon.OLD_AGE_SEEDS, memory
        )

        self.promote(
            self.mine_candidates(clauses, SeedSemanticLexicon.DISTRESS_SEEDS),
            "DISTRESS", SeedSemanticLexicon.DISTRESS_SEEDS, memory
        )

# ============================================================
# 10. ADAPTIVE SEMANTIC SIGNAL EXTRACTOR (CORE NLP ENGINE)
# ============================================================

class AdaptiveSemanticSignalExtractor:
    """
    Extracts semantic signals using:
    - seed lexicons (precision)
    - expanded lexicons (recall)
    - negation-aware context analysis
    """

    def __init__(self):
        self.seed = SeedSemanticLexicon()
        self.memory = LexiconMemory()
        self.expander = StableLexiconExpansionEngine()
        self.segmenter = LinguisticSegmenter()
        self.context = ContextAnalyzer()

    # --------------------------------------------------------
    # ONLINE ADAPTATION HOOK (OPTIONAL)
    # --------------------------------------------------------

    def adapt(self, text: str):
        self.expander.update_from_text(
            text, self.segmenter, self.memory
        )

    # --------------------------------------------------------
    # SEMANTIC SIGNAL EXTRACTION
    # --------------------------------------------------------

    def extract_signals(self, text: str) -> SemanticEvidenceGraph:
        graph = SemanticEvidenceGraph()

        sentences = self.segmenter.split_sentences(text)

        for sentence in sentences:
            clauses = self.segmenter.split_clauses(sentence)

            for clause in clauses:
                self._age(clause, sentence, graph)
                self._location(clause, sentence, graph)
                self._disaster(clause, sentence, graph)
                self._vulnerability(clause, sentence, graph)
                self._distress(clause, sentence, graph)

        graph.auto_connect()
        return graph

    # ---------------- SIGNAL HELPERS ----------------

    def _age(self, clause, sentence, graph):
        for p in self.seed.AGE_PATTERNS:
            m = re.search(p, clause)
            if m:
                graph.add_signal(
                    SemanticSignal("AGE", m.group(0), sentence, clause)
                )

    def _location(self, clause, sentence, graph):
        m = re.search(self.seed.LOCATION_PATTERN, clause)
        if m:
            graph.add_signal(
                SemanticSignal("LOCATION", m.group(0), sentence, clause)
            )

    def _disaster(self, clause, sentence, graph):
        for _, seeds in self.seed.DISASTER_SEEDS.items():
            for s in seeds:
                if s in clause:
                    graph.add_signal(
                        SemanticSignal("DISASTER", s, sentence, clause)
                    )

    def _vulnerability(self, clause, sentence, graph):
        for role, seeds in {
            "WIDOW": self.seed.WIDOW_SEEDS,
            "DISABILITY": self.seed.DISABILITY_SEEDS,
            "OLD_AGE": self.seed.OLD_AGE_SEEDS
        }.items():
            for s in seeds:
                if s in clause and not self.context.is_negated(clause, s):
                    graph.add_signal(
                        SemanticSignal(role, s, sentence, clause)
                    )

        # Expanded lexicon usage
        for role in ["WIDOW", "DISABILITY", "OLD_AGE"]:
            for phrase in self.memory.get(role):
                if phrase in clause:
                    graph.add_signal(
                        SemanticSignal(role, phrase, sentence, clause)
                    )

    def _distress(self, clause, sentence, graph):
        for s in self.seed.DISTRESS_SEEDS:
            if s in clause:
                graph.add_signal(
                    SemanticSignal("DISTRESS", s, sentence, clause)
                )

        for s in self.memory.get("DISTRESS"):
            if s in clause:
                graph.add_signal(
                    SemanticSignal("DISTRESS", s, sentence, clause)
                )
# ============================================================
# PART 3 — SEMANTIC REASONING ENGINE (CORE NOVELTY)
# ============================================================

import math
from collections import defaultdict

# ============================================================
# 11. GRAPH ENERGY & SIGNAL CENTRALITY ANALYZER
# ============================================================

class GraphEnergyAnalyzer:
    """
    Computes semantic strength using:
    - signal weights
    - graph connectivity
    - role concentration
    """

    def __init__(self, graph: SemanticEvidenceGraph):
        self.graph = graph

    def signal_energy(self, idx: int) -> float:
        """
        Energy of a signal = base weight × (1 + degree)
        """
        node = self.graph.nodes[idx]
        degree = len(self.graph.edges.get(idx, []))
        return node.base_weight * (1 + math.log1p(degree))

    def role_energy(self, role: str) -> float:
        """
        Total semantic energy of a role
        """
        energy = 0.0
        for i, node in enumerate(self.graph.nodes):
            if node.signal_type == role:
                energy += self.signal_energy(i)
        return energy

    def role_distribution(self) -> dict:
        """
        Normalized role energy distribution
        """
        role_scores = defaultdict(float)
        for role in SEMANTIC_ROLES:
            role_scores[role] = self.role_energy(role)

        total = sum(role_scores.values()) or 1.0
        return {r: v / total for r, v in role_scores.items()}


# ============================================================
# 12. PENSION INTENT RESOLUTION ENGINE (CONFLICT AWARE)
# ============================================================

class PensionIntentResolver:
    """
    Resolves pension intent using:
    - role energy
    - mutual exclusion
    - dominance margins
    """

    def __init__(self, graph: SemanticEvidenceGraph):
        self.energy = GraphEnergyAnalyzer(graph)

    def resolve(self):
        energies = {
            "Widow Pension": self.energy.role_energy("WIDOW"),
            "Disability Pension": self.energy.role_energy("DISABILITY"),
            "Old Age Pension": self.energy.role_energy("OLD_AGE")
        }

        # No signals found
        if all(v == 0 for v in energies.values()):
            return "Unknown", 0.3, "no vulnerability signals detected"

        # Sort by strength
        sorted_roles = sorted(
            energies.items(), key=lambda x: x[1], reverse=True
        )

        top_role, top_score = sorted_roles[0]
        second_score = sorted_roles[1][1]

        # Confidence based on dominance gap
        dominance = top_score - second_score
        confidence = min(0.6 + dominance, 0.95)

        reason = (
            f"resolved by semantic dominance: "
            f"{top_role} ({top_score:.2f}) > others"
        )

        return top_role, confidence, reason


# ============================================================
# 13. DISASTER EVENT RESOLUTION ENGINE
# ============================================================

class DisasterResolver:
    """
    Determines disaster event using graph energy
    """

    def __init__(self, graph: SemanticEvidenceGraph):
        self.graph = graph
        self.energy = GraphEnergyAnalyzer(graph)

    def resolve(self):
        disaster_energy = defaultdict(float)

        for i, node in enumerate(self.graph.nodes):
            if node.signal_type == "DISASTER":
                disaster_energy[node.surface_form] += (
                    self.energy.signal_energy(i)
                )

        if not disaster_energy:
            return "Unknown", 0.0, "no disaster signals"

        # Pick strongest signal cluster
        best_phrase = max(disaster_energy, key=disaster_energy.get)
        confidence = min(0.8 + disaster_energy[best_phrase], 0.95)

        return best_phrase, confidence, "disaster energy dominance"


# ============================================================
# 14. AGE & LOCATION REFINEMENT ENGINE
# ============================================================

class AttributeResolver:
    """
    Resolves factual attributes (age, location)
    with redundancy handling.
    """

    def __init__(self, graph: SemanticEvidenceGraph):
        self.graph = graph

    def resolve_age(self):
        for node in self.graph.nodes:
            if node.signal_type == "AGE":
                age = int(re.search(r"\d{2}", node.surface_form).group())
                return age, 0.95, "explicit age detected"
        return "Unknown", 0.0, "age not found"

    def resolve_location(self):
        for node in self.graph.nodes:
            if node.signal_type == "LOCATION":
                return node.surface_form, 0.85, "location phrase detected"
        return "Unknown", 0.0, "location not found"


# ============================================================
# 15. DISASTER DISTRESS SEVERITY SCORER
# ============================================================

class DistressSeverityAnalyzer:
    """
    Computes disaster severity using
    distress signal density.
    """

    def __init__(self, graph: SemanticEvidenceGraph):
        self.graph = graph

    def severity_score(self):
        distress_count = sum(
            1 for n in self.graph.nodes if n.signal_type == "DISTRESS"
        )
        return min(0.4 + 0.15 * distress_count, 1.0)


# ============================================================
# 16. EXPLAINABILITY TRACE GENERATOR (VERY IMPORTANT)
# ============================================================

class ExplainabilityEngine:
    """
    Generates human-readable reasoning traces.
    """

    def __init__(self, graph: SemanticEvidenceGraph):
        self.graph = graph

    def evidence_by_role(self, role):
        return [
            n.surface_form for n in self.graph.nodes
            if n.signal_type == role
        ]

    def full_trace(self):
        trace = {}
        for role in SEMANTIC_ROLES:
            trace[role] = self.evidence_by_role(role)
        return trace


# ============================================================
# 17. FULL SEMANTIC DECISION ENGINE (INTEGRATION)
# ============================================================

class SemanticDecisionEngine:
    """
    End-to-end reasoning from evidence graph to final output.
    """

    def __init__(self, graph: SemanticEvidenceGraph):
        self.graph = graph

        self.intent_resolver = PensionIntentResolver(graph)
        self.disaster_resolver = DisasterResolver(graph)
        self.attribute_resolver = AttributeResolver(graph)
        self.distress_analyzer = DistressSeverityAnalyzer(graph)
        self.explainer = ExplainabilityEngine(graph)

    def decide(self) -> dict:
        age, age_c, age_r = self.attribute_resolver.resolve_age()
        location, loc_c, loc_r = self.attribute_resolver.resolve_location()

        pension, pen_c, pen_r = self.intent_resolver.resolve()
        disaster, dis_c, dis_r = self.disaster_resolver.resolve()
        distress = self.distress_analyzer.severity_score()

        return {
            "Age": {"value": age, "confidence": age_c, "reason": age_r},
            "Location": {"value": location, "confidence": loc_c, "reason": loc_r},
            "Pension_Type": {
                "value": pension,
                "confidence": pen_c,
                "reason": pen_r
            },
            "Disaster_Event": {
                "value": disaster,
                "confidence": dis_c,
                "reason": dis_r
            },
            "Disaster_Severity": distress,
            "Explainability_Trace": self.explainer.full_trace()
        }
# ============================================================
# PART 4 — SEER-NLP MASTER MODEL & DEPLOYMENT LAYER
# ============================================================

import pickle
from typing import Iterable

# ============================================================
# 18. SEER-NLP MASTER MODEL (FULL PIPELINE)
# ============================================================

class SEERNLPModel:
    """
    SEER-NLP Master Model

    Orchestrates:
    - Text normalization
    - Adaptive semantic signal extraction
    - Evidence graph construction
    - Semantic reasoning & conflict resolution
    - Explainability generation

    This is the object serialized for deployment.
    """

    def __init__(self):
        # Core NLP components
        self.normalizer = TextNormalizer()
        self.signal_extractor = AdaptiveSemanticSignalExtractor()

        # Meta information
        self.version = "SEER-NLP v1.0"
        self.description = (
            "Semantic Explainable Extraction & Reasoning "
            "for Pension Applications in Disaster Response"
        )

    # --------------------------------------------------------
    # SINGLE DOCUMENT INFERENCE
    # --------------------------------------------------------

    def infer(self, raw_text: str, adapt: bool = False) -> dict:
        """
        Perform full semantic extraction & reasoning
        on a single application narrative.

        Parameters
        ----------
        raw_text : str
            Input pension application text
        adapt : bool
            Whether to allow lexicon adaptation

        Returns
        -------
        dict : structured, explainable output
        """

        # Step 1: Normalize
        text = self.normalizer.normalize(raw_text)

        # Step 2: Optional online adaptation (novelty)
        if adapt:
            self.signal_extractor.adapt(text)

        # Step 3: Extract semantic signals & graph
        graph = self.signal_extractor.extract_signals(text)

        # Step 4: Reasoning & decision
        decision_engine = SemanticDecisionEngine(graph)
        result = decision_engine.decide()

        # Step 5: Attach meta info
        result["_meta"] = {
            "model_version": self.version,
            "adaptation_used": adapt
        }

        return result

    # --------------------------------------------------------
    # BATCH INFERENCE (DATASET SCALE)
    # --------------------------------------------------------

    def infer_batch(
        self,
        texts: Iterable[str],
        adapt: bool = False
    ) -> List[dict]:
        """
        Apply SEER-NLP on multiple texts.

        This method is optimized for datasets
        (e.g., your 10k synthetic dataset).
        """

        outputs = []

        for text in texts:
            outputs.append(self.infer(text, adapt=adapt))

        return outputs


# ============================================================
# 19. MODEL SERIALIZATION & LOADING
# ============================================================

class ModelPersistence:
    """
    Handles saving and loading of SEER-NLP models.
    """

    @staticmethod
    def save(model: SEERNLPModel, path: str):
        with open(path, "wb") as f:
            pickle.dump(model, f)
        print(f"✅ SEER-NLP model saved to: {path}")

    @staticmethod
    def load(path: str) -> SEERNLPModel:
        with open(path, "rb") as f:
            model = pickle.load(f)
        print(f"✅ SEER-NLP model loaded from: {path}")
        return model


# ============================================================
# 20. STREAMLIT-READY INTERFACE (API STYLE)
# ============================================================

class SEERNLPService:
    """
    Lightweight wrapper for app-level usage.
    Ideal for Streamlit or REST APIs.
    """

    def __init__(self, model_path: str):
        self.model = ModelPersistence.load(model_path)

    def process_text(self, text: str) -> dict:
        """
        Entry point for Streamlit UI.
        """
        return self.model.infer(text, adapt=False)

    def process_text_with_adaptation(self, text: str) -> dict:
        """
        Advanced mode: adapts lexicon online.
        """
        return self.model.infer(text, adapt=True)


# ============================================================
# 21. ONE-TIME MODEL BUILD & SAVE SCRIPT
# ============================================================

def build_and_save_seer_model(
    output_path: str = "seer_nlp_model.pkl"
):
    """
    Initializes SEER-NLP and serializes it.
    Run this ONCE before deployment.
    """
    model = SEERNLPModel()
    ModelPersistence.save(model, output_path)


# ============================================================
# 22. EXAMPLE USAGE (OPTIONAL TEST)
# ============================================================

if __name__ == "__main__":
    sample_text = """
    I am 72 years old and live in Gaya district.
    After floods damaged my house, I lost my husband
    and now have no income to survive.
    """

    # Build & save model
    build_and_save_seer_model("seer_nlp_model.pkl")

    # Load & test
    service = SEERNLPService("seer_nlp_model.pkl")
    output = service.process_text(sample_text)

    import json
    print(json.dumps(output, indent=4))


In [8]:
# ============================================================
# PART 4 — SEER-NLP MASTER MODEL & DEPLOYMENT LAYER
# ============================================================

import pickle
from typing import Iterable

# ============================================================
# 18. SEER-NLP MASTER MODEL (FULL PIPELINE)
# ============================================================

class SEERNLPModel:
    """
    SEER-NLP Master Model

    Orchestrates:
    - Text normalization
    - Adaptive semantic signal extraction
    - Evidence graph construction
    - Semantic reasoning & conflict resolution
    - Explainability generation

    This is the object serialized for deployment.
    """

    def __init__(self):
        # Core NLP components
        self.normalizer = TextNormalizer()
        self.signal_extractor = AdaptiveSemanticSignalExtractor()

        # Meta information
        self.version = "SEER-NLP v1.0"
        self.description = (
            "Semantic Explainable Extraction & Reasoning "
            "for Pension Applications in Disaster Response"
        )

    # --------------------------------------------------------
    # SINGLE DOCUMENT INFERENCE
    # --------------------------------------------------------

    def infer(self, raw_text: str, adapt: bool = False) -> dict:
        """
        Perform full semantic extraction & reasoning
        on a single application narrative.

        Parameters
        ----------
        raw_text : str
            Input pension application text
        adapt : bool
            Whether to allow lexicon adaptation

        Returns
        -------
        dict : structured, explainable output
        """

        # Step 1: Normalize
        text = self.normalizer.normalize(raw_text)

        # Step 2: Optional online adaptation (novelty)
        if adapt:
            self.signal_extractor.adapt(text)

        # Step 3: Extract semantic signals & graph
        graph = self.signal_extractor.extract_signals(text)

        # Step 4: Reasoning & decision
        decision_engine = SemanticDecisionEngine(graph)
        result = decision_engine.decide()

        # Step 5: Attach meta info
        result["_meta"] = {
            "model_version": self.version,
            "adaptation_used": adapt
        }

        return result

    # --------------------------------------------------------
    # BATCH INFERENCE (DATASET SCALE)
    # --------------------------------------------------------

    def infer_batch(
        self,
        texts: Iterable[str],
        adapt: bool = False
    ) -> List[dict]:
        """
        Apply SEER-NLP on multiple texts.

        This method is optimized for datasets
        (e.g., your 10k synthetic dataset).
        """

        outputs = []

        for text in texts:
            outputs.append(self.infer(text, adapt=adapt))

        return outputs


# ============================================================
# 19. MODEL SERIALIZATION & LOADING
# ============================================================

class ModelPersistence:
    """
    Handles saving and loading of SEER-NLP models.
    """

    @staticmethod
    def save(model: SEERNLPModel, path: str):
        with open(path, "wb") as f:
            pickle.dump(model, f)
        print(f"✅ SEER-NLP model saved to: {path}")

    @staticmethod
    def load(path: str) -> SEERNLPModel:
        with open(path, "rb") as f:
            model = pickle.load(f)
        print(f"✅ SEER-NLP model loaded from: {path}")
        return model


# ============================================================
# 20. STREAMLIT-READY INTERFACE (API STYLE)
# ============================================================

class SEERNLPService:
    """
    Lightweight wrapper for app-level usage.
    Ideal for Streamlit or REST APIs.
    """

    def __init__(self, model_path: str):
        self.model = ModelPersistence.load(model_path)

    def process_text(self, text: str) -> dict:
        """
        Entry point for Streamlit UI.
        """
        return self.model.infer(text, adapt=False)

    def process_text_with_adaptation(self, text: str) -> dict:
        """
        Advanced mode: adapts lexicon online.
        """
        return self.model.infer(text, adapt=True)


# ============================================================
# 21. ONE-TIME MODEL BUILD & SAVE SCRIPT
# ============================================================

def build_and_save_seer_model(
    output_path: str = "seer_nlp_model.pkl"
):
    """
    Initializes SEER-NLP and serializes it.
    Run this ONCE before deployment.
    """
    model = SEERNLPModel()
    ModelPersistence.save(model, output_path)


# ============================================================
# 22. EXAMPLE USAGE (OPTIONAL TEST)
# ============================================================

if __name__ == "__main__":
    sample_text = """
    I am 72 years old and live in Gaya district.
    After floods damaged my house, I lost my husband
    and now have no income to survive.
    """

    # Build & save model
    build_and_save_seer_model("seer_nlp_model.pkl")

    # Load & test
    service = SEERNLPService("seer_nlp_model.pkl")
    output = service.process_text(sample_text)

    import json
    print(json.dumps(output, indent=4))


✅ SEER-NLP model saved to: seer_nlp_model.pkl
✅ SEER-NLP model loaded from: seer_nlp_model.pkl
{
    "Age": {
        "value": 72,
        "confidence": 0.95,
        "reason": "explicit age detected"
    },
    "Location": {
        "value": "Unknown",
        "confidence": 0.0,
        "reason": "location not found"
    },
    "Pension_Type": {
        "value": "Widow Pension",
        "confidence": 0.95,
        "reason": "resolved by semantic dominance: Widow Pension (2.39) > others"
    },
    "Disaster_Event": {
        "value": "flood",
        "confidence": 0.95,
        "reason": "disaster energy dominance"
    },
    "Disaster_Severity": 0.55,
    "Explainability_Trace": {
        "AGE": [
            "72 years old",
            "i am 72 years"
        ],
        "LOCATION": [],
        "DISASTER": [
            "flood",
            "floods"
        ],
        "WIDOW": [
            "lost my husband"
        ],
        "DISABILITY": [],
        "OLD_AGE": [],
        "DISTRES

In [16]:
result["Explainability_Trace"]


{'AGE': ['72 years old', 'i am 72 years'],
 'LOCATION': [],
 'DISASTER': ['flood', 'floods'],
 'WIDOW': ['husband passed away'],
 'DISABILITY': [],
 'OLD_AGE': [],
 'DISTRESS': ['no income']}