In [120]:
# ============================================================
# STAGE 1: TEXT NORMALIZATION ENGINE (ABSOLUTE PEAK)
# ============================================================

import re
import unicodedata
from typing import Dict, List


class TextNormalizer:
    """
    Deterministic, regex-based text normalization engine
    for noisy pension & disaster-response documents.

    ``normalize`` runs a fixed sequence of passes (Unicode decomposition,
    lowercasing, separator cleanup, character filtering, punctuation
    collapsing, abbreviation expansion, age canonicalization, whitespace
    collapsing) and reports which passes fired plus a noise score.
    No statistical model is involved; the same input always yields the
    same output.
    """

    # Hyphen joining a number to an age unit ("72-yrs", "72 - years").
    # Defined on the class rather than in __init__ so that instances
    # restored from a pickle written before this pattern existed still
    # find it through normal attribute lookup.
    _HYPHENATED_AGE = re.compile(
        r"\b(\d{1,3})\s*-\s*(?=(?:years?|yrs?)\b)"
    )

    def __init__(self):
        # ----------------------------------------------------
        # DOMAIN-SAFE CANONICAL REPLACEMENTS
        # ----------------------------------------------------
        # Regex -> canonical text. The identity entries ("age",
        # "no income") do not change the text; they only make the
        # term show up in the normalization trace.
        self.replacements = {
            r"\byrs\b": "years",
            r"\byr\b": "year",
            r"\bage\b": "age",
            r"\bgovt\b": "government",
            r"\bdept\b": "department",
            r"\bdistt\b": "district",
            r"\baddr\b": "address",
            r"\bwid\b": "widow",
            r"\bexpired\b": "passed away",
            r"\bno income\b": "no income",
            r"\bjobless\b": "no income"
        }

        self._compiled_replacements = [
            (re.compile(p), r) for p, r in self.replacements.items()
        ]

        # ----------------------------------------------------
        # STRUCTURAL & NUMERIC NORMALIZATION
        # ----------------------------------------------------
        self.age_pattern = re.compile(
            r"\b(\d{1,3})\s*[-]?\s*(year|years|yrs)\b"
        )

        # ----------------------------------------------------
        # UNICODE & OCR NOISE CONTROL
        # ----------------------------------------------------
        # Anything that is not a word character, whitespace or one of
        # . , ; : ! ? - is replaced by a space.
        self.illegal_chars = re.compile(
            r"[^\w\s\.\,\;\:\!\?\-]"
        )

        self.repeated_punct = re.compile(
            r"([.!?]){2,}"
        )

        self.field_separator = re.compile(
            r"\s*[:\-]\s*"
        )

        self.whitespace = re.compile(r"\s+")

        # ----------------------------------------------------
        # HARD NOISE INDICATORS (CONFIDENCE PENALTY)
        # ----------------------------------------------------
        self.noise_patterns = [
            re.compile(p) for p in [
                r"\?\?\?",
                r"unknown",
                r"illegible",
                r"n/a",
                r"xxx",
                r"cannot read",
                r"blurred"
            ]
        ]

    # ========================================================
    # NORMALIZATION PIPELINE
    # ========================================================

    def normalize(self, text: str) -> Dict[str, object]:
        """
        Normalize ``text`` and describe what was done to it.

        Returns:
        {
            'normalized_text': str,
            'normalization_trace': List[str],
            'noise_score': float
        }

        ``noise_score`` adds 0.15 per noise indicator found, capped at
        1.0 and rounded to two decimals. Indicators are looked for both
        in the lowercased input and in the final text, because the
        cleanup passes destroy some of them ("???" collapses to "?",
        the "/" in "n/a" is stripped).
        """

        trace: List[str] = []
        noise_score = 0.0

        # 0. Unicode canonicalization (compatibility decomposition)
        text = unicodedata.normalize("NFKD", text)
        trace.append("unicode normalized")

        # 1. Case normalization
        text = text.lower()
        trace.append("lowercased")

        # Snapshot for noise scoring, taken before any destructive pass.
        pre_cleanup_text = text

        # 1b. Detach hyphenated age units before the separator pass,
        #     which would otherwise turn "72-yrs" into "72: yrs" and
        #     hide it from the age pattern in step 6.
        if self._HYPHENATED_AGE.search(text):
            text = self._HYPHENATED_AGE.sub(r"\1 ", text)
            trace.append("normalized hyphenated age expressions")

        # 2. Normalize field separators (semi-structured forms)
        if self.field_separator.search(text):
            text = self.field_separator.sub(": ", text)
            trace.append("normalized field separators")

        # 3. Remove illegal / OCR characters
        if self.illegal_chars.search(text):
            text = self.illegal_chars.sub(" ", text)
            trace.append("removed OCR noise")

        # 4. Collapse repeated punctuation
        if self.repeated_punct.search(text):
            text = self.repeated_punct.sub(r"\1", text)
            trace.append("collapsed repeated punctuation")

        # 5. Canonical lexical replacements (boundary-safe)
        for pattern, replacement in self._compiled_replacements:
            if pattern.search(text):
                text = pattern.sub(replacement, text)
                trace.append(f"canonicalized {pattern.pattern}")

        # 6. Normalize numeric age expressions
        if self.age_pattern.search(text):
            text = self.age_pattern.sub(r"\1 years", text)
            trace.append("normalized age expressions")

        # 7. Whitespace normalization
        text = self.whitespace.sub(" ", text).strip()
        trace.append("normalized whitespace")

        # 8. Noise scoring (confidence penalty). Each indicator counts
        #    once, whether it is visible before or after cleanup.
        for p in self.noise_patterns:
            if p.search(pre_cleanup_text) or p.search(text):
                noise_score += 0.15

        noise_score = min(noise_score, 1.0)

        return {
            "normalized_text": text,
            "normalization_trace": trace,
            "noise_score": round(noise_score, 2)
        }


In [121]:
# ============================================================
# PART 2 — FINAL REFINED ADAPTIVE SEMANTIC LEXICON SYSTEM
# ============================================================

import re
from collections import defaultdict
from typing import List, Dict, Set

# ============================================================
# SEMANTIC ROLE ONTOLOGY
# ============================================================

# Labels a semantic signal can carry (its ``signal_type``).
SEMANTIC_ROLES = {
    "AGE",
    "LOCATION",
    "DISASTER",
    "WIDOW",
    "DISABILITY",
    "OLD_AGE",
    "DISTRESS",
}

# ============================================================
# SEED SEMANTIC LEXICON (IMMUTABLE ANCHORS)
# ============================================================

class SeedSemanticLexicon:
    """
    Hand-written seed patterns and phrases, grouped by semantic role.

    All entries are lowercase; they are matched against text that has
    already been lowercased by the normalizer.
    """

    # Regexes for explicit two-digit age statements.
    AGE_PATTERNS = [
        r"\b(\d{2})\s+year[s]?\s+old\b",
        r"\bat\s+the\s+age\s+of\s+(\d{2})\b",
        r"\baged\s+(\d{2})\b",
        r"\bi\s+am\s+(\d{2})\s+years\b",
    ]

    # "<name> district|city|village", optionally introduced by "live in".
    LOCATION_PATTERN = r"\b(live in\s+)?([a-z ]{3,})\s+(district|city|village)\b"

    # Disaster category -> surface terms that indicate it.
    DISASTER_SEEDS = {
        "Flood": {
            "flood",
            "floods",
            "flooding",
        },
        "Cyclone": {
            "cyclone",
            "storm",
            "cyclonic",
        },
        "Fire": {
            "fire",
            "burnt",
            "burned",
        },
        "Earthquake": {
            "earthquake",
            "tremor",
            "seismic",
        },
    }

    WIDOW_SEEDS = {
        "widow",
        "widowed",
        "husband passed away",
        "lost my husband",
    }

    DISABILITY_SEEDS = {
        "disabled",
        "unable to work",
        "physically impaired",
        "medically unfit",
    }

    OLD_AGE_SEEDS = {
        "too old to work",
        "elderly",
        "advanced age",
    }

    DISTRESS_SEEDS = {
        "no income",
        "lost everything",
        "urgent help",
        "dependent on support",
    }


# ============================================================
# LEXICON MEMORY (STABLE, SERIALIZABLE)
# ============================================================

class LexiconMemory:
    """
    Stores learned phrases per role together with stability metadata.

    ``memory`` maps role -> phrase -> metadata dict. The metadata is
    expected to carry the keys ``freq``, ``docs`` and ``purity``.
    """

    def __init__(self):
        self.memory: Dict[str, Dict[str, dict]] = defaultdict(dict)

    def add(self, role: str, phrase: str, meta: dict):
        """
        Record ``phrase`` under ``role``, merging with an existing entry.

        A new phrase stores a copy of ``meta`` so that later merges never
        mutate the caller's dict. For a known phrase, ``freq`` and
        ``docs`` are summed and ``purity`` becomes the frequency-weighted
        mean of the old and new values (rounded to two decimals).
        """
        known = self.memory[role]
        entry = known.get(phrase)

        if entry is None:
            known[phrase] = dict(meta)
            return

        previous_freq = entry["freq"]
        entry["freq"] += meta["freq"]
        entry["docs"] += meta["docs"]

        # Keep purity in step with the merged counts instead of leaving
        # the value from the first observation in place.
        if "purity" in entry and "purity" in meta and entry["freq"] > 0:
            weighted = (
                entry["purity"] * previous_freq
                + meta["purity"] * meta["freq"]
            )
            entry["purity"] = round(weighted / entry["freq"], 2)

    def get(
        self,
        role: str,
        min_freq: int = 3,
        min_docs: int = 2,
        min_purity: float = 0.7
    ) -> Set[str]:
        """
        Return the phrases stored for ``role`` that meet all thresholds.

        Reading an unknown role returns an empty set and does not create
        an entry for it in ``memory``.
        """
        return {
            p for p, m in self.memory.get(role, {}).items()
            if (
                m["freq"] >= min_freq and
                m["docs"] >= min_docs and
                m["purity"] >= min_purity
            )
        }

    def is_cross_role(self, phrase: str, role: str) -> bool:
        """Return True if ``phrase`` is stored under any role other than ``role``."""
        return any(
            r != role and phrase in phrases
            for r, phrases in self.memory.items()
        )


# ============================================================
# STABILITY-AWARE LEXICON EXPANSION ENGINE
# ============================================================

class StableLexiconExpansionEngine:
    """
    Adaptive lexicon expansion using:
    - frequency
    - role purity
    - causal context validation
    - cross-role isolation

    Candidate phrases are word n-grams taken from clauses that contain
    at least one seed; ``promote`` filters them and writes the survivors
    into a ``LexiconMemory``.
    """

    CAUSAL_MARKERS = {
        "because", "due to", "after", "since", "as a result"
    }

    def __init__(
        self,
        min_freq: int = 3,
        min_role_purity: float = 0.7,
        max_ngram: int = 5
    ):
        self.min_freq = min_freq
        self.min_role_purity = min_role_purity
        self.max_ngram = max_ngram

    def mine_candidates(
        self,
        clauses: List[str],
        seeds: Set[str]
    ) -> Dict[str, List[str]]:
        """
        Map every 2..max_ngram word n-gram found in a seed-bearing clause
        to the list of clauses it occurred in.

        Clauses without any seed (substring test) are ignored. An n-gram
        that occurs twice in one clause lists that clause twice.
        """
        candidates = defaultdict(list)

        for clause in clauses:
            if not any(seed in clause for seed in seeds):
                continue

            tokens = clause.split()
            for n in range(2, self.max_ngram + 1):
                for i in range(len(tokens) - n + 1):
                    phrase = " ".join(tokens[i:i+n])
                    candidates[phrase].append(clause)

        return candidates

    def role_purity(self, contexts: List[str], seeds: Set[str]) -> float:
        """
        Fraction of ``contexts`` that contain at least one seed.

        Returns 0.0 for an empty ``contexts`` list instead of dividing
        by zero.
        """
        if not contexts:
            return 0.0

        return sum(
            1 for c in contexts if any(s in c for s in seeds)
        ) / len(contexts)

    def causal_hits(self, contexts: List[str]) -> int:
        """Count contexts containing any causal marker (substring test)."""
        return sum(
            1 for c in contexts
            if any(m in c for m in self.CAUSAL_MARKERS)
        )

    def promote(
        self,
        candidates: Dict[str, List[str]],
        role: str,
        seeds: Set[str],
        memory: "LexiconMemory"
    ):
        """
        Add to ``memory`` every candidate that passes all filters:
        enough occurrences, enough role purity, at least one causal
        context, and not already stored under a different role.
        """
        for phrase, contexts in candidates.items():

            if len(contexts) < self.min_freq:
                continue

            purity = self.role_purity(contexts, seeds)
            if purity < self.min_role_purity:
                continue

            if self.causal_hits(contexts) == 0:
                continue

            if memory.is_cross_role(phrase, role):
                continue

            memory.add(role, phrase, {
                "freq": len(contexts),
                # Number of distinct clause strings the phrase came from.
                "docs": len(set(contexts)),
                "purity": round(purity, 2)
            })

    def update_from_text(self, text, segmenter, memory: "LexiconMemory"):
        """
        Segment ``text`` into clauses and run mine + promote once per
        learnable role.

        Roles are processed in a fixed order; because ``promote`` skips
        phrases already stored under another role, an earlier role keeps
        a phrase that a later role would also have accepted.
        """
        clauses = [
            c for s in segmenter.split_sentences(text)
            for c in segmenter.split_clauses(s)
        ]

        role_seeds = (
            ("WIDOW", SeedSemanticLexicon.WIDOW_SEEDS),
            ("DISABILITY", SeedSemanticLexicon.DISABILITY_SEEDS),
            ("OLD_AGE", SeedSemanticLexicon.OLD_AGE_SEEDS),
            ("DISTRESS", SeedSemanticLexicon.DISTRESS_SEEDS),
        )

        for role, seeds in role_seeds:
            self.promote(
                self.mine_candidates(clauses, seeds),
                role, seeds, memory
            )


# ============================================================
# ADAPTIVE SEMANTIC SIGNAL EXTRACTOR (STAGE 2.5)
# ============================================================

class LinguisticSegmenter:
    """
    Regex-based splitter: text -> sentences -> clauses.

    Both methods trim surrounding whitespace from each piece and drop
    pieces that are empty after trimming.
    """

    # Sentence boundary: any single '.', '!' or '?'.
    SENTENCE_SPLIT_REGEX = r"[.!?]"
    # Clause boundary: ',', ';' or a space-delimited connective.
    CLAUSE_SPLIT_REGEX = r",|;| and | but | because | since "

    def split_sentences(self, text: str) -> List[str]:
        """Split ``text`` on sentence-ending punctuation."""
        trimmed = (
            piece.strip()
            for piece in re.split(self.SENTENCE_SPLIT_REGEX, text)
        )
        return [piece for piece in trimmed if piece]

    def split_clauses(self, sentence: str) -> List[str]:
        """Split ``sentence`` on clause separators."""
        trimmed = (
            piece.strip()
            for piece in re.split(self.CLAUSE_SPLIT_REGEX, sentence)
        )
        return [piece for piece in trimmed if piece]


class ContextAnalyzer:
    """Detects whether a phrase is preceded by a negation term."""

    NEGATION_TERMS = {"not", "never", "no longer", "without"}

    def is_negated(self, text: str, phrase: str) -> bool:
        """
        Return True if a negation term appears as a whole word (or whole
        phrase) within the 40 characters before the first occurrence of
        ``phrase`` in ``text``.

        Negation terms are matched on word boundaries so that words which
        merely contain one ("another", "nothing", "notice") do not count.
        Returns False when ``phrase`` does not occur in ``text``.
        """
        idx = text.find(phrase)
        if idx == -1:
            return False

        if not self.NEGATION_TERMS:
            return False

        # Longest alternatives first so multi-word terms are tried
        # before any shorter term they might start with.
        alternatives = "|".join(
            re.escape(term)
            for term in sorted(self.NEGATION_TERMS, key=len, reverse=True)
        )
        negation = re.compile(r"\b(?:" + alternatives + r")\b")

        # Search the window in place (pos/endpos) rather than slicing the
        # string, so the window's left edge is not mistaken for a word
        # boundary when it falls in the middle of a word.
        window_start = max(0, idx - 40)
        return negation.search(text, window_start, idx) is not None


class SemanticSignal:
    """
    One piece of extracted evidence.

    Holds the role label, the matched text, and the sentence and clause
    the match came from. ``base_weight`` is set to 1.0 for every signal.
    """

    def __init__(self, signal_type: str, surface_form: str, sentence: str, clause: str):
        # What was found.
        self.signal_type, self.surface_form = signal_type, surface_form
        # Where it was found.
        self.sentence, self.clause = sentence, clause
        # Starting weight; identical for all signals.
        self.base_weight = 1.0


class SemanticEvidenceGraph:
    """
    Undirected graph of semantic signals.

    ``nodes`` is a list of signals; ``edges`` maps a node index to the
    list of indices it is connected to.
    """

    def __init__(self):
        self.nodes = []
        self.edges = defaultdict(list)

    def add_signal(self, signal: "SemanticSignal") -> int:
        """Append ``signal`` and return its node index."""
        self.nodes.append(signal)
        return len(self.nodes) - 1

    def auto_connect(self):
        """
        Connect every pair of nodes that share a sentence or a clause.

        Safe to call more than once: a pair that is already connected is
        not added again, so repeated calls do not inflate neighbor counts.
        """
        for i, left in enumerate(self.nodes):
            for j in range(i + 1, len(self.nodes)):
                right = self.nodes[j]

                if (
                    left.sentence != right.sentence
                    and left.clause != right.clause
                ):
                    continue

                if j not in self.edges[i]:
                    self.edges[i].append(j)
                if i not in self.edges[j]:
                    self.edges[j].append(i)


class AdaptiveSemanticSignalExtractor:
    """
    Deterministic, explainable semantic signal extraction engine.

    ``extract_signals`` walks the text clause by clause, matches the seed
    lexicon, and returns the resulting evidence graph. Seed sets are
    visited in sorted order so that node order (and therefore any
    downstream tie-break) does not depend on string hash randomization.

    NOTE(review): ``adapt`` fills ``self.memory`` with learned phrases,
    but ``extract_signals`` only consults the seed lexicon; learned
    phrases currently have no effect on extraction. Confirm whether that
    is intended.
    """

    def __init__(self):
        self.seed = SeedSemanticLexicon()
        self.memory = LexiconMemory()
        self.expander = StableLexiconExpansionEngine()
        self.segmenter = LinguisticSegmenter()
        self.context = ContextAnalyzer()

    def adapt(self, text: str):
        """Mine ``text`` for new phrases and store them in ``self.memory``."""
        self.expander.update_from_text(text, self.segmenter, self.memory)

    def extract_signals(self, text: str) -> SemanticEvidenceGraph:
        """
        Build a ``SemanticEvidenceGraph`` from ``text``.

        Per clause, in this order: age patterns, location pattern,
        disaster terms, negation-checked vulnerability terms (widow,
        disability, old age), distress terms. Term matching is a
        substring test. Nodes are connected with ``auto_connect`` before
        the graph is returned.
        """
        graph = SemanticEvidenceGraph()

        # Roles whose terms are dropped when preceded by a negation.
        negatable_roles = (
            ("WIDOW", self.seed.WIDOW_SEEDS),
            ("DISABILITY", self.seed.DISABILITY_SEEDS),
            ("OLD_AGE", self.seed.OLD_AGE_SEEDS),
        )

        for sentence in self.segmenter.split_sentences(text):
            for clause in self.segmenter.split_clauses(sentence):

                for pattern in self.seed.AGE_PATTERNS:
                    m = re.search(pattern, clause)
                    if m:
                        graph.add_signal(
                            SemanticSignal("AGE", m.group(0), sentence, clause)
                        )

                m = re.search(self.seed.LOCATION_PATTERN, clause)
                if m:
                    # Keep only what follows the last "live in". Deleting
                    # just those words would leave the clause's leading
                    # text in the phrase ("i live in gaya district" ->
                    # "i  gaya district").
                    phrase = m.group(0).rpartition("live in")[2].strip()
                    graph.add_signal(
                        SemanticSignal("LOCATION", phrase, sentence, clause)
                    )

                for seeds in self.seed.DISASTER_SEEDS.values():
                    for term in sorted(seeds):
                        if term in clause:
                            graph.add_signal(
                                SemanticSignal("DISASTER", term, sentence, clause)
                            )

                for role, seeds in negatable_roles:
                    for term in sorted(seeds):
                        if term in clause and not self.context.is_negated(clause, term):
                            graph.add_signal(
                                SemanticSignal(role, term, sentence, clause)
                            )

                for term in sorted(self.seed.DISTRESS_SEEDS):
                    if term in clause:
                        graph.add_signal(
                            SemanticSignal("DISTRESS", term, sentence, clause)
                        )

        graph.auto_connect()
        return graph


In [122]:
# ============================================================
# PART 3 — ULTRA-REFINED SEMANTIC REASONING ENGINE
# ============================================================

import math
import re
from collections import defaultdict

# ============================================================
# 11. GRAPH ENERGY ANALYZER (EVIDENCE-WEIGHTED)
# ============================================================

class GraphEnergyAnalyzer:
    """
    Scores signals in an evidence graph.

    A signal's energy grows with the number of neighbors it has and
    shrinks with the number of distinct roles among those neighbors.
    """

    def __init__(self, graph: "SemanticEvidenceGraph"):
        self.graph = graph

    def signal_energy(self, idx: int) -> float:
        """
        Energy of the node at ``idx``:
        base_weight * (0.6 + log1p(#neighbors)) / max(#neighbor roles, 1).
        """
        signal = self.graph.nodes[idx]
        linked = self.graph.edges.get(idx, [])

        # Distinct roles seen among the neighbors.
        role_count = len({self.graph.nodes[k].signal_type for k in linked})

        spread_factor = 1 / max(role_count, 1)
        link_bonus = math.log1p(len(linked))

        return signal.base_weight * (0.6 + link_bonus) * spread_factor

    def role_energy(self, role: str) -> float:
        """
        Summed energy of all nodes with ``signal_type == role``, raised
        to the power 0.85; 0.0 when no node has that role.
        """
        members = [
            position
            for position, signal in enumerate(self.graph.nodes)
            if signal.signal_type == role
        ]

        if not members:
            return 0.0

        # Exponent below 1 gives diminishing returns for repeated evidence.
        return sum(self.signal_energy(position) for position in members) ** 0.85


# ============================================================
# 12. PENSION INTENT RESOLUTION ENGINE (UNCERTAINTY-AWARE)
# ============================================================

class PensionIntentResolver:
    """
    Picks the pension type with the highest role energy and reports how
    clearly it leads the runner-up.
    """

    def __init__(self, graph: "SemanticEvidenceGraph"):
        self.energy = GraphEnergyAnalyzer(graph)

    def resolve(self):
        """
        Return ``(pension_type, confidence, reason, energies)``.

        - No evidence at all: "Unknown" with confidence 0.3.
        - Lead over the runner-up below 0.25: top type with confidence 0.55.
        - Otherwise: top type with confidence 0.65 + lead, capped at 0.95.
        """
        pension_roles = (
            ("Widow Pension", "WIDOW"),
            ("Disability Pension", "DISABILITY"),
            ("Old Age Pension", "OLD_AGE"),
        )
        energies = {
            label: self.energy.role_energy(role)
            for label, role in pension_roles
        }

        if not any(energies.values()):
            return (
                "Unknown",
                0.3,
                "no qualifying vulnerability evidence",
                energies
            )

        # Highest energy first; ties keep the declaration order above.
        ranking = sorted(
            energies.items(), key=lambda pair: pair[1], reverse=True
        )
        (top_role, top_energy), (_, runner_up_energy) = ranking[0], ranking[1]
        lead = top_energy - runner_up_energy

        if lead < 0.25:
            return (
                top_role,
                0.55,
                "competing vulnerability evidence detected",
                energies
            )

        return (
            top_role,
            min(0.65 + lead, 0.95),
            "resolved via dominant semantic evidence",
            energies
        )


# ============================================================
# 13. DISASTER EVENT RESOLUTION ENGINE (CONSENSUS-BASED)
# ============================================================

class DisasterResolver:
    """
    Chooses the disaster term with the strongest combined evidence.

    The returned value is the matched surface form of the winning
    DISASTER signal.
    """

    def __init__(self, graph: "SemanticEvidenceGraph"):
        self.graph = graph
        self.energy = GraphEnergyAnalyzer(graph)

    def resolve(self):
        """
        Return ``(term, confidence, reason)``.

        Each term's score is the sum of its signal energies multiplied
        by ``1 + log1p(count)``. Confidence is ``0.7 + 0.15 * score``,
        capped at 0.95. Without any DISASTER signal the result is
        ``("Unknown", 0.0, "no disaster evidence")``.
        """
        energy_by_term = defaultdict(list)

        for position, signal in enumerate(self.graph.nodes):
            if signal.signal_type != "DISASTER":
                continue
            energy_by_term[signal.surface_form].append(
                self.energy.signal_energy(position)
            )

        if not energy_by_term:
            return "Unknown", 0.0, "no disaster evidence"

        reinforced = {}
        for term, hits in energy_by_term.items():
            # Repeated mentions amplify the summed energy.
            reinforced[term] = sum(hits) * (1 + math.log1p(len(hits)))

        winner = max(reinforced, key=reinforced.get)
        confidence = min(0.7 + 0.15 * reinforced[winner], 0.95)

        return winner, confidence, "resolved by reinforced disaster consensus"


# ============================================================
# 14. ATTRIBUTE RESOLVER (ROBUST AGGREGATION)
# ============================================================

class AttributeResolver:
    """
    Derives the applicant's age and location from AGE and LOCATION
    signals in the graph.
    """

    def __init__(self, graph: "SemanticEvidenceGraph"):
        self.graph = graph

    def resolve_age(self):
        """
        Return ``(age, confidence, reason)``.

        The age is the rounded mean of the first two-digit number found
        in each AGE signal. Confidence starts at 0.9, rises by 0.02 per
        contributing signal, and is capped at 0.97.
        """
        found = []

        for signal in self.graph.nodes:
            if signal.signal_type != "AGE":
                continue
            digits = re.search(r"\d{2}", signal.surface_form)
            if digits:
                found.append(int(digits.group()))

        if not found:
            return "Unknown", 0.0, "age not mentioned"

        mean_age = round(sum(found) / len(found))
        confidence = min(0.9 + 0.02 * len(found), 0.97)

        return mean_age, confidence, "aggregated age evidence"

    def resolve_location(self):
        """
        Return ``(location, confidence, reason)`` using the first
        LOCATION signal in node order.
        """
        for signal in self.graph.nodes:
            if signal.signal_type == "LOCATION":
                return signal.surface_form, 0.85, "explicit location phrase"

        return "Unknown", 0.0, "location not mentioned"


# ============================================================
# 15. DISTRESS SEVERITY ANALYZER (EVIDENCE-DENSITY BASED)
# ============================================================

class DistressSeverityAnalyzer:
    """
    Turns the combined energy of DISTRESS signals into a severity
    score.
    """

    def __init__(self, graph: "SemanticEvidenceGraph"):
        self.energy = GraphEnergyAnalyzer(graph)
        self.graph = graph

    def severity_score(self):
        """
        Return 0.25 when there is no DISTRESS signal; otherwise
        ``0.3 + 0.2 * log1p(total energy)``, capped at 1.0.
        """
        distress_positions = [
            position
            for position, signal in enumerate(self.graph.nodes)
            if signal.signal_type == "DISTRESS"
        ]

        if not distress_positions:
            return 0.25

        total_energy = sum(
            self.energy.signal_energy(position)
            for position in distress_positions
        )
        return min(0.3 + 0.2 * math.log1p(total_energy), 1.0)


# ============================================================
# 16. EXPLAINABILITY ENGINE (DECISION-CENTRIC)
# ============================================================

class ExplainabilityEngine:
    """
    Lists the matched text behind each semantic role so a decision can
    be traced back to its evidence.
    """

    def __init__(self, graph: "SemanticEvidenceGraph"):
        self.graph = graph

    def evidence_by_role(self, role: str):
        """Return the surface forms of all signals with ``signal_type == role``, in node order."""
        return [
            n.surface_form for n in self.graph.nodes
            if n.signal_type == role
        ]

    def full_trace(self):
        """
        Return ``{role: [surface forms]}`` for every role in
        ``SEMANTIC_ROLES``.

        Roles are visited in sorted order so the key order of the result
        (and of any JSON produced from it) is the same on every run,
        rather than following set iteration order.
        """
        return {
            role: self.evidence_by_role(role)
            for role in sorted(SEMANTIC_ROLES)
        }


# ============================================================
# 17. FULL SEMANTIC DECISION ENGINE (FINAL)
# ============================================================

class SemanticDecisionEngine:
    """
    Runs every resolver over one evidence graph and assembles their
    answers into a single result dictionary.
    """

    def __init__(self, graph: "SemanticEvidenceGraph"):
        self.graph = graph
        self.intent = PensionIntentResolver(graph)
        self.disaster = DisasterResolver(graph)
        self.attributes = AttributeResolver(graph)
        self.distress = DistressSeverityAnalyzer(graph)
        self.explainer = ExplainabilityEngine(graph)

    def decide(self) -> dict:
        """
        Return the decision record with keys ``Age``, ``Location``,
        ``Pension_Type``, ``Disaster_Event``, ``Disaster_Severity`` and
        ``Explainability_Trace``.
        """

        def _field(value, confidence, reason):
            # Shared value/confidence/reason layout of a resolved field.
            return {"value": value, "confidence": confidence, "reason": reason}

        age_fact = self.attributes.resolve_age()
        location_fact = self.attributes.resolve_location()

        *pension_fact, energy_debug = self.intent.resolve()
        disaster_fact = self.disaster.resolve()
        severity = self.distress.severity_score()

        pension_entry = _field(*pension_fact)
        pension_entry["debug_energy"] = energy_debug

        decision = {}
        decision["Age"] = _field(*age_fact)
        decision["Location"] = _field(*location_fact)
        decision["Pension_Type"] = pension_entry
        decision["Disaster_Event"] = _field(*disaster_fact)
        decision["Disaster_Severity"] = severity
        decision["Explainability_Trace"] = self.explainer.full_trace()
        return decision


In [123]:
# ============================================================
# PART 4 — SEER-NLP MASTER MODEL & DEPLOYMENT LAYER (FINAL)
# ============================================================

import pickle
import hashlib
from typing import Iterable, List, Dict


# ============================================================
# 18. SEER-NLP MASTER MODEL (FULL PIPELINE)
# ============================================================

class SEERNLPModel:
    """
    SEER-NLP Master Model

    Chains the pipeline stages for one narrative:
    normalization -> signal extraction (evidence graph) ->
    semantic reasoning -> result with governance metadata.

    Instances of this class are what ``ModelPersistence`` pickles.
    """

    def __init__(self):
        # Pipeline stages
        self.normalizer = TextNormalizer()
        self.signal_extractor = AdaptiveSemanticSignalExtractor()

        # Identification metadata
        self.version = "SEER-NLP v1.1"
        self.schema_version = "1.1"
        self.description = (
            "Semantic Explainable Extraction & Reasoning "
            "for Pension Applications in Disaster Response"
        )

        self.model_fingerprint = self._build_fingerprint()

    # --------------------------------------------------------
    # MODEL FINGERPRINT
    # --------------------------------------------------------

    def _build_fingerprint(self) -> str:
        """
        Return the first 12 hex characters of the SHA-256 digest of
        version + schema version + description.
        """
        identity = "".join(
            (self.version, self.schema_version, self.description)
        )
        digest = hashlib.sha256(identity.encode())
        return digest.hexdigest()[:12]

    # --------------------------------------------------------
    # SINGLE DOCUMENT INFERENCE
    # --------------------------------------------------------

    def infer(self, raw_text: str, adapt: bool = False) -> Dict:
        """
        Run the full pipeline on one application narrative.

        When ``adapt`` is true, the extractor's ``adapt`` step runs on
        the normalized text before extraction. The returned decision
        dict gains a ``_meta`` entry holding model identification, the
        ``adapt`` flag, and the normalizer's trace and noise score.
        """
        normalized = self.normalizer.normalize(raw_text)
        clean_text = normalized["normalized_text"]

        if adapt:
            self.signal_extractor.adapt(clean_text)

        evidence_graph = self.signal_extractor.extract_signals(clean_text)
        result = SemanticDecisionEngine(evidence_graph).decide()

        governance = {
            "model_version": self.version,
            "schema_version": self.schema_version,
            "model_fingerprint": self.model_fingerprint,
            "adaptation_used": adapt,
        }
        governance["normalization_trace"] = normalized["normalization_trace"]
        governance["noise_score"] = normalized["noise_score"]
        result["_meta"] = governance

        return result

    # --------------------------------------------------------
    # BATCH INFERENCE
    # --------------------------------------------------------

    def infer_batch(
        self,
        texts: Iterable[str],
        adapt: bool = False
    ) -> List[Dict]:
        """Apply ``infer`` to each text in order and collect the results."""
        results = []
        for narrative in texts:
            results.append(self.infer(narrative, adapt=adapt))
        return results


# ============================================================
# 19. MODEL SERIALIZATION & LOADING
# ============================================================

class ModelPersistence:
    """
    Pickle-based save/load helpers for ``SEERNLPModel``.

    NOTE: unpickling can execute arbitrary code. Only call ``load`` on
    files that come from a trusted source.
    """

    @staticmethod
    def save(model: SEERNLPModel, path: str):
        """Pickle ``model`` to ``path`` and print a confirmation line."""
        with open(path, "wb") as sink:
            pickle.dump(model, sink)
        print(f"✅ SEER-NLP model saved to: {path}")

    @staticmethod
    def load(path: str) -> SEERNLPModel:
        """Unpickle and return the object stored at ``path``, printing a confirmation line."""
        with open(path, "rb") as source:
            restored = pickle.load(source)
        print(f"✅ SEER-NLP model loaded from: {path}")
        return restored


# ============================================================
# 20. SERVICE WRAPPER (STREAMLIT / API)
# ============================================================

class SEERNLPService:
    """
    Thin wrapper that loads a saved model once and exposes two
    inference entry points, without and with adaptation.
    """

    def __init__(self, model_path: str):
        self.model = ModelPersistence.load(model_path)

    def process_text(self, text: str) -> Dict:
        """Run inference on ``text`` with adaptation switched off."""
        result = self.model.infer(text, adapt=False)
        return result

    def process_text_with_adaptation(self, text: str) -> Dict:
        """Run inference on ``text`` with adaptation switched on."""
        result = self.model.infer(text, adapt=True)
        return result


# ============================================================
# 21. ONE-TIME MODEL BUILD SCRIPT
# ============================================================

def build_and_save_seer_model(
    output_path: str = "seer_nlp_model.pkl"
):
    """Construct a fresh ``SEERNLPModel`` and pickle it to ``output_path``."""
    fresh_model = SEERNLPModel()
    ModelPersistence.save(model=fresh_model, path=output_path)


# ============================================================
# 22. EXAMPLE USAGE
# ============================================================

if __name__ == "__main__":

    # Demo narrative covering age, location, disaster, widowhood and
    # distress in three sentences.
    sample_text = """
    I am 72 years old and live in Gaya district.
    After floods damaged my house, I lost my husband
    and now have no income to survive.
    """

    # Build a fresh model and pickle it into the working directory.
    build_and_save_seer_model("seer_nlp_model.pkl")

    # Load the pickle back through the service wrapper and run
    # inference without adaptation.
    service = SEERNLPService("seer_nlp_model.pkl")
    output = service.process_text(sample_text)

    # Pretty-print the decision record.
    import json
    print(json.dumps(output, indent=4))


✅ SEER-NLP model saved to: seer_nlp_model.pkl
✅ SEER-NLP model loaded from: seer_nlp_model.pkl
{
    "Age": {
        "value": 72,
        "confidence": 0.9400000000000001,
        "reason": "aggregated age evidence"
    },
    "Location": {
        "value": "gaya district",
        "confidence": 0.85,
        "reason": "explicit location phrase"
    },
    "Pension_Type": {
        "value": "Widow Pension",
        "confidence": 0.95,
        "reason": "resolved via dominant semantic evidence",
        "debug_energy": {
            "Widow Pension": 0.9941721018101672,
            "Disability Pension": 0.0,
            "Old Age Pension": 0.0
        }
    },
    "Disaster_Event": {
        "value": "flood",
        "confidence": 0.868154434864613,
        "reason": "resolved by reinforced disaster consensus"
    },
    "Disaster_Severity": 0.4379429774509145,
    "Explainability_Trace": {
        "OLD_AGE": [],
        "DISTRESS": [
            "no income"
        ],
        "DISASTER"