In [1]:
# ==========================================
# CELL 1: DEEP FLIGHT RECORDER (FULL FIDELITY)
# ==========================================
import os
import json
import time
import uuid
import logging
import copy
from datetime import datetime
from typing import TypedDict, List, Dict, Literal, Any
from langchain_groq import ChatGroq
from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.graph import StateGraph, END

# --- IMPORT CONFIG & MODULES ---
from config import (
    GRAPH_PATH, CHUNKS_PATH, VECTOR_INDEX_PATH, BM25_INDEX_PATH, 
    QUERY_MODEL, SCOUT_MODEL, AUDIT_MODEL, SYNTHESIZE_MODEL,
    GROQ_API_KEY, LOG_FILE_PATH, REPORTS_DIR
)
from rag_engine import OmniRetriever
from web_engine import DeepWebScout, KnowledgeCurator

# --- SETUP LOGGING ---
# Point the root logger at LOG_FILE_PATH every time this cell runs.
# `force=True` (Python 3.8+) removes AND closes handlers left over from a
# previous execution of the cell; a bare removeHandler() loop detaches them
# but leaves the old log file handle open.
logging.basicConfig(
    filename=LOG_FILE_PATH,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S',
    force=True,
)
logger = logging.getLogger("BRAIN")

class DeepFlightRecorder:
    """Collects the events of one pipeline run in memory and dumps them as JSON.

    Usage: call ``start_run`` once per query, ``log_event`` for every traced
    step, then ``save_report`` to write ``<run_id>_deep_trace.json`` into
    ``REPORTS_DIR``.
    """

    def __init__(self):
        self.current_run_id = None
        # Start with a valid, empty trace so log_event() cannot fail when it is
        # called before start_run().
        self.run_data = {"meta": {}, "trace_log": []}
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(REPORTS_DIR, exist_ok=True)

    def start_run(self, query):
        """Reset the recorder and open a new trace for ``query``.

        The run id is ``trace_<YYYYmmdd_HHMMSS>`` based on the local clock.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.current_run_id = f"trace_{timestamp}"
        self.run_data = {
            "meta": { "run_id": self.current_run_id, "query": query, "timestamp": timestamp },
            "trace_log": []
        }
        print(f"üìº Deep Trace started: {self.current_run_id}")

    def log_event(self, event_type: str, component: str, data: Any, duration_ms: float = 0.0):
        """Append one event to the current trace without truncating its payload.

        Args:
            event_type: Label stored under "type".
            component: Name of the emitting component, stored under "component".
            data: Payload. dicts/lists are deep-copied so later mutation by the
                caller does not alter the recorded snapshot; anything else is
                stored as ``str(data)``.
            duration_ms: Elapsed time in milliseconds, rounded to 2 decimals.
        """
        if isinstance(data, (dict, list)):
            try:
                snapshot = copy.deepcopy(data)
            except Exception:
                # Tracing must never take the pipeline down: payloads holding
                # objects that cannot be copied are recorded as text instead.
                snapshot = str(data)
        else:
            snapshot = str(data)

        entry = {
            "type": event_type,
            "component": component,
            "duration_ms": round(duration_ms, 2),  # explicit latency tracking
            "data": snapshot,
        }
        # setdefault keeps this safe even if run_data was replaced externally.
        self.run_data.setdefault("trace_log", []).append(entry)

    def save_report(self):
        """Write the current trace to ``REPORTS_DIR`` and return the file path."""
        # Avoid a literal "None_deep_trace.json" when no run was started.
        run_id = self.current_run_id or "trace_unstarted"
        filename = f"{run_id}_deep_trace.json"
        filepath = os.path.join(REPORTS_DIR, filename)
        with open(filepath, "w", encoding='utf-8') as f:
            # default=str: a single non-JSON value (e.g. a numpy scalar inside a
            # raw result) must not abort the dump and lose the whole trace.
            json.dump(self.run_data, f, indent=2, ensure_ascii=False, default=str)
        print(f"üíæ Deep Trace Report saved to: {filepath}")
        return filepath

# Module-level recorder instance; the traced tools and agent nodes in the
# following cells log their events through this single object.
recorder = DeepFlightRecorder()

In [2]:
# CELL 2: INSTRUMENTED TOOLS (STOPWATCH EDITION)
# ==========================================

class TracedOmniRetriever(OmniRetriever):
    """OmniRetriever variant that times every retrieval stage and logs its raw hits.

    Each stage (query decomposition, vector, BM25, graph, rerank) emits one
    event through the module-level ``recorder``.
    """

    def retrieve(self, query, top_k_per_task=5, verbose=False):
        """Decompose ``query`` into tasks, search per task, rerank and merge.

        Args:
            query: User question passed to ``self.optimizer.optimize``.
            top_k_per_task: Number of reranked results kept per task; the vector
                and BM25 stages over-fetch ``top_k_per_task * 3`` candidates.
            verbose: Accepted for signature compatibility; not used here.

        Returns:
            dict with "original_query", "tasks" (one ``{"sub_query", "results"}``
            per task, where results are ``(text, score)`` pairs) and
            "combined_context" (output of ``self._deduplicate_and_flatten``).
        """
        # 1. OPTIMIZATION TRACE
        t_start = time.perf_counter()
        omni = self.optimizer.optimize(query)
        duration = (time.perf_counter() - t_start) * 1000

        # Log the full breakdown so we can see the isolated metadata in the UI
        recorder.log_event("QUERY_DECOMPOSITION", "QueryOptimizer", omni, duration)

        final_structure = {"original_query": query, "tasks": []}

        # 2. ATOMIC EXECUTION TRACE
        for i, task in enumerate(omni['tasks']):
            sub_q = task['sub_query']
            # dict used as an insertion-ordered set of candidate texts
            candidates = {}

            # --- A. VECTOR SEARCH (HyDE) ---
            t0 = time.perf_counter()
            vector_hits = []
            if task.get('hyde_passage'):
                emb = self.embedder.encode([task['hyde_passage']], convert_to_numpy=True)
                # Matches the new OmniRetriever logic (k*3)
                D, I = self.index.search(emb, k=top_k_per_task*3)
                for idx in I[0]:
                    # Assuming a FAISS-style index: slots with no neighbour are
                    # reported as -1. Without the lower bound, -1 would pass the
                    # check and silently select the LAST chunk.
                    if 0 <= idx < len(self.chunks):
                        txt = self.chunks[idx]['text']
                        candidates[txt] = 0.0
                        vector_hits.append(txt)

            vec_dur = (time.perf_counter() - t0) * 1000
            recorder.log_event("SEARCH_VECTOR", f"Task_{i}", {
                "hyde_used": task.get('hyde_passage'),
                "hit_count": len(vector_hits),
                "full_results": vector_hits
            }, vec_dur)

            # --- B. BM25 SEARCH (Keywords) ---
            t0 = time.perf_counter()
            keyword_hits = []
            if task.get('keywords'):
                bm25_query = f"{sub_q} {' '.join(task['keywords'])}"
                tokenized_query = bm25_query.split()
                bm25_docs = self.bm25.get_top_n(tokenized_query, self.chunk_texts, n=top_k_per_task*3)
                for txt in bm25_docs:
                    candidates[txt] = 0.0
                    keyword_hits.append(txt)

            bm25_dur = (time.perf_counter() - t0) * 1000
            recorder.log_event("SEARCH_BM25", f"Task_{i}", {
                "keywords_used": task.get('keywords'),
                "hit_count": len(keyword_hits),
                "full_results": keyword_hits
            }, bm25_dur)

            # --- C. GRAPH SEARCH (Entities) ---
            t0 = time.perf_counter()
            graph_hits = []
            for entity in task.get('graph_entities', []):
                # Exact match on the node id
                if entity in self.graph_engine.G:
                    facts = self.graph_engine.get_neighbors(entity)
                    for f in facts:
                        candidates[f] = 0.0
                        graph_hits.append(f)
                # Otherwise fall back to the first case-insensitive match
                else:
                    entity_lc = str(entity).lower()  # hoisted out of the node scan
                    for node in self.graph_engine.G.nodes():
                        if str(node).lower() == entity_lc:
                            facts = self.graph_engine.get_neighbors(node)
                            for f in facts:
                                candidates[f] = 0.0
                                graph_hits.append(f)
                            break

            graph_dur = (time.perf_counter() - t0) * 1000
            recorder.log_event("SEARCH_GRAPH", f"Task_{i}", {
                "entities_used": task.get('graph_entities'),
                "hit_count": len(graph_hits),
                "full_facts": graph_hits
            }, graph_dur)

            # --- D. RERANKING (Full Scores) ---
            t0 = time.perf_counter()
            unique_docs = list(candidates.keys())
            score_log = []
            results = []

            if unique_docs:
                pairs = [[sub_q, doc] for doc in unique_docs]
                scores = self.reranker.predict(pairs)
                ranked = sorted(zip(unique_docs, scores), key=lambda x: x[1], reverse=True)
                results = ranked[:top_k_per_task]

                # Log scores as plain floats so the trace stays JSON-friendly
                score_log = [{"text": r[0], "score": float(r[1])} for r in results]

            rerank_dur = (time.perf_counter() - t0) * 1000
            recorder.log_event("RERANKER_SCORES", f"Task_{i}", score_log, rerank_dur)

            final_structure["tasks"].append({"sub_query": sub_q, "results": results})

        # 3. GLOBAL MERGE
        # Uses the inherited helper so the merged context matches the untraced retriever.
        combined_context = self._deduplicate_and_flatten(final_structure["tasks"])
        final_structure["combined_context"] = combined_context

        return final_structure

class TracedWebScout(DeepWebScout):
    """DeepWebScout whose searches are timed and recorded in the deep trace."""

    def search_and_extract(self, sub_query: str):
        """Delegate to the parent search and log its unmodified result.

        The raw result and the elapsed time (in milliseconds) are recorded as a
        "WEB_SEARCH_RAW" event; the result is then returned unchanged.
        """
        started_at = time.perf_counter()
        raw_result = super().search_and_extract(sub_query)
        elapsed_ms = 1000 * (time.perf_counter() - started_at)

        # Keep the payload exactly as the parent returned it
        recorder.log_event(
            event_type="WEB_SEARCH_RAW",
            component="DeepWebScout",
            data=raw_result,
            duration_ms=elapsed_ms,
        )
        return raw_result

# INITIALIZE
# Build the instrumented retriever and web scout once at module level; the
# agent nodes defined later in the notebook call these two instances.
print("‚öôÔ∏è  Injecting High-Fidelity Probes...")
# TracedOmniRetriever defines no __init__ of its own, so construction goes
# through the OmniRetriever constructor with the paths/model from config.
traced_retriever = TracedOmniRetriever(GRAPH_PATH, CHUNKS_PATH, VECTOR_INDEX_PATH, BM25_INDEX_PATH, QUERY_MODEL)
traced_scout = TracedWebScout()
print("üöÄ Probes Active.")

‚öôÔ∏è  Injecting High-Fidelity Probes...
üìÇ Loading Resources...
üöÄ Omni-Retriever Ready.
üöÄ Probes Active.


In [3]:
# ==========================================
# CELL 3: THE AGENT NODES
# ==========================================

class BrainState(TypedDict):
    """State dictionary shared by the graph nodes defined in this notebook."""
    # The user question; the only key supplied when the graph is invoked.
    query: str
    # Evidence strings written by retrieve_node ("[Score: x.xx] <text>").
    internal_knowledge: List[str]
    # Web facts written by web_search_node ("[Web: <gap>] <answer>"); not set
    # when the audit routes straight to synthesis.
    external_knowledge: List[str]
    # Auditor verdict written by audit_node; read via the "sufficient" and
    # "missing_topics" keys.
    gap_analysis: Dict
    # Response text written by synthesize_node.
    final_answer: str

def retrieve_node(state: BrainState):
    """Graph node: run the traced retriever and turn its hits into evidence strings.

    Args:
        state: Current graph state; only ``state["query"]`` is read.

    Returns:
        ``{"internal_knowledge": [...]}`` where each item is
        ``"[Score: <score:.2f>] <text>"``. A text returned by several sub-tasks
        is kept once (first occurrence) so duplicates do not crowd the context
        that later nodes pass to the LLMs.
    """
    query = state["query"]
    print("\nüìö [LIBRARIAN] Executing Traced Retrieval...")

    t0 = time.perf_counter()
    results = traced_retriever.retrieve(query, top_k_per_task=3)
    duration = (time.perf_counter() - t0) * 1000

    evidence = []
    seen_texts = set()
    for task in results.get("tasks", []):
        # .get(): a task without results contributes nothing instead of raising
        for txt, score in task.get("results", []):
            if txt in seen_texts:
                continue
            seen_texts.add(txt)
            evidence.append(f"[Score: {score:.2f}] {txt}")

    recorder.log_event("NODE_OUTPUT", "retrieve_node", {"evidence_count": len(evidence)}, duration)
    return {"internal_knowledge": evidence}

def _parse_audit_response(raw: Any, query: str) -> Dict[str, Any]:
    """Turn the auditor LLM's reply into a normalised gap-analysis dict.

    Tolerates replies that wrap the JSON object in prose or code fences by
    parsing the span from the first "{" to the last "}". Anything that cannot
    be parsed into a JSON object yields the conservative fallback
    ``{"sufficient": False, "missing_topics": [query]}``.

    The result always has a boolean "sufficient" (True only for a JSON ``true``)
    and a list-of-strings "missing_topics"; when the evidence is insufficient
    but no topics were given, the original query is used as the single topic so
    the web search still has something to look up.
    """
    fallback = {"sufficient": False, "missing_topics": [query]}
    if not isinstance(raw, str):
        return fallback

    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        return fallback
    try:
        parsed = json.loads(raw[start:end + 1])
    except json.JSONDecodeError:
        return fallback
    if not isinstance(parsed, dict):
        return fallback

    # A string such as "false" is truthy; only a real JSON true counts.
    sufficient = parsed.get("sufficient") is True

    topics = parsed.get("missing_topics")
    if isinstance(topics, str):
        topics = [topics]
    elif not isinstance(topics, list):
        topics = []
    topics = [str(topic) for topic in topics if topic]
    if not sufficient and not topics:
        topics = [query]

    return {**parsed, "sufficient": sufficient, "missing_topics": topics}


def audit_node(state: BrainState):
    """Graph node: ask the auditor LLM whether the internal evidence is sufficient.

    Reads ``state["query"]`` and the first 20 items of
    ``state["internal_knowledge"]``, logs the full prompt and response as an
    "LLM_AUDIT" event, and returns ``{"gap_analysis": {...}}`` with the keys
    "sufficient" (bool) and "missing_topics" (list of str).
    """
    print("üïµÔ∏è‚Äç‚ôÇÔ∏è [AUDITOR] Auditing Evidence & Freshness...")
    t0 = time.perf_counter()

    query = state["query"]
    evidence_text = "\n".join(state["internal_knowledge"][:20]) # Increased context limit

    sys_msg = """
        You are the Gap Analysis & Freshness Auditor.
        Your Job: Evaluate if the provided INTERNAL EVIDENCE is sufficient to answer the USER QUERY fully and accurately.

        ### CRITICAL "FRESHNESS" RULES:
        1. **Assume Stale Data:** Internal data is static. If the user asks for "current," "latest," "2024/2025," "today," or "news," and the evidence does not explicitly contain recent timestamps (last 30 days), you MUST mark it as **INSUFFICIENT**.
        2. **Dynamic Topics:** For queries about volatile topics (stock prices, weather, software versions, recent events), strictly reject internal data unless it is verified as live/real-time.
        3. **Trigger Search:** When rejecting data due to age/freshness, format your `missing_topics` specifically to guide a web search (e.g., use "Current status of X" or "2025 updates for Y").

        ### OUTPUT SCHEMA (Strict JSON):
        { 
            "sufficient": boolean, 
            "missing_topics": [
                "Topic 1 (e.g., 'Latest 2025 features for Python')",
                "Topic 2 (e.g., 'Current stock price of NVDA')"
            ] 
        }
        """
    user_msg = f"QUERY: {query}\n\nINTERNAL EVIDENCE:\n{evidence_text}"

    # Init LLM
    audit_llm = ChatGroq(temperature=0, model_name=AUDIT_MODEL, api_key=GROQ_API_KEY)

    response = audit_llm.invoke([
        SystemMessage(content=sys_msg),
        HumanMessage(content=user_msg)
    ])

    duration = (time.perf_counter() - t0) * 1000

    # LOG FULL PROMPT AND RESPONSE
    recorder.log_event("LLM_AUDIT", "AuditNode", {
        "full_system_prompt": sys_msg,
        "full_user_prompt": user_msg,
        "full_response": response.content
    }, duration)

    # Unparseable replies degrade to "insufficient, search for the query itself"
    analysis = _parse_audit_response(response.content, query)

    if analysis.get("sufficient"):
        print("   ‚úÖ Evidence is sufficient & fresh.")
    else:
        print(f"   ‚ùå Gaps/Stale Data detected: {analysis.get('missing_topics')}")

    return {"gap_analysis": analysis}

def web_search_node(state: BrainState):
    """Graph node: run one traced web search per missing topic from the audit.

    Reads ``state["gap_analysis"]["missing_topics"]`` and returns
    ``{"external_knowledge": [...]}`` where each item is
    ``"[Web: <gap>] <answer>"``. Searches whose status is not "success", or
    whose answer is empty, contribute nothing.
    """
    # `or []` covers a JSON null; a bare string would otherwise be iterated
    # character by character.
    gaps = state["gap_analysis"].get("missing_topics") or []
    if isinstance(gaps, str):
        gaps = [gaps]
    print(f"üåê [SCOUT] Tracing Web Search for {len(gaps)} gaps...")
    external_facts = []

    t0 = time.perf_counter()
    for gap in gaps:
        res = traced_scout.search_and_extract(gap)
        # Tolerate results without a "status" key (or non-dict results)
        if not isinstance(res, dict) or res.get("status") != "success":
            continue
        answer = res.get("tavily_answer")
        if answer:
            # Skip empty answers: "[Web: gap] " carries no information and
            # would inflate facts_found.
            external_facts.append(f"[Web: {gap}] {answer}")
    duration = (time.perf_counter() - t0) * 1000

    recorder.log_event("NODE_OUTPUT", "web_search_node", {"facts_found": len(external_facts)}, duration)
    return {"external_knowledge": external_facts}

def synthesize_node(state: BrainState):
    """Graph node: have the synthesis LLM write the final answer from all evidence.

    Reads ``state["query"]``, ``state["internal_knowledge"]`` and, when present,
    ``state["external_knowledge"]``. Logs the full prompt and response as an
    "LLM_SYNTHESIS" event and returns ``{"final_answer": <response text>}``.
    """
    print("‚úçÔ∏è [SYNTHESIZER] Writing Final Answer...")
    t0 = time.perf_counter()

    synth_llm = ChatGroq(temperature=0, model_name=SYNTHESIZE_MODEL, api_key=GROQ_API_KEY)

    sys_msg = """
        You are the **Chief Intelligence Officer**. 
        Your mandate is to synthesize fragmented information into a cohesive, executive-level intelligence briefing.

        ### CORE OBJECTIVES:
        1. **Executive Synthesis**: Do not just list facts. Synthesize them into a narrative that directly answers the user's intent. Start with a **Bottom Line Up Front (BLUF)** summary.
        2. **Hybrid Citation Protocol**: You must rigorously attribute every claim to its origin to maintain the chain of custody for information.
        - **Internal Data**: Cite as `[Internal Database]`.
        - **External Web Data**: Cite as `[Source: domain.com]`.
        - **Combined**: If a point is supported by both, use `[Internal Database | Source: domain.com]`.

        ### CONFLICT RESOLUTION:
        - If External and Internal sources conflict, present **both** viewpoints but prioritize the source with the more recent timestamp.
        - Explicitly label discrepancies: *"Note: Internal records indicate X, while recent public reporting suggests Y."*

        ### OUTPUT STRUCTURE:
        ## Executive Summary
        (A 2-3 sentence direct answer.)

        ## Detailed Analysis
        (Structured findings with citations.)

        ## Strategic Implications / Next Steps
        (Actionable insights based on the data.)

        ### CONSTRAINT:
        - Answer ONLY using the provided context. If the context is missing specific details, state: "Insufficient intelligence available regarding [Topic]."
        """

    # One evidence item per line (same layout audit_node uses) instead of a
    # Python list repr, which escapes quotes and newlines inside the passages.
    internal_block = "\n".join(map(str, state.get("internal_knowledge") or [])) or "(none)"
    external_block = "\n".join(map(str, state.get("external_knowledge") or [])) or "(none)"
    user_msg = (
        f"QUESTION: {state['query']}\n\n"
        f"INTERNAL:\n{internal_block}\n\n"
        f"EXTERNAL:\n{external_block}"
    )

    response = synth_llm.invoke([
        SystemMessage(content=sys_msg),
        HumanMessage(content=user_msg)
    ])

    duration = (time.perf_counter() - t0) * 1000

    recorder.log_event("LLM_SYNTHESIS", "SynthesizeNode", {
        "full_system_prompt": sys_msg,
        "full_user_prompt": user_msg,
        "full_response": response.content
    }, duration)

    return {"final_answer": response.content}

In [4]:
# ==========================================
# CELL 4: EXECUTION
# ==========================================

# Assemble the state machine over BrainState; each node name is bound to one
# of the node functions defined in the previous cell.
workflow = StateGraph(BrainState)
workflow.add_node("retrieve", retrieve_node)
workflow.add_node("audit", audit_node)
workflow.add_node("web_search", web_search_node)
workflow.add_node("synthesize", synthesize_node)

# Flow: retrieve -> audit -> (synthesize | web_search -> synthesize) -> END
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "audit")
# Route on the auditor's verdict: a truthy "sufficient" skips the web search.
workflow.add_conditional_edges("audit", 
    lambda x: "synthesize" if x["gap_analysis"].get("sufficient") else "web_search",
    {"synthesize": "synthesize", "web_search": "web_search"}
)
workflow.add_edge("web_search", "synthesize")
workflow.add_edge("synthesize", END)
# Compiled graph; invoked by ask_brain_full_fidelity below.
app = workflow.compile()

from IPython.display import Markdown, display

def ask_brain_full_fidelity(question: str):
    """Run one question through the compiled graph and always save its trace.

    Starts a new recorder run, invokes ``app`` with ``{"query": question}`` and
    renders the final answer as Markdown. If the run fails, the error is
    printed, written to the log file with its traceback, and recorded in the
    trace as a "RUN_ERROR" event, so a failed run's report is distinguishable
    from a successful one. The report is saved in every case.

    Returns:
        None. The answer is displayed, not returned.
    """
    recorder.start_run(question)

    print(f"\n‚ùì QUERY: {question}\n" + "="*40)

    try:
        result = app.invoke({"query": question})
        display(Markdown(result["final_answer"]))
    except Exception as e:
        # Deliberately best-effort: keep the notebook running, but leave a
        # record of the failure in both the log file and the saved trace.
        logger.exception("Run failed for query: %s", question)
        recorder.log_event(
            "RUN_ERROR",
            "ask_brain_full_fidelity",
            {"error_type": type(e).__name__, "message": str(e)},
        )
        print(f"‚ùå Error: {e}")
    finally:
        path = recorder.save_report()
        print(f"\nüìÑ FULL FIDELITY LOG: {path}")

In [7]:
%%time
# Smoke test: run one end-to-end query through the traced pipeline. The %%time
# cell magic reports the CPU and wall-clock time of the whole cell.
ask_brain_full_fidelity("Why are newborns described as being physiologically immunodeficient?")

üìº Deep Trace started: trace_20260118_113418

‚ùì QUERY: Why are newborns described as being physiologically immunodeficient?

üìö [LIBRARIAN] Executing Traced Retrieval...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

üïµÔ∏è‚Äç‚ôÇÔ∏è [AUDITOR] Auditing Evidence & Freshness...
   ‚úÖ Evidence is sufficient & fresh.
‚úçÔ∏è [SYNTHESIZER] Writing Final Answer...


## Executive Summary  
Newborns are termed **physiologically immunodeficient** because at birth they rely almost entirely on maternally-derived antibodies and possess an immature innate and adaptive immune system that lacks sufficient cell-mediated responses, complement activity, and high-affinity antibody production. This transient deficiency makes them especially vulnerable to infection until their own immune components mature over the first year of life. [Internal Database]

## Detailed Analysis  

| Aspect | Why it contributes to physiological immunodeficiency | Evidence |
|--------|------------------------------------------------------|----------|
| **Absence of self-produced immunoglobulins** | IgM, IgD, IgE and IgA do not cross the placenta, so newborns have virtually no endogenous antibodies at birth; they depend on maternal IgG transferred via the FcRn receptor. | “At birth, most of the immunoglobulin present is maternal IgG… IgM, IgD, IgE and IgA do not cross the placenta, they are almost undetectable at birth.” [Internal Database] |
| **Limited duration and low affinity of passive antibodies** | Maternal IgG provides short-term protection (up to ~18 months) but is of low affinity and wanes quickly; breast-milk IgA offers only mucosal protection for a few months. | “These passively-acquired antibodies can protect the newborn for up to 18 months, but their response is usually short-lived and of low affinity.” [Internal Database] |
| **Immature T-cell responses** | Neonatal T-cells respond poorly to Th1-type vaccines; the Th1/Th2 balance is skewed, limiting cell-mediated immunity against intracellular pathogens. | “Vaccines that induce Th1 responses in adults do not readily elicit these same responses in neonates.” [Internal Database] |
| **Underdeveloped innate functions** | Newborns have reduced complement activity, phagocyte function, and cytokine production, all of which are essential for early pathogen clearance. | “Diets lacking sufficient protein are associated with impaired cell-mediated immunity, complement activity, phagocyte function, IgA antibody concentrations, and cytokine production.” [Internal Database] |
| **Thymic immaturity** | The thymus is relatively small and its output of naïve T cells is limited; any early loss (genetic or surgical) leads to severe immunodeficiency. | “The loss of the thymus at an early age… results in severe immunodeficiency and a high susceptibility to infection.” [Internal Database] |
| **No prior microbial exposure** | Without previous antigen encounters, newborns lack memory B and T cells, reducing the speed and specificity of adaptive responses. | “Newborn infants have no prior exposure to microbes and are particularly vulnerable to infection.” [Internal Database] |
| **Maternal antibody interference** | Existing maternal IgG can suppress the infant’s own antibody response to vaccination (immune tolerance), further delaying active immunity. | “Passively acquired maternal antibodies can suppress the antibody response to active immunization.” [Internal Database] |

Collectively, these factors create a **physiological state of immunodeficiency** that is normal for the neonatal period but resolves as the infant’s immune system matures (generally by 6–12 months for protein antigens and later for polysaccharide antigens). [Internal Database]

## Strategic Implications / Next Steps  

1. **Vaccination Scheduling** – Align immunization programs with the known maturation windows (e.g., prioritize protein-based vaccines early, defer polysaccharide vaccines until ≥12 months).  
2. **Passive Immunity Augmentation** – Consider prophylactic administration of immunoglobulin preparations or monoclonal antibodies for high-risk neonates (e.g., preterm, immunocompromised).  
3. **Maternal Immunization** – Boost maternal IgG levels through vaccination during pregnancy to enhance passive protection for the newborn.  
4. **Monitoring & Early Intervention** – Implement heightened surveillance for infections in the first months of life, especially in settings with malnutrition or thymic compromise.  
5. **Research Priorities** – Investigate adjuvants or vaccine platforms that can safely elicit robust Th1 responses in neonates, overcoming the intrinsic T-cell bias.  

These actions leverage the understanding that newborn immunodeficiency is a predictable, time-limited condition, allowing targeted interventions to bridge the gap until the infant’s own immune competence is established. [Internal Database]

üíæ Deep Trace Report saved to: ./models/run_reports/trace_20260118_113418_deep_trace.json

üìÑ FULL FIDELITY LOG: ./models/run_reports/trace_20260118_113418_deep_trace.json
CPU times: user 688 ms, sys: 87.5 ms, total: 775 ms
Wall time: 6.16 s
