In [2]:
"""
notebooks/06_agentic_ai_loop.ipynb
Agentic AI Loop - Capstone 3
--------------------------------
Purpose:
 - Demonstrate an agent that orchestrates data analysis tasks:
   1) Load dataset
   2) Run issue detection
   3) Train & run simulation
   4) Call GenAI for summaries
   5) Produce recommendations

Notes:
 - This notebook uses your existing src/ modules:
   src/data_loader.py, src/issue_detection.py, src/scenario_simulation.py,
   src/genai_interface.py, src/agent/memory.py (optional)
 - LangChain is only detected and its availability reported; the code in this cell always runs the deterministic local agent (SimpleAgent).
"""

# -------------------- Imports & Setup --------------------
import os
import sys
import pprint
from pathlib import Path
import pandas as pd
from IPython.display import display, Markdown

# make src importable
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../src")))

# core tools from src
from data_loader import load_csv
from issue_detection import detect_non_compliance, detect_adverse_events, detect_outcome_anomalies, summarize_issues
from scenario_simulation import train_simulation_model, simulate_scenario
from genai_interface import summarize_doctor_notes, generate_regulatory_summary

# Optional agent memory: have_memory records whether agent.memory could be
# imported. Any exception raised by the import (not just ImportError) is
# treated as "memory unavailable".
try:
    from agent.memory import Memory
    have_memory = True
except Exception:
    have_memory = False

# Optional LangChain probe (not required). In the code visible in this cell the
# imported names are not used afterwards; have_langchain is only printed below.
try:
    from langchain import LLMChain, PromptTemplate
    from langchain.llms import OpenAI  # this is optional; only for demonstration
    have_langchain = True
except Exception:
    have_langchain = False

# Report which optional components were found so the run is self-describing.
print("Agent environment:")
print(" - LangChain available:", have_langchain)
print(" - Agent memory available:", have_memory)

# -------------------- Helper utilities --------------------

def get_data_path(filename="clinical_trial_data.csv"):
    """Return the absolute path of ``data/<filename>`` for this project.

    Lookup order:
      1. the "preferred" root: the parent of the working directory when the
         working directory itself is named ``notebooks``, otherwise the
         working directory;
      2. the working directory and then each of its parents.
    The first location where the file actually exists is returned. If the
    file exists nowhere, the path under the preferred root is returned so the
    caller gets a meaningful "file not found" location.

    Only the final path component is compared with ``"notebooks"``. A plain
    substring test on the whole path misfires whenever any ancestor folder
    merely contains that word (e.g. ``~/my_notebooks_archive/project``).

    Args:
        filename: name of the file inside the ``data`` folder.

    Returns:
        Absolute path as a ``str``.
    """
    cwd = Path(os.getcwd())
    relative = Path("data") / filename
    preferred = cwd.parent if cwd.name == "notebooks" else cwd

    # Preferred root first so the conventional layouts resolve exactly as
    # before; the upward walk covers runs from nested sub-folders.
    for base in (preferred, cwd, *cwd.parents):
        candidate = base / relative
        if candidate.is_file():
            return os.path.abspath(candidate)
    return os.path.abspath(preferred / relative)

def load_data(path=None):
    """Load the trial CSV and return a DataFrame with normalised column names.

    Column names are stripped, lower-cased and have spaces replaced by
    underscores. Date parsing happens *after* that normalisation, so a header
    spelled ``Visit Date`` is handled the same as ``visit_date``; asking
    ``read_csv`` to parse ``visit_date`` up front raises when the raw header
    is spelled differently.

    Args:
        path: CSV file to read. When ``None`` the location returned by
            ``get_data_path()`` is used.

    Returns:
        pandas.DataFrame. ``visit_date`` is converted to datetime when the
        column exists and is parseable; otherwise it is left as read.
    """
    if path is None:
        path = get_data_path()
    print("Loading data from:", path)
    df = pd.read_csv(path)
    df.columns = [c.strip().lower().replace(" ", "_") for c in df.columns]
    if "visit_date" in df.columns:
        try:
            df["visit_date"] = pd.to_datetime(df["visit_date"])
        except (ValueError, TypeError):
            # Best effort: keep the raw values rather than abort the whole load
            # when the column cannot be interpreted as dates.
            pass
    return df

def pretty_section(title):
    """Show *title* as a level-3 Markdown heading in the notebook output."""
    heading = Markdown("### {}".format(title))
    display(heading)

# -------------------- Agent Implementation --------------------

class SimpleAgent:
    """
    Deterministic local agent that orchestrates analysis steps.
    Uses the src modules (issue detection, simulation, genai) and optional memory.

    Every ``step_*`` method prints/displays its result, stores it in
    ``self.outputs`` and, when a memory object was supplied, also passes it to
    ``memory.add``. ``run_full_loop`` runs the steps in a fixed order.
    """

    def __init__(self, df, memory=None):
        """
        Args:
            df: dataset handed to the detection / simulation helpers (a pandas
                DataFrame in this notebook).
            memory: optional object exposing ``add(dict)``; ``None`` disables
                all memory writes.
        """
        self.df = df
        self.memory = memory
        self.outputs = {}

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _preview(text, limit=2000):
        """Return the first *limit* characters of *text*, plus "..." if cut."""
        return text[:limit] + ("..." if len(text) > limit else "")

    @staticmethod
    def _best_scenario(scenarios):
        """Return the scenario dict with the largest usable ``predicted_delta``.

        Entries without a ``predicted_delta`` (failed scenarios), with a
        ``None`` delta, or with a NaN delta are skipped. A NaN must be skipped
        explicitly: every ordering comparison against NaN is false, so a NaN
        picked up first would never be displaced by a real value.
        Ties keep the earliest scenario. Returns ``None`` when nothing is usable.
        """
        usable = []
        for scenario in scenarios:
            delta = scenario.get("predicted_delta")
            if delta is None or delta != delta:  # `x != x` is true only for NaN
                continue
            usable.append(scenario)
        return max(usable, key=lambda sc: sc["predicted_delta"], default=None)

    def _compose_trial_text(self):
        """Build the plain-text trial digest passed to the regulatory summary.

        One line per available statistic: row count, cohort counts, mean
        compliance (``compliance_pct`` preferred over ``compliance_rate``),
        adverse-event count (``adverse_event_flag`` preferred over
        ``adverse_event``) and mean outcome score. Lines whose column is
        absent are omitted.
        """
        columns = self.df.columns
        parts = [f"Rows: {len(self.df)}"]
        if "cohort" in columns:
            parts.append(f"Cohorts: {self.df['cohort'].value_counts().to_dict()}")
        if "compliance_pct" in columns or "compliance_rate" in columns:
            col = "compliance_pct" if "compliance_pct" in columns else "compliance_rate"
            parts.append(f"Mean compliance: {self.df[col].mean():.2f}")
        if "adverse_event_flag" in columns or "adverse_event" in columns:
            a_col = "adverse_event_flag" if "adverse_event_flag" in columns else "adverse_event"
            parts.append(f"Adverse event count: {int(self.df[a_col].sum())}")
        if "outcome_score" in columns:
            parts.append(f"Mean outcome score: {self.df['outcome_score'].mean():.2f}")
        return "\n".join(parts)

    # -------------------------------------------------------------------- steps

    def step_detect_issues(self, compliance_threshold=80):
        """Run the three issue detectors and record their result sizes.

        Args:
            compliance_threshold: forwarded as ``threshold`` to
                ``detect_non_compliance``.

        Returns:
            dict with ``non_compliance_count``, ``adverse_event_count`` and
            ``outcome_anomaly_count`` (the ``len`` of each detector result).
        """
        pretty_section("Detect Issues")
        non_comp = detect_non_compliance(self.df, threshold=compliance_threshold)
        adverse = detect_adverse_events(self.df)
        anomalies = detect_outcome_anomalies(self.df)
        summary = {
            "non_compliance_count": len(non_comp),
            "adverse_event_count": len(adverse),
            "outcome_anomaly_count": len(anomalies)
        }
        self.outputs["issues"] = {
            "non_compliant": non_comp,
            "adverse": adverse,
            "anomalies": anomalies,
            "summary": summary
        }
        print("Issue summary:", summary)
        if self.memory is not None:
            self.memory.add({"issues_summary": summary})
        return summary

    def step_train_simulator(self):
        """Train the scenario model via ``train_simulation_model``.

        Returns:
            The metrics returned by the trainer, or ``{"error": <message>}``
            when training raises. A failure is printed, not propagated, so the
            remaining steps can still run.
        """
        pretty_section("Train Scenario Simulator")
        try:
            metrics = train_simulation_model(self.df)
            self.outputs["simulator_metrics"] = metrics
            print("Trained regression model metrics:", metrics)
            if self.memory is not None:
                self.memory.add({"simulator_metrics": metrics})
            return metrics
        except Exception as e:
            print("Simulator training failed:", e)
            return {"error": str(e)}

    def step_run_scenarios(self, scenarios=None):
        """Evaluate each what-if scenario with ``simulate_scenario``.

        Args:
            scenarios: iterable of dicts with ``dosage_change`` and
                ``compliance_change`` keys. ``None`` selects four built-in
                scenarios; an explicitly empty list runs nothing.

        Returns:
            list of dicts: each input scenario extended with either
            ``predicted_delta`` or, when that scenario failed, ``error``.
        """
        pretty_section("Run Scenario Simulations")
        if scenarios is None:
            scenarios = [
                {"dosage_change": 10, "compliance_change": 0},
                {"dosage_change": 0, "compliance_change": 10},
                {"dosage_change": 10, "compliance_change": 10},
                {"dosage_change": -10, "compliance_change": -10},
            ]
        results = []
        for s in scenarios:
            # One failing scenario must not prevent the others from running.
            try:
                delta = simulate_scenario(s["dosage_change"], s["compliance_change"], self.df)
                results.append({**s, "predicted_delta": delta})
                print(f"Scenario {s} -> ΔOutcome = {delta}")
            except Exception as e:
                results.append({**s, "error": str(e)})
                print("Scenario error:", e)
        self.outputs["scenarios"] = results
        if self.memory is not None:
            self.memory.add({"scenarios": results})
        return results

    def step_genai_summaries(self, notes_limit=200):
        """Produce the doctor-notes summary and the regulatory summary.

        Args:
            notes_limit: maximum number of distinct, non-null ``doctor_notes``
                values sent to ``summarize_doctor_notes``.

        Returns:
            dict with ``notes_summary`` and ``regulatory_summary``. When a
            GenAI call raises, the corresponding value is a failure message
            instead of a summary.
        """
        pretty_section("GenAI Summaries")
        notes = self.df.get("doctor_notes", pd.Series([], dtype=object)).dropna().unique().tolist()[:notes_limit]
        if not notes:
            print("No doctor notes found in dataset to summarize.")
            notes_summary = "No doctor notes available."
        else:
            try:
                notes_summary = summarize_doctor_notes(notes)
            except Exception as e:
                notes_summary = f"GenAI summarization failed: {e}"

        # Regulatory summary: auto-compose trial text
        trial_text = self._compose_trial_text()
        try:
            regulatory_summary = generate_regulatory_summary(trial_text)
        except Exception as e:
            regulatory_summary = f"Regulatory summary generation failed: {e}"

        self.outputs["notes_summary"] = notes_summary
        self.outputs["regulatory_summary"] = regulatory_summary

        # show small preview
        display(Markdown("**Doctor Notes Summary (preview):**"))
        print(self._preview(notes_summary))
        display(Markdown("**Regulatory Summary (preview):**"))
        print(self._preview(regulatory_summary))

        if self.memory is not None:
            self.memory.add({"notes_summary": notes_summary, "regulatory_summary": regulatory_summary})

        return {"notes_summary": notes_summary, "regulatory_summary": regulatory_summary}

    def step_recommendations(self):
        """Derive rule-based recommendations from earlier step outputs.

        Reads ``self.outputs["issues"]["summary"]`` and
        ``self.outputs["scenarios"]`` (both optional). The scenario
        recommendation is emitted only when the best usable scenario has a
        positive ``predicted_delta``; NaN/None deltas are ignored.

        Returns:
            list of recommendation strings (never empty).
        """
        pretty_section("Generate Recommendations")
        recs = []
        issues = self.outputs.get("issues", {}).get("summary", {})
        scenarios = self.outputs.get("scenarios", [])
        # High-level rules to create recommendations
        if issues.get("adverse_event_count", 0) > 0:
            recs.append("Investigate patients with recorded adverse events; consider immediate follow-up.")
        if issues.get("non_compliance_count", 0) > 0:
            recs.append("Run adherence interventions for patients below compliance threshold (e.g., reminders).")
        # Use scenario outputs to recommend dosage/compliance actions
        best = self._best_scenario(scenarios)
        if best is not None and best["predicted_delta"] > 0:
            recs.append(f"Consider scenario with dosage change {best['dosage_change']}% and compliance change {best['compliance_change']}% — predicted avg outcome Δ = {best['predicted_delta']}.")
        if not recs:
            recs.append("No immediate actions suggested; continue monitoring and run scheduled analyses.")

        self.outputs["recommendations"] = recs
        if self.memory is not None:
            self.memory.add({"recommendations": recs})

        display(Markdown("**Recommendations:**"))
        for r in recs:
            print("- ", r)
        return recs

    def run_full_loop(self):
        """Run all steps in order and return the accumulated ``self.outputs``."""
        # Orchestrate all steps in order
        self.step_detect_issues()
        self.step_train_simulator()
        self.step_run_scenarios()
        self.step_genai_summaries()
        self.step_recommendations()
        return self.outputs

# -------------------- Run the Agent --------------------
pretty_section("Agentic Loop - Start")

# Read the trial dataset and report how many rows were loaded
df = load_data()
print("Loaded rows:", len(df))

# Attach a memory store only when the optional agent.memory import succeeded
if have_memory:
    memory = Memory()
else:
    memory = None

# Build the agent and execute every step once
agent = SimpleAgent(df=df, memory=memory)
outputs = agent.run_full_loop()

pretty_section("Agentic Loop - Completed")
print("Agent outputs keys:", list(outputs))

# With a memory store in use, display its five most recent entries
if memory is not None:
    pretty_section("Agent Memory (recent entries)")
    recent_entries = memory.get_all()[-5:]
    display(recent_entries)

# -------------------- End of notebook --------------------
closing_note = Markdown("#### Agentic loop finished. You can save outputs or extend the agent to run automatically on schedule.")
display(closing_note)



Agent environment:
 - LangChain available: True
 - Agent memory available: True


### Agentic Loop - Start

Loading data from: c:\Users\mackrish_malik\Desktop\clinical-insights-assistant\data\clinical_trial_data.csv
Loaded rows: 6000


### Detect Issues

Issue summary: {'non_compliance_count': 930, 'adverse_event_count': 623, 'outcome_anomaly_count': 102}


### Train Scenario Simulator

Trained regression model metrics: {'r2': 0.327, 'mae': 5.321}


### Run Scenario Simulations

Scenario {'dosage_change': 10, 'compliance_change': 0} -> ΔOutcome = 1.59
Scenario {'dosage_change': 0, 'compliance_change': 10} -> ΔOutcome = 2.81
Scenario {'dosage_change': 10, 'compliance_change': 10} -> ΔOutcome = 4.32
Scenario {'dosage_change': -10, 'compliance_change': -10} -> ΔOutcome = -4.16


### GenAI Summaries

**Doctor Notes Summary (preview):**

Okay, here's a summary of the clinical doctor notes organized into the requested categories:

**1. Key Observations:**

*   Patient is currently stable.
*   An adverse reaction was observed.
*   Fatigue is present and being monitored.

**2. Common Adverse Events:**

*   Adverse reaction requiring dosage adjustment.
*   Mild headache.
*   Fatigue.

**3. Positive Improvements:**

*   Symptoms are improving with the current dosage (after adjustment presumably).

**4. Outliers or Anomalies:**

*   The "adverse reaction" is the most significant outlier since it necessitated a dosage adjustment. The nature and severity of this reaction would need further clarification from the full notes.


**Regulatory Summary (preview):**

This randomized, controlled clinical trial involving 6,000 participants (Cohort A: 3,030, Cohort B: 2,970) investigated the efficacy and safety of a novel intervention. The study demonstrated a high level of compliance, with a mean compliance rate of 89.27% across both cohorts. Furthermore, participants exhibited a mean outcome score of 83.24, suggesting a positive impact on the measured outcome. These findings indicate potential clinical benefit associated with the intervention under study.

The safety profile of the intervention was assessed through the monitoring of adverse events. During the trial, a total of 623 adverse events were reported. Further analysis is required to determine the severity, causality, and frequency of these events, including whether they are related to the intervention or other factors. A thorough evaluation of the adverse event data is crucial for a complete understanding of the intervention's safety profile.

Overall, the trial results suggest a promising 

### Generate Recommendations

**Recommendations:**

-  Investigate patients with recorded adverse events; consider immediate follow-up.
-  Run adherence interventions for patients below compliance threshold (e.g., reminders).
-  Consider scenario with dosage change 10% and compliance change 10% — predicted avg outcome Δ = 4.32.


### Agentic Loop - Completed

Agent outputs keys: ['issues', 'simulator_metrics', 'scenarios', 'notes_summary', 'regulatory_summary', 'recommendations']


### Agent Memory (recent entries)

[{'issues_summary': {'non_compliance_count': 930,
   'adverse_event_count': 623,
   'outcome_anomaly_count': 102}},
 {'simulator_metrics': {'r2': 0.327, 'mae': 5.321}},
 {'scenarios': [{'dosage_change': 10,
    'compliance_change': 0,
    'predicted_delta': np.float64(1.59)},
   {'dosage_change': 0,
    'compliance_change': 10,
    'predicted_delta': np.float64(2.81)},
   {'dosage_change': 10,
    'compliance_change': 10,
    'predicted_delta': np.float64(4.32)},
   {'dosage_change': -10,
    'compliance_change': -10,
    'predicted_delta': np.float64(-4.16)}]},
 {'notes_summary': 'Okay, here\'s a summary of the clinical doctor notes organized into the requested categories:\n\n**1. Key Observations:**\n\n*   Patient is currently stable.\n*   An adverse reaction was observed.\n*   Fatigue is present and being monitored.\n\n**2. Common Adverse Events:**\n\n*   Adverse reaction requiring dosage adjustment.\n*   Mild headache.\n*   Fatigue.\n\n**3. Positive Improvements:**\n\n*   Symptoms 

#### Agentic loop finished. You can save outputs or extend the agent to run automatically on schedule.