#### Warning:
This notebook must be run from the "blackwell" package root directory.

In [None]:
import json
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
from IPython.display import clear_output
from langchain_core.messages import HumanMessage
from blackwell.anamnesis import AnamnesisAgent
from blackwell.evaluator import EvaluatorAgent
from blackwell.prompts import ai_patient_prompt
from blackwell.config import fast_model, agent_model, pro_model, logger
from evaluation.aip import AiPatient


## Anamnesis Procedure Evaluation

In [None]:
# Read the training cases and shuffle them reproducibly (fixed seed), then
# separate the feature columns from the label column.
dataset = (
    pd.read_csv("evaluation/dataset/Training.csv")
    .sample(frac=1, random_state=44)
    .reset_index(drop=True)
)
# Every column except the last one: symptoms.
X = dataset.iloc[:, :-1].reset_index(drop=True)
# The last column: diagnosis.
y = dataset.iloc[:, -1].reset_index(drop=True)

In [11]:
# Start from an empty results table and replace it with the results of a
# previous run when "evaluation/anamnesis_output.csv" is present on disk.
df = pd.DataFrame()
try:
    df = pd.read_csv("evaluation/anamnesis_output.csv")
except FileNotFoundError:
    # No earlier run to resume from: keep the empty table.
    pass
len(df)

250

In [None]:
# Emergency words to halt the anamnesis if detected
# Lower-case keywords; the simulation loop aborts a case as soon as one of them
# appears (as a substring) in the agent's latest reply.
emergency_words = [
    "emergency",
    "immediately",
    "urgent",
]

In [None]:
# Run the anamnesis simulation for each patient case
# Run the anamnesis simulation for each patient case.
#
# For every case an AiPatient (driven by a profile built from the symptom row
# and the true condition) talks to the AnamnesisAgent until the agent produces
# a final report, an emergency keyword shows up in the agent's reply, or the
# message cap is hit. Each finished case is appended to `df` and the CSV is
# rewritten immediately, so completed cases are on disk if the run is stopped.
number_of_cases = 250
max_messages = 35  # cap on len(result["messages"]) before a case is given up
verbose = False  # set to True to print every patient/agent turn

# Never index past the end of the dataset.
for i in tqdm(range(min(number_of_cases, len(X)))):
    case = X.iloc[i]
    if case['coma'] == 1:
        print("Skipping coma case for safety.")
        continue
    symptoms = case.to_json()
    condition = y.iloc[i]
    # Generate patient profile
    patient_profile = ai_patient_prompt.content.format(PATIENT_PROFILE=symptoms, PATIENT_CONDITION=condition)
    config = {"configurable": {"thread_id": np.random.randint(10, 100000)}}

    # Start the anamnesis interaction loop between AiPatient and AnamnesisAgent
    p_result = AiPatient.invoke({"messages": "Initiate your introduction.", "profile": patient_profile}, config)
    result = AnamnesisAgent.invoke(
        {
            "messages": [HumanMessage(content=p_result['messages'][-1].content)],
            "function": "chat",
            "final_report": None,
            "documents_report": None,
        },
        config,
    )
    # Read the report from the first agent turn too. Otherwise, when the agent
    # finalizes immediately, the loop below never runs and `report` would be
    # undefined (first case) or still hold the previous case's report.
    report = result["final_report"]

    # Continue the interaction until a final report is generated or max exchanges reached
    while report is None and len(result["messages"]) < max_messages:
        p_result = AiPatient.invoke({"messages": [HumanMessage(content=result['messages'][-1].content)], "profile": patient_profile}, config)
        patient_reply = p_result['messages'][-1].content
        if verbose:
            print("Patient Response:")
            print(patient_reply)

        result = AnamnesisAgent.invoke({"messages": [HumanMessage(content=patient_reply)]}, config)
        agent_reply = result['messages'][-1].content
        if verbose:
            print("Agent Response:")
            print(agent_reply)

        report = result["final_report"]
        if any(word in agent_reply.lower() for word in emergency_words):
            print("Emergency detected, going to next case.")
            report = "EMERGENCY DETECTED. Anamnesis aborted."
            break
        # NOTE: if the model provider rate-limits, add a delay between turns here.

    # Only give up when no report exists. A report (or emergency abort) that
    # arrives on the very turn that reaches the cap is still a valid outcome.
    if report is None:
        print("Max exchanges reached, moving to next case.")
        continue

    exchanges = len(result["messages"])
    print("True Condition:", condition)
    print("Anamnesis Completed with: ", exchanges, " exchanges.")
    case_study = {
        "patient_profile": symptoms,
        "anamnesis_report": json.dumps(report),
        "true_condition": condition,
        "exchanges": exchanges,
    }
    # Drop the references to the conversation states before the next case.
    p_result = None
    result = None
    # Save the results to CSV
    df = pd.concat([df, pd.DataFrame([case_study])], ignore_index=True)
    df.to_csv("evaluation/anamnesis_output.csv", index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

Emergency detected, going to next case.
True Condition: GERD
Anamnesis Completed with:  4  exchanges.
True Condition: Peptic ulcer diseae
Anamnesis Completed with:  18  exchanges.


## Evaluator Procedure Analysis

In [None]:
# Open the evaluator work table. On the first run it does not exist yet, so it
# is seeded from the anamnesis results with every row's "final_report" set to
# the placeholder "none", and written to disk right away.
try:
    reports = pd.read_csv("evaluation/evaluator_output.csv")
except FileNotFoundError:
    reports = pd.read_csv("evaluation/anamnesis_output.csv").assign(final_report="none")
    reports.to_csv("evaluation/evaluator_output.csv", index=False)
len(reports)

250

#### Single Testing

In [None]:
# Show the stored anamnesis report of row 102 after decoding its JSON encoding.
print(json.loads(reports["anamnesis_report"].loc[102]))

In [30]:
# Flag selected rows so the procedural evaluation loop processes them again,
# then persist the change.
for row_label in (216, 245, 249):
    reports.loc[row_label, "final_report"] = "rerun"
reports.to_csv("evaluation/evaluator_output.csv", index=False)

In [5]:
# Build the evaluator input from the first stored case: keep the text from the
# "[ANAMNESIS REPORT]:" marker onwards.
# partition() splits on the first marker only, so text after a repeated marker
# is not lost. If the marker is missing, the whole report is used instead of
# raising IndexError.
marker = "[ANAMNESIS REPORT]:"
first_report = json.loads(reports["anamnesis_report"][0])
before, found, after = first_report.partition(marker)
an = marker + after if found else first_report

In [None]:
# Run the evaluator once on the report prepared in `an`, using a fresh random
# thread id and a recursion limit of 100.
config = {
    "configurable": {"thread_id": np.random.rand()},
    "recursion_limit": 100,
}
initial_state = {
    "references": [],
    "t_run": 0,
    "anamnesis_report": HumanMessage(content=an),
    "query": None,
    "reports": {},
    "final_report": None,
}
result = EvaluatorAgent.invoke(initial_state, config)

#### Procedural Testing

In [32]:
# Evaluate every row whose "final_report" is still a placeholder ("none") or
# has been flagged "rerun". The table is written back to disk after each
# processed row.
for i in reports.index:
    clear_output(wait=True)
    print("Processing index ", i)
    logger.info(f"Processing index {i}")

    # Anything other than the two placeholder values counts as already done.
    already_done = reports.loc[i, "final_report"] not in ["none", "rerun"]
    if already_done:
        continue

    ar = json.loads(reports.loc[i, "anamnesis_report"])

    if "EMERGENCY DETECTED" not in ar:
        # Regular case: run the evaluator on the full anamnesis report.
        config = {
            "configurable": {"thread_id": np.random.rand()},
            "recursion_limit": 50,
        }
        initial_state = {
            "references": [],
            "t_run": 0,
            "anamnesis_report": HumanMessage(content=ar),
            "query": None,
            "reports": {},
            "final_report": None,
        }
        result = EvaluatorAgent.invoke(initial_state, config)
        final_report = result["final_report"]
        reports.loc[i, "final_report"] = json.dumps(final_report)
        reports.to_csv("evaluation/evaluator_output.csv", index=False)
        logger.info(f"Final report for index {i} saved.")
    else:
        # Aborted anamnesis: copy the emergency notice through unchanged
        # instead of invoking the evaluator.
        logger.info(f"Emergency detected in index {i}, skipping evaluation.")
        reports.loc[i, "final_report"] = json.dumps(ar)
        reports.to_csv("evaluation/evaluator_output.csv", index=False)

Processing index  249
Proposing Hypothesis query...
RAG Agent researching...
RAG Agent completed research with 6 messages
Assessing clinical certainty expert...
Running investigator expert...
Generating hypothesis report...
Proposing Treatment query...
RAG Agent researching...
RAG Agent completed research with 14 messages
Performing PubMed search for additional context...
PubMed Agent completed research with 14 messages
Generating treatment plan...
Generating final report...


### Expected Evaluation Run

* Processing index  191
* Proposing Hypothesis query...
* RAG Agent researching...
* RAG Agent completed research with 12 messages
* Assessing clinical certainty expert...
* Running investigator expert...
* Generating hypothesis report...
* Proposing Treatment query...
* RAG Agent researching...
* RAG Agent completed research with 14 messages
* Performing PubMed search for additional context...
* PubMed Agent completed research with 124 messages
* Generating treatment plan...
* Generating final report...