**Warning:**

To run this notebook, the `.ipynb` file must be in the project root directory, because all data paths (e.g. `evaluation/dataset/Training.csv`) are resolved relative to it.

In [7]:
import json
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
from IPython.display import clear_output
from langchain_core.messages import HumanMessage, SystemMessage
from blackwell.prompts import final_report_prompt
from blackwell.anamnesis import AnamnesisAgent
from blackwell.evaluator import EvaluatorAgent
from blackwell.prompts import ai_patient_prompt
from blackwell.config import fast_model, pro_model, logger
from evaluation.aip import AiPatient

## Anamnesis Reports Generation

In [None]:
# Read the training data and shuffle it with a fixed seed so the same row
# order is produced on every run.
dataset = (
    pd.read_csv("evaluation/dataset/Training.csv")
    .sample(frac=1, random_state=44)
    .reset_index(drop=True)
)
# Every column except the last goes into X; the last column goes into y.
X = dataset.iloc[:, :-1].reset_index(drop=True)  # Symptoms
y = dataset.iloc[:, -1].reset_index(drop=True)  # Diagnosis

In [None]:
# Load the case studies from a previous run if the file exists; otherwise
# build them from the shuffled dataset and persist them.
try:
    df = pd.read_csv("evaluation/anamnesis_output.csv")
except FileNotFoundError:
    sample_size = 250
    # Collect plain records and build the frame once, instead of growing a
    # DataFrame with pd.concat on every iteration.
    case_studies = []
    for i in range(len(X)):
        # Stop once enough cases are collected; skipped coma rows do not count,
        # so the file holds `sample_size` cases whenever the dataset allows it.
        if len(case_studies) >= sample_size:
            break
        if X.iloc[i]['coma'] == 1:
            print("Skipping coma case for safety.")
            continue
        case_studies.append({
            "true_condition": y.iloc[i],
            "patient_profile": X.iloc[i].to_json(),
            "exchanges": 0,
            # Placeholder until the anamnesis loop stores a real report.
            "anamnesis_report": "none",
        })
    # Passing `columns` keeps the schema stable even if no case was collected.
    df = pd.DataFrame(
        case_studies,
        columns=["true_condition", "patient_profile", "exchanges", "anamnesis_report"],
    )
    df.to_csv("evaluation/anamnesis_output.csv", index=False)
len(df)

In [None]:
# Keywords that abort the anamnesis for a case when they appear in an agent reply
emergency_words = [
    "emergency",
    "immediately",
    "urgent",
]

In [None]:
# Run the anamnesis simulation for each patient case
number_of_cases = 250
# Cap the loop at the rows actually present in `df` so a shorter case file
# cannot raise an IndexError.
for i in tqdm(range(min(number_of_cases, len(df)))):
    verbose = False
    # Resume support, using the same "none"/"rerun" convention as the evaluator
    # loops: rows that already hold a report are not simulated again.
    if df.loc[i, 'anamnesis_report'] not in ["none", "rerun"]:
        continue
    symptoms = df["patient_profile"].iloc[i]
    condition = df["true_condition"].iloc[i]
    # Generate patient profile
    patient_profile = ai_patient_prompt.content.format(PATIENT_PROFILE=symptoms, PATIENT_CONDITION=condition)
    config = {"configurable": {"thread_id": np.random.randint(10, 100000)}}

    # Start the anamnesis interaction loop between AiPatient and AnamnesisAgent
    p_result = AiPatient.invoke({"messages": "Initiate your introduction." , "profile": patient_profile}, config)
    result = AnamnesisAgent.invoke({"messages": [HumanMessage(content=p_result['messages'][-1].content)], "function": "chat", "final_report": None, "documents_report": None}, config)
    # Take the report from the first reply as well: if the agent finishes
    # immediately the loop below never runs, and `report` would otherwise be
    # undefined or left over from the previous case.
    report = result["final_report"]

    # Continue the interaction until a final report is generated or max exchanges reached
    while (result["final_report"] is None) and (len(result["messages"]) < 35):
        p_result = AiPatient.invoke({"messages": [HumanMessage(content=result['messages'][-1].content)], "profile": patient_profile}, config)
        if verbose:
            print("Patient Response:")
            print(p_result['messages'][-1].content)
        result = AnamnesisAgent.invoke({"messages": [HumanMessage(content=p_result['messages'][-1].content)]}, config)
        if verbose:
            print("Agent Response:")
            print(result['messages'][-1].content)
        report = result["final_report"]
        # Abort the case as soon as the agent's reply contains an emergency keyword.
        if any(word in result['messages'][-1].content.lower() for word in emergency_words):
            print("Emergency detected, going to next case.")
            report = "EMERGENCY DETECTED. Anamnesis aborted."
            break
        # Time delay to avoid rate limiting
        #time.sleep(10)
    # Cases that hit the message cap are left untouched (still "none") and not saved.
    if len(result["messages"]) >= 35:
        print("Max exchanges reached, moving to next case.")
        continue

    print("True Condition:", condition)
    print("Anamnesis Completed with: ", len(result["messages"]), " exchanges.")
    df.loc[i, 'exchanges'] = len(result["messages"])
    df.loc[i, 'anamnesis_report'] = json.dumps(report)
    p_result = None
    result = None
    # Save after every case so an interrupted run keeps its progress
    df.to_csv("evaluation/anamnesis_output.csv", index=False)

## Evaluator Reports Generation

In [None]:
# Resume from an existing evaluator output file. On the first run, seed it
# from the anamnesis results with every final report marked as 'none'.
try:
    reports = pd.read_csv("evaluation/evaluator_output.csv")
except FileNotFoundError:
    reports = pd.read_csv("evaluation/anamnesis_output.csv").assign(final_report='none')
    reports.to_csv("evaluation/evaluator_output.csv", index=False)
len(reports)

250

#### Procedural Testing

In [32]:
# Run the EvaluatorAgent on every anamnesis report that has no final report yet.
for i in range(len(reports)):
    clear_output(wait=True)
    print("Processing index ", i)
    logger.info(f"Processing index {i}")

    # Resume support: only rows still marked "none" or "rerun" are evaluated.
    if reports.loc[i, "final_report"] not in ["none", "rerun"]:
        continue

    raw_report = reports.loc[i, "anamnesis_report"]
    # A case whose anamnesis never completed may still hold the "none"
    # placeholder, which is not valid JSON, and a cell that pandas treats as
    # missing is read back from CSV as NaN. Either would make json.loads raise,
    # so such rows are skipped and left pending.
    if pd.isna(raw_report) or raw_report == "none":
        logger.info(f"No anamnesis report for index {i}, skipping evaluation.")
        continue
    ar = json.loads(raw_report)

    # Aborted (emergency) cases are copied through without running the evaluator.
    if "EMERGENCY DETECTED" in ar:
        logger.info(f"Emergency detected in index {i}, skipping evaluation.")
        reports.loc[i, "final_report"] = json.dumps(ar)
        reports.to_csv("evaluation/evaluator_output.csv", index=False)
        continue

    config = {"configurable": {"thread_id": np.random.rand()}, "recursion_limit": 50}
    initial_state = {"references": [], "t_run": 0, "anamnesis_report": HumanMessage(content=ar), "query": None, "reports": {}, "final_report": None}
    result = EvaluatorAgent.invoke(initial_state, config)
    final_report = result["final_report"]
    reports.loc[i, "final_report"] = json.dumps(final_report)
    # Save after every row so an interrupted run keeps its progress.
    reports.to_csv("evaluation/evaluator_output.csv", index=False)
    logger.info(f"Final report for index {i} saved.")

Processing index  249
Proposing Hypothesis query...
RAG Agent researching...
RAG Agent completed research with 6 messages
Assessing clinical certainty expert...
Running investigator expert...
Generating hypothesis report...
Proposing Treatment query...
RAG Agent researching...
RAG Agent completed research with 14 messages
Performing PubMed search for additional context...
PubMed Agent completed research with 14 messages
Generating treatment plan...
Generating final report...


## Stock Model Evaluator Generation

In [2]:
# Resume from an existing stock-model output file. On the first run, seed it
# from the anamnesis results with every final report marked as 'none'.
try:
    reports = pd.read_csv("evaluation/evaluator_stock.csv")
except FileNotFoundError:
    reports = pd.read_csv("evaluation/anamnesis_output.csv").assign(final_report='none')
    reports.to_csv("evaluation/evaluator_stock.csv", index=False)
len(reports)

250

In [None]:
# Send every pending anamnesis report directly to `pro_model` with
# `final_report_prompt`.
for i in range(len(reports)):
    clear_output(wait=True)
    print("Processing index ", i)
    logger.info(f"Processing index {i}")

    # Resume support: only rows still marked "none" or "rerun" are evaluated.
    if reports.loc[i, "final_report"] not in ["none", "rerun"]:
        continue

    raw_report = reports.loc[i, "anamnesis_report"]
    # A case whose anamnesis never completed may still hold the "none"
    # placeholder, which is not valid JSON, and a cell that pandas treats as
    # missing is read back from CSV as NaN. Either would make json.loads raise,
    # so such rows are skipped and left pending.
    if pd.isna(raw_report) or raw_report == "none":
        logger.info(f"No anamnesis report for index {i}, skipping evaluation.")
        continue
    ar = json.loads(raw_report)

    # Aborted (emergency) cases are copied through without calling the model.
    if "EMERGENCY DETECTED" in ar:
        logger.info(f"Emergency detected in index {i}, skipping evaluation.")
        reports.loc[i, "final_report"] = json.dumps(ar)
        reports.to_csv("evaluation/evaluator_stock.csv", index=False)
        continue

    an = HumanMessage(content=ar)
    result = pro_model.invoke([final_report_prompt, an])

    final_report = result.content
    reports.loc[i, "final_report"] = json.dumps(final_report)
    # Save after every row so an interrupted run keeps its progress.
    reports.to_csv("evaluation/evaluator_stock.csv", index=False)
    logger.info(f"Final report for index {i} saved.")

## Raw Model Evaluator Generation

In [3]:
# Read the training data and shuffle it with a fixed seed so the same row
# order is produced on every run.
dataset = (
    pd.read_csv("evaluation/dataset/Training.csv")
    .sample(frac=1, random_state=44)
    .reset_index(drop=True)
)
# Every column except the last goes into X; the last column goes into y.
X = dataset.iloc[:, :-1].reset_index(drop=True)  # Symptoms
y = dataset.iloc[:, -1].reset_index(drop=True)  # Diagnosis

In [None]:
# Create or load the raw evaluation dataset
sample_size = 250
try:
    raw_eval = pd.read_csv("evaluation/evaluator_raw.csv")
except FileNotFoundError:
    # Collect plain records and build the frame once, instead of growing a
    # DataFrame with pd.concat on every iteration. A local list is used so the
    # global `df` belonging to the anamnesis section is not overwritten.
    raw_cases = []
    # Iterating over the rows of X bounds the search: the loop ends cleanly
    # (with fewer cases) if the dataset runs out, instead of raising IndexError.
    for i in range(len(X)):
        if len(raw_cases) >= sample_size:
            break
        if X.iloc[i]['coma'] == 1:
            print("Skipping coma case for safety.")
            continue
        raw_cases.append({
            "true_condition": y.iloc[i],
            "patient_profile": X.iloc[i].to_json(),
            # Placeholders until the raw-model loop fills them in.
            "aip_query": "none",
            "final_report": "none",
        })
    # Passing `columns` keeps the schema stable even if no case was collected.
    raw_eval = pd.DataFrame(
        raw_cases,
        columns=["true_condition", "patient_profile", "aip_query", "final_report"],
    )
    raw_eval.to_csv("evaluation/evaluator_raw.csv", index=False)
len(raw_eval)

250

In [19]:
# System prompt for the raw-model baseline: the model only sees the patient's
# opening complaint and must answer in the labelled format below.
# NOTE(review): a stray "content_blocks=" token that had been pasted after
# "patient." was removed; reports generated earlier used the old prompt text.
raw_prompt = SystemMessage(content="""You are a highly intelligent medical evaluator AI.
                            Your task is to provide a Probable Diagnosis based on the complaint of the patient.
                            Provide the main probable diagnosis like "Probable Diagnosis: <diagnosis>"
                            Provide differential diagnoses as "Differential Diagnoses: <diagnosis1>, <diagnosis2>, ..."
                            If necessary provide Exams to confirm the diagnosis.
                            Provide a thorough treatment plan as "Treatment Plan: <plan>"
                            Here is the patient's complaint:
                           """)

In [None]:
# Query the raw model once per patient case: the AI patient states its chief
# complaint and that single message is sent to `pro_model` with `raw_prompt`.
sample_size = 100
for i in tqdm(range(sample_size)):
    verbose = False
    symptoms = raw_eval["patient_profile"].iloc[i]
    condition = raw_eval["true_condition"].iloc[i]
    # Only rows still marked "none" or "rerun" are processed; others are skipped.
    if raw_eval.loc[i, 'final_report'] in ["none", "rerun"]:
        # Build the simulated patient's profile from the stored symptoms and condition
        patient_profile = ai_patient_prompt.content.format(
            PATIENT_PROFILE=symptoms,
            PATIENT_CONDITION=condition,
        )
        config = {"configurable": {"thread_id": np.random.randint(10, 100000)}}

        # Ask the AI patient for its opening statement
        p_result = AiPatient.invoke(
            {
                "messages": "Initiate your introduction with you chief complaint and how you are feeling",
                "profile": patient_profile,
            },
            config,
        )
        aip_query = HumanMessage(content=p_result['messages'][-1].content)

        # Single-shot call to the model with the patient's opening message
        result = pro_model.invoke([raw_prompt, aip_query])
        report = result.content

        print("True Condition:", condition)
        raw_eval.loc[i, 'aip_query'] = json.dumps(aip_query.content)
        raw_eval.loc[i, 'final_report'] = json.dumps(report)
        p_result = result = None
        # Write the CSV after every case
        raw_eval.to_csv("evaluation/evaluator_raw.csv", index=False)

  0%|          | 0/100 [00:00<?, ?it/s]

True Condition: Urinary tract infection
True Condition: Allergy
True Condition: Diabetes 
True Condition: Urinary tract infection
True Condition: (vertigo) Paroymsal  Positional Vertigo
True Condition: Tuberculosis
True Condition: Osteoarthristis
True Condition: Hypoglycemia
True Condition: Typhoid
True Condition: Arthritis
True Condition: Psoriasis
True Condition: Dimorphic hemmorhoids(piles)
True Condition: Hypothyroidism
True Condition: Alcoholic hepatitis
True Condition: Gastroenteritis
True Condition: Heart attack
True Condition: Malaria
True Condition: Impetigo
True Condition: Hypoglycemia
True Condition: AIDS
True Condition: Bronchial Asthma
True Condition: AIDS
True Condition: Urinary tract infection
True Condition: Gastroenteritis
True Condition: Gastroenteritis
True Condition: Dengue
True Condition: Migraine
True Condition: GERD
True Condition: Allergy
True Condition: Osteoarthristis
True Condition: (vertigo) Paroymsal  Positional Vertigo
True Condition: Paralysis (brain hemo