In [None]:
from dotenv import load_dotenv
load_dotenv()

from openai import OpenAI
import re

import os
import sys
sys.path.append(os.getcwd())

from agents.reason_agent import decomposition
from agents.utils import GPT_MODEL, exec_func, llm_request, LOGGER
from agents import clinical_agent

client = OpenAI()

import time

import pandas as pd
import random
from tqdm import tqdm

import traceback

In [None]:
def _string_param(description):
    """Return the JSON-schema fragment for one string-typed tool parameter."""
    return {
        "type": "string",
        "description": description,
    }


def _function_tool(name, description, properties):
    """Build one function-calling tool schema in which every listed parameter is required."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                # Every parameter is mandatory, in declaration order.
                "required": list(properties),
            },
        }
    }


def _build_agent_tools():
    """Return the tool schemas (safety, efficacy, enrollment) handed to the decomposition step."""
    return [
        _function_tool(
            "safety_agent",
            "To understand the safety of the drug, including toxicity and side effects, consult the Safety Agent for safety information. Given drug name, return the safety information of the drug, e.g. drug introduction, toxicity and side effects etc.",
            {"drug_name": _string_param("The drug name")},
        ),
        _function_tool(
            "efficacy_agent",
            "To assess the drug's efficacy against the diseases, ask the Efficacy Agent for information regarding the drug's effectiveness on the disease. Given drug name and disease name, return the drug introduction, disease introduction, and the path between drug and disease in the hetionet knowledge graph etc.",
            {
                "drug_name": _string_param("The drug name"),
                "disease_name": _string_param("The disease name"),
            },
        ),
        _function_tool(
            "enrollment_agent",
            "To determine if the clinical trial's eligibility criteria facilitate easy enrollment with enough patients. Given eligibility criteria, return the clinical trial will be poor enrollment, good enrollment, or excellent enrollment.",
            {
                "eligibility_criteria": _string_param(
                    "eligibility criteria, contains including criteria and excluding criteria"
                ),
            },
        ),
    ]


def _extract_subproblems(decomposed_resp):
    """Return the stripped text of every <subproblem>...</subproblem> span in the response."""
    # re.DOTALL lets "." cross newlines, so a subproblem that the model wrapped
    # over several lines is captured instead of being silently dropped.
    spans = re.findall(r"<subproblem>(.*?)</subproblem>", decomposed_resp, flags=re.DOTALL)
    return [span.strip() for span in spans]


def solve_problem(user_problem):
    """Solve a clinical-trial question by decomposition, sub-agent calls and a final LLM pass.

    Steps:
      1. Ask ``decomposition`` to split the problem, given the three agent tool schemas.
      2. Send each extracted subproblem to one shared ``ClinicalAgent`` instance.
      3. Ask ``llm_request`` (model ``gpt-4-turbo``) for the final answer, given the
         original problem and the newline-joined subproblem answers.

    Args:
        user_problem: Text of the user's problem; interpolated verbatim into the prompts.

    Returns:
        A tuple ``(problem_results, final_answer)`` where ``problem_results`` is the
        subproblem answers joined with newlines and ``final_answer`` is the message
        content of the first choice returned by ``llm_request``.
    """
    agent_tools = _build_agent_tools()

    LOGGER.log_with_depth(f"User problem:\n {user_problem}", depth=0)

    LOGGER.log_with_depth("Planing...", depth=0)
    time.sleep(2)
    LOGGER.log_with_depth("[Thought] Least to Most Reasoning: Decompose the original user problem", depth=0)
    decomposed_resp = decomposition(user_problem, agent_tools)

    subproblems = _extract_subproblems(decomposed_resp)
    for subproblem in subproblems:
        LOGGER.log_with_depth(f"<subproblem>{subproblem}</subproblem>", depth=0)

    # A single agent instance handles all subproblems of this user problem.
    clinicalAgent = clinical_agent.ClinicalAgent(user_problem, depth=1)

    LOGGER.log_with_depth("[Action] Solve each subproblem...", depth=0)
    solutions = []
    for sub_problem in subproblems:
        LOGGER.log_with_depth("Solving...", depth=0)
        response = clinicalAgent.request(f"The original user problem is: {user_problem}\nNow, please you solve this problem: {sub_problem}")

        # LOGGER.log_with_depth(f"<solution>{response}</solution>", depth=0)
        solutions.append(response)
    problem_results = '\n'.join(solutions)

    # NOTE(review): this call relies on log_with_depth's default depth, unlike the others.
    LOGGER.log_with_depth("Thinking...")
    time.sleep(2)
    LOGGER.log_with_depth("[Action] Requesting the final result from the Reasoning Agent...", depth=0)

    system_prompt = ''' 
        You are an expert in clinical trials. Based on your own knowledge and the sub-problems have solved, please solve the user's problem and provide the reason.
        First, Analysis the user's problem.
        Second, present the final result of the user's problem in <final_result></final_result>, for a binary problem, it is a value between 0 to 1. You must include the exact probability within the <final_result></final_result> tags, e.g., 'The failure rate of the clinical trial is <final_result>0.8</final_result>.'
        Third, explain the reason step by step.
        Noted, you must include the exact probability within the <final_result></final_result> tags.
    '''

    # NOTE: few-shot prompting is currently disabled. "few_shot.txt" used to be read
    # here without being used or closed, which made the function fail when the file
    # was absent. To re-enable, load it with `with open("few_shot.txt") as f:` and
    # append the section below to the system prompt.
    # The following examples are essential for your understanding, offering significant learning opportunities. By analyzing these examples, including sub-problems and labels, you will learn how to make accurate predictions.
    # Each example is within the <example></example> tags.
    # Examples:
    # {fewshot_examples}

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"The original user problem is: {user_problem}"},
        {"role": "user", "content": f"The subproblems have solved are: {problem_results}"},
        {"role": "user", "content": "Please solve the user's problem and provide the reason."},
    ]

    final_results = llm_request(messages, model='gpt-4-turbo')
    final_answer = final_results.choices[0].message.content
    LOGGER.log_with_depth(final_answer, depth=0)
    LOGGER.log_with_depth("\n======================    END   ================\n\n", depth=0)

    return problem_results, final_answer


In [None]:
def solve_problem_standard(user_problem):
    """Baseline solver: answer the problem with a single direct LLM request.

    No decomposition and no sub-agents are involved; the model receives only the
    system prompt below plus the user's problem.

    Args:
        user_problem: Text of the user's problem; interpolated verbatim into the prompt.

    Returns:
        A tuple ``(None, answer)``. The first slot is always ``None`` (there are no
        subproblem results); ``answer`` is the message content of the first choice
        returned by ``llm_request``.
    """
    LOGGER.log_with_depth(f"User problem:\n {user_problem}", depth=0)

    system_prompt = ''' 
        You are an expert in clinical trials. Based on your own knowledge, please solve the user's problem and provide the reason.
        First, analysis the user's problem.
        Second, explain how you solve the user's problem step by step.
        Finally, present the final result of the user's problem in <final_result></final_result>, for a binary problem, it is a value between 0 to 1. You must include the exact probability within the <final_result></final_result> tags, e.g., 'The failure rate of the clinical trial is <final_result>0.8</final_result>.'
        Noted, you must include the exact probability within the <final_result></final_result> tags.
    '''

    # One system message followed by two user messages, in this exact order.
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"The original user problem is: {user_problem}"},
        {"role": "user", "content": "Please solve the user's problem and provide the reason."},
    ]

    completion = llm_request(conversation)
    answer = completion.choices[0].message.content

    LOGGER.log_with_depth(answer, depth=0)
    LOGGER.log_with_depth("\n======================    END   ================\n\n", depth=0)

    return None, answer


In [None]:
# Load the tab-separated trial table, resolved relative to the current working directory.
cwd_path = os.getcwd()
trial_csv_path = f"{cwd_path}/agents/tools/risk_model/data/trial_success.csv"
trial_df = pd.read_csv(trial_csv_path, sep='\t')

In [None]:
# Parallel accumulators: position k in every list belongs to the same solved trial.
subproblems_solved = []
final_results = []
labels = []
nctid_idxs = []
random_idxs = []

# Fixed seed so the same rows are drawn on every run.
# random_seed = random.randint(0, 10000)
random_seed = 5138
print(f"Random Seed: {random_seed}")
random.seed(random_seed)

# Rows are drawn independently (with replacement) from [0, last_row_idx].
last_row_idx = trial_df.shape[0] - 1
for _ in tqdm(range(40)):
    random_idx = random.randint(0, last_row_idx)
    LOGGER.log_with_depth(f"Random Index: {random_idx}", depth=0)

    trial_row = trial_df.iloc[random_idx]

    # Best effort: a trial that fails anywhere below is logged and skipped, so the
    # accumulators may end up holding fewer than 40 entries.
    try:
        nctid, criteria, drugs, diseases, label = (
            trial_row[column]
            for column in ('nctid', 'criteria', 'drugs', 'diseases', 'label')
        )

        # The log shows the flipped label (1 - label); the raw label is what gets stored.
        LOGGER.log_with_depth(f"NCT ID: {nctid}, Label: {1-label}\n", depth=0)

        user_problem = f'''
        #My problem#: <problem>I have designed a clinical trial and hope you can help me predict the probability of failure for this clinical trial.</problem>
        #drugs#: <drug>{drugs}</drug>
        #diseases#: <disease>{diseases}</disease>
        #criteria#: <criteria>{criteria}</criteria>
        '''

        # subproblem_solve, final_result_str = solve_problem(user_problem)
        subproblem_solve, final_result_str = solve_problem_standard(user_problem)

        # Record only after the solver returned, keeping all lists aligned.
        subproblems_solved.append(subproblem_solve)
        final_results.append(final_result_str)
        labels.append(label)
        nctid_idxs.append(nctid)
        random_idxs.append(random_idx)

        LOGGER.log_with_depth("\n\n\n\n\n\n\n\n", depth=0)

    except Exception as e:
        LOGGER.log_with_depth(f"Error: {e}", depth=0)
        traceback.print_exc()
        # raise e

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss, precision_recall_curve, auc

def get_final_result(final_str):
    """Extract the predicted probability from an LLM answer.

    Reads the first number inside the first ``<final_result>...</final_result>``
    pair (the pair may span several lines) and normalizes it to ``[0, 1]``:

    * a value followed by ``%`` is always divided by 100 (``"1%"`` -> ``0.01``);
    * a bare value greater than 1 is assumed to be a percentage (``"75"`` -> ``0.75``);
    * decimals without a leading zero are accepted (``".8"`` -> ``0.8``).

    Args:
        final_str: The model's answer text.

    Returns:
        The probability as a float in ``[0, 1]``. The neutral value ``0.5`` is
        returned when ``final_str`` is not a string, the tags or a number are
        missing, or the normalized value still falls outside ``[0, 1]``.
    """
    neutral = 0.5

    if not isinstance(final_str, str):
        return neutral

    # DOTALL: the model sometimes puts the value on its own line inside the tags.
    tagged = re.search(r'<final_result>(.*?)</final_result>', final_str, re.DOTALL)
    if tagged is None:
        return neutral

    # First alternative: "12" / "0.8"; second: ".8". An optional "%" marks an
    # explicit percentage.
    number = re.search(r"(\d+(?:\.\d+)?|\.\d+)\s*(%)?", tagged.group(1))
    if number is None:
        return neutral

    likelihood = float(number.group(1))
    explicit_percent = number.group(2) is not None

    # Percentage: explicit "%" sign, or a bare value too large to be a probability.
    if explicit_percent or likelihood > 1:
        likelihood = likelihood / 100

    if not 0 <= likelihood <= 1:
        return neutral

    return likelihood

# Success to failure
# Flip every stored label (1 - label) so the targets use the same polarity as the
# predicted failure probabilities.
y_true = [1 - outcome for outcome in labels]

print(f"Sample Indexes: {random_idxs}")
print(f"NCT ID Indexes: {nctid_idxs}")

positive_count = sum(1 for target in y_true if target == 1)
print(f"Length of y_true: {len(y_true)}")
print(f"Length of positive label: {positive_count}")
print(f"y_true: {y_true}")

# Parse one probability out of each raw model answer.
y_pred_probs = list(map(get_final_result, final_results))

print(f"y_pred_probs: {y_pred_probs}\n")

# Convert predicted probabilities to binary predictions
threshold = 0.5
y_pred = [int(prob >= threshold) for prob in y_pred_probs]

# Calculate metrics
print("Accuracy:", accuracy_score(y_true, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_true, y_pred_probs))

# Calculating PR AUC
precision, recall, _ = precision_recall_curve(y_true, y_pred_probs)
pr_auc = auc(recall, precision)
print("PR AUC:", pr_auc)

# Metrics computed on the thresholded predictions.
for metric_label, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_true, y_pred))

# Log loss is computed on the raw probabilities, not the thresholded predictions.
print("Log Loss:", log_loss(y_true, y_pred_probs))



In [None]:
# for idx, subproblem_solve in enumerate(subproblems_solved):
#     if y_pred[idx] != labels[idx]:
#         print("===============================================================")
#         # print(f"NCT ID: {nctid_idxs[idx]}")
#         print(f"Prob: {y_pred_probs[idx]}")
#         print(f"Label: {labels[idx]}")
#         print()
#         print(f"<example>")
#         print(f"The correct label is <final_result>{labels[idx]}</final_result>")
#         print(f"<sub problem results>{subproblem_solve}</sub problem results>")
#         print(f"</example>")
