In [None]:
import os
import random
import re
import sys
import time

import pandas as pd
from agents import clinical_agent
from agents.planning_agent import decomposition
from openai import OpenAI
from sklearn.metrics import (
    accuracy_score,
    auc,
    f1_score,
    log_loss,
    precision_recall_curve,
    precision_score,
    recall_score,
    roc_auc_score,
)
from tqdm import tqdm

from core.utils import LOGGER, llm_request
from core.constants import LLMConstants


In [2]:
# Ensure the current working directory is on sys.path. The membership check
# keeps this cell idempotent: re-running it no longer appends a duplicate
# entry on every execution.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

# OpenAI client built with the API key exposed by the project's LLMConstants.
client = OpenAI(api_key=LLMConstants.OPENAI_API_KEY)

In [3]:
def solve_problem(user_problem):
    """Decompose a user problem, solve each sub-problem, then ask an LLM for a final answer.

    Steps performed:
      1. `decomposition` is called with the problem and the tool schemas below;
         every `<subproblem>...</subproblem>` span in its response is extracted.
      2. Each sub-problem is sent to a single `ClinicalAgent` instance.
      3. The joined sub-problem answers, plus the few-shot examples read from
         ``few_shot.txt`` (resolved relative to the current working directory),
         are sent to `llm_request` to obtain the final answer.

    Args:
        user_problem: Text of the problem; it is interpolated into the prompts.

    Returns:
        A tuple ``(problem_results, final_answer)`` where ``problem_results`` is
        the newline-joined sub-problem responses and ``final_answer`` is the
        message content of the first choice returned by `llm_request`.

    Raises:
        OSError: if ``few_shot.txt`` cannot be opened.
        Any exception raised by the agents or `llm_request` propagates unchanged.
    """
    agent_tools = [
        {
            "type": "function",
            "function": {
                "name": "safety_agent",
                "description": "To understand the safety of the drug, including toxicity and side effects, consult the Safety Agent for safety information. Given drug name, return the safety information of the drug, e.g. drug introduction, toxicity and side effects etc.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "drug_name": {
                            "type": "string",
                            "description": "The drug name",
                        }
                    },
                    "required": ["drug_name"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "efficiency_agent",
                "description": "To assess the drug's efficiency against the diseases, ask the efficiency Agent for information regarding the drug's effectiveness on the disease. Given drug name and disease name, return the drug introduction, disease introduction, and the path between drug and disease in the hetionet knowledge graph etc.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "drug_name": {
                            "type": "string",
                            "description": "The drug name",
                        },
                        "disease_name": {
                            "type": "string",
                            "description": "The disease name",
                        },
                    },
                    "required": ["drug_name", "disease_name"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "enrollment_agent",
                "description": "To determine if the clinical trial's eligibility criteria facilitate easy enrollment with enough patients. Given eligibility criteria, return the clinical trial will be poor enrollment, good enrollment, or excellent enrollment.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "eligibility_criteria": {
                            "type": "string",
                            "description": "eligibility criteria, contains including criteria and excluding criteria",
                        }
                    },
                    "required": ["eligibility_criteria"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "graph_reasoning_agent",
                "description": "The Graph Reasoning Agent is designed to interpret and respond to requests by analyzing graph structures. The agent loads the specified graph, processes the relationships or paths between the nodes, and delivers a response aligned with the user's instruction. Ideal for tasks involving graph-based data reasoning and exploration.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "keyword_1": {
                            "type": "string",
                            "description": "The first keyword indicating a node in the graph",
                        },
                        "keyword_2": {
                            "type": "string",
                            "description": "The second keyword indicating a node in the graph",
                        },
                        "user_instruction": {
                            "type": "string",
                            "description": "The request to be answered",
                        },
                    },
                    "required": [
                        "keyword_1",
                        "keyword_2",
                        "user_instruction",
                    ],
                },
            },
        },
    ]

    LOGGER.log_with_depth(f"User problem:\n {user_problem}", depth=0)

    ##################
    ###  PLANNING  ###
    ##################
    LOGGER.log_with_depth("Planing...", depth=0)
    time.sleep(2)
    LOGGER.log_with_depth("[Thought] Least to Most Reasoning: Decompose the original user problem", depth=0)
    decomposed_resp = decomposition(user_problem, agent_tools)

    # DOTALL lets "." span newlines, so a sub-problem written over several
    # lines is captured instead of being silently dropped.
    subproblems = [
        subproblem.strip()
        for subproblem in re.findall(
            r"<subproblem>(.*?)</subproblem>", decomposed_resp, flags=re.DOTALL
        )
    ]

    for subproblem in subproblems:
        LOGGER.log_with_depth(f"<subproblem>{subproblem}</subproblem>", depth=0)

    problem_results = []

    # One agent instance is reused for every sub-problem.
    clinicalAgent = clinical_agent.ClinicalAgent(user_problem, depth=1)

    LOGGER.log_with_depth("[Action] Solve each subproblem...", depth=0)
    for sub_problem in subproblems:
        LOGGER.log_with_depth("Solving...", depth=0)
        response = clinicalAgent.request(f"The original user problem is: {user_problem}\nNow, please you solve this problem: {sub_problem}")

        LOGGER.log_with_depth(f"<solution>{response}</solution>", depth=0)
        problem_results.append(response)

    problem_results = '\n'.join(problem_results)

    LOGGER.log_with_depth("Thinking...")
    time.sleep(2)
    LOGGER.log_with_depth("[Action] Requesting the final result from the Reasoning Agent...", depth=0)

    # Context manager closes the file handle deterministically.
    with open("few_shot.txt", "r") as few_shot_file:
        fewshot_examples = few_shot_file.read()

    system_prompt = f''' 
        You are an expert in clinical trials. Based on your own knowledge and the sub-problems have solved, please solve the user's problem and provide the reason.
        First, Analysis the user's problem.
        Second, present the final result of the user's problem in <final_result></final_result>, for a binary problem, it is a value between 0 to 1. You must include the exact probability within the <final_result></final_result> tags, e.g., 'The failure rate of the clinical trial is <final_result>0.8</final_result>.'
        Third, explain the reason step by step.
        Noted, you must include the exact probability within the <final_result></final_result> tags.

        The following examples are essential for your understanding, offering significant learning opportunities. By analyzing these examples, including sub-problems and labels, you will learn how to make accurate predictions.
        Each example is within the <example></example> tags.
        Examples:
        {fewshot_examples}
    '''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"The original user problem is: {user_problem}"},
        {"role": "user", "content": f"The subproblems have solved are: {problem_results}"},
        {"role": "user", "content": "Please solve the user's problem and provide the reason."},
    ]

    final_results = llm_request(messages, model='gpt-4-turbo')
    final_answer = final_results.choices[0].message.content
    LOGGER.log_with_depth(final_answer, depth=0)
    LOGGER.log_with_depth("\n======================    END   ================\n\n", depth=0)

    return problem_results, final_answer


In [4]:
# Load the trial table from the tab-separated file located under the
# directory the notebook was started from.
cwd_path = os.getcwd()
trial_csv_path = f"{cwd_path}/agents/tools/risk_model/data/trial_success.csv"
trial_df = pd.read_csv(trial_csv_path, sep="\t")

In [5]:
# Keep only the trials whose "diseases" text contains the substring "parkinson".
# na=False counts rows with a missing disease value as non-matches; without it
# the mask contains NaN and boolean indexing raises a ValueError.
# regex=False makes the pattern a plain literal substring.
trial_df = trial_df[trial_df["diseases"].str.contains("parkinson", na=False, regex=False)]
trial_df

Unnamed: 0,nctid,criteria,drugs,diseases,label
274,NCT00040404,Inclusion Criteria:\r\n\r\nPatients will be in...,cep-1347 10mg;cep1347 25mg;cep-1347 50mg,parkinson disease,0
930,NCT00099268,Inclusion Criteria:\r\n\r\n - Clinical diagn...,carbidopa/levodopa/entacapone;immediate releas...,parkinson's disease,0
1011,NCT00105508,Inclusion Criteria:\r\n\r\n - The subject is...,sarizotan;placebo,parkinson's disease;dyskinesia,0
1269,NCT00125567,Inclusion Criteria:\r\n\r\n - Idiopathic Par...,stalevo (levodopa/carbidopa/entacapone);levodo...,idiopathic parkinson's disease,1
1374,NCT00134966,Inclusion Criteria:\r\n\r\n - Aged 30 to 80 ...,"capsules containing: carbidopa (25 mg), levodo...",parkinson's disease,1
...,...,...,...,...,...
22599,NCT03257046,Major Inclusion Criteria:\r\n\r\n - Clinical...,iti-214,parkinson disease,1
22756,NCT03368170,Inclusion Criteria:\r\n\r\n 1. Male or female...,mesdopetam (irl790),parkinson disease,0
22894,NCT03482882,Inclusion Criteria:\r\n\r\n 1. Can understand...,pimavanserin,treatment of depression in adults with parkins...,1
22927,NCT03531060,Inclusion Criteria:\r\n\r\n 1. Male or female...,irl790;placebo,parkinson disease,1


In [6]:
# Per-trial accumulators, filled only for trials that complete without error.
subproblems_solved, final_results, labels = [], [], []
nctid_idxs, random_idxs = [], []

# Seed Python's random module before drawing row positions.
random_seed = 47
random.seed(random_seed)

for sample_no in tqdm(range(30)):
    # randint is inclusive on both ends, hence the "- 1". Each draw is
    # independent, so the same row position may come up more than once.
    random_idx = random.randint(0, len(trial_df) - 1)
    LOGGER.log_with_depth(f"Random Index: {random_idx}", depth=0)

    trial_row = trial_df.iloc[random_idx]

    try:
        nctid = trial_row["nctid"]
        criteria = trial_row["criteria"]
        drugs = trial_row["drugs"]
        diseases = trial_row["diseases"]
        label = trial_row["label"]

        # The logged value is the flipped label (1 - label).
        LOGGER.log_with_depth(f"NCT ID: {nctid}, Label: {1-label}\n", depth=0)

        user_problem = f'''
        #My problem#: <problem>I have designed a clinical trial and hope you can help me predict the probability of failure for this clinical trial.</problem>
        #drugs#: <drug>{drugs}</drug>
        #diseases#: <disease>{diseases}</disease>
        #criteria#: <criteria>{criteria}</criteria>
        '''

        subproblem_solve, final_result_str = solve_problem(user_problem)

        # Record the outcome; the stored label is the raw (unflipped) one.
        subproblems_solved.append(subproblem_solve)
        final_results.append(final_result_str)
        labels.append(label)
        nctid_idxs.append(nctid)
        random_idxs.append(random_idx)

        LOGGER.log_with_depth("\n\n\n\n\n\n\n\n", depth=0)

    except Exception as err:
        # A failing trial is logged and skipped; nothing is recorded for it.
        LOGGER.log_with_depth(f"Error: {err}", depth=0)

  0%|          | 0/1 [00:00<?, ?it/s]

Random Index: 90
NCT ID: NCT00640159, Label: 0

User problem:
 
        #My problem#: <problem>I have designed a clinical trial and hope you can help me predict the probability of failure for this clinical trial.</problem>
        #drugs#: <drug>zelapar</drug>
        #diseases#: <disease>parkinson's disease</disease>
        #criteria#: <criteria>Inclusion Criteria:

  1. Idiopathic PD confirmed by at least two of the following signs: resting tremor,
     bradykinesia, rigidity

  2. Male or female outpatients

  3. Age 30-90 years

  4. Current use of levodopa and oral selegiline (5-10 mg /day), stable for at least 1
     month and well tolerated

  5. Positive treatment response to current anti-parkinsonian medications in the opinion
     of the investigator

  6. Acceptable contraception for females of child bearing potential

  7. Willing and able to comply with study procedures.

  8. Willing and able to give written informed consent prior to beginning any study
     procedures.


  torch.load(f"{current_file_path}/data/enrollment_model.pt")


[Action] Requesting the final result from the Reasoning Agent...
To solve the user's problem of predicting the probability of failure for the clinical trial involving Zelapar for Parkinson's disease, we can analyze the data and insights derived from the subproblem results.

**Analysis:**

1. **Historical Failure Rates**:
   - Zelapar's historical failure rate in clinical trials is 0.0, indicating it has performed exceptionally well historically.
   - Parkinson's disease, however, has a moderate historical failure rate at 0.3566, reflecting the complexity and variability in its clinical management.

2. **Trial Design and Eligibility Criteria**:
   - Your trial's inclusion and exclusion criteria are specifically detailed and targeted, aiming to select a patient population that is likely to benefit most from Zelapar. This should theoretically reduce external variability and focus results on the drug's efficacy and safety.

3. **Handling of Parkinson's Disease Variability**:
   - By establ

100%|██████████| 1/1 [01:04<00:00, 64.16s/it]


In [9]:
def get_final_result(final_str):
    """Extract the predicted probability from an LLM answer.

    The first ``<final_result>...</final_result>`` span is located (it may span
    several lines) and the first unsigned number inside it is parsed. Numbers
    written without a leading zero (".8") are supported. A value greater than 1
    is interpreted as a percentage and divided by 100.

    Args:
        final_str: The answer text. Non-string input is tolerated.

    Returns:
        A float in [0, 1]. The neutral value 0.5 is returned when the input is
        not a string, the tag is missing, the tag holds no number, or the value
        is still above 1 after the percentage conversion.
    """
    neutral = 0.5

    if not isinstance(final_str, str):
        return neutral

    # DOTALL so that a tag body containing newlines is still matched.
    tag_match = re.search(r"<final_result>(.*?)</final_result>", final_str, flags=re.DOTALL)
    if tag_match is None:
        return neutral

    # Try "digits.digits" / ".digits" first so ".8" is read as 0.8 rather than
    # as the bare integer 8 (which would then be treated as 8%).
    number_match = re.search(r"\d*\.\d+|\d+", tag_match.group(1))
    if number_match is None:
        return neutral

    likelihood = float(number_match.group(0))

    # Percentage
    if likelihood > 1:
        likelihood = likelihood / 100

    # The pattern never captures a sign, so only the upper bound can be violated.
    if likelihood > 1:
        return neutral

    return likelihood

# Success to failure: flip the stored labels so that 1 is the positive class
# being predicted. Cast to plain int so the list prints without numpy reprs.
y_true = [int(1 - label) for label in labels]

print(f"Sample Indexes: {random_idxs}")
print(f"NCT ID Indexes: {nctid_idxs}")

print(f"Length of y_true: {len(y_true)}")
print(f"Length of positive label: {sum(1 for label in y_true if label == 1)}")
print(f"y_true: {y_true}")

y_pred_probs = [get_final_result(result) for result in final_results]

print(f"y_pred_probs: {y_pred_probs}\n")

# Convert predicted probabilities to binary predictions
threshold = 0.5
y_pred = [1 if prob >= threshold else 0 for prob in y_pred_probs]

# Calculate metrics
if not y_true:
    # Every sampled trial failed (or none was run): nothing to score.
    print("No evaluated trials; metrics are skipped.")
else:
    print("Accuracy:", accuracy_score(y_true, y_pred))

    # roc_auc_score raises ValueError when y_true holds a single class.
    if len(set(y_true)) > 1:
        print("ROC-AUC Score:", roc_auc_score(y_true, y_pred_probs))
    else:
        print("ROC-AUC Score: undefined (y_true contains a single class)")

    # Calculating PR AUC (needs at least one positive sample for recall).
    if 1 in y_true:
        precision, recall, _thresholds = precision_recall_curve(y_true, y_pred_probs)
        pr_auc = auc(recall, precision)
        print("PR AUC:", pr_auc)
    else:
        pr_auc = float("nan")
        print("PR AUC: undefined (y_true contains no positive sample)")

    # zero_division=0 keeps the value sklearn already falls back to when there
    # are no predicted/actual positives, without emitting a warning.
    print("Precision:", precision_score(y_true, y_pred, zero_division=0))
    print("Recall:", recall_score(y_true, y_pred, zero_division=0))
    print("F1 Score:", f1_score(y_true, y_pred, zero_division=0))

    # Explicit labels so log loss is still computable when only one class
    # occurs in y_true.
    print("Log Loss:", log_loss(y_true, y_pred_probs, labels=[0, 1]))

Sample Indexes: [90]
NCT ID Indexes: ['NCT00640159']
Length of y_true: 1
Length of positive label: 0
y_true: [np.int64(0)]
y_pred_probs: [0.15]

Accuracy: 1.0
