In [0]:
import os
import re
import sys
import time

from openai import OpenAI

from agents import clinical_agent
from agents.reason_agent import decomposition
from core.utils import LOGGER, llm_request
from core.constants import LLMConstants

config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

In [0]:
# Put the notebook's working directory on the import path so modules located
# there can be imported. The membership check keeps this cell idempotent:
# re-running it no longer appends a duplicate entry every time.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

# OpenAI client built from the API key exposed by the project's LLMConstants.
client = OpenAI(api_key=LLMConstants.OPENAI_API_KEY)

In [0]:
def solve_generate_new_drug_problem(problem):
    """Decompose a drug-ideation problem, solve each part, and request a final proposal.

    Steps performed:
      1. ``decomposition(problem, agent_tools)`` is asked to split the problem; every
         ``<subproblem>...</subproblem>`` section of its response becomes one sub-problem.
      2. A single ``ClinicalAgent`` answers each sub-problem in turn.
      3. The joined answers are sent to ``llm_request`` (model ``gpt-4-turbo``) together
         with a system prompt that asks for ``<drug>``, ``<disease>`` and ``<proposal>``
         tags in the answer.

    Args:
        problem: The user problem text that is forwarded to the decomposition step,
            the clinical agent and the final LLM request.

    Returns:
        A ``(problem_results, proposal_text)`` tuple: the newline-joined sub-problem
        answers and the message content of the final LLM response.
    """
    agent_tools = [
        {
            "type": "function",
            "function": {
                "name": "graph_reasoning_agent",
                "description": "To answer the user's instruction using information from a graph, ask the graph_reasoning Agent to answer the user's instruction. Given the graphroot name, 2 keywords indicating 2 nodes or similar nodes in the graph, and the user's instruction.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "graph_text_file_name": {
                            "type": "string",
                            "description": "The path to the text file to build the graph if needed",
                        },
                        "graph_root": {
                            "type": "string",
                            "description": "The graph root to store the graph in filesystem",
                        },
                        "keyword_1": {
                            "type": "string",
                            "description": "The first keyword indicating a node in the graph",
                        },
                        "keyword_2": {
                            "type": "string",
                            "description": "The second keyword indicating a node in the graph",
                        },
                        "user_instruction": {
                            "type": "string",
                            "description": "The user's instruction to be answered",
                        }
                    },
                    "required": ["graph_text_file_name", "graph_root", "keyword_1", "keyword_2", "user_instruction"],
                },
            },
        },
    ]

    LOGGER.log_with_depth(f"User problem:\n {problem}", depth=0)

    ##################
    ###  PLANNING  ###
    ##################
    LOGGER.log_with_depth("Planing...", depth=0)
    time.sleep(2)
    LOGGER.log_with_depth("[Thought] Least to Most Reasoning: Decompose the original user problem", depth=0)
    decomposed_resp = decomposition(problem, agent_tools)

    # re.DOTALL lets "." match newlines, so a sub-problem that spans several lines
    # is captured instead of being silently dropped.
    subproblems = [
        found.strip()
        for found in re.findall(r"<subproblem>(.*?)</subproblem>", decomposed_resp, re.DOTALL)
    ]
    if not subproblems:
        # Make the "nothing to solve" case visible in the log instead of continuing silently.
        LOGGER.log_with_depth("[Warning] No <subproblem> section found in the decomposition response.", depth=0)

    for subproblem in subproblems:
        LOGGER.log_with_depth(f"<subproblem>{subproblem}</subproblem>", depth=0)

    problem_results = []

    clinicalAgent = clinical_agent.ClinicalAgent(problem, depth=1)

    LOGGER.log_with_depth("[Action] Solve each subproblem...", depth=0)
    for sub_problem in subproblems:
        LOGGER.log_with_depth("Solving...", depth=0)
        response = clinicalAgent.request(f"The original user problem is: {problem}\nNow, please you solve this problem: {sub_problem}")

        LOGGER.log_with_depth(f"<solution>{response}</solution>", depth=0)
        problem_results.append(response)

    problem_results = '\n'.join(problem_results)

    # NOTE(review): unlike every other call here, this one relies on the default
    # depth of log_with_depth -- confirm that is intended.
    LOGGER.log_with_depth("Thinking...")
    time.sleep(2)
    LOGGER.log_with_depth("[Action] Requesting the final result from the Reasoning Agent...", depth=0)

    # NOTE(review): the persona says "Python programming expert" although the task is
    # a drug proposal -- confirm the wording is intentional before changing the prompt.
    system_prompt = '''You are a Python programming expert. Based on your own knowledge and the sub-problems have solved, please solve the user's problem and provide the reason.
First, analyse the user's problem.

Second, present the final result of the user's problem in the following format:
#drugs#: <drug>The drug name</drug>
#diseases#: <disease>The disease that this drug aims to treat</disease>
#proposal#: <proposal>The full description of the drug</proposal>

Note, put the exact full description of the drug in the <proposal></proposal> tag.
'''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"The original user problem is: {problem}"},
        {"role": "user", "content": f"The subproblems have solved are: {problem_results}"},
        {"role": "user", "content": "Please solve the user's problem and provide the reason."},
    ]

    proposals = llm_request(messages, model='gpt-4-turbo')
    proposal_text = proposals.choices[0].message.content
    LOGGER.log_with_depth(proposal_text, depth=0)
    LOGGER.log_with_depth("\n======================    END   ================\n\n", depth=0)

    return problem_results, proposal_text

In [0]:
def solve_clinical_trial_prediction_problem(problem, fewshot_path="few_shot.txt"):
    """Decompose a trial-prediction problem, solve each part, and request a final verdict.

    Steps performed:
      1. ``decomposition(problem, agent_tools)`` is asked to split the problem; every
         ``<subproblem>...</subproblem>`` section of its response becomes one sub-problem.
      2. A single ``ClinicalAgent`` answers each sub-problem in turn.
      3. The joined answers are sent to ``llm_request`` (model ``gpt-4-turbo``) with a
         system prompt that embeds the few-shot examples and asks for a probability
         inside ``<final_result></final_result>`` tags.

    Args:
        problem: The user problem text that is forwarded to the decomposition step,
            the clinical agent and the final LLM request.
        fewshot_path: Path of the text file whose content is inserted into the system
            prompt as examples. Defaults to ``"few_shot.txt"``, the previously
            hard-coded location.

    Returns:
        A ``(problem_results, final_text)`` tuple: the newline-joined sub-problem
        answers and the message content of the final LLM response.

    Raises:
        OSError: If the few-shot file cannot be opened or read.
    """
    agent_tools = [
        {
            "type": "function",
            "function": {
                "name": "safety_agent",
                "description": "To understand the safety of the drug, including toxicity and side effects, consult the Safety Agent for safety information. Given drug name, return the safety information of the drug, e.g. drug introduction, toxicity and side effects etc.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "drug_name": {
                            "type": "string",
                            "description": "The drug name",
                        }
                    },
                    "required": ["drug_name"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "efficiency_agent",
                "description": "To assess the drug's efficiency against the diseases, ask the efficiency Agent for information regarding the drug's effectiveness on the disease. Given drug name and disease name, return the drug introduction, disease introduction, and the path between drug and disease in the hetionet knowledge graph etc.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "drug_name": {
                            "type": "string",
                            "description": "The drug name",
                        },
                        "disease_name": {
                            "type": "string",
                            "description": "The disease name",
                        },
                    },
                    "required": ["drug_name", "disease_name"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "enrollment_agent",
                "description": "To determine if the clinical trial's eligibility criteria facilitate easy enrollment with enough patients. Given eligibility criteria, return the clinical trial will be poor enrollment, good enrollment, or excellent enrollment.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "eligibility_criteria": {
                            "type": "string",
                            "description": "eligibility criteria, contains including criteria and excluding criteria",
                        }
                    },
                    "required": ["eligibility_criteria"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "graph_reasoning_agent",
                "description": "The Graph Reasoning Agent is designed to interpret and respond to requests by analyzing graph structures. The agent loads the specified graph, processes the relationships or paths between the nodes, and delivers a response aligned with the user's instruction. Ideal for tasks involving graph-based data reasoning and exploration.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "keyword_1": {
                            "type": "string",
                            "description": "The first keyword indicating a node in the graph",
                        },
                        "keyword_2": {
                            "type": "string",
                            "description": "The second keyword indicating a node in the graph",
                        },
                        "user_instruction": {
                            "type": "string",
                            "description": "The request to be answered",
                        },
                    },
                    "required": [
                        "keyword_1",
                        "keyword_2",
                        "user_instruction",
                    ],
                },
            },
        },
    ]

    LOGGER.log_with_depth(f"User problem:\n {problem}", depth=0)

    ##################
    ###  PLANNING  ###
    ##################
    LOGGER.log_with_depth("Planing...", depth=0)
    time.sleep(2)
    LOGGER.log_with_depth("[Thought] Least to Most Reasoning: Decompose the original user problem", depth=0)
    decomposed_resp = decomposition(problem, agent_tools)

    # re.DOTALL lets "." match newlines, so a sub-problem that spans several lines
    # is captured instead of being silently dropped.
    subproblems = [
        found.strip()
        for found in re.findall(r"<subproblem>(.*?)</subproblem>", decomposed_resp, re.DOTALL)
    ]
    if not subproblems:
        # Make the "nothing to solve" case visible in the log instead of continuing silently.
        LOGGER.log_with_depth("[Warning] No <subproblem> section found in the decomposition response.", depth=0)

    for subproblem in subproblems:
        LOGGER.log_with_depth(f"<subproblem>{subproblem}</subproblem>", depth=0)

    problem_results = []

    clinicalAgent = clinical_agent.ClinicalAgent(problem, depth=1)

    LOGGER.log_with_depth("[Action] Solve each subproblem...", depth=0)
    for sub_problem in subproblems:
        LOGGER.log_with_depth("Solving...", depth=0)
        response = clinicalAgent.request(f"The original user problem is: {problem}\nNow, please you solve this problem: {sub_problem}")

        LOGGER.log_with_depth(f"<solution>{response}</solution>", depth=0)
        problem_results.append(response)

    problem_results = '\n'.join(problem_results)

    # NOTE(review): unlike every other call here, this one relies on the default
    # depth of log_with_depth -- confirm that is intended.
    LOGGER.log_with_depth("Thinking...")
    time.sleep(2)
    LOGGER.log_with_depth("[Action] Requesting the final result from the Reasoning Agent...", depth=0)

    # Read the examples through a context manager so the file handle is always closed.
    with open(fewshot_path, "r") as fewshot_file:
        fewshot_examples = fewshot_file.read()

    system_prompt = f''' 
        You are an expert in clinical trials. Based on your own knowledge and the sub-problems have solved, please solve the user's problem and provide the reason.
        First, Analysis the user's problem.
        Second, present the final result of the user's problem in <final_result></final_result>, for a binary problem, it is a value between 0 to 1. You must include the exact probability within the <final_result></final_result> tags, e.g., 'The failure rate of the clinical trial is <final_result>0.8</final_result>.'
        Third, explain the reason step by step.
        Noted, you must include the exact probability within the <final_result></final_result> tags.

        The following examples are essential for your understanding, offering significant learning opportunities. By analyzing these examples, including sub-problems and labels, you will learn how to make accurate predictions.
        Each example is within the <example></example> tags.
        Examples:
        {fewshot_examples}
    '''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"The original user problem is: {problem}"},
        {"role": "user", "content": f"The subproblems have solved are: {problem_results}"},
        {"role": "user", "content": "Please solve the user's problem and provide the reason."},
    ]

    final_results = llm_request(messages, model='gpt-4-turbo')
    final_text = final_results.choices[0].message.content
    LOGGER.log_with_depth(final_text, depth=0)
    LOGGER.log_with_depth("\n======================    END   ================\n\n", depth=0)

    return problem_results, final_text

In [0]:
def generate_new_drug(instruction, keyword_1, keyword_2,
                      graph_text_file_name="knowledgebasecondensed_2024-11-30.txt",
                      graph_root="knowledgebasecondensed"):
    """Ask the drug-ideation pipeline for a proposal and pull out its tagged fields.

    Builds the tagged problem text, passes it to ``solve_generate_new_drug_problem``
    and extracts the ``<drug>``, ``<disease>`` and ``<proposal>`` sections from the
    returned proposal.

    Args:
        instruction: Free-text user instruction placed in ``<user_instruction>``.
        keyword_1: First graph keyword placed in ``<keyword_1>``.
        keyword_2: Second graph keyword placed in ``<keyword_2>``.
        graph_text_file_name: Value placed in ``<graph_text_file_name>``; defaults to
            the previously hard-coded file name.
        graph_root: Value placed in ``<graph_root>``; defaults to the previously
            hard-coded root.

    Returns:
        A ``(drug, disease, criteria)`` tuple of strings, where ``criteria`` is the
        content of the ``<proposal>`` section.

    Raises:
        Exception: Any error raised while solving the problem is logged and re-raised
            (previously it was swallowed and surfaced later as ``UnboundLocalError``).
        ValueError: If the proposal lacks one of the expected tags.
    """
    try:
        LOGGER.log_with_depth(f"graph_text_file_name: {graph_text_file_name}, graph_root: {graph_root}\n", depth=0)

        problem = f'''#My problem#: <problem>I need to get information from a graph that answer my below instruction.</problem>
                        #graph_text_file_name#: <graph_text_file_name>{graph_text_file_name}</graph_text_file_name>
                        #graph_root#: <graph_root>{graph_root}</graph_root>
                        #keyword_1#: <keyword_1>{keyword_1}</keyword_1>
                        #keyword_2#: <keyword_2>{keyword_2}</keyword_2>
                        #user_instruction#: <user_instruction>{instruction}</user_instruction>
                        '''

        subproblem_solve, new_drug_proposal = solve_generate_new_drug_problem(problem)
        LOGGER.log_with_depth(f"\nSub problem solve: {subproblem_solve}", depth=0)
        LOGGER.log_with_depth(f"\nFinal result: {new_drug_proposal}", depth=0)

    except Exception as e:
        LOGGER.log_with_depth(f"Error: {e}", depth=0)
        # Without a proposal there is nothing to parse below; propagate the real
        # error instead of failing later on an unbound variable.
        raise

    def _tag_content(tag):
        # Return the text inside <tag>...</tag>; DOTALL tolerates multi-line values.
        found = re.search(rf"<{tag}>(.*?)</{tag}>", new_drug_proposal, re.DOTALL)
        if found is None:
            raise ValueError(
                f"The generated proposal has no <{tag}>...</{tag}> section:\n{new_drug_proposal}"
            )
        return found.group(1)

    drug = _tag_content("drug")
    disease = _tag_content("disease")
    criteria = _tag_content("proposal")

    return drug, disease, criteria

In [0]:
def predict_clinical_trial_failure_rate(drug, disease, criteria):
    """Ask the trial-prediction pipeline for the failure probability of a trial.

    Wraps the three inputs in a tagged problem text and passes it to
    ``solve_clinical_trial_prediction_problem``.

    Args:
        drug: Text placed in the ``<drug>`` tag of the problem.
        disease: Text placed in the ``<disease>`` tag of the problem.
        criteria: Text placed in the ``<criteria>`` tag of the problem.

    Returns:
        The final result text returned by the solver (second element of its tuple).

    Raises:
        Exception: Any error raised while solving is logged and re-raised (previously
            it was swallowed and surfaced as ``UnboundLocalError`` at the return).
    """
    try:
        problem = f'''#My problem#: <problem>I have designed a clinical trial and hope you can help me predict the probability of failure for this clinical trial.</problem>
        #drugs#: <drug>{drug}</drug>
        #diseases#: <disease>{disease}</disease>
        #criteria#: <criteria>{criteria}</criteria>
        '''

        subproblem_solve, final_result_str = solve_clinical_trial_prediction_problem(problem)

        LOGGER.log_with_depth(f"\nSub problem solve: {subproblem_solve}", depth=0)
        LOGGER.log_with_depth(f"\nFinal result: {final_result_str}", depth=0)

    except Exception as e:
        LOGGER.log_with_depth(f"Error: {e}", depth=0)
        # There is no result to return on failure; propagate the real error instead
        # of hitting an unbound variable at the return statement.
        raise

    return final_result_str

In [0]:
# Inputs for the drug-ideation run: two keywords naming graph nodes, plus the
# free-text instruction that the pipeline has to answer.
keyword_1, keyword_2 = "parkinson's disease", "treatment groups"
user_instruction = (
    "Develop a new research idea on a new drug that can help mitigate Parkinson desease. "
    "The output must include the criteria of the new drug."
)

# Run the pipeline and keep the three extracted fields for the prediction cell.
drug, disease, criteria = generate_new_drug(
    instruction=user_instruction,
    keyword_1=keyword_1,
    keyword_2=keyword_2,
)

graph_text_file_name: knowledgebasecondensed_2024-11-30.txt, graph_root: knowledgebasecondensed

User problem:
 #My problem#: <problem>I need to get information from a graph that answer my below instruction.</problem>
    #graph_text_file_name#: <graph_text_file_name>knowledgebasecondensed_2024-11-30.txt</graph_text_file_name>
    #graph_root#: <graph_root>knowledgebasecondensed</graph_root>
    #keyword_1#: <keyword_1>parkinson's disease</keyword_1>
    #keyword_2#: <keyword_2>treatment groups</keyword_2>
    #user_instruction#: <user_instruction>Develop a new research idea on a new drug that can help mitigate Parkinson desease. The output must include the criteria of the new drug.</user_instruction>
    
Planing...
[Thought] Least to Most Reasoning: Decompose the original user problem
<subproblem>Utilize the graph_reasoning_agent tool with the graph_root "knowledgebasecondensed", and the keywords "parkinson's disease" and "treatment groups" to gather insights on treatment groups rela

**Response:** ### Research Idea: Development of a Novel Multi-Target Drug for Parkinson's Disease

#### Background:
Parkinson's disease (PD) is a complex neurodegenerative disorder characterized by both motor and non-motor symptoms. The pathogenesis of PD involves multiple pathways, including alpha-synuclein aggregation, mitochondrial dysfunction, neuroinflammation, and synaptic dysfunction. Current treatments primarily provide symptomatic relief and do not address the underlying disease mechanisms. Given the multifactorial nature of PD, a multi-target drug approach may offer a more effective therapeutic strategy.

#### Research Hypothesis:
A novel drug that simultaneously targets alpha-synuclein aggregation, mitochondrial dysfunction, and neuroinflammation can provide neuroprotection and mitigate both motor and non-motor symptoms in Parkinson's disease.

#### Criteria for the New Drug:

1. **Alpha-Synuclein Aggregation Inhibition:**
   - The drug should inhibit the aggregation of alpha-synuclein, a key pathological feature of PD.
   - Mechanism: The compound should bind to alpha-synuclein monomers or oligomers to prevent their aggregation into toxic fibrils.
   - Example Target: Small molecules or peptides that stabilize alpha-synuclein in its non-aggregated form.

2. **Mitochondrial Protection:**
   - The drug should protect dopaminergic neurons from mitochondrial dysfunction, a hallmark of PD.
   - Mechanism: The compound should enhance mitochondrial biogenesis, reduce oxidative stress, and improve mitochondrial function.
   - Example Target: Agents that activate PGC-1alpha, a key regulator of mitochondrial biogenesis, or antioxidants that reduce mitochondrial oxidative stress.

3. **Anti-Inflammatory Properties:**
   - The drug should reduce neuroinflammation, which contributes to neurodegeneration in PD.
   - Mechanism: The compound should inhibit pro-inflammatory cytokines and modulate microglial activation.
   - Example Target: Inhibitors of NF-kB signaling pathway or agents that promote anti-inflammatory cytokine production.

4. **Neuroprotective and Neurorestorative Effects:**
   - The drug should promote the survival and function of dopaminergic neurons.
   - Mechanism: The compound should enhance neurotrophic factor signaling and support neuronal health.
   - Example Target: Agents that mimic or enhance the activity of neurotrophic factors like GDNF or BDNF.

5. **Symptomatic Relief:**
   - The drug should provide symptomatic relief for both motor and non-motor symptoms of PD.
   - Mechanism: The compound should have dopaminergic activity to improve motor function and address non-motor symptoms such as cognitive impairment and depression.
   - Example Target: Dopamine agonists or agents that modulate neurotransmitter systems involved in non-motor symptoms.

6. **Safety and Efficacy:**
   - The drug should have a favorable safety profile with minimal side effects.
   - Mechanism: The compound should be selective for its targets and have good pharmacokinetic properties.
   - Example Target: Optimization of drug design to enhance bioavailability and reduce off-target effects.

#### Research Plan:

1. **Drug Discovery and Design:**
   - Use computational modeling and high-throughput screening to identify potential compounds that meet the criteria.
   - Synthesize and optimize lead compounds for improved efficacy and safety.

2. **In Vitro Studies:**
   - Test the lead compounds in cell-based assays to evaluate their effects on alpha-synuclein aggregation, mitochondrial function, and neuroinflammation.
   - Assess neuroprotective effects in dopaminergic neuron cultures.

3. **In Vivo Studies:**
   - Evaluate the efficacy of lead compounds in animal models of PD.
   - Assess motor and non-motor symptoms, neuroprotection, and safety profile.

4. **Clinical Trials:**
   - Conduct phase I clinical trials to evaluate safety and pharmacokinetics in healthy volunteers.
   - Conduct phase II and III clinical trials to assess efficacy and safety in PD patients.

#### Conclusion:
The development of a multi-target drug that addresses the key pathological features of Parkinson's disease has the potential to provide a more effective treatment option. By targeting alpha-synuclein aggregation, mitochondrial dysfunction, and neuroinflammation, the new drug could offer neuroprotection and symptomatic relief, ultimately improving the quality of life for PD patients.

--------<solution><function>answer_to_instruction</function><result>### Research Idea: Development of a Novel Multi-Target Drug for Parkinson's Disease

#### Background:
Parkinson's disease (PD) is a complex neurodegenerative disorder characterized by both motor and non-motor symptoms. The pathogenesis of PD involves multiple pathways, including alpha-synuclein aggregation, mitochondrial dysfunction, neuroinflammation, and synaptic dysfunction. Current treatments primarily provide symptomatic relief and do not address the underlying disease mechanisms. Given the multifactorial nature of PD, a multi-target drug approach may offer a more effective therapeutic strategy.

#### Research Hypothesis:
A novel drug that simultaneously targets alpha-synuclein aggregation, mitochondrial dysfunction, and neuroinflammation can provide neuroprotection and mitigate both motor and non-motor symptoms in Parkinson's disease.

#### Criteria for the New Drug:

1. **Alpha-Synuclein Aggregation Inhibition:*

In [0]:
# Feed the generated drug, disease and criteria into the trial-failure predictor.
drug_failure_rate_reason = predict_clinical_trial_failure_rate(
    drug=drug,
    disease=disease,
    criteria=criteria,
)

# Bare name as the last expression so the notebook displays the returned text.
drug_failure_rate_reason

User problem:
 #My problem#: <problem>I have designed a clinical trial and hope you can help me predict the probability of failure for this clinical trial.</problem>
    #drugs#: <drug>Multi-Target Novel Drug for Parkinson's Disease</drug>
    #diseases#: <disease>Parkinson's Disease</disease>
    #criteria#: <criteria>### Research Idea: Development of a Novel Multi-Target Drug for Parkinson's Disease

#### Background:
Parkinson's disease (PD) is a complex neurodegenerative disorder characterized by both motor and non-motor symptoms. The pathogenesis of PD involves multiple pathways, including alpha-synuclein aggregation, mitochondrial dysfunction, neuroinflammation, and synaptic dysfunction. Current treatments primarily provide symptomatic relief and do not address the underlying disease mechanisms. Given the multifactorial nature of PD, a multi-target drug approach may offer a more effective therapeutic strategy.

#### Research Hypothesis:
A novel drug that simultaneously targets alp

  torch.load(f"{current_file_path}/data/enrollment_model.pt")


--------<solution><function>get_enrollment_difficulty</function><result>The enrollment difficulty of the clinical trial based on the eligibility criteria is 0.36480000615119934. Higher values indicate greater difficulty in enrollment.</result></solution
--------<function>enrollment_agent</function><result><function>get_enrollment_difficulty</function><result>The enrollment difficulty of the clinical trial based on the eligibility criteria is 0.36480000615119934. Higher values indicate greater difficulty in enrollment.</result></result>
<solution><function>enrollment_agent</function><result><function>get_enrollment_difficulty</function><result>The enrollment difficulty of the clinical trial based on the eligibility criteria is 0.36480000615119934. Higher values indicate greater difficulty in enrollment.</result></result></solution>
Thinking...
[Action] Requesting the final result from the Reasoning Agent...
The probability of failure for the clinical trial on the "Multi-Target Novel Dru

'The probability of failure for the clinical trial on the "Multi-Target Novel Drug for Parkinson\'s Disease" is <final_result>1.0</final_result>.\n\n### Reasoning:\n\n1. **Historical Failure Rate of the Drug**: The historical failure rate of the Multi-Target Novel Drug in clinical trials has been stated to be 1.0. This indicates that all similar drugs targeting multiple pathways have experienced significant challenges or failures in prior clinical trials. These can be due to complexities in drug interactions, unforeseen side effects, or insufficient efficacy. This alone provides a strong indicator of potential failure.\n\n2. **Parkinson\'s Disease Complexity**: The historical failure rate of Parkinson\'s Disease in clinical trials is 0.3566. Although this is significantly lower compared to the drug\'s failure rate, it underlines the inherent difficulties in managing and treating neurodegenerative disorders like Parkinson\'s. The variable nature of disease progression and the complexity