In [11]:
import csv
import datetime
import json
import logging
import os
import re
import xml.etree.ElementTree as ET
from functools import reduce
from statistics import fmean
from typing import Callable, Dict, List, Optional, Tuple, Union

import escargot.cypher.memgraph as memgraph
from escargot import controller, language_models, operations, prompter, parser
from escargot.vector_db import azure_embedding

def strip_answer_helper(text: str, tag: str = "") -> str:
    """
    Extract the content of the last ``<tag>...</tag>`` pair from a text.

    Surrounding whitespace is removed first, and anything up to and including
    the first "Output:" marker is discarded. When no tag is requested the
    remaining text is returned as is. If only one half of the tag pair is
    present, a warning is logged and the text after the opening tag (or
    before the closing tag) is returned; if neither is present, a warning is
    logged and the whole remaining text is returned.

    :param text: The input text.
    :type text: str
    :param tag: The tag to be stripped. Defaults to "".
    :type tag: str
    :return: The stripped text.
    :rtype: str
    """
    marker = "Output:"
    cleaned = text.strip()
    if marker in cleaned:
        cleaned = cleaned[cleaned.index(marker) + len(marker):].strip()

    if tag == "":
        return cleaned

    opening, closing = f"<{tag}>", f"</{tag}>"
    # rfind: the *last* occurrence of each half of the tag pair is used.
    opening_at = cleaned.rfind(opening)
    closing_at = cleaned.rfind(closing)
    has_opening = opening_at != -1
    has_closing = closing_at != -1

    if has_opening and has_closing:
        return cleaned[opening_at + len(opening):closing_at].strip()

    if has_opening:
        logging.warning(
            f"Only found the start tag <{tag}> in answer: {cleaned}. Returning everything after the tag."
        )
        return cleaned[opening_at + len(opening):].strip()

    if has_closing:
        logging.warning(
            f"Only found the end tag </{tag}> in answer: {cleaned}. Returning everything before the tag."
        )
        return cleaned[:closing_at].strip()

    logging.warning(
        f"Could not find any tag {tag} in answer: {cleaned}. Returning the full answer."
    )
    return cleaned

#strip answer helper but returns all instances of the tag
def strip_answer_helper_all(text: str, tag: str = "") -> List[str]:
    """
    Extract the contents of every ``<tag>...</tag>`` pair found in a text.

    Each opening tag is paired with the nearest closing tag that follows it,
    so an opening tag that is never closed is skipped instead of raising an
    ``IndexError``. The tag name is matched literally: any regex
    metacharacters it contains are escaped.

    :param text: The input text.
    :type text: str
    :param tag: The tag whose contents are extracted. Defaults to "".
    :type tag: str
    :return: The whitespace-stripped content of each tag pair, in order of appearance.
    :rtype: List[str]
    """
    escaped_tag = re.escape(tag)
    # DOTALL so that a tag body spanning several lines is still captured;
    # the non-greedy group stops at the first closing tag after each opening tag.
    pair_pattern = re.compile(f"<{escaped_tag}>(.*?)</{escaped_tag}>", re.DOTALL)
    return [content.strip() for content in pair_pattern.findall(text.strip())]

def _element_text(parent, path):
    """Return the ``.text`` of the first element matching ``path`` below ``parent``, or None if there is none."""
    found = parent.find(path)
    return found.text if found is not None else None


def _parse_step(step):
    """Convert one ``<Step>`` element into the step dictionary returned by ``parse_xml``."""
    # Only direct <KnowledgeRequest> children are collected (there can be several).
    knowledge_requests = [
        {
            "KnowledgeID": _element_text(request, 'KnowledgeID'),
            "Node": _element_text(request, 'Node'),
        }
        for request in step.findall('KnowledgeRequest')
    ]

    # A <For> element only influences the reported InstructionType; its
    # contents are not part of the returned dictionary.
    has_for = step.find('For') is not None

    function_element = step.find('Function')
    function = None
    if function_element is not None:
        # An empty <Function/> has text None; treat it as an empty string.
        function = (function_element.text or "").strip()

    if knowledge_requests:
        instruction_type = "KnowledgeRequest"
    elif has_for:
        instruction_type = "For"
    elif function:
        instruction_type = "Function"
    else:
        instruction_type = None

    return {
        "StepID": _element_text(step, 'StepID'),
        "InstructionType": instruction_type,
        # A missing or empty <Instruction> yields "" rather than a crash.
        "Instruction": (_element_text(step, 'Instruction') or "").strip(),
        "KnowledgeRequests": knowledge_requests if knowledge_requests else None,
        "Function": function,
    }


def parse_xml(xml_data):
    """
    Parse an XML plan into its steps and edges.

    Before parsing, any XML declaration, markdown code fences (```xml / ```)
    and ``<Root>`` / ``</Root>`` tags are removed, and the remainder is wrapped
    in a single ``<Root>`` element so that ``<Instructions>`` and
    ``<EdgeList>`` can be siblings.

    :param xml_data: The XML text to parse.
    :type xml_data: str
    :return: A tuple ``(steps, edges)``. ``steps`` holds one dictionary per
        ``<Step>`` with the keys "StepID", "InstructionType", "Instruction",
        "KnowledgeRequests" and "Function"; ``edges`` holds the text of each
        ``<Edge>`` with newlines and surrounding whitespace removed. A missing
        ``<Instructions>`` or ``<EdgeList>`` element yields an empty list.
    :raise xml.etree.ElementTree.ParseError: If the cleaned text is not
        well-formed XML (the error is logged before being re-raised).
    """
    # Strip the XML declaration whatever its version/encoding attributes are.
    xml_data = re.sub(r"<\?xml[^>]*\?>", "", xml_data)
    for noise in ("```xml", "```", "<Root>", "</Root>"):
        xml_data = xml_data.replace(noise, "")
    xml_data = '<Root>' + xml_data + '</Root>'

    try:
        root = ET.fromstring(xml_data)
    except Exception as e:
        logging.error(f"Could not parse XML data: {xml_data}. Encountered exception: {e}")
        # Without a parsed tree there is nothing to return; surface the real error.
        raise

    instructions = root.find('Instructions')
    steps = []
    if instructions is not None:
        steps = [_parse_step(step) for step in instructions.findall('Step')]

    edge_list = root.find('EdgeList')
    edges = []
    if edge_list is not None:
        edges = [(edge.text or "").replace("\n", "").strip() for edge in edge_list.findall('Edge')]

    return steps, edges

def output_controller(operations_graph):
    """
    Print a JSON description of an operations graph (nodes, then edges).

    Starting at the first operation, the chain of first successors is walked
    and every operation that still has a successor is serialized (id,
    operation type name and the states of its thoughts); the prompt of its
    first thought is printed along the way. The edge list is derived from the
    number of operations in ``operations_graph`` alone. Nothing is returned.

    :param operations_graph: Sequence of operations; each is expected to offer
        ``successors``, ``operation_type.name`` and ``get_thoughts()``.
    """
    serialized_nodes = []
    current = operations_graph[0]
    position = 0
    # The operation without successors ends the walk and is not serialized.
    while len(current.successors) != 0:
        node = dict(
            id=f"node_{position}",
            operation=current.operation_type.name,
            thoughts=[thought.state for thought in current.get_thoughts()],
        )
        print(node["thoughts"][0]["prompt"])
        serialized_nodes.append(node)
        position += 1
        current = current.successors[0]

    total = len(operations_graph)
    merge_node = f"node_{total - 2}"
    last_node = f"node_{total - 1}"

    edges = []
    # Every pair of operations between the first one and the last two forms a
    # branch: node_0 -> first -> second -> merge node.
    for offset in range(0, total - 3, 2):
        first = f"node_{offset + 1}"
        second = f"node_{offset + 2}"
        edges.extend([["node_0", first], [first, second], [second, merge_node]])
    edges.append([merge_node, last_node])

    print(json.dumps(serialized_nodes, indent=4))
    print(json.dumps(edges, indent=4))

def final_operation(operations_graph):
    """
    Return the operation at the end of the first-successor chain.

    Starting from the first operation in ``operations_graph``, the first
    successor is followed until an operation without successors is reached.
    That operation is printed and returned.

    :param operations_graph: Sequence of operations, each with a ``successors`` list.
    :return: The last operation of the chain.
    """
    current = operations_graph[0]
    while len(current.successors) != 0:
        current = current.successors[0]
    print(current)
    return current


class ALZKBPrompter(prompter.Prompter):
    """
    ALZKBPrompter provides the generation of prompts specific to the
    ALZKB example for the language models.

    Inherits from the Prompter class and implements its abstract methods.
    """
    # Template for the "planning" phase. generate_prompt fills {question},
    # {node_types} and {relationship_types}.
    planning_prompt = """You are a brilliant strategic thinker with access to a knowledge base. You will receive a question that will require the knowledge base to answer. The knowledge base is built from a knowledge graph, but not the knowledge graph itself. You will break down the question into steps. If knowledge needs to be pulled from the knowledge base, you will provide what specific relationships or node information is necessary. You do not need to try to answer the question but simply plan out the steps.

The knowledge graph contains node types: {node_types}.
The knowledge graph contains relationships: {relationship_types}.

Only show the steps you will take and a small description for each step. If you can determine the knowledge graph relationship that can provide insight in the step, provide the relationship in it and if possible the specific node name, not the node type. If a question require a specific relationship between two specific nodes, provide the specific nodes in the relationship.

Here is your question:
{question}

Let's think step by step, and be very succinct, clear, and efficient in the number of steps by avoiding redundant knowledge extractions."""

    # Template for the "plan_assessment" phase. generate_prompt fills
    # {question}, {approach_1}..{approach_3} (taken from input[0..2]),
    # {node_types} and {relationship_types}. The reply is requested as XML
    # with one <Approach> (containing <ApproachID> and <Score>) per approach.
    plan_assessment_prompt = """You are a brilliant strategic thinker with access to a biomedical knowledge graph. You will receive a query that will require the knowledge graph to answer and a few approaches that will try to resolve that query. 

Here is your question:
{question}

Here is ApproachNumber 1:
"{approach_1}"

Here is ApproachNumber 2:
"{approach_2}"

Here is ApproachNumber 3:
"{approach_3}"

If knowledge needs to be pulled from the knowledge graph, they will try to provide what specific relationships or node information is necessary.
The score should reflect on how clear clear, succinct, and efficient in the number of steps by avoiding redundant knowledge extractions. The highest score approach would be specific on what knowledge to extract, especially if it includes specific nodes. For instance, an approach with step 'Find the body parts or anatomy that over-express METTL5. (BODYPART OVEREXPRESSES GENE)' scores very high since it is specific on the node METTL5 and performs a 1 hop knowledge request.
An approach where there are steps that contain a specific node with a relationship scores higher than an approach where there any steps that contain only arbitrary node types.

The knowledge graph contains node types: {node_types}.
The knowledge graph contains relationships: {relationship_types}.

Return a XML formatted list with all the approaches in Approach tags. Each approach should be within <Approach> tags and will have an incremental <ApproachID> value within it. The score should be within <Score> tags.
An example is as follows:
<Approaches>
  <Approach>
    <ApproachID>1</ApproachID>
    <Score>6</Score>
  </Approach>
  <Approach>
    <ApproachID>2</ApproachID>
    <Score>7</Score>
  </Approach>
  <Approach>
    <ApproachID>3</ApproachID>
    <Score>9</Score>
  </Approach>
</Approaches>

Only return the XML.
"""

    # Template for the "xml_conversion" phase. generate_prompt fills
    # {instructions} (the plan text), {node_types} and {relationship_types}.
    # It asks for <Instructions> made of <Step> elements (each holding a
    # <KnowledgeRequest> or a <Function>) plus an <EdgeList>.
    xml_conversion_prompt = """You will be given a set of instructions and you must convert the instructions into XML with the following rules:

The knowledge graph contains node types: {node_types}.
The knowledge graph contains relationships: {relationship_types}.

Format your response in XML format, where the steps will be within <Instructions> tags. Each step will be within <Step> tags and will have an incremental <StepID> value within it. The full description of the step will be put in the <Instruction> tags within the <Step>. Following the <Instruction>, there should only be one type of instruction within a <Step>: <KnowledgeRequest> or <Function>

<KnowledgeRequest>:
 If a request to the knowledge graph needs to be made within a step, you must include the knowledge requests as simple single node to Relationship to node format. Each <KnowledgeRequest> should have an identifier in <KnowledgeID> tags such as Knowledge_1 and Knowledge_2.  Any knowledge request should have the format of <Knowledge> tag where it labeled with: Node Name-Relationship-Node Name. There must be at least one specific node that is requested such as a specific gene or disease. If there are any specific nodes and not a node type, put a ! before and after the word. For instance, Alzheimer's is a specific Disease node, so it should be labeled "!Alzheimer's Disease!". Another example are of gene symbols, which are specific genes, so they should be labeled "!APOE!". 
Each <KnowledgeRequest> should contain a specific node and should not be between two node types. For instance, "Drug-DRUG TREATS DISEASE-!Alzheimer's Disease!" is correct, but "Drug-DRUG TREATS DISEASE-Disease" is incorrect.        
If you detect two specific keywords in the query, you can use both of them in a single <Request> tag. For instance, "!APOE!-GENE ASSOCIATES WITH DISEASE-!Alzheimer's Disease!" is correct, instead of having two separate <Request> tags. Most requests should have one specific keyword. If a step requests for only a single node, omit that step altogether. You must be sure that the relationship is from the above list as well as the node types.

Here is an example <KnowledgeRequest> requesting for all body parts connected to the gene STYXL2: 
<KnowledgeRequest>
    <KnowledgeID>Knowledge_2</KnowledgeID>
    <Node>BODYPART-GENE EXPRESSES-!STYXL2!</Node>
</KnowledgeRequest>

<Function>:
You will have functionality of running array based functions that the machine will execute:
  UNION(x,y): This function returns a distinct union of elements that are in set x and set y
  INTERSECT(x,y): This function returns all elements that are found in both set x and set y
  DIFFERENCE(x, y): This function returns the elements that are in set x but not in set y. It's useful for finding the elements unique to one set compared to another. 
Any reference to arrays determined by previous steps should be by either the <StepID> identifier or <KnowledgeID> identifier. There should be no Knowledge Requests within a Function, only the identifiers. If an request is needed, the Knolwedge Request should be done within the same step.

If a function needs to be run such as UNION, INTERSECT, or DIFFERENCE using the knowledge, that should be within <Function> tags with nothing else other than the function and its variables.

Here is an example of a <Function> requesting the Intersect of two arrays from two knowledge requests:
<Function>
    INTERSECT(Knowledge_1, Knowledge_2)
</Function>

Here is an example XML:
<Instructions>
    <Step>
        <StepID>1</StepID>
        <Instruction>
            Find Body Parts Over-Expressing Gene METTL5
        </Instruction>
        <KnowledgeRequest>
                <KnowledgeID>Knowledge_1</KnowledgeID>
                <Node>BODYPART-BODYPART OVER EXPRESSES GENE-!METTL5!</Node>
            </KnowledgeRequest>
    </Step>
    <Step>
        <StepID>2</StepID>
        <Instruction>
            Find Body Parts Over-Expressing Gene STYXL2
        </Instruction>
        <KnowledgeRequest>
            <KnowledgeID>Knowledge_2</KnowledgeID>
            <Node>BODYPART-BODYPART OVER EXPRESSES GENE-!STYXL2!</Node>
        </KnowledgeRequest>
    </Step>
    <Step>
        <StepID>3</StepID>
        <Instruction>
            List the intersect of body parts
        </Instruction>
        <Function>
            INTERSECT(Knowledge_1, Knowledge_2)
        </Function>
    </Step>
</Instructions>

Outside of the <Instructions> tag, add an edge list in <EdgeList>, where information from one step to another will be listed. Each edge will be within <Edge> tags, and the edge would be in the format StepID1-StepID2 which describes that StepID1 directs to StepID2.
Do not include any other tags other than the ones mentioned above.

Here are the instructions you must convert:
{instructions}"""

    # Variant of xml_conversion_prompt that additionally describes a <For>
    # instruction. It takes the same fields ({instructions}, {node_types},
    # {relationship_types}). generate_prompt in this class does not reference
    # it (the "xml_conversion" phase uses xml_conversion_prompt).
    xml_conversion_prompt_1 = """You will be given a set of instructions and you must convert the instructions into XML with the following rules:

The knowledge graph contains node types: {node_types}.         
The knowledge graph contains relationships: {relationship_types}.

Format your response in XML format, where the steps will be within <Instructions> tags. Each step will be within <Step> tags and will have an incremental <StepID> value within it. The full description of the step will be put in the <Instruction> tags within the <Step>. Following the <Instruction>, there should only be one type of instruction within a <Step>: <KnowledgeRequest>, <For>, or <Function>

<KnowledgeRequest>:
 If a request to the knowledge graph needs to be made within a step, you must include the knowledge requests as simple single node to Relationship to node format. Each <KnowledgeRequest> should have an identifier in <KnowledgeID> tags such as Knowledge_1 and Knowledge_2.  Any knowledge request should have the format of <Knowledge> tag where it labeled with: Node Name-Relationship-Node Name. There must be at least one specific node that is requested such as a specific gene or disease. If there are any specific nodes and not a node type, put a ! before and after the word. For instance, Alzheimer's is a specific Disease node, so it should be labeled "!Alzheimer's Disease!". Another example are of gene symbols, which are specific genes, so they should be labeled "!APOE!". 
Each <KnowledgeRequest> should contain a specific node and should not be between two node types. For instance, "Drug-DRUG TREATS DISEASE-!Alzheimer's Disease!" is correct, but "Drug-DRUG TREATS DISEASE-Disease" is incorrect.        
If you detect two specific keywords in the query, you can use both of them in a single <Request> tag. For instance, "!APOE!-GENE ASSOCIATES WITH DISEASE-!Alzheimer's Disease!" is correct, instead of having two separate <Request> tags. Most requests should have one specific keyword. If a step requests for only a single node, omit that step altogether. You must be sure that the relationship is from the above list as well as the node types.

Here is an example <KnowledgeRequest> requesting for all body parts connected to the gene STYXL2: 
<KnowledgeRequest>
    <KnowledgeID>Knowledge_2</KnowledgeID>
    <Node>BODYPART-GENE EXPRESSES-!STYXL2!</Node>
</KnowledgeRequest>

<Function>:
You will have functionality of running array based functions that the machine will execute:
  UNION(x,y): This function returns a distinct union of elements that are in set x and set y
  INTERSECT(x,y): This function returns all elements that are found in both set x and set y
  DIFFERENCE(x, y): This function returns the elements that are in set x but not in set y. It's useful for finding the elements unique to one set compared to another. 
Any reference to arrays determined by previous steps should be by either the <StepID> identifier or <KnowledgeID> identifier.

If a function needs to be run such as UNION, INTERSECT, or DIFFERENCE using the knowledge, that should be within <Function> tags with nothing else other than the function and its variables.

Here is an example of a <Function> requesting the Intersect of two arrays from two knowledge requests:
<Function>
    INTERSECT(Knowledge_1, Knowledge_2)
</Function>

<For>:
A For loop maybe necessary for certain steps. The For loop will be determined by entering a <For> tag within the <Step>. The <For> tag must include a <ForVariable> which is a reference to a variable such as Knowledge_1 or StepID_1 or a distinct array (ie. an array of genes). The <For> tag must also include a <ForFunction>, which will include a <KnowledgeRequest> or a <Function> using the above format that will be used to execute for each element in the <ForVariable>. The element is labeled as "ForElement".

Here is an example of a <For> loop that gets the results from Step 1 and executes a knowledge request for each element in it, specifically if the gene list from Step 1 binds to a drug/chemical: 
<For>
    <ForVariable>StepID_1</ForVariable>
    <ForFunction>
        <KnowledgeRequest><Node>DRUG-CHEMICAL BINDS GENE-ForElement</Node></KnowledgeRequest>
    </ForFunction>
</For>

Here is an example XML:
<Instructions>
    <Step>
        <StepID>1</StepID>
        <Instruction>
            Find Body Parts Over-Expressing METTL5
        </Instruction>
        <KnowledgeRequest>
                <KnowledgeID>Knowledge_1</KnowledgeID>
                <Node>BODYPART-BODYPART OVER EXPRESSES GENE-!METTL5!</Node>
            </KnowledgeRequest>
    </Step>
    <Step>
        <StepID>2</StepID>
        <Instruction>
            Find Body Parts Over-Expressing STYXL2
        </Instruction>
        <KnowledgeRequest>
            <KnowledgeID>Knowledge_2</KnowledgeID>
            <Node>BODYPART-BODYPART OVER EXPRESSES GENE-!STYXL2!</Node>
        </KnowledgeRequest>
    </Step>
    <Step>
        <StepID>3</StepID>
        <Instruction>
            Intersect Body Parts
        </Instruction>
        <Function>
            INTERSECT(Knowledge_1, Knowledge_2)
        </Function>
    </Step>
</Instructions>

Outside of the <Instructions> tag, add an edge list in <EdgeList>, where information from one step to another will be listed. Each edge will be within <Edge> tags, and the edge would be in the format StepID1-StepID2 which describes that StepID1 directs to StepID2.
Do not include any other tags other than the ones mentioned above.

Here are the instructions you must convert:
{instructions}"""

    # Template for the "xml_cleanup" phase. generate_prompt fills {xml} (the
    # XML produced so far) and {node_types}.
    xml_cleanup_prompt = """Given the following XML:
{xml}

If you notice a Knowledge Request where the Node element refers to specific nodes and not a class of nodes (current node types: {node_types}), put a ! before and after the word. For instance, gene APOE would be !APOE! and Alzheimer's Disease would be !Alzheimer's Disease!
If a Node simply tries to retrieve a single node, the Node should just refer to the specific node without anything else. For instance if the node has Drug-BENZATROPINE, it should instead be !BENZATROPINE!
Respond in the same format as the above and nothing else."""

    # Template used for KnowledgeRequest steps. It has three fields, all of
    # which must be supplied to .format():
    #   {statement_to_embed_cleaned} - the knowledge request with "!" removed
    #   {knowledge}                  - the retrieved knowledge statements
    #   {instruction}                - the step's instruction text
    knowledge_extraction_prompt = """Use the following cypher results for getting {statement_to_embed_cleaned}:
{knowledge}

You will be given a question to answer and MUST ONLY use the above knowledge statements. Assume that the knowledge statements come from a knowledge graph with nodes describing specific things. If the question is to identify a node, simply return that node if you see it in the knowledge statements.
Your answer must be in an array format within single brackets. Please answer the following question and if you cannot answer it, return an empty array: {instruction}"""

    # Template for Function steps: the function text (e.g. a UNION/INTERSECT
    # call) is passed through unchanged as the whole prompt.
    function_prompt="""{function}"""

    # NOTE(review): this assignment is dead. `output_prompt` is assigned a
    # second time further down in this class body, and that later value is the
    # one the class ends up with. generate_prompt's "output" phase formats
    # with `question` and `input`, which matches the later template, not the
    # {knowledge}/{instruction} fields used here. Consider deleting or
    # renaming this template.
    output_prompt = """Use the following knowledge to answer the question:
{knowledge}

With the above knowledge, follow this step:
{instruction}
and answer this question: {question}"""

    # Instruction text for translating a knowledge request into a Cypher
    # query. It has no format fields: generate_prompt appends the step's
    # instruction and the knowledge request to the end of this text and hands
    # the result to memgraph_client.execute.
    memgraph_prompt = """You are an expert memgraph Cypher translator who understands the question in english and convert to Cypher strictly based on the Neo4j Schema provided and following the instructions below:
1. Generate Cypher query compatible ONLY for memgraph 2.17.0
2. Do not use EXISTS, SIZE, CONTAINS ANY keywords in the cypher. Use alias when using the WITH keyword
3. Please do not use same variable names for different nodes and relationships in the query.
4. Use only Nodes and relationships mentioned in the schema
5. Always enclose the Cypher output inside 3 backticks
6. Always do a case-insensitive and fuzzy search for any properties related search. Eg: to search for a Company name use toLower(c.name) contains 'neo4j'
7. Always use aliases to refer the node in the query
8. 'Answer' is NOT a Cypher keyword. Answer should never be used in a query.
9. Please generate only one Cypher query per question. 
10. Cypher is NOT SQL. So, do not mix and match the syntaxes.
11. Every Cypher query always starts with a MATCH keyword.
12. Always use IN keyword instead of CONTAINS ANY
13. If there is a word surrounded by !, it means it is a specific node and not a node type. For instance, if the word is !Alzheimer's Disease!, it means it is a specific Disease node and not a Disease node type.
14. If a node is a Gene, please make sure you use the geneSymbol property, not the commonName.
15. For the return, return only one property, either the commonName or the geneSymbol property. Do not return both properties.
16. The request may not clear and you do your best to assume the proper relationship to use based on the question.

Schema:
Node properties are the following:
Node name: 'BiologicalProcess', Node properties: ['commonName']
Node name: 'BodyPart', Node properties: ['commonName']
Node name: 'CellularComponent', Node properties: ['commonName']
Node name: 'Disease', Node properties: ['commonName']
Node name: 'Drug', Node properties: ['commonName']
Node name: 'DrugClass', Node properties: ['commonName']
Node name: 'Gene', Node properties: ['commonName', 'geneSymbol', 'typeOfGene']
Node name: 'MolecularFunction', Node properties: ['commonName']
Node name: 'Pathway', Node properties: ['commonName']
Node name: 'Symptom', Node properties: ['commonName']
Relationship properties are the following:
The relationships are the following:
(:Drug)-[:CHEMICALBINDSGENE]->(:Gene)
(:Drug)-[:CHEMICALDECREASESEXPRESSION]->(:Gene)
(:Drug)-[:CHEMICALINCREASESEXPRESSION]->(:Gene)
(:Drug)-[:DRUGINCLASS]->(:DrugClass)
(:Drug)-[:DRUGCAUSESEFFECT]->(:Disease)
(:Drug)-[:DRUGTREATSDISEASE]->(:Disease)
(:Gene)-[:GENEPARTICIPATESINBIOLOGICALPROCESS]->(:BiologicalProcess)
(:Gene)-[:GENEREGULATESGENE]->(:Gene)
(:Gene)-[:GENEINPATHWAY]->(:Pathway)
(:Gene)-[:GENEINTERACTSWITHGENE]->(:Gene)
(:Gene)-[:GENEHASMOLECULARFUNCTION]->(:MolecularFunction)
(:Gene)-[:GENEASSOCIATEDWITHCELLULARCOMPONENT]->(:CellularComponent)
(:Gene)-[:GENECOVARIESWITHGENE]->(:Gene)
(:Gene)-[:GENEASSOCIATESWITHDISEASE]->(:Disease)
(:Symptom)-[:SYMPTOMMANIFESTATIONOFDISEASE]->(:Disease)
(:BodyPart)-[:BODYPARTUNDEREXPRESSESGENE]->(:Gene)
(:BodyPart)-[:BODYPARTOVEREXPRESSESGENE]->(:Gene)
(:Disease)-[:DISEASELOCALIZESTOANATOMY]->(:BodyPart)
(:Disease)-[:DISEASEASSOCIATESWITHDISEASET]->(:Disease)

Samples:
Question: Provide the cypher for !METTL5!
Answer: MATCH (g:Gene {geneSymbol: "METTL5"}) RETURN g.geneSymbol

Question: !Alzheimer's Disease!-DRUG TREATS DISEASE-Drug
Answer: MATCH (d:Drug)-[:DRUGTREATSDISEASE]->(:Disease {commonName: "Alzheimer\'s Disease"}) RETURN d.commonName

Question: Provide the Cypher for Gene IN PATHWAY-!STYXL2!
Answer: MATCH (g:Gene {geneSymbol: "STYXL2"})-[:GENEINPATHWAY]->(p:Pathway) RETURN p.commonName

Provide the Cypher query for the following question """

    # Template for the "output" phase. generate_prompt fills {question} and
    # {input}. This is the second assignment to `output_prompt` in this class
    # body and therefore the value that takes effect.
    output_prompt = """Question:
{question}

Answer:
{input}
Format the answer."""
    def __init__(self,vector_db = None, lm = None, memgraph_client = None, node_types = "", relationship_types = "") -> None:
        self.vector_db = vector_db
        self.lm = lm
        self.memgraph_client = memgraph_client
        self.node_types = node_types
        self.relationship_types = relationship_types
        pass

    def extract_details(self, question_type: str, question: str ) -> Tuple[str, str, str]:
        """
        Derive the statement to embed and the answer-format instruction for a question.

        * "true/false": the statement is the question without the
          "True or False Question: " prefix; if the prefix is missing it is
          added to the returned question.
        * "multiple choice": the statement is the part of the question before
          "? 1." (the start of the choices), when that marker is present.
        * "list" and any other type: the statement is the question itself; for
          unknown types the additional instruction is empty.

        :param question_type: One of "true/false", "multiple choice", "list"; other values are accepted.
        :type question_type: str
        :param question: The question text.
        :type question: str
        :return: ``(statement_to_embed, additional_instruction, question)``.
        :rtype: Tuple[str, str, str]
        """
        true_false_prefix = "True or False Question: "
        choices_marker = "? 1."

        if question_type == "true/false":
            additional_instruction = "You will be asked to answer the question with only a TRUE or FALSE response."
            if true_false_prefix in question:
                statement_to_embed = question[question.index(true_false_prefix) + len(true_false_prefix):]
            else:
                statement_to_embed = question
                question = true_false_prefix + question
        elif question_type == "multiple choice":
            additional_instruction = "You will be asked to answer the question with only the multiple choice number response. For instance, if the correct answer is '2', you will need to answer '2'. If you are not sure, answer NA."
            # Separate the question from its numbered choices when they are present.
            if choices_marker in question:
                statement_to_embed = question[:question.index(choices_marker)]
            else:
                statement_to_embed = question
        elif question_type == "list":
            additional_instruction = "You will be asked to answer the question with only the list with each element separated by a newline."
            statement_to_embed = question
        else:
            additional_instruction = ""
            statement_to_embed = question
        return statement_to_embed, additional_instruction, question
     
    def generate_prompt(
        self,
        knowledge_list : Dict,
        question: str,
        method: str,
        input: str,
        **kwargs,
    ) -> Optional[str]:
        """
        Build the prompt for one phase of the "got" method.

        The phase is read from ``kwargs["phase"]``:

        * ``planning`` (only when ``input`` is None or empty): planning prompt.
        * ``plan_assessment``: ``input[0]``, ``input[1]`` and ``input[2]`` are
          inserted as the three approaches to score.
        * ``xml_conversion`` / ``xml_cleanup``: ``input`` is the plan / the XML.
        * ``steps``: needs ``StepID`` and ``instruction`` in ``kwargs``. A
          "Function" instruction yields the function text. A
          "KnowledgeRequest" instruction retrieves knowledge through
          ``self.vector_db`` or, failing that, ``self.memgraph_client`` and
          yields the knowledge extraction prompt.
        * ``output``: output prompt built from ``question`` and ``input``.

        NOTE: when a step holds several knowledge requests, only the last one
        contributes to the returned knowledge extraction prompt, and the
        single-node and memgraph paths return on the first request.

        :param knowledge_list: Results of earlier steps keyed by identifiers
            such as "Knowledge_1"; used to fill such references inside a
            knowledge request.
        :type knowledge_list: Dict
        :param question: The question to be answered.
        :type question: str
        :param method: The method used to generate the prompt; only "got" is implemented.
        :type method: str
        :param input: The intermediate solution (plan text, approaches or XML, depending on the phase).
        :type input: str
        :param kwargs: Additional keyword arguments: ``phase`` and, for the
            "steps" phase, ``StepID`` and ``instruction``.
        :return: The prompt. For a knowledge request answered through the
            memgraph client, the comma-joined query results are returned
            instead. None when the phase/instruction combination produces
            no prompt.
        :rtype: Optional[str]
        :raise AssertionError: If question is None or method is not implemented yet.
        :raise KeyError: If ``phase`` is missing from kwargs, or a
            "Knowledge_N" reference is not present in ``knowledge_list``.
        """
        assert question is not None, "Question should not be None."
        if method != "got":
            raise AssertionError(f"Method {method} is not implemented yet.")

        phase = kwargs["phase"]
        schema = {"node_types": self.node_types, "relationship_types": self.relationship_types}

        if phase == "planning" and (input is None or input == ""):
            return self.planning_prompt.format(question=question, **schema)
        if phase == "plan_assessment":
            return self.plan_assessment_prompt.format(question=question, approach_1=input[0], approach_2=input[1], approach_3=input[2], **schema)
        if phase == "xml_conversion":
            return self.xml_conversion_prompt.format(instructions=input, **schema)
        if phase == "xml_cleanup":
            return self.xml_cleanup_prompt.format(xml=input, **schema)
        if phase == "output":
            return self.output_prompt.format(question=question, input=input)
        if phase != "steps" or "StepID" not in kwargs:
            return None

        current_instruction = kwargs["instruction"]
        print("current_instruction:", current_instruction)
        instruction_type = current_instruction['InstructionType']
        if instruction_type == "Function":
            return self.function_prompt.format(function=current_instruction["Function"], **schema)
        if instruction_type != "KnowledgeRequest":
            return None

        instruction_text = current_instruction["Instruction"]
        # Defaults so that a step without any knowledge request still yields a prompt.
        statement_to_embed_cleaned = ""
        knowledge = "No knowledge"

        for knowledge_request in current_instruction["KnowledgeRequests"] or []:
            requested_statement = knowledge_request["Node"]
            statement_to_embed_cleaned = requested_statement.replace("!", "")
            # Replace every Knowledge_N reference by its value wrapped in "!".
            # A single regex pass handles repeated references and does not
            # mistake "Knowledge_1" for a prefix of "Knowledge_12".
            statement_to_embed = re.sub(
                r'Knowledge_(\d+)',
                lambda match: "!" + str(knowledge_list[match.group(0)]) + "!",
                requested_statement,
            )

            knowledge = ""
            if self.vector_db is not None:
                if statement_to_embed.count("!") >= 2:
                    # The request is one specific node and nothing else: the node itself is the knowledge.
                    if statement_to_embed.count("!") == 2 and statement_to_embed[0] == "!" and statement_to_embed[-1] == "!":
                        return self.knowledge_extraction_prompt.format(question=question, statement_to_embed_cleaned=statement_to_embed_cleaned, knowledge=statement_to_embed_cleaned, instruction=instruction_text)
                    embedded_question = self.lm.get_embedding(statement_to_embed_cleaned)
                    node_filters = re.findall(r'!(.*?)!', statement_to_embed)
                    knowledge_arrays = []
                    for node_filter in node_filters:
                        knowledge_array, _ = self.vector_db.get_knowledge(embedded_question, keyword_filter = node_filter)
                        if len(node_filters) > 1:
                            # With several specific nodes, keep only statements mentioning all of them.
                            for required_node in node_filters:
                                knowledge_array = [statement for statement in knowledge_array if required_node in statement]
                        knowledge_arrays.append(knowledge_array)
                    # Order-preserving de-duplication keeps the prompt identical from run to run.
                    knowledge_array = list(dict.fromkeys(statement for array in knowledge_arrays for statement in array))
                    knowledge = "\n".join(knowledge_array)
                else:
                    embedded_question = self.lm.get_embedding(statement_to_embed)
                    knowledge_array, _ = self.vector_db.get_knowledge(embedded_question)
                    knowledge = "\n".join(knowledge_array)
            elif self.memgraph_client is not None:
                # Cypher path: execute the query and return the results directly, without a prompt.
                cypher_request = str(self.memgraph_prompt) + str(instruction_text) + ". Return only the Cypher query:" + str(statement_to_embed)
                knowledge_array = self.memgraph_client.execute(self.lm, cypher_request)
                return ",".join(str(item) for item in knowledge_array)

            if knowledge == "":
                knowledge = "No knowledge"

        return self.knowledge_extraction_prompt.format(question=question, statement_to_embed_cleaned=statement_to_embed_cleaned, knowledge=knowledge, instruction=instruction_text, **schema)

    
class ALZKBParser(parser.Parser):
    """
    ALZKBParser provides the parsing of language model responses specific to the
    ALZKB example.

    Inherits from the Parser class and implements ``parse_generate_answer``.
    """

    def __init__(self) -> None:
        """
        Inits the response cache.
        """
        self.cache = {}

    def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:
        """
        Parse the response from the language model for a generate prompt.

        In GOT, the generate prompt is used for planning, plan assessment, xml
        conversion, knowledge extraction and array function.

        Phase handling (``state["phase"]``):

        - ``planning``: stores the response as ``input`` and moves to
          ``plan_assessment``.
        - ``plan_assessment``: picks the ``<Approach>`` with the highest
          ``<Score>``, replaces ``input`` with the plan at position
          ``ApproachID - 1`` and moves to ``xml_conversion``. If the selection
          fails, the error is logged and ``input`` is left untouched.
        - ``xml_conversion`` / ``xml_cleanup``: stores the response as ``input``,
          moves to ``steps`` and stores the result of ``parse_xml`` as
          ``instructions`` and ``edges``.
        - ``steps`` / ``output``: only stores the response as ``input``.

        Exceptions raised while handling a response are logged and swallowed, so
        the returned state may be only partially updated.

        :param state: The thought state used to generate the prompt.
        :type state: Dict
        :param texts: The responses to the prompt from the language model. A bare
                      string is treated as a single response.
        :type texts: List[str]
        :return: The new thought state after parsing the responses from the
                 language model. Note that, despite the annotation, this is a
                 single state dict (the one built from the last response that
                 could be handled), not a list of states.
        :rtype: Dict
        :raises ValueError: If no new state could be produced at all (no
                            responses, unsupported method or unknown phase).
        """
        if isinstance(texts, str):
            texts = [texts]

        # Sentinel so that "nothing was parsed" is detected explicitly instead of
        # surfacing as an UnboundLocalError on the return statement.
        new_state = None

        for text in texts:
            if state["method"] != "got":
                continue
            try:
                phase = state["phase"]
                if phase == "planning":
                    new_state = state.copy()
                    new_state["input"] = text
                    # The plan assessment phase is currently enabled.
                    new_state["phase"] = "plan_assessment"
                    new_state["generate_successors"] = 1
                elif phase == "plan_assessment":
                    new_state = state.copy()
                    # Select the top-scored approach and keep only its plan.
                    try:
                        approaches = strip_answer_helper_all(text, "Approach")
                        best_approach = max(
                            approaches,
                            key=lambda candidate: int(strip_answer_helper(candidate, "Score")),
                        )
                        # ApproachID is 1-based in the response.
                        plan_index = int(strip_answer_helper(best_approach, "ApproachID")) - 1
                        plans = new_state["input"]
                        if not isinstance(plans, (list, tuple)):
                            # Indexing a string would silently return one character.
                            raise TypeError(
                                f"expected a list of plans, got {type(plans).__name__}"
                            )
                        if not 0 <= plan_index < len(plans):
                            # Avoid negative indices silently wrapping around.
                            raise IndexError(
                                f"ApproachID {plan_index + 1} is outside 1..{len(plans)}"
                            )
                        new_state["input"] = plans[plan_index]
                    except Exception as e:
                        logging.error(f"Could not convert text to xml: {text}. Encountered exception: {e}")

                    new_state["phase"] = "xml_conversion"
                    new_state["generate_successors"] = 1
                elif phase in ("xml_conversion", "xml_cleanup"):
                    new_state = state.copy()
                    new_state["input"] = text
                    new_state["phase"] = "steps"
                    instructions, edges = parse_xml(text)
                    new_state["instructions"] = instructions
                    new_state["edges"] = edges
                    if phase == "xml_cleanup":
                        print("instructions:", instructions)
                        print("edges:", edges)
                elif phase in ("steps", "output"):
                    new_state = state.copy()
                    new_state["input"] = text
            except Exception as e:
                logging.error(
                    f"Could not parse step answer: {text}. Encountered exception: {e}"
                )

        if new_state is None:
            raise ValueError(
                "Could not produce a new thought state for method "
                f"{state.get('method')!r} and phase {state.get('phase')!r}."
            )
        return new_state


In [23]:
from escargot import controller, language_models, operations

from escargot.vector_db.weaviate import WeaviateClient
import escargot.cypher.memgraph as memgraph
import logging
import dill
import time
import sys 

# Question handed to the controller as the initial thought state's "question".
# The commented-out assignments are alternative questions kept for manual switching.
question = "List the body parts/anatomy which over-express both the genes METTL5 and STYXL2"
# question = "Only answer if this is true or false: Basiliximab can treat Alzheimer's Disease"
# question = "Show me the drugs that can treat Alzheimer’s Disease"
# question = "Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease"

# question = "Please provide a list of the 10 most promising drugs for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy, and indicate the diseases they were originally developed to treat. Please rank them in descending order of potential effectiveness. Please check if the generated list meets the requirement: 1) exclude the drugs that were originally designed for Alzheimer’s disease, 2) 10 distinct drugs, 3) rank in descending order according to the potential effectiveness. If not, please regenerate the list that meets the requirement."
# question = "Can Bepridil be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy and if so, why?"

# Comma-separated node type names; passed to ALZKBPrompter as `node_types`.
node_types = "Gene, DrugClass, Drug, Disease, Pathway, BiologicalProcess, MolecularFunction, CellularComponent, Symptom, BodyPart"
# Quoted, comma-separated relationship names; passed to ALZKBPrompter as `relationship_types`.
relationship_types = """ "CHEMICAL BINDS GENE","CHEMICAL INCREASES EXPRESSION", "CHEMICAL DECREASES EXPRESSION", "DRUG IN CLASS", "DRUG TREATS DISEASE", "DRUG CAUSES EFFECT", "GENE PARTICIPATES IN BIOLOGICAL PROCESS", "GENE IN PATHWAY", "GENE INTERACTS WITH GENE", "GENE HAS MOLECULAR FUNCTION", "GENE ASSOCIATED WITH CELLULAR COMPONENT", "GENE ASSOCIATES WITH DISEASE", "BODYPART OVER EXPRESSES GENE", "BODYPART UNDEREXPRESSES GENE", "SYMPTOM MANIFESTATION OF DISEASE", "DISEASE LOCALIZES TO ANATOMY", "DISEASE ASSOCIATES WITH DISEASE" """


def got() -> operations.GraphOfOperations:
    """
    Build the Graph of Operations used by this notebook.

    The graph consists of a single ``Generate(1, 1)`` operation.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    graph = operations.GraphOfOperations()
    # One generate node is the whole graph.
    graph.append_operation(operations.Generate(1, 1))
    return graph
# Retrieve the Graph of Operations
# NOTE: this rebinds `got` from the builder function to the graph it returns,
# so the `def got()` above has to be re-executed before this code runs again.
got = got()

config_file = "config.json"

# Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)
# lm = language_models.AzureGPT(config_file, model_name="azuregpt4o")
lm = language_models.AzureGPT(config_file, model_name="azuregpt35-16k")
vdb = WeaviateClient(config_file)
memgraph_client = memgraph.MemgraphClient(config_file)
num_strategies = 3

# Create the Controller.
# `ctrl` is bound up front so that a failure inside the constructor cannot leave
# it undefined (or pointing at a controller from an earlier run of this cell).
ctrl = None
try:
    ctrl = controller.Controller(
        lm,
        got,
        # ALZKBPrompter(vector_db = vdb,lm=lm,node_types=node_types,relationship_types=relationship_types),
        ALZKBPrompter(memgraph_client = memgraph_client,lm=lm,node_types=node_types,relationship_types=relationship_types),
        ALZKBParser(),
        # The following dictionary is used to configure the initial thought state
        {
            "question": question,
            "input": "",
            "phase": "planning",
            "method" : "got",
            "num_branches_response": num_strategies,
        }
    )
    # ctrl.logger.addHandler(logging.StreamHandler(sys.stdout))
    # ctrl.logger.setLevel(logging.INFO)

    # Run the Controller and generate the output graph
    ctrl.run()
except Exception as e:
    # Best effort: report the failure and still inspect whatever was produced.
    print("exception:",e)

if ctrl is not None:
    # print(ctrl.graph.operations)
    # clear up logger
    ctrl.logger.handlers = []
    ctrl.logger = None

    operations_graph = ctrl.graph.operations
    # delete the controller to free up memory
    del ctrl

    output = []
    # Follow the first successor of each operation until the last operation of
    # the chain is reached, then show the state of its first thought.
    operation = operations_graph[0]
    while len(operation.successors) > 0:
        operation = operation.successors[0]
    print(operation.get_thoughts()[0].state)




current_instruction: {'StepID': '1', 'InstructionType': None, 'Instruction': "Identify the nodes involved in the question: Benzatropine (Drug), Alzheimer's Disease (Disease)", 'KnowledgeRequests': None, 'Function': None}
exception: 'NoneType' object is not subscriptable
{'question': "Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease", 'input': '', 'phase': 'output', 'method': 'got', 'prompt': 'You will be given a set of instructions and you must convert the instructions into XML with the following rules:\n\nThe knowledge graph contains node types: Gene, DrugClass, Drug, Disease, Pathway, BiologicalProcess, MolecularFunction, CellularComponent, Symptom, BodyPart.\nThe knowledge graph contains relationships:  "CHEMICAL BINDS GENE","CHEMICAL INCREASES EXPRESSION", "CHEMICAL DECREASES EXPRESSION", "DRUG IN CLASS", "DRUG TREATS DISEASE", "DRUG CAUSES EFFECT", "GENE PARTICIPATES IN BIOLOGICAL PROCESS", "GENE IN PATHWAY", "GENE INTERACTS WITH GENE", "GENE HAS MO

In [25]:
# Re-inspect the final state of the last run. This cell defines no
# `operations_graph` itself and relies on the one left in the kernel by a
# previously executed cell.
# `output` is initialised here but not used in this cell.
output = []
# Start at the first operation and follow the first successor at every step
# until an operation without successors is reached.
operation = operations_graph[0]
while len(operation.successors) > 0:
    operation = operation.successors[0]
# Show the state of the first thought held by that last operation.
print(operation.get_thoughts()[0].state)








{'question': "Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease", 'input': '', 'phase': 'output', 'method': 'got', 'prompt': 'You will be given a set of instructions and you must convert the instructions into XML with the following rules:\n\nThe knowledge graph contains node types: Gene, DrugClass, Drug, Disease, Pathway, BiologicalProcess, MolecularFunction, CellularComponent, Symptom, BodyPart.\nThe knowledge graph contains relationships:  "CHEMICAL BINDS GENE","CHEMICAL INCREASES EXPRESSION", "CHEMICAL DECREASES EXPRESSION", "DRUG IN CLASS", "DRUG TREATS DISEASE", "DRUG CAUSES EFFECT", "GENE PARTICIPATES IN BIOLOGICAL PROCESS", "GENE IN PATHWAY", "GENE INTERACTS WITH GENE", "GENE HAS MOLECULAR FUNCTION", "GENE ASSOCIATED WITH CELLULAR COMPONENT", "GENE ASSOCIATES WITH DISEASE", "BODYPART OVER EXPRESSES GENE", "BODYPART UNDEREXPRESSES GENE", "SYMPTOM MANIFESTATION OF DISEASE", "DISEASE LOCALIZES TO ANATOMY", "DISEASE ASSOCIATES WITH DISEASE" .\n\nFormat

In [2]:
from escargot import controller, language_models, operations

from escargot.vector_db.weaviate import WeaviateClient
import logging
import dill
import time
import sys 

# `question` is assigned twice here, so only the second (Benzatropine) value
# survives; the `for question in questions` loop below rebinds it again.
question = "List the body parts/anatomy which over-express both the genes METTL5 and STYXL2"
# question = "Only answer if this is true or false: Basiliximab can treat Alzheimer's Disease"
# question = "Show me the drugs that can treat Alzheimer’s Disease"
question = "Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease"
# Batch of questions iterated by the loop below. The entries are, in order:
# true/false "can treat" questions, true/false "can be considered for
# repurposing" questions, and open-ended "show me"/ranking requests.
questions = ["Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease",
"Only answer if this is true or false: Ropinirole can treat Alzheimer's Disease",
"Only answer if this is true or false: Haloperidol can treat Alzheimer's Disease",
"Only answer if this is true or false: Selegiline can treat Alzheimer's Disease",
"Only answer if this is true or false: Rivastigmine can treat Alzheimer's Disease",
"Only answer if this is true or false: Donepezil can treat Alzheimer's Disease",
"Only answer if this is true or false: Memantine can treat Alzheimer's Disease",
"Only answer if this is true or false: Quetiapine can treat Alzheimer's Disease",
"Only answer if this is true or false: Clozapine can treat Alzheimer's Disease",
"Only answer if this is true or false: Adenosine can treat Alzheimer's Disease",
"Only answer if this is true or false: Aripiprazole can treat Alzheimer's Disease",
"Only answer if this is true or false: Pioglitazone can treat Alzheimer's Disease",
"Only answer if this is true or false: Risperidone can treat Alzheimer's Disease",
"Only answer if this is true or false: Sunitinib can treat Alzheimer's Disease",
"Only answer if this is true or false: Tamoxifen can treat Alzheimer's Disease",
"Only answer if this is true or false: Vandetanib can treat Alzheimer's Disease",
"Only answer if this is true or false: Verapamil can treat Alzheimer's Disease",
"Only answer if this is true or false: Vorinostat can treat Alzheimer's Disease",
"Only answer if this is true or false: Vitamin E can treat Alzheimer's Disease",
"Only answer if this is true or false: Dexmethylphenidate can treat Alzheimer's Disease",
"Only answer if this is true or false: Ceforanide can treat Alzheimer's Disease",
"Only answer if this is true or false: Paliperidone can treat Alzheimer's Disease",
"Only answer if this is true or false: Triprolidine can treat Alzheimer's Disease",
"Only answer if this is true or false: Propranolol can treat Alzheimer's Disease",
"Only answer if this is true or false: Bepridil can treat Alzheimer's Disease",
"Only answer if this is true or false: Icosapent can treat Alzheimer's Disease",
"Only answer if this is true or false: Oxybutynin can treat Alzheimer's Disease",
"Only answer if this is true or false: Methylphenobarbital can treat Alzheimer's Disease",
"Only answer if this is true or false: Basiliximab can treat Alzheimer's Disease",
"Only answer if this is true or false: Muromonab can treat Alzheimer's Disease",
"Only answer if this is true or false: Trastuzumab can treat Alzheimer's Disease",
"Only answer if this is true or false: Rituximab can treat Alzheimer's Disease",
"Only answer if this is true or false: Ibritumomab tiuxetan can treat Alzheimer's Disease",
"Only answer if this is true or false: Digoxin Immune Fab (Ovine) can treat Alzheimer's Disease",
"Only answer if this is true or false: Hyaluronidase (ovine) can treat Alzheimer's Disease",
"Only answer if this is true or false: Insulin pork can treat Alzheimer's Disease",
"Only answer if this is true or false: Pancrelipase can treat Alzheimer's Disease",
"Only answer if this is true or false: Streptokinase can treat Alzheimer's Disease",
"Only answer if this is true or false: Clozapine can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Adenosine can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Aripiprazole can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Pioglitazone can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Risperidone can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Sunitinib can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Tamoxifen can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Vandetanib can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Verapamil can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Vorinostat can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Vitamin E can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Dexmethylphenidate can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Ceforanide can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Paliperidone can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Triprolidine can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Propranolol can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Bepridil can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Icosapent can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Oxybutynin can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Only answer if this is true or false: Methylphenobarbital can be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy",
"Show me the drugs that can treat Alzheimer’s Disease",
"Show me the drugs that connect to Alzheimer’s Disease genes but do not connect to Alzheimer’s Disease",
"Show me the drugs that connect to body part that is  related to  Alzheimer’s Disease",
"Show me the drugs that satisfy the two points. 1) connect to Alzheimer’s Disease  genes but do not connect to Alzheimer’s Disease. 2) connect to body part that is  related to  Alzheimer’s Disease",
"Please provide a list of the 10 most promising drugs for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy, and indicate the diseases they were originally developed to treat. Please rank them in descending order of potential effectiveness. Please check if the generated list meets the requirement: 1) exclude the drugs that were originally designed for Alzheimer’s disease, 2) 10 distinct drugs, 3) rank in descending order according to the potential effectiveness. If not, please regenerate the list that meets the requirement."
]

# question = "Please provide a list of the 10 most promising drugs for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy, and indicate the diseases they were originally developed to treat. Please rank them in descending order of potential effectiveness. Please check if the generated list meets the requirement: 1) exclude the drugs that were originally designed for Alzheimer’s disease, 2) 10 distinct drugs, 3) rank in descending order according to the potential effectiveness. If not, please regenerate the list that meets the requirement."
# question = "Can Bepridil be considered for repurposing in the treatment of Alzheimer’s disease based on their potential efficacy and if so, why?"

# Maps each processed question to the operations of the graph produced for it.
answers = {}
for question in questions:

    def got() -> operations.GraphOfOperations:
        """
        Build the Graph of Operations: a single ``Generate(1, 1)`` operation.

        :return: Graph of Operations
        :rtype: GraphOfOperations
        """
        operations_graph = operations.GraphOfOperations()

        instruction_node = operations.Generate(1, 1)
        operations_graph.append_operation(instruction_node)

        return operations_graph
    # Retrieve the Graph of Operations.
    # This rebinds `got` to the graph, which is why the `def` above lives inside
    # the loop: it has to be recreated for every question.
    got = got()

    config_file = "config.json"

    # Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)
    # lm = language_models.AzureGPT(config_file, model_name="azuregpt4o")
    lm = language_models.AzureGPT(config_file, model_name="azuregpt35-16k")
    vdb = WeaviateClient(config_file)

    # Create the Controller.
    # `ctrl` is reset for every question: the previous iteration deleted it, so
    # without this a failing constructor would raise NameError in the cleanup.
    ctrl = None
    try:
        ctrl = controller.Controller(
            lm,
            got,
            # NOTE(review): arguments are passed positionally here while the other
            # cell uses keywords - confirm they match the ALZKBPrompter signature.
            ALZKBPrompter(vdb,lm),
            ALZKBParser(),
            # The following dictionary is used to configure the initial thought state
            {
                "question": question,
                "input": "",
                "phase": "planning",
                "method" : "got"
            }
        )
        # ctrl.logger.addHandler(logging.StreamHandler(sys.stdout))
        # ctrl.logger.setLevel(logging.INFO)

        # Run the Controller and generate the output graph
        ctrl.run()
    except Exception as e:
        # Best effort: report the failure and still record whatever was produced.
        print("exception:",e)

    if ctrl is not None:
        # print(ctrl.graph.operations)
        # clear up logger
        ctrl.logger.handlers = []
        ctrl.logger = None

        operations_graph = ctrl.graph.operations
        # delete the controller to free up memory
        del ctrl
        answers[question] = operations_graph
        print("question:",question)
        output = []
        # Follow the first successor of each operation to the end of the chain
        # and show the state of the first thought of that last operation.
        operation = operations_graph[0]
        while len(operation.successors) > 0:
            operation = operation.successors[0]
        print(operation.get_thoughts()[0].state)
    # NOTE(review): stops after the first question - looks like a debugging
    # leftover; remove it to process the whole `questions` list.
    break


            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


exception: 'NoneType' object is not subscriptable
question: Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease
{'question': "Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease", 'input': '', 'phase': 'output', 'method': 'got', 'prompt': 'Given the following XML:\n<Instructions>\n    <Step>\n        <StepID>1</StepID>\n        <Instruction>\n            Identify the Drug node for "Benzatropine".\n        </Instruction>\n        <KnowledgeRequest>\n            <KnowledgeID>Knowledge_1</KnowledgeID>\n            <Node>!Benzatropine!</Node>\n        </KnowledgeRequest>\n    </Step>\n    <Step>\n        <StepID>2</StepID>\n        <Instruction>\n            Check if there is a relationship "DRUG TREATS DISEASE" between the Drug node for "Benzatropine" and the Disease node for "Alzheimer\'s Disease".\n        </Instruction>\n        <KnowledgeRequest>\n            <KnowledgeID>Knowledge_2</KnowledgeID>\n            <Node>Drug-DRUG T

In [9]:
# Print, for every recorded question, the phase and input of the final state.
# NOTE: the loop rebinds the kernel-wide names `question` and `operations_graph`.
for question, operations_graph in answers.items():
    print("question:",question)
    # `output` is initialised but not used in this loop.
    output = []
    # Follow the first successor of each operation until the end of the chain.
    operation = operations_graph[0]
    while len(operation.successors) > 0:
        operation = operation.successors[0]
    # Phase and input of the first thought held by the last operation.
    print(operation.get_thoughts()[0].state['phase'], operation.get_thoughts()[0].state['input'])
    print("--------------------------------------------------")
    # break

question: Only answer if this is true or false: Benzatropine can treat Alzheimer's Disease
output 
--------------------------------------------------
question: Only answer if this is true or false: Ropinirole can treat Alzheimer's Disease
output 
--------------------------------------------------
question: Only answer if this is true or false: Haloperidol can treat Alzheimer's Disease
steps [["DRUG TREATS DISEASE", "<drug>", "<disease>"]]
--------------------------------------------------
question: Only answer if this is true or false: Selegiline can treat Alzheimer's Disease
output 
--------------------------------------------------
question: Only answer if this is true or false: Rivastigmine can treat Alzheimer's Disease
steps L
--------------------------------------------------
question: Only answer if this is true or false: Donepezil can treat Alzheimer's Disease
steps )
--------------------------------------------------
question: Only answer if this is true or false: Memantine can

In [10]:
# Display the state of the first thought of the first operation in
# `operations_graph` (whatever value that name currently holds in the kernel).
operations_graph[0].thoughts[0].state

{'question': 'List the body parts/anatomy which over-express both the genes METTL5 and STYXL2',
 'input': ['1. Identify the genes METTL5 and STYXL2.\n2. Determine the relationship that connects genes to body parts that over-express them. (BODYPART OVER EXPRESSES GENE)\n3. Retrieve the body parts that over-express METTL5. \n4. Retrieve the body parts that over-express STYXL2.\n5. Find the common body parts from steps 3 and 4.',
  '1. Extract the nodes for the genes METTL5 and STYXL2 from the knowledge base.\n\n2. Identify the relationship that indicates a body part over-expresses a gene.\n\n3. Extract the body parts/anatomy nodes that are connected to both METTL5 and STYXL2 through the relationship identified in step 2.',
  '1. Identify the genes METTL5 and STYXL2 - These are the specific genes we need to focus on.\n\n2. Determine the relationship between genes and body parts/anatomy - The relationship "BODYPART OVER EXPRESSES GENE" can provide the information we need.\n\n3. Extract the