In [None]:
!pip install llama-index
!pip install llama-index-llms-ollama

Imports

In [6]:
import sys
import os

# Add ../src (resolved relative to the current working directory) to sys.path so the project modules below can be imported
sys.path.append(os.path.abspath("../src"))

from llama_index.core.agent import ReActAgent
from llama_index.llms.ollama import Ollama
from llama_index.core.tools import FunctionTool
from dotenv import load_dotenv
import neo4j
from queries import get_abstract_pmids
from summarization.article_graph import generate_embedding
from queries import get_functional_term_proteins, cosine_similiarity
from llama_index.core import PromptTemplate

Necessary functions

In [2]:
def get_driver():
    """Create a Neo4j driver from connection settings found in the environment.

    ``load_dotenv()`` is called first, then ``NEO4J_HOST``, ``NEO4J_PORT``,
    ``NEO4J_USERNAME`` and ``NEO4J_PASSWORD`` are read with ``os.getenv``.

    Returns:
        The object returned by ``neo4j.GraphDatabase.driver`` for the
        ``bolt://<host>:<port>`` URI, authenticated with (username, password).
    """
    load_dotenv()

    # Connection endpoint and credentials all come from environment variables.
    host, port = os.getenv("NEO4J_HOST"), os.getenv("NEO4J_PORT")
    credentials = (os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))

    return neo4j.GraphDatabase.driver(f"bolt://{host}:{port}", auth=credentials)

Tools

In [9]:
def fetch_abstracts(query: list, question: str)->str:
    """Fetches protein abstracts, not for functional terms.

    The first argument is the list of proteins in question, the second
    argument is the question that the abstracts should help answer.

    Args:
        query: Proteins to look up; passed to ``get_abstract_pmids`` with the
            species fixed to ``"Mus_Musculus"``.
        question: Question text; it is embedded with ``generate_embedding`` and
            the embedding is handed to ``cosine_similiarity``.

    Returns:
        The abstracts returned by ``cosine_similiarity`` joined by newlines,
        or a hint to try another tool when no abstracts were found.
    """
    driver = get_driver()
    try:
        pmid_records = get_abstract_pmids(driver=driver, species="Mus_Musculus", query=query)
        pmids = [record["PMID"] for record in pmid_records]
        embedded_query = generate_embedding(query=question)
        abstracts = cosine_similiarity(driver=driver, pmids=pmids, embedding=embedded_query)
        if len(abstracts) == 0:
            return "No abstracts found, maybe use another tool?"
        return "abstracts related to the query: " + "\n".join(abstracts)
    finally:
        # Always release the connection, even when a query or the embedding
        # step raises; every tool call opens a fresh driver.
        driver.close()

# Wrap fetch_abstracts as a FunctionTool so it can be handed to the agent.
fetch_protein_abstracts = FunctionTool.from_defaults(fn=fetch_abstracts)

def fetch_proteins_from_functional_terms(funct_term:list) -> str:
    """Queries neo4j to retrieve proteins associated to functional terms.

    Args:
        funct_term: List of functional terms, each written as
            ``funct_term~source`` (for example ``"PWY-5910~BIOCYC"``).

    Returns:
        A sentence listing the associated proteins, or a hint about the
        expected input format when no proteins were found.
    """
    driver = get_driver()
    try:
        proteins = get_functional_term_proteins(driver, funct_term)
    finally:
        # Release the connection even if the query raises.
        driver.close()
    if len(proteins) == 0:
        # Return a plain string (not a one-element list) to honour the
        # declared ``-> str`` return type.
        return "No proteins found, format of input is funct_term~source"
    return f"the associated proteins are: {proteins}"
# Wrap fetch_proteins_from_functional_terms as a FunctionTool so it can be handed to the agent.
protein_from_fn_terms = FunctionTool.from_defaults(fn=fetch_proteins_from_functional_terms)

Adjusting the prompt template

In [10]:
# The llama3.1 model accessed through the Ollama integration drives the agent.
llm = Ollama(model="llama3.1")

# ReAct agent with both tools registered; verbose mode prints each
# Thought/Action/Observation step and the loop is capped at 20 iterations.
agent = ReActAgent.from_tools(
    [protein_from_fn_terms, fetch_protein_abstracts],
    llm=llm,
    verbose=True,
    max_iterations=20,
)
# Custom system header for the ReAct agent. `{tool_desc}` and `{tool_names}` are
# template placeholders (presumably filled in by the agent with the registered
# tools); the doubled braces `{{ ... }}` in the JSON examples are escaped literal braces.
react_system_header_str = """\

You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.

## Tools

You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand. If the question is general, dont use a tool and answer directly.
This may require breaking the task into subtasks and using different tools to complete each subtask. However do not overcomplicate the task by using too many tools.

You have access to the following tools:
{tool_desc}


## Output Format

Please answer in the same language as the question and use the following format:

```
Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
```

Please ALWAYS start with a Thought.

NEVER surround your response with markdown code markers. You may use code markers within your response if you need to.

Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 5}}.

If this format is used, the user will respond in the following format:

```
Observation: tool response
```

You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in one of the following two formats:

```
Thought: I can answer without using any more tools. I'll use the user's language to answer
Answer: [your answer here (In the same language as the user's question)]
```

```
Thought: I cannot answer the question with the provided tools.
Answer: [your answer here (In the same language as the user's question)]
```

## Current Conversation

Below is the current conversation consisting of interleaving human and assistant messages.

"""
# Wrap the custom header in a PromptTemplate and install it as the agent worker's system prompt.
react_system_prompt = PromptTemplate(react_system_header_str)
agent.update_prompts({"agent_worker:system_prompt": react_system_prompt})
# NOTE(review): reset() presumably clears any existing conversation state so the
# next chat starts fresh with the new prompt - confirm against the llama-index docs.
agent.reset()

Call agent

In [11]:
# Alternative example query that targets the functional-term tool (terms use the term~source format):
# response = agent.chat("What are proteins associated in PWY-5910~BIOCYC and PWY0-1305~BIOCYC")
# Ask a protein question; with verbose=True the agent's intermediate steps are printed as well.
response = agent.chat("whats the role of cd40?")
print(response)

> Running step 0eec2f35-ff2c-43e5-a19e-e09568522a0f. Step input: whats the role of cd40?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: fetch_proteins_from_functional_terms
Action Input: {}
[0m[1;3;34mObservation: Error: fetch_proteins_from_functional_terms() missing 1 required positional argument: 'funct_term'
[0m> Running step 2605fbc9-9d3e-47be-8e52-c246ba25c4cd. Step input: None
[1;3;38;5;200mThought: I need another input from the user to pass to the tool. The user should provide a list of functional terms that might be associated with cd40.
Action: fetch_proteins_from_functional_terms
Action Input: {'funct_term': ['immune response', 'co-stimulation']}
[0m[1;3;34mObservation: ['No proteins found, format of input is funct_term~source']
[0m> Running step 0802ba80-1c03-424d-a0db-6c668de493e7. Step input: None
[1;3;38;5;200mThought: It seems like the tool expects a specific format for the input 