In [None]:
!pip install crewai
!pip install 'crewai[tools]'

Imports

In [10]:
import sys
import os

# Add the parent directory of the current file's location to the sys.path
sys.path.append(os.path.abspath("../src"))

from dotenv import load_dotenv
import neo4j
from ast import literal_eval
from queries import get_abstracts, get_abstract_pmids
from summarization.article_graph import generate_embedding
from queries import get_functional_term_proteins, cosine_similiarity
from crewai_tools import tool
from crewai import Crew, Process, Agent, Task
from crewai import Agent, LLM

Necessary functions

In [11]:
def get_driver():
    """Create a Neo4j driver from connection settings held in the environment.

    Calls ``load_dotenv()`` first, then reads ``NEO4J_HOST``, ``NEO4J_PORT``,
    ``NEO4J_USERNAME`` and ``NEO4J_PASSWORD`` with ``os.getenv``.

    Returns:
        The object returned by ``neo4j.GraphDatabase.driver`` for the URI
        ``bolt://<NEO4J_HOST>:<NEO4J_PORT>``. This function does not close it.
    """
    load_dotenv()

    # Connection settings; os.getenv yields None for any variable that is unset.
    host, port = os.getenv("NEO4J_HOST"), os.getenv("NEO4J_PORT")
    credentials = (os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))

    return neo4j.GraphDatabase.driver(f"bolt://{host}:{port}", auth=credentials)

def abstracts_for_agent(query):
    """Return the abstracts matching ``query`` as ``{PMID: abstract}`` dicts.

    Args:
        query: String containing a Python literal. It is parsed with
            ``ast.literal_eval`` and the parsed value is passed to
            ``get_abstracts`` as its ``query`` argument.

    Returns:
        list[dict]: One single-entry dict per record returned by
        ``get_abstracts``, mapping the record's ``"PMID"`` to its ``"abstract"``.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            ``query`` is not a valid Python literal.
    """
    # Parse before connecting so a malformed query never opens a driver.
    parsed_query = literal_eval(query)
    driver = get_driver()
    try:
        records = get_abstracts(driver=driver, species="Mus_Musculus", query=parsed_query)
        return [{record["PMID"]: record["abstract"]} for record in records]
    finally:
        # Always release the connection, even when the lookup raises.
        driver.close()

def pmids_for_agent(query):
    """Return the PMIDs of the abstracts matching ``query``.

    Args:
        query: Passed unchanged to ``get_abstract_pmids`` as its ``query``
            argument.

    Returns:
        list: The ``"PMID"`` value of every record returned by
        ``get_abstract_pmids``.
    """
    driver = get_driver()
    try:
        records = get_abstract_pmids(driver=driver, species="Mus_Musculus", query=query)
        return [record["PMID"] for record in records]
    finally:
        # Always release the connection, even when the lookup raises.
        driver.close()

@tool("abstracts fetcher")
def fetch_abstracts(query: list, question: str):
    """Fetches abstracts relevant for question about proteins, not functional terms. First argument is the query, second is the question."""
    # NOTE: the docstring above is kept as a single short line on purpose; it is
    # presumably used by the @tool decorator as the tool description shown to the
    # LLM (confirm against the crewai docs). Only the stray leading quote that the
    # original four-quote opener put into the text has been removed.
    #
    # Steps: look up the PMIDs matching `query`, embed `question`, then pass both
    # to `cosine_similiarity`. Returns the results joined with " \n ", or a hint
    # message when nothing comes back.
    driver = get_driver()
    try:
        pmid_records = get_abstract_pmids(driver=driver, species="Mus_Musculus", query=query)
        pmids = [record["PMID"] for record in pmid_records]
        embedded_query = generate_embedding(query=question)
        abstracts = cosine_similiarity(driver=driver, pmids=pmids, embedding=embedded_query)
    finally:
        # The original never closed the driver, leaking a connection per call.
        driver.close()

    if not abstracts:
        return "No abstracts found, maybe use another tool?"
    return " \n ".join(abstracts)

@tool("protein_from_functional_term_fetcher")
def fetch_proteins_from_functional_terms(funct_term: list) -> list:
    """Queries neo4j to retrieve proteins associated to functional terms."""
    # NOTE: the docstring text is left unchanged on purpose; it is presumably used
    # by the @tool decorator as the tool description shown to the LLM (confirm
    # against the crewai docs).
    #
    # `funct_term` is forwarded as-is to `get_functional_term_proteins`, whose
    # return value is handed back to the caller.
    driver = get_driver()
    try:
        return get_functional_term_proteins(driver, funct_term)
    finally:
        # Always release the connection, even when the query raises.
        driver.close()

Agents

In [12]:
# Shared LLM handle used by every agent built by CustomAgents below: the
# "ollama/llama3.1" model reached at http://localhost:11434, with temperature 0.
llm = LLM(model="ollama/llama3.1", base_url="http://localhost:11434", temperature=0)
class CustomAgents:
    """Factory for the agents used by the crew.

    Every factory method returns a new ``Agent`` that uses the module-level
    ``llm`` (stored on the instance as ``self.Ollama``) and is created with
    ``max_iter=1``.
    """

    def __init__(self):
        # Keep a reference to the shared module-level LLM handle.
        self.Ollama = llm

    def helper_agent(self):
        """Return a general-purpose "Helper" agent with no tools."""
        return Agent(
            role="Helper",
            goal="Help the team",
            backstory="You're an expert biologist, you're here to help the team",
            llm=self.Ollama,
            max_iter=1
        )

    def protein_from_functional_term_fetcher_agent(self):
        """Return the agent that maps functional terms to proteins.

        It is given the ``fetch_proteins_from_functional_terms`` tool and is
        allowed to delegate.
        """
        return Agent(
            role="Functional term to protein fetcher",
            goal="You query the database to retrieve proteins associated to functional terms, functional terms have the format funct_term~source",
            backstory="You're an expert biologist, you focus on retrieving proteins associated to functional terms",
            allow_delegation=True,
            llm=self.Ollama,
            tools=[fetch_proteins_from_functional_terms],
            max_iter=1
        )

    def abstract_fetcher_agent(self):
        """Return the agent that retrieves abstracts.

        It is given the ``fetch_abstracts`` tool and is allowed to delegate.
        """
        return Agent(
            role="Information fetcher",
            goal="Fetch abstracts needed to answer questions regarding protein interactions or functions.",
            # Prompt typo fixed ("ferch" -> "fetch") so the tool is named correctly to the LLM.
            backstory="You're an expert biologist, you focus on retrieving scientific abstracts. You know that the fetch abstracts tool can be used to retrieve abstracts for a given question and has the format fetch_abstracts(query: list, question:str)",
            allow_delegation=True,
            llm=self.Ollama,
            tools=[fetch_abstracts],
            max_iter=1
        )

    def summarization_agent(self):
        """Return the tool-less agent that summarizes information; it may delegate."""
        return Agent(
            role="Information summarizer",
            goal="You summarize information",
            backstory="You're an expert biologist, your main field is protein/gene interactions",
            allow_delegation=True,
            llm=self.Ollama,
            max_iter=1
        )

    def manager_agent(self):
        """Return the "Manager" agent (no tools)."""
        return Agent(
            role="Manager",
            goal="You manage the team",
            # Prompt typo fixed ("retireves" -> "retrieves").
            backstory="You're an expert biologist, you manage a team of biologists. You know that functional terms have the format funct_term~source. The abstract_fetcher_agent retrieves abstracts and the summarization agent summarizes them.",
            llm=self.Ollama,
            max_iter=1
        )

Tasks

In [13]:
# This is an example of how to define custom tasks.
# You can define as many tasks as you want.
# You can also define custom agents in agents.py
class CustomTasks:
    """Factory for the ``Task`` objects handed to the crew."""

    def bionet_helper_task(self, context):
        """Build the task that wraps a single user request.

        Args:
            context: The user's input; it is interpolated into the task
                description.

        Returns:
            Task: A verbose task whose expected output is an answer to the
            question. No agent is assigned to it here.
        """
        prompt = f"Help the user. The user input is: {context}"
        return Task(description=prompt, expected_output="An answer to the question", verbose=True)

Define agents, tasks and start the work

In [14]:
# Instantiate the agents from the factory.
# Note: helper_agent is created here but is not passed to the Crew below.
agents = CustomAgents()

manager = agents.manager_agent()
abstract_fetcher_agent = agents.abstract_fetcher_agent()
summarization_agent = agents.summarization_agent()
helper_agent = agents.helper_agent()
protein_fetcher_agent = agents.protein_from_functional_term_fetcher_agent()

# Build the single task that carries the user's question.
tasks = CustomTasks()

# Alternative example question:
#task = tasks.bionet_helper_task("whats the role of cd40?")
task = tasks.bionet_helper_task("What are proteins associated in PWY-5910~BIOCYC and PWY0-1305~BIOCYC")

# Assemble the crew: hierarchical process, with `manager` as the manager agent.
crew = Crew(
    agents=[protein_fetcher_agent, summarization_agent, abstract_fetcher_agent],
    tasks=[task],
    process=Process.hierarchical,
    manager_agent=manager,
    verbose=True,
)

# Run the crew and show its final answer.
result = crew.kickoff()
print(result)




[1m[95m# Agent:[00m [1m[92mManager[00m
[95m## Task:[00m [92mHelp the user. The user input is: What are proteins associated in PWY-5910~BIOCYC and PWY0-1305~BIOCYC[00m
[1m[95m# Agent:[00m [1m[92mFunctional term to protein fetcher[00m
[95m## Task:[00m [92mfetch proteins associated with PWY-5910~BIOCYC and PWY0-1305~BIOCYC[00m


[1m[95m# Agent:[00m [1m[92mFunctional term to protein fetcher[00m
[95m## Thought:[00m [92mThought: I need to query the database to retrieve proteins associated with PWY-5910~BIOCYC and PWY0-1305~BIOCYC.[00m
[95m## Using tool:[00m [92mprotein_from_functional_term_fetcher[00m
[95m## Tool Input:[00m [92m
"{\"funct_term\": [\"PWY-5910~BIOCYC\", \"PWY0-1305~BIOCYC\"]}"[00m
[95m## Tool Output:[00m [92m
[{'PWY0-1305~BIOCYC': ['GAD1', 'GAD2', 'GLUL']}, {'PWY-5910~BIOCYC': ['ACAT1', 'FDPS', 'ACAT2', 'IDI1', 'MVK', 'PMVK', 'GGPS1', 'HMGCR', 'MVD', 'HMGCS2']}][00m


[1m[95m# Agent:[00m [1m[92mFunctional term to protein fetcher[