In [98]:
!pip install -qU langgraph langchain-community langchain-openai

In [135]:
import os
import getpass
import re
import time
import urllib.request

from langchain.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_openai import ChatOpenAI

In [136]:
def call_api(url: str) -> str:
	time.sleep(1)
	url = url.replace(' ', '+')
	print(url)

	req = urllib.request.Request(url)
	with urllib.request.urlopen(req) as response:
		call = response.read()

	return call

In [142]:

@tool
def call_eutils_api(
    operation: str = "esearch",
    db: str = "gene",
    retmax: int = 5,
    id: str | None = None,
    term: str | None = None
) -> str:
  """
  Calling Eutils API
  Params are needed to create an url by template [https://eutils.ncbi.nlm.nih.gov/entrez/eutils/{esearch|efetch|esummary}.fcgi?db={gene|snp|omim}&retmax={}&{term|id}={term|id}]

  :param operation: esearch (input is a search term and output is database id(s)) | efetch | esummary
  esearch: input is a search term and output is database id(s).
  efectch/esummary: input is database id(s) and output is full records or summaries that contain name, chromosome location, and other information.
  Normally, you need to first call esearch to get the database id(s) of the search term, and then call efectch/esummary to get the information with the database id(s).

  :param db: gene | snp | omim
  Database: gene is for genes, snp is for SNPs, and omim is for genetic diseases.

  :param retmax: max count of retrieving objects

  :params term | id: search params, only one of them have to be passed

  :return: response from Eutils API
  """
  url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/{operation}.fcgi?db={db}&retmode=json&sort=relevance"
  if retmax:
     url += f"&retmax={retmax}"
  if term:
    url += f"&term={term}"
  elif id:
    url += f"&id={id}"

  return call_api(url)


@tool
def call_blast_api(query: str, max_hit_size: int = 5) -> str:
  """
  Calling BLAST API
  Params are needed to create an url by template [https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD={Put|Get}&PROGRAM=blastn&MEGABLAST=on&DATABASE=nt&FORMAT_TYPE={XML|Text}&QUERY={sequence}&HITLIST_SIZE={max_hit_size}]

  :param query: bioinformatics sequence
  BLAST maps a specific DNA sequence to its chromosome location among different specices.

  :params max_hit_size: hitlist size

  :return: response from BLAST API
  """
  put_url = f"https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=PUT&PROGRAM=blastn&MEGABLAST=on&DATABASE=nt&FORMAT_TYPE=Text&QUERY={query}&HITLIST_SIZE={max_hit_size}"

  call = call_api(put_url)
  rid = re.search('RID = (.*)\n', call.decode('utf-8')).group(1)
  get_url = f'https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=Text&RID={rid}'

  time.sleep(30)
  return call_api(get_url)

In [143]:
tools = [call_eutils_api, call_blast_api]

In [144]:
os.environ["OPENAI_API_KEY"] = getpass.getpass()
model = ChatOpenAI(model="gpt-4o-mini")

system_message = (
    "You're a useful GeneGPT-based skin analysis assistant. "
    "Your main functional is analysis of genes associated with skin conditions and diseases, "
    "ranging from basic issues such as acne and wrinkles to various types of skin cancer and "
    "more severe skin conditions. Specifically, test its ability to find genes associated with "
    "specific diseases and check if a certain genetic profile, i.e., from a DNA test, "
    "is associated with any specific skin diseases. Focus on overall well-being, not just skin health, "
    "also check for associations with cardiovascular diseases and age-related mental dysfunctions "
    "such as Parkinson's and dementia. You can use NCBI Web APIs to answer genomic questions. "
    "Follow the user's commands, clarify the missing data."
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

agent = create_tool_calling_agent(model, tools, prompt)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
)

··········


In [148]:
def start(user_input: str):
    try:
        result = agent_executor.invoke({"input": user_input})
        print(result["output"])
    except Exception as e:
        print(e)
        raise e

In [149]:
start("List genes associated with Acne vulgaris.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `call_eutils_api` with `{'operation': 'esearch', 'db': 'gene', 'term': 'Acne vulgaris'}`


[0mhttps://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&retmode=json&sort=relevance&retmax=5&term=Acne+vulgaris
[36;1m[1;3mb'{"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"211","retmax":"5","retstart":"0","idlist":["378938","3673","7042","2678","5292"],"translationset":[],"translationstack":[{"term":"acne vulgaris[All Fields]","field":"All Fields","count":"211","explode":"N"},"GROUP"],"querytranslation":"acne vulgaris[All Fields]"}}\n'[0m[32;1m[1;3m
Invoking: `call_eutils_api` with `{'operation': 'efetch', 'db': 'gene', 'id': '378938'}`


[0mhttps://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=json&sort=relevance&retmax=5&id=378938
[36;1m[1;3mb'\n1. MALAT1\nOfficial Symbol: MALAT1 and Name: metastasis associated lung adenocarcinoma transcript 1 [Homo sapiens 

In [151]:
start("Detect genes (in the DNA test) associated with skin cancer (I don't know specific sequence)")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `call_eutils_api` with `{'operation': 'esearch', 'db': 'gene', 'term': 'skin cancer'}`


[0mhttps://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&retmode=json&sort=relevance&retmax=5&term=skin+cancer
[36;1m[1;3mb'{"header":{"type":"esearch","version":"0.3"},"esearchresult":{"count":"220","retmax":"5","retstart":"0","idlist":["4157","3662","7299","4254","1910"],"translationset":[],"translationstack":[{"term":"skin cancer[All Fields]","field":"All Fields","count":"220","explode":"N"},"GROUP"],"querytranslation":"skin cancer[All Fields]"}}\n'[0m[32;1m[1;3m
Invoking: `call_eutils_api` with `{'operation': 'efetch', 'db': 'gene', 'id': '4157'}`


[0mhttps://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=json&sort=relevance&retmax=5&id=4157
[36;1m[1;3mb'\n1. MC1R\nOfficial Symbol: MC1R and Name: melanocortin 1 receptor [Homo sapiens (human)]\nOther Aliases: CMM5, MSH-R, SHEP2\nOthe

In [None]:
start(input())