In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so edits to
# imported modules are picked up without restarting the kernel
# (presumably for the `tools` module imported in the next cell).
%load_ext autoreload
%autoreload 2

In [2]:
from tools import query_vector_db_articles, get_smiles_from_pubchem, predict_energy_from_smiles
# Titles of the PubMed articles referenced by the questions in this notebook.
# The literals are split only for line length; adjacent string literals are
# concatenated by the parser, so the resulting values are unchanged.
title = (
    "Targeted therapy in advanced non-small cell lung cancer: "
    "current advances and future trends."
)
title2 = (
    "Low titers of SARS-CoV-2 neutralizing antibodies after first vaccination dose "
    "in cancer patients receiving checkpoint inhibitors."
)

  from tqdm.autonotebook import tqdm


In [3]:
## Reset the Pinecone "pubmed" index.
## WARNING: destructive. This cell issues a delete-all request against the
## index every time it runs (including on "Restart & Run All").
import os
from pinecone.grpc import PineconeGRPC as Pinecone

# The API key comes from the environment; the lookup yields None when the
# variable is unset.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

# Handle to the index that is being cleared.
index = pc.Index("pubmed")

index.delete(delete_all=True)



In [5]:
# Completion LLM used by the agent below.
from langchain_cohere import ChatCohere

# temperature=0.0 is requested for this demo, with a 4096-token cap on
# each generation.
llm = ChatCohere(
    model="command-r-plus",
    temperature=0.0,
    max_tokens=4096,
)

In [6]:
from langchain.agents import AgentExecutor, create_react_agent
from langchain_cohere.react_multi_hop.agent import create_cohere_react_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.memory import ChatMessageHistory

# Preamble: system-level instructions describing the agent's role and tools.
preamble = """
You are an expert who answers the user's question with the most relevant datasource.
You must answer in a precise and concise manner.
You are equipped with a vectordatabase about scientific articles,
a tool to get the SMILES string of a given molecule name,
and a tool to predict the energy of a molecule from its SMILES string.
"""

# Prompt template: the user's question is forwarded verbatim.
prompt = ChatPromptTemplate.from_template("{input}")

# Single source of truth for the tool set, so the agent and its executor
# cannot drift apart.
agent_tools = [
    query_vector_db_articles,
    get_smiles_from_pubchem,
    predict_energy_from_smiles,
]

# Create the ReAct agent
agent = create_cohere_react_agent(
    llm=llm,
    tools=agent_tools,
    prompt=prompt,
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=agent_tools,
    verbose=True,
    handle_parsing_errors=True,
)

# The Cohere ReAct agent takes its preamble from the "preamble" key of the
# input dict. The invoke calls in this notebook only pass "input", so without
# this step the preamble defined above would never reach the model. A caller
# can still override it by passing its own "preamble" key.
agent_executor_with_preamble = (
    RunnablePassthrough.assign(preamble=lambda inputs: inputs.get("preamble", preamble))
    | agent_executor
)

# One in-memory history object shared by every turn of the conversation.
memory = ChatMessageHistory(session_id="test-session")

agent_with_chat_history = RunnableWithMessageHistory(
    agent_executor_with_preamble,
    # This is needed because in most real world scenarios, a session id is needed
    # It isn't really used here because we are using a simple in memory ChatMessageHistory
    lambda session_id: memory,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [7]:
# First turn: ask which disease the paper named in `title` is about.
question = "What disease are they discussing in the paper '{}'?".format(title)

response = agent_with_chat_history.invoke(
    {"input": question},
    config={"configurable": {"session_id": "<foo>"}},
)
print(response["output"])

Parent run b2d44ea3-194c-4bc8-b796-773431b2c2b8 not found for run 5c4003e2-6eb2-45e7-9b3b-14d95dbb0f85. Treating as a root run.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
I will search for the paper 'Targeted therapy in advanced non-small cell lung cancer: current advances and future trends' and then write an answer based on the information I find.
{'tool_name': 'query_vector_db_articles', 'parameters': {'query': 'Targeted therapy in advanced non-small cell lung cancer: current advances and future trends', 'metadata': {'title': 'Targeted therapy in advanced non-small cell lung cancer: current advances and future trends'}}}
[0mNo results found, Looking for the article online
Found a similar title with a similarity score of 99:
Targeted therapy in advanced non-small cell lung cancer: current advances and future trends.
Title: Targeted therapy in advanced non-small cell lung cancer: current advances and future trends
Authors: Umair Majeed, Rami Manochakian, Yujie Zhao, Yanyan Lou
Journal: Journal of Hematology &amp; Oncology
Published Date: 2021-07-08
Open access PDF available at: https://jhoon

In [8]:
# Follow-up turn: the question does not name the paper, so it relies on the
# conversation history kept by the wrapper.
question = "Summarize the main takeaways from the paper."

response = agent_with_chat_history.invoke(
    {"input": question},
    config={"configurable": {"session_id": "<foo>"}},
)
print(response["output"])

Parent run f60c5b58-78ad-4aa5-9bca-bfad2191428f not found for run 0c785632-1948-4d24-87a0-d1900d600254. Treating as a root run.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
I will read the paper and summarise its main points.
{'tool_name': 'query_vector_db_articles', 'parameters': {'query': 'Targeted therapy in advanced non-small cell lung cancer: current advances and future trends.', 'metadata': {'title': 'Targeted therapy in advanced non-small cell lung cancer: current advances and future trends.'}}}
[0m[36;1m[1;3m[Document(page_content='Majeed\xa0 et\xa0al. J Hematol Oncol          (2021) 14:108  \nhttps://doi.org/10.1186/s13045-021-01121-2\nREVIEW\nTargeted therapy in\xa0advanced non-small cell \nlung cancer: current advances and\xa0future trends\nUmair Majeed, Rami Manochakian, Yujie Zhao and Yanyan Lou*  \nAbstract \nLung cancer remains the leading cause of cancer-related mortality in both men and women in the US and worldwide. \nNon-small cell lung cancer is the most common variety accounting for 84% of the cases. For a subset of patients with \nactionable mutations, targeted therapy 

In [9]:
# Follow-up turn: ask for the protein targets highlighted in the same paper.
question = "Can you list the protein targets they highlight in the paper?"

response = agent_with_chat_history.invoke(
    {"input": question},
    config={"configurable": {"session_id": "<foo>"}},
)
print(response["output"])


Parent run 2ebd8d23-08af-4595-ac6d-f24f4f40c303 not found for run 22487288-6b05-4361-a8bd-d876e93a0d9c. Treating as a root run.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
I will read the paper to find the protein targets mentioned.
{'tool_name': 'query_vector_db_articles', 'parameters': {'query': 'protein targets', 'metadata': {'title': 'Targeted therapy in advanced non-small cell lung cancer: current advances and future trends.'}}}
[0m[36;1m[1;3m[Document(page_content='Page 20 of 20 Majeed\xa0et\xa0al. J Hematol Oncol          (2021) 14:108 \n•\n fast, convenient online submission\n •\n  thorough peer review by experienced researchers in your ﬁeld\n• \n rapid publication on acceptance\n• \n support for research data, including large and complex data types\n•\n  gold Open Access which fosters wider collaboration and increased citations \n maximum visibility for your research: over 100M website views per year •\n  At BMC, research is always in progress.\nLearn more biomedcentral.com/submissionsReady to submit y our researc h Ready to submit y our researc h  ?  Choose BMC and benefit fr om: 

In [10]:
# Follow-up turn: ask for the small molecule drugs highlighted in the same paper.
question = "Can you list the small molecule drugs they highlight in the paper?"

response = agent_with_chat_history.invoke(
    {"input": question},
    config={"configurable": {"session_id": "<foo>"}},
)
print(response["output"])

Parent run 2999926a-510c-4279-9787-94a56beb9a84 not found for run f8fd1716-7a8e-4b59-888c-a9d7a031c5d0. Treating as a root run.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
I will read the paper to find the small molecule drugs highlighted in it.
{'tool_name': 'query_vector_db_articles', 'parameters': {'query': 'small molecule drugs', 'metadata': {'title': 'Targeted therapy in advanced non-small cell lung cancer: current advances and future trends.'}}}
[0m[36;1m[1;3m[Document(page_content='Page 15 of 20\n Majeed\xa0et\xa0al. J Hematol Oncol          (2021) 14:108 \n \nTable 13 Current ongoing early phase I/II trials with miscellaneous target inhibitors\nDrug name Mechanism of action Clinical trial (phase) Study design Disease\nERBB2(HER2) inhibitors\nAdo-trastuzumab emtansine HER2-targeted antibody–drug \nconjugateNCT03784599 (1/2) Combination with osimertinib EGFR mutant advanced NSCLC\nMobocertinib (TAk-788) TKI targeting EGFR ex20ins \nmutationsNCT02716116 (1/2) Combination with chemo -\ntherapyEGFR and HER2 exon 20 inser -\ntion in advanced NSCLC\nPoziotinib EGFR, HER2 and HER 4 inhibi-\

In [11]:
# Follow-up turn: "the first small molecule you found" refers back to the
# previous answer, so this depends on the stored conversation history.
question = "Can you find the SMILES string of the first small molecule you found?"

response = agent_with_chat_history.invoke(
    {"input": question},
    config={"configurable": {"session_id": "<foo>"}},
)
print(response["output"])

Parent run 329f02ba-bb61-4c60-ab8a-855d8c7e49f4 not found for run be8eee41-8237-454d-bf55-39f61c5a775c. Treating as a root run.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
I will search for the SMILES string of Osimertinib.
{'tool_name': 'get_smiles_from_pubchem', 'parameters': {'name': 'Osimertinib'}}
[0m[33;1m[1;3mCN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC[0m[32;1m[1;3mRelevant Documents: 0
Cited Documents: 0
Answer: The SMILES string for Osimertinib is: CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC.
Grounded answer: The SMILES string for Osimertinib is: <co: 0>CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC</co: 0>.[0m

[1m> Finished chain.[0m
The SMILES string for Osimertinib is: CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC.


In [12]:
# Follow-up turn: "its" refers to the molecule from the previous turn, so
# this depends on the stored conversation history.
question = "Can you predict its energy value?"

response = agent_with_chat_history.invoke(
    {"input": question},
    config={"configurable": {"session_id": "<foo>"}},
)
print(response["output"])

Parent run 46f0edad-3871-481c-aaf8-a1d816342219 not found for run bc1dd1f5-62a1-4343-8e31-c99c270d9664. Treating as a root run.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
I will use the 'predict_energy_from_smiles' tool to predict the energy value of the SMILES string for Osimertinib.
{'tool_name': 'predict_energy_from_smiles', 'parameters': {'smiles': 'CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC'}}
[0mPrediction for CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=C4)NC(=O)C=C)N(C)CCN(C)C)OC: -11.39196263310017
[38;5;200m[1;3m-11.39196263310017[0m[32;1m[1;3mRelevant Documents: 0
Cited Documents: 0
Answer: The predicted energy value of Osimertinib is -11.39196263310017.
Grounded answer: The predicted energy value of Osimertinib is <co: 0>-11.39196263310017</co: 0>.[0m

[1m> Finished chain.[0m
The predicted energy value of Osimertinib is -11.39196263310017.
