In [136]:
from langchain_community.tools.pubmed.tool import PubmedQueryRun
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent

In [137]:
# PubMed query tool built with its default settings; the recorded outputs below
# show the model calling it under the tool name 'pub_med'.
paper_fetcher = PubmedQueryRun()

In [138]:
# Tool list shared by the tool-bound model (bind_tools) and the ReAct agent below.
tools = [paper_fetcher]

In [139]:
# Load the .env file first so the environment lookup below can see its entries.
load_dotenv()
openai_api_key = os.environ.get("KEY")  # None when KEY is not set
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    api_key=openai_api_key,
)

In [140]:
# Attach the tool list to the chat model so its replies can carry tool calls
# (see the tool_calls entry in the printed response below).
tooled_model = llm.bind_tools(tools)

In [141]:
# Single user turn sent to the tool-bound model.
opiate_question = HumanMessage(content="Can you tell me the effects of opiates?")
response = tooled_model.invoke([opiate_question])

In [142]:
# Show the full message object, then just its text content, one per line.
print(response, response.content, sep="\n")

content='' additional_kwargs={'tool_calls': [{'id': 'call_Lk3EMbS7RA8nZ5NUy3WX7UPo', 'function': {'arguments': '{"query":"effects of opiates"}', 'name': 'pub_med'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 92, 'total_tokens': 110, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_831e067d82', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-6aaff82e-a47d-4602-a8d0-f1c34917d901-0' tool_calls=[{'name': 'pub_med', 'args': {'query': 'effects of opiates'}, 'id': 'call_Lk3EMbS7RA8nZ5NUy3WX7UPo', 'type': 'tool_call'}] usage_metadata={'input_tokens': 92, 'output_tokens': 18, 'total_tokens': 110, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0

In [143]:
# Prebuilt ReAct agent over the same chat model and tool list; the run below
# shows it issuing a pub_med tool call for the user's question.
agent_executor = create_react_agent(llm, tools)

In [144]:
# Run the agent on one user question; the returned state dict is the cell output.
overdose_question = HumanMessage(content="What genes inherently cause overdose?")
agent_executor.invoke({"messages": [overdose_question]})

Too Many Requests, waiting for 0.20 seconds...


{'messages': [HumanMessage(content='What genes inherently cause overdose?', additional_kwargs={}, response_metadata={}, id='7fe07375-c805-47ec-898e-e6f90fc9d4f7'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_xqSXR5H0yghMWebgMwT3T5mz', 'function': {'arguments': '{"query":"genes associated with overdose risk"}', 'name': 'pub_med'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 87, 'total_tokens': 105, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_831e067d82', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-5bea024e-efd4-42ac-99de-f557a12dd8e5-0', tool_calls=[{'name': 'pub_med', 'args': {'query': 'genes associated with overdose risk'}, 'id': 'call_xqSXR5H0yghMWebgMw

In [145]:
# Tool invoked; model bound to the tool.

In [146]:
#Create embeddings model
from langchain_openai import OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain import hub
from langchain_core.documents import Document
from typing_extensions import TypedDict, List
from langchain_core.vectorstores import InMemoryVectorStore
import pandas as pd
from langchain_core.prompts import PromptTemplate
# Embedding model used below for both semantic chunking and the in-memory vector
# store; it reads its API key from the same "KEY" environment variable as the chat model.
embeddings = OpenAIEmbeddings(model = "text-embedding-3-large", api_key=os.environ.get("KEY"))

In [151]:
# Free-text labels for the experiment; they are combined into the PubMed search query below.
cell_subtypes = "NK cells"
experimental_description = "Control vs Experimental IgG"

# Render the whole DEG table as a single plain-text string.
# NOTE(review): absolute, machine-specific path — the notebook will not run on
# another machine as-is; consider a configurable data directory.
csv_string = (
    pd.read_csv("/Users/devammondal/PycharmProjects/GSEAAnalysis/outputs/degs.csv")
    .to_string()
)

#Create schema for RAG system

#First, create document loader.
from langchain_community.document_loaders import PubMedLoader
# Join the two search terms with a space. Concatenating them directly produced
# the query "NK cellsControl vs Experimental IgG", fusing two words into one.
pubmed_query = f"{cell_subtypes} {experimental_description}"
# Fetch matching PubMed records and split them into chunks with the
# embedding-based SemanticChunker.
docs = PubMedLoader(pubmed_query).load_and_split(SemanticChunker(embeddings))
#Create vector store
# Index the chunks in memory so retrieve() can run similarity search over them.
vector_store = InMemoryVectorStore(embeddings)
vector_store.add_documents(docs)

# Prompt text for the generation step. {context} is filled with the retrieved
# document text and {question} with the user's question (see generate()).
# The literal previously began with a stray `f"` left over from an f-string,
# which was sent to the model verbatim; it has been removed.
template = '''
Avoid filler statements, mention specific genes and relevant literature, cite your sources in the format of in line citations and at the end of the paper, focus only on immunology pathways, and give a 3  page, dense-paragraph paper in an IMRAD (introduction, methods, results, and discussion) format.
{context}

Question: {question}

Answer: '''

# Turn the template string into a PromptTemplate; generate() fills it via prompt.invoke().
prompt = PromptTemplate.from_template(template)

class State(TypedDict):
    """State dictionary passed between the steps of the RAG graph."""
    question: str  # question text; read by both retrieve() and generate()
    context: List[Document]  # documents returned by retrieve()
    answer: str  # model output text returned by generate()

def retrieve(state: State):
    """Look up documents similar to the question in the vector store.

    Reads ``state["question"]`` and returns a partial state update whose
    ``"context"`` entry holds the similarity-search results.
    """
    return {"context": vector_store.similarity_search(state["question"])}

def generate(state: State):
    """Produce the answer text from the question and the retrieved documents.

    Joins the page content of every document in ``state["context"]`` with blank
    lines, fills the prompt with that text and ``state["question"]``, sends it
    to the chat model, and returns ``{"answer": <model reply content>}``.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_value = prompt.invoke(
        {"context": context_text, "question": state["question"]}
    )
    return {"answer": llm.invoke(prompt_value).content}

In [152]:
from langgraph.constants import START
from langgraph.graph import StateGraph

# Build the two-step RAG graph: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])  # adds both nodes and chains them in order
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [149]:
# NOTE(review): the output below is a dict with a 'question' key, so `response`
# here is presumably the result of invoking the RAG graph, not the AIMessage
# assigned in the earlier tooled_model cell. The cell that produces it is not
# visible, and the execution counts are out of order (151, 152, then 149) —
# confirm the graph invocation exists and runs before this cell.
print(response)

{'question': 'Given the following CSV of gene groups, whether they are upregulated or downregulated, as well as corresponding genes: \n\n       Unnamed: 0  baseMean  log2FoldChange     lfcSE      stat    pvalue      padj           Gene      Rank\n0            Gzmd  0.423668        0.930404  0.826798  1.125310  0.260458  0.994112           Gzmd  0.002386\n1        AY036118  1.268024        0.508615  0.257461  1.975498  0.048212  0.994112       AY036118  0.001305\n2            Bcl2  1.324929        0.470331  0.141301  3.328582  0.000873  0.994112           Bcl2  0.001206\n3           Klra9  0.487408        0.428472  0.246747  1.736485  0.082478  0.994112          Klra9  0.001099\n4          Hspa1a  0.463775        0.417265  0.326464  1.278135  0.201202  0.994112         Hspa1a  0.001070\n5            Gzme  0.195276        0.338621  0.833392  0.406316  0.684510  0.994112           Gzme  0.000869\n6          Gimap9  0.353193        0.338371  0.256336  1.320027  0.186826  0.994112         G

In [150]:
# Show only the generated text stored under the 'answer' key (set by generate()).
print(response['answer'])

**Introduction**

Natural Killer (NK) cells are critical components of the innate immune system, known for their ability to recognize and eliminate virally infected cells and tumor cells without prior sensitization. The regulation of gene expression in NK cells is crucial for their function and is influenced by various immunological pathways. This study aims to explore the differential gene expression in NK cells under experimental conditions involving IgG treatment, focusing on the upregulation and downregulation of specific genes and their potential roles in immunological pathways.

**Methods**

The dataset provided includes gene expression data for NK cells, comparing control and experimental IgG conditions. The analysis focused on genes with significant log2 fold changes, indicating upregulation or downregulation. The genes were further analyzed for their involvement in known immunological pathways using literature review and pathway databases such as KEGG and Reactome. The statist