In [80]:
from langchain_community.tools.pubmed.tool import PubmedQueryRun
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent

In [81]:
# PubMed query tool, constructed with its default settings.
paper_fetcher = PubmedQueryRun()

In [82]:
# Tool list handed to both `llm.bind_tools` and `create_react_agent` further down.
tools = [paper_fetcher]

In [83]:
# Load variables from a .env file first so the API key lookup below can see them.
load_dotenv()
# Chat model shared by the tool-binding demo, the agent, and the RAG `generate` step.
# The key is read from the environment variable named "KEY".
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    api_key=os.environ.get("KEY"),
)

In [84]:
# Bind the tool list to the chat model so its replies can contain tool calls.
tooled_model = llm.bind_tools(tools)

In [85]:
# Send a single user message to the tool-bound model. In the recorded run the
# reply is a `pub_med` tool-call request with empty text content.
response = tooled_model.invoke([HumanMessage(content="Can you tell me the effects of opiates?")])

In [86]:
# Print the full message object, then only its text content. The content is
# empty in the recorded run because the model answered with a tool call.
print(response)
print(response.content)

content='' additional_kwargs={'tool_calls': [{'id': 'call_1yxpId7TbS2k75WHGuOwYSlk', 'function': {'arguments': '{"query":"effects of opiates"}', 'name': 'pub_med'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 92, 'total_tokens': 110, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_7f6be3efb0', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-b433d2e6-c600-48b8-aae4-0176d8223c78-0' tool_calls=[{'name': 'pub_med', 'args': {'query': 'effects of opiates'}, 'id': 'call_1yxpId7TbS2k75WHGuOwYSlk', 'type': 'tool_call'}] usage_metadata={'input_tokens': 92, 'output_tokens': 18, 'total_tokens': 110, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0

In [87]:
# Prebuilt ReAct agent built from the plain chat model (`llm`, not `tooled_model`)
# and the same tool list.
agent_executor = create_react_agent(llm, tools)

In [88]:
# Run the agent on one question; the returned state dict (with its "messages"
# list) is the cell's displayed output.
agent_executor.invoke(
    {"messages": [HumanMessage(content="What genes inherently cause overdose?")]}
)

Too Many Requests, waiting for 0.20 seconds...
Too Many Requests, waiting for 0.40 seconds...


{'messages': [HumanMessage(content='What genes inherently cause overdose?', additional_kwargs={}, response_metadata={}, id='2c773b99-9d30-4ea4-8e08-ca73f19ba088'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_DABKdkNZWL51jkCdXq5r4igJ', 'function': {'arguments': '{"query":"genes causing overdose"}', 'name': 'pub_med'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 87, 'total_tokens': 103, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_7f6be3efb0', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d2080533-4112-4903-b52a-b8c1d540b73b-0', tool_calls=[{'name': 'pub_med', 'args': {'query': 'genes causing overdose'}, 'id': 'call_DABKdkNZWL51jkCdXq5r4igJ', 'type': 'tool_cal

In [13]:
# Tool invoked; model bound to the tool.

In [71]:
#Create embeddings model
from langchain_openai import OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain import hub
from langchain_core.documents import Document
from typing_extensions import TypedDict, List
from langchain_core.vectorstores import InMemoryVectorStore
import pandas as pd
from langchain_core.prompts import PromptTemplate
# Embedding model used by the semantic chunker and the in-memory vector store.
embeddings = OpenAIEmbeddings(
    api_key=os.environ.get("KEY"),
    model="text-embedding-3-large",
)

In [96]:
# Location of the DEG CSV. Set the DEGS_CSV environment variable to point the
# notebook at a different file; when it is unset the original path is used, so
# existing behavior is unchanged.
DEGS_CSV_PATH = os.environ.get(
    "DEGS_CSV",
    "/Users/devammondal/PycharmProjects/GSEAAnalysis/outputs/degs.csv",
)
# The whole table is rendered as plain text because it is pasted verbatim into
# the question sent through the graph later on.
csv_string = pd.read_csv(DEGS_CSV_PATH).to_string()
# Free-text descriptors reused in the PubMed query and in the final question.
cell_subtypes = "NK cells"
experimental_description = "Control vs Experimental IgG"

# Build the retrieval corpus for the RAG system.

# First, create the document loader. The two descriptors are joined with a
# space; plain concatenation produced the fused term "NK cellsControl vs ...".
from langchain_community.document_loaders import PubMedLoader
docs = PubMedLoader(f"{cell_subtypes} {experimental_description}").load_and_split(SemanticChunker(embeddings))
# Create the vector store and index the chunks. Without this indexing step the
# store stays empty and `similarity_search` in `retrieve` has nothing to return.
vector_store = InMemoryVectorStore(embeddings)
if docs:  # skip the embedding call when PubMed returned no documents
    vector_store.add_documents(documents=docs)

# Prompt text for the generation step. `{context}` and `{question}` are the two
# placeholders filled in at run time. The instructions start directly with
# "Avoid filler..."; a leftover `f"` prefix used to be sent to the model as
# literal text.
template = '''
Avoid filler statements, mention specific genes and relevant literature, cite your sources in the format of in line citations and at the end of the paper, focus only on immunology pathways, and give a 3 page paper in an IMRAD (introduction, methods, results, and discussion) format.
{context}

Question: {question}

Answer: '''

# Wrap the raw template in a PromptTemplate; `generate` invokes it with the
# "question" and "context" values.
prompt = PromptTemplate.from_template(template)

class State(TypedDict):
    """State dictionary passed between the `retrieve` and `generate` graph steps."""
    # The user's question; read by both `retrieve` and `generate`.
    question: str
    # Documents returned by `retrieve`; joined into the prompt by `generate`.
    context: List[Document]
    # Text content of the LLM reply, written by `generate`.
    answer: str

def retrieve(state: State):
    """Fetch documents from ``vector_store`` that are similar to the question.

    Args:
        state: Current graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": ...}`` holding whatever
        ``vector_store.similarity_search`` returned for the question.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}

def generate(state: State):
    """Produce an answer from the LLM using the retrieved documents as context.

    Args:
        state: Current graph state; ``state["context"]`` and
            ``state["question"]`` are read.

    Returns:
        A partial state update ``{"answer": ...}`` containing the ``content``
        of the LLM response.
    """
    # Separate the page contents with a blank line before substituting them
    # into the prompt's {context} slot.
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_value = prompt.invoke({"context": context_text, "question": state["question"]})
    llm_reply = llm.invoke(prompt_value)
    return {"answer": llm_reply.content}

In [97]:
from langgraph.constants import START
from langgraph.graph import StateGraph

# Assemble the two-step graph: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
# Entry edge; the node name "retrieve" is taken from the function name.
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [98]:
# Run the RAG graph. The question embeds the full CSV text plus the cell type
# and experiment description; adjacent string literals are concatenated, so the
# resulting question text is one continuous string.
response = graph.invoke(
    {
        "question": (
            "Given the following CSV of gene groups, whether they are upregulated or downregulated, "
            f"as well as corresponding genes: \n\n{csv_string}\n\n, as well as that the cell type "
            f"in question is \n\n{cell_subtypes}\n\n, given that {experimental_description}, "
            "propose new mechanisms for why this causes genes to be upregulated and down regulated "
            "using other relevant IMMUNOLOGY pathways found from the data provided."
        )
    }
)
print(response['answer'])

**Introduction**

Natural Killer (NK) cells are critical components of the innate immune system, known for their ability to recognize and eliminate virally infected cells and tumor cells without prior sensitization. The regulation of gene expression in NK cells is crucial for their function and is influenced by various stimuli, including immunoglobulin G (IgG) interactions. This study aims to explore the differential gene expression in NK cells under experimental conditions involving IgG, focusing on the immunological pathways that may contribute to the observed upregulation and downregulation of specific genes.

**Methods**

The dataset provided includes gene expression data for NK cells under control and experimental IgG conditions. The analysis focused on genes with significant changes in expression, as indicated by log2 fold change and statistical significance (p-value). Genes with a log2 fold change greater than 0.5 and a p-value less than 0.05 were considered for further analysis