In [7]:
# from langchain_neo4j import Neo4jGraph
import os
from dotenv import load_dotenv
from langchain.graphs import Neo4jGraph
from langchain.prompts import PromptTemplate
from langchain.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_openai import ChatOpenAI

In [8]:
load_dotenv()

url = os.getenv("url")
username = os.getenv("username")
password = os.getenv("password")
database = os.getenv("database")

In [9]:
os.environ["OPENAI_API_KEY"]

llm = ChatOpenAI(model_name="gpt-4o")

In [10]:
graph = Neo4jGraph(url=url, username=username, password=password, database=database)

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [11]:
# Check graph connection is working
graph.query("MATCH (n) RETURN COUNT(n)")

[{'COUNT(n)': 129375}]

In [12]:
# Retrieve the graph schema
schema = graph.get_schema

# Create template for generating cypher code using LLM
template = """
Task: Generate a Cypher statement to query the graph database.

Instructions:
Use only relationship types and properties provided in schema.
Do not use other relationship types or properties that are not provided.
You can match the different relationships to come up with the answer to the question
If a name is given, check if any property name contains the given name

schema:
{schema}

Examples:
1. Question: what could be the environmental risk factors for Diabetes?
	Cypher: MATCH (e:exposure)-[:exposure_disease]-(d:disease)
	WHERE d.node_name CONTAINS 'diabetes'
	RETURN e.node_name AS Environmental_Risk_Factor, d.node_name AS Disease

2. Question: What could be the environmental risk factors for Cardiovascular disease?
	Cypher: MATCH (e:exposure)-[:exposure_disease]-(d:disease)
	WHERE d.node_name CONTAINS 'cardiovascular disease'
	RETURN e.node_name AS Environmental_Risk_Factor, d.node_name AS Disease

3. Question: Which occupational hazards are linked to an increased risk of respiratory diseases?
	Cypher: MATCH (e:exposure)-[:exposure_disease]-(d:disease)
	WHERE d.node_name CONTAINS 'respiratory'
	RETURN e.node_name AS Environmental_Risk_Factor, d.node_name AS Disease

4. Question: What are the most common secondary diseases in patients with diabetes or obesity?
	Cypher: MATCH (d1:disease)-[:disease_disease]-(d2:disease),
      		(d1)-[:disease_protein]-(p:gene__protein)
	WHERE d1.node_name CONTAINS 'diabetes' OR d1.node_name CONTAINS 'obesity'
	RETURN d1.node_name AS Primary_Disease, d2.node_name AS Secondary_Disease, p.node_name AS Protein

5. Question: Explore the relationship between environmental exposures and diseases affecting reproduction
	Cypher: MATCH (e:exposure)-[:exposure_disease]-(d:disease),
      		(e)-[:exposure_protein]-(p:gene__protein),
      		(d)-[:disease_protein]-(p)
	WHERE d.node_name CONTAINS 'infertility' OR d.node_name CONTAINS 'ovary' or  d.node_name CONTAINS 'prostate'
	RETURN e.node_name AS Exposure, d.node_name AS Disease, p.node_name AS Protein

Note: Do not include explanations or apologies in your answers.
Do not answer questions that ask anything other than creating Cypher statements.

Question: {question}"""

# Create question prompt
question_prompt = PromptTemplate(
    template=template, 
    input_variables=["schema", "question"] 
)

# Generate Cyper code
qa = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    cypher_prompt=question_prompt,
    verbose=True,
    allow_dangerous_requests=True
)

In [13]:
# question = "What are the major causes of infertility in women"
question = "What are the most used treaments for infertility in women"
result = qa.invoke({"query": question})["result"]



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (dr:drug)-[:indication]->(d:disease) 
WHERE d.node_name CONTAINS 'infertility'
RETURN dr.node_name AS Treatment, d.node_name AS Disease[0m
Full Context:
[32;1m[1;3m[{'Treatment': 'Clomifene', 'Disease': 'female infertility'}][0m

[1m> Finished chain.[0m


In [15]:
# question = "What are the major causes of infertility in women"
question = "I did a test and i have herpatitis, what drugs should i take?"
result = qa.invoke({"query": question})["result"]



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:disease)<-[:indication]-(dr:drug)
WHERE d.node_name CONTAINS 'hepatitis'
RETURN dr.node_name AS Drug, d.node_name AS Disease
[0m
Full Context:
[32;1m[1;3m[{'Drug': 'Ritonavir', 'Disease': 'chronic hepatitis C virus infection'}, {'Drug': 'Ritonavir', 'Disease': 'hepatitis C virus infection'}, {'Drug': 'Lamivudine', 'Disease': 'chronic hepatitis B virus infection'}, {'Drug': 'Telaprevir', 'Disease': 'chronic hepatitis C virus infection'}, {'Drug': 'Telaprevir', 'Disease': 'hepatitis C virus infection'}, {'Drug': 'Simeprevir', 'Disease': 'chronic hepatitis C virus infection'}, {'Drug': 'Simeprevir', 'Disease': 'hepatitis C virus infection'}, {'Drug': 'Grazoprevir', 'Disease': 'chronic hepatitis C virus infection'}, {'Drug': 'Grazoprevir', 'Disease': 'hepatitis C virus infection'}, {'Drug': 'Tenofovir', 'Disease': 'chronic hepatitis B virus infection'}][0m

[1m> Finished chain.[0m


In [16]:
print(result)

Ritonavir, Telaprevir, Simeprevir, and Grazoprevir are drugs indicated for chronic hepatitis C virus infection and hepatitis C virus infection. Lamivudine and Tenofovir are indicated for chronic hepatitis B virus infection.
