# Using Knowledge Graph

In [None]:
! pip install pandas langchain_neo4j langchain_core langchain_ollama

In [6]:
# 1. Importing Required Libraries
from neo4j import GraphDatabase
from langchain_neo4j import Neo4jGraph

# 2. Connect to Neo4j
NEO4J_URI="neo4j+s://07561b92.databases.neo4j.io"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD="2wQMIQq_1cANjkh837eMSHUsYBH2F-XLialhvPmM3Bw"

URI = NEO4J_URI
AUTH = (NEO4J_USERNAME,NEO4J_PASSWORD) #AUTH = ("<Username>", "<Password>")
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()


graph=Neo4jGraph(url=NEO4J_URI,username=NEO4J_USERNAME,password=NEO4J_PASSWORD)
print(graph)
print("Connected to Neo4j graph.")

<langchain_neo4j.graphs.neo4j_graph.Neo4jGraph object at 0x00000221AA90F020>
Connected to Neo4j graph.


In [7]:
graph.query("""MATCH (n) DETACH DELETE n   // Delete all existing nodes and relationships""")
print("Suceesfully deleted all nodes and relations.")

Suceesfully deleted all nodes and relations.


In [8]:
# 3. Seed the Database (Load data from CSV)
graph.query("""
// Load CSV and create nodes and relationships
LOAD CSV WITH HEADERS FROM 'file:///C:/Users/admin/OneDrive/Desktop/Symptoms_based_disease_prediction/Datasets/preprocessed_data.csv' AS row
WITH row, split(row.Symptoms, ', ') AS symptoms_list // Split the symptoms string into a list
MERGE (d:Disease {name: row.Prognosis})  // Create nodes for Diseases
FOREACH (s IN symptoms_list |  // Iterate over each symptom
  MERGE (sym:Symptom {name: s})  // Create nodes for Symptoms
  MERGE (sym)-[:HAS_SYMPTOM]->(d)  // Create the relationship between the Symptom and Disease
)
""")


# 4. Refresh Schema (After inserting data, refresh the schema for changes)
graph.refresh_schema()
print(graph.schema)

ClientError: {code: Neo.ClientError.Statement.ExternalResourceFailed} {message: Cannot load from URL 'file:///C:/Users/admin/OneDrive/Desktop/Symptoms_based_disease_prediction/Datasets/preprocessed_data.csv': configuration property 'dbms.security.allow_csv_import_from_file_urls' is false ()}

In [None]:
from langchain_neo4j import Neo4jGraph, GraphCypherQAChain
from langchain_core.prompts.prompt import PromptTemplate
from langchain_ollama import ChatOllama


# 5. Define the Schema for the Knowledge Graph
SCHEMA = """
Nodes:
- Disease: {name: string}
- Symptom: {name: string}
Relationships:
- (Disease)-[:HAS_SYMPTOM]->(Symptom)
"""

# 6. Define the User Query (Question)
question = "What diseases are associated with itching, skin_rash, nodal_skin_eruptions?"

# 7. Define the Cypher Query Generation Template
CYPHER_GENERATION_TEMPLATE = """Task: Generate a Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: 
# What diseases are associated with fever and headache?
MATCH (s:Symptom)-[:HAS_SYMPTOM]->(d:Disease)
WHERE s.name IN ["fever", "headache"]
RETURN d.name AS Diseases

The question is:
{question}"""

# 8. Create the Cypher Query Generation Prompt
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

# 9. Initialize the LLM
llm = ChatOllama(model="deepseek-r1:1.5b", temperature=0.8)

# 10. Set up the GraphCypherQAChain to handle graph queries
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    allow_dangerous_requests=True
)

In [None]:
# 11. Invoke the Query Chain and Get the Result
result = chain.invoke({"query": question})

# 12. Print the Result
print(result)

In [None]:
# 1. Importing Required Libraries
from langchain_neo4j import Neo4jGraph, GraphCypherQAChain
from langchain_core.prompts.prompt import PromptTemplate
from langchain_ollama import ChatOllama

# 2. Connect to Neo4j
graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="password")

# 3. Seed the Database (Load data from CSV)
graph.query("""
LOAD CSV WITH HEADERS FROM 'file:///path/to/data.csv' AS row
MERGE (d:Disease {name: row.Disease})  // Create nodes for Diseases
MERGE (s:Symptom {name: row.Symptom})  // Create nodes for Symptoms
MERGE (s)-[:HAS_SYMPTOM]->(d)  // Create the relationship between the Disease and Symptom
""")

# 4. Refresh Schema (After inserting data, refresh the schema for changes)
graph.refresh_schema()
print(graph.schema)

# 5. Define the Schema for the Knowledge Graph
SCHEMA = """
Nodes:
- Disease: {name: string}
- Symptom: {name: string}
Relationships:
- (Disease)-[:HAS_SYMPTOM]->(Symptom)
"""

# 6. Define the User Query (Question)
question = "What diseases are associated with itching, skin_rash, nodal_skin_eruptions?"

# 7. Define the Cypher Query Generation Template
CYPHER_GENERATION_TEMPLATE = """Task: Generate a Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: 
# What diseases are associated with fever and headache?
MATCH (s:Symptom)-[:HAS_SYMPTOM]->(d:Disease)
WHERE s.name IN ["fever", "headache"]
RETURN d.name AS Diseases

The question is:
{question}"""

# 8. Create the Cypher Query Generation Prompt
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

# 9. Initialize the LLM
llm = ChatOllama(model="deepseek-r1:1.5b", temperature=0.8)

# 10. Set up the GraphCypherQAChain to handle graph queries
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    allow_dangerous_requests=True
)

# 11. Invoke the Query Chain and Get the Result
result = chain.invoke({"query": question})

# 12. Print the Result
print(result)


**1. Neo4j Connection Setup**

In [None]:
from neo4j import GraphDatabase
from langchain_neo4j import Neo4jGraph

NEO4J_URI="neo4j+s://5cc62887.databases.neo4j.io"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD="qMTz6UOEbEiMp_PCxwBKMoiizlRH5MPUFbOnWSkKldk"

URI = NEO4J_URI
AUTH = (NEO4J_USERNAME,NEO4J_PASSWORD) #AUTH = ("<Username>", "<Password>")
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()


graph=Neo4jGraph(url=NEO4J_URI,username=NEO4J_USERNAME,password=NEO4J_PASSWORD)
print(graph)
print("Connected to Neo4j graph.")

**2. Construct the Knowledge Graph in Neo4j**

In [None]:
graph.query("MATCH (n) DETACH DELETE n")
print("Graph cleared.")

In [None]:
import pandas as pd

# Load the CSV data
df = pd.read_csv('processed.csv')

# Extract symptom and disease data
symptoms = df.columns[:-1].tolist()  # All columns except 'prognosis'
diseases = df['prognosis'].unique().tolist()  # Unique diseases from the 'prognosis' column

def create_graph(graph, symptoms, diseases, df):
    # Create Symptom nodes using MERGE (avoiding duplicates)
    for symptom in symptoms:
        graph.query("""
            MERGE (:Symptom {name: $name})
        """, params={"name": symptom})
    
    # Create Disease nodes using MERGE
    for disease in diseases:
        graph.query(""" (:Disease {name: $name})""", params={"name": disease})
    
    # Create relationships between symptoms and diseases
    for _, row in df.iterrows():
        disease = row['prognosis']  # Disease name from the 'prognosis' column
        for symptom in symptoms:
            if row[symptom] == 1:  # If the symptom is present (value is 1)
                graph.query("""
                    MATCH (s:Symptom {name: $symptom}), (d:Disease {name: $disease})
                    MERGE (s)-[:ASSOCIATED_WITH]->(d)
                """, params={"symptom": symptom, "disease": disease})

# Create the graph
create_graph(graph, symptoms, diseases, df)
print("Knowledge graph constructed successfully!")


**3. Define Schema, Question, and Cypher Template; Initialize LLM and Create Query Chain**

In [90]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_ollama import ChatOllama
from langchain_neo4j import GraphCypherQAChain


# Define the schema for the knowledge graph
SCHEMA = """
Nodes:
- Disease: {name: string}
- Symptom: {name: string}
Relationships:
- (Disease)-[:HAS_SYMPTOM]->(Symptom)
"""


# Define the user query (question)
question = "What diseases are associated with itching, skin_rash, nodal_skin_eruptions?"

CYPHER_GENERATION_TEMPLATE = """Task: Generate a Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:
# What diseases are associated with fever and headache?
MATCH (s:Symptom)-[:ASSOCIATED_WITH]->(d:Disease)
WHERE s.name IN ["fever", "headache"]
RETURN d.name AS Diseases

The question is:
{question}"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

# Initialize the LLM
llm = ChatOllama(model="deepseek-r1:1.5b", temperature=0.8)

# Set up the GraphCypherQAChain to handle graph queries
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    allow_dangerous_requests=True
)

**4.Execute Query on Graph and Display Results**

In [None]:
# Define the schema for the knowledge graph
SCHEMA = """
Nodes:
- Disease: {name: string}
- Symptom: {name: string}
Relationships:
- (Disease)-[:HAS_SYMPTOM]->(Symptom)
"""

# Define the user query (question)
question = "What diseases are associated with itching, skin_rash, nodal_skin_eruptions?"# Run the query chain with the schema and the question

result = chain.run({"schema": SCHEMA, "query": question})

# Display the result
print("Diseases associated:", result)