In [7]:
import os
from langchain.chat_models import AzureChatOpenAI
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
load_dotenv()

True

In [8]:
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),

)



In [9]:
from langchain_core.documents import Document
text="""Diseases are medical conditions that disrupt normal body functions, ranging from common illnesses like influenza and diabetes to rare disorders such as Wilson’s disease. Each disease manifests through a unique set of symptoms; for example, influenza often causes fever, fatigue, sore throat, and body aches, whereas diabetes may present with excessive thirst, frequent urination, and unexplained weight loss. Accurate identification of symptoms is crucial for effective treatment, as medicines are typically prescribed to alleviate these symptoms and target the underlying condition. Antiviral drugs such as oseltamivir can reduce the severity of influenza, while insulin therapy is essential for managing diabetes. Herbal or traditional medicines, like turmeric for inflammation or ginger for nausea, are sometimes used as complementary treatments. By systematically mapping diseases to their symptoms and corresponding medicines, healthcare systems can improve diagnosis, treatment, and patient outcomes.
"""
doc = Document(page_content=text)
doc

Document(metadata={}, page_content='Diseases are medical conditions that disrupt normal body functions, ranging from common illnesses like influenza and diabetes to rare disorders such as Wilson’s disease. Each disease manifests through a unique set of symptoms; for example, influenza often causes fever, fatigue, sore throat, and body aches, whereas diabetes may present with excessive thirst, frequent urination, and unexplained weight loss. Accurate identification of symptoms is crucial for effective treatment, as medicines are typically prescribed to alleviate these symptoms and target the underlying condition. Antiviral drugs such as oseltamivir can reduce the severity of influenza, while insulin therapy is essential for managing diabetes. Herbal or traditional medicines, like turmeric for inflammation or ginger for nausea, are sometimes used as complementary treatments. By systematically mapping diseases to their symptoms and corresponding medicines, healthcare systems can improve d

In [11]:
llm = AzureChatOpenAI(
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
            azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
            api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
            temperature=0.1,
            max_tokens=1000
        )

In [None]:
from langchain_experimental.graph_transformers import LLMGraphTransformer

llm_transformer = LLMGraphTransformer(llm=llm)

In [36]:
graph_documents = llm_transformer.convert_to_graph_documents([doc])

In [37]:
graph_documents

[GraphDocument(nodes=[Node(id='frequent urination', type='Symptom', properties={}), Node(id='Diseases', type='Medical Condition', properties={}), Node(id='fatigue', type='Symptom', properties={}), Node(id='body aches', type='Symptom', properties={}), Node(id='fever', type='Symptom', properties={}), Node(id='sore throat', type='Symptom', properties={}), Node(id='excessive thirst', type='Symptom', properties={}), Node(id='unexplained weight loss', type='Symptom', properties={}), Node(id='insulin therapy', type='Medicine', properties={}), Node(id='influenza', type='Medical Condition', properties={}), Node(id='diabetes', type='Medical Condition', properties={}), Node(id='turmeric', type='Medicine', properties={}), Node(id='normal body functions', type='Body Function', properties={}), Node(id='oseltamivir', type='Medicine', properties={}), Node(id='ginger', type='Medicine', properties={}), Node(id='inflammation', type='Symptom', properties={}), Node(id='nausea', type='Symptom', properties={

In [38]:
graph_documents[0].nodes

[Node(id='frequent urination', type='Symptom', properties={}),
 Node(id='Diseases', type='Medical Condition', properties={}),
 Node(id='fatigue', type='Symptom', properties={}),
 Node(id='body aches', type='Symptom', properties={}),
 Node(id='fever', type='Symptom', properties={}),
 Node(id='sore throat', type='Symptom', properties={}),
 Node(id='excessive thirst', type='Symptom', properties={}),
 Node(id='unexplained weight loss', type='Symptom', properties={}),
 Node(id='insulin therapy', type='Medicine', properties={}),
 Node(id='influenza', type='Medical Condition', properties={}),
 Node(id='diabetes', type='Medical Condition', properties={}),
 Node(id='turmeric', type='Medicine', properties={}),
 Node(id='normal body functions', type='Body Function', properties={}),
 Node(id='oseltamivir', type='Medicine', properties={}),
 Node(id='ginger', type='Medicine', properties={}),
 Node(id='inflammation', type='Symptom', properties={}),
 Node(id='nausea', type='Symptom', properties={})]

In [70]:
graph_documents[0].

[Relationship(source=Node(id='Diseases', type='Medical Condition', properties={}), target=Node(id='normal body functions', type='Body Function', properties={}), type='DISRUPTS', properties={}),
 Relationship(source=Node(id='influenza', type='Medical Condition', properties={}), target=Node(id='fever', type='Symptom', properties={}), type='HAS_SYMPTOM', properties={}),
 Relationship(source=Node(id='influenza', type='Medical Condition', properties={}), target=Node(id='fatigue', type='Symptom', properties={}), type='HAS_SYMPTOM', properties={}),
 Relationship(source=Node(id='influenza', type='Medical Condition', properties={}), target=Node(id='sore throat', type='Symptom', properties={}), type='HAS_SYMPTOM', properties={}),
 Relationship(source=Node(id='influenza', type='Medical Condition', properties={}), target=Node(id='body aches', type='Symptom', properties={}), type='HAS_SYMPTOM', properties={}),
 Relationship(source=Node(id='diabetes', type='Medical Condition', properties={}), target

In [39]:
# Method 1: Query nodes by type
print("=== Medical Conditions ===")
medical_conditions = [node for node in graph_documents[0].nodes if node.type == 'Medical Condition']
for condition in medical_conditions:
    print(f"- {condition.id}")

print("\n=== Symptoms ===")
symptoms = [node for node in graph_documents[0].nodes if node.type == 'Symptom']
for symptom in symptoms:
    print(f"- {symptom.id}")

print("\n=== Medicines ===")
medicines = [node for node in graph_documents[0].nodes if node.type == 'Medicine']
for medicine in medicines:
    print(f"- {medicine.id}")

=== Medical Conditions ===
- Diseases
- influenza
- diabetes

=== Symptoms ===
- frequent urination
- fatigue
- body aches
- fever
- sore throat
- excessive thirst
- unexplained weight loss
- inflammation
- nausea

=== Medicines ===
- insulin therapy
- turmeric
- oseltamivir
- ginger


In [40]:
# Store the graph documents in Neo4j database
graph.add_graph_documents(graph_documents)

In [42]:
# Method 1: Query all nodes and their types
query = """
MATCH (n) 
RETURN DISTINCT labels(n) as NodeType, collect(n.id) as Entities
ORDER BY NodeType
"""
result = graph.query(query)
print("=== All Node Types and Entities ===")
for record in result:
    print(f"{record['NodeType']}: {record['Entities']}")

=== All Node Types and Entities ===
['Body Function']: ['normal body functions']
['Medical Condition']: ['Diseases', 'influenza', 'diabetes']
['Medicine']: ['insulin therapy', 'turmeric', 'oseltamivir', 'ginger']
['Symptom']: ['frequent urination', 'fatigue', 'body aches', 'fever', 'sore throat', 'excessive thirst', 'unexplained weight loss', 'inflammation', 'nausea']


In [44]:
# Method 2: Query symptoms of a specific disease
query = """
MATCH (disease:MedicalCondition {id: 'influenza'})-[r:HAS_SYMPTOM]->(symptom:Symptom)
RETURN disease.id as Disease, r.type as Relationship, symptom.id as Symptom
"""
result = graph.query(query)
print("=== Influenza Symptoms ===")
for record in result:
    print(f"{record['Disease']} --{record['Relationship']}--> {record['Symptom']}")



=== Influenza Symptoms ===


In [45]:
# Check the actual graph structure
query = """
MATCH (n)-[r]->(m) 
RETURN labels(n) as SourceLabels, n.id as SourceId, type(r) as RelationType, labels(m) as TargetLabels, m.id as TargetId
LIMIT 10
"""
result = graph.query(query)
print("=== Graph Structure ===")
for record in result:
    print(f"{record['SourceLabels']} '{record['SourceId']}' --{record['RelationType']}--> {record['TargetLabels']} '{record['TargetId']}'")

=== Graph Structure ===
['Medical Condition'] 'Diseases' --DISRUPTS--> ['Body Function'] 'normal body functions'
['Medicine'] 'insulin therapy' --MANAGES--> ['Medical Condition'] 'diabetes'
['Medical Condition'] 'influenza' --HAS_SYMPTOM--> ['Symptom'] 'fatigue'
['Medical Condition'] 'influenza' --HAS_SYMPTOM--> ['Symptom'] 'body aches'
['Medical Condition'] 'influenza' --HAS_SYMPTOM--> ['Symptom'] 'fever'
['Medical Condition'] 'influenza' --HAS_SYMPTOM--> ['Symptom'] 'sore throat'
['Medical Condition'] 'diabetes' --HAS_SYMPTOM--> ['Symptom'] 'frequent urination'
['Medical Condition'] 'diabetes' --HAS_SYMPTOM--> ['Symptom'] 'excessive thirst'
['Medical Condition'] 'diabetes' --HAS_SYMPTOM--> ['Symptom'] 'unexplained weight loss'
['Medicine'] 'turmeric' --TREATS--> ['Symptom'] 'inflammation'


In [46]:
# Method 3: Query symptoms of influenza (corrected)
query = """
MATCH (disease:`Medical Condition` {id: 'influenza'})-[r:HAS_SYMPTOM]->(symptom:Symptom)
RETURN disease.id as Disease, type(r) as Relationship, symptom.id as Symptom
"""
result = graph.query(query)
print("=== Influenza Symptoms ===")
for record in result:
    print(f"• {record['Symptom']}")

=== Influenza Symptoms ===
• fatigue
• body aches
• fever
• sore throat


In [47]:
# Method 4: Find medicines that treat specific diseases
query = """
MATCH (medicine:Medicine)-[r:TREATS|MANAGES]->(target)
RETURN medicine.id as Medicine, type(r) as Action, target.id as Target, labels(target) as TargetType
ORDER BY Medicine
"""
result = graph.query(query)
print("=== Medicines and Their Treatments ===")
for record in result:
    print(f"• {record['Medicine']} {record['Action'].lower()} {record['Target']} ({record['TargetType'][0]})")

=== Medicines and Their Treatments ===
• ginger treats nausea (Symptom)
• insulin therapy manages diabetes (Medical Condition)
• oseltamivir treats influenza (Medical Condition)
• turmeric treats inflammation (Symptom)


In [48]:
# Method 5: Find treatment paths (disease -> symptom -> medicine)
query = """
MATCH path = (disease:`Medical Condition`)-[:HAS_SYMPTOM]->(symptom:Symptom)<-[:TREATS]-(medicine:Medicine)
RETURN disease.id as Disease, symptom.id as Symptom, medicine.id as Medicine
"""
result = graph.query(query)
print("=== Treatment Paths: Disease -> Symptom -> Medicine ===")
for record in result:
    print(f"• {record['Disease']} causes {record['Symptom']} → treated by {record['Medicine']}")

=== Treatment Paths: Disease -> Symptom -> Medicine ===


In [49]:
# Method 6: Count nodes by type
query = """
MATCH (n)
RETURN DISTINCT labels(n) as NodeType, count(n) as Count
ORDER BY Count DESC
"""
result = graph.query(query)
print("=== Node Count by Type ===")
for record in result:
    node_type = record['NodeType'][0] if record['NodeType'] else 'Unknown'
    print(f"• {node_type}: {record['Count']} nodes")

=== Node Count by Type ===
• Symptom: 9 nodes
• Medicine: 4 nodes
• Medical Condition: 3 nodes
• Body Function: 1 nodes


In [50]:
# Method 7: Find diseases by symptoms (diagnostic queries)
symptom_input = "fever"  # You can change this to any symptom
query = f"""
MATCH (symptom:Symptom {{id: '{symptom_input}'}})<-[:HAS_SYMPTOM]-(disease:`Medical Condition`)
RETURN disease.id as Disease, symptom.id as Symptom
"""
result = graph.query(query)
print(f"=== Diseases that cause '{symptom_input}' ===")
for record in result:
    print(f"• {record['Disease']}")

# Alternative: Find all diseases and their symptoms for diagnostic lookup
print("\n=== Complete Disease-Symptom Map ===")
query = """
MATCH (disease:`Medical Condition`)-[:HAS_SYMPTOM]->(symptom:Symptom)
RETURN disease.id as Disease, collect(symptom.id) as Symptoms
ORDER BY Disease
"""
result = graph.query(query)
for record in result:
    symptoms_str = ", ".join(record['Symptoms'])
    print(f"• {record['Disease']}: {symptoms_str}")

=== Diseases that cause 'fever' ===
• influenza

=== Complete Disease-Symptom Map ===
• diabetes: frequent urination, excessive thirst, unexplained weight loss
• influenza: fatigue, body aches, fever, sore throat


In [52]:
# Method 8: Advanced queries - Multi-hop relationships and aggregations

# Find the most connected nodes (highest degree centrality) - Fixed for Neo4j 5+
print("=== Most Connected Entities ===")
query = """
MATCH (n)
RETURN n.id as Entity, labels(n)[0] as Type, 
       COUNT { (n)--() } as connections
ORDER BY connections DESC
LIMIT 5
"""
result = graph.query(query)
for record in result:
    print(f"• {record['Entity']} ({record['Type']}): {record['connections']} connections")

# Find all relationship types in the graph
print("\n=== All Relationship Types ===")
query = """
MATCH ()-[r]->()
RETURN DISTINCT type(r) as RelationType, count(r) as Count
ORDER BY Count DESC
"""
result = graph.query(query)
for record in result:
    print(f"• {record['RelationType']}: {record['Count']} relationships")

# Complex query: Find treatment recommendations based on symptoms
print("\n=== Treatment Recommendations ===")
query = """
MATCH (disease:`Medical Condition`)-[:HAS_SYMPTOM]->(symptom:Symptom)
OPTIONAL MATCH (medicine:Medicine)-[:TREATS]->(symptom)
OPTIONAL MATCH (medicine2:Medicine)-[:MANAGES]->(disease)
RETURN disease.id as Disease, 
       collect(DISTINCT symptom.id) as Symptoms,
       collect(DISTINCT medicine.id) + collect(DISTINCT medicine2.id) as Treatments
ORDER BY Disease
"""
result = graph.query(query)
for record in result:
    symptoms = ", ".join(record['Symptoms']) if record['Symptoms'] else "None"
    treatments = [t for t in record['Treatments'] if t]  # Remove None values
    treatments_str = ", ".join(treatments) if treatments else "No specific treatment found"
    print(f"• {record['Disease']}")
    print(f"  Symptoms: {symptoms}")
    print(f"  Treatments: {treatments_str}")
    print()

=== Most Connected Entities ===
• influenza (Medical Condition): 5 connections
• diabetes (Medical Condition): 4 connections
• fatigue (Symptom): 1 connections
• fever (Symptom): 1 connections
• body aches (Symptom): 1 connections

=== All Relationship Types ===
• HAS_SYMPTOM: 7 relationships
• TREATS: 3 relationships
• DISRUPTS: 1 relationships
• MANAGES: 1 relationships

=== Treatment Recommendations ===
• influenza (Medical Condition): 5 connections
• diabetes (Medical Condition): 4 connections
• fatigue (Symptom): 1 connections
• fever (Symptom): 1 connections
• body aches (Symptom): 1 connections

=== All Relationship Types ===
• HAS_SYMPTOM: 7 relationships
• TREATS: 3 relationships
• DISRUPTS: 1 relationships
• MANAGES: 1 relationships

=== Treatment Recommendations ===
• diabetes
  Symptoms: frequent urination, excessive thirst, unexplained weight loss
  Treatments: insulin therapy

• influenza
  Symptoms: fatigue, body aches, fever, sore throat
  Treatments: No specific treatm

## Summary: How to Query Graph Documents in Neo4j

You now have several powerful ways to query your medical knowledge graph:

### Basic Query Patterns:
1. **Query by Node Type**: Filter nodes by their labels (`Medical Condition`, `Symptom`, `Medicine`)
2. **Query by Relationships**: Find connections between specific nodes using relationship types
3. **Query by Properties**: Search for nodes with specific property values

### Advanced Query Patterns:
4. **Multi-hop Queries**: Traverse multiple relationships to find complex patterns
5. **Aggregations**: Count nodes, collect related entities, and compute statistics
6. **Pattern Matching**: Find specific graph patterns like treatment paths
7. **Optional Matches**: Handle cases where relationships might not exist

### Key Neo4j Cypher Concepts Used:
- `MATCH`: Find patterns in the graph
- `RETURN`: Specify what to return from the query
- `WHERE`: Filter results based on conditions
- `OPTIONAL MATCH`: Match patterns that may not exist
- `COLLECT`: Aggregate related nodes into lists
- `COUNT`: Count occurrences
- `ORDER BY`: Sort results
- `LIMIT`: Limit number of results

### Practical Applications:
- **Medical Diagnosis**: Find diseases based on symptoms
- **Treatment Planning**: Discover medicines for specific conditions
- **Knowledge Discovery**: Identify the most connected entities
- **Data Analysis**: Count and analyze relationships in the graph

In [61]:
# Refresh the graph schema to pick up our medical data
graph.refresh_schema()
print("=== Updated Graph Schema ===")
print(graph.schema)

=== Updated Graph Schema ===
Node properties:
Symptom {id: STRING}
Medical Condition {id: STRING}
Medicine {id: STRING}
Body Function {id: STRING}
Relationship properties:

The relationships:
(:Medical Condition)-[:DISRUPTS]->(:Body Function)
(:Medical Condition)-[:HAS_SYMPTOM]->(:Symptom)
(:Medicine)-[:MANAGES]->(:Medical Condition)
(:Medicine)-[:TREATS]->(:Symptom)
(:Medicine)-[:TREATS]->(:Medical Condition)


In [62]:
from langchain.chains import GraphCypherQAChain
# Note: Setting allow_dangerous_requests=True is required for security acknowledgment
chain = GraphCypherQAChain.from_llm(
    llm, 
    graph=graph, 
    verbose=True, 
    allow_dangerous_requests=True
)
chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x000002742E8F0910>, cypher_generation_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['question', 'schema'], input_types={}, partial_variables={}, template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=AzureChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002744FC4B380>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object 

In [66]:
# Now you can ask natural language questions about your medical knowledge graph
question = "What are the symptoms of influenza?"
response = chain.invoke({"query": question})
print("Question:", question)
print("Answer:", response["result"])



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (mc:Medical Condition {id: 'influenza'})-[:HAS_SYMPTOM]->(s:Symptom)
RETURN s.id
[0m


CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'Condition': expected a parameter, '&', ')', ':', 'WHERE', '{' or '|' (line 2, column 19 (offset: 25))
"MATCH (mc:Medical Condition {id: 'influenza'})-[:HAS_SYMPTOM]->(s:Symptom)"
                   ^}

In [67]:
# Fix the issue: Create a custom chain with proper label handling
from langchain_core.prompts import PromptTemplate

# Custom Cypher generation prompt that handles labels with spaces
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: The following labels have spaces and MUST be enclosed in backticks when used in Cypher queries:
- `Medical Condition` (not Medical Condition)
- `Body Function` (not Body Function)

The question is:
{question}"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

# Create a new chain with the custom prompt
chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [68]:
# Test the fixed chain
question = "What are the symptoms of influenza?"
response = chain.invoke({"query": question})
print("Question:", question)
print("Answer:", response["result"])



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (mc:`Medical Condition` {id: 'influenza'})-[:HAS_SYMPTOM]->(s:Symptom)
RETURN s.id AS Symptom
[0m
Full Context:
[32;1m[1;3m[{'Symptom': 'fatigue'}, {'Symptom': 'body aches'}, {'Symptom': 'fever'}, {'Symptom': 'sore throat'}][0m

[1m> Finished chain.[0m
Question: What are the symptoms of influenza?
Answer: Fatigue, body aches, fever, and sore throat are symptoms of influenza.


In [None]:
# Test with more questions
questions = [
    "What medicines treat diabetes?",
    "What are the symptoms of diabetes?",
    "Which medicine treats inflammation?",
    "What diseases cause fever?"
]

for q in questions:
    print(f"\n" + "="*60)
    print(f"Question: {q}")
    try:
        response = chain.invoke({"query": q})
        print(f"Answer: {response['result']}")
    except Exception as e:
        print(f"Error: {str(e)}")