In [1]:
import os
from langchain.chat_models import AzureChatOpenAI
from dotenv import load_dotenv
from langchain_neo4j import Neo4jGraph
load_dotenv()

True

In [2]:
# Let's check what environment variables are being loaded
def describe_env_var(name, secret=False):
    """Return a printable ``NAME: value`` line for an environment variable.

    Args:
        name: Name of the environment variable to look up.
        secret: When True the value itself is never included; the line only
            reports whether the variable is set. Use this for passwords and
            API keys so they do not end up in saved notebook output.

    Returns:
        A string of the form ``"NAME: value"``. Unset non-secret variables
        render as ``None``; secrets render as ``<redacted>`` or ``<not set>``.
    """
    value = os.getenv(name)
    if secret:
        shown = "<redacted>" if value else "<not set>"
    else:
        shown = value
    return f"{name}: {shown}"


print(describe_env_var("NEO4J_2_URI"))
print(describe_env_var("NEO4J_2_USERNAME"))
# Never echo the password: notebook outputs are saved and shared with the file.
print(describe_env_var("NEO4J_2_PASSWORD", secret=True))
print(describe_env_var("NEO4J_2_DATABASE"))

NEO4J_2_URI: neo4j+s://e245fb0c.databases.neo4j.io
NEO4J_2_USERNAME: neo4j
NEO4J_2_PASSWORD: <redacted>
NEO4J_2_DATABASE: neo4j


In [3]:
# Open the connection to Neo4j environment 2. Each constructor argument,
# including the explicit database name, is read from its NEO4J_2_* variable.
graph = Neo4jGraph(
    **{
        option: os.getenv(f"NEO4J_2_{suffix}")
        for option, suffix in (
            ("url", "URI"),
            ("username", "USERNAME"),
            ("password", "PASSWORD"),
            ("database", "DATABASE"),
        )
    }
)

In [4]:
# Smoke-test the connection with a constant-returning query that touches no data
result = graph.query("RETURN 'Connection successful!' as message")
print(f"Connection test result: {result}")

Connection test result: [{'message': 'Connection successful!'}]


In [5]:
# Azure-hosted chat model shared by the QA chains in this notebook.
# Endpoint, key, deployment and API version all come from the environment.
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    temperature=0.1,
    max_tokens=1000,
)

  llm = AzureChatOpenAI(


In [9]:
# Display the schema string held by the graph wrapper (node properties and
# relationship patterns) so the labels used in later queries can be checked.
graph.schema

'Node properties:\nDiseaseSymptomRelationship {fromId: INTEGER, relationshipType: STRING}\nDiseaseTreatmentRelationship {fromId: INTEGER, relationshipType: STRING}\nDisease {id: INTEGER, name: STRING, type: STRING}\nSymptom {id: INTEGER, name: STRING, type: STRING}\nTreatment {id: INTEGER, name: STRING, type: STRING}\nRelationship properties:\n\nThe relationships:\n(:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(:Disease)\n(:DiseaseSymptomRelationship)-[:FOR_SYMPTOM]->(:Symptom)\n(:DiseaseTreatmentRelationship)-[:FOR_DISEASE]->(:Disease)\n(:DiseaseTreatmentRelationship)-[:FOR_TREATMENT]->(:Treatment)'

In [7]:
# Step 6: Set up RAG system for knowledge graph querying
#
# `graph_qa_chain` is resolved in this order:
#   1. GraphCypherQAChain from langchain_neo4j
#   2. GraphCypherQAChain from langchain_community
#   3. SimpleGraphQA, a minimal "LLM writes Cypher, graph runs it" fallback
# The GraphCypherQAChain import is kept at cell level (not inside a function)
# so the name stays bound in the notebook namespace after this cell runs.
import re


def extract_cypher_text(llm_text):
    """Return the bare Cypher statement contained in raw LLM output.

    Chat models frequently wrap the query in a markdown code fence, with or
    without a ``cypher`` language tag. Sending the fence to the database would
    fail, so the fenced body is extracted when one is present.

    Args:
        llm_text: Text returned by the model (may be None or empty).

    Returns:
        The query text with surrounding whitespace and any code fence removed.
    """
    text = (llm_text or "").strip()
    fenced = re.search(r"```(?:cypher)?\s*(.*?)```", text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        text = fenced.group(1).strip()
    return text


class SimpleGraphQA:
    """Fallback question answerer used when GraphCypherQAChain is unusable.

    Instances can be called directly with a question (returning the raw query
    result, or an error string) and also expose ``invoke({"query": ...})`` so
    code written against the chain interface keeps working on the fallback.

    Args:
        llm_client: Object with ``invoke(prompt)`` returning something with a
            ``content`` attribute. Defaults to the notebook-level ``llm``.
        graph_store: Object with a ``get_schema`` attribute and a
            ``query(cypher)`` method. Defaults to the notebook-level ``graph``.
    """

    def __init__(self, llm_client=None, graph_store=None):
        self._llm_client = llm_client
        self._graph_store = graph_store

    def _run(self, question):
        """Generate and execute Cypher; return ``(cypher_query, outcome)``."""
        # Resolve the notebook globals at call time so re-running the cells
        # that create `llm` / `graph` is picked up without rebuilding this object.
        llm_client = llm if self._llm_client is None else self._llm_client
        graph_store = graph if self._graph_store is None else self._graph_store

        schema_info = graph_store.get_schema
        cypher_prompt = f"""
        Based on this Neo4j schema:
        {schema_info}

        Generate a Cypher query to answer: {question}

        Return only the Cypher query, no explanation.
        """

        response = llm_client.invoke(cypher_prompt)
        cypher_query = extract_cypher_text(response.content)
        print(f"Generated Cypher: {cypher_query}")

        try:
            return cypher_query, graph_store.query(cypher_query)
        except Exception as query_error:
            # Best effort by design: report the failure instead of raising.
            return cypher_query, f"Query execution error: {query_error}"

    def __call__(self, question):
        """Answer ``question``; returns the query rows or an error string."""
        return self._run(question)[1]

    def invoke(self, inputs):
        """Chain-style entry point.

        Args:
            inputs: ``{"query": question}`` or the question string itself.

        Returns:
            Dict with ``query``, ``result`` and ``intermediate_steps`` keys,
            laid out like the output of GraphCypherQAChain with
            ``return_intermediate_steps=True``.
        """
        question = inputs["query"] if isinstance(inputs, dict) else inputs
        cypher_query, outcome = self._run(question)
        return {
            "query": question,
            "result": outcome,
            "intermediate_steps": [{"query": cypher_query}, {"context": outcome}],
        }


# Single shared fallback instance (previously defined twice as a function).
simple_graph_qa = SimpleGraphQA()

chain_origin = "langchain_neo4j"
try:
    try:
        # Try using the new GraphCypherQAChain from langchain_neo4j
        from langchain_neo4j import GraphCypherQAChain
    except ImportError:
        print("langchain_neo4j GraphCypherQAChain not available, trying community version...")
        chain_origin = "community"
        from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain
    else:
        print("Setting up knowledge graph RAG system with langchain_neo4j...")

    # Set up GraphCypherQAChain for knowledge graph querying
    graph_qa_chain = GraphCypherQAChain.from_llm(
        llm=llm,
        graph=graph,
        verbose=True,
        return_intermediate_steps=True,
        allow_dangerous_requests=True,  # Required for security acknowledgment
    )

    if chain_origin == "langchain_neo4j":
        print("✓ Graph QA chain setup complete!")
    else:
        print("✓ Community Graph QA chain setup complete!")

except Exception as setup_error:
    # Covers a failed community import as well as chain construction errors.
    if chain_origin == "community":
        print(f"Error with community version: {setup_error}")
        completion_message = "✓ Simple function-based Graph QA setup complete!"
    else:
        print(f"Unexpected error: {setup_error}")
        print("Setting up simple function-based approach...")
        completion_message = "✓ Fallback Graph QA setup complete!"

    graph_qa_chain = simple_graph_qa
    print(completion_message)

Setting up knowledge graph RAG system with langchain_neo4j...
✓ Graph QA chain setup complete!


In [8]:
# Run one sample question through the QA chain as an end-to-end check
test_question = "What are the symptoms of diabetes?"

print("Testing with question:", test_question)
print(50 * "-")

result = graph_qa_chain.invoke({"query": test_question})
print(f"Result: {result}")

Testing with question: What are the symptoms of diabetes?
--------------------------------------------------


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d:Disease {name: 'diabetes'}), (dsr)-[:FOR_SYMPTOM]->(s:Symptom)
RETURN s.name
[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d:Disease {name: 'diabetes'}), (dsr)-[:FOR_SYMPTOM]->(s:Symptom)
RETURN s.name
[0m
Full Context:
[32;1m[1;3m[][0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
Result: {'query': 'What are the symptoms of diabetes?', 'result': "I don't know the answer.", 'intermediate_steps': [{'query': "cypher\nMATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d:Disease {name: 'diabetes'}), (dsr)-[:FOR_SYMPTOM]->(s:Symptom)\nRETURN s.name\n"}, {'context': []}]}

[1m> Finished chain.[0m
Result: {'query': 'What are the symptoms of diabetes?', 'result':

In [14]:
# Helper functions for different types of medical queries (like in notebook 1)
def get_symptoms_for_condition(condition_name):
    """Get all symptoms for a specific medical condition.

    The condition name is sent as a Cypher query parameter rather than being
    interpolated into the query text, so quotes or Cypher fragments in the
    input cannot change the statement that is executed.

    Args:
        condition_name: Case-insensitive substring to match against
            ``Disease.name``. Non-string values are converted with ``str``.

    Returns:
        Whatever ``graph.query`` returns: one row per matching disease with
        ``condition`` and ``symptoms`` keys.
    """
    query = """
    MATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d:Disease),
          (dsr)-[:FOR_SYMPTOM]->(s:Symptom)
    WHERE toLower(d.name) CONTAINS toLower($condition_name)
    RETURN d.name as condition, collect(s.name) as symptoms
    """
    return graph.query(query, params={"condition_name": str(condition_name)})

def get_treatments_for_condition(condition_name):
    """Get all treatments for a specific medical condition.

    The condition name is sent as a Cypher query parameter rather than being
    interpolated into the query text, so quotes or Cypher fragments in the
    input cannot change the statement that is executed.

    Args:
        condition_name: Case-insensitive substring to match against
            ``Disease.name``. Non-string values are converted with ``str``.

    Returns:
        Whatever ``graph.query`` returns: one row per matching disease with
        ``condition`` and ``treatments`` keys.
    """
    query = """
    MATCH (dtr:DiseaseTreatmentRelationship)-[:FOR_DISEASE]->(d:Disease),
          (dtr)-[:FOR_TREATMENT]->(t:Treatment)
    WHERE toLower(d.name) CONTAINS toLower($condition_name)
    RETURN d.name as condition, collect(t.name) as treatments
    """
    return graph.query(query, params={"condition_name": str(condition_name)})

def find_conditions_by_symptoms(symptoms_list):
    """Find medical conditions that have any of the specified symptoms.

    Symptom names are lower-cased and passed as a Cypher list parameter, so
    quotes or Cypher fragments in the input cannot alter the executed query.

    Args:
        symptoms_list: Iterable of symptom names. A single string is treated
            as a one-element list instead of being split into characters.

    Returns:
        Up to 10 rows with ``condition``, ``matching_symptoms`` and
        ``symptom_count`` keys, ordered by ``symptom_count`` descending.
        An empty input returns ``[]`` without querying the database.
    """
    if isinstance(symptoms_list, str):
        symptoms_list = [symptoms_list]

    symptom_names = [str(symptom).lower() for symptom in symptoms_list]
    if not symptom_names:
        # `IN []` can never match, so skip the round trip entirely.
        return []

    query = """
    MATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d:Disease),
          (dsr)-[:FOR_SYMPTOM]->(s:Symptom)
    WHERE toLower(s.name) IN $symptom_names
    RETURN d.name as condition, collect(s.name) as matching_symptoms, count(s) as symptom_count
    ORDER BY symptom_count DESC
    LIMIT 10
    """
    return graph.query(query, params={"symptom_names": symptom_names})

# Emit a confirmation line in the cell output once this point is reached
print("✓ Helper functions created for medical queries!")

✓ Helper functions created for medical queries!


In [15]:
# Advanced Cypher Generation Template with Query Optimization (from notebook 1)
from langchain_core.prompts import PromptTemplate

# Prompt used to turn a user question into Cypher. It is formatted with two
# variables, {schema} and {question}; no other braces may appear in the text.
# The structure notes below mirror the schema reported by the graph: only
# Disease, Symptom and Treatment carry a "name" property, while the two
# *Relationship node types carry fromId and relationshipType.
ADVANCED_CYPHER_TEMPLATE = """You are an expert Neo4j Cypher query generator for a medical knowledge graph.

DATABASE SCHEMA:
{schema}

MEDICAL KNOWLEDGE GRAPH STRUCTURE:
- Nodes: Disease, Symptom, Treatment, DiseaseSymptomRelationship, DiseaseTreatmentRelationship
- Properties: Disease, Symptom and Treatment nodes have a "name" property containing the text value
  (DiseaseSymptomRelationship and DiseaseTreatmentRelationship nodes only have "fromId" and "relationshipType")
- Key Relationships:
  * (DiseaseSymptomRelationship)-[:FOR_DISEASE]->(Disease)
  * (DiseaseSymptomRelationship)-[:FOR_SYMPTOM]->(Symptom)
  * (DiseaseTreatmentRelationship)-[:FOR_DISEASE]->(Disease)
  * (DiseaseTreatmentRelationship)-[:FOR_TREATMENT]->(Treatment)

CRITICAL RULES:
1. Match Disease, Symptom and Treatment nodes on their "name" property, NOT "id"
2. Use toLower() and CONTAINS for flexible matching
3. Use collect() to group results
4. Always add LIMIT for multiple results
5. Follow the relationship pattern through intermediate nodes

QUERY PATTERNS:

For symptoms questions ("What are symptoms of X", "X symptoms", "signs of X"):
MATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d:Disease), 
      (dsr)-[:FOR_SYMPTOM]->(s:Symptom)
WHERE toLower(d.name) CONTAINS toLower("condition_name")
RETURN d.name as condition, collect(s.name) as symptoms
LIMIT 5

For treatment questions ("How to treat X", "treatment for X", "cure for X"):
MATCH (dtr:DiseaseTreatmentRelationship)-[:FOR_DISEASE]->(d:Disease), 
      (dtr)-[:FOR_TREATMENT]->(t:Treatment)
WHERE toLower(d.name) CONTAINS toLower("condition_name")
RETURN d.name as condition, collect(t.name) as treatments
LIMIT 5

For diagnostic questions ("What could cause X", "diseases with X symptom", "conditions with X"):
MATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d:Disease), 
      (dsr)-[:FOR_SYMPTOM]->(s:Symptom)
WHERE toLower(s.name) CONTAINS toLower("symptom_name")
RETURN d.name as condition, count(s) as symptom_matches
ORDER BY symptom_matches DESC
LIMIT 10

For general condition info ("Tell me about X", "What is X"):
MATCH (d:Disease)
WHERE toLower(d.name) CONTAINS toLower("condition_name")
OPTIONAL MATCH (dsr:DiseaseSymptomRelationship)-[:FOR_DISEASE]->(d), (dsr)-[:FOR_SYMPTOM]->(s:Symptom)
OPTIONAL MATCH (dtr:DiseaseTreatmentRelationship)-[:FOR_DISEASE]->(d), (dtr)-[:FOR_TREATMENT]->(t:Treatment)
RETURN d.name as condition, collect(DISTINCT s.name) as symptoms, collect(DISTINCT t.name) as treatments
LIMIT 5

Question: {question}

Generate ONLY the Cypher query without any explanation:"""

# Prompt used to turn the rows returned by the graph into a patient-facing
# answer. It is formatted with {context} (query results) and {question}.
# The empty-context instruction keeps the model from answering medical
# questions from its own knowledge when the graph returned no rows.
QA_TEMPLATE = """You are a helpful medical assistant. Based on the following information from a medical knowledge graph, provide a clear and helpful response to the patient's question.

Context from knowledge graph:
{context}

Question: {question}

Please provide a response as if you are talking to a patient. Be empathetic, clear, and always remind them to consult with healthcare professionals for proper diagnosis and treatment.

Use only the context above. If the context is empty or does not answer the question, say that the knowledge graph has no information on it instead of answering from general knowledge.

Answer:"""

print("✓ Advanced templates created!")

✓ Advanced templates created!


In [36]:
# Build the QA chains that use the custom Cypher and answer prompts
# (like in notebook 1). Both chains are configured identically apart from
# the `verbose` flag, so one helper constructs them.
def _build_optimized_chain(verbose):
    """Return a GraphCypherQAChain wired to the custom prompt templates.

    Args:
        verbose: Passed through to the chain; True makes it log the generated
            Cypher and the retrieved context while running.
    """
    cypher_prompt = PromptTemplate(
        input_variables=["schema", "question"],
        template=ADVANCED_CYPHER_TEMPLATE,
    )
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=QA_TEMPLATE,
    )
    return GraphCypherQAChain.from_llm(
        llm,
        graph=graph,
        verbose=verbose,
        allow_dangerous_requests=True,
        cypher_prompt=cypher_prompt,
        qa_prompt=qa_prompt,
        return_intermediate_steps=True,
    )


# Quiet chain for normal use: avoids duplicate Cypher output
optimized_chain = _build_optimized_chain(verbose=False)

# Verbose twin kept for debugging when needed
optimized_chain_verbose = _build_optimized_chain(verbose=True)

# Create a clean query function that shows only what we want
def ask_medical_question_clean(question, show_cypher=False):
    """Run a medical question through the optimized chain and print the answer.

    Args:
        question: Natural-language question forwarded to the chain.
        show_cypher: When True, the verbose chain is used and the generated
            Cypher query is printed after the answer.

    Returns:
        The result dict produced by the chain's ``invoke`` call.
    """
    # Pick the chain first; only the selected one is ever looked up.
    chain = optimized_chain_verbose if show_cypher else optimized_chain
    result = chain.invoke({"query": question})

    print(f"Question: {question}")
    print(f"Answer: {result['result']}")

    if not show_cypher or 'intermediate_steps' not in result:
        return result

    # The first intermediate step holds the generated Cypher query.
    generated_cypher = result['intermediate_steps'][0]['query']
    print(f"\n🔧 Generated Cypher: {generated_cypher}")
    return result

print("🚀 Improved medical AI assistant chain created!")

# Interactive check: read one question from the user and run it through the
# verbose chain so the generated Cypher is shown alongside the answer.
test_question = input("Enter your medical question: ")
print("\n🧪 Testing improved template (clean output): " + test_question)
print(50 * "-")

try:
    result = ask_medical_question_clean(test_question, show_cypher=True)
except Exception as error:
    # Report the failure in the cell output instead of raising.
    print(f"Error: {error}")

🚀 Improved medical AI assistant chain created!

🧪 Testing improved template (clean output): how to treat type 2 diabetes
--------------------------------------------------


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (dtr:DiseaseTreatmentRelationship)-[:FOR_DISEASE]->(d:Disease), 
      (dtr)-[:FOR_TREATMENT]->(t:Treatment)
WHERE toLower(d.name) CONTAINS toLower("type 2 diabetes")
RETURN d.name as condition, collect(t.name) as treatments
LIMIT 5[0m
Full Context:
[32;1m[1;3m[{'condition': 'Type 2 Diabetes', 'treatments': ['Exercise', 'Blood Sugar Monitoring', 'Oral Medications', 'Insulin Therapy (In Some Cases)']}][0m

[1m> Finished chain.[0m
Question: how to treat type 2 diabetes
Answer: Treating type 2 diabetes involves a combination of lifestyle changes and, in some cases, medications. Here are some key steps that can help manage the condition:

1. **Exercise**: Regular physical activity is very important. It helps your body use insu