# Insertion of graph into Neo4j

In [65]:
import pandas as pd
from neo4j import GraphDatabase
import os
from getpass import getpass

In [66]:
import dotenv
# Load environment variables from .env file
dotenv.load_dotenv()

True

In [67]:
path = os.getcwd()
full_path = path.replace('graphRAG', 'dataset\health_reports_data.csv')
df = pd.read_csv(full_path)

In [68]:
# Standardize all text data to lowercase for consistency
for col in ['actual_disease', 'intervention_target_disease', 'region', 'intervention_region', 'severity', 'day_of_week']:
    df[col] = df[col].astype(str).str.lower()

# Convert to datetime objects
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['intervention_start_date'] = pd.to_datetime(df['intervention_start_date'].fillna(pd.Timestamp('1970-01-01')))

# Add unique IDs
df['case_id'] = [f"case_{i}" for i in range(len(df))]
df['patient_id'] = [f"patient_{i}" for i in range(len(df))]

In [69]:
records = df.to_dict('records')

In [43]:
NEO4J_URI      = os.getenv("NEO4J_URI")
NEO4J_USER     = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

In [32]:
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [33]:
constraints = [
    "CREATE CONSTRAINT IF NOT EXISTS FOR (p:Patient) REQUIRE p.patientId IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (c:Case) REQUIRE c.caseId IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (d:Disease) REQUIRE d.name IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (s:Symptom) REQUIRE s.name IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (r:Region) REQUIRE r.name IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (i:Intervention) REQUIRE i.id IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (y:Year) REQUIRE y.year IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (sl:SeverityLevel) REQUIRE sl.level IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (dc:DiseaseCategory) REQUIRE dc.name IS UNIQUE",
]

In [34]:
with driver.session(database="neo4j") as session:
    for constraint in constraints:
        try:
            session.run(constraint)
            print(f"Applied or verified constraint: {constraint.split('FOR')[1].strip()}")
        except Exception as e:
            print(f"Could not apply constraint: {constraint}. Error: {e}")

Applied or verified constraint: (p:Patient) REQUIRE p.patientId IS UNIQUE
Applied or verified constraint: (c:Case) REQUIRE c.caseId IS UNIQUE
Applied or verified constraint: (d:Disease) REQUIRE d.name IS UNIQUE
Applied or verified constraint: (s:Symptom) REQUIRE s.name IS UNIQUE
Applied or verified constraint: (r:Region) REQUIRE r.name IS UNIQUE
Applied or verified constraint: (i:Intervention) REQUIRE i.id IS UNIQUE
Applied or verified constraint: (y:Year) REQUIRE y.year IS UNIQUE
Applied or verified constraint: (sl:SeverityLevel) REQUIRE sl.level IS UNIQUE
Applied or verified constraint: (dc:DiseaseCategory) REQUIRE dc.name IS UNIQUE


In [70]:
ingestion_query = """
UNWIND $records AS row

// 1. MERGE core entities
MERGE (patient:Patient {patientId: row.patient_id})
  ON CREATE SET
    patient.age      = toInteger(row.age),
    patient.ageGroup = row.age_group,
    patient.gender   = row.gender

MERGE (disease:Disease {name: row.actual_disease})
MERGE (case_region:Region   {name: row.region})
MERGE (intervention_region:Region {name: row.intervention_region})
MERGE (target_disease:Disease {name: row.intervention_target_disease})

// 2. MERGE conceptual/category nodes
MERGE (severity:SeverityLevel   {level: row.severity})
MERGE (category:DiseaseCategory {name: row.disease_category})

// 3. MERGE demographic nodes
MERGE (ageGroupNode:AgeGroup {name: row.age_group})
MERGE (genderNode:Gender   {value: row.gender})

// 4. MERGE intervention node
MERGE (intervention:Intervention {id: row.intervention_intervention_id})
  ON CREATE SET
    intervention.type               = row.intervention_type,
    intervention.startDate          = date(row.intervention_start_date),
    intervention.durationDays       = toInteger(row.intervention_duration_days),
    intervention.effectivenessScore = toFloat(row.intervention_effectiveness_score),
    intervention.cost               = toInteger(row.intervention_cost),
    intervention.populationAffected = toInteger(row.intervention_population_affected),
    intervention.complianceRate     = toFloat(row.intervention_compliance_rate)

// 5. MERGE Time‑Tree
MERGE (year:Year      {year: row.timestamp.year})
MERGE (month:Month    {month: row.timestamp.month})
MERGE (day:Day        {day: row.timestamp.day})
MERGE (weekday:DayOfWeek {name: row.day_of_week})
MERGE (hour:Hour      {hour: row.hour})

// 6. CREATE the Case node
CREATE (case:Case {
  caseId                 : row.case_id,
  timestamp              : datetime(row.timestamp),
  location               : point({latitude: toFloat(row.latitude), longitude: toFloat(row.longitude)}),
  isOutbreakRelated      : toBoolean(row.is_outbreak_related),
  contactTracingNeeded   : toBoolean(row.contact_tracing_needed),
  hospitalizationRequired: toBoolean(row.hospitalization_required)
})

// 7. Core relationships
MERGE (patient)-[:REPORTED]->(case)
MERGE (case)-[:DIAGNOSED_WITH]->(disease)
MERGE (case)-[:HAS_SEVERITY]->(severity)
MERGE (case)-[:OCCURRED_IN]->(case_region)
MERGE (case)-[:AFFECTED_BY]->(intervention)

// 8. Demographics on the case
MERGE (patient)-[:BELONGS_TO_AGE_GROUP]->(ageGroupNode)
MERGE (patient)-[:HAS_GENDER]->(genderNode)

// 9. Time relationships
MERGE (case)-[:OCCURRED_ON]->(day)
MERGE (day)-[:OF_MONTH]->(month)
MERGE (month)-[:OF_YEAR]->(year)
MERGE (day)-[:IS_WEEKDAY]->(weekday)
MERGE (case)-[:OCCURRED_AT_HOUR]->(hour)
MERGE (case)-[:REPORTED_IN_YEAR]->(year)

// 10. Symptoms (assumes row.symptoms_text is a list of symptom strings)
FOREACH (symptom_name IN row.symptoms_text |
  MERGE (symptom:Symptom {name: symptom_name})
  MERGE (case)-[:PRESENTED_SYMPTOM]->(symptom)
  MERGE (symptom)-[:COMMON_MANIFESTATION_OF]->(disease)
)

// 11. Intervention relationships
MERGE (intervention)-[:TARGETS_DISEASE]->(target_disease)
MERGE (intervention)-[:APPLIED_IN]->(intervention_region)

// 12. Inferred & conceptual links
MERGE (disease)-[:IS_A_TYPE_OF]->(category)
MERGE (patient)-[:LIVES_IN]->(case_region)
MERGE (patient)-[:HAS_HISTORY_OF]->(disease)
MERGE (disease)-[:PREVALENT_IN]->(case_region)

// 13. Outbreak linkage (only if flag = true)
FOREACH (_ IN CASE WHEN toBoolean(row.is_outbreak_related) THEN [1] ELSE [] END |
  MERGE (outbreak:Outbreak {
    disease: row.actual_disease,
    region : row.region
  })
    ON CREATE SET
      outbreak.startDate = date(row.timestamp)
  MERGE (case)-[:PART_OF_OUTBREAK]->(outbreak)
)

// 14. Conditional contact tracing
FOREACH (_ IN CASE WHEN toBoolean(row.contact_tracing_needed) THEN [1] ELSE [] END |
  MERGE (ctService:ContactTracingService {name: "ContactTracing"})
  MERGE (case)-[:REQUIRES_CONTACT_TRACING]->(ctService)
)

// 15. Conditional hospitalization
FOREACH (_ IN CASE WHEN toBoolean(row.hospitalization_required) THEN [1] ELSE [] END |
  MERGE (hospService:HospitalizationService {name: "Hospitalization"})
  MERGE (case)-[:REQUIRES_HOSPITALIZATION]->(hospService)
)
"""

In [71]:
def ingest_data_in_batches(driver, query, data, batch_size=500):
    total = len(data)
    print(f"Starting ingestion of {total} records with the hyper-robust schema...")
    with driver.session(database="neo4j") as session:
        for i in range(0, total, batch_size):
            batch = data[i:min(i + batch_size, total)]
            session.run(query, records=batch)
            print(f"Processed {len(batch)} records. ({i + len(batch)}/{total})")

In [72]:
ingest_data_in_batches(driver, ingestion_query, records, batch_size=500)
print("\nIngestion complete. Your graph is now ready for advanced querying.")
driver.close()

Starting ingestion of 10000 records with the hyper-robust schema...
Processed 500 records. (500/10000)
Processed 500 records. (1000/10000)
Processed 500 records. (1500/10000)
Processed 500 records. (2000/10000)
Processed 500 records. (2500/10000)
Processed 500 records. (3000/10000)
Processed 500 records. (3500/10000)
Processed 500 records. (4000/10000)
Processed 500 records. (4500/10000)
Processed 500 records. (5000/10000)
Processed 500 records. (5500/10000)
Processed 500 records. (6000/10000)
Processed 500 records. (6500/10000)
Processed 500 records. (7000/10000)
Processed 500 records. (7500/10000)
Processed 500 records. (8000/10000)
Processed 500 records. (8500/10000)
Processed 500 records. (9000/10000)
Processed 500 records. (9500/10000)
Processed 500 records. (10000/10000)

Ingestion complete. Your graph is now ready for advanced querying.


# Cypher Query Chain prompting

In [73]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate

  from .autonotebook import tqdm as notebook_tqdm


In [74]:
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [75]:
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"), 
    username="neo4j", 
    password=os.getenv("NEO4J_PASSWORD")
)

  graph = Neo4jGraph(


In [76]:
graph.refresh_schema()
print(graph.schema)

Node properties:
Disease {name: STRING}
Region {name: STRING}
Intervention {id: STRING, type: STRING, cost: INTEGER, startDate: DATE, durationDays: INTEGER, effectivenessScore: FLOAT, populationAffected: INTEGER, complianceRate: FLOAT}
Symptom {name: STRING}
Year {year: INTEGER}
Month {month: INTEGER}
Day {day: INTEGER}
DayOfWeek {name: STRING}
Hour {hour: INTEGER}
Case {timestamp: DATE_TIME, caseId: STRING, contactTracingNeeded: BOOLEAN, location: POINT, isOutbreakRelated: BOOLEAN, hospitalizationRequired: BOOLEAN}
Patient {gender: STRING, age: INTEGER, patientId: STRING, ageGroup: STRING}
SeverityLevel {level: STRING}
DiseaseCategory {name: STRING}
AgeGroup {name: STRING}
Gender {value: STRING}
Outbreak {startDate: DATE, disease: STRING, region: STRING}
HospitalizationService {name: STRING}
ContactTracingService {name: STRING}
Relationship properties:

The relationships:
(:Disease)-[:IS_A_TYPE_OF]->(:DiseaseCategory)
(:Disease)-[:PREVALENT_IN]->(:Region)
(:Intervention)-[:APPLIED_IN]

In [170]:
llm = ChatGroq(
    groq_api_key=os.environ.get('GROQ_API'),
    model_name='llama-3.3-70b-versatile',
    max_tokens=2048,
)

In [101]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(
    model_name="nvidia/llama-3.1-405b-instruct",
    api_key=os.getenv("NVIDIA_API_KEY"),
)

In [171]:
CYPHER_GENERATION_TEMPLATE_XML = """<cypher_generation_prompt>
    <instructions>
        <title>Instructions for Neo4j Cypher Query Generation</title>
        <rule>You are a world-class Neo4j Cypher query translator. Your sole purpose is to convert a user's question into a valid and efficient Cypher query based on the provided graph schema.</rule>
        <rule>Strictly adhere to the schema. Never use node labels, relationship types, or property names that are not explicitly defined in the `<schema>` block.</rule>
        <rule>Use underscores in multi-word names like 'coastal_region'.</rule>
        <rule>Your output MUST be a single, valid Cypher query and nothing else. No explanations, no markdown, no assumptions.</rule>
        <rule>Do not guess. If the query cannot be generated without assuming missing information, return an empty query.</rule>
        <rule>You are allowed to match symptoms to cases and infer probable diseases based on frequency counts of co-occurrence.</rule>
        <rule>Use aggregation such as count(*) and ORDER BY when needed to find most common patterns.</rule>
        <rule>Whenever asked for age average you need to return an integer.</rule>
        <rule>Avoid nested aggregates (e.g. `avg(avg(...))`).</rule>
        <rule>Do NOT use `collect(...)` followed by `avg(...)` on the collected list—you must aggregate directly on the numeric property in a single pass.</rule>
        <rule>If the question asks for 'most effective', include avg(i.effectivenessScore) and sort by it descending.</rule>
        <rule>Pay careful attention to relationship directions in the schema.</rule>
        <rule>Whenever asked for symptoms, you should only return the symptom names, not the full text descriptions and give common symptoms everytime, do not give all symptoms of every disease as it would be very long for context.</rule>
        <rule>Fix the token limit to be inbetween 5000 and 9000 tokens, so that the model can handle complex queries without truncation.</rule>
        <rule>When asked for symptoms, only make query for most common symptoms, not all symptoms.</rule>
        <rule>When asked for intervention name you should return the intervention type, not the intervention id.</rule>
        <rule>When using ORDER BY on an aggregate like count(), avg(), etc., ensure the aggregate is aliased in the RETURN or WITH clause before ordering—e.g., `RETURN x AS y, count(c) AS frequency ORDER BY frequency DESC`.</rule>
    </instructions>

    <schema>
        {schema}
    </schema>
    
    <few_shot_examples>
        <example>
            <question>Which intervention type was most effective for reducing severe malaria cases in the coastal region during the last two years, and what was the average age of patients who benefited from these interventions?</question>
            <cypher>
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {{name: 'malaria'}}),
      (c)-[:OCCURRED_IN]->(r:Region {{name: 'coastal_region'}}),
      (c)-[:AFFECTED_BY]->(i:Intervention),
      (p:Patient)-[:REPORTED]->(c),
      (c)-[:HAS_SEVERITY]->(s:SeverityLevel {{level: 'severe'}}),
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year >= date().year - 2
RETURN i.type AS intervention_type, 
       toInteger(avg(p.age)) AS average_patient_age,
       avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY avg_effectiveness DESC
LIMIT 1
            </cypher>
        </example>

        <example>
            <question>What is the total number of malaria cases reported in the coastal region last year?</question>
            <cypher>
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {{name: 'malaria'}}),
      (c)-[:OCCURRED_IN]->(r:Region {{name: 'coastal_region'}}),
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year = date().year - 1
RETURN count(c) AS total_cases
            </cypher>
        </example>

        <example>
            <question>Which disease has the highest average age of patients in urban areas?</question>
            <cypher>
MATCH (p:Patient)-[:REPORTED]->(c:Case)-[:DIAGNOSED_WITH]->(d:Disease),
      (c)-[:OCCURRED_IN]->(r:Region {{name: 'urban_area'}})
RETURN d.name AS disease_name, 
       toInteger(avg(p.age)) AS average_age
ORDER BY average_age DESC
LIMIT 1
            </cypher>
        </example>

        <example>
            <question>What are the most common symptoms for dengue fever cases?</question>
            <cypher>
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {{name: 'dengue_fever'}}),
      (c)-[:PRESENTED_SYMPTOM]->(s:Symptom)
RETURN s.name AS symptom, count(c) AS frequency
ORDER BY frequency DESC
LIMIT 5
            </cypher>
        </example>

        <example>
        <question>Which intervention type had the highest effectiveness score in reducing severe cases across different disease categories, and what was the average patient age and most common symptoms for those cases in the last two years?</question>
        <cypher>
        MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease),
            (c)-[:AFFECTED_BY]->(i:Intervention),
            (p:Patient)-[:REPORTED]->(c),
            (c)-[:PRESENTED_SYMPTOM]->(s:Symptom),
            (c)-[:HAS_SEVERITY]->(sev:SeverityLevel {{level: 'severe'}}),
            (d)-[:IS_A_TYPE_OF]->(dc:DiseaseCategory),
            (c)-[:REPORTED_IN_YEAR]->(y:Year)
        WHERE y.year >= date().year - 2
        WITH i.type AS intervention_type, 
            avg(i.effectivenessScore) AS avg_effectiveness,
            collect(p.age) AS patient_ages,
            collect(s.name) AS symptoms,
            count(c) AS case_count
        RETURN intervention_type, 
            toInteger(avg([age IN patient_ages | age])) AS average_patient_age,
            avg_effectiveness,
            [symptom IN symptoms | symptom][0..3] AS common_symptoms,
            case_count
        ORDER BY avg_effectiveness DESC
        LIMIT 1
        </cypher>
        </example>

        <example>
            <question>Which region had the most severe tuberculosis cases in the last 6 months?</question>
            <cypher>
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {{name: 'tuberculosis'}}),
      (c)-[:OCCURRED_IN]->(r:Region),
      (c)-[:HAS_SEVERITY]->(s:SeverityLevel {{level: 'severe'}})
WHERE c.timestamp >= datetime() - duration({{months: 6}})
RETURN r.name AS region_name, count(c) AS severe_cases
ORDER BY severe_cases DESC
LIMIT 1
            </cypher>
        </example>

        <example>
            <question>What is the average effectiveness score of interventions targeting malaria?</question>
            <cypher>
MATCH (i:Intervention)-[:TARGETS_DISEASE]->(d:Disease {{name: 'malaria'}})
RETURN avg(i.effectivenessScore) AS average_effectiveness
            </cypher>
        </example>

        <example>
            <question>Which gender has more cases of severe diseases in the mountain region?</question>
            <cypher>
MATCH (p:Patient)-[:REPORTED]->(c:Case),
      (p)-[:HAS_GENDER]->(g:Gender),
      (c)-[:OCCURRED_IN]->(r:Region {{name: 'mountain_region'}}),
      (c)-[:HAS_SEVERITY]->(s:SeverityLevel {{level: 'severe'}})
RETURN g.value AS gender, count(c) AS case_count
ORDER BY case_count DESC
LIMIT 1
            </cypher>
        </example>

        <example>
            <question>Find patients who have a history of malaria and currently live in the coastal region</question>
            <cypher>
MATCH (p:Patient)-[:HAS_HISTORY_OF]->(d:Disease {{name: 'malaria'}}),
      (p)-[:LIVES_IN]->(r:Region {{name: 'coastal_region'}})
RETURN p.patientId, p.age, p.gender
            </cypher>
        </example>
    </few_shot_examples>
    
    <important_notes>
        <note>Symptom names in the database are long text strings like "Symptoms include fever, cough and chills".</note>
        <note>To match individual symptoms, use `CONTAINS` or `=~` instead of `s.name IN [...]`.</note>
        <note>Remember relationship directions: Patient-[:REPORTED]->Case, Case-[:DIAGNOSED_WITH]->Disease</note>
        <note>Use date() functions for current date comparisons</note>
        <note>Use datetime() for timestamp comparisons with duration</note>
        <note>Always use toInteger() when returning average ages</note>
    </important_notes>

    <common_patterns>
        <pattern name="Most Effective Intervention">
            <description>When asking for most effective intervention, always include effectiveness score in ordering</description>
            <template>
MATCH (relevant patterns)
RETURN intervention_details, avg(i.effectivenessScore) AS effectiveness
ORDER BY effectiveness DESC
LIMIT 1
            </template>
        </pattern>

        <pattern name="Age Aggregation">
            <description>Direct aggregation on patient age property</description>
            <template>
MATCH (p:Patient)-[:REPORTED]->(c:Case)
WHERE (conditions)
RETURN toInteger(avg(p.age)) AS average_age
            </template>
        </pattern>

        <pattern name="Time-based Filtering">
            <description>Filtering by years or recent time periods</description>
            <template>
MATCH (c:Case)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year >= date().year - 2
            </template>
        </pattern>

        <pattern name="Disease-Region-Severity">
            <description>Common pattern for disease analysis by region and severity</description>
            <template>
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {{name: 'disease_name'}}),
      (c)-[:OCCURRED_IN]->(r:Region {{name: 'region_name'}}),
      (c)-[:HAS_SEVERITY]->(s:SeverityLevel {{level: 'severity_level'}})
            </template>
        </pattern>
    </common_patterns>

    <task>
        <title>Current Task</title>
        <question>{question}</question>
        <cypher>
        </cypher>
    </task>
</cypher_generation_prompt>"""

In [172]:
cypher_prompt = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE_XML
)

In [173]:
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    verbose=True, 
    cypher_prompt=cypher_prompt,
    allow_dangerous_requests=True
)

In [174]:
def ask_question(query: str):
    """
    Invokes the QA chain and prints the result in a user-friendly format.
    """
    try:
        result = chain.invoke({"query": query})
        print(f"❓ Question: {query}")
        # The final answer is in the 'result' key
        print(f"🤖 Answer: {result['result']}")
    except Exception as e:
        print(f"An error occurred: {e}")
    print("-" * 50)

In [159]:
print("Running corrected queries...\n")

# Test 1: The COVID-19 query 
ask_question("how many cases were reported for covid19 and tell which year and month it was reported in?")

# Test 2: The Intervention query 
ask_question("What was the effectiveness score for interventions targeting malaria?")

# Test 3: Other queries
ask_question("Which disease was most common in the coastal region in May?")
ask_question("What is the average age of male patients diagnosed with malaria?")

Running corrected queries...



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'covid19'}),
      (c)-[:REPORTED_IN_YEAR]->(y:Year),
      (c)-[:OCCURRED_ON]->(day:Day)-[:OF_MONTH]->(m:Month)
RETURN count(c) AS total_cases, 
       y.year AS year, 
       m.month AS month[0m
Full Context:
[32;1m[1;3m[{'total_cases': 148, 'year': 2024, 'month': 12}, {'total_cases': 160, 'year': 2022, 'month': 12}, {'total_cases': 153, 'year': 2023, 'month': 12}, {'total_cases': 63, 'year': 2025, 'month': 12}, {'total_cases': 162, 'year': 2020, 'month': 12}, {'total_cases': 885, 'year': 2021, 'month': 12}, {'total_cases': 148, 'year': 2024, 'month': 5}, {'total_cases': 160, 'year': 2022, 'month': 5}, {'total_cases': 153, 'year': 2023, 'month': 5}, {'total_cases': 63, 'year': 2025, 'month': 5}][0m

[1m> Finished chain.[0m
❓ Question: how many cases were reported for covid19 and tell which year and month it was rep

In [160]:
ask_question("What is the most common symptom for patients diagnosed with pneumonia?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'pneumonia'}), 
      (c)-[:PRESENTED_SYMPTOM]->(s:Symptom)
RETURN s.name AS symptom, count(c) AS frequency
ORDER BY frequency DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'symptom': 'Clinical presentation: shivering, shortness of breath and cough with phlegm', 'frequency': 2}][0m

[1m> Finished chain.[0m
❓ Question: What is the most common symptom for patients diagnosed with pneumonia?
🤖 Answer: The most common symptoms for patients diagnosed with pneumonia include shivering, shortness of breath, and cough with phlegm.
--------------------------------------------------


In [162]:
ask_question("What was the most common disease reported in the year 2023?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease), 
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year = 2023
RETURN d.name AS disease_name, count(c) AS case_count
ORDER BY case_count DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'disease_name': 'gastroenteritis', 'case_count': 1144}][0m

[1m> Finished chain.[0m
❓ Question: What was the most common disease reported in the year 2023?
🤖 Answer: The most common disease reported was gastroenteritis, with 1144 cases.
--------------------------------------------------


In [163]:
ask_question("What is the most common symptom for patients diagnosed with pneumonia in the last year?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'pneumonia'}),
      (c)-[:PRESENTED_SYMPTOM]->(s:Symptom),
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year >= date().year - 1
RETURN s.name AS symptom, count(c) AS frequency
ORDER BY frequency DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'symptom': 'Symptoms include feeling hot, productive cough, chills and breathlessness', 'frequency': 1}][0m

[1m> Finished chain.[0m
❓ Question: What is the most common symptom for patients diagnosed with pneumonia in the last year?
🤖 Answer: The most common symptoms include feeling hot, productive cough, chills, and breathlessness.
--------------------------------------------------


In [164]:
ask_question("What is the most common disease reported in the coastal region in the last year?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease), 
      (c)-[:OCCURRED_IN]->(r:Region {name: 'coastal_region'}),
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year = date().year - 1
RETURN d.name AS disease_name, count(c) AS case_count
ORDER BY case_count DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'disease_name': 'malaria', 'case_count': 292}][0m

[1m> Finished chain.[0m
❓ Question: What is the most common disease reported in the coastal region in the last year?
🤖 Answer: The most common disease reported in the coastal region in the last year is malaria, with 292 reported cases.
--------------------------------------------------


In [27]:
ask_question("How effective are the interventions targeting malaria in the coastal region?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (i:Intervention)-[:TARGETS_DISEASE]->(d:Disease {name: 'malaria'}),
      (i)-[:APPLIED_IN]->(r:Region {name: 'coastal_region'})
RETURN avg(i.effectivenessScore) AS average_effectiveness[0m
Full Context:
[32;1m[1;3m[{'average_effectiveness': 0.5778607618678456}][0m

[1m> Finished chain.[0m
❓ Question: How effective are the interventions targeting malaria in the coastal region?
🤖 Answer: The average effectiveness of the interventions targeting malaria in the coastal region is 0.5778607618678456.
--------------------------------------------------


In [28]:
ask_question("What interventions are most effective in year 2023?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:AFFECTED_BY]->(i:Intervention),
      (c)-[:REPORTED_IN_YEAR]->(y:Year {year: 2023})
RETURN i.type AS intervention_type, avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY avg_effectiveness DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'intervention_type': 'school_closure', 'avg_effectiveness': 0.5954077505758271}][0m

[1m> Finished chain.[0m
❓ Question: What interventions are most effective in year 2023?
🤖 Answer: School closure has an average effectiveness of 0.5954077505758271.
--------------------------------------------------


In [110]:
ask_question("What is the overall best intervention type for covid19 till now?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Disease {name: 'covid19'})<-[:TARGETS_DISEASE]-(i:Intervention)
RETURN i.type, avg(i.effectivenessScore) AS avgEffectiveness
ORDER BY avgEffectiveness DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'i.type': 'school_closure', 'avgEffectiveness': 0.7275812461491508}][0m

[1m> Finished chain.[0m
❓ Question: What is the overall best intervention type for covid19 till now?
🤖 Answer: The overall best intervention type for COVID-19 is school closure, with an average effectiveness of 0.7275812461491508.
--------------------------------------------------


In [30]:
ask_question("which intervention type was most effective in 2025?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (i:Intervention)-[:TARGETS_DISEASE]->(d:Disease),
      (c:Case)-[:AFFECTED_BY]->(i),
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year = 2025
RETURN i.type AS intervention_type, avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY avg_effectiveness DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'intervention_type': 'school_closure', 'avg_effectiveness': 0.6592401521834965}][0m

[1m> Finished chain.[0m
❓ Question: which intervention type was most effective in 2025?
🤖 Answer: I'm not aware of the data from 2025, but according to the available information, the most effective intervention type had an average effectiveness of 0.6592401521834965, which was achieved by school closure.
--------------------------------------------------


In [31]:
ask_question("what is the most common symptoms for patients diagnosed with malaria?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'malaria'}),
      (c)-[:PRESENTED_SYMPTOM]->(s:Symptom)
RETURN s.name AS symptom, count(c) AS frequency
ORDER BY frequency DESC
LIMIT 5[0m
Full Context:
[32;1m[1;3m[{'symptom': 'Clinical presentation: queasiness, migraine and sweats', 'frequency': 268}, {'symptom': 'Symptoms include feeling hot, chills and muscle pain', 'frequency': 262}, {'symptom': 'Patient presents with nausea and body aches', 'frequency': 246}, {'symptom': 'Patient presents with fever and body aches', 'frequency': 244}, {'symptom': 'Patient presents with sweats and chills', 'frequency': 7}][0m

[1m> Finished chain.[0m
❓ Question: what is the most common symptoms for patients diagnosed with malaria?
🤖 Answer: The most common symptoms for patients diagnosed with malaria include queasiness, migraine, and sweats, as reported by 268 patients, followed closely by feeling hot, chil

In [119]:
ask_question("If I have a patient with covid19, what interventions should I consider based on past data?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'covid19'}),
      (c)-[:AFFECTED_BY]->(i:Intervention)
RETURN DISTINCT i.type AS intervention_type, 
       avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY avg_effectiveness DESC[0m
Full Context:
[32;1m[1;3m[{'intervention_type': 'quarantine', 'avg_effectiveness': 0.752942636863599}, {'intervention_type': 'school_closure', 'avg_effectiveness': 0.7275812461491508}, {'intervention_type': 'vaccination_campaign', 'avg_effectiveness': 0.5690270690642407}, {'intervention_type': 'sanitization_drive', 'avg_effectiveness': 0.4717751585662345}, {'intervention_type': 'contact_tracing', 'avg_effectiveness': 0.3656554890507603}, {'intervention_type': 'social_distancing', 'avg_effectiveness': 0.3376470890380357}, {'intervention_type': 'mask_mandate', 'avg_effectiveness': 0.2957141892664496}, {'intervention_type': 'travel_restriction', 'avg_effectiveness'

In [165]:
ask_question("Patient presents with severe headache, sweats, body aches and chills, what diseases should I be concerned about?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease), 
      (c)-[:PRESENTED_SYMPTOM]->(s:Symptom)
WHERE s.name =~ '(?i).*severe headache.*' OR s.name =~ '(?i).*sweats.*' OR s.name =~ '(?i).*body aches.*' OR s.name =~ '(?i).*chills.*'
RETURN d.name AS disease, count(c) AS frequency
ORDER BY frequency DESC
LIMIT 5[0m
Full Context:
[32;1m[1;3m[{'disease': 'malaria', 'frequency': 1448}, {'disease': 'influenza', 'frequency': 476}, {'disease': 'meningitis', 'frequency': 385}, {'disease': 'pneumonia', 'frequency': 361}][0m

[1m> Finished chain.[0m
❓ Question: Patient presents with severe headache, sweats, body aches and chills, what diseases should I be concerned about?
🤖 Answer: Based on the symptoms you've described, such as severe headache, sweats, body aches, and chills, you should be concerned about influenza, meningitis, and pneumonia, as these diseases can present with similar symptoms. Malaria is also 

In [166]:
ask_question("Which intervention type was most effective for reducing severe malaria cases in the coastal region during the last two years, and what was the average age of patients who benefited from these interventions?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'malaria'}),
      (c)-[:OCCURRED_IN]->(r:Region {name: 'coastal_region'}),
      (c)-[:AFFECTED_BY]->(i:Intervention),
      (p:Patient)-[:REPORTED]->(c),
      (c)-[:HAS_SEVERITY]->(s:SeverityLevel {level: 'severe'}),
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year >= date().year - 2
RETURN i.type AS intervention_type, 
       toInteger(avg(p.age)) AS average_patient_age,
       avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY avg_effectiveness DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'intervention_type': 'public_awareness', 'average_patient_age': 16, 'avg_effectiveness': 0.3481607436976009}][0m

[1m> Finished chain.[0m
❓ Question: Which intervention type was most effective for reducing severe malaria cases in the coastal region during the last two years, and what was the average age of patients who benefited from these in

In [167]:
ask_question("What age group has suffered the most from severe malaria cases in the coastal region in the last 2 years?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'malaria'}),
      (c)-[:OCCURRED_IN]->(r:Region {name: 'coastal_region'}),
      (c)-[:HAS_SEVERITY]->(s:SeverityLevel {level: 'severe'}),
      (c)-[:REPORTED_IN_YEAR]->(y:Year),
      (p:Patient)-[:REPORTED]->(c),
      (p)-[:BELONGS_TO_AGE_GROUP]->(ag:AgeGroup)
WHERE y.year >= date().year - 2
RETURN ag.name AS age_group, count(c) AS case_count
ORDER BY case_count DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'age_group': 'child', 'case_count': 2}][0m

[1m> Finished chain.[0m
❓ Question: What age group has suffered the most from severe malaria cases in the coastal region in the last 2 years?
🤖 Answer: Children have suffered the most from severe malaria cases in the given context.
--------------------------------------------------


In [168]:
ask_question("What age group has the highest number of cases for covid19 in the last year if present in data?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease {name: 'covid19'}),
      (p:Patient)-[:REPORTED]->(c),
      (p)-[:BELONGS_TO_AGE_GROUP]->(a:AgeGroup),
      (c)-[:REPORTED_IN_YEAR]->(y:Year)
WHERE y.year >= date().year - 1
RETURN a.name AS age_group, count(c) AS case_count
ORDER BY case_count DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'age_group': 'young_adult', 'case_count': 35}][0m

[1m> Finished chain.[0m
❓ Question: What age group has the highest number of cases for covid19 in the last year if present in data?
🤖 Answer: The young_adult age group has the highest number of cases for covid19, with 35 cases.
--------------------------------------------------


In [187]:
ask_question("Which disease patients in the coastal region are most likely to be hospitalized based on past data?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Patient)-[:REPORTED]->(c:Case)-[:DIAGNOSED_WITH]->(d:Disease),
      (c)-[:OCCURRED_IN]->(r:Region {name: 'coastal_region'}),
      (c)-[:REQUIRES_HOSPITALIZATION]->(:HospitalizationService)
RETURN d.name AS disease_name,
       count(c) AS hospitalization_count,
       (1.0 * count(c)) / count(distinct p) AS hospitalization_rate
ORDER BY hospitalization_rate DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'disease_name': 'meningitis', 'hospitalization_count': 178, 'hospitalization_rate': 1.0}][0m

[1m> Finished chain.[0m
❓ Question: Which disease patients in the coastal region are most likely to be hospitalized based on past data?
🤖 Answer: Patients with meningitis are most likely to be hospitalized based on past data, with a hospitalization count of 178 and a hospitalization rate of 1.0.
--------------------------------------------------


In [175]:
ask_question("From the data we have, can you give summary statistics on the most common diseases, their symptoms, and the effectiveness of interventions plus their respective intervention names?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease),
      (c)-[:PRESENTED_SYMPTOM]->(s:Symptom),
      (c)-[:AFFECTED_BY]->(i:Intervention)
RETURN d.name AS disease, 
       s.name AS symptom, 
       count(c) AS frequency,
       i.type AS intervention_type,
       avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY frequency DESC, avg_effectiveness DESC
[0m
Full Context:
[32;1m[1;3m[{'disease': 'gastroenteritis', 'symptom': 'Symptoms include vomiting and abdominal pain', 'frequency': 280, 'intervention_type': 'mask_mandate', 'avg_effectiveness': 0.24869020543165593}, {'disease': 'gastroenteritis', 'symptom': 'Symptoms include diarrhea, abdominal pain and vomiting', 'frequency': 274, 'intervention_type': 'contact_tracing', 'avg_effectiveness': 0.8596030054002916}, {'disease': 'malaria', 'symptom': 'Clinical presentation: queasiness, migraine and sweats', 'frequency': 268, 'intervention_type': 

In [176]:
ask_question("I have a patient with high fever, fatigue, and joint pain. What diseases should I be most concerned about based on past cases, and what are the most effective interventions for these diseases?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease),
      (c)-[:PRESENTED_SYMPTOM]->(s:Symptom),
      (c)-[:AFFECTED_BY]->(i:Intervention)
WHERE s.name =~ '.*fever.*' OR s.name =~ '.*fatigue.*' OR s.name =~ '.*joint pain.*'
WITH d.name AS disease_name, i.type AS intervention_type, i.effectivenessScore AS effectiveness
RETURN disease_name, intervention_type, avg(effectiveness) AS avg_effectiveness
ORDER BY avg_effectiveness DESC
LIMIT 5
[0m
Full Context:
[32;1m[1;3m[{'disease_name': 'gastroenteritis', 'intervention_type': 'contact_tracing', 'avg_effectiveness': 0.8596030054002916}, {'disease_name': 'influenza', 'intervention_type': 'mask_mandate', 'avg_effectiveness': 0.6950974808743705}, {'disease_name': 'pneumonia', 'intervention_type': 'vaccination_campaign', 'avg_effectiveness': 0.692987640784457}, {'disease_name': 'covid19', 'intervention_type': 'mask_mandate', 'avg_effectiveness': 0.676844991

In [177]:
ask_question("Can you analyze the data for patients who presented with fever and respiratory symptoms in the last 3 months, showing which diseases were most commonly diagnosed and what interventions were most effective?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Case)-[:PRESENTED_SYMPTOM]->(s:Symptom),
      (c)-[:DIAGNOSED_WITH]->(d:Disease),
      (c)-[:AFFECTED_BY]->(i:Intervention)
WHERE s.name =~ '.*fever.*' OR s.name =~ '.*respiratory.*'
      AND c.timestamp >= datetime() - duration({months: 3})
RETURN d.name AS disease, 
       count(c) AS case_count,
       i.type AS intervention_type,
       avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY case_count DESC, avg_effectiveness DESC
[0m
Full Context:
[32;1m[1;3m[{'disease': 'gastroenteritis', 'case_count': 512, 'intervention_type': 'contact_tracing', 'avg_effectiveness': 0.8596030054002916}, {'disease': 'covid19', 'case_count': 319, 'intervention_type': 'mask_mandate', 'avg_effectiveness': 0.72360033404336}, {'disease': 'malaria', 'case_count': 227, 'intervention_type': 'school_closure', 'avg_effectiveness': 0.5567073690816289}, {'disease': 'malaria', 'case_count': 124, 'interven

In [178]:
ask_question("What was the most common disease reported in the coastal region in the last year, and what were the most effective interventions for it?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Case)-[:DIAGNOSED_WITH]->(d:Disease),
      (c)-[:OCCURRED_IN]->(r:Region {name: 'coastal_region'}),
      (c)-[:REPORTED_IN_YEAR]->(y:Year),
      (c)-[:AFFECTED_BY]->(i:Intervention)
WHERE y.year = date().year - 1
WITH d.name AS disease_name, count(c) AS case_count, i.type AS intervention_type, avg(i.effectivenessScore) AS avg_effectiveness
ORDER BY case_count DESC, avg_effectiveness DESC
RETURN disease_name, intervention_type, avg_effectiveness
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'disease_name': 'malaria', 'intervention_type': 'public_awareness', 'avg_effectiveness': 0.3481607436976009}][0m

[1m> Finished chain.[0m
❓ Question: What was the most common disease reported in the coastal region in the last year, and what were the most effective interventions for it?
🤖 Answer: The most common disease reported was malaria. One of the most effective interventions for malaria was publ