In [None]:
from neo4j import GraphDatabase
import pandas as pd

import os
from getpass import getpass

# Aura connection details.
# Credentials are read from the environment (NEO4J_URI / NEO4J_USER /
# NEO4J_PASSWORD) so that no secret is stored in the notebook; when
# NEO4J_PASSWORD is unset the password is prompted for interactively.
# NOTE: a password was previously committed in this cell -- treat it as leaked
# and rotate it in the Aura console.
URI = os.environ.get("NEO4J_URI", "neo4j+s://104abb61.databases.neo4j.io")
USER = os.environ.get("NEO4J_USER", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
CSV_PATH1 = "Mock_Rule_Set_1.csv"
CSV_PATH2 = "Mock_Rule_Set_2.csv"

# Load CSV
df = pd.read_csv(CSV_PATH1)
df2 = pd.read_csv(CSV_PATH2)

# df['Age_Group'] = df['Age_Group'].replace('-', pd.NA).str.strip()
# df['Gender'] = df['Gender'].replace('-', pd.NA).str.strip()

# Connect to Neo4j
driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD))


In [None]:
def _clean_cell(value):
    """Return a usable cell value, or None when it is missing, blank or "-".

    Strings are returned stripped; non-string values are returned unchanged.
    """
    if pd.isna(value):
        return None
    if isinstance(value, str):
        value = value.strip()
        return None if value in ("", "-") else value
    return value


def _split_cell(value):
    """Split a comma-separated cell into stripped items, dropping blanks and "-"."""
    if pd.isna(value):
        return []
    parts = (part.strip() for part in str(value).split(','))
    return [part for part in parts if part not in ("", "-")]


def create_graph(tx, row):
    """Write one row of the first rule set into the graph.

    Args:
        tx: object exposing ``run(query, ...)`` (a Neo4j transaction).
        row: mapping with the keys ``Rule_ID``, ``Diagnosis``, ``Age_Group``,
            ``Gender``, ``Symptoms`` and ``Lab_Test_Results``.

    Nodes/relationships written:
        * ``(Rule)-[:IMPLIES]->(Diagnosis)`` -- always.
        * ``(PatientProfile)-[:PART_OF]->(Rule)`` -- only when the age group
          or the gender holds a real value (not missing, blank or "-").
        * ``(Symptom)-[:PART_OF]->(Rule)`` -- one per comma-separated symptom.
        * ``(LabTest)-[:PART_OF {result}]->(Rule)`` -- one per comma-separated
          ``name=value`` entry; entries without ``=`` are skipped.
    """
    rule_id = row['Rule_ID']

    # Always create Rule and Diagnosis
    tx.run("""
        MERGE (r:Rule {rule_id: $rule_id})
        MERGE (d:Diagnosis {name: $diagnosis})
        MERGE (r)-[:IMPLIES]->(d)
    """, rule_id=rule_id, diagnosis=row['Diagnosis'])

    # Create PatientProfile ONLY IF valid age or gender
    age = _clean_cell(row['Age_Group'])
    gender = _clean_cell(row['Gender'])

    if age is not None or gender is not None:
        props = []
        params = {'rule_id': rule_id}
        if age is not None:
            props.append('age_group: $age')
            params['age'] = age
        if gender is not None:
            props.append('gender: $gender')
            params['gender'] = gender

        # Only property *names* are interpolated; values stay parameterized.
        prop_str = ', '.join(props)

        query = f"""
            MERGE (r:Rule {{rule_id: $rule_id}})
            CREATE (p:PatientProfile {{{prop_str}}})
            MERGE (p)-[:PART_OF]->(r)
        """
        tx.run(query, params)

    # Symptoms
    for symptom in _split_cell(row['Symptoms']):
        tx.run("""
                    MERGE (s:Symptom {name: $symptom})
                    MERGE (r:Rule {rule_id: $rule_id})
                    MERGE (s)-[:PART_OF]->(r)
                """, symptom=symptom, rule_id=rule_id)

    # Lab tests, written as e.g. "Hemoglobin=Low"
    for test_result in _split_cell(row['Lab_Test_Results']):
        if '=' not in test_result:
            continue
        # Split on the FIRST "=" only, so a value that itself contains "="
        # no longer breaks the two-name unpacking.
        test_name, _, result_value = test_result.partition('=')
        tx.run("""
                        MERGE (l:LabTest {name: $test_name})
                        MERGE (r:Rule {rule_id: $rule_id})
                        MERGE (l)-[rel:PART_OF]->(r)
                        SET rel.result = $result_value
                    """, test_name=test_name.strip(), result_value=result_value.strip(), rule_id=rule_id)

def clear_database(tx):
    """Remove every node, together with its relationships, via ``tx``."""
    wipe_all = "MATCH (n) DETACH DELETE n"
    tx.run(wipe_all)


def create_graph_rule2(tx, row):
    """Write one row of the second rule set into the graph.

    Links the row's Diagnosis to its Rule with ``TRIGGERED_BY``, then adds one
    ``RECOMMENDS`` relationship from the Rule to every procedure and every
    prescription listed (comma-separated) in the row. Missing cells, blank
    items and "-" placeholders are skipped.

    Args:
        tx: object exposing ``run(query, **params)`` (a Neo4j transaction).
        row: mapping with the keys ``Rule_ID``, ``Diagnosis``, ``Procedures``
            and ``Prescriptions``.
    """
    link_diagnosis_to_rule = """
        MERGE (r:Rule {rule_id: $rule_id})
        MERGE (d:Diagnosis {name: $diagnosis})
        MERGE (d)-[:TRIGGERED_BY]->(r)
    """
    tx.run(link_diagnosis_to_rule, rule_id=row['Rule_ID'], diagnosis=row['Diagnosis'])

    recommend_procedure = """
                    MERGE (p:Procedure {name: $proc})
                    MERGE (r:Rule {rule_id: $rule_id})
                    MERGE (r)-[rel:RECOMMENDS {type: 'Procedure'}]->(p)
                """
    recommend_prescription = """
                    MERGE (m:Prescription {name: $drug})
                    MERGE (r:Rule {rule_id: $rule_id})
                    MERGE (r)-[rel:RECOMMENDS {type: 'Prescription'}]->(m)
                """

    # (column to read, Cypher parameter carrying the item, statement to run);
    # procedures are written before prescriptions.
    recommendation_specs = (
        ('Procedures', 'proc', recommend_procedure),
        ('Prescriptions', 'drug', recommend_prescription),
    )

    for column, item_param, cypher in recommendation_specs:
        cell = row[column]
        if not pd.notna(cell):
            continue
        for raw_item in str(cell).split(','):
            item = raw_item.strip()
            if item in ("", "-"):
                continue
            tx.run(cypher, **{item_param: item, 'rule_id': row['Rule_ID']})


# Rebuild the graph from scratch: wipe it, then load each rule set row by row,
# one write transaction per row (first rule set, then the second).
with driver.session() as session:
    session.execute_write(clear_database)
    for rule_set, load_row in ((df, create_graph), (df2, create_graph_rule2)):
        for _, row in rule_set.iterrows():
            session.execute_write(load_row, row)

print("✅ Knowledge graph created successfully.")

In [7]:
import pandas as pd
import re
import hashlib
from neo4j import GraphDatabase

# ---------------------- Neo4j Connection ----------------------
import os
from getpass import getpass

# Connection details default to a local Neo4j Desktop instance and can be
# overridden through NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD. The password is
# never stored in the notebook: when NEO4J_PASSWORD is unset it is prompted for.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# ---------------------- Load CSV ----------------------
CSV_PATH = "procedure_rules.csv"
df = pd.read_csv(CSV_PATH, engine='python')  # safer for complex rows

# ---------------------- Connect to Neo4j ----------------------
driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD))

# ---------------------- Utilities ----------------------

def parse_frozenset_string(s):
    """Extract the items written inside ``frozenset({...})`` as a list of strings.

    The text between the first ``{`` and the last ``}`` is first parsed as a
    sequence of Python literals, so a quoted item that itself contains a comma
    (e.g. ``'Diagnosis_Diabetes, type 2'``) stays in one piece. When that text
    is not valid literal syntax (unquoted names, stray brackets, ...) the
    function falls back to a plain comma split with surrounding whitespace and
    quotes stripped.

    Args:
        s: the textual form of a frozenset; any non-string yields ``[]``.

    Returns:
        The items in written order, or ``[]`` when ``s`` is not a string or
        contains no ``{...}`` part.
    """
    import ast  # local import so the function does not rely on another cell

    if not isinstance(s, str):
        return []
    match = re.search(r"\{(.*)\}", s)
    if not match:
        return []

    inner = match.group(1)
    try:
        # Parse as a list (not a set) to keep the written order.
        parsed = ast.literal_eval(f"[{inner}]")
    except (ValueError, SyntaxError, TypeError):
        parsed = None

    if parsed is not None:
        return [str(item) for item in parsed]
    return [item.strip().strip("'\"") for item in inner.split(',') if item.strip()]

def generate_rule_group_id(diagnosis):
    """Return the SHA-256 hex digest of ``diagnosis``, used as its RuleGroup ID.

    The same diagnosis text always yields the same ID.
    """
    hasher = hashlib.sha256()
    hasher.update(diagnosis.encode())
    return hasher.hexdigest()

def clear_database(tx):
    """Wipe the graph: detach-delete every node using the given transaction."""
    delete_all_nodes = "MATCH (n) DETACH DELETE n"
    tx.run(delete_all_nodes)

# ---------------------- Graph Insertion ----------------------

def create_rule_group(tx, rule_group_id, diagnosis, procedure, confidence, lift, support):
    """Upsert one diagnosis -> rule group -> procedure recommendation.

    Runs a single Cypher statement that merges the Diagnosis, the RuleGroup
    (whose ``name`` is set to ``rule_group_id`` when the node is created) and
    the Procedure, links them with ``TRIGGERS`` and ``RECOMMENDS``, and stores
    ``confidence``, ``lift`` and ``support`` on the ``RECOMMENDS``
    relationship via ``toFloat``.

    Args:
        tx: object exposing ``run(query, **params)`` (a Neo4j transaction).
        rule_group_id: identifier of the RuleGroup node.
        diagnosis: name of the Diagnosis node.
        procedure: name of the Procedure node.
        confidence, lift, support: metrics written onto the relationship.
    """
    upsert_recommendation = """
        MERGE (d:Diagnosis {name: $diagnosis})

        MERGE (g:RuleGroup {rule_group_id: $rule_group_id})
        ON CREATE SET g.name = $rule_group_id

        MERGE (d)-[:TRIGGERS]->(g)

        MERGE (p:Procedure {name: $procedure})
        MERGE (g)-[r:RECOMMENDS]->(p)
        SET r.confidence = toFloat($confidence),
            r.lift = toFloat($lift),
            r.support = toFloat($support)
    """
    bindings = {
        'rule_group_id': rule_group_id,
        'diagnosis': diagnosis,
        'procedure': procedure,
        'confidence': confidence,
        'lift': lift,
        'support': support,
    }
    tx.run(upsert_recommendation, **bindings)

# ---------------------- Main Execution ----------------------

# Wipe the graph, then insert one Diagnosis -> RuleGroup -> Procedure link for
# every CSV row that pairs exactly one diagnosis with exactly one procedure.
with driver.session() as session:
    session.execute_write(clear_database)
    print("🧹 Database cleared.")

    diagnosis_prefix = "Diagnosis_"
    procedure_prefix = "Procedure_"
    lift_cap = 1e6  # finite stand-in stored when a rule's lift is infinite

    for _, row in df.iterrows():
        try:
            antecedents = parse_frozenset_string(row['antecedents'])
            consequents = parse_frozenset_string(row['consequents'])

            # Drop only the LEADING prefix; str.replace would also remove the
            # same text if it appeared again later in the name.
            diagnoses = [a[len(diagnosis_prefix):].strip()
                         for a in antecedents if a.startswith(diagnosis_prefix)]
            procedures = [c[len(procedure_prefix):].strip()
                          for c in consequents if c.startswith(procedure_prefix)]

            # Only accept rules with 1 diagnosis and 1 procedure
            if len(diagnoses) != 1 or len(procedures) != 1:
                continue

            diagnosis = diagnoses[0]
            procedure = procedures[0]
            rule_group_id = generate_rule_group_id(diagnosis)

            confidence = float(row['confidence'])
            # Compare the parsed number rather than its text, so every
            # spelling of positive infinity ("inf", "Infinity", ...) is capped.
            lift = float(row['lift'])
            if lift == float('inf'):
                lift = lift_cap
            support = float(row['support'])

            session.execute_write(create_rule_group, rule_group_id, diagnosis, procedure, confidence, lift, support)

        except Exception as e:
            # Best effort: report the offending row and continue with the rest.
            print(f"⚠️ Failed to process row:\n{row}\nError: {e}\n")

print("✅ RuleGroup-based knowledge graph created successfully.")


🧹 Database cleared.
✅ RuleGroup-based knowledge graph created successfully.
