In [4]:
!pip install neo4j



In [11]:
# Data loading: connect to the Neo4j instance

import getpass
import logging
import os

from neo4j import GraphDatabase

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Neo4j connection details.
# The URI and user can be overridden through the environment and otherwise fall
# back to the local values this notebook used before. The password is never
# stored in the notebook: it is read from NEO4J_PASSWORD, or prompted for.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://127.0.0.1:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

# Connect to Neo4j
driver = None
try:
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    # Creating the driver object does not contact the server, so check the
    # connection explicitly before claiming success.
    driver.verify_connectivity()
    logger.info("Successfully connected to Neo4j database.")
except Exception as e:
    logger.error(f"Failed to connect to Neo4j: {e}")
    # Release the half-initialised driver; later cells test `if driver:`.
    if driver is not None:
        driver.close()
    driver = None


INFO:__main__:Successfully connected to Neo4j database.


In [None]:
# Load New Synthetic Dataset into Neo4j
import os
import glob

def clear_database(driver, database="neo4j"):
    """Delete every node and relationship from a Neo4j database.

    Args:
        driver: Neo4j driver (or compatible object) used to open the session.
        database: Name of the database to wipe. Defaults to "neo4j", the
            database this function always targeted before the parameter existed.

    Raises:
        Exception: Whatever the driver raises if the session cannot be opened
            or the delete statement fails.
    """
    print("🧹 Clearing existing data...")
    with driver.session(database=database) as session:
        # Consume the result so the delete has run to completion, and any
        # server-side failure has been raised, before success is reported.
        session.run("MATCH (n) DETACH DELETE n").consume()
    print("✅ Database cleared successfully")

def _split_cypher_statements(cypher_text):
    """Split Cypher script text into statements on top-level semicolons.

    Semicolons inside quoted strings ('...', "..."), backtick-quoted names,
    `//` line comments and `/* */` block comments do not end a statement.
    Fragments that contain only whitespace and comments are dropped.

    Args:
        cypher_text: Full text of a Cypher script.

    Returns:
        List of statement strings, stripped, without the terminating semicolon.
    """
    statements = []
    buf = []
    has_code = False  # True once the current fragment holds something besides comments/whitespace
    quote = None      # the quote character currently open, if any
    i, n = 0, len(cypher_text)

    while i < n:
        ch = cypher_text[i]
        nxt = cypher_text[i + 1] if i + 1 < n else ""

        if quote:
            buf.append(ch)
            # A backslash escapes the next character inside '...' and "..." literals.
            if ch == "\\" and quote != "`" and nxt:
                buf.append(nxt)
                i += 2
                continue
            if ch == quote:
                quote = None
            i += 1
            continue

        if ch == "/" and nxt == "/":
            # Line comment: copy through to (not including) the newline.
            end = cypher_text.find("\n", i)
            end = n if end == -1 else end
            buf.append(cypher_text[i:end])
            i = end
            continue

        if ch == "/" and nxt == "*":
            # Block comment: copy through the closing */ (or to end of text).
            end = cypher_text.find("*/", i + 2)
            end = n if end == -1 else end + 2
            buf.append(cypher_text[i:end])
            i = end
            continue

        if ch == ";":
            if has_code:
                statements.append("".join(buf).strip())
            buf = []
            has_code = False
            i += 1
            continue

        if ch in "'\"`":
            quote = ch
        if not ch.isspace():
            has_code = True
        buf.append(ch)
        i += 1

    if has_code:
        statements.append("".join(buf).strip())
    return statements


def load_cypher_files(driver, cypher_dir="umbc_data/cypher", database="neo4j"):
    """Run a fixed, ordered list of Cypher script files against a database.

    Each file is read as UTF-8, split into statements, and every statement is
    executed in its own auto-commit call inside one session per file. A missing
    file is reported and skipped. If any statement in a file fails, the rest of
    that file is abandoned, the error is printed, and loading continues with
    the next file.

    Args:
        driver: Neo4j driver (or compatible object) used to open sessions.
        cypher_dir: Directory containing the numbered `.cypher` files.
        database: Name of the target database. Defaults to "neo4j", the
            database this function always targeted before the parameter existed.

    Returns:
        None. Progress and errors are reported through printed messages.
    """

    # Define the correct order for loading files
    file_order = [
        "00_indexes.cypher",
        "01_students.cypher",
        "02_faculty.cypher",
        "03_terms.cypher",
        "04_courses.cypher",
        "05_degrees.cypher",
        "06_requirement_groups.cypher",
        "07_course_prerequisites.cypher",
        "08_leads_to.cypher",
        "09_course_similarity.cypher",
        "10_student_degree.cypher",
        "11_teaching.cypher",
        "12_completed_courses.cypher",
        "13_enrolled_courses.cypher",
        "14_student_similarity.cypher",
        "15_requirement_degree.cypher",
        "16_course_requirement.cypher",
        "17_course_term.cypher"
    ]

    print(f"📊 Loading {len(file_order)} Cypher files...")

    for i, filename in enumerate(file_order, 1):
        filepath = os.path.join(cypher_dir, filename)

        if not os.path.exists(filepath):
            print(f"   ⚠️  File not found: {filename}")
            continue

        print(f"   [{i:2d}/{len(file_order)}] Loading {filename}...")

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                cypher_content = f.read()

            # A plain split(';') would cut statements whose string literals
            # or comments contain a semicolon, so use the quote-aware splitter.
            statements = _split_cypher_statements(cypher_content)

            with driver.session(database=database) as session:
                for stmt in statements:
                    # Consume each result so a failing statement raises here,
                    # before the file is reported as loaded.
                    session.run(stmt).consume()

            print(f"   ✅ {filename} loaded successfully")

        except Exception as e:
            # Deliberate best-effort: report the failure and move on.
            print(f"   ❌ Error loading {filename}: {str(e)}")
            print("   Continuing with next file...")

def verify_data_import(driver, database="neo4j"):
    """Print node counts per first label and relationship counts per type.

    Nodes are grouped by the first entry of `labels(n)`; nodes without any
    label are reported under "(unlabeled)". Totals for nodes and relationships
    are printed after each table.

    Args:
        driver: Neo4j driver (or compatible object) used to open the session.
        database: Name of the database to inspect. Defaults to "neo4j", the
            database this function always targeted before the parameter existed.

    Returns:
        None. All results are printed.
    """
    print("🔍 Verifying data import...")

    with driver.session(database=database) as session:
        # Count nodes by type
        result = session.run("""
            MATCH (n)
            RETURN labels(n)[0] as NodeType, count(n) as Count
            ORDER BY Count DESC
        """)

        print("\nNode counts after import:")
        print("-" * 30)
        total_nodes = 0
        for record in result:
            # labels(n)[0] is null for a node with no labels, and formatting
            # None with a width spec raises TypeError, so substitute a name.
            node_type = record["NodeType"] or "(unlabeled)"
            count = record["Count"]
            total_nodes += count
            print(f"{node_type:15} | {count:4d}")

        print(f"\nTotal nodes: {total_nodes}")

        # Count relationships
        rel_result = session.run("""
            MATCH ()-[r]->()
            RETURN type(r) as RelType, count(r) as Count
            ORDER BY Count DESC
        """)

        print("\nRelationship counts:")
        print("-" * 30)
        total_rels = 0
        for record in rel_result:
            rel_type = record["RelType"]
            count = record["Count"]
            total_rels += count
            print(f"{rel_type:20} | {count:4d}")

        print(f"\nTotal relationships: {total_rels}")

# Run the full load pipeline: wipe, import, then report counts.
if not driver:
    # The connection cell leaves `driver` as None when it could not connect.
    print("❌ No database connection available")
else:
    print("🚀 Starting Neo4j Data Loading Process", "=" * 40, sep="\n")

    clear_database(driver)        # 1. remove whatever is currently stored
    load_cypher_files(driver)     # 2. execute the ordered Cypher scripts
    verify_data_import(driver)    # 3. print node / relationship counts

    print(
        "\n🎉 Data loading completed!",
        "Your Neo4j database now contains the full synthetic dataset!",
        sep="\n",
    )


In [12]:
# Report which node label combinations exist in a database, and how many nodes
def check_database_nodes(driver, database="umbc-test"):
    """Print a count of nodes per label combination, then the overall total.

    Args:
        driver: Neo4j driver (or compatible object) used to open the session.
        database: Name of the database to inspect.
    """
    with driver.session(database=database) as db:
        # One row per distinct label list, largest group first
        label_rows = db.run("""
            MATCH (n) 
            RETURN labels(n) as NodeType, count(n) as Count
            ORDER BY Count DESC
        """)

        print(f"Node Types and Counts in '{database}' database:")
        print("-" * 40)
        for row in label_rows:
            print(f"{', '.join(row['NodeType'])}: {row['Count']}")

        # Overall node count comes from a separate query
        total = db.run("MATCH (n) RETURN count(n) as total").single()["total"]
        print(f"\nTotal nodes: {total}")

# Run the node check against the "smalldata" database
if not driver:
    print("No database connection available")
else:
    check_database_nodes(driver, "smalldata")

Node Types and Counts in 'smalldata' database:
----------------------------------------
Course: 50
RequirementGroup: 12
Student: 10
Degree: 4
Faculty: 2
Term: 2

Total nodes: 80
