In [1]:
# Cell 1: Import Libraries
import os
from neo4j import GraphDatabase
from dotenv import load_dotenv
import pandas as pd
from graphdatascience import GraphDataScience

load_dotenv()

True

In [None]:
# Cell 2: Load Environment Variables and Connect

try:
    load_dotenv()
    URI = os.getenv("NEO4J_URI")
    USERNAME = os.getenv("NEO4J_USERNAME")
    PASSWORD = os.getenv("NEO4J_PASSWORD")

    # Establish connection to the database
    driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))
    driver.verify_connectivity()
    print("✅ Connection to AuraDB Free established successfully!")

    # # Instantiate the GDS client
    gds = GraphDataScience(URI, auth=(USERNAME, PASSWORD), aura_ds=True)
    print(f"✅ Connected to GDS. GDS Version: {gds.version()}")

except Exception as e:
    print(f"❌ Failed to connect to AuraDB or GDS: {e}")
    print("Please ensure your .env file path is correct and the database is running.")
    driver = None
    gds = None

✅ Connection to AuraDB Free established successfully!


In [6]:
if driver:
    with driver.session(database="neo4j") as session:
        print("🧹 Cleaning up any previous data...")
        session.run("MATCH (n) DETACH DELETE n")

        print("1️⃣ Creating constraints for data integrity and performance...")
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (m:Movie) REQUIRE m.title IS UNIQUE")
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (p:Person) REQUIRE p.name IS UNIQUE")
        print("   Constraints created for Movie titles and Person names.")

        print("\n2️⃣ Loading Movie and Person nodes...")
        movie_load_query = """
        LOAD CSV WITH HEADERS FROM 'https://data.neo4j.com/intro/movies/movies.csv' AS row
        MERGE (m:Movie {title: row.title})
        SET m.released = toInteger(row.released), m.tagline = row.tagline
        """
        session.run(movie_load_query)

        person_load_query = """
        LOAD CSV WITH HEADERS FROM 'https://data.neo4j.com/intro/movies/people.csv' AS row
        MERGE (p:Person {name: row.name})
        SET p.born = toInteger(row.born)
        """
        session.run(person_load_query)
        print("   Nodes loaded successfully.")

        print("\n3️⃣ Loading relationships (ACTED_IN, DIRECTED)...")
        actors_load_query = """
        LOAD CSV WITH HEADERS FROM 'https://data.neo4j.com/intro/movies/actors.csv' AS row
        MATCH (p:Person {name: row.person})
        MATCH (m:Movie {title: row.movie})
        MERGE (p)-[:ACTED_IN {roles: split(row.roles, ';')}]->(m)
        """
        session.run(actors_load_query)

        directors_load_query = """
        LOAD CSV WITH HEADERS FROM 'https://data.neo4j.com/intro/movies/directors.csv' AS row
        MATCH (p:Person {name: row.person})
        MATCH (m:Movie {title: row.movie})
        MERGE (p)-[:DIRECTED]->(m)
        """
        session.run(directors_load_query)
        print("   Relationships loaded successfully.")

        print("\n✅ Data ingestion complete!")
else:
    print("Driver not connected. Cannot ingest data.")

🧹 Cleaning up any previous data...
1️⃣ Creating constraints for data integrity and performance...
   Constraints created for Movie titles and Person names.

2️⃣ Loading Movie and Person nodes...
   Nodes loaded successfully.

3️⃣ Loading relationships (ACTED_IN, DIRECTED)...
   Relationships loaded successfully.

✅ Data ingestion complete!


In [7]:
if driver:
    with driver.session(database="neo4j") as session:
        print("### Query 1: Find Tom Hanks' Co-Actors (Multi-hop Traversal) ###\n")
        query_co_actors = """
        MATCH (tom:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(movie)<-[:ACTED_IN]-(coActor:Person)
        WHERE tom <> coActor
        RETURN coActor.name AS CoActor, count(movie) AS MoviesTogether
        ORDER BY MoviesTogether DESC
        LIMIT 5
        """
        results = session.run(query_co_actors)
        for record in results:
            print(f"- {record['CoActor']} (acted in {record['MoviesTogether']} movies together)")

        print("\n### Query 2: Find the Shortest Path between two actors ###\n")
        query_shortest_path = """
        MATCH p=shortestPath(
          (p1:Person {name: 'Kevin Bacon'})-[*]-(p2:Person {name: 'Meg Ryan'})
        )
        RETURN [node in nodes(p) | node.name] AS Path
        """
        result = session.run(query_shortest_path).single()
        if result:
            print(f"Path: {' -> '.join(result['Path'])}")

        print("\n### Query 3: Using APOC to list available procedures ###\n")
        query_apoc_help = "CALL apoc.help('apoc.path') YIELD name, description RETURN name, description LIMIT 3"
        results = session.run(query_apoc_help)
        for record in results:
            print(f"- {record['name']}: {record['description']}")
else:
    print("Driver not connected. Cannot run queries.")

### Query 1: Find Tom Hanks' Co-Actors (Multi-hop Traversal) ###

- Meg Ryan (acted in 3 movies together)
- Bill Paxton (acted in 2 movies together)
- Rosie O'Donnell (acted in 2 movies together)
- Gary Sinise (acted in 2 movies together)
- Victor Garber (acted in 1 movies together)

### Query 2: Find the Shortest Path between two actors ###



TypeError: sequence item 1: expected str instance, NoneType found

In [10]:
# Create a new graph for the RBAC demo
if driver:
    with driver.session(database="neo4j") as session:
        print("🔄 Setting up a new dataset for RBAC emulation (Financial Transactions)...")
        # Clear existing data
        session.run("MATCH (n) DETACH DELETE n")

        # Create constraints
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (c:Customer) REQUIRE c.id IS UNIQUE")
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (a:Account) REQUIRE a.id IS UNIQUE")
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (b:Branch) REQUIRE b.id IS UNIQUE")

        # Load sample data
        session.run("""
        MERGE (c1:Customer {id:'cust-001', name:'Alice'})
        MERGE (c2:Customer {id:'cust-002', name:'Bob'})
        MERGE (c3:Customer {id:'cust-003', name:'Charlie'})

        MERGE (b1:Branch {id:'branch-A', location:'New York'})
        MERGE (b2:Branch {id:'branch-B', location:'London'})

        MERGE (acct1:Account {id:'acct-101', type:'Checking'})
        MERGE (acct2:Account {id:'acct-102', type:'Savings'})
        MERGE (acct3:Account {id:'acct-201', type:'Checking'})
        MERGE (acct4:Account {id:'acct-301', type:'Investment'})

        MERGE (c1)-[:OWNS]->(acct1)
        MERGE (c1)-[:OWNS]->(acct2)
        MERGE (c2)-[:OWNS]->(acct3)
        MERGE (c3)-[:OWNS]->(acct4)

        MERGE (acct1)-[:TRANSACTED_AT]->(b1)
        MERGE (acct2)-[:TRANSACTED_AT]->(b1)
        MERGE (acct3)-[:TRANSACTED_AT]->(b2)
        MERGE (acct4)-[:TRANSACTED_AT]->(b2)

        MERGE (t1:Transaction {id:'txn-556', amount:100.0, timestamp:datetime()})
        MERGE (t2:Transaction {id:'txn-557', amount:250.0, timestamp:datetime()})
        MERGE (t3:Transaction {id:'txn-558', amount:50.0, timestamp:datetime()})

        MERGE (acct1)-[:PERFORMED]->(t1)
        MERGE (acct3)-[:PERFORMED]->(t2)
        MERGE (acct4)-[:PERFORMED]->(t3)
        """)
        print("✅ Financial dataset created.")
else:
    print("Driver not connected. Cannot set up dataset.")

🔄 Setting up a new dataset for RBAC emulation (Financial Transactions)...
✅ Financial dataset created.


In [11]:
if driver:
    # This parameter would be set based on the logged-in user's role
    current_user_branch = 'branch-A'

    with driver.session(database="neo4j") as session:
        print(f"--- Running query as a Teller for {current_user_branch} ---\n")
        teller_query = """
        MATCH (b:Branch {id: $branch_id})<-[:TRANSACTED_AT]-(a:Account)-[:PERFORMED]->(t:Transaction)
        RETURN t.id AS TransactionID, t.amount AS Amount, a.id AS AccountID, b.location AS Branch
        """
        results = session.run(teller_query, branch_id=current_user_branch)
        for record in results:
            print(dict(record))
else:
    print("Driver not connected.")

--- Running query as a Teller for branch-A ---

{'TransactionID': 'txn-556', 'Amount': 100.0, 'AccountID': 'acct-101', 'Branch': 'New York'}


In [12]:
if driver:
    with driver.session(database="neo4j") as session:
        print("--- Running query as a Fraud Analyst ---\n")
        # Let's add a transfer relationship for this demo
        session.run("MATCH (a1:Account {id:'acct-101'}), (a2:Account {id:'acct-201'}) MERGE (a1)-[:SENT_TO {amount: 75}]->(a2)")

        analyst_query = """
        MATCH (a1:Account)-[s:SENT_TO]->(a2:Account)
        // Note we are NOT returning the Customer.name property
        RETURN a1.id AS FromAccount, a2.id AS ToAccount, s.amount AS Amount
        """
        results = session.run(analyst_query)
        for record in results:
            print(dict(record))
else:
    print("Driver not connected.")

--- Running query as a Fraud Analyst ---

{'FromAccount': 'acct-101', 'ToAccount': 'acct-201', 'Amount': 75}


In [13]:
if driver:
    with driver.session(database="neo4j") as session:
        print("--- Running query as an Auditor ---\n")
        auditor_query = """
        MATCH (c:Customer)-[:OWNS]->(a:Account)-[:TRANSACTED_AT]->(b:Branch)
        RETURN c.name AS CustomerName, a.id AS AccountID, b.location AS BranchLocation
        ORDER BY CustomerName
        """
        results = session.run(auditor_query)
        for record in results:
            print(dict(record))
else:
    print("Driver not connected.")

--- Running query as an Auditor ---

{'CustomerName': 'Alice', 'AccountID': 'acct-101', 'BranchLocation': 'New York'}
{'CustomerName': 'Alice', 'AccountID': 'acct-102', 'BranchLocation': 'New York'}
{'CustomerName': 'Bob', 'AccountID': 'acct-201', 'BranchLocation': 'London'}
{'CustomerName': 'Charlie', 'AccountID': 'acct-301', 'BranchLocation': 'London'}


In [14]:
# Don't forget to close the driver connection when you're done!
if driver:
    driver.close()
    print("\nConnection to AuraDB closed.")


Connection to AuraDB closed.
