# In [4]:
import pandas as pd
from neo4j import GraphDatabase

# Neo4j connection settings -- adjust these for your environment.
BOLT_URL = "bolt://neo4j:7687"
NEO4J_USER = "neo4j"
# NOTE(review): the password is an empty string here; presumably it is
# meant to be filled in locally -- confirm before running.
NEO4J_PASS = ""

# Path to the CSV file read by create_acted_in_relationships(); the file
# must provide "actor_id" and "movie_id" columns.
CSV_FILE_PATH = "/home/jovyan/work/cinescope/data/csv-files/actor-movie-ids-master.csv"
CHUNKSIZE = 10000  # Number of CSV rows read (and sent to Neo4j) per chunk

# Module-level driver used for every session opened in this file. It is
# created as soon as the module is executed and closed in the __main__ block.
driver = GraphDatabase.driver(BOLT_URL, auth=(NEO4J_USER, NEO4J_PASS))

def create_acted_in_relationships(csv_file):
    """
    Stream the CSV in chunks and create ACTED_IN relationships
    between Actor(id=actor_id) and Movie(id=movie_id).

    The file is read CHUNKSIZE rows at a time, and each chunk is written
    through its own ``session.execute_write`` call, so the whole CSV is
    never held in memory at once.

    Args:
        csv_file: Path (or file-like object) accepted by ``pandas.read_csv``.
            It must contain ``actor_id`` and ``movie_id`` columns; any other
            columns are ignored.

    Raises:
        KeyError: If either id column is missing from the CSV.
        ValueError: If an id value cannot be converted to ``int``
            (for example a missing or non-numeric cell).
    """
    id_columns = ["actor_id", "movie_id"]

    # The reader is opened first so that an unreadable file fails before
    # a database session is requested.
    with pd.read_csv(csv_file, chunksize=CHUNKSIZE) as reader:
        # One session serves every chunk instead of opening and closing a
        # new session per chunk.
        with driver.session() as session:
            for df_chunk in reader:
                # Keep only the two id columns so unrelated CSV columns are
                # never sent to Neo4j as query parameters, and make sure the
                # ids are integers before they are matched against node ids.
                pairs = df_chunk[id_columns].astype(int)

                # One UNWIND-based write per chunk (see batch_merge_acted_in).
                session.execute_write(
                    batch_merge_acted_in, pairs.to_dict("records")
                )

def batch_merge_acted_in(tx, records):
    """
    Merge ACTED_IN relationships for a whole batch in a single query.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``;
            this function is passed to ``session.execute_write``, which
            supplies it.
        records: List of dicts, each with an 'actor_id' and a 'movie_id' key.

    Returns:
        None. An empty batch returns immediately without querying the
        database.

    Pairs whose Actor or Movie node is not found by the MATCH clauses
    produce no row and are therefore skipped. MERGE only creates a
    relationship that does not exist yet, so re-running the same batch does
    not create duplicates.
    """
    # Nothing to merge: skip the database round trip entirely.
    if not records:
        return

    # A single UNWIND query handles the whole batch instead of issuing one
    # statement per (actor, movie) pair.
    query = """
    UNWIND $pairs AS row
    MATCH (a:Actor {id: row.actor_id})
    MATCH (m:Movie {id: row.movie_id})
    MERGE (a)-[:ACTED_IN]->(m)
    """
    tx.run(query, pairs=records)

if __name__ == "__main__":
    # Script entry point: import the relationships from CSV_FILE_PATH.
    try:
        create_acted_in_relationships(CSV_FILE_PATH)
    finally:
        # Close the driver even when the import raises, so it is not leaked
        # on failure.
        driver.close()
