In [None]:
import pandas as pd
from neo4j import GraphDatabase



#### STEP 1: Import Genres (genre.csv)

# 1) Read the genre.csv (its "id" and "name" columns are used below)
df_genre = pd.read_csv("~/work/cinescope/data/csv-files/genre.csv")

# Build the query parameters once, as plain Python values: ids are cast to the
# built-in int, and a missing name becomes None so that Neo4j leaves the
# property unset instead of storing a float NaN as the genre name.
genre_rows = [
    {"id": int(genre_id), "name": genre_name if pd.notnull(genre_name) else None}
    for genre_id, genre_name in zip(df_genre["id"], df_genre["name"])
]

# 2) Create Genre nodes
# NOTE(review): the bolt URI and the empty password are hard-coded here;
# consider reading them from the environment before sharing this notebook.
driver = GraphDatabase.driver("bolt://neo4j:7687", auth=("neo4j",""))
try:
    with driver.session() as session:
        # A single UNWIND query replaces one network round trip per genre.
        # MERGE keeps the import idempotent: re-running the cell never
        # duplicates a Genre, and ON CREATE leaves existing names untouched.
        session.run(
            """
            UNWIND $rows AS row
            MERGE (g:Genre {id: row.id})
            ON CREATE SET g.name = row.name
            """,
            {"rows": genre_rows}
        ).consume()
finally:
    # Release the connection pool even if the query above fails.
    driver.close()

In [None]:
#### STEP 2: Import Movies (movies-master.csv)
#
# Streams movies-master.csv in chunks, MERGEs one Movie node per row and links
# each movie to the Genre nodes referenced by its "genres" column.


import json

chunksize = 10000


def _parse_genre_ids(raw_genres):
    """Extract the genre ids from one raw ``genres`` cell.

    Args:
        raw_genres: The cell value, expected to be a JSON string such as
            '[{"id":35,"name":"Comedy"}]'. Missing values (None/NaN) and
            malformed JSON are tolerated.

    Returns:
        A list of int genre ids; empty when the value is missing, is not valid
        JSON, or is not a JSON list. List entries that are not objects, or
        that have no "id", are skipped.

    Raises:
        TypeError, ValueError: If an "id" is present but cannot be converted
            with ``int()``.
    """
    try:
        parsed = json.loads(raw_genres)
    except (TypeError, ValueError):
        # TypeError: the cell is not a string (e.g. NaN for an empty cell).
        # ValueError: the string is not valid JSON (JSONDecodeError).
        # Unlike a bare ``except``, this lets KeyboardInterrupt through, so a
        # long-running import can still be interrupted.
        return []

    if not isinstance(parsed, list):
        return []

    genre_ids = []
    for entry in parsed:
        genre_id = entry.get("id") if isinstance(entry, dict) else None
        if genre_id is not None:
            genre_ids.append(int(genre_id))
    return genre_ids


# This step opens its own driver: the one created in Step 1 is closed at the
# end of that step and cannot be reused here.
# NOTE(review): the bolt URI and the empty password are hard-coded here;
# consider reading them from the environment before sharing this notebook.
driver = GraphDatabase.driver("bolt://neo4j:7687", auth=("neo4j",""))
try:
    with pd.read_csv("~/work/cinescope/data/csv-files/movies-master.csv", chunksize=chunksize) as reader:
        for df_chunk in reader:
            movie_rows = []
            genre_links = []

            for _, row in df_chunk.iterrows():
                movie_id = int(row["id"])

                # Missing values are sent as None so the property stays unset.
                movie_rows.append({
                    "id": movie_id,
                    "title": row["title"] if pd.notnull(row["title"]) else None,
                    "popularity": row["popularity"] if pd.notnull(row["popularity"]) else None,
                    "vote_average": row["vote_average"] if pd.notnull(row["vote_average"]) else None,
                })

                # row.get() keeps a chunk without a "genres" column non-fatal:
                # such rows simply produce no genre links.
                genre_links.extend(
                    {"movie_id": movie_id, "genre_id": genre_id}
                    for genre_id in _parse_genre_ids(row.get("genres"))
                )

            with driver.session() as session:
                # Create/MERGE the Movie nodes of this chunk in one round trip
                # instead of one query per row.
                session.run(
                    """
                    UNWIND $rows AS row
                    MERGE (m:Movie {id: row.id})
                    ON CREATE SET
                      m.title = row.title,
                      m.popularity = row.popularity,
                      m.vote_average = row.vote_average
                    """,
                    {"rows": movie_rows}
                ).consume()

                # Link to Genre nodes. MATCH (not MERGE) on both ends means a
                # link is only created when the Genre node already exists.
                if genre_links:
                    session.run(
                        """
                        UNWIND $links AS link
                        MATCH (m:Movie {id: link.movie_id})
                        MATCH (g:Genre {id: link.genre_id})
                        MERGE (m)-[:HAS_GENRE]->(g)
                        """,
                        {"links": genre_links}
                    ).consume()
finally:
    # Release the connection pool even if a chunk fails part-way through.
    driver.close()



In [None]:
#### STEP 3: Import People and Actor–Movie Relationships
#
# Loads person_ids.csv into an in-memory lookup, then streams
# actor-movie-ids-master.csv in chunks, MERGEing one Person node per actor and
# one ACTED_IN relationship per (actor, movie) pair.

import pandas as pd
from neo4j import GraphDatabase


def _value_or_default(value, default):
    """Return ``default`` when ``value`` is missing (None or NaN), else ``value``."""
    return default if pd.isnull(value) else value


# 1) Load person_ids.csv into memory (dictionary)
df_person = pd.read_csv("~/work/cinescope/data/csv-files/person_ids.csv")  # columns: [adult, id, name, popularity]

# Maps person id -> {"name", "popularity", "adult"} exactly as read from the
# CSV. If an id occurs more than once, the last row wins.
person_dict = {
    int(person_id): {"name": name, "popularity": popularity, "adult": adult}
    for person_id, name, popularity, adult in zip(
        df_person["id"], df_person["name"], df_person["popularity"], df_person["adult"]
    )
}

# 2) For actor-movie-ids-master.csv, process in chunks
chunksize = 10000

# This step opens its own driver: the one created in Step 1 is closed at the
# end of that step and cannot be reused here.
# NOTE(review): the bolt URI and the empty password are hard-coded here;
# consider reading them from the environment before sharing this notebook.
driver = GraphDatabase.driver("bolt://neo4j:7687", auth=("neo4j",""))
try:
    with pd.read_csv("~/work/cinescope/data/csv-files/actor-movie-ids-master.csv", chunksize=chunksize) as reader:
        for df_chunk in reader:
            people_by_id = {}   # actor_id -> Person parameters, deduplicated per chunk
            acted_in_links = []

            for raw_actor_id, raw_movie_id in zip(df_chunk["actor_id"], df_chunk["movie_id"]):
                actor_id = int(raw_actor_id)
                movie_id = int(raw_movie_id)

                if actor_id not in people_by_id:
                    # Lookup name, popularity, adult. An actor missing from
                    # person_ids.csv, or one whose field is empty (NaN), gets
                    # the same placeholder values, so a NaN is never written
                    # to the graph.
                    person_info = person_dict.get(actor_id, {})
                    people_by_id[actor_id] = {
                        "actor_id": actor_id,
                        "name": _value_or_default(person_info.get("name"), f"Unknown {actor_id}"),
                        "pop": _value_or_default(person_info.get("popularity"), 0.0),
                        "adult": _value_or_default(person_info.get("adult"), False),
                    }

                acted_in_links.append({"actor_id": actor_id, "movie_id": movie_id})

            with driver.session() as session:
                # MERGE the Person nodes of this chunk in one round trip.
                # ON CREATE means properties of existing people are kept.
                if people_by_id:
                    session.run(
                        """
                        UNWIND $people AS person
                        MERGE (p:Person {id: person.actor_id})
                        ON CREATE SET
                          p.name = person.name,
                          p.popularity = person.pop,
                          p.adult = person.adult
                        """,
                        {"people": list(people_by_id.values())}
                    ).consume()

                # Create the ACTED_IN relationships. The Movie is MATCHed, so
                # pairs whose movie was never imported create no relationship.
                if acted_in_links:
                    session.run(
                        """
                        UNWIND $links AS link
                        MATCH (p:Person {id: link.actor_id})
                        MATCH (m:Movie {id: link.movie_id})
                        MERGE (p)-[:ACTED_IN]->(m)
                        """,
                        {"links": acted_in_links}
                    ).consume()
finally:
    # Release the connection pool even if a chunk fails part-way through.
    driver.close()

