In [None]:
%cd ../BackEnd

import json
import pandas as pd
from sqlalchemy.orm import Session
from database import SessionLocal
from models.credit import Credit
from models.person import Person
from models.cast_member import CastMember
from models.crew_member import CrewMember

In [None]:
%cd ../DB

def _parse_credit_members(row, column: str, content_id) -> list:
    """Decode the JSON array stored in ``row[column]`` into a list of dicts.

    Returns an empty list when the column is absent, the cell is empty (NaN),
    the text is not valid JSON, or the decoded value is not a list. Entries
    of the list that are not JSON objects are dropped, because the caller
    reads them with ``dict.get``.
    """
    if column not in row:
        return []

    raw = row[column]
    if pd.isna(raw):
        return []

    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError; TypeError covers non-text cells.
        print(f"Skipping unparseable '{column}' for movie {content_id}: {exc}")
        return []

    if not isinstance(parsed, list):
        print(f"Skipping '{column}' for movie {content_id}: expected a JSON array")
        return []

    return [member for member in parsed if isinstance(member, dict)]


def _queue_person_and_credit(member: dict, content_id,
                             seen_person_ids: set, seen_credit_ids: set,
                             persons_to_insert: list, credits_to_insert: list):
    """Queue the Person and Credit rows for one cast/crew entry.

    Returns ``(credit_id, person_id)``, or ``None`` when either identifier is
    missing/falsy and the entry must be skipped. A person or credit whose id
    is already in the corresponding ``seen_*`` set is not queued again.
    """
    credit_id = member.get("credit_id")  # e.g. "52fe4226c3a36847f8007c27" - independent of the content type
    person_id = member.get("id")         # TMDB person ID
    if not credit_id or not person_id:
        return None

    if person_id not in seen_person_ids:
        seen_person_ids.add(person_id)
        persons_to_insert.append({
            "person_id":    person_id,
            "name":         member.get("name"),
            "profile_path": member.get("profile_path"),
        })

    if credit_id not in seen_credit_ids:
        seen_credit_ids.add(credit_id)
        credits_to_insert.append({
            "credit_id":   credit_id,
            "content_id":  content_id,
            "content_type": "movie"
        })

    return credit_id, person_id


def load_movie_credits_bulk(csv_path: str, batch_size: int = 1000) -> None:
    """
    Import movie credits from a CSV file into the database in chunks.

    The CSV is read with pandas ``batch_size`` rows at a time. The columns
    used are:
      - 'id': the movie TMDB ID
      - 'cast': JSON array cast members
      - 'crew': JSON array crew members

    For every chunk, Person, Credit, CastMember and CrewMember mappings are
    collected and written with ``bulk_insert_mappings``, committing after each
    table. Cast/crew entries without a ``credit_id`` or ``id`` are skipped, and
    cells that cannot be decoded are reported and skipped.

    Persons and credits are de-duplicated across the WHOLE file, not per
    chunk: an id that recurs in a later chunk must not be inserted twice.
    (Presumably ``person_id`` / ``credit_id`` are unique keys - confirm
    against the models.)

    Any exception raised while processing is caught: the session is rolled
    back, the error is printed, and the function returns normally. Chunks
    committed before the failure stay committed.

    Args:
        csv_path: Path of the credits CSV file.
        batch_size: Number of CSV rows per chunk.
    """
    # Ids already queued during this run. These live for the whole import so
    # that a person/credit seen in an earlier chunk is never queued again.
    seen_person_ids = set()
    seen_credit_ids = set()

    chunk_iter = pd.read_csv(csv_path,
                             chunksize=batch_size,
                             sep=",",
                             encoding="utf-8",
                             quotechar='"')
    db = SessionLocal()
    try:
        for chunk_index, chunk_df in enumerate(chunk_iter, start=1):
            print(f"Processing chunk #{chunk_index} with {len(chunk_df)} rows...")

            # Fresh per-chunk accumulators for the bulk inserts.
            persons_to_insert = []
            credits_to_insert = []
            cast_to_insert = []
            crew_to_insert = []

            for _, row in chunk_df.iterrows():
                content_id = row["id"]  # movie TMDB ID

                # NOTE(review): as before, cast/crew rows are queued even when
                # their credit_id was already seen - confirm that duplicates
                # are acceptable for CastMember/CrewMember.
                for member in _parse_credit_members(row, "cast", content_id):
                    ids = _queue_person_and_credit(
                        member, content_id,
                        seen_person_ids, seen_credit_ids,
                        persons_to_insert, credits_to_insert,
                    )
                    if ids is None:
                        continue
                    credit_id, person_id = ids
                    cast_to_insert.append({
                        "credit_id":  credit_id,
                        "person_id":  person_id,
                        "character":  member.get("character"),
                        "order":      member.get("order")
                    })

                for member in _parse_credit_members(row, "crew", content_id):
                    ids = _queue_person_and_credit(
                        member, content_id,
                        seen_person_ids, seen_credit_ids,
                        persons_to_insert, credits_to_insert,
                    )
                    if ids is None:
                        continue
                    credit_id, person_id = ids
                    crew_to_insert.append({
                        "credit_id":  credit_id,
                        "person_id":  person_id,
                        "department": member.get("department"),
                        "job":        member.get("job")
                    })

            # Insert order matters: persons and credits go first because the
            # cast/crew rows reference them by id.
            pending_inserts = (
                (Person, persons_to_insert, "persons", "Persons inserted."),
                (Credit, credits_to_insert, "credits", "Credits inserted."),
                (CastMember, cast_to_insert, "cast entries", "Cast inserted."),
                (CrewMember, crew_to_insert, "crew entries", "Crew inserted."),
            )
            for model, rows, noun, done_message in pending_inserts:
                if not rows:
                    continue
                print(f"Bulk inserting {len(rows)} {noun} in chunk #{chunk_index}...")
                db.bulk_insert_mappings(model, rows)
                db.commit()
                print(done_message)

        print("All movie credits chunks processed successfully!")

    except Exception as e:
        # Top-level boundary of the import: undo the open transaction and
        # report the failure instead of propagating it to the caller.
        db.rollback()
        print(f"Error: {e}")
    finally:
        db.close()

if __name__ == "__main__":
    # Entry point: import the credits CSV from the current working directory.
    # The large batch size means a file of up to 100000 rows is read as a
    # single chunk.
    credits_csv = "movies_credits.csv"
    rows_per_chunk = 100000
    load_movie_credits_bulk(credits_csv, batch_size=rows_per_chunk)
    print("Movie credits import complete!")
