# Use this notebook for quickly trying out different iterations of embedding-related code (e.g. vibe metadata generation)

In [1]:
import json
from pathlib import Path
from classes import IMDBMovie
from llm_generations import generate_vibe_metadata

# Read the saved movies from disk (the path is relative to the notebook location)
json_path = Path("../saved_imdb_movies.json")
with json_path.open("r", encoding="utf-8") as f:
    movies_data = json.loads(f.read())

# Build one IMDBMovie object from every stored dictionary
movies = [IMDBMovie(**raw_movie) for raw_movie in movies_data]

In [2]:
# List every loaded movie with its index so a specific one can be picked by
# position in later cells.
for i, movie in enumerate(movies):
    print(f"{i}: {movie.title}")

0: spirited away
1: hereditary
2: everything everywhere all at once
3: the dark knight
4: klaus
5: the lion king
6: dune: part two
7: the iron giant
8: spider-man: across the spider-verse
9: a complete unknown
10: godzilla minus one
11: the wild robot
12: it's a wonderful life
13: poltergeist
14: the lego movie
15: seven samurai
16: leap year
17: parasite
18: the godfather
19: pulp fiction
20: the shawshank redemption
21: the matrix
22: inception
23: casablanca
24: 2001: a space odyssey
25: citizen kane
26: alien
27: blade runner
28: psycho
29: get out
30: mad max: fury road
31: no country for old men
32: la la land
33: eternal sunshine of the spotless mind
34: amélie
35: pan's labyrinth
36: moonlight
37: the grand budapest hotel
38: city of god
39: whiplash
40: the silence of the lambs
41: princess mononoke
42: inside out
43: the departed
44: toy story
45: crouching tiger, hidden dragon
46: the lives of others
47: the intouchables
48: the shining


In [3]:
# Select a single movie (index 1 in the list) to experiment with.
movie = movies[1]
# Bare expression: its value is shown as the cell output when run in the notebook.
movie.title

'hereditary'

In [12]:
# Inspect the genres of the currently selected movie.
movie.genres

['drama', 'horror', 'mystery', 'thriller']

In [4]:
# Try generate_vibe_metadata on the currently selected movie.

# The synopsis argument is a list (or None): wrap the movie's single synopsis
# string when there is one, otherwise fall back to the debug synopses, and pass
# None when neither is available.
synopsis_list = [movie.synopsis] if movie.synopsis else (movie.debug_synopses or None)

# The function takes plain dicts, so dump each ParentalGuideItem model.
parental_guide_dicts = [guide_item.model_dump() for guide_item in movie.parental_guide_items]

# Collect every argument in one place, then make the call.
vibe_inputs = {
    "overview": movie.overview,
    "genres": movie.genres,
    "overall_keywords": movie.overall_keywords,
    "plot_keywords": movie.plot_keywords,
    "synopsis": synopsis_list,
    # An empty/missing list of plot summaries is passed as None.
    "plot_summaries": movie.debug_plot_summaries or None,
    "maturity_rating": movie.maturity_rating,
    "maturity_reasoning": movie.maturity_reasoning,
    "parental_guide_items": parental_guide_dicts,
    "reception_summary": movie.reception_summary,
}
result = generate_vibe_metadata(**vibe_inputs)

# Bare expression: displayed as the cell output.
result

('vibe',
 VibeMetadata(mood_atmosphere='gritty', tonal_valence='bittersweet', pacing_momentum='slow-burn', kinetic_intensity='low-key', tension_pressure='tense', unpredictability_twistiness='twisty', scariness_level='scary', fear_mode='creepy', humor_level='serious', humor_flavor=None, violence_intensity='violent', gore_body_grossness='gory', romance_prominence='subtle', romance_tone='sweet', sexual_explicitness=None, erotic_charge=None, sexual_tone=None, emotional_heaviness='heavy', emotional_volatility='simmering', weirdness_surrealism='weird', attention_demand='attentive', narrative_complexity='layered', ambiguity_interpretive_ness='suggestive', sense_of_scale='grand'))

In [5]:
# The call above returned a 2-tuple; index 1 holds the VibeMetadata object.
result[1]

VibeMetadata(mood_atmosphere='gritty', tonal_valence='bittersweet', pacing_momentum='slow-burn', kinetic_intensity='low-key', tension_pressure='tense', unpredictability_twistiness='twisty', scariness_level='scary', fear_mode='creepy', humor_level='serious', humor_flavor=None, violence_intensity='violent', gore_body_grossness='gory', romance_prominence='subtle', romance_tone='sweet', sexual_explicitness=None, erotic_charge=None, sexual_tone=None, emotional_heaviness='heavy', emotional_volatility='simmering', weirdness_surrealism='weird', attention_demand='attentive', narrative_complexity='layered', ambiguity_interpretive_ness='suggestive', sense_of_scale='grand')

In [6]:
# Look at a single field of the generated metadata.
# NOTE(review): the recorded output for this cell is the string 'none', while the
# repr recorded for result[1] shows sexual_explicitness=None — the outputs may
# come from different runs; confirm.
result[1].sexual_explicitness

'none'

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Tuple

def process_single_movie(args: Tuple[int, IMDBMovie]) -> Tuple[int, IMDBMovie, bool, str]:
    """
    Generate vibe metadata for one movie and report the outcome.

    The index and movie arrive as a single tuple so the function can be
    submitted to an executor with one argument. Ordinary errors are never
    raised to the caller; they are reported through the returned tuple so one
    failing movie does not abort a whole batch.

    Args:
        args: Tuple of (index, movie) where index is the movie's position in
            the original list.

    Returns:
        Tuple of (index, movie, success, error_message)
        - index: The index that was passed in (lets the caller restore order)
        - movie: A copy of the movie with ``vibe_metadata`` set on success,
          or the original, unmodified movie on failure
        - success: True only if metadata was generated
        - error_message: Empty string on success; on failure a non-empty
          description (for exceptions: ``"<ExceptionType>: <message>"``)
    """
    i, movie = args

    try:
        # The synopsis argument is a list (or None): prefer the movie's own
        # synopsis, fall back to the debug synopses, else pass None.
        if movie.synopsis:
            synopsis_list = [movie.synopsis]
        else:
            synopsis_list = movie.debug_synopses or None

        # Convert ParentalGuideItem objects to dicts for the function
        parental_guide_dicts = [item.model_dump() for item in movie.parental_guide_items]

        # generate_vibe_metadata returns a 2-tuple; only the second element
        # (the metadata) is needed here.
        _, vibe_metadata = generate_vibe_metadata(
            overview=movie.overview,
            genres=movie.genres,
            overall_keywords=movie.overall_keywords,
            plot_keywords=movie.plot_keywords,
            synopsis=synopsis_list,
            plot_summaries=movie.debug_plot_summaries or None,
            maturity_rating=movie.maturity_rating,
            maturity_reasoning=movie.maturity_reasoning,
            parental_guide_items=parental_guide_dicts,
            reception_summary=movie.reception_summary,
        )

        if vibe_metadata is None:
            # Generation failed but keep original movie
            return (i, movie, False, "Generation returned None")

        # model_copy builds a new instance, leaving the input movie untouched
        updated_movie = movie.model_copy(update={"vibe_metadata": vibe_metadata})
        return (i, updated_movie, True, "")

    except Exception as e:
        # Deliberately broad: this is the per-task boundary, and any failure
        # should be reported rather than raised. The exception type is included
        # because str(e) alone can be empty or ambiguous (e.g. a bare
        # TimeoutError, or a KeyError that only shows the key).
        return (i, movie, False, f"{type(e).__name__}: {e}")

# Process all movies in parallel using ThreadPoolExecutor.
# Threads are a good fit for I/O-bound tasks like API calls. The pool is capped
# explicitly at 10 workers instead of relying on the executor's default size.
print(f"Processing {len(movies)} movies in parallel...")
updated_movies_dict = {}  # index -> movie, so the original order can be rebuilt
failed_movies = []  # (index, title, reason) for every movie that was not updated

# Create list of (index, movie) tuples for processing
movie_args = list(enumerate(movies))

with ThreadPoolExecutor(max_workers=10) as executor:
    # Remember the (index, movie) pair behind every future. Keeping the index
    # here means a failed future can be attributed to the right slot without
    # searching the list by equality (which is slow and ambiguous when two
    # movies compare equal).
    future_to_args = {
        executor.submit(process_single_movie, args): args for args in movie_args
    }

    # Handle tasks in completion order, which generally differs from list order
    completed = 0
    for future in as_completed(future_to_args):
        submitted_index, submitted_movie = future_to_args[future]
        completed += 1

        try:
            index, updated_movie, success, error_msg = future.result()
        except Exception as e:
            # process_single_movie reports its own errors in its return value,
            # so this only triggers for unexpected failures while retrieving
            # the result. Keep the original movie so nothing is lost.
            print(f"[{completed}/{len(movies)}] ✗ {submitted_movie.title} - Unexpected error: {str(e)}")
            updated_movies_dict[submitted_index] = submitted_movie
            failed_movies.append(
                (submitted_index, submitted_movie.title, f"Unexpected error: {str(e)}")
            )
            continue

        updated_movies_dict[index] = updated_movie
        if success:
            print(f"[{completed}/{len(movies)}] ✓ {submitted_movie.title}")
        else:
            print(f"[{completed}/{len(movies)}] ✗ {submitted_movie.title} - {error_msg}")
            failed_movies.append((index, submitted_movie.title, error_msg))

# Reconstruct movies list in original order
updated_movies = [updated_movies_dict[i] for i in range(len(movies))]

print(f"\nCompleted processing {len(movies)} movies")
print(f"Successfully updated: {len(updated_movies) - len(failed_movies)}")
print(f"Failed: {len(failed_movies)}")

if failed_movies:
    print("\nFailed movies:")
    for idx, title, reason in failed_movies:
        print(f"  {idx}: {title} - {reason}")

# Save updated movies back to JSON file
json_path = Path("../saved_imdb_movies.json")
# Convert IMDBMovie objects to dictionaries for JSON serialization
movies_data_updated = [updated.model_dump() for updated in updated_movies]

# Serialize completely before opening the file: opening with "w" truncates it,
# and this path is overwritten in place, so a serialization error raised
# mid-write would otherwise leave the data file truncated.
serialized_movies = json.dumps(movies_data_updated, indent=2, ensure_ascii=False)
with open(json_path, "w", encoding="utf-8") as f:
    f.write(serialized_movies)

print(f"\n✓ Saved {len(updated_movies)} movies to {json_path}")

Processing 49 movies in parallel...
[1/49] ✓ hereditary
[2/49] ✓ spider-man: across the spider-verse
[3/49] ✓ spirited away
[4/49] ✓ the lion king
[5/49] ✓ dune: part two
[6/49] ✓ everything everywhere all at once
[7/49] ✓ klaus
[8/49] ✓ the dark knight
[9/49] ✓ a complete unknown
[10/49] ✓ the iron giant
[11/49] ✓ godzilla minus one
[12/49] ✓ the wild robot
[13/49] ✓ it's a wonderful life
[14/49] ✓ the lego movie
[15/49] ✓ leap year
[16/49] ✓ poltergeist
[17/49] ✓ seven samurai
[18/49] ✓ the godfather
[19/49] ✓ parasite
[20/49] ✓ pulp fiction
[21/49] ✓ the shawshank redemption
[22/49] ✓ casablanca
[23/49] ✓ citizen kane
[24/49] ✓ blade runner
[25/49] ✓ inception
[26/49] ✓ alien
[27/49] ✓ 2001: a space odyssey
[28/49] ✓ get out
[29/49] ✓ the matrix
[30/49] ✓ psycho
[31/49] ✓ no country for old men
[32/49] ✓ mad max: fury road
[33/49] ✓ la la land
[34/49] ✓ moonlight
[35/49] ✓ amélie
[36/49] ✓ eternal sunshine of the spotless mind
[37/49] ✓ pan's labyrinth
[38/49] ✓ city of god
[39/49] 