# Redoing LLM Generation For Each Vector 1 By 1

In [1]:
from __future__ import annotations

import os
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from pydantic import BaseModel, Field, conlist, constr, ConfigDict
import sys
from pathlib import Path

# Add parent directory to path to import from implementation package
# Notebooks are in implementation/notebooks/, so we go up two levels to project root
sys.path.insert(0, str(Path().resolve().parent.parent))

from implementation.vectorize import create_plot_analysis_vector_text, create_plot_events_vector_text
from openai import OpenAI
from dotenv import load_dotenv
from typing import List, Optional
from pathlib import Path
from implementation.movie import IMDBMovie
from implementation.schemas import (
    ParentalGuideItem, 
    IMDBFeaturedReview,
    IMDBReviewTheme
)
from implementation.llm_generations import generate_llm_metadata

# Load environment variables (the OpenAI API key is read from them below)
load_dotenv()

# Build the shared OpenAI client once, or stop immediately when the key is unset or empty
api_key = os.environ.get("OPENAI_API_KEY")
if api_key:
    client = OpenAI(api_key=api_key)
else:
    raise ValueError(
        "OPENAI_API_KEY environment variable not set. "
        "Please set it before importing this module."
    )

In [4]:
# LOAD MOVIES

# Read the saved movie dump (a JSON array of dictionaries) in one go
json_path = Path("../../saved_imdb_movies.json")
movies_data = json.loads(json_path.read_text(encoding="utf-8"))

# Each dictionary supplies the keyword arguments for one IMDBMovie
movies = [IMDBMovie(**record) for record in movies_data]

In [None]:
# Print every movie's list position next to its title ("<index>: <title>")
for index, movie in enumerate(movies):
    print(index, movie.title, sep=": ")

In [6]:
# Run the full metadata generation for a single movie, selected by list position
movie = movies[3]

generated_metadata = generate_llm_metadata(
    # Basic movie fields
    title=movie.title,
    overview=movie.overview,
    genres=movie.genres,
    # Plot sources
    plot_summaries=movie.debug_plot_summaries,
    plot_synopses=movie.debug_synopses,
    plot_keywords=movie.plot_keywords,
    overall_keywords=movie.overall_keywords,
    # Reviews and reception
    featured_reviews=movie.featured_reviews,
    reception_summary=movie.reception_summary,
    audience_reception_attributes=movie.review_themes,
    # Maturity information
    maturity_rating=movie.maturity_rating,
    maturity_reasoning=movie.maturity_reasoning,
    parental_guide_items=movie.parental_guide_items,
)

# Bare last expression so the notebook displays the returned value
generated_metadata

Generating llm metadata for frozen
✓ plot_events_metadata for frozen: SUCCESS
✓ watch_context_metadata for frozen: SUCCESS
✓ reception_metadata for frozen: SUCCESS
✓ plot_analysis_metadata for frozen: SUCCESS
✓ viewer_experience_metadata for frozen: SUCCESS
✓ narrative_techniques_metadata for frozen: SUCCESS
✓ production_metadata for frozen: SUCCESS


{'plot_events_metadata': PlotEventsMetadata(plot_summary='Princess Elsa of Arendelle and her younger sister Anna play as children using Elsa\'s secret power to create ice and snow; a stray blast from Elsa accidentally strikes Anna in the head, turning some of her hair white. Their parents take them to the trolls led by Grand Pabbie, who heal Anna and erase her memories of magic but warn that Elsa must learn to control her power. The royal family isolates the girls; Elsa is taught to suppress emotion and wear gloves, growing fearful and withdrawn, while Anna becomes lonely. Years later, at Elsa\'s coronation, Anna meets Prince Hans of the Southern Isles and impulsively becomes engaged to him. When Anna presses Elsa about the sudden engagement, Elsa panics, loses control, and reveals her powers to the crowd. Elsa flees across the frozen fjord; her uncontrolled magic plunges Arendelle into an eternal winter. Anna volunteers to find Elsa and restore summer. She travels north, buying suppli

## Plot Analysis

In [None]:
# Movie to regenerate, selected by list position
movie = movies[18]

print("Generating plot analysis metadata for", movie.title)

# NOTE(review): generate_plot_analysis_metadata is not among the names imported in the
# import cell visible in this notebook - confirm it is in scope before running.
# The synopsis is read from movie.plot_events_metadata, so that attribute must already
# be populated on the selected movie.
plot_analysis_metadata = generate_plot_analysis_metadata(
    title=movie.title,
    overview=movie.overview,
    genres=movie.genres,
    plot_keywords=movie.plot_keywords,
    plot_synopsis=movie.plot_events_metadata.plot_summary,
    featured_reviews=movie.featured_reviews,
    reception_summary=movie.reception_summary,
)

# Pretty-print the result as indented JSON
print(
    json.dumps(
        plot_analysis_metadata.model_dump(),
        indent=4,
    )
)

In [None]:
# Print the str() form of plot_analysis_metadata; the name must already be bound by an earlier cell.
print(plot_analysis_metadata)

## Viewer Experience

In [None]:
# Movie to regenerate, selected by list position
movie = movies[31]

print("Generating viewer experience metadata for", movie.title)

# NOTE(review): generate_viewer_experience_metadata is not among the names imported in the
# import cell visible in this notebook - confirm it is in scope before running.
# The synopsis is read from movie.plot_events_metadata, so that attribute must already
# be populated on the selected movie.
viewer_experience_metadata = generate_viewer_experience_metadata(
    title=movie.title,
    genres=movie.genres,
    # Plot and keyword inputs
    plot_synopsis=movie.plot_events_metadata.plot_summary,
    plot_keywords=movie.plot_keywords,
    overall_keywords=movie.overall_keywords,
    # Reviews and reception
    featured_reviews=movie.featured_reviews,
    reception_summary=movie.reception_summary,
    audience_reception_attributes=movie.review_themes,
    # Maturity information
    maturity_rating=movie.maturity_rating,
    maturity_reasoning=movie.maturity_reasoning,
    parental_guide_items=movie.parental_guide_items,
)

# Pretty-print the result as indented JSON
print(
    json.dumps(
        viewer_experience_metadata.model_dump(),
        indent=4,
    )
)

In [None]:
# Print the str() form of viewer_experience_metadata; the name must already be bound by an earlier cell.
print(viewer_experience_metadata)

## Watch Context

In [None]:
# Movie to regenerate, selected by list position
movie = movies[31]

print("Generating watch context metadata for", movie.title)

# NOTE(review): generate_watch_context_metadata is not among the names imported in the
# import cell visible in this notebook - confirm it is in scope before running.
# NOTE(review): `overview` is fed the generated plot summary rather than movie.overview -
# confirm this is intentional.
watch_context_metadata = generate_watch_context_metadata(
    title=movie.title,
    genres=movie.genres,
    overview=movie.plot_events_metadata.plot_summary,
    # Keyword inputs
    plot_keywords=movie.plot_keywords,
    overall_keywords=movie.overall_keywords,
    # Reviews and reception
    featured_reviews=movie.featured_reviews,
    reception_summary=movie.reception_summary,
)

# Pretty-print the result as indented JSON
print(
    json.dumps(
        watch_context_metadata.model_dump(),
        indent=4,
    )
)

In [None]:
# Print the str() form of watch_context_metadata; the name must already be bound by an earlier cell.
print(watch_context_metadata)

## Narrative Techniques

In [None]:
# Movie to regenerate, selected by list position
movie = movies[10] # 3, 10

print("Generating narrative techniques metadata for", movie.title)

# NOTE(review): generate_narrative_techniques_metadata is not among the names imported in the
# import cell visible in this notebook - confirm it is in scope before running.
# The synopsis is read from movie.plot_events_metadata, so that attribute must already
# be populated on the selected movie.
narrative_techniques_metadata = generate_narrative_techniques_metadata(
    title=movie.title,
    plot_synopsis=movie.plot_events_metadata.plot_summary,
    # Keyword inputs
    plot_keywords=movie.plot_keywords,
    overall_keywords=movie.overall_keywords,
    # Reviews and reception
    featured_reviews=movie.featured_reviews,
    reception_summary=movie.reception_summary,
)

# Pretty-print the result as indented JSON
print(
    json.dumps(
        narrative_techniques_metadata.model_dump(),
        indent=4,
    )
)

In [None]:
# Print the str() form of narrative_techniques_metadata; the name must already be bound by an earlier cell.
print(narrative_techniques_metadata)

## Production

In [None]:
# Movie to regenerate, selected by list position
movie = movies[15] # 3, 10

print(f"Generating production metadata for {movie.title}")

# Combined keyword list, used only for the printout below; the generator call
# receives the two keyword lists separately.
keywords = movie.plot_keywords + movie.overall_keywords

# Single quotes inside the replacement field: reusing the f-string's own double
# quotes there is a SyntaxError on Python versions before 3.12.
print(f"Keywords: {','.join(keywords)}")

# NOTE(review): generate_production_metadata is not among the names imported in the
# import cell visible in this notebook - confirm it is in scope before running.
# The synopsis is read from movie.plot_events_metadata, so that attribute must already
# be populated on the selected movie.
production_metadata = generate_production_metadata(
    title=movie.title,
    plot_synopsis=movie.plot_events_metadata.plot_summary,
    plot_keywords=movie.plot_keywords,
    overall_keywords=movie.overall_keywords,
    featured_reviews=movie.featured_reviews
)

# Pretty-print the result as indented JSON
print(json.dumps(production_metadata.model_dump(), indent=4))

## Reception

In [None]:
# Movie to regenerate, selected by list position
movie = movies[45] # 3, 6, 31, 44, 45

print("Generating reception metadata for", movie.title)

# NOTE(review): generate_reception_metadata is not among the names imported in the
# import cell visible in this notebook - confirm it is in scope before running.
reception_metadata = generate_reception_metadata(
    title=movie.title,
    featured_reviews=movie.featured_reviews,
    reception_summary=movie.reception_summary,
    audience_reception_attributes=movie.review_themes,
)

# Pretty-print the result as indented JSON
print(
    json.dumps(
        reception_metadata.model_dump(),
        indent=4,
    )
)

## ADDING TO MOVIES

In [8]:
import concurrent.futures

def process_movie(movie):
    """Generate all LLM metadata for one movie and store it on the movie object.

    Calls generate_llm_metadata with the movie's fields, rejects a result in
    which any value is None, then copies the seven metadata entries onto the
    matching attributes of ``movie``.

    Returns:
        ``(movie, metadata_dict)`` on success. On any exception the error is
        printed together with ``movie.id`` and ``(None, None)`` is returned.
    """
    # Result keys copied onto the movie; each attribute shares its key's name.
    metadata_fields = (
        "plot_events_metadata",
        "plot_analysis_metadata",
        "viewer_experience_metadata",
        "watch_context_metadata",
        "narrative_techniques_metadata",
        "production_metadata",
        "reception_metadata",
    )

    try:
        generated = generate_llm_metadata(
            title=movie.title,
            overview=movie.overview,
            plot_summaries=movie.debug_plot_summaries,
            plot_synopses=movie.debug_synopses,
            plot_keywords=movie.plot_keywords,
            featured_reviews=movie.featured_reviews,
            genres=movie.genres,
            overall_keywords=movie.overall_keywords,
            reception_summary=movie.reception_summary,
            audience_reception_attributes=movie.review_themes,
            maturity_rating=movie.maturity_rating,
            maturity_reasoning=movie.maturity_reasoning,
            parental_guide_items=movie.parental_guide_items,
        )

        # Validate before touching the movie so a None entry leaves it unmodified.
        none_keys = [name for name, payload in generated.items() if payload is None]
        if none_keys:
            raise RuntimeError(f"generate_llm_metadata returned None for key: {none_keys[0]}")

        for field in metadata_fields:
            setattr(movie, field, generated[field])
    except Exception as e:
        print(f"Error processing movie {movie.id}: {e}")
        return None, None

    return (movie, generated)

fixed_movies = []
failed_movies = []
# Variable to hold the last scraped reviews, preserving behavior for subsequent cells
scraped_reviews = None

# Run process_movie over every movie on a pool of 10 worker threads
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # map() yields results in the order of the input iterable, so zip() lines each
    # result up with the movie it was produced from.
    results = executor.map(process_movie, movies)

    for source_movie, (updated_movie, _metadata) in zip(movies, results):
        if updated_movie is not None:
            fixed_movies.append(updated_movie)
        else:
            # process_movie reports failure as (None, None); keep the source movie
            # itself so the failure list holds real objects that can be serialized.
            failed_movies.append(source_movie)

# Serialize before opening each file: opening with "w" truncates it, so a
# serialization error must not be able to leave a half-written dataset behind.
# NOTE: saved_imdb_movies.json is overwritten with the successfully processed
# movies only; movies that failed are written to failed_fetches.json instead.
if fixed_movies:
    fixed_payload = json.dumps(
        [movie.model_dump() for movie in fixed_movies], indent=2, ensure_ascii=False
    )
    with open("../../saved_imdb_movies.json", "w", encoding="utf-8") as f:
        f.write(fixed_payload)
if failed_movies:
    failed_payload = json.dumps(
        [movie.model_dump() for movie in failed_movies], indent=2, ensure_ascii=False
    )
    with open("../../failed_fetches.json", "w", encoding="utf-8") as f:
        f.write(failed_payload)

print(f"Successfully updated {len(fixed_movies)} movies with LLM metadata")
print(f"Failed to update {len(failed_movies)} movies")


Generating llm metadata for ferris bueller's day offGenerating llm metadata for zootopia

Generating llm metadata for school of rock
Generating llm metadata for frozen
Generating llm metadata for the princess bride
Generating llm metadata for coco
Generating llm metadata for klaus
Generating llm metadata for up
Generating llm metadata for mulan
Generating llm metadata for shrek
✓ plot_events_metadata for school of rock: SUCCESS
✓ plot_events_metadata for mulan: SUCCESS
✓ plot_events_metadata for shrek: SUCCESS
✓ plot_events_metadata for up: SUCCESS
✓ plot_events_metadata for zootopia: SUCCESS
✓ plot_events_metadata for klaus: SUCCESS
✓ plot_events_metadata for coco: SUCCESS
✓ plot_events_metadata for frozen: SUCCESS
✓ watch_context_metadata for frozen: SUCCESS
✓ reception_metadata for frozen: SUCCESS
✓ plot_events_metadata for ferris bueller's day off: SUCCESS
✓ watch_context_metadata for coco: SUCCESS
✓ reception_metadata for coco: SUCCESS
✓ watch_context_metadata for shrek: SUCCESS
✓