In [None]:
import os
import re
import math
import copy
import numpy as np
import sys
# Add the root directory /workspaces/llm_etl to sys.path
sys.path.append(os.path.abspath(os.path.join('..', '..')))
# Now import your module
from spider2_utils import load_csv_database

-setup-

In [None]:
import pandas as pd
# Load the Pagila tables and bind the six this analysis needs to short names.
# NOTE(review): rows_limit=-1 presumably disables any row cap — confirm
# against load_csv_database.
_database = load_csv_database("Pagila", rows_limit=-1)
actor, film_actor, film, film_category, category, language = (
    _database[table_name]
    for table_name in (
        "actor",
        "film_actor",
        "film",
        "film_category",
        "category",
        "language",
    )
)

### Question
Could you help me determine which actor starred most frequently in English-language children's category films that were rated either G or PG, had a running time of 120 minutes or less, and were released between 2000 and 2010? Please provide the actor's full name.

### User Intent 1: Drop 'last_update' columns and rename overlapping 'name' columns for clarity

In [None]:
# Strip the 'last_update' column from every table and give the two generic
# 'name' columns table-specific names so they stay distinguishable once the
# tables are merged.
# drop() is used without inplace=True so the objects obtained from
# `_database` are not mutated, and errors='ignore' lets this cell be re-run
# after the column has already been removed.
actor = actor.drop(columns=['last_update'], errors='ignore')
film_actor = film_actor.drop(columns=['last_update'], errors='ignore')
film = film.drop(columns=['last_update'], errors='ignore')
film_category = film_category.drop(columns=['last_update'], errors='ignore')
category = (
    category
    .drop(columns=['last_update'], errors='ignore')
    .rename(columns={'name': 'category_name'})
)
language = (
    language
    .drop(columns=['last_update'], errors='ignore')
    .rename(columns={'name': 'language_name'})
)

### User Intent 2: Merge actor with film_actor to associate actors with films

In [None]:
# Inner-join actors to their film_actor rows: one row per (actor, film) pairing.
actor_film = actor.merge(film_actor, on='actor_id', how='inner')

### User Intent 3: Merge with film to get film details

In [None]:
# Attach the film table's columns to every actor/film pairing.
actor_film_details = actor_film.merge(film, on='film_id', how='inner')

### User Intent 4: Merge with film_category to get category_id

In [None]:
# Bring in category_id for each film via the film_category link table.
actor_film_cat = actor_film_details.merge(film_category, on='film_id', how='inner')

### User Intent 5: Merge with category to get category name

In [None]:
# Resolve category_id to the category's columns (including category_name).
actor_film_cat_name = actor_film_cat.merge(category, on='category_id', how='inner')

### User Intent 6: Merge with language to get language name

In [None]:
# Resolve language_id to the language's columns (including language_name).
actor_film_full = actor_film_cat_name.merge(language, on='language_id', how='inner')

### User Intent 7: Filter rows to English-language Children films rated G or PG, at most 120 minutes long, and released between 2000 and 2010

In [None]:
# Keep only the actor/film rows that satisfy every condition in the question:
# Children category, English language, rated G or PG, released 2000-2010
# (between() is inclusive on both ends) and at most 120 minutes long.
filtered = actor_film_full.loc[
    actor_film_full['category_name'].eq('Children')
    & actor_film_full['language_name'].eq('English')
    & actor_film_full['rating'].isin(['G', 'PG'])
    & actor_film_full['release_year'].between(2000, 2010)
    & actor_film_full['length'].le(120)
]

### User Intent 8: Create full_name column for actors

In [None]:
# Build the actor's display name as "<first_name> <last_name>".
# assign() returns a new frame; writing a column directly onto the
# boolean-filtered slice can trigger pandas' SettingWithCopyWarning.
filtered = filtered.assign(
    full_name=filtered['first_name'] + ' ' + filtered['last_name']
)

### User Intent 9: Group by actor and count number of qualifying films

In [None]:
# Count the qualifying films per actor and rank actors from most to fewest.
# actor_id is part of the group key so two actors who share a name are
# counted separately.
# kind='stable' keeps tied actors in the groupby's sorted key order
# (actor_id, then full_name), so the ranking — and whichever row ends up
# first — is deterministic; the default quicksort gives no such guarantee.
actor_film_count = (
    filtered.groupby(['actor_id', 'full_name'])
    .agg(num_films=('film_id', 'count'))
    .reset_index()
    .sort_values(by='num_films', ascending=False, kind='stable')
)

### User Intent 10: Get actor with most qualifying children films

In [None]:
# The ranking is already sorted by num_films descending, so the first row is
# the actor with the most qualifying films; report only the full name.
top_actor = actor_film_count.iloc[:1]
final_result = top_actor.loc[:, ['full_name']]
print(final_result)