In [None]:
# Cell 1: Setup
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline
sns.set(style='whitegrid', palette='husl')
df = pd.read_csv('../data/raw/movies_1990_2025.csv')

# Cell 2: Distributions
# Draws a 2x2 overview figure (releases per year, top genres, cast sizes,
# smoothed release counts), saves it as a PNG and shows it inline.
# Side effect: adds a 'cast_count' column to df.
from pathlib import Path

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Top-left: histogram of release years.
# NOTE(review): bins=36 presumably means one bin per year for 1990-2025
# (inferred from the dataset filename) -- confirm against the actual data.
df['year'].hist(ax=axes[0, 0], bins=36, edgecolor='black')
axes[0, 0].set_title('Movie Releases by Year', fontweight='bold')

# Top-right: the ten most frequent genres. Each row holds a comma-separated
# list; tokens are stripped after splitting so that "Drama" and " Drama"
# (from ", "-separated lists) are counted as the same genre.
genre_counts = (
    df['genres']
    .str.split(',')
    .explode()
    .str.strip()
    .value_counts()
    .head(10)
)
genre_counts.plot(kind='barh', ax=axes[0, 1], color='teal')
axes[0, 1].set_title('Top 10 Genres')

# Bottom-left: number of comma-separated entries in 'cast' per movie.
# Rows with a missing 'cast' value yield NaN and are left out of the histogram.
df['cast_count'] = df['cast'].str.split(',').str.len()
df['cast_count'].hist(ax=axes[1, 0], bins=20, color='purple', edgecolor='black')
axes[1, 0].set_title('Cast Size Distribution')

# Bottom-right: rolling mean over three consecutive rows of the per-year
# counts. The first two positions are NaN (incomplete window) and are not
# drawn. Years with no rows are absent from the index, so a gap in the data
# would make a window span more than three calendar years.
movies_per_year = df.groupby('year').size()
movies_per_year.rolling(3).mean().plot(ax=axes[1, 1], color='red', linewidth=2.5)
axes[1, 1].set_title('3-Year Moving Average of Releases')

plt.tight_layout()

# Create the output directory first so savefig does not fail on a fresh
# checkout where '../results/figures' has not been created yet.
figure_path = '../results/figures/distributions.png'
Path(figure_path).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=150)
plt.show()

# Cell 3: Trends
# Builds a per-year summary table and shows its last ten rows.
# Requires the 'cast_count' column that Cell 2 adds to df.

# "\n" (not "\\n") so the banner is preceded by a blank line instead of
# printing a literal backslash-n.
print("\n=== TEMPORAL TRENDS ===")

# movies     -> number of non-null titles per year ('count' skips NaN, so this
#               can differ from a plain row count if some titles are missing)
# cast_count -> mean cast size per year
# Both columns are rounded to two decimals.
trend_df = (
    df.groupby('year')
    .agg(movies=('title', 'count'), cast_count=('cast_count', 'mean'))
    .round(2)
)

# `display` is the IPython rich-output helper available in notebook sessions.
display(trend_df.tail(10))