In [None]:
# Cell 1: Setup
import pandas as pd, numpy as np, json, warnings, os, sys
from datetime import datetime
# Suppress all warnings so the notebook output stays uncluttered.
warnings.filterwarnings(action='ignore')
# Let pandas render up to 50 columns when a DataFrame is displayed.
pd.options.display.max_columns = 50
# Print the interpreter and pandas versions for reproducibility.
python_version = sys.version.split()[0]
pandas_version = pd.__version__
print(f"Python {python_version}, Pandas {pandas_version}")

# Cell 2: Load
# Read the raw movies extract into a DataFrame. low_memory=False asks pandas
# to process the file in one pass instead of in chunks, which avoids
# mixed-dtype inference on columns.
df = pd.read_csv('../data/raw/movies_1990_2025.csv', low_memory=False)
print(f"Shape: {df.shape}")
# NOTE(review): display() is not imported in this file — presumably the
# IPython/Jupyter builtin; confirm this only ever runs inside a notebook.
display(df.head())
# Single-escaped newline: the double-escaped form printed a literal
# backslash-n instead of a blank line before the heading.
print("\nInfo:")
df.info()

# Cell 3: Validate
# Sanity-check the loaded frame: report the row count, the span of the 'year'
# column and the ten columns with the most missing values, then fail loudly
# if any year falls outside 1990-2025.
print("\n=== VALIDATION ===")
print(f"Rows: {len(df):,}")
# Compute the bounds once; they are used for both the report and the checks.
year_min = df['year'].min()
year_max = df['year'].max()
print(f"Years: {year_min} - {year_max}")
null_counts = df.isnull().sum().sort_values(ascending=False).head(10)
print(f"Nulls:\n{null_counts}")
# Raise explicitly rather than using `assert`: assert statements are removed
# when Python runs with -O, which would silently skip this validation.
# AssertionError is kept so the failure type is unchanged. The checks are
# written as `not (x >= bound)` so that a NaN bound (e.g. an all-missing
# 'year' column) still fails, exactly as the comparison did before.
if not (year_min >= 1990):
    raise AssertionError("Pre-1990 data found")
if not (year_max <= 2025):
    raise AssertionError("Post-2025 data found")

# Cell 4: Report
# Persist a small JSON summary of the loaded frame (shape, column names,
# dtypes and per-column null percentage) alongside today's date.
os.makedirs('../results/tables/', exist_ok=True)
validation = {
    'extract_date': datetime.now().strftime('%Y-%m-%d'),
    'shape': list(df.shape),
    'columns': df.columns.tolist(),
    # dtype objects are not JSON-serializable, so store their string names.
    'dtypes': {k: str(v) for k, v in df.dtypes.items()},
    # Percentage of missing values per column, rounded to two decimals.
    'null_pct': (df.isnull().sum() / len(df) * 100).round(2).to_dict(),
}
# Explicit encoding so the output does not depend on the platform default.
with open('../results/tables/validation.json', 'w', encoding='utf-8') as f:
    json.dump(validation, f, indent=2)
# The check mark was mojibake (UTF-8 bytes decoded as cp1252); restored.
print("✓ Validation saved")