# 03 - Feature Engineering

This notebook creates and transforms features for modeling.

## Objectives
- Create new features from existing data
- Feature scaling and normalization
- Encoding categorical variables
- Feature selection
- Dimensionality reduction (if needed)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Read the preprocessed movies CSV into a DataFrame and report its dimensions.
PREPROCESSED_CSV = '../data/processed/movies_preprocessed.csv'
df = pd.read_csv(PREPROCESSED_CSV)
print(f"Data shape: {df.shape}")

In [None]:
# Create new features
# TODO: no derived features are implemented yet; this cell is a placeholder
# and leaves df unchanged.
# Example of deriving a feature from existing columns (illustrative names):
# df['new_feature'] = df['column1'] * df['column2']
pass

In [None]:
# Identify numerical and categorical features by dtype.
# 'number' matches every numeric dtype (not only int64/float64) and excludes
# booleans, so narrower or nullable numeric columns are not skipped.
numerical_cols = df.select_dtypes(include='number').columns.tolist()

# Treat object, string and pandas Categorical columns as categorical. The
# dtypes are checked directly so the result does not depend on which dtype
# the installed pandas version assigns to text columns.
categorical_cols = [
    col
    for col, dtype in df.dtypes.items()
    if pd.api.types.is_string_dtype(dtype) or isinstance(dtype, pd.CategoricalDtype)
]

print(f"Numerical columns: {numerical_cols}")
print(f"Categorical columns: {categorical_cols}")

In [None]:
# Column-wise preprocessing: standardize the numerical columns and one-hot
# encode the categorical ones. The transformer is only constructed here; it is
# not fitted or applied in this cell.
numeric_step = ('num', StandardScaler(), numerical_cols)
# handle_unknown='ignore': categories not seen during fit do not raise at
# transform time.
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

print("Preprocessing pipeline created")

In [None]:
# Write the feature table to CSV without the row index.
# NOTE(review): in the cells visible here, df is not modified after loading and
# `preprocessor` is never fitted or applied, so this writes the input data
# unchanged - confirm whether that is intended.
ENGINEERED_CSV = '../data/processed/movies_engineered.csv'
df.to_csv(ENGINEERED_CSV, index=False)
print("Engineered features saved")