# Feature Engineering — Churn Prediction
This notebook creates new features, informed by domain knowledge of insurance customer behavior, to improve model performance.

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the raw churn data. The relative path presumably assumes the notebook
# is run from its own directory — TODO confirm against the repo layout.
df = pd.read_csv('../data/raw/insurance_churn.csv')
print('Raw shape:', df.shape)

# ── Feature Engineering ─────────────────────────────────────

# 0. Drop rows whose target is missing.
# This must happen before any encoding/imputation: otherwise a missing string
# label compares unequal to 'yes' and is silently encoded as 0 in step 3, and
# a missing numeric label is overwritten with the column median in step 2.
# Either way an unknown outcome would become a fabricated training label.
n_before = len(df)
df = df.dropna(subset=['churn']).copy()  # .copy() so later column writes hit a real frame
n_dropped = n_before - len(df)
if n_dropped:
    print('Dropped rows with missing churn:', n_dropped)

# 1. Encode categoricals
# Every object-typed column except the target is replaced by integer codes.
# A single LabelEncoder is re-fitted per column, so the per-column mappings
# are not retained after this loop.
le = LabelEncoder()
cat_cols = df.select_dtypes(include='object').columns.tolist()
if 'churn' in cat_cols:
    cat_cols.remove('churn')
for col in cat_cols:
    # astype(str) gives the encoder a uniformly-typed column. Note that for
    # object columns a missing value becomes the literal string 'nan' and
    # therefore receives its own code instead of being imputed in step 2.
    df[col] = le.fit_transform(df[col].astype(str))

# 2. Handle missing values
# Numeric columns only: fill each with its own median. The target has no
# missing values left at this point (see step 0), so it is not altered.
df.fillna(df.median(numeric_only=True), inplace=True)

# 3. Encode target
# A string target is mapped to 1 for 'yes' (case-insensitive, surrounding
# whitespace ignored) and 0 for anything else; a non-string target is kept.
if df['churn'].dtype == 'object':
    df['churn'] = (df['churn'].str.strip().str.lower() == 'yes').astype(int)

print('Processed shape:', df.shape)
print('Churn rate:', df['churn'].mean().round(3))

# to_csv does not create missing directories, so make sure the output folder
# exists rather than failing after all the processing has been done.
out_path = Path('../data/processed/features_engineered.csv')
out_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(out_path, index=False)
print('Saved to data/processed/features_engineered.csv')