# Solution: Customer Churn

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.utils import resample

# 1. Generate Data
# Synthetic binary classification set: 1000 rows, 4 features, with class
# weights 0.95 / 0.05 so the positive class is rare.
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    weights=[0.95, 0.05],
    random_state=42,
)

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# 2. Baseline
# Random forest with default settings, trained on the imbalanced training
# split without any rebalancing. Serves as the reference point for the
# methods below.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

print("Baseline RF:")
baseline_pred = rf.predict(X_test)
print(classification_report(y_test, baseline_pred))

# 3. Method A: Class Weight
# Same forest as the baseline, but with class_weight='balanced' so the
# classes are weighted inversely to their frequency in y_train.
rf_cw = RandomForestClassifier(
    class_weight='balanced',
    random_state=42,
).fit(X_train, y_train)

print("\nClass Weighted RF:")
class_weight_pred = rf_cw.predict(X_test)
print(classification_report(y_test, class_weight_pred))

# 4. Method B: Manual Upsampling
# Combine X and y in one DataFrame so features and labels are resampled
# together. Column names are derived from the actual feature count (f1..fN)
# instead of being hard-coded to four features.
feature_cols = [f'f{i}' for i in range(1, X_train.shape[1] + 1)]
train_data = pd.DataFrame(X_train, columns=feature_cols)
train_data['target'] = y_train

majority = train_data[train_data.target == 0]
minority = train_data[train_data.target == 1]

# Sample the minority rows with replacement until there are as many of them
# as majority rows. Only the training split is resampled; the test split is
# left at its original class distribution.
minority_upsampled = resample(
    minority,
    replace=True,
    n_samples=len(majority),
    random_state=42,
)
train_upsampled = pd.concat([majority, minority_upsampled])

X_train_up = train_upsampled.drop(columns='target')
y_train_up = train_upsampled['target']

rf_up = RandomForestClassifier(random_state=42)
rf_up.fit(X_train_up, y_train_up)

# The model was fitted on a DataFrame with named columns, so predict on a
# DataFrame with the same columns. Passing the bare NumPy array would make
# scikit-learn warn that X does not have valid feature names.
X_test_named = pd.DataFrame(X_test, columns=feature_cols)

print("\nUpsampled RF:")
print(classification_report(y_test, rf_up.predict(X_test_named)))