In [1]:
# train_model.ipynb

import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report

In [None]:
# Load Data
print("Loading data...")
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

Loading data...


In [None]:
# Cleaning
df.drop('customerID', axis=1, inplace=True)
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

In [None]:
# Encoding Targets
le = LabelEncoder()
df['Churn'] = le.fit_transform(df['Churn'])

In [None]:
# Preprocessing Features
X = df.drop('Churn', axis=1)
y = df['Churn']

X = pd.get_dummies(X, drop_first=True)

In [None]:
# Scaling
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [None]:
# SMOTE
# We only balance the traning data, not the testing data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

In [None]:
# Optimization
print("Tuning hyperparameters... (This may take a minute)")
gb = GradientBoostingClassifier(random_state=42)

param_dist = {
    'n_estimators': [100, 200],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5]
}

random_search = RandomizedSearchCV(gb, param_distributions=param_dist, n_iter=5, cv=3, n_jobs=-1, verbose=1)
random_search.fit(X_train_smote, y_train_smote)

best_model = random_search.best_estimator_
print(f"Best Params: {random_search.best_params_}")

Tuning hyperparameters... (This may take a minute)
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Best Params: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.1}


In [None]:
#Save Everything

joblib.dump(best_model, 'churn_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(X.columns, 'model_columns.pkl')

print("Success! Model and files saved.")

Success! Model and files saved.
