### Model Evaluation & Interpretation

**Objective**:
The main objective of this step is to evaluate the best-performing model (i.e., Gradient Boosting) using robust metrics, and to translate the model outputs into business-actionable insights.



In [1]:
#Import the required packages
import joblib
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

### Load Data (From Engineered dataset)
# Raw string literal: the Windows path contains backslash sequences that are
# not valid escapes in a normal string and raise a SyntaxWarning on recent
# Python versions. The resulting path value is unchanged.
df = pd.read_csv(r'C:\customerchurnprediction\data\engineered\engineeredbank_churn.csv')

# Drop unnecessary columns
df = df.drop(columns=['gender', 'country'])

### Define Target and Features
# Every remaining column except the label is used as a feature;
# 'churn' is the prediction target.
x = df.drop(columns=['churn'])
y = df['churn']

### Define Features Group
# Different feature types need different preprocessing techniques,
# so the columns are grouped by type up front.
numerical_features = [
    'age',
    'tenure',
    'balance',
    'credit_score',
    'products_number',
    'products_per_tenure',
    'balance_per_product',
    'churn_risk_score',
]
binary_features = [
    'credit_card',
    'active_member',
    'inactive_single_product',
    'zero_balance',
    'high_balance',
    'early_customer',
]
categorical_features = [
    'age_group',
    'credit_score_band',
]

### Train-Test Split (Stratified)
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions of the target comparable in both splits; the fixed seed makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

### Build Preprocessing Pipeline
# Numeric columns: fill missing values with the median, then standardize.
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode (first level dropped, unseen categories ignored).
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(drop='first', handle_unknown='ignore')),
])

# Route each column group to its pipeline. Any column not named in either
# group is forwarded unchanged via remainder='passthrough'.
preprocessor = ColumnTransformer(
    [
        ('num', numeric_pipeline, numerical_features),
        ('cat', categorical_pipeline, categorical_features),
    ],
    remainder='passthrough',
)

### Train Random Forest Model
# Shallow, heavily regularised forest; class_weight='balanced' reweights
# the classes inversely to their frequency in the training labels.
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=8,
    min_samples_leaf=50,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
)

# Preprocessing and model are chained so both are fitted on the training
# split only.
rf_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', rf),
])
rf_pipeline.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the positive class,
# which is what ROC-AUC is scored on.
y_proba_rf = rf_pipeline.predict_proba(X_test)[:, 1]
rf_auc = roc_auc_score(y_test, y_proba_rf)

# Bare expression so the notebook cell displays the score.
rf_auc

### Train Gradient Boosting Model
# Small learning rate with shallow trees; the fixed seed keeps runs
# reproducible.
gb = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=3,
    random_state=42,
)

# Pipeline does not copy its steps, so passing `preprocessor` directly would
# make this pipeline share, and refit, the very transformer object that
# rf_pipeline already holds. clone() returns an unfitted copy with the same
# parameters, keeping the two pipelines independent.
gb_pipeline = Pipeline(
    steps=[
        ('preprocessor', clone(preprocessor)),
        ('model', gb),
    ]
)

gb_pipeline.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the positive class,
# which is what ROC-AUC is scored on.
y_proba_gb = gb_pipeline.predict_proba(X_test)[:, 1]
gb_auc = roc_auc_score(y_test, y_proba_gb)

# Bare expression so the notebook cell displays the score.
gb_auc

0.8515117582914193

##### 1. ROC CURVE Comparison with Visual Validation.
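The ROC-AUC score alone isn't sufficient to make a conclusive recommendation, because it collapses performance across all thresholds into a single number. Plotting the ROC curves side by side shows the actual trade-offs at each threshold.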