# SHAP Analysis and Results

In [None]:
# Import Libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
from catboost import CatBoostClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, balanced_accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

## SHAP with XGBoost

In [None]:
# Load the transformed customer-churn spreadsheet; its first column is used as the index

df = pd.read_excel(io="Data/CustomerChurn_Transformed.xlsx", index_col=0)
df.head()

In [None]:
# Separate the 'Churn' label from the feature columns, then hold out 30% of the rows for testing

y = df['Churn']
X = df.drop('Churn', axis=1)

# Fixed random_state so the same rows land in each split on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=59,
    test_size=0.3,
)

In [None]:
# Oversample the training split with SMOTE; X_test / y_test are not resampled

smote = SMOTE(random_state=59)
X_train_resampled, y_train_resampled = smote.fit_resample(X=X_train, y=y_train)

In [None]:
# Model training

# Fit XGBoost on the SMOTE-resampled training data
model = XGBClassifier(n_estimators=1000, max_depth=3, learning_rate=0.01)
model.fit(X_train_resampled, y_train_resampled)

# Hard labels feed the threshold-based metrics; the second predict_proba
# column (index 1) is the score passed to ROC AUC
y_test_pred = model.predict(X_test)
y_test_proba = model.predict_proba(X_test)[:, 1]

# Metrics computed from predicted labels, in the order they are reported
label_scorers = {
    'accuracy': accuracy_score,
    'recall': recall_score,
    'precision': precision_score,
    'f1_score': f1_score,
    'balanced_accuracy': balanced_accuracy_score,
}
test_metrics = {
    metric_name: scorer(y_test, y_test_pred)
    for metric_name, scorer in label_scorers.items()
}
# ROC AUC is the only metric that uses the probability scores instead of labels
test_metrics['roc_auc'] = roc_auc_score(y_test, y_test_proba)

print("Test Metrics:", test_metrics)

In [None]:
# SHAP algorithm

# Wrap the fitted model in a SHAP explainer, then evaluate it on the test features
explainer = shap.Explainer(model=model)
shap_values = explainer(X_test)

In [None]:
# Results visualization
#
# shap.summary_plot draws onto the current matplotlib figure and returns None
# when show=False, so its result cannot be chained with .savefig(). Each plot
# is therefore drawn once, saved through pyplot, displayed, and closed so the
# next plot starts on an empty figure instead of being drawn over this one.

# Bar summary plot
shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
bar_fig = plt.gcf()
bar_fig.savefig("shap_summary_plot_xgboost.png", bbox_inches="tight")
plt.show()
plt.close(bar_fig)

# Default (detailed) summary plot
shap.summary_plot(shap_values, X_test, show=False)
detail_fig = plt.gcf()
detail_fig.savefig("shap_summary_plot_xgboost_detail.png", bbox_inches="tight")
plt.show()
plt.close(detail_fig)