In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import shap
import os
import optuna
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from catboost import CatBoostClassifier, Pool

np.random.seed(42)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DATA_PATH = os.path.join(os.getcwd(),"..", "data", "raw", "churn-data.csv")
df=pd.read_csv(DATA_PATH)

In [3]:
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

In [4]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# feature engineering based on previous analysis

In [5]:
class FeatureEngineering:
    def __init__(self, df):
        self.df = df.copy()

    # Age groups mapping
    def age_group(self, age):
        if age < 30:
            return "Young"
        elif age < 45:
            return "Adult"
        elif age < 60:
            return "Senior"
        else:
            return "Elderly"
    # Credit score tiers mapping based on VantageScore
    def credit_score_tier(self,score):
        if score >= 781:
            return "superprime"
        elif score >= 661:
            return "prime"
        elif score >= 601:
            return "near prime"
        elif score >= 300:
            return "subprime"
        else:
            return "Very Poor"
    def make_feature_extraction(self):
        df = self.df.copy()

        # -------------------------
        # 1. Zero balance indicator
        # -------------------------
        df["IsZeroBalance"] = (df["Balance"] == 0).astype(int)

        # -------------------------
        # 2. Age groups
        # -------------------------
        df["AgeGroup"] = df["Age"].apply(self.age_group)

        # -------------------------
        # 3. Credit score tiers VantageScore Credit Score
        # -------------------------

        df["CreditTier"] = df["CreditScore"].apply(self.credit_score_tier)

        # -------------------------
        # 4. Customer Value (improved)
        # Balance + Salary is better indicator
        # -------------------------
        df["CustomerValue"] = df["Balance"] + df["EstimatedSalary"]

        # -------------------------
        # 5. Age Ã— NumOfProducts
        # -------------------------
        df["AgeProduct"] = np.log1p(df["Age"] * df["NumOfProducts"])

        # -------------------------
        # 6. Activity Score 
        # -------------------------
        df["ActivityScore"] = df["IsActiveMember"] * df["NumOfProducts"]

        # -------------------------
        # 7. Log Balance/Salary Ratio (BEST version)
        # More stable, avoids skew
        # -------------------------
        df["LogBalanceSalaryRatio"] = (
            np.log1p(df["Balance"]) - np.log1p(df["EstimatedSalary"])
        )

        # -------------------------
        # 8. High balance flag
        # -------------------------
        df["HighBalance"] = (df["Balance"] > df["Balance"].median()).astype(int)

        # -------------------------
        # 9. Customer Lifetime Value (CLV)
        # -------------------------
        df["CLV"] = df["Tenure"] * df["Balance"]

        return df
fe = FeatureEngineering(df)
df_new_features = fe.make_feature_extraction()
df_new_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   CreditScore            10000 non-null  int64  
 1   Geography              10000 non-null  object 
 2   Gender                 10000 non-null  object 
 3   Age                    10000 non-null  int64  
 4   Tenure                 10000 non-null  int64  
 5   Balance                10000 non-null  float64
 6   NumOfProducts          10000 non-null  int64  
 7   HasCrCard              10000 non-null  int64  
 8   IsActiveMember         10000 non-null  int64  
 9   EstimatedSalary        10000 non-null  float64
 10  Exited                 10000 non-null  int64  
 11  IsZeroBalance          10000 non-null  int64  
 12  AgeGroup               10000 non-null  object 
 13  CreditTier             10000 non-null  object 
 14  CustomerValue          10000 non-null  float64
 15  Age

# split dataset

In [61]:
df_new_features.to_csv(os.path.join(os.getcwd(),"..", "data", "processed", "churn-data-features.csv"), index=False)

In [6]:
X = df_new_features.drop('Exited', axis=1)
y = df_new_features['Exited']

#split the data into training and testing sets with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

# check shape of the splits
print("df shape:", df_new_features.shape)
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

df shape: (10000, 20)
X_train shape: (9000, 19)
X_test shape: (1000, 19)
y_train shape: (9000,)
y_test shape: (1000,)


In [7]:
# Define feature categories
numerical_features = ['CreditScore', 'Tenure','Age', 'Balance', 'EstimatedSalary','LogBalanceSalaryRatio','CustomerValue','AgeProduct','CLV']
categorical_features=['Geography', 'Gender','AgeGroup', 'CreditTier']
ready_cols = list(set(X_train.columns.tolist()) - set(numerical_features) - set(categorical_features))

In [8]:
X_train[numerical_features].head()

Unnamed: 0,CreditScore,Tenure,Age,Balance,EstimatedSalary,LogBalanceSalaryRatio,CustomerValue,AgeProduct,CLV
673,640,6,32,118879.35,19131.71,1.826718,138011.06,4.174387,713276.1
7709,598,1,64,62979.93,152273.57,-0.882853,215253.5,4.174387,62979.93
6100,796,2,30,137262.71,62905.29,0.780258,200168.0,4.110874,274525.42
8469,720,5,71,183135.39,197688.5,-0.076466,380823.89,4.962845,915676.95
8563,578,1,45,148600.91,143397.14,0.035646,291998.05,3.828641,148600.91


In [9]:
len(X_train.columns.tolist()) == len(ready_cols)+len(numerical_features)+len(categorical_features)

True

In [10]:
# check skewness of numerical features
skewed_feats = X_train[numerical_features].skew().sort_values(ascending=False)
print("Skewness of numerical features:\n", skewed_feats)    

Skewness of numerical features:
 Age                      1.024857
CLV                      0.869551
AgeProduct               0.264745
Tenure                   0.009788
EstimatedSalary          0.009628
CustomerValue           -0.068203
CreditScore             -0.078800
Balance                 -0.143684
LogBalanceSalaryRatio   -0.538818
dtype: float64


# pipline

In [11]:
# for non skewed numerical features
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
])


#for categorical features

cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore',sparse_output=False, drop='first'))
])


#for ready to use features
ready_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent'))
])

# Combine all pipelines into a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, numerical_features),
        ('cat', cat_pipeline, categorical_features),
        ('ready', ready_pipeline, ready_cols)
    ]
)


In [12]:
#---------------------------------------------------------
# add noise features to test preprocessing robustness
#---------------------------------------------------------

# num_noise_features = 1
# for i in range(num_noise_features):
#     noise_feature_name = f'NoiseFeature_{i+1}'
#     X_train[noise_feature_name] = np.random.rand(X_train.shape[0])
#     X_test[noise_feature_name] = np.random.rand(X_test.shape[0])
#     ready_cols.append(noise_feature_name)


#---------------------------------------------------------
# Fit and transform the training data
#---------------------------------------------------------
X_train_processed = preprocessor.fit_transform(X_train) 
X_test_processed = preprocessor.transform(X_test)
print("Processed X_train shape:", X_train_processed.shape)
print("Processed X_test shape:", X_test_processed.shape)

Processed X_train shape: (9000, 24)
Processed X_test shape: (1000, 24)


In [13]:
#compute class weights to handle class imbalance
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = {i : class_weights[i] for i in range(len(class_weights))}
print("Class weights:", class_weights_dict)

Class weights: {0: np.float64(0.6278777731268314), 1: np.float64(2.454991816693944)}


-------------

# baseline xgb

In [14]:
# xgboost model with smote
xgb_model = xgb.XGBClassifier(scale_pos_weight=class_weights_dict[1]/class_weights_dict[0], random_state=42)
xgb_model.fit(X_train_processed, y_train)
y_test_pred_xgb = xgb_model.predict(X_test_processed)
print("Classification Report for Test Set:\n", classification_report(y_test, y_test_pred_xgb))
print('f1 ', f1_score(y_test, y_test_pred_xgb))
print('===========================================================')
y_train_pred_xgb = xgb_model.predict(X_train_processed)
print("Classification Report for Training Set:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for Test Set:
               precision    recall  f1-score   support

           0       0.90      0.87      0.89       796
           1       0.55      0.64      0.59       204

    accuracy                           0.82      1000
   macro avg       0.73      0.75      0.74      1000
weighted avg       0.83      0.82      0.83      1000

f1  0.592255125284738
Classification Report for Training Set:
               precision    recall  f1-score   support

           0       1.00      0.96      0.98      7167
           1       0.85      0.98      0.91      1833

    accuracy                           0.96      9000
   macro avg       0.92      0.97      0.94      9000
weighted avg       0.97      0.96      0.96      9000

f1  0.9129113924050632


#  XGBoost hyperparameters tunning  

In [15]:
def xgb_objective(trial):
    # XGBoost hyperparameters
    params = {
        "objective": "binary:logistic",
        "eval_metric": "logloss",
        "use_label_encoder": False,

        # Class imbalance handling
        "scale_pos_weight": class_weights_dict[1] / class_weights_dict[0],

        # Trial suggestions
        "n_estimators": trial.suggest_int("n_estimators", 50, 300),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10)
    }

    model = xgb.XGBClassifier(**params)
    model.fit(X_train_processed, y_train)

    preds = model.predict(X_test_processed)
    f1 = f1_score(y_test, preds)

    return f1


In [16]:
# Create and run study
study = optuna.create_study(direction="maximize")
study.optimize(xgb_objective, n_trials=200, show_progress_bar=False)
# Best results
print("Best F1 Score:", study.best_value)
print("Best Hyperparameters:", study.best_params)
xgb_best_params = study.best_params

[I 2025-11-27 17:08:36,858] A new study created in memory with name: no-name-6a3d7a1c-078a-4829-b60f-5b6b960baf25
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-11-27 17:08:37,294] Trial 0 finished with value: 0.6160520607375272 and parameters: {'n_estimators': 245, 'max_depth': 8, 'learning_rate': 0.018726990310762685, 'subsample': 0.6866138876692973, 'colsample_bytree': 0.8150715674727202, 'gamma': 4.926588432699576, 'min_child_weight': 1}. Best is trial 0 with value: 0.6160520607375272.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-11-27 17:08:37,419] Trial 1 finished with value: 0.6037735849056604 and parameters: {'n_estimators': 192, 'max_depth': 5, 'learning_rate': 0.12039332478994154, 'subsample': 0.7927232387876402, 'colsample_bytree': 0.7983841669454286, 'gamma': 2.379849880145416, 'min_child_weight': 5}. Best is trial 0 with value: 0.6160520607375272.
Parameters: {

Best F1 Score: 0.6398305084745762
Best Hyperparameters: {'n_estimators': 284, 'max_depth': 6, 'learning_rate': 0.04155462053710831, 'subsample': 0.7762560378019867, 'colsample_bytree': 0.9196657869222666, 'gamma': 4.36925049361824, 'min_child_weight': 2}


In [17]:
xgb_model = xgb.XGBClassifier(**xgb_best_params)
xgb_model.fit(X_train_processed, y_train)
y_test_pred_xgb = xgb_model.predict(X_test_processed)
print("Classification Report for Test Set:\n", classification_report(y_test, y_test_pred_xgb))
print('f1 ', f1_score(y_test, y_test_pred_xgb))
print('===========================================================')
y_train_pred_xgb = xgb_model.predict(X_train_processed)
print("Classification Report for Training Set:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for Test Set:
               precision    recall  f1-score   support

           0       0.88      0.96      0.92       796
           1       0.74      0.47      0.58       204

    accuracy                           0.86      1000
   macro avg       0.81      0.71      0.75      1000
weighted avg       0.85      0.86      0.85      1000

f1  0.5765765765765766
Classification Report for Training Set:
               precision    recall  f1-score   support

           0       0.89      0.97      0.93      7167
           1       0.81      0.52      0.64      1833

    accuracy                           0.88      9000
   macro avg       0.85      0.75      0.78      9000
weighted avg       0.87      0.88      0.87      9000

f1  0.6356382978723404


# SHAP feature importance

In [18]:
feature_names=preprocessor.get_feature_names_out().tolist()

In [19]:
explainer = shap.Explainer(xgb_model, X_train_processed)
shap_values = explainer(X_train_processed)
mean_abs_shap = np.abs(shap_values.values).mean(axis=0)

# Create DF
shap_df = pd.DataFrame({
    'Feature': feature_names,
    'SHAP_Value': mean_abs_shap
}).sort_values(by='SHAP_Value', ascending=False)



In [20]:
shap_df_xgb=shap_df.reset_index()
shap_df_xgb

Unnamed: 0,index,Feature,SHAP_Value
0,2,num__Age,0.656855
1,20,ready__NumOfProducts,0.382759
2,21,ready__IsActiveMember,0.312855
3,11,cat__Gender_Male,0.205226
4,7,num__AgeProduct,0.202391
5,9,cat__Geography_Germany,0.148347
6,3,num__Balance,0.117916
7,5,num__LogBalanceSalaryRatio,0.112232
8,13,cat__AgeGroup_Senior,0.089163
9,23,ready__ActivityScore,0.087921


In [21]:
shap_df_xgb['Feature'][:10]


0                      num__Age
1          ready__NumOfProducts
2         ready__IsActiveMember
3              cat__Gender_Male
4               num__AgeProduct
5        cat__Geography_Germany
6                  num__Balance
7    num__LogBalanceSalaryRatio
8          cat__AgeGroup_Senior
9          ready__ActivityScore
Name: Feature, dtype: object

In [52]:
top_features = shap_df_xgb['Feature'][:11].tolist()
top_feature_indices = [feature_names.index(feat) for feat in top_features]
X_train_top = X_train_processed[:, top_feature_indices]
X_test_top = X_test_processed[:, top_feature_indices] 

In [53]:
  
# xgboost model with class weights
final_xgb_model = xgb.XGBClassifier(**xgb_best_params)
final_xgb_model.fit(X_train_top, y_train)
y_test_pred_xgb = final_xgb_model.predict(X_test_top)
print("Classification Report for test:\n", classification_report(y_test, y_test_pred_xgb))    
print('f1 ', f1_score(y_test, y_test_pred_xgb))
y_train_pred_xgb = final_xgb_model.predict(X_train_top)
print("Classification Report for train:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for test:
               precision    recall  f1-score   support

           0       0.88      0.95      0.91       796
           1       0.72      0.47      0.57       204

    accuracy                           0.85      1000
   macro avg       0.80      0.71      0.74      1000
weighted avg       0.84      0.85      0.84      1000

f1  0.56973293768546
Classification Report for train:
               precision    recall  f1-score   support

           0       0.89      0.97      0.93      7167
           1       0.81      0.51      0.63      1833

    accuracy                           0.88      9000
   macro avg       0.85      0.74      0.78      9000
weighted avg       0.87      0.88      0.86      9000

f1  0.6263772954924874


# threshold tunning

In [54]:

probs = final_xgb_model.predict_proba(X_test_top)[:,1]
best_f1 = 0
best_thresh = 0.5

for thresh in np.arange(0.1, 0.9, 0.01):
    preds = (probs > thresh).astype(int)
    f1 = f1_score(y_test, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = thresh

print("Best threshold:", best_thresh, "Best F1:", best_f1)


Best threshold: 0.30999999999999994 Best F1: 0.6350710900473934


In [55]:
probs = final_xgb_model.predict_proba(X_test_top)[:, 1]
y_test_pred_xgb = (probs > best_thresh).astype(int)
print("Classification Report for test:\n", classification_report(y_test, y_test_pred_xgb))    
print('f1 ', f1_score(y_test, y_test_pred_xgb))
print('===========================================================')
y_train_pred_xgb = (final_xgb_model.predict_proba(X_train_top)[:, 1] > best_thresh).astype(int)
print("Classification Report for train:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for test:
               precision    recall  f1-score   support

           0       0.91      0.89      0.90       796
           1       0.61      0.66      0.64       204

    accuracy                           0.85      1000
   macro avg       0.76      0.78      0.77      1000
weighted avg       0.85      0.85      0.85      1000

f1  0.6350710900473934
Classification Report for train:
               precision    recall  f1-score   support

           0       0.92      0.91      0.91      7167
           1       0.66      0.67      0.67      1833

    accuracy                           0.86      9000
   macro avg       0.79      0.79      0.79      9000
weighted avg       0.86      0.86      0.86      9000

f1  0.6655879180151025


-----------

# catboost model base

In [26]:
cat_model = CatBoostClassifier(verbose=False)
train_pool = Pool(X_train_processed, y_train)
cat_model.fit(train_pool)

y_test_pred_cat = cat_model.predict(X_test_processed)
print("Classification Report for test:\n", classification_report(y_test, y_test_pred_cat)) 
print('f1 score for test',f1_score(y_test, y_test_pred_cat))
print('=====================================================')
y_train_pred_cat = cat_model.predict(X_train_processed)
print("Classification Report for train:\n", classification_report(y_train, y_train_pred_cat))
print('f1 score for train',f1_score(y_train, y_train_pred_cat))

Classification Report for test:
               precision    recall  f1-score   support

           0       0.88      0.96      0.92       796
           1       0.77      0.48      0.59       204

    accuracy                           0.86      1000
   macro avg       0.82      0.72      0.75      1000
weighted avg       0.86      0.86      0.85      1000

f1 score for test 0.5878787878787879
Classification Report for train:
               precision    recall  f1-score   support

           0       0.91      0.98      0.95      7167
           1       0.91      0.63      0.74      1833

    accuracy                           0.91      9000
   macro avg       0.91      0.80      0.84      9000
weighted avg       0.91      0.91      0.90      9000

f1 score for train 0.7404777275661717


# Hyperparameters tuning for catboost

In [27]:
def cat_objective(trial):

    params = {
        "iterations": trial.suggest_int("iterations", 200, 1200),
        "depth": trial.suggest_int("depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 10.0),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 10.0),
        "random_strength": trial.suggest_float("random_strength", 0.1, 10.0),

        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0.8, 5.0),

        "loss_function": "Logloss",
        "eval_metric": "F1",

        "verbose": False,
        "random_seed": 42,
        "use_best_model": True
    }

    # Pool
    train_pool = Pool(X_train_processed, y_train)
    valid_pool = Pool(X_test_processed, y_test)

    model = CatBoostClassifier(**params)
    model.fit(
        train_pool,
        eval_set=valid_pool,
        early_stopping_rounds=70,
        verbose=False
    )

    # Predict on validation
    preds = model.predict(valid_pool)
    f1 = f1_score(y_test, preds)

    return f1

In [28]:
study = optuna.create_study(direction="maximize")
study.optimize(cat_objective, n_trials=120, show_progress_bar=False)

print("\nðŸŽ¯ Best F1:", study.best_value)
print("ðŸ”¥ Best Params:\n", study.best_params)

[I 2025-11-27 17:09:46,470] A new study created in memory with name: no-name-a3936d17-d62b-452f-be59-d638397b3a03
[I 2025-11-27 17:09:47,295] Trial 0 finished with value: 0.6455981941309256 and parameters: {'iterations': 553, 'depth': 7, 'learning_rate': 0.1686853285547124, 'l2_leaf_reg': 2.9384461318747035, 'bagging_temperature': 5.247520547274956, 'random_strength': 6.266451198560795, 'scale_pos_weight': 2.7226847909950838}. Best is trial 0 with value: 0.6455981941309256.
[I 2025-11-27 17:09:47,756] Trial 1 finished with value: 0.6372549019607843 and parameters: {'iterations': 978, 'depth': 6, 'learning_rate': 0.15965444119856922, 'l2_leaf_reg': 5.104854802370285, 'bagging_temperature': 0.4633676925964336, 'random_strength': 2.777046007197401, 'scale_pos_weight': 2.2326365981758736}. Best is trial 0 with value: 0.6455981941309256.
[I 2025-11-27 17:09:48,003] Trial 2 finished with value: 0.5670886075949367 and parameters: {'iterations': 270, 'depth': 4, 'learning_rate': 0.011026934808


ðŸŽ¯ Best F1: 0.6695842450765864
ðŸ”¥ Best Params:
 {'iterations': 993, 'depth': 3, 'learning_rate': 0.2356797718799401, 'l2_leaf_reg': 9.972120267755898, 'bagging_temperature': 6.622206153811139, 'random_strength': 8.171993999319865, 'scale_pos_weight': 2.8014314699721883}


In [29]:
cat_best_params = study.best_params
cat_best_params["loss_function"] = "Logloss"
cat_best_params["eval_metric"] = "F1"
cat_best_params["verbose"] = False
cat_best_params["use_best_model"] = True
cat_best_params["random_seed"] = 42

In [30]:
tunned_cat_model = CatBoostClassifier(**cat_best_params)

train_pool = Pool(X_train_processed, y_train)
valid_pool = Pool(X_test_processed, y_test)

tunned_cat_model.fit(
    train_pool,
    eval_set=valid_pool,
    early_stopping_rounds=80,
    verbose=False
)

# Evaluate
y_test_pred_cat = tunned_cat_model.predict(X_test_processed)
print("\nClassification Report (Test):")
print(classification_report(y_test, y_test_pred_cat))
print("F1:", f1_score(y_test, y_test_pred_cat))
print('===================================================')
y_train_pred_cat = tunned_cat_model.predict(X_train_processed)
print("\nClassification Report (Train):")
print(classification_report(y_train, y_train_pred_cat))
print("F1:", f1_score(y_train, y_train_pred_cat))


Classification Report (Test):
              precision    recall  f1-score   support

           0       0.93      0.87      0.90       796
           1       0.60      0.75      0.67       204

    accuracy                           0.85      1000
   macro avg       0.77      0.81      0.79      1000
weighted avg       0.87      0.85      0.85      1000

F1: 0.6695842450765864

Classification Report (Train):
              precision    recall  f1-score   support

           0       0.92      0.88      0.90      7167
           1       0.61      0.71      0.66      1833

    accuracy                           0.85      9000
   macro avg       0.77      0.80      0.78      9000
weighted avg       0.86      0.85      0.85      9000

F1: 0.6582661290322581


In [31]:
# SHAP feature importance
explainer = shap.Explainer(tunned_cat_model, X_train_processed)
shap_values = explainer(X_train_processed)
mean_abs_shap = np.abs(shap_values.values).mean(axis=0)

# Create DF
shap_df = pd.DataFrame({
    'Feature': feature_names,
    'SHAP_Value': mean_abs_shap
}).sort_values(by='SHAP_Value', ascending=False)

In [32]:
shap_df_cat = shap_df.reset_index()
shap_df_cat

Unnamed: 0,index,Feature,SHAP_Value
0,2,num__Age,0.69547
1,20,ready__NumOfProducts,0.631179
2,21,ready__IsActiveMember,0.384348
3,11,cat__Gender_Male,0.239029
4,9,cat__Geography_Germany,0.183934
5,3,num__Balance,0.176496
6,7,num__AgeProduct,0.145871
7,5,num__LogBalanceSalaryRatio,0.127245
8,23,ready__ActivityScore,0.079072
9,0,num__CreditScore,0.061759


In [39]:
top_features = shap_df_cat['Feature'][:17].tolist()
top_feature_indices = [feature_names.index(feat) for feat in top_features]
X_train_top = X_train_processed[:, top_feature_indices]
X_test_top = X_test_processed[:, top_feature_indices] 

In [59]:

final_cat_model = CatBoostClassifier(**cat_best_params ,snapshot_file='cat_model.cbs')

train_pool = Pool(X_train_top, y_train)
valid_pool = Pool(X_test_top, y_test)

final_cat_model.fit(
    train_pool,
    eval_set=valid_pool,
    early_stopping_rounds=80,
    verbose=False
)

# Evaluate
y_test_pred = final_cat_model.predict(X_test_top)
print("\nClassification Report (Test):")
print(classification_report(y_test, y_test_pred))
print("F1:", f1_score(y_test, y_test_pred))

y_train_pred = final_cat_model.predict(X_train_top)
print("\nClassification Report (Train):")
print(classification_report(y_train, y_train_pred))
print("F1:", f1_score(y_train, y_train_pred))



Classification Report (Test):
              precision    recall  f1-score   support

           0       0.92      0.87      0.90       796
           1       0.59      0.71      0.64       204

    accuracy                           0.84      1000
   macro avg       0.75      0.79      0.77      1000
weighted avg       0.85      0.84      0.84      1000

F1: 0.6430155210643016

Classification Report (Train):
              precision    recall  f1-score   support

           0       0.92      0.88      0.90      7167
           1       0.60      0.70      0.65      1833

    accuracy                           0.84      9000
   macro avg       0.76      0.79      0.77      9000
weighted avg       0.85      0.84      0.85      9000

F1: 0.6452099572542117


--------

# save the final model and preprocessor

In [60]:
import os
import joblib
import datetime

# Base models directory
BASE_MODEL_DIR = os.path.join("..", "models")
os.makedirs(BASE_MODEL_DIR, exist_ok=True)

# Timestamp for versioning
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# --- XGBoost ---
xgb_dir = os.path.join(BASE_MODEL_DIR, f"xgb_{timestamp}")
os.makedirs(xgb_dir, exist_ok=True)

xgb_model_path = os.path.join(xgb_dir, "model.pkl")
xgb_checkpoint_path = os.path.join(xgb_dir, "checkpoint.json")

joblib.dump(final_xgb_model, xgb_model_path)
final_xgb_model.save_model(xgb_checkpoint_path)

# --- CatBoost ---
cat_dir = os.path.join(BASE_MODEL_DIR, f"catboost_{timestamp}")
os.makedirs(cat_dir, exist_ok=True)

cat_model_path = os.path.join(cat_dir, "model.pkl")
cat_checkpoint_path = os.path.join(cat_dir, "checkpoint.cbs")

joblib.dump(final_cat_model, cat_model_path)
final_cat_model.save_model(cat_checkpoint_path)

# --- Preprocessor ---
preprocessor_dir = os.path.join(BASE_MODEL_DIR, f"preprocessor_{timestamp}")
os.makedirs(preprocessor_dir, exist_ok=True)

preprocessor_path = os.path.join(preprocessor_dir, "preprocessor.pkl")
joblib.dump(preprocessor, preprocessor_path)

print("Models, checkpoints, and preprocessor saved successfully!")


Models, checkpoints, and preprocessor saved successfully!
