In [3]:
import pandas as pd
import numpy as np
import xgboost as xgb
import shap
import os
import sys
import optuna
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from catboost import CatBoostClassifier, Pool
#---------------------------------------------------------
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

from utils.FeatureEngineering import FeatureEngineering
optuna.logging.set_verbosity(optuna.logging.WARNING)
np.random.seed(42)

In [4]:
DATA_PATH = os.path.join(os.getcwd(),"..", "data", "raw", "churn-data.csv")
df=pd.read_csv(DATA_PATH)
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

# feature engineering based on previous analysis

In [5]:
df_new_features = FeatureEngineering().fit_transform(df)
df_new_features.info()   # type: ignore

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   CreditScore            10000 non-null  int64  
 1   Geography              10000 non-null  object 
 2   Gender                 10000 non-null  object 
 3   Age                    10000 non-null  int64  
 4   Tenure                 10000 non-null  int64  
 5   Balance                10000 non-null  float64
 6   NumOfProducts          10000 non-null  int64  
 7   HasCrCard              10000 non-null  int64  
 8   IsActiveMember         10000 non-null  int64  
 9   EstimatedSalary        10000 non-null  float64
 10  Exited                 10000 non-null  int64  
 11  IsZeroBalance          10000 non-null  int64  
 12  AgeGroup               10000 non-null  object 
 13  CreditTier             10000 non-null  object 
 14  CustomerValue          10000 non-null  float64
 15  Age

In [6]:
type(df_new_features)

pandas.core.frame.DataFrame

In [7]:
# Define feature categories
df_new_features=df_new_features.drop('Exited', axis=1) # type: ignore
numerical_features = ['CreditScore', 'Tenure','Age', 'Balance', 'EstimatedSalary','LogBalanceSalaryRatio','CustomerValue','AgeProduct','CLV']
categorical_features=['Geography', 'Gender','AgeGroup', 'CreditTier']
ready_cols = list(set(df_new_features.columns.tolist()) - set(numerical_features) - set(categorical_features))

In [8]:
len(df_new_features.columns.tolist()) == len(ready_cols)+len(numerical_features)+len(categorical_features)

True

# split dataset

In [9]:
#df_new_features.to_csv(os.path.join(os.getcwd(),"..", "data", "processed", "churn-data-features.csv"), index=False)

In [10]:
X = df.drop('Exited', axis=1)
y = df['Exited']

#split the data into training and testing sets with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# check shape of the splits
print("df shape:", df_new_features.shape)
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

df shape: (10000, 19)
X_train shape: (8000, 10)
X_test shape: (2000, 10)
y_train shape: (8000,)
y_test shape: (2000,)


In [11]:
# check skewness of numerical features
skewed_feats = df_new_features[numerical_features].skew().sort_values(ascending=False)
print("Skewness of numerical features:\n", skewed_feats)    

Skewness of numerical features:
 Age                      1.011320
AgeProduct               0.261636
Tenure                   0.010991
EstimatedSalary          0.002085
CustomerValue           -0.067142
CreditScore             -0.071607
Balance                 -0.141109
CLV                     -0.428912
LogBalanceSalaryRatio   -0.539335
dtype: float64


# pipline

In [12]:
# for non skewed numerical features
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
])


#for categorical features

cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore',sparse_output=False, drop='first'))
])


#for ready to use features
ready_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent'))
])

# Combine all pipelines into a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, numerical_features),
        ('cat', cat_pipeline, categorical_features),
        ('ready', ready_pipeline, ready_cols)
    ]
)

full_pipeline = Pipeline(steps=[
    ('feature_engineering', FeatureEngineering()),
    ('preprocessing', preprocessor)
])

In [13]:
#---------------------------------------------------------
# add noise features to test preprocessing robustness
#---------------------------------------------------------

# num_noise_features = 1
# for i in range(num_noise_features):
#     noise_feature_name = f'NoiseFeature_{i+1}'
#     X_train[noise_feature_name] = np.random.rand(X_train.shape[0])
#     X_test[noise_feature_name] = np.random.rand(X_test.shape[0])
#     ready_cols.append(noise_feature_name)


#---------------------------------------------------------
# Fit and transform the training data
#---------------------------------------------------------
X_train_processed = full_pipeline.fit_transform(X_train) 
X_test_processed = full_pipeline.transform(X_test)
print("Processed X_train shape:", X_train_processed.shape)
print("Processed X_test shape:", X_test_processed.shape)

Processed X_train shape: (8000, 24)
Processed X_test shape: (2000, 24)


In [14]:
full_pipeline

0,1,2
,steps,"[('feature_engineering', ...), ('preprocessing', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,missing_values,
,strategy,'most_frequent'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,missing_values,
,strategy,'most_frequent'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False


In [15]:
#compute class weights to handle class imbalance
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = {i : class_weights[i] for i in range(len(class_weights))}
print("Class weights:", class_weights_dict)

Class weights: {0: np.float64(0.6279434850863422), 1: np.float64(2.4539877300613497)}


-------------

# baseline xgb

In [16]:
# xgboost model with smote
xgb_model = xgb.XGBClassifier(scale_pos_weight=class_weights_dict[1]/class_weights_dict[0], random_state=42)
xgb_model.fit(X_train_processed, y_train)
y_test_pred_xgb = xgb_model.predict(X_test_processed)
print("Classification Report for Test Set:\n", classification_report(y_test, y_test_pred_xgb))
print('f1 ', f1_score(y_test, y_test_pred_xgb))
print('===========================================================')
y_train_pred_xgb = xgb_model.predict(X_train_processed)
print("Classification Report for Training Set:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for Test Set:
               precision    recall  f1-score   support

           0       0.90      0.89      0.89      1593
           1       0.58      0.60      0.59       407

    accuracy                           0.83      2000
   macro avg       0.74      0.74      0.74      2000
weighted avg       0.83      0.83      0.83      2000

f1  0.5890909090909091
Classification Report for Training Set:
               precision    recall  f1-score   support

           0       1.00      0.97      0.98      6370
           1       0.88      0.99      0.93      1630

    accuracy                           0.97      8000
   macro avg       0.94      0.98      0.96      8000
weighted avg       0.97      0.97      0.97      8000

f1  0.9339105339105339


#  XGBoost hyperparameters tunning  

In [134]:
def xgb_objective(trial):
    # XGBoost hyperparameters
    params = {
        "objective": "binary:logistic",
        "eval_metric": "logloss",
        "random_state": 42,

        # Class imbalance
        "scale_pos_weight": class_weights_dict[1] / class_weights_dict[0],

        # Trial suggestions
        "n_estimators": trial.suggest_int("n_estimators", 30, 300),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),

        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),

        "gamma": trial.suggest_float("gamma", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),

        # Regularization (L1 / L2)
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 10.0),   # L2
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 10.0)      # L1
    }

    model = xgb.XGBClassifier(**params)
    model.fit(X_train_processed, y_train)

    preds = model.predict(X_test_processed)
    f1 = f1_score(y_test, preds)

    return float(f1)


In [135]:
# Create and run study
study = optuna.create_study(direction="maximize")
study.optimize(xgb_objective, n_trials=100, show_progress_bar=False)
# Best results
print("Best F1 Score:", study.best_value)
print("Best Hyperparameters:", study.best_params)
xgb_best_params = study.best_params

Best F1 Score: 0.626
Best Hyperparameters: {'n_estimators': 276, 'max_depth': 8, 'learning_rate': 0.0646323864292814, 'subsample': 0.6493378179054865, 'colsample_bytree': 0.9344084654892358, 'gamma': 3.7052730362931654, 'min_child_weight': 3, 'reg_lambda': 7.841469099279617, 'reg_alpha': 9.176814213721237}


In [136]:
xgb_best_params = study.best_params
xgb_best_params["objective"] = "binary:logistic"
xgb_best_params["eval_metric"] = "logloss"
xgb_best_params["scale_pos_weight"] = class_weights_dict[1] / class_weights_dict[0]


In [137]:
xgb_model = xgb.XGBClassifier(**xgb_best_params,random_state=42)
xgb_model.fit(X_train_processed, y_train)
y_test_pred_xgb = xgb_model.predict(X_test_processed)
print("Classification Report for Test Set:\n", classification_report(y_test, y_test_pred_xgb))
print('f1 ', f1_score(y_test, y_test_pred_xgb))
print('===========================================================')
y_train_pred_xgb = xgb_model.predict(X_train_processed)
print("Classification Report for Training Set:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for Test Set:
               precision    recall  f1-score   support

           0       0.93      0.82      0.88      1593
           1       0.53      0.77      0.63       407

    accuracy                           0.81      2000
   macro avg       0.73      0.80      0.75      2000
weighted avg       0.85      0.81      0.82      2000

f1  0.626
Classification Report for Training Set:
               precision    recall  f1-score   support

           0       0.95      0.84      0.89      6370
           1       0.57      0.83      0.68      1630

    accuracy                           0.84      8000
   macro avg       0.76      0.84      0.79      8000
weighted avg       0.87      0.84      0.85      8000

f1  0.6802005012531328


# SHAP feature importance

In [138]:
feature_names=preprocessor.get_feature_names_out().tolist()

In [139]:
explainer = shap.Explainer(xgb_model, X_train_processed)
shap_values = explainer(X_train_processed)
mean_abs_shap = np.abs(shap_values.values).mean(axis=0)

# Create DF
shap_df = pd.DataFrame({
    'Feature': feature_names,
    'SHAP_Value': mean_abs_shap
}).sort_values(by='SHAP_Value', ascending=False)



In [190]:
shap_df_xgb = shap_df.copy()
shap_df_xgb['cumsum'] = shap_df_xgb['SHAP_Value'].cumsum()
shap_df_xgb['cumsum_percent'] = 100 * shap_df_xgb['cumsum'] / shap_df_xgb['SHAP_Value'].sum()

top_features_xgb = shap_df_xgb[shap_df_xgb['cumsum_percent'] <= 85]['Feature'].tolist()
print(top_features_xgb)


['num__Age', 'ready__NumOfProducts', 'cat__Gender_Male', 'ready__ActivityScore', 'ready__IsActiveMember', 'cat__Geography_Germany', 'cat__AgeGroup_Senior', 'num__Balance', 'num__AgeProduct']


In [191]:
top_feature_indices = [feature_names.index(feat) for feat in top_features_xgb]
X_train_top = X_train_processed[:, top_feature_indices]
X_test_top = X_test_processed[:, top_feature_indices] 

In [195]:
top_feature_indices

[2, 23, 11, 18, 20, 9, 13, 3, 7]

In [192]:
  
# xgboost model with class weights
final_xgb_model = xgb.XGBClassifier(**xgb_best_params,)
final_xgb_model.fit(X_train_top, y_train)
y_test_pred_xgb = final_xgb_model.predict(X_test_top)
print("Classification Report for test:\n", classification_report(y_test, y_test_pred_xgb))    
print('f1 ', f1_score(y_test, y_test_pred_xgb))
y_train_pred_xgb = final_xgb_model.predict(X_train_top)
print("Classification Report for train:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for test:
               precision    recall  f1-score   support

           0       0.93      0.81      0.87      1593
           1       0.51      0.77      0.62       407

    accuracy                           0.81      2000
   macro avg       0.72      0.79      0.74      2000
weighted avg       0.85      0.81      0.82      2000

f1  0.6153846153846154
Classification Report for train:
               precision    recall  f1-score   support

           0       0.94      0.82      0.87      6370
           1       0.53      0.79      0.63      1630

    accuracy                           0.81      8000
   macro avg       0.73      0.80      0.75      8000
weighted avg       0.85      0.81      0.82      8000

f1  0.6298206828788996


# threshold tunning

In [223]:

probs = final_xgb_model.predict_proba(X_test_top)[:,1]
best_f1 = 0
best_thresh = 0.5

for thresh in np.arange(0.1, 0.9, 0.01):
    preds = (probs > thresh).astype(int)
    f1 = f1_score(y_test, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = thresh

print("Best threshold:", best_thresh, "Best F1:", best_f1)


Best threshold: 0.7099999999999996 Best F1: 0.6435006435006435


In [224]:
probs = final_xgb_model.predict_proba(X_test_top)[:, 1]
y_test_pred_xgb = (probs > best_thresh).astype(int)
print("Classification Report for test:\n", classification_report(y_test, y_test_pred_xgb))    
print('f1 ', f1_score(y_test, y_test_pred_xgb))
print('===========================================================')
y_train_pred_xgb = (final_xgb_model.predict_proba(X_train_top)[:, 1] > best_thresh).astype(int)
print("Classification Report for train:\n", classification_report(y_train, y_train_pred_xgb))
print('f1 ', f1_score(y_train, y_train_pred_xgb))

Classification Report for test:
               precision    recall  f1-score   support

           0       0.90      0.92      0.91      1593
           1       0.68      0.61      0.64       407

    accuracy                           0.86      2000
   macro avg       0.79      0.77      0.78      2000
weighted avg       0.86      0.86      0.86      2000

f1  0.6435006435006435
Classification Report for train:
               precision    recall  f1-score   support

           0       0.90      0.93      0.92      6370
           1       0.69      0.61      0.65      1630

    accuracy                           0.86      8000
   macro avg       0.80      0.77      0.78      8000
weighted avg       0.86      0.86      0.86      8000

f1  0.6468474354786018


-----------

# catboost model base

In [164]:
cat_model = CatBoostClassifier(verbose=False,random_seed=42)
train_pool = Pool(X_train_processed, y_train)
cat_model.fit(train_pool)

y_test_pred_cat = cat_model.predict(X_test_processed)
print("Classification Report for test:\n", classification_report(y_test, y_test_pred_cat)) 
print('f1 score for test',f1_score(y_test, y_test_pred_cat))
print('=====================================================')
y_train_pred_cat = cat_model.predict(X_train_processed)
print("Classification Report for train:\n", classification_report(y_train, y_train_pred_cat))
print('f1 score for train',f1_score(y_train, y_train_pred_cat))

Classification Report for test:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92      1593
           1       0.79      0.49      0.61       407

    accuracy                           0.87      2000
   macro avg       0.84      0.73      0.76      2000
weighted avg       0.86      0.87      0.86      2000

f1 score for test 0.6060606060606061
Classification Report for train:
               precision    recall  f1-score   support

           0       0.91      0.98      0.95      6370
           1       0.90      0.64      0.75      1630

    accuracy                           0.91      8000
   macro avg       0.91      0.81      0.85      8000
weighted avg       0.91      0.91      0.91      8000

f1 score for train 0.75


# Hyperparameters tuning for catboost

In [None]:
def cat_objective(trial):

    params = {
        "iterations": trial.suggest_int("iterations", 200, 1200),
        "depth": trial.suggest_int("depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),

        # L2 Regularization (main one)
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 20.0),

        # Additional CatBoost regularization
        "random_strength": trial.suggest_float("random_strength", 0.1, 20.0),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 10.0),

        # Model shrinkage (regularization over iterations)
        "model_shrink_rate": trial.suggest_float("model_shrink_rate", 0.0, 0.2),
        "model_shrink_mode": trial.suggest_categorical(
            "model_shrink_mode", ["Constant", "Decreasing"]
        ),

        # Feature penalties (helps generalization)
        "penalties_coefficient": trial.suggest_float("penalties_coefficient", 0.0, 5.0),

        # Class imbalance
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0.8, 5.0),

        "loss_function": "Logloss",
        "eval_metric": "F1",
        "verbose": False,
        "random_seed": 42,
        "use_best_model": False
    }

    # Pool
    train_pool = Pool(X_train_processed, y_train)

    model = CatBoostClassifier(**params)
    model.fit(
        train_pool,
        early_stopping_rounds=70,
        verbose=False
    )

    # Predict
    preds = model.predict(X_test_processed)
    f1 = f1_score(y_test, preds)

    return float(f1)


In [166]:
study = optuna.create_study(direction="maximize")
study.optimize(cat_objective, n_trials=120)

print("\n Best F1:", study.best_value)
print(" Best Params:\n", study.best_params)


 Best F1: 0.6487148102815178
 Best Params:
 {'iterations': 278, 'depth': 3, 'learning_rate': 0.14633751310602228, 'l2_leaf_reg': 5.14758078317784, 'random_strength': 16.19176586015827, 'bagging_temperature': 8.974847144297865, 'model_shrink_rate': 0.1853142973161106, 'model_shrink_mode': 'Decreasing', 'penalties_coefficient': 4.442200958788622, 'scale_pos_weight': 2.1316925956089547}


In [169]:
cat_best_params = study.best_params
cat_best_params["loss_function"] = "Logloss"
cat_best_params["eval_metric"] = "F1"
cat_best_params["verbose"] = False
cat_best_params["use_best_model"] = True
cat_best_params["random_seed"] = 42

In [170]:
tunned_cat_model = CatBoostClassifier(**cat_best_params)

train_pool = Pool(X_train_processed, y_train)
valid_pool = Pool(X_test_processed, y_test)

tunned_cat_model.fit(
    train_pool,
    eval_set=valid_pool,
    early_stopping_rounds=80,
    verbose=False
)

# Evaluate
y_test_pred_cat = tunned_cat_model.predict(X_test_processed)
print("\nClassification Report (Test):")
print(classification_report(y_test, y_test_pred_cat))
print("F1:", f1_score(y_test, y_test_pred_cat))
print('===================================================')
y_train_pred_cat = tunned_cat_model.predict(X_train_processed)
print("\nClassification Report (Train):")
print(classification_report(y_train, y_train_pred_cat))
print("F1:", f1_score(y_train, y_train_pred_cat))


Classification Report (Test):
              precision    recall  f1-score   support

           0       0.91      0.91      0.91      1593
           1       0.65      0.64      0.65       407

    accuracy                           0.86      2000
   macro avg       0.78      0.78      0.78      2000
weighted avg       0.86      0.86      0.86      2000

F1: 0.645320197044335

Classification Report (Train):
              precision    recall  f1-score   support

           0       0.91      0.91      0.91      6370
           1       0.65      0.64      0.65      1630

    accuracy                           0.86      8000
   macro avg       0.78      0.78      0.78      8000
weighted avg       0.86      0.86      0.86      8000

F1: 0.6470951792336218


In [171]:
# SHAP feature importance
explainer = shap.Explainer(tunned_cat_model, X_train_processed)
shap_values = explainer(X_train_processed)
mean_abs_shap = np.abs(shap_values.values).mean(axis=0)

# Create DF
shap_df = pd.DataFrame({
    'Feature': feature_names,
    'SHAP_Value': mean_abs_shap
}).sort_values(by='SHAP_Value', ascending=False)

In [178]:
shap_df_cat = shap_df.reset_index()
shap_df_cat['cumsum'] = shap_df_cat['SHAP_Value'].cumsum()
shap_df_cat['cumsum_percent'] = 100 * shap_df_cat['cumsum'] / shap_df_cat['SHAP_Value'].sum()

top_features_cat = shap_df_cat[shap_df_cat['cumsum_percent'] <= 80]['Feature'].tolist()
print(top_features_cat)

['num__Age', 'ready__NumOfProducts', 'cat__Gender_Male', 'ready__ActivityScore', 'ready__IsActiveMember', 'cat__Geography_Germany', 'cat__AgeGroup_Senior', 'num__Balance']


In [179]:
top_feature_indices = [feature_names.index(feat) for feat in top_features_cat]
X_train_top = X_train_processed[:, top_feature_indices]
X_test_top = X_test_processed[:, top_feature_indices] 

In [189]:
top_feature_indices

[2, 23, 9, 3, 18, 7, 13, 11, 20, 5]

In [180]:

final_cat_model = CatBoostClassifier(**cat_best_params ,snapshot_file='cat_model.cbs')

train_pool = Pool(X_train_top, y_train)
valid_pool = Pool(X_test_top, y_test)

final_cat_model.fit(
    train_pool,
    eval_set=valid_pool,
    early_stopping_rounds=80,
    verbose=False
)

# Evaluate
y_test_pred = final_cat_model.predict(X_test_top)
print("\nClassification Report (Test):")
print(classification_report(y_test, y_test_pred))
print("F1:", f1_score(y_test, y_test_pred))

y_train_pred = final_cat_model.predict(X_train_top)
print("\nClassification Report (Train):")
print(classification_report(y_train, y_train_pred))
print("F1:", f1_score(y_train, y_train_pred))



Classification Report (Test):
              precision    recall  f1-score   support

           0       0.91      0.91      0.91      1593
           1       0.64      0.67      0.65       407

    accuracy                           0.86      2000
   macro avg       0.78      0.79      0.78      2000
weighted avg       0.86      0.86      0.86      2000

F1: 0.6537997587454765

Classification Report (Train):
              precision    recall  f1-score   support

           0       0.91      0.91      0.91      6370
           1       0.65      0.66      0.65      1630

    accuracy                           0.86      8000
   macro avg       0.78      0.78      0.78      8000
weighted avg       0.86      0.86      0.86      8000

F1: 0.6533212010919017


In [199]:

probs = final_cat_model.predict_proba(X_test_top)[:,1]
best_f1 = 0
best_thresh = 0.5

for thresh in np.arange(0.1, 0.9, 0.01):
    preds = (probs > thresh).astype(int)
    f1 = f1_score(y_test, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = thresh

print("Best threshold:", best_thresh, "Best F1:", best_f1)


Best threshold: 0.4999999999999998 Best F1: 0.6537997587454765


# for deploy test

In [None]:
X_test.iloc[0]
{
  "CreditScore": 585,
  "Geography": "France",
  "Gender": "Male",
  "Age": 36,
  "Tenure": 7,
  "Balance": 0.0,
  "NumOfProducts": 2,
  "HasCrCard": 1,
  "IsActiveMember": 0,
  "EstimatedSalary": 94283.09
}


CreditScore             585
Geography            France
Gender                 Male
Age                      36
Tenure                    7
Balance                 0.0
NumOfProducts             2
HasCrCard                 1
IsActiveMember            0
EstimatedSalary    94283.09
Name: 5702, dtype: object

In [220]:
final_cat_model.predict_proba(X_test_processed[0, [2, 23, 9, 3, 18, 7, 13, 11, 20, 5]])

array([0.95352657, 0.04647343])

In [225]:
final_xgb_model.predict_proba(X_test_processed[0, [2, 23, 11, 18, 20, 9, 13, 3, 7]].reshape(1, -1))

array([[0.93326867, 0.06673136]], dtype=float32)

In [237]:
y_test[:1]

5702    0
Name: Exited, dtype: int64

--------

--------

# save the final model and preprocessor

In [239]:
import joblib

# Base models directory
BASE_MODEL_DIR = os.path.join("..", "models")
os.makedirs(BASE_MODEL_DIR, exist_ok=True)


# --- XGBoost ---
xgb_dir = os.path.join(BASE_MODEL_DIR, f"xgb")
os.makedirs(xgb_dir, exist_ok=True)

xgb_model_path = os.path.join(xgb_dir, "model.pkl")
xgb_checkpoint_path = os.path.join(xgb_dir, "checkpoint.json")

joblib.dump(final_xgb_model, xgb_model_path)
final_xgb_model.save_model(xgb_checkpoint_path)

# --- CatBoost ---
cat_dir = os.path.join(BASE_MODEL_DIR, f"catboost")
os.makedirs(cat_dir, exist_ok=True)

cat_model_path = os.path.join(cat_dir, "model.pkl")
cat_checkpoint_path = os.path.join(cat_dir, "checkpoint.cbs")

joblib.dump(final_cat_model, cat_model_path)
final_cat_model.save_model(cat_checkpoint_path)

# --- Preprocessor ---
preprocessor_dir = os.path.join(BASE_MODEL_DIR, f"preprocessor")
os.makedirs(preprocessor_dir, exist_ok=True)

preprocessor_path = os.path.join(preprocessor_dir, "preprocessor.pkl")
joblib.dump(full_pipeline, preprocessor_path)

print("Models, checkpoints, and preprocessor saved successfully!")


Models, checkpoints, and preprocessor saved successfully!
