In [None]:
%run Imports.ipynb
name = 'Kred' # Choose Kred or Mone

param_grid = {
    'learning_rate': [0.001, 0.005, 0.01],
    'max_leaves': [10],
    'min_samples_leaf': [2, 5, 10],
    'interactions': [5, 10, 15]
}

  from .autonotebook import tqdm as notebook_tqdm


# 1) Read in Files

In [3]:
# Load the two input artifacts for the dataset selected by `name`:
#   - key_featsubgroups: table with 'subgroup' and 'list_features' columns (used by split_data_4)
#   - df: the modelling dataframe containing the features and the target column
# Presumably both are outputs of the feature-selection step (directory '2_FS') - confirm.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
key_featsubgroups = pd.read_pickle('../pickle/2_FS/' + name + '/key_featsubgroups.pkl')
df = pd.read_pickle('../pickle/2_FS/' + name + '/2_df_new_.pkl')

In [4]:
# Share of rows per target label (class balance), followed by the frame's (rows, columns).
print(df[target].value_counts() / len(df))
print('df_shape: ', df.shape)

arrears
1   0.646
0   0.354
Name: count, dtype: float64
df_shape:  (129457, 418)


# 2) Create Model prediction functions

## 2.1) Split dataset into train/testing while excluding demographic features

In [5]:
def split_data_4(df, key_featsubgroups=key_featsubgroups, target=target, test_size=0.2, random_state=42, stratify=False):
    """
    Splits the dataset into training and testing sets while excluding demographic features.

    The demographic feature names are taken from the 'list_features' entry of the
    first row of `key_featsubgroups` whose 'subgroup' equals 'demo'.

    Note: the defaults for `key_featsubgroups` and `target` are bound to the notebook
    globals at the moment this cell is executed. Re-run this cell after reloading
    either of them, or pass them explicitly.

    Parameters:
    df (DataFrame): The dataset containing features and target variable.
    key_featsubgroups (DataFrame): A mapping of feature subgroups with 'subgroup' and 'list_features' columns.
    target (str): The name of the target variable.
    test_size (float, optional): The proportion of the dataset to allocate for testing. Default is 0.2.
    random_state (int, optional): Random seed for reproducibility. Default is 42.
    stratify (bool, optional): If True, keep the class proportions of `target` equal in both splits.
        Default is False (plain random split, the previous behaviour).

    Returns:
    tuple: X_train, X_test, y_train, y_test (training and testing datasets)

    Raises:
    IndexError: If `key_featsubgroups` contains no row for the 'demo' subgroup.
    """

    # Extract demographic features
    demo_rows = key_featsubgroups.loc[key_featsubgroups['subgroup'] == 'demo', 'list_features']
    if len(demo_rows) == 0:
        raise IndexError(
            "key_featsubgroups has no row with subgroup == 'demo'; "
            "cannot determine which demographic features to exclude."
        )
    demo_feat = demo_rows.values[0]
    # Normalise to a plain list so the concatenation below also works when the
    # stored value is a single name, a tuple or an array.
    demo_feat = [demo_feat] if isinstance(demo_feat, str) else list(demo_feat)
    print("Demographic Features:", demo_feat)

    # Separate features (X) and target variable (y), excluding demographic features
    X = df.drop(columns=[target] + demo_feat)
    y = df[target]

    # Split into train/test according to `test_size`; optionally stratified on the target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state,
        stratify=y if stratify else None
    )

    # Print dataset shapes
    print(f"Training Features Shape: {X_train.shape}")
    print(f"Training Labels Shape: {y_train.shape}")
    print(f"Testing Features Shape: {X_test.shape}")
    print(f"Testing Labels Shape: {y_test.shape}")

    return X_train, X_test, y_train, y_test

## 2.2) Model training and prediction

In [6]:
# with open('../pickle/4_Model_Optuna/results_dict_updated.pkl', 'rb') as f:
#     results_dict = pickle.load(f)

In [7]:
# (disabled) Loading of the XGBoost default parameters, kept for reference:
# with open('../pickle/3_Model/xgb_default_params.pkl', 'rb') as f:
#     default_params = pickle.load(f)

# Load the previously saved results dictionary. Later cells read the entries
# 'rf0', 'xgbc0', 'xgbc_optuna' and 'xgbc_optimized' from it and add 'ebm_default'.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../pickle/4_Model_Optuna/results_dict_updated.pkl', 'rb') as f:
    results_dict = pickle.load(f)

In [8]:
# Show the installed NumPy version so the environment of this run is recorded in the output.
np.__version__

'2.2.0'

# 5) EBM

## 5.1) Default run

In [7]:
import time
from copy import deepcopy
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, roc_auc_score
from interpret.glassbox import ExplainableBoostingClassifier  # Ensure you have the interpret package

def train_default_EBM(df, results_dict):
    """
    Fit an Explainable Boosting Machine with its default settings and record its scores.

    The data is split with `split_data_4(df)`. Training-set metrics are computed
    from 5-fold `cross_val_predict` predictions; test-set metrics come from the
    model fitted on the whole training split. Everything is stored under
    `results_dict["ebm_default"]`.

    Parameters:
    -----------
    df : DataFrame
        Dataset holding the features and the target column.
    results_dict : dict
        Collection of per-model results; it is updated in place and also returned.

    Returns:
    --------
    results_dict : dict
        The same dictionary, now containing the "ebm_default" entry.
    global_importance : object
        Result of `explain_global().data()` on the fitted model.
    local_importance : object
        Result of `explain_local(X_test, y_test).data()` on the fitted model.
        NOTE(review): `.data()` is called without a key here - confirm that this
        yields per-sample explanations and not an overall summary (possibly None).
    """
    t0 = time.time()
    print("\nStarting EBM model training and evaluation with default parameters...")

    # Train/test split without the demographic columns (see split_data_4)
    X_train, X_test, y_train, y_test = split_data_4(df)

    # Fit on the full training split using the library defaults
    model = ExplainableBoostingClassifier()
    model.fit(X_train, y_train)

    # Training predictions via 5-fold CV; test predictions from the fitted model
    train_pred = cross_val_predict(model, X_train, y_train, cv=5)
    test_pred = model.predict(X_test)

    # Confusion matrices and accuracies
    train_cm = confusion_matrix(y_train, train_pred)
    test_cm = confusion_matrix(y_test, test_pred)
    train_acc = accuracy_score(y_train, train_pred)
    test_acc = accuracy_score(y_test, test_pred)

    # Per-class F1, treating label 1 and then label 0 as the positive class
    train_f1 = {lbl: f1_score(y_train, train_pred, pos_label=lbl) for lbl in (1, 0)}
    test_f1 = {lbl: f1_score(y_test, test_pred, pos_label=lbl) for lbl in (1, 0)}

    # Test ROC AUC from the predicted probability of class 1
    test_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])

    # Explanation payloads
    global_importance = model.explain_global().data()
    local_importance = model.explain_local(X_test, y_test).data()

    elapsed_min = (time.time() - t0) / 60
    print(f"EBM Model training completed in {elapsed_min:.2f} minutes")

    # Assemble the result entry and register it
    entry = {
        "classifier": deepcopy(model),
        "cfm_train": train_cm,
        "cfm_test": test_cm,
        "train_accuracy": train_acc,
        "test_accuracy": test_acc,
        "train F1-score label 1": train_f1[1],
        "train F1-score label 0": train_f1[0],
        "test F1-score label 1": test_f1[1],
        "test F1-score label 0": test_f1[0],
        "test roc auc score": test_auc,
        "default_params": model.get_params(),  # parameters the default model ran with
        "global_importance": global_importance,
        "local_importance": local_importance,
        "time_m": elapsed_min,
    }
    results_dict["ebm_default"] = entry

    return results_dict, global_importance, local_importance


In [8]:
# Long-running cell: the run recorded below finished in about 14 minutes
# (an earlier note estimated 40-45 minutes, so the runtime evidently varies).
# Adds the 'ebm_default' entry to results_dict.
results_dict, global_importance, local_importance = train_default_EBM(df, results_dict)


Starting EBM model training and evaluation with default parameters...
Demographic Features: ['clientdata.demo.gender', 'clientdata.demo.age_year', 'clientdata.demo.age_month', 'clientdata.demo.children', 'clientdata.demo.children_singleparent', 'clientdata.demo.maritalstatus_expand_SINGLE', 'clientdata.demo.maritalstatus_expand_MARRIED', 'clientdata.demo.maritalstatus_expand_DIVORCED', 'clientdata.demo.maritalstatus_expand_WIDOWED', 'clientdata.demo.maritalstatus_expand_newvalue', 'clientdata.demo.maritalstatus_woe']
Training Features Shape: (103565, 406)
Training Labels Shape: (103565,)
Testing Features Shape: (25892, 406)
Testing Labels Shape: (25892,)
EBM Model training completed in 14.31 minutes


In [11]:
# Persist results_dict (now including the EBM entry) to a new file,
# 'results_dict_updated_5.pkl', leaving the loaded 'results_dict_updated.pkl' untouched.
with open('../pickle/4_Model_Optuna/results_dict_updated_5.pkl', 'wb') as f:
    pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)

In [13]:
def print_model_comparison(results_dict, models=None):
    """
    Print one summary line per model: test ROC AUC, test accuracy and runtime in minutes.

    Parameters:
    -----------
    results_dict : dict
        Maps a model key to a dict holding 'test roc auc score', 'test_accuracy' and 'time_m'.
    models : list of (label, key) tuples, optional
        Display label and results_dict key for each line, in print order.
        Defaults to the benchmark, Optuna-tuned XGBoost and default EBM models.

    A model whose key is absent from results_dict is reported as missing instead
    of raising a KeyError, so the cell still runs when only some models were trained.
    """
    if models is None:
        models = [
            ("Benchmark RandomForest", "rf0"),
            ("Benchmark XGBoost", "xgbc0"),
            ("XGBoost w/ Optuna", "xgbc_optuna"),
            ("Optimized XGBoost w/ Optuna", "xgbc_optimized"),
            ("EBM Default", "ebm_default"),
        ]

    for label, key in models:
        entry = results_dict.get(key)
        if entry is None:
            print(f"{label} - no results stored under '{key}'")
            continue
        auc = np.round(entry['test roc auc score'], 4)
        acc = np.round(entry['test_accuracy'], 4)
        minutes = np.round(entry['time_m'], 2)
        print(f"{label} - AUC/accuracy score: {auc} / {acc} @ {minutes} minutes")


# Compare the stored models on the test set (ROC AUC / accuracy) and their runtime
print_model_comparison(results_dict)


Benchmark RandomForest - AUC/accuracy score: 0.7037 / 0.6878 @ 1.81 minutes
Benchmark XGBoost - AUC/accuracy score: 0.723 / 0.6962 @ 0.43 minutes
XGBoost w/ Optuna - AUC/accuracy score: 0.7363 / 0.7041 @ 0.23 minutes
Optimized XGBoost w/ Optuna - AUC/accuracy score: 0.7363 / 0.7041 @ 1.07 minutes
EBM Default - AUC/accuracy score: 0.7302 / 0.7014 @ 14.31 minutes


## 5.2) Tuning

In [None]:
def tune_grid_EBM(param_grid, df, results_dict, cv=5, scoring="accuracy"):
    """
    Grid-search tunes and trains an Explainable Boosting Machine (EBM),
    evaluates performance, and stores results in the given dictionary.

    Every combination in `param_grid` is scored with `cross_val_score` on the
    training split; the combination with the highest mean score is refitted on
    the whole training split and evaluated on the test split. Results are stored
    under `results_dict["ebm_grid"]`.

    Parameters:
    -----------
    param_grid : dict
        Grid of EBM init parameters to try. Keys are parameter names,
        values are lists of candidate values.
    df : DataFrame
        Dataset containing features and the target variable.
    results_dict : dict
        Dictionary to store model performance metrics (updated in place).
    cv : int, optional
        Number of cross-validation folds used both for model selection and
        for the training-set predictions. Default is 5.
    scoring : str, optional
        Scoring name passed to `cross_val_score` for model selection.
        Default is "accuracy".

    Returns:
    --------
    best_params : dict
        Best parameters found by grid search.
    results_dict : dict
        Updated results dictionary containing model evaluation metrics.
    global_importance : object
        Result of `explain_global().data()` on the tuned EBM model.
    local_importance : object
        Result of `explain_local(X_test, y_test).data()` on the tuned EBM model.
        NOTE(review): `.data()` is called without a key - confirm this yields
        per-sample explanations and not an overall summary (possibly None).

    Raises:
    -------
    ValueError
        If no parameter combination yields a valid (non-NaN) CV score,
        e.g. because the grid is empty.
    """
    # Imported here because no visible cell of this notebook imports these two
    # names; harmless if Imports.ipynb already provides them.
    from sklearn.model_selection import ParameterGrid, cross_val_score

    start_time = time.time()
    print("\nStarting grid-search hyperparameter tuning for EBM...")

    # split_data_4 must return X_train, X_test, y_train, y_test
    X_train, X_test, y_train, y_test = split_data_4(df)

    # generate all combinations
    grid = list(ParameterGrid(param_grid))
    print(f"Total parameter combinations to try: {len(grid)}")

    best_score = -np.inf
    best_params = None

    # evaluate each combination via k-fold CV and keep the best mean score
    for i, params in enumerate(grid, 1):
        print(f"Trying combo {i}/{len(grid)}: {params}")
        ebm = ExplainableBoostingClassifier(**params)
        scores = cross_val_score(ebm, X_train, y_train, cv=cv, scoring=scoring)
        mean_score = scores.mean()
        print(f" → mean CV {scoring}: {mean_score:.4f}")
        # A NaN score never compares greater, so failed combinations are skipped.
        if mean_score > best_score:
            best_score = mean_score
            best_params = dict(params)  # copy so the stored result does not alias the grid

    if best_params is None:
        raise ValueError(
            "No parameter combination produced a valid CV score; "
            "check that param_grid is non-empty and the candidates can be fitted."
        )

    print("Best params found:", best_params)
    print(f"Best CV {scoring}: {best_score:.4f}")

    # train final model on full training data
    ebm_best = ExplainableBoostingClassifier(**best_params)
    ebm_best.fit(X_train, y_train)

    # get cross-val predictions on training set
    y_train_pred = cross_val_predict(ebm_best, X_train, y_train, cv=cv)
    # predict on test set
    y_test_pred  = ebm_best.predict(X_test)
    y_test_proba = ebm_best.predict_proba(X_test)[:, 1]

    # compute metrics
    cfm_train    = confusion_matrix(y_train, y_train_pred)
    cfm_test     = confusion_matrix(y_test,  y_test_pred)
    acc_train    = accuracy_score(y_train, y_train_pred)
    acc_test     = accuracy_score(y_test,  y_test_pred)
    f1_train_1   = f1_score(y_train, y_train_pred, pos_label=1)
    f1_train_0   = f1_score(y_train, y_train_pred, pos_label=0)
    f1_test_1    = f1_score(y_test,  y_test_pred,  pos_label=1)
    f1_test_0    = f1_score(y_test,  y_test_pred,  pos_label=0)
    roc_auc_test = roc_auc_score(y_test, y_test_proba)

    # extract feature importances
    global_importance = ebm_best.explain_global().data()
    local_importance  = ebm_best.explain_local(X_test, y_test).data()

    total_time = (time.time() - start_time) / 60
    print(f"Grid‐search & training completed in {total_time:.2f} minutes")

    # store results
    results_dict["ebm_grid"] = {
        "classifier":             deepcopy(ebm_best),
        "cfm_train":              cfm_train,
        "cfm_test":               cfm_test,
        "train_accuracy":         acc_train,
        "test_accuracy":          acc_test,
        "train F1-score label 1": f1_train_1,
        "train F1-score label 0": f1_train_0,
        "test F1-score label 1":  f1_test_1,
        "test F1-score label 0":  f1_test_0,
        "test roc auc score":     roc_auc_test,
        "param_grid":             param_grid,
        "best_params":            best_params,
        "best_cv_score":          best_score,
        "global_importance":      global_importance,
        "local_importance":       local_importance,
        "time_m":                 total_time
    }

    return best_params, results_dict, global_importance, local_importance