In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import shap
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    classification_report,
    roc_curve,
    precision_recall_curve,
)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
#load the data
train_df = pd.read_csv("Tables/bnpl_train.csv")
test_df = pd.read_csv("Tables/bnpl_test.csv")
val_df = pd.read_csv("Tables/bnpl_val.csv")
target_col = "default_flag"

In [4]:
# Quick data checks
print(train_df.shape, test_df.shape, val_df.shape)
print(train_df.head(), test_df.head(), val_df.head())
print("Train nulls in target:", train_df["default_flag"].isna().sum())
print("Train class distribution:\n", train_df["default_flag"].value_counts(normalize=True))

(595, 14) (199, 14) (199, 14)
   external_repayment_loans  credit_card_interest_incidence  default_flag  \
0                         1                               0             0   
1                         0                               0             0   
2                         0                               0             0   
3                         0                               1             0   
4                         1                               1             0   

   bnpl_usage_frequency  financial_stress_score  credit_limit_utilisation  \
0             -0.730258               -1.246089                 -1.746734   
1              1.319109                0.529472                 -0.222965   
2              0.408279                1.239697                  0.805578   
3             -1.641088                1.594809                 -0.184871   
4              0.408279                0.529472                 -1.556263   

   payment_delinquency_count  impulsive_buyi

In [5]:
# Separate features and target for training set
X_train = train_df.drop(columns=[target_col])
y_train = train_df[target_col]
print("X_train shape:", X_train.shape, X_train.head())
print("y_train shape:", y_train.shape, y_train.head())



X_train shape: (595, 13)    external_repayment_loans  credit_card_interest_incidence  \
0                         1                               0   
1                         0                               0   
2                         0                               0   
3                         0                               1   
4                         1                               1   

   bnpl_usage_frequency  financial_stress_score  credit_limit_utilisation  \
0             -0.730258               -1.246089                 -1.746734   
1              1.319109                0.529472                 -0.222965   
2              0.408279                1.239697                  0.805578   
3             -1.641088                1.594809                 -0.184871   
4              0.408279                0.529472                 -1.556263   

   payment_delinquency_count  impulsive_buying_score  \
0                  -1.500097               -0.528311   
1                  -1

In [6]:
# Separate features and target for test set
X_test = test_df.drop(columns=[target_col])
y_test = test_df[target_col]
print("X_test shape:", X_test.shape, X_test.head())
print("y_test shape:", y_test.shape, y_test.head())

X_test shape: (199, 13)    external_repayment_loans  credit_card_interest_incidence  \
0                         0                               0   
1                         0                               1   
2                         1                               0   
3                         0                               0   
4                         0                               0   

   bnpl_usage_frequency  financial_stress_score  credit_limit_utilisation  \
0              0.180572               -0.890977                  0.005600   
1             -1.413380                1.594809                  1.643651   
2              1.546817                0.174360                 -1.670546   
3             -1.413380               -0.180752                 -1.327698   
4             -0.047135                0.529472                  1.567463   

   payment_delinquency_count  impulsive_buying_score  \
0                   1.462180                0.504892   
1                  -0.

In [7]:
# Separate features and target for validation set
X_val = val_df.drop(columns=[target_col])
y_val = val_df[target_col]
print("X_val shape:", X_val.shape, X_val.head())
print("y_val shape:", y_val.shape, y_val.head())

X_val shape: (199, 13)    external_repayment_loans  credit_card_interest_incidence  \
0                         0                               1   
1                         0                               0   
2                         0                               0   
3                         1                               0   
4                         0                               1   

   bnpl_usage_frequency  financial_stress_score  credit_limit_utilisation  \
0             -0.502550                1.239697                  0.272259   
1              0.408279               -0.535864                 -1.670546   
2              0.635987               -0.890977                  0.043694   
3             -0.730258                0.884585                 -0.108683   
4              0.635987                0.529472                 -0.337248   

   payment_delinquency_count  impulsive_buying_score  \
0                  -0.907642                0.160491   
1                   1.4

In [8]:
# check the feature names and target distribution
print("Features:", X_train.columns.tolist())
print("Train target balance:\n", y_train.value_counts(normalize=True))

Features: ['external_repayment_loans', 'credit_card_interest_incidence', 'bnpl_usage_frequency', 'financial_stress_score', 'credit_limit_utilisation', 'payment_delinquency_count', 'impulsive_buying_score', 'financial_literacy_assessment', 'debt_accumulation_metric', 'return_dispute_incidents', 'demographic_risk_factor', 'bnpl_debt_ratio', 'stress_usage_interaction']
Train target balance:
 default_flag
0    0.922689
1    0.077311
Name: proportion, dtype: float64


Balanced Model

In [9]:
# Fit a baseline Logistic Regression model
baseline_model = LogisticRegression(random_state=42, max_iter=1000)
baseline_model.fit(X_train, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,1000


In [10]:
# Evaluate on the validation set
y_val_pred   = baseline_model.predict(X_val)
y_val_proba  = baseline_model.predict_proba(X_val)[:, 1]

In [11]:
# Print baseline evaluation metrics
print("accuracy:", accuracy_score(y_val, y_val_pred))
print("precision:", precision_score(y_val, y_val_pred))
print("recall:", recall_score(y_val, y_val_pred))
print("f1_score:", f1_score(y_val, y_val_pred))
print("roc_auc:", roc_auc_score(y_val, y_val_proba))
baseline_cm = confusion_matrix(y_val, y_val_pred)
print("Confusion Matrix:")
print(baseline_cm)

accuracy: 0.9547738693467337
precision: 0.8181818181818182
recall: 0.5625
f1_score: 0.6666666666666666
roc_auc: 0.9497950819672131
Confusion Matrix:
[[181   2]
 [  7   9]]


Model

In [12]:
def best_f1_threshold(y_true, y_score):
    """
    y_true: 1D array of true binary labels {0,1}
    y_score: 1D array of predicted probabilities for the positive class
    Returns: threshold, f1_at_threshold, precision, recall
    """
    # precision_recall_curve gives P/R for increasing thresholds; the first P/R has no threshold
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    f1 = 2 * precision[1:] * recall[1:] / (precision[1:] + recall[1:] + 1e-12)
    # argmax; break ties by preferring higher recall (lower threshold)
    ix = np.argmax(np.vstack([f1, recall[1:]]).T, axis=0)[0]
    return thresholds[ix], f1[ix], precision[ix+1], recall[ix+1]



In [13]:
# Fit the model with automatic balancing
balanced_model = LogisticRegression(
    class_weight="balanced",
    random_state=42,
    max_iter=1000
)
balanced_model.fit(X_train, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,42
,solver,'lbfgs'
,max_iter,1000


In [14]:
probs = balanced_model.predict_proba(X_val)[:, 1]    # your trained LogisticRegression
thr, f1, prec, rec = best_f1_threshold(y_val, probs)
print(f"Best threshold={thr:.4f}, F1={f1:.4f} (P={prec:.4f}, R={rec:.4f})")

Best threshold=0.9319, F1=0.6897 (P=0.7692, R=0.6250)


In [15]:
y_pred_val = (probs >= thr).astype(int)
#print classification metrics at the best F1 threshold
print("Confusion Matrix at best F1 threshold:\n", confusion_matrix(y_val, y_pred_val))
print("Accuracy at best F1 threshold:", accuracy_score(y_val, y_pred_val))
print("Precision at best F1 threshold:", precision_score(y_val, y_pred_val))
print("Recall at best F1 threshold:", recall_score(y_val, y_pred_val))
print("F1 Score at best F1 threshold:", f1_score(y_val, y_pred_val))
print("ROC AUC at best F1 threshold:", roc_auc_score(y_val, probs))
print("\nClassification Report at best F1 threshold:")
print(classification_report(y_val, y_pred_val))

Confusion Matrix at best F1 threshold:
 [[179   4]
 [  6  10]]
Accuracy at best F1 threshold: 0.949748743718593
Precision at best F1 threshold: 0.7142857142857143
Recall at best F1 threshold: 0.625
F1 Score at best F1 threshold: 0.6666666666666666
ROC AUC at best F1 threshold: 0.9566256830601093

Classification Report at best F1 threshold:
              precision    recall  f1-score   support

           0       0.97      0.98      0.97       183
           1       0.71      0.62      0.67        16

    accuracy                           0.95       199
   macro avg       0.84      0.80      0.82       199
weighted avg       0.95      0.95      0.95       199



In [16]:
probs_test = balanced_model.predict_proba(X_test)[:, 1]
y_pred_test = (probs_test >= thr).astype(int)
# Print test set metrics at the best F1 threshold from validation
print("Confusion Matrix on Test Set at best F1 threshold:\n", confusion_matrix(y_test, y_pred_test))
print("Accuracy on Test Set at best F1 threshold:", accuracy_score(y_test, y_pred_test))
print("Precision on Test Set at best F1 threshold:", precision_score(y_test, y_pred_test))
print("Recall on Test Set at best F1 threshold:", recall_score(y_test, y_pred_test))
print("F1 Score on Test Set at best F1 threshold:", f1_score(y_test, y_pred_test))
print("ROC AUC on Test Set at best F1 threshold:", roc_auc_score(y_test, probs_test))

Confusion Matrix on Test Set at best F1 threshold:
 [[179   4]
 [  9   7]]
Accuracy on Test Set at best F1 threshold: 0.9346733668341709
Precision on Test Set at best F1 threshold: 0.6363636363636364
Recall on Test Set at best F1 threshold: 0.4375
F1 Score on Test Set at best F1 threshold: 0.5185185185185185
ROC AUC on Test Set at best F1 threshold: 0.9289617486338798


In [None]:
#print the best features that explain default
feature_names = X_train.columns
coefficients = balanced_model.coef_[0]
feature_importance = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': coefficients,
    'Abs_Coefficient': np.abs(coefficients)
}).sort_values(by='Abs_Coefficient', ascending=False)
feature_importance.head(6).to_csv("Results/logreg_feature_importance.csv", index=False)
print(feature_importance.head(6))

                      Feature  Coefficient  Abs_Coefficient
2        bnpl_usage_frequency     2.318099         2.318099
5   payment_delinquency_count     2.044539         2.044539
3      financial_stress_score     1.102664         1.102664
11            bnpl_debt_ratio     0.870560         0.870560
4    credit_limit_utilisation     0.672772         0.672772
12   stress_usage_interaction     0.586373         0.586373


In [None]:
# Save all results to a JSON file
fpr, tpr, thresholds = roc_curve(y_test, y_pred_test)
results = {
    "threshold": thr,
    "confusion_matrix": confusion_matrix(y_test, y_pred_test).tolist(),  # convert to list for saving
    "accuracy": accuracy_score(y_test, y_pred_test),
    "precision": precision_score(y_test, y_pred_test),
    "recall": recall_score(y_test, y_pred_test),
    "f1_score": f1_score(y_test, y_pred_test),
    "roc_auc": roc_auc_score(y_test, probs),
    "fpr": fpr.tolist(),
    "tpr": tpr.tolist(),
    "roc_thresholds": thresholds.tolist(),
}
with open("Results/LR_model_results.json", "w") as f:
    json.dump(results, f)