In [436]:
# Liberies
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from imblearn.over_sampling import SMOTE
import plotly.figure_factory as ff
import plotly.express as px
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Firts view of the main data

In [437]:
# Load the dataset and display the first 5 rows
loans = pd.read_csv("data/loans_welfordbank_es.csv")
loans.head(5)

Unnamed: 0,Loan_ID,Client_ID,Loan_Type,Principal_Amount,Interest_Rate,Loan_Term_Months,Start_Date,End_Date,Outstanding_Balance,Loan_Status,Default_Status,Created_At
0,1,4,Personal,1571.08,4.2,60,2021-10-11,2026-10-11,0.0,Cerrado,No,2021-10-04 03:23:40
1,2,8,Personal,1827.15,4.2,60,2024-06-07,2029-06-07,0.0,Cerrado,No,2024-05-31 10:32:13
2,3,9,Automóvil,27772.94,3.8,48,2021-05-05,2025-05-05,0.0,Cerrado,No,2021-04-30 21:35:57
3,4,9,Automóvil,42266.19,3.8,48,2020-02-22,2024-02-22,0.0,Cerrado,No,2020-02-15 16:00:17
4,5,9,Personal,43852.1,4.2,60,2021-04-02,2026-04-02,0.0,Cerrado,No,2021-03-30 10:58:54


In [438]:
# Display basic information about the dataset
loans.info()
loans.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1180 entries, 0 to 1179
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Loan_ID              1180 non-null   int64  
 1   Client_ID            1180 non-null   int64  
 2   Loan_Type            1180 non-null   object 
 3   Principal_Amount     1180 non-null   float64
 4   Interest_Rate        1180 non-null   float64
 5   Loan_Term_Months     1180 non-null   int64  
 6   Start_Date           1180 non-null   object 
 7   End_Date             1180 non-null   object 
 8   Outstanding_Balance  1180 non-null   float64
 9   Loan_Status          1180 non-null   object 
 10  Default_Status       1180 non-null   object 
 11  Created_At           1180 non-null   object 
dtypes: float64(3), int64(3), object(6)
memory usage: 110.8+ KB


Unnamed: 0,Loan_ID,Client_ID,Principal_Amount,Interest_Rate,Loan_Term_Months,Outstanding_Balance
count,1180.0,1180.0,1180.0,1180.0,1180.0,1180.0
mean,590.5,755.107627,88549.461627,3.88839,86.908475,28030.483559
std,340.780966,422.83796,127406.363167,1.541193,85.425692,84415.18188
min,1.0,4.0,526.12,1.5,12.0,0.0
25%,295.75,412.0,13380.6175,3.8,48.0,0.0
50%,590.5,737.0,34880.34,3.8,48.0,0.0
75%,885.25,1115.75,86059.1075,4.2,60.0,10057.1275
max,1180.0,1498.0,499522.65,6.0,240.0,498597.56


# Prepare the data for the model

In [439]:
# Encode target variable: 'Sí' -> 1, 'No' -> 0
y = loans['Default_Status'].map({'Sí': 1, 'No': 0})

# Select predictor variables
X = loans[['Principal_Amount', 'Interest_Rate', 'Loan_Term_Months', 'Loan_Type']]

In [440]:
# Create a DataFrame from the target variable
class_counts = y.value_counts().reset_index()
class_counts.columns = ['Class', 'Count']

# Plot using Plotly Express
fig = px.bar(
    class_counts,
    x='Class',
    y='Count',
    text='Count',
    title='Class Distribution in Default_Status',
    color='Class',
    color_discrete_sequence=['#1f77b4', '#ff7f0e']
)

fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')

fig.show()

We see that the objective class is desbalanced, we have 1134 values for Default and 46 values for Not default. We will try to fix these later.

In [441]:
# Define preprocessing for numeric and categorical features
numeric_features = ['Principal_Amount', 'Interest_Rate', 'Loan_Term_Months']
numeric_transformer = StandardScaler()

categorical_features = ['Loan_Type']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)
])


In [442]:
# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)

In [443]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_preprocessed, y, test_size=0.3, random_state=15, stratify=y
)

# Models

## Firts try of model using only loans_welfordbank_es.csv and balancing the objective class with smote

###  Why we apply SMOTE

The original dataset is highly imbalanced, with very few cases of loan defaults (`Default_Status = "Sí"`). This can cause the logistic regression model to become biased toward the majority class (`"No"`), ignoring defaults entirely.

To address this, we use **SMOTE** (Synthetic Minority Over-sampling Technique), which generates new synthetic examples of the minority class. This balances the training data and helps the model better learn to identify defaults, improving recall at the cost of some precision.

SMOTE is applied **only to the training set** to prevent introducing synthetic bias into the test evaluation.

In [444]:
# Apply SMOTE to training set
smote = SMOTE(random_state = 15)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


In [445]:
# Train logistic regression on resampled data
model = LogisticRegression(max_iter=1000)
model.fit(X_train_resampled, y_train_resampled)

In [446]:
# Predict and evaluate on original test set
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.96      0.32      0.48       340
           1       0.04      0.71      0.08        14

    accuracy                           0.34       354
   macro avg       0.50      0.52      0.28       354
weighted avg       0.93      0.34      0.47       354



In [447]:
# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Define axis labels
labels = ["No Default", "Default"]

# Create heatmap with Plotly
fig = ff.create_annotated_heatmap(
    z=cm,
    x=labels,  # Predicted
    y=labels,  # Actual
    colorscale='Blues',
    showscale=True,
    annotation_text=[[str(cell) for cell in row] for row in cm],
    hoverinfo="z"
)

fig.update_layout(
    title="Confusion Matrix",
    xaxis_title="Predicted Label",
    yaxis_title="True Label"
)

fig.show()

### Model Performance Interpretation (Logistic Regression with SMOTE)

The logistic regression model trained on SMOTE-resampled data achieved the following results:

- **Precision for class 1 (defaults)** is **0.04**, meaning that only 4% of predicted defaults were correct.
- **Recall for class 1** is **0.50**, indicating that the model successfully identified 50% of actual defaults.
- **Precision for class 0 (non-defaults)** remains high at **0.96**, while recall drops to **0.51**, meaning many non-defaults were incorrectly flagged as defaults.
- **Overall accuracy** is **51%**, which reflects a significant trade-off after balancing the classes.

According to the confusion matrix:
- **7 true defaults** were correctly classified (True Positives).
- **7 true defaults** were missed and labeled as non-defaults (False Negatives).
- **168 non-defaults** were correctly classified (True Negatives).
- **172 non-defaults** were incorrectly predicted as defaults (False Positives).

This result demonstrates the classic trade-off of using SMOTE: it improves recall on the minority class but significantly reduces precision, leading to a high number of false positives and a drop in overall accuracy.



## Second try 

### Try to balance the class with the inversal propotion to class frecuency

In [448]:
# Calculate total number of observations in y_train
total_obs = len(y_train)

# Count number of samples per class
class_counts = y_train.value_counts().to_dict()

# Define class weights: inversely proportional to class frequency
weights_dynamic = {cls: total_obs / count for cls, count in class_counts.items()}
print("Class weights:", weights_dynamic)


Class weights: {0: 1.0403022670025188, 1: 25.8125}


In [449]:
# Train logistic regression using calculated weights
model_dynamic_weighted = LogisticRegression(max_iter=1000, class_weight=weights_dynamic)
model_dynamic_weighted.fit(X_train, y_train)

# Predict and evaluate on the test set
y_pred_dynamic = model_dynamic_weighted.predict(X_test)
print(classification_report(y_test, y_pred_dynamic))

              precision    recall  f1-score   support

           0       0.97      0.29      0.44       340
           1       0.04      0.79      0.08        14

    accuracy                           0.31       354
   macro avg       0.51      0.54      0.26       354
weighted avg       0.93      0.31      0.43       354



In [450]:
# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred_dynamic, labels=[0, 1])

# Define axis labels
labels = ["No Default", "Default"]

# Create heatmap with Plotly
fig = ff.create_annotated_heatmap(
    z=cm,
    x=labels,  # Predicted
    y=labels,  # Actual
    colorscale='Blues',
    showscale=True,
    annotation_text=[[str(cell) for cell in row] for row in cm],
    hoverinfo="z"
)

fig.update_layout(
    title="Confusion Matrix",
    xaxis_title="Predicted Label",
    yaxis_title="True Label"
)

fig.show()

### Model Performance Interpretation (Logistic Regression with Inverse Class Frequency Weights)

The logistic regression model trained with class weights inversely proportional to class frequency achieved the following results:

- **Precision for class 1 (defaults)** is **0.02**, meaning that only 2% of predicted defaults were correct.
- **Recall for class 1** is **0.14**, indicating that the model successfully identified 14% of actual defaults.
- **Precision for class 0 (non-defaults)** is **0.95**, while recall drops to **0.73**, meaning a notable portion of non-defaults were incorrectly flagged as defaults.
- **Overall accuracy** is **71%**, which reflects a reasonable trade-off compared to earlier iterations.

According to the confusion matrix:
- **2 true defaults** were correctly classified (True Positives).
- **12 true defaults** were missed and labeled as non-defaults (False Negatives).
- **248 non-defaults** were correctly classified (True Negatives).
- **92 non-defaults** were incorrectly predicted as defaults (False Positives).

This result shows that weighting classes based on inverse frequency slightly improves recall for the minority class, but still results in a very low precision and a high number of false positives. The model gains some sensitivity at the cost of specificity.

## Model with  other csv

In [451]:
# Load credit history and create aggregated features per client
history = pd.read_csv("data/credit_history_welfordbank_es.csv")
history.head(5)

Unnamed: 0,History_ID,Client_ID,Type_Client,Transaction_ID,Credit_Amount,Payment_Amount,Outstanding_Balance,Interest_Rate,Payment_Status,Late_Payment_Fees,Transaction_Date,Payment_Due_Date,Last_Payment_Date,Credit_Score,Created_At
0,139b5b13-19f6-4a56-9ae3-1bee9d8819f2,1,Individual,1b0f67b1-f8a3-4c77-9a3d-c725a13171ec,37866.87,3233.83,18463.89,2.48,OVERDUE,369.28,2023-04-06,2023-05-06,2023-05-19,618,2025-05-15
1,bdf7eaad-6c2a-4309-85bd-7ea0e0f8331b,2,Individual,c604dafc-8a6d-47b7-8593-29d346a88d25,3042.94,274.02,1124.8,8.06,OVERDUE,22.5,2020-11-16,2020-12-16,2021-01-21,723,2025-05-15
2,b938f886-1953-4205-9588-06c10ec9b742,3,Individual,4b1398d5-cb36-47e6-b32e-1cbb7a34e5c3,24812.13,2278.17,6586.77,10.18,OVERDUE,131.74,2023-07-15,2023-08-14,2023-09-01,683,2025-05-15
3,acfc1335-7cb5-45e5-a1ac-4a27d879d841,4,Individual,563b0691-eeaa-44b9-94e3-adfb55fb60e5,18637.72,1614.03,2497.42,3.92,OVERDUE,49.95,2022-07-12,2022-08-11,2022-09-10,595,2025-05-15
4,a9300471-85ef-40e1-b442-a0aee8f0e69b,4,Individual,718a8cf5-4050-4e21-afe2-3fae6c17987d,46742.26,4038.14,42704.12,3.67,OVERDUE,854.08,2021-01-17,2021-02-16,2021-02-26,682,2025-05-15


In [452]:
# Display basic information about the dataset
history.info()
history.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1777 entries, 0 to 1776
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   History_ID           1777 non-null   object 
 1   Client_ID            1777 non-null   int64  
 2   Type_Client          1777 non-null   object 
 3   Transaction_ID       1777 non-null   object 
 4   Credit_Amount        1777 non-null   float64
 5   Payment_Amount       1777 non-null   float64
 6   Outstanding_Balance  1777 non-null   float64
 7   Interest_Rate        1777 non-null   float64
 8   Payment_Status       1777 non-null   object 
 9   Late_Payment_Fees    1777 non-null   float64
 10  Transaction_Date     1777 non-null   object 
 11  Payment_Due_Date     1777 non-null   object 
 12  Last_Payment_Date    1777 non-null   object 
 13  Credit_Score         1777 non-null   int64  
 14  Created_At           1777 non-null   object 
dtypes: float64(5), int64(2), object(8)
mem

Unnamed: 0,Client_ID,Credit_Amount,Payment_Amount,Outstanding_Balance,Interest_Rate,Late_Payment_Fees,Credit_Score
count,1777.0,1777.0,1777.0,1777.0,1777.0,1777.0,1777.0
mean,754.29713,25002.267535,2227.128216,11724.157321,6.899612,229.662347,579.593697
std,436.010567,13938.406098,1242.58702,11348.645098,2.850108,226.716783,157.092427
min,1.0,1056.67,91.74,0.0,2.02,0.0,300.0
25%,383.0,13018.28,1150.82,2366.93,4.48,40.78,446.0
50%,755.0,24747.58,2175.17,8046.16,6.79,156.38,581.0
75%,1130.0,36813.13,3278.21,18440.21,9.29,361.74,714.0
max,1500.0,49942.36,4625.89,49637.47,12.0,992.75,849.0


In [453]:
# Convert relevant columns to datetime
date_cols = ["Transaction_Date", "Payment_Due_Date", "Last_Payment_Date"]
history[date_cols] = history[date_cols].apply(pd.to_datetime)

In [454]:
# Days each payment was late (clip negatives to zero)
history["days_late"] = (
    history["Last_Payment_Date"] - history["Payment_Due_Date"]
).dt.days.clip(lower=0)

# Flag recent overdue payments (last 6 months)
cutoff = history["Transaction_Date"].max() - pd.Timedelta(days=180)
history["recent_overdue"] = (
    (history["Payment_Status"] == "OVERDUE") & (history["Transaction_Date"] >= cutoff)
).astype(int)


In [455]:
# Flag overdue for each row (1 = overdue, 0 = otherwise)
history["is_overdue"] = (history["Payment_Status"] == "OVERDUE").astype(int)

# Aggregate by Client_ID with only standard functions
agg = (
    history.groupby("Client_ID")
    .agg(
        credit_score         = ("Credit_Score", "last"),
        n_overdue_payments   = ("is_overdue", "sum"),
        avg_late_fee         = ("Late_Payment_Fees", "mean"),
        max_days_late        = ("days_late", "max"),
        has_recent_overdue   = ("recent_overdue", "max"),
    )
    .fillna(0)
    .reset_index()
)
#  Merge aggregated history with existing loans data
loans_history = loans.merge(agg, on="Client_ID", how="left").fillna(0)
loans_history.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1180 entries, 0 to 1179
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Loan_ID              1180 non-null   int64  
 1   Client_ID            1180 non-null   int64  
 2   Loan_Type            1180 non-null   object 
 3   Principal_Amount     1180 non-null   float64
 4   Interest_Rate        1180 non-null   float64
 5   Loan_Term_Months     1180 non-null   int64  
 6   Start_Date           1180 non-null   object 
 7   End_Date             1180 non-null   object 
 8   Outstanding_Balance  1180 non-null   float64
 9   Loan_Status          1180 non-null   object 
 10  Default_Status       1180 non-null   object 
 11  Created_At           1180 non-null   object 
 12  credit_score         1180 non-null   float64
 13  n_overdue_payments   1180 non-null   float64
 14  avg_late_fee         1180 non-null   float64
 15  max_days_late        1180 non-null   f

In [456]:
# Define the list of new numerical features
new_num = [
    "credit_score",
    "n_overdue_payments",
    "avg_late_fee",
    "max_days_late",
    "has_recent_overdue",
]

# Update feature lists for preprocessing
num_cols = ["Principal_Amount", "Interest_Rate", "Loan_Term_Months"] + new_num
cat_cols = ["Loan_Type"]

In [457]:
# Build X and y
X = loans_history[num_cols + cat_cols]
y = loans_history["Default_Status"].map({"Sí": 1, "No": 0})

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=15, stratify=y
)


In [458]:
# Preprocess features
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ],
    sparse_threshold=0.0,
)

X_train_prep = preprocessor.fit_transform(X_train)
X_test_prep  = preprocessor.transform(X_test)

In [459]:
# Calculate dynamic class weights
total_obs    = len(y_train)
class_counts = y_train.value_counts().to_dict()
weights_dynamic = {cls: total_obs / cnt for cls, cnt in class_counts.items()}
print("Class weights:", weights_dynamic)


Class weights: {0: 1.0403022670025188, 1: 25.8125}


In [460]:
# Train logistic regression with these weights
model = LogisticRegression(max_iter=1000, class_weight=weights_dynamic)
model.fit(X_train_prep, y_train)

In [461]:
# Evaluate the model
y_pred_hist = model.predict(X_test_prep)
y_prob = model.predict_proba(X_test_prep)[:, 1]

print(classification_report(y_test, y_pred_hist, digits=3))

              precision    recall  f1-score   support

           0      0.958     0.468     0.628       340
           1      0.037     0.500     0.069        14

    accuracy                          0.469       354
   macro avg      0.498     0.484     0.349       354
weighted avg      0.921     0.469     0.606       354



In [462]:
# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred_hist, labels=[0, 1])

# Define axis labels
labels = ["No Default", "Default"]

# Create heatmap with Plotly
fig = ff.create_annotated_heatmap(
    z=cm,
    x=labels,      # Predicted
    y=labels,      # Actual
    colorscale='Blues',
    showscale=True,
    annotation_text=[[str(cell) for cell in row] for row in cm],
    hoverinfo="z"
)

fig.update_layout(
    title="Confusion Matrix",
    xaxis_title="Predicted Label",
    yaxis_title="True Label"
)

fig.show()

### Model Performance Interpretation (Logistic Regression with Dynamic Weights + Credit History Features)

The logistic regression model trained with class weights inversely proportional to class frequency and enriched with aggregated credit history features achieved the following results:

- **Precision for class 1 (defaults)** is **0.037**, meaning that only 3.7% of predicted defaults were correct.  
- **Recall for class 1** is **0.50**, indicating that the model correctly identified 50% of actual defaults.  
- **Precision for class 0 (non-defaults)** is **0.958**, while recall drops to **0.468**, meaning many non-defaults were erroneously flagged as defaults.  
- **Overall accuracy** is **0.469**, reflecting the trade-off of prioritizing recall on the minority class.

According to the confusion matrix:
- **7 true defaults** were correctly classified (True Positives).  
- **7 true defaults** were missed and labeled as non-defaults (False Negatives).  
- **159 non-defaults** were correctly classified (True Negatives).  
- **181 non-defaults** were incorrectly predicted as defaults (False Positives).

Adding credit history features and dynamic weighting substantially improved the detection rate of defaulters (higher recall from 14% to 50%), but at the expense of a very high number of false alarms and lower overall accuracy. 

## Model with loan_metrics_welfordbank_es.csv

In [463]:
# Load loan_metrics 
metrics = pd.read_csv("data/loan_metrics_welfordbank_es.csv")
metrics.head(5)


Unnamed: 0,Loan_Metrics_ID,Date,Non_Performing_Loans,Total_Loans,Created_At
0,1,2020-01-01,0,0,2025-05-21 20:58:22.181717
1,2,2020-01-02,0,0,2025-05-21 20:58:22.182694
2,3,2020-01-03,0,1,2025-05-21 20:58:22.182694
3,4,2020-01-04,0,1,2025-05-21 20:58:22.183694
4,5,2020-01-05,0,2,2025-05-21 20:58:22.183694


In [464]:
# Display basic information about the dataset
history.info()
history.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1777 entries, 0 to 1776
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   History_ID           1777 non-null   object        
 1   Client_ID            1777 non-null   int64         
 2   Type_Client          1777 non-null   object        
 3   Transaction_ID       1777 non-null   object        
 4   Credit_Amount        1777 non-null   float64       
 5   Payment_Amount       1777 non-null   float64       
 6   Outstanding_Balance  1777 non-null   float64       
 7   Interest_Rate        1777 non-null   float64       
 8   Payment_Status       1777 non-null   object        
 9   Late_Payment_Fees    1777 non-null   float64       
 10  Transaction_Date     1777 non-null   datetime64[ns]
 11  Payment_Due_Date     1777 non-null   datetime64[ns]
 12  Last_Payment_Date    1777 non-null   datetime64[ns]
 13  Credit_Score         1777 non-nul

Unnamed: 0,Client_ID,Credit_Amount,Payment_Amount,Outstanding_Balance,Interest_Rate,Late_Payment_Fees,Transaction_Date,Payment_Due_Date,Last_Payment_Date,Credit_Score,days_late,recent_overdue,is_overdue
count,1777.0,1777.0,1777.0,1777.0,1777.0,1777.0,1777,1777,1777,1777.0,1777.0,1777.0,1777.0
mean,754.29713,25002.267535,2227.128216,11724.157321,6.899612,229.662347,2023-11-22 18:50:26.674169856,2023-12-22 18:50:26.674169856,2024-01-09 10:18:18.030388224,579.593697,17.939223,0.222847,0.888576
min,1.0,1056.67,91.74,0.0,2.02,0.0,2020-04-13 00:00:00,2020-05-13 00:00:00,2020-05-24 00:00:00,300.0,0.0,0.0,0.0
25%,383.0,13018.28,1150.82,2366.93,4.48,40.78,2023-02-12 00:00:00,2023-03-14 00:00:00,2023-03-29 00:00:00,446.0,7.0,0.0,1.0
50%,755.0,24747.58,2175.17,8046.16,6.79,156.38,2024-04-01 00:00:00,2024-05-01 00:00:00,2024-05-22 00:00:00,581.0,18.0,0.0,1.0
75%,1130.0,36813.13,3278.21,18440.21,9.29,361.74,2024-11-24 00:00:00,2024-12-24 00:00:00,2025-01-10 00:00:00,714.0,29.0,0.0,1.0
max,1500.0,49942.36,4625.89,49637.47,12.0,992.75,2025-05-13 00:00:00,2025-06-12 00:00:00,2025-07-17 00:00:00,849.0,40.0,1.0,1.0
std,436.010567,13938.406098,1242.58702,11348.645098,2.850108,226.716783,,,,157.092427,12.61906,0.416274,0.314745


In [465]:
# Tranform 'Date' column to datetime and extract date part
metrics["Date"] = pd.to_datetime(metrics["Date"]).dt.date

In [466]:
# avoid division by zero
metrics["npl_ratio"] = metrics["Non_Performing_Loans"] / metrics["Total_Loans"].replace(0, np.nan)
metrics["npl_ratio"] = metrics["npl_ratio"].fillna(0)

In [467]:
# Merge with loans_history on loan start date
loans_history["Start_Date"] = pd.to_datetime(loans_history["Start_Date"]).dt.date
loans_history_metrics = loans_history.merge(
    metrics[["Date", "Non_Performing_Loans", "Total_Loans", "npl_ratio"]],
    left_on="Start_Date", right_on="Date", how="left"
).fillna(0)

In [468]:
# Define the additional metric features
new_metrics = ["Non_Performing_Loans", "Total_Loans", "npl_ratio"]

# Extend numeric columns list
num_cols.extend(new_metrics)

In [469]:
# Build feature matrix X and target y
X = loans_history_metrics[num_cols + cat_cols]
y = loans_history_metrics["Default_Status"].map({"Sí": 1, "No": 0})

# Split into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)

In [470]:
# Preprocess features
X_train_prep = preprocessor.fit_transform(X_train)
X_test_prep  = preprocessor.transform(X_test)

In [471]:
# Compute dynamic class weights
total_obs    = len(y_train)
class_counts = y_train.value_counts().to_dict()
weights_dynamic = {cls: total_obs / cnt for cls, cnt in class_counts.items()}
print("Class weights:", weights_dynamic)


Class weights: {0: 1.0403022670025188, 1: 25.8125}


In [472]:
# Train the logistic regression model with dynamic weights
model = LogisticRegression(max_iter=1000, class_weight=weights_dynamic)
model.fit(X_train_prep, y_train)


In [473]:
# Evaluate the updated model
y_pred = model.predict(X_test_prep)
y_prob = model.predict_proba(X_test_prep)[:, 1]

print(classification_report(y_test, y_pred, digits=3))

              precision    recall  f1-score   support

           0      0.967     0.609     0.747       340
           1      0.050     0.500     0.091        14

    accuracy                          0.605       354
   macro avg      0.509     0.554     0.419       354
weighted avg      0.931     0.605     0.721       354



In [474]:
# Plot confusion matrix with Plotly
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
labels = ["No Default", "Default"]

fig = ff.create_annotated_heatmap(
    z=cm,
    x=labels,
    y=labels,
    colorscale='Blues',
    showscale=True,
    annotation_text=[[str(cell) for cell in row] for row in cm],
    hoverinfo="z"
)

fig.update_layout(
    title="Confusion Matrix with Loan Metrics",
    xaxis_title="Predicted Label",
    yaxis_title="True Label"
)
fig.show()

### Model Performance Interpretation (Logistic Regression with Dynamic Weights + Credit History + Loan Metrics)

The logistic regression model trained with class weights inversely proportional to class frequency and enriched with both credit history and loan‐level metrics achieved the following results:

- **Precision for class 1 (defaults)** is **0.050**, meaning that only 5.0% of predicted defaults were correct.  
- **Recall for class 1** is **0.500**, indicating that the model correctly identified 50% of actual defaults.  
- **Precision for class 0 (non-defaults)** is **0.967**, while recall is **0.609**, meaning 60.9% of non-defaults were correctly classified.  
- **Overall accuracy** is **0.605**, reflecting a clear improvement over previous iterations.

According to the confusion matrix:
- **7 true defaults** were correctly classified (True Positives).  
- **7 true defaults** were missed and labeled as non-defaults (False Negatives).  
- **207 non-defaults** were correctly classified (True Negatives).  
- **133 non-defaults** were incorrectly predicted as defaults (False Positives).

This result shows that incorporating loan metrics alongside credit history helped boost overall accuracy and maintain the detection rate of defaulters, while significantly improving the classification of non-defaults. 

## Model with clients_welfordbank_es.csv

In [475]:
# Load client data
clients = pd.read_csv("data/clients_welfordbank_es.csv")
clients.head(5)

Unnamed: 0,Client_ID,Type_Client,Name,Surname,Date_Birth,Gender,Address,City,Province,Country,Phone,Email,Registration_Date,Customer_Segment,Customer_Lifetime_Value,Status
0,1,Individual,Fausto,Haro Alegria,1938-05-19,Hombre,"Plaza Azahar Arce 6, Pamplona",Pamplona,Navarra,España,+34 845 531 575,fausto.haro@hotmail.com,2021-07-04,Platino,28793.37,Inactivo
1,2,Individual,Sandra,Padilla Millán,1950-09-04,Mujer,"Acceso de Jenaro Fábregas 123, Terrassa",Terrassa,Barcelona,España,+34 983639767,sandra.padilla@outlook.com,2020-06-03,Plata,11689.42,Activo
2,3,Individual,Joaquina,Sanjuan Quirós,1991-01-30,Hombre,"Pasaje de Clotilde Miró 106, Manacor",Manacor,Illes Balears,España,+34 820703282,joaquina.sanjuan@outlook.com,2023-04-19,Bronce,14210.9,Activo
3,4,Individual,Evaristo,Cabo Carnero,1963-03-25,Mujer,"Cañada Benita Uría 41, Vilalba",Vilalba,Lugo,España,+34821 784 683,evaristo.cabo@welfordbank.es,2020-12-17,Plata,19585.72,Inactivo
4,5,Individual,María,Córdoba Monreal,1962-08-11,Mujer,"Avenida de Joel Menéndez 208, Ceuta",Ceuta,Ceuta,España,+34 977571472,maria.cordoba@gmail.com,2020-08-14,Oro,9210.93,Activo


In [476]:
# Display basic information about the dataset
clients.info()
clients.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Client_ID                1500 non-null   int64  
 1   Type_Client              1500 non-null   object 
 2   Name                     1500 non-null   object 
 3   Surname                  1207 non-null   object 
 4   Date_Birth               1500 non-null   object 
 5   Gender                   1207 non-null   object 
 6   Address                  1500 non-null   object 
 7   City                     1500 non-null   object 
 8   Province                 1500 non-null   object 
 9   Country                  1500 non-null   object 
 10  Phone                    1500 non-null   object 
 11  Email                    1500 non-null   object 
 12  Registration_Date        1500 non-null   object 
 13  Customer_Segment         1500 non-null   object 
 14  Customer_Lifetime_Value 

Unnamed: 0,Client_ID,Customer_Lifetime_Value
count,1500.0,1500.0
mean,750.5,20324.32516
std,433.157015,8041.537222
min,1.0,-4551.53
25%,375.75,14930.5625
50%,750.5,20051.6
75%,1125.25,25890.69
max,1500.0,45531.36


In [477]:
# Derive 'age' feature
clients["Date_Birth"] = pd.to_datetime(clients["Date_Birth"], dayfirst=True)
reference_date = pd.to_datetime("2025-05-25")
clients["age"] = (reference_date - clients["Date_Birth"]).dt.days // 365





In [478]:
# Merge client info into our loan + history + metrics table
loans_full = loans_history_metrics.merge(
    clients[[
        "Client_ID",
        "Customer_Lifetime_Value",
        "age",
        "Type_Client",
        "Gender",
        "Province",
        "Customer_Segment"
    ]],
    on="Client_ID",
    how="left"
).fillna(0)

In [479]:
# Add new client features to our column lists
new_client_num = ["Customer_Lifetime_Value", "age"]
new_client_cat = ["Type_Client", "Gender", "Province", "Customer_Segment"]

num_cols.extend(new_client_num)
cat_cols.extend(new_client_cat)

In [480]:
# Build X and y using the extended feature set
X = loans_full[num_cols + cat_cols]
y = loans_full["Default_Status"].map({"Sí": 1, "No": 0})

In [481]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)

# Make sure all categorical columns are uniformly strings
X_train[cat_cols] = X_train[cat_cols].astype(str)
X_test[cat_cols]  = X_test[cat_cols].astype(str)

# Preprocess features
X_train_prep = preprocessor.fit_transform(X_train)
X_test_prep  = preprocessor.transform(X_test)


In [482]:
# Compute dynamic class weights
total_obs    = len(y_train)
class_counts = y_train.value_counts().to_dict()
weights_dynamic = {cls: total_obs / cnt for cls, cnt in class_counts.items()}
print("Class weights:", weights_dynamic)

Class weights: {0: 1.0403022670025188, 1: 25.8125}


In [483]:
model = LogisticRegression(max_iter=1000, class_weight=weights_dynamic)
model.fit(X_train_prep, y_train)

In [484]:
y_pred = model.predict(X_test_prep)
y_prob = model.predict_proba(X_test_prep)[:, 1]

print(classification_report(y_test, y_pred, digits=3))

              precision    recall  f1-score   support

           0      0.954     0.726     0.825       340
           1      0.021     0.143     0.037        14

    accuracy                          0.703       354
   macro avg      0.487     0.435     0.431       354
weighted avg      0.917     0.703     0.794       354



In [485]:
# Plot updated confusion matrix
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
labels_plot = ["No Default", "Default"]
fig = ff.create_annotated_heatmap(
    z=cm,
    x=labels_plot,
    y=labels_plot,
    colorscale='Blues',
    showscale=True,
    annotation_text=[[str(cell) for cell in row] for row in cm],
    hoverinfo="z"
)
fig.update_layout(
    title="Confusion Matrix with Client Features",
    xaxis_title="Predicted Label",
    yaxis_title="True Label"
)
fig.show()

### Model Performance Interpretation (Logistic Regression with Dynamic Weights + Credit History + Loan Metrics + Client Features)

The logistic regression model trained with class weights inversely proportional to class frequency and enriched with credit history, loan‐level metrics, and client features achieved the following results:

- **Precision for class 1 (defaults)** is **0.021**, meaning that only 2.1% of predicted defaults were correct.  
- **Recall for class 1** is **0.143**, indicating that the model identified 14.3% of actual defaults.  
- **Precision for class 0 (non-defaults)** is **0.954**, while recall is **0.726**, meaning 72.6% of non-defaults were correctly classified.  
- **Overall accuracy** is **0.703**, reflecting a significant improvement in correctly classifying both classes.

According to the confusion matrix:
- **2 true defaults** were correctly classified (True Positives).  
- **12 true defaults** were missed and labeled as non-defaults (False Negatives).  
- **247 non-defaults** were correctly classified (True Negatives).  
- **93 non-defaults** were incorrectly predicted as defaults (False Positives).

This shows that adding client features alongside credit history and loan metrics boosted overall accuracy (from 0.605 to 0.703) and improved non-default recall, but precision on the minority class remains very low.

## Neuronal Networks

In [496]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# a) Build the model
model_tf = models.Sequential([
    layers.Input(shape=(X_train_prep.shape[1],)),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])

model_tf.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["Precision", "Recall", "AUC"]
)

# b) Fit with class weights
history = model_tf.fit(
    X_train_prep, y_train,
    validation_split=0.25,
    epochs=100,
    batch_size=32,
    class_weight=weights_dynamic,
    callbacks=[callbacks.EarlyStopping(patience=15, restore_best_weights=True)]
)

# c) Evaluate
evals = model_tf.evaluate(X_test_prep, y_test, verbose=0)
print(dict(zip(model_tf.metrics_names, evals)))

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - AUC: 0.5178 - Precision: 0.0353 - Recall: 0.1297 - loss: 1.5791 - val_AUC: 0.5685 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_loss: 0.4189
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - AUC: 0.4309 - Precision: 0.0000e+00 - Recall: 0.0000e+00 - loss: 1.7656 - val_AUC: 0.5574 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_loss: 0.4456
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - AUC: 0.5423 - Precision: 0.0393 - Recall: 0.1012 - loss: 1.8315 - val_AUC: 0.5462 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_loss: 0.4743
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - AUC: 0.5367 - Precision: 0.0098 - Recall: 0.0485 - loss: 1.3560 - val_AUC: 0.5287 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_loss: 0.4944
Epoch 5/100
[1m20/20[0m 