In [1]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras_tuner import RandomSearch
from sklearn.metrics import classification_report
import sqlite3

In [2]:
conn = sqlite3.connect("credit_risk.db")

In [4]:
query = "SELECT * FROM credit_risk_personal_loans;"
preprocessed_credit_risk_df = pd.read_sql_query(query, conn)

preprocessed_credit_risk_df.head()

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,cb_person_default_on_file_Y
0,22,59000,50.0,35000,16.02,1,0.59,3,0,0,1,0,0,1,0,0,0,1
1,24,83000,8.0,35000,8.9,1,0.42,2,0,0,1,0,0,0,0,0,0,0
2,25,137000,9.0,34800,16.77,0,0.25,2,0,0,1,0,0,0,1,0,0,1
3,24,10980,0.0,1500,7.29,0,0.14,3,0,1,0,0,0,0,0,0,0,0
4,22,80000,3.0,33950,14.54,1,0.42,4,0,0,1,0,0,1,0,0,0,1


In [5]:
conn.close()

In [6]:
# Split features and target
X = preprocessed_credit_risk_df.drop("loan_status", axis=1)
y = preprocessed_credit_risk_df["loan_status"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

In [7]:
# Build model function for Keras Tuner
def build_model(hp):
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(X_train.shape[1],)))

    # Tune number of layers
    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(keras.layers.Dense(
            units=hp.Int(f"units_{i}", min_value=16, max_value=128, step=16),
            activation=hp.Choice("activation", ["relu", "tanh"])
        ))
    model.add(keras.layers.Dense(1, activation="sigmoid"))  # Binary output

    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Float("learning_rate", 1e-4, 1e-2, sampling="log")),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model


In [8]:
# Set up tuner
tuner = RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=1,
    directory="credit_risk_tuning",
    project_name="personal_loans"
)

In [9]:
# Run search
tuner.search(X_train, y_train, epochs=20, validation_split=0.2, verbose=1)

Trial 10 Complete [00h 00m 03s]
val_accuracy: 0.8309859037399292

Best val_accuracy So Far: 0.8463508486747742
Total elapsed time: 00h 00m 29s


In [10]:
# Retrieve best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Print best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hps.values.items():
    print(f"{param}: {value}")


Best Hyperparameters:
num_layers: 1
units_0: 48
activation: relu
learning_rate: 0.0017530477099128544
units_1: 32
units_2: 64


In [11]:
# Get best model
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate
y_pred = (best_model.predict(X_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
              precision    recall  f1-score   support

           0       0.85      0.98      0.91       783
           1       0.77      0.32      0.45       193

    accuracy                           0.85       976
   macro avg       0.81      0.65      0.68       976
weighted avg       0.84      0.85      0.82       976



  saveable.load_own_variables(weights_store.get(inner_path))


In [12]:


def build_model_2(hp):
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(X_train.shape[1],)))

    # Tune number of layers (2 to 5)
    for i in range(hp.Int("num_layers", 2, 5)):
        model.add(keras.layers.Dense(
            units=hp.Int(f"units_{i}", min_value=32, max_value=256, step=32),
            activation=hp.Choice("activation", ["relu", "tanh"])
        ))
        # Add dropout after each dense layer
        model.add(keras.layers.Dropout(
            hp.Float(f"dropout_{i}", min_value=0.1, max_value=0.5, step=0.1)
        ))

    # Output layer for binary classification
    model.add(keras.layers.Dense(1, activation="sigmoid"))

    # Tune optimizer
    optimizer = hp.Choice("optimizer", ["adam", "rmsprop", "nadam"])

    model.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=["accuracy", keras.metrics.AUC(name="auc")]
    )

    return model


In [13]:
from keras_tuner import RandomSearch

tuner = RandomSearch(
    build_model_2,
    objective='val_accuracy',
    max_trials=50,
    executions_per_trial=1,
    directory='new_credit_risk_tuning',
    project_name='personal_loans_v2'
)



In [14]:

from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5)

tuner.search(
    X_train, y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)


Trial 50 Complete [00h 00m 05s]
val_accuracy: 0.800256073474884

Best val_accuracy So Far: 0.8437899947166443
Total elapsed time: 00h 03m 25s


In [15]:
# Retrieve best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Print best hyperparameter values
print("Best Hyperparameters:")
for param, value in best_hps.values.items():
    print(f"{param}: {value}")


Best Hyperparameters:
num_layers: 4
units_0: 96
activation: tanh
dropout_0: 0.4
units_1: 128
dropout_1: 0.4
optimizer: rmsprop
units_2: 32
dropout_2: 0.1
units_3: 32
dropout_3: 0.1


In [16]:
# Get best model
best_model_2 = tuner.get_best_models(num_models=1)[0]

# Evaluate
y_pred = (best_model_2.predict(X_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
              precision    recall  f1-score   support

           0       0.86      0.97      0.91       783
           1       0.78      0.36      0.49       193

    accuracy                           0.85       976
   macro avg       0.82      0.67      0.70       976
weighted avg       0.84      0.85      0.83       976



  saveable.load_own_variables(weights_store.get(inner_path))
