In [1]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping
from keras_tuner import RandomSearch, BayesianOptimization, Hyperband
from sklearn.model_selection import train_test_split

df = pd.read_csv('loan_data.csv')
df

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,27.0,male,Associate,47971.0,6,RENT,15000.0,MEDICAL,15.66,0.31,3.0,645,No,1
44996,37.0,female,Associate,65800.0,17,RENT,9000.0,HOMEIMPROVEMENT,14.07,0.14,11.0,621,No,1
44997,33.0,male,Associate,56942.0,7,RENT,2771.0,DEBTCONSOLIDATION,10.02,0.05,10.0,668,No,1
44998,29.0,male,Bachelor,33164.0,4,RENT,12000.0,EDUCATION,13.23,0.36,6.0,604,No,1


# Data pre processing

In [3]:
X = df.drop(columns=['person_emp_exp','cb_person_cred_hist_length','loan_status'])
y = df['loan_status']

In [4]:
numerical_cols = X.select_dtypes(include = 'number').columns.tolist()
categorical_cols = X.drop(columns = numerical_cols).columns.tolist()

# Data splitting and scaling

In [7]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

data_transformer = ColumnTransformer(
    transformers=[
        ('num', MinMaxScaler(), numerical_cols),
        ('cat', OneHotEncoder(drop='first'), categorical_cols)
    ])

# Split into training (60%) and temp (40%)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)

# Split temp data into 50% validation, 50% test (which results in 20% validation, 20% test of full data)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Apply preprocessing (only fit on training data)
X_train_scaled = data_transformer.fit_transform(X_train)  # Fit only on training data
X_val_scaled = data_transformer.transform(X_val)          # Transform validation data
X_test_scaled = data_transformer.transform(X_test)

# Model building function

### Function with hyperparameter tuning for dropouts for each hidden layer, units, optimizer, learning rate, momentum

In [8]:
def build_model(hp):
    model = Sequential()

    # First hidden layer
    model.add(Dense(units=hp.Int('units_0', min_value=32, max_value=128, step=32),
                    activation=hp.Choice('activation', values=['relu', 'tanh', 'sigmoid']),
                    input_shape=(X_train_scaled.shape[1],)))

    # Dropout after first hidden layer
    model.add(Dropout(rate=hp.Float('dropout_0', min_value=0.0, max_value=0.5, step=0.1)))

    # Second hidden layer
    model.add(Dense(units=hp.Int('units_1', min_value=32, max_value=128, step=32),
                    activation=hp.Choice('activation', values=['relu', 'tanh', 'sigmoid'])))

    # Dropout after second hidden layer
    model.add(Dropout(rate=hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)))

    # Output layer
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    optimizer_choice = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])
    #adam
    if optimizer_choice == 'adam':
        optimizer = Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4]))
    #sgd with momentum with hyperparameter tuning
    elif optimizer_choice == 'sgd':
        optimizer = SGD(
            learning_rate=hp.Choice('sgd_lr', [1e-2, 1e-3, 1e-4]),
            momentum=hp.Float('sgd_momentum', min_value=0.0, max_value=0.9, step=0.1)
        )
    #rmsprop
    else:
        optimizer = RMSprop(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4]))

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


    return model


# Model evaluation function (loss)

In [9]:
# Function to evaluate model on the test set and return loss
def evaluate_model(model, X_test, y_test):
    test_loss = model.evaluate(X_test, y_test, verbose=0)[0]  # Return only test loss
    return test_loss

Trials & Epochs

In [10]:
# Set number of trials (consistent across all methods)
max_trials = 20
max_epochs = 20

# Hyper parameter tuning with early stopping

## Hyperparameter tuning (Random Seaarh)

In [11]:
#early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Random Search with overwrite=True to force re-run
random_tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=max_trials,
    directory='random_search',
    project_name='loan_approval',
    overwrite=True
)

start_time = time.time()
#The search() method of Keras Tuner starts the hyperparameter tuning process, where it tries different combinations of hyperparameters
#and trains the model with those values.
random_tuner.search(X_train_scaled, y_train, epochs=max_epochs, validation_data=(X_val_scaled, y_val), callbacks=[early_stop], verbose=0)
random_search_time = time.time() - start_time

# Get best model and evaluate on the test set
random_search_best_model = random_tuner.get_best_models(num_models=1)[0]
random_search_test_loss = evaluate_model(random_search_best_model, X_test_scaled, y_test)
random_search_best_hyperparameters = random_tuner.get_best_hyperparameters(num_trials=1)[0].values

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


## Hyperparameter tuning (Bayesian optimization)

In [12]:
#early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Bayesian Optimization with overwrite=True
bayesian_tuner = BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=max_trials,
    directory='bayesian_opt',
    project_name='loan_approval',
    overwrite=True
)

start_time = time.time()
bayesian_tuner.search(X_train_scaled, y_train, epochs=max_epochs, validation_data=(X_val_scaled, y_val), callbacks=[early_stop], verbose=0)
bayesian_search_time = time.time() - start_time

# Get best model and evaluate on the test set
bayesian_search_best_model = bayesian_tuner.get_best_models(num_models=1)[0]
bayesian_search_test_loss = evaluate_model(bayesian_search_best_model, X_test_scaled, y_test)
bayesian_search_best_hyperparameters = bayesian_tuner.get_best_hyperparameters(num_trials=1)[0].values

## Hyperparameter tuning (hyperband)

In [13]:
#early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Hyperband with overwrite=True
hyperband_tuner = Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=max_epochs,
    directory='hyperband',
    project_name='loan_approval',
    overwrite=True
)

start_time = time.time()
hyperband_tuner.search(X_train_scaled, y_train, validation_data=(X_val_scaled, y_val), callbacks=[early_stop], verbose=0)
hyperband_search_time = time.time() - start_time

# Get best model and evaluate on the test set
hyperband_search_best_model = hyperband_tuner.get_best_models(num_models=1)[0]
hyperband_search_test_loss = evaluate_model(hyperband_search_best_model, X_test_scaled, y_test)
hyperband_search_best_hyperparameters = hyperband_tuner.get_best_hyperparameters(num_trials=1)[0].values

  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))


# Hyperparameter tuning - overall best hyperparameters within different tuning method

In [14]:
# Adjust Pandas setting to display full DataFrame content
pd.set_option('display.max_colwidth', None)

# Collecting the results
results = {
    "Method": ["Random Search", "Bayesian Optimization", "Hyperband"],
    "Test Loss": [random_search_test_loss, bayesian_search_test_loss, hyperband_search_test_loss],
    "Time (seconds)": [random_search_time, bayesian_search_time, hyperband_search_time],
    "Best Hyperparameters": [
        random_search_best_hyperparameters,
        bayesian_search_best_hyperparameters,
        hyperband_search_best_hyperparameters
    ]
}

# Creating a DataFrame to display the results
results_df = pd.DataFrame(results)
print(results_df)

                  Method  Test Loss  Time (seconds)  \
0          Random Search   0.189407      899.210711   
1  Bayesian Optimization   0.186520      853.408742   
2              Hyperband   0.185415      614.121652   

                                                                                                                                                                                                                                                                                           Best Hyperparameters  
0                                                                                                                                 {'units_0': 96, 'activation': 'sigmoid', 'dropout_0': 0.2, 'units_1': 96, 'dropout_1': 0.2, 'optimizer': 'adam', 'learning_rate': 0.01, 'sgd_lr': 0.001, 'sgd_momentum': 0.1}  
1                                                                                                                                   {'units_0': 64, 'activation': 'relu

# Summary of bayesian tuner (best 10 trials)

In [15]:
bayesian_tuner.results_summary()

Results summary
Results in bayesian_opt/loan_approval
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 18 summary
Hyperparameters:
units_0: 64
activation: relu
dropout_0: 0.2
units_1: 96
dropout_1: 0.0
optimizer: adam
learning_rate: 0.001
sgd_lr: 0.001
sgd_momentum: 0.4
Score: 0.17761485278606415

Trial 06 summary
Hyperparameters:
units_0: 32
activation: relu
dropout_0: 0.0
units_1: 96
dropout_1: 0.0
optimizer: adam
learning_rate: 0.01
sgd_lr: 0.001
sgd_momentum: 0.30000000000000004
Score: 0.17965149879455566

Trial 16 summary
Hyperparameters:
units_0: 32
activation: relu
dropout_0: 0.0
units_1: 96
dropout_1: 0.1
optimizer: adam
learning_rate: 0.001
sgd_lr: 0.001
sgd_momentum: 0.6000000000000001
Score: 0.18234339356422424

Trial 12 summary
Hyperparameters:
units_0: 32
activation: tanh
dropout_0: 0.1
units_1: 32
dropout_1: 0.0
optimizer: adam
learning_rate: 0.01
sgd_lr: 0.01
sgd_momentum: 0.7000000000000001
Score: 0.18839086592197418

Trial 17 summary
Hyperparam

# Summary of hyperband tuner (best 10 trials)

In [16]:
hyperband_tuner.results_summary()

Results summary
Results in hyperband/loan_approval
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0024 summary
Hyperparameters:
units_0: 96
activation: sigmoid
dropout_0: 0.1
units_1: 64
dropout_1: 0.30000000000000004
optimizer: adam
learning_rate: 0.01
sgd_lr: 0.0001
sgd_momentum: 0.0
tuner/epochs: 20
tuner/initial_epoch: 7
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0019
Score: 0.1751154512166977

Trial 0016 summary
Hyperparameters:
units_0: 64
activation: sigmoid
dropout_0: 0.4
units_1: 32
dropout_1: 0.30000000000000004
optimizer: adam
learning_rate: 0.01
sgd_lr: 0.0001
sgd_momentum: 0.30000000000000004
tuner/epochs: 20
tuner/initial_epoch: 7
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: 0013
Score: 0.18222105503082275

Trial 0017 summary
Hyperparameters:
units_0: 64
activation: relu
dropout_0: 0.30000000000000004
units_1: 96
dropout_1: 0.1
optimizer: sgd
learning_rate: 0.0001
sgd_lr: 0.01
sgd_momentum: 0.8
tuner/epochs: 20
tuner/initial_epoch: 7
tun

# Summary of random search (best 10 trials)

In [17]:
random_tuner.results_summary()

Results summary
Results in random_search/loan_approval
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 05 summary
Hyperparameters:
units_0: 96
activation: sigmoid
dropout_0: 0.2
units_1: 96
dropout_1: 0.2
optimizer: adam
learning_rate: 0.01
sgd_lr: 0.001
sgd_momentum: 0.1
Score: 0.1780054271221161

Trial 19 summary
Hyperparameters:
units_0: 64
activation: sigmoid
dropout_0: 0.2
units_1: 32
dropout_1: 0.2
optimizer: adam
learning_rate: 0.01
sgd_lr: 0.01
sgd_momentum: 0.7000000000000001
Score: 0.1780182123184204

Trial 01 summary
Hyperparameters:
units_0: 128
activation: sigmoid
dropout_0: 0.30000000000000004
units_1: 64
dropout_1: 0.1
optimizer: adam
learning_rate: 0.01
sgd_lr: 0.001
sgd_momentum: 0.5
Score: 0.17978033423423767

Trial 13 summary
Hyperparameters:
units_0: 64
activation: relu
dropout_0: 0.4
units_1: 64
dropout_1: 0.0
optimizer: rmsprop
learning_rate: 0.001
sgd_lr: 0.0001
sgd_momentum: 0.8
Score: 0.19463232159614563

Trial 08 summary
Hyperparamete

# Confusion Matrix and accuracy of all hyperparameter tuners used

In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict on the test set - bayesian search
y_pred_prob_bs = bayesian_search_best_model.predict(X_test_scaled)
y_pred_bs = (y_pred_prob_bs > 0.5).astype(int)

# Predict on the test set - random search
y_pred_prob_rs = random_search_best_model.predict(X_test_scaled)
y_pred_rs = (y_pred_prob_rs > 0.5).astype(int)

# Predict on the test set - hyperband search
y_pred_prob_hb = hyperband_search_best_model.predict(X_test_scaled)
y_pred_hb = (y_pred_prob_hb > 0.5).astype(int)

# Confusion Matrix - bayesian search
cm_bs = confusion_matrix(y_test, y_pred_bs)
print("Confusion Matrix - bayesian search:\n", cm_bs)

# Confusion Matrix - random search
cm_rs = confusion_matrix(y_test, y_pred_rs)
print("Confusion Matrix - random search:\n", cm_rs)

# Confusion Matrix - hyperband search
cm_hb = confusion_matrix(y_test, y_pred_hb)
print("Confusion Matrix - hyperband search:\n", cm_hb)

# Accuracy - bayesian search
accuracy_bs = accuracy_score(y_test, y_pred_bs)
print(f"Accuracy - bayesian search: {accuracy_bs}")

# Accuracy - random search
accuracy_rs = accuracy_score(y_test, y_pred_rs)
print(f"Accuracy - random search: {accuracy_rs}")

# Accuracy - hyperband search
accuracy_hb = accuracy_score(y_test, y_pred_hb)
print(f"Accuracy - hyperband search: {accuracy_hb}")

[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Confusion Matrix - bayesian search:
 [[6769  238]
 [ 513 1480]]
Confusion Matrix - random search:
 [[6787  220]
 [ 538 1455]]
Confusion Matrix - hyperband search:
 [[6780  227]
 [ 521 1472]]
Accuracy - bayesian search: 0.9165555555555556
Accuracy - random search: 0.9157777777777778
Accuracy - hyperband search: 0.9168888888888889


In [19]:
!ls

bayesian_opt  hyperband  loan_data.csv	random_search  sample_data


<h2>zipping all the training weights in different trials to download </h2>

In [20]:
!zip -r hyperparameter_tuning.zip bayesian_opt hyperband random_search


  adding: bayesian_opt/ (stored 0%)
  adding: bayesian_opt/loan_approval/ (stored 0%)
  adding: bayesian_opt/loan_approval/oracle.json (deflated 71%)
  adding: bayesian_opt/loan_approval/trial_12/ (stored 0%)
  adding: bayesian_opt/loan_approval/trial_12/trial.json (deflated 75%)
  adding: bayesian_opt/loan_approval/trial_12/checkpoint.weights.h5 (deflated 54%)
  adding: bayesian_opt/loan_approval/trial_12/build_config.json (stored 0%)
  adding: bayesian_opt/loan_approval/trial_13/ (stored 0%)
  adding: bayesian_opt/loan_approval/trial_13/trial.json (deflated 74%)
  adding: bayesian_opt/loan_approval/trial_13/checkpoint.weights.h5 (deflated 21%)
  adding: bayesian_opt/loan_approval/trial_13/build_config.json (stored 0%)
  adding: bayesian_opt/loan_approval/trial_17/ (stored 0%)
  adding: bayesian_opt/loan_approval/trial_17/trial.json (deflated 75%)
  adding: bayesian_opt/loan_approval/trial_17/checkpoint.weights.h5 (deflated 47%)
  adding: bayesian_opt/loan_approval/trial_17/build_conf