In [34]:
! pip install hyperas
! pip install hyperopt



In [35]:
# Standard Imports
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import log_loss

import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Input, Dense, Dropout, Activation

from hyperopt import tpe, STATUS_OK, Trials
from hyperas import optim
from hyperas.distributions import choice, uniform

## Let's create a function which processes the data in such a way that that the data is directly fed into the model

In [36]:
def data():  
  global test
  # Importing data
  train = pd.read_csv("/content/drive/Othercomputers/My Laptop (1)/Programming Languages/Python/Projects/loan-defaulter/data/train.csv")
  test = pd.read_csv("/content/drive/Othercomputers/My Laptop (1)/Programming Languages/Python/Projects/loan-defaulter/data/test.csv")

  # Dropping unnecessary columns in both test and training sets
  train = train.drop("ID", axis=1)
  test = test.drop(["Loan Status", "ID"], axis=1)

  # Let's split the data into features and target variables
  X = train.drop("Loan Status", axis=1)
  y = train["Loan Status"]

  # Splitting the data into training and validation sets
  X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

  # Converting non- numerical columns into numerica Columns
  for label, content in X_train.items():
    if not pd.api.types.is_numeric_dtype(content):
      encoder = LabelEncoder()
      X_train[label] = encoder.fit_transform(X_train[label])
      X_valid[label] = encoder.transform(X_valid[label])
      test[label] = encoder.transform(test[label])

  # Now we scale the data
  scaler = StandardScaler()
  X_train = scaler.fit_transform(X_train)
  X_valid = scaler.transform(X_valid)
  test = scaler.transform(test)

  return X_train, y_train, X_valid, y_valid

# Calling our function
X_train, y_train, X_valid, y_valid = data()

## Let's create our model

In [4]:
def model():
  # Set random seed
  tf.random.set_seed(42)

  # Create a model
  model = Sequential()
  model.add(Input(shape=X_train[0].shape))
  model.add(Dense({{choice([20, 30, 40])}}))
  model.add(Activation("relu"))
  model.add(Dropout({{uniform(0, 1)}}))
  model.add(Dense({{choice([10, 20, 30])}}))
  model.add(Activation("relu"))
  model.add(Dropout({{uniform(0, 1)}}))

  # Let's add another layer according to choice
  if {{choice(["a", "b"])}} == "b":
    model.add(Dense(5, activation="relu"))
    model.add(Dropout({{uniform(0, 1)}}))

  # Let's add the final layer
  model.add(Dense(1))
  model.add(Activation({{choice(["sigmoid", "softmax"])}}))

  # let's compile the model
  model.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate={{choice([0.1, 0.001, 0.01, 3e-4])}}),
                metrics=["accuracy"])
  
  # Fit the model
  model.fit(X_train, y_train,
            validation_data=(X_valid, y_valid),
            epochs=100,
            callbacks=[tf.keras.callbacks.EarlyStopping(patience=10)], verbose=0)
  
  # Evaluate our model
  loss, acc = model.evaluate(X_valid, y_valid, verbose=0)
  print("Validation accuracy :", acc)

  return {"loss" : -acc, "model" : model, 'status' : STATUS_OK}

In [5]:
# Now let's optimize our model
best_run, best_model = optim.minimize(data=data,
                                      model=model,
                                      max_evals=10,
                                      algo=tpe.suggest,
                                      trials=Trials(),
                                      notebook_name="drive/Othercomputers/My Laptop (1)/Programming Languages/Python/Projects/loan-defaulter/loan_defaulter-hyperas-colab")

>>> Imports:
#coding=utf-8

try:
    import numpy as np
except:
    pass

try:
    import pandas as pd
except:
    pass

try:
    from sklearn.model_selection import train_test_split
except:
    pass

try:
    from sklearn.preprocessing import StandardScaler, LabelEncoder
except:
    pass

try:
    import tensorflow as tf
except:
    pass

try:
    from tensorflow import keras
except:
    pass

try:
    from keras import Sequential
except:
    pass

try:
    from keras.layers import Input, Dense, Dropout, Activation
except:
    pass

try:
    from hyperopt import tpe, STATUS_OK, Trials
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

>>> Hyperas search space:

def get_space():
    return {
        'Dense': hp.choice('Dense', [20, 30, 40]),
        'Dropout': hp.uniform('Dropout', 0, 1),
        'Dense_1': hp.choice('Dense_1', [10, 20, 30]),
        'Dropout_1': hp.uniform('Dropout_1', 0, 

In [9]:
log_loss(y_valid, best_model.predict(X_valid))

0.3034542779984374

In [13]:
best_run

{'Activation': 0,
 'Dense': 2,
 'Dense_1': 2,
 'Dropout': 0.6108763092812357,
 'Dropout_1': 0.7371698374615214,
 'Dropout_2': 0,
 'Dropout_3': 0.21280043312755825,
 'learning_rate': 2}

In [45]:
# Final model

# Create a model
final = Sequential([
  Input(shape=X_train[0].shape),
  Dense(100, activation="relu"),
  Dropout(0.6108763092812357),
  Dense(50, activation="relu"),
  Dropout(0.7371698374615214),
  Dense(25, activation="relu"),
  Dropout(0.21280043312755825),
  Dense(1, activation="sigmoid")
])

# Compile the model
final.compile(loss="binary_crossentropy",
              optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),
              metrics=["accuracy"])

#Fit the model
history =   final.fit(X_train, y_train, epochs=100,
            validation_data=(X_valid, y_valid),
            callbacks=[tf.keras.callbacks.EarlyStopping(patience=10)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100


In [46]:
predictions=final.predict(test).flatten()
submissions = pd.DataFrame()
predictions

array([0.0990999 , 0.08767685, 0.0990999 , ..., 0.09805545, 0.09900853,
       0.09895027], dtype=float32)

In [47]:
log_loss(y_valid, final.predict(X_valid))

0.3034117250054255

In [48]:
submissions["Loan Status"] = predictions
submissions.to_csv("predictions.csv", index=False)