# Deep MLP

# Preliminaries

Check for GPU

In [None]:
import tensorflow as tf

In [None]:
# Query the device name once and report whether it is the GPU name this notebook expects
gpu_device = tf.test.gpu_device_name()
if gpu_device == '/device:GPU:0':
  print("GPU ready: {}".format(gpu_device))
else:
  print("No GPU found")

Import preprocessing helper functions

In [None]:
import sys
sys.path.append("../../helper-modules")
from preprocessing_utils import read_in_data, preprocess

Read in the data to df_train, df_val and df_test

In [None]:
# Load the train / validation / test splits with the project helper imported
# from preprocessing_utils (where the data lives is decided inside that helper).
df_train, df_val, df_test = read_in_data()

Create X_train, y_train, X_val, y_val, X_test, y_test

In [None]:
# Turn the three dataframes into (features, labels) pairs for the "MLP" pipeline.
# NOTE(review): the arguments are passed as (train, test, val) while the result is
# unpacked as (train, val, test) - confirm this matches preprocess()'s signature.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = preprocess("MLP", df_train, df_test, df_val)

# Model Building

Imports

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from itertools import product
import time
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.utils import to_categorical

Enable seed setting for improved reproducibility

In [None]:
import os
import random
import numpy as np

In [None]:
def set_seed(seed=42):
  """
  Seed every random number source this notebook relies on.

  Sets the `PYTHONHASHSEED` environment variable and then seeds, in order,
  Python's `random` module, NumPy's global generator and TensorFlow's global
  generator with the same value.

  Note: `PYTHONHASHSEED` is read when an interpreter starts, so setting it here
  presumably only matters for Python processes launched afterwards.

  Args:
    seed: Integer seed shared by all generators (default 42).
  """
  os.environ['PYTHONHASHSEED'] = str(seed)

  # Same order as always: python built-in, then numpy, then tensorflow
  for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed)

Extract number of classes (10 classes)

In [None]:
# Size of the classifier's output layer: number of distinct values in the test
# split's "label" column.
# NOTE(review): derived from df_test only - presumably every split contains the
# same label set; confirm.
NUM_CLASSES = df_test["label"].nunique() 

Model design

In [None]:
def compile_model(hidden_layers_sizes=(1024,), act_function="relu", dropout_rate=0.1, l_rate=0.001, reg='l2', opt_algo='adam',
                  input_dim=1480, num_classes=None):
    """Build and compile a fully connected softmax classifier.

    The network is: input -> [Dense -> Dropout] for each entry of
    `hidden_layers_sizes` -> Dense softmax output. It is compiled with
    categorical cross-entropy loss and the accuracy metric.

    Args:
        hidden_layers_sizes: Iterable with the number of units of each hidden
            layer, in order. May be empty (the model is then a single softmax layer).
        act_function: Activation used by every hidden layer.
        dropout_rate: Dropout rate applied after every hidden layer.
        l_rate: Learning rate handed to the optimizer.
        reg: Kernel regularizer of the output layer. Hidden layers are not
            regularized.
        opt_algo: Optimizer name, either 'adam' or 'sgd'.
        input_dim: Number of input features (default 1480, the value this
            notebook was written for).
        num_classes: Number of output units. Defaults to the notebook-level
            `NUM_CLASSES` (10 for this dataset).

    Returns:
        The compiled Keras `Sequential` model.

    Raises:
        ValueError: if `opt_algo` is not one of the supported optimizer names.
    """
    # Resolve the optimizer first so an unsupported name fails with a clear
    # message before any layer is built (it used to surface as an unbound
    # local variable at compile time).
    if opt_algo == 'adam':
        opt = tf.keras.optimizers.Adam(learning_rate=l_rate)
    elif opt_algo == 'sgd':
        opt = tf.keras.optimizers.SGD(learning_rate=l_rate)
    else:
        raise ValueError(f"Unsupported opt_algo {opt_algo!r}; expected 'adam' or 'sgd'")

    # Looked up at call time so the notebook-level constant can be (re)defined
    # after this function.
    if num_classes is None:
        num_classes = NUM_CLASSES

    model = Sequential()

    # Declaring the input explicitly keeps the model built even when there are
    # no hidden layers.
    model.add(tf.keras.Input(shape=(input_dim,)))

    # Hidden layers, each followed by dropout to limit overfitting
    for units in hidden_layers_sizes:
        model.add(Dense(units, activation=act_function))
        model.add(Dropout(dropout_rate))

    # The last layer is the classifier: one softmax unit per class
    model.add(Dense(num_classes, activation='softmax', kernel_regularizer=reg))

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    return model

# Experiment: Grid Search

Defining grid search hyperparameter options

In [None]:
# One list of options per hyperparameter; the grid search tries every combination.

# Three hidden layers each, from tiny to large
hidden_layers_sizes = [
    (1, 1, 1),
    (5, 10, 10),
    (20, 50, 50),
    (70, 100, 100),
    (200, 350, 350),
    (450, 900, 900),
]
l_rates = [
    0.01,
    0.005,
    0.001,
    0.0005,
    0.0001,
]
dropout_rates = [
    0.05,
    0.1,
    0.2,
    0.5,
]

# Single-option axes, kept as lists so they can be extended later
act_functions = ["relu"]
regs = ['l2']
opt_algos = ['adam']

# Axis order matters: it fixes both the unpacking order and the model numbering
all_hparams = [
    hidden_layers_sizes,
    l_rates,
    dropout_rates,
    act_functions,
    regs,
    opt_algos,
]

Conduct the grid search

In [None]:
def grid_search(starting_point=1,
                log_path='../training-results/ExperimentLogs_Deep-MLP.csv',
                checkpoint_dir='/content/drive/My Drive/data/experiments/trained-models'):
  """
  Train one model per hyperparameter combination and return the best one.

  The grid is the Cartesian product of the option lists in `all_hparams`.
  Models are numbered by their 1-based position in that grid. The number is
  used in the log rows and in the checkpoint file names, so it stays the same
  when an interrupted search is resumed part-way through.

  For every model, one `;`-separated row is appended to `log_path`, and the
  epoch with the highest validation accuracy is saved as
  `<checkpoint_dir>/Deep-MLP_<model number>.h5`.

  Args:
    starting_point: 1-based grid position to start from. 1 runs the whole grid
      and starts a fresh log file; a larger value skips the earlier
      combinations and appends to the existing log instead of overwriting it.
    log_path: CSV file receiving one row per trained model.
    checkpoint_dir: Directory the `.h5` checkpoints are written to.

  Returns:
    `(best_model, best_history)` for the model with the highest validation
    accuracy among those trained in this call, or `(None, None)` if no model
    was trained or none scored above 0.

  Raises:
    ValueError: if `starting_point` is smaller than 1.
  """
  if starting_point < 1:
    raise ValueError(f"starting_point must be >= 1, got {starting_point}")

  EPOCHS = 50
  BATCH_SIZE = 32
  STOPPING_PATIENCE = 5

  best_model = None
  best_accuracy = 0
  best_history = None

  # Only start a fresh log for a full run (or when there is nothing to append to);
  # when resuming, the rows of the models already trained must survive.
  if starting_point == 1 or not os.path.exists(log_path):
    with open(log_path, 'w') as log_file:
      log_file.write("ModelNumber;Timestamp;NumEpochs;ValAccuracy;NumParams;HiddenLayerSizes;LearningRate;DropoutRate;ActivationFunction;Regularization;OptimizationAlgorithm\n")

  # Skip the combinations before `starting_point` while keeping the numbering
  # aligned with the position in the full grid.
  remaining_hparams = list(product(*all_hparams))[starting_point - 1:]

  for model_num, hparam_set in enumerate(remaining_hparams, start=starting_point):
    # Extract hyperparams for current model from grid search grid
    hidden_layers_sizes_tup, l_rate, dropout_rate, act_function, reg, opt_algo = hparam_set

    # Seed before the model is built so that weight initialisation, and not only
    # the training loop, is reproducible for every model
    set_seed(42)

    # Compile model and count number of parameters
    model = compile_model(
        hidden_layers_sizes=hidden_layers_sizes_tup,
        act_function=act_function,
        dropout_rate=dropout_rate,
        l_rate=l_rate,
        reg=reg,
        opt_algo=opt_algo,
    )
    num_params = model.count_params()

    model_str = f"{hidden_layers_sizes_tup}; {l_rate}; {dropout_rate}; {act_function}; {reg}; {opt_algo}"
    print(f"Training model {model_num} with {num_params} params - {model_str}")

    # Prevent overfitting with early stopping. When training is stopped early the
    # in-memory weights are rolled back to the best epoch, so a returned model
    # corresponds to the validation accuracy that gets logged for it.
    early_stop = EarlyStopping(monitor='val_accuracy', patience=STOPPING_PATIENCE, restore_best_weights=True)
    # For saving best val accuracy model as an h5 (for test predictions later)
    model_check = ModelCheckpoint(f"{checkpoint_dir}/Deep-MLP_{model_num}.h5", monitor='val_accuracy', mode='max', verbose=0, save_best_only=True)

    # Train model
    # Note: one reason val accuracy might be higher than train accuracy during training is because dropout affects training but not validation
    history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val), callbacks=[early_stop, model_check])
    n_epochs = len(history.history['loss'])

    # Time stamp when model finished training
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    # Best validation accuracy reached in any epoch of this run
    val_accuracy = max(history.history['val_accuracy'])
    print(f"Best val accuracy: {val_accuracy}")

    # Store results in log file
    with open(log_path, 'a') as log_file:
      log_file.write(f"{model_num};{timestamp};{n_epochs};{val_accuracy};{num_params};{model_str}\n")

    if val_accuracy > best_accuracy:
      best_accuracy = val_accuracy
      best_model = model
      best_history = history

  # Return the model with the highest validation accuracy
  return best_model, best_history


In [None]:
# Long-running cell: trains the grid-search models one after another and keeps the
# best-scoring model together with its training history.
best_model, best_history = grid_search()

Training model 107 with 1892260 params - (450, 900, 900); 0.005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Best val accuracy: 0.5877500176429749
Training model 108 with 1892260 params - (450, 900, 900); 0.005; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Best val accuracy: 0.3166874945163727
Training model 109 with 1892260 params - (450, 900, 900); 0.001; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50