Original Notebook: https://www.kaggle.com/pourchot/simple-neural-network

## Simple Neural Networks - Hyperparameters tuning

In this notebook, we will try to improve the performance of the neural network presented in the notebook https://www.kaggle.com/pourchot/simple-neural-network using hyperparameter tuning. The baseline model is identical to the one in the original notebook.

In [1]:
# load pycodestyle-magic to be compliant with PEP8 conventions
%load_ext pycodestyle_magic

In [2]:
# activate pycodestyle for each cell
%pycodestyle_on

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
import gc
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow import keras
import keras_tuner as kt
from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers
from tensorflow.keras.models import Model

### Data preparation

In [4]:
# Load the raw training and test tables from the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [5]:
# Keep only the last character of each target label and cast the whole
# frame to int16 (presumably labels look like "Class_3" — TODO confirm).
# The dtype guard makes the cell idempotent: once the frame has been cast,
# re-running the cell no longer calls `.str` on an integer column.
if train['target'].dtype != np.int16:
    train['target'] = train['target'].str[-1]
train = train.astype(np.int16)

In [6]:
# Features: every column except the first and the last one
# (assumes first = row id, last = target — TODO confirm).
feature_frame = train.iloc[:, 1:-1]
X = feature_frame.values
y = train['target'].values
X.shape, y.shape

((200000, 75), (200000,))

In [7]:
# Hold out 20% of the rows for validation, stratified on the class label;
# the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

In [8]:
# One-hot encode the labels; subtracting 1 shifts them so that they start
# at column 0 (assumes labels run from 1 to 9 — TODO confirm).
# Note: this overwrites y_train / y_val, so the cell must run only once.
num_classes = 9
y_train = keras.utils.to_categorical(y_train - 1, num_classes)
y_val = keras.utils.to_categorical(y_val - 1, num_classes)

### Custom metric

In [9]:
# Shared loss object used by custom_metric below.
cce = tf.keras.losses.CategoricalCrossentropy()


def custom_metric(y_true, y_pred):
    """Categorical cross-entropy evaluated on clipped predictions.

    Args:
        y_true: One-hot encoded targets.
        y_pred: Predicted class probabilities.

    Returns:
        The mean of the categorical cross-entropy between ``y_true`` and
        ``y_pred`` after ``y_pred`` has been clipped to
        ``[1e-15, 1 - 1e-15]``.
    """
    clipped = K.clip(y_pred, 1e-15, 1-1e-15)
    return K.mean(cce(y_true, clipped))

### Model with hyperparameters choices

In [10]:
# Both callbacks watch the validation value of custom_metric, where lower
# is better.

# Stop after 5 epochs without an improvement of at least 1e-05 and roll the
# model back to its best weights.
es = callbacks.EarlyStopping(
    monitor='val_custom_metric',
    mode='min',
    min_delta=1e-05,
    patience=5,
    baseline=None,
    restore_best_weights=True,
    verbose=0)

# Multiply the learning rate by 0.7 after 2 epochs without improvement.
plateau = callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric',
    mode='min',
    factor=0.7,
    patience=2,
    verbose=0)

In [11]:
def conv_model(hp):
    """Build and compile the tunable embedding + convolution classifier.

    This is the hypermodel handed to Keras Tuner. Every ``hp.Choice`` call
    registers one tunable dimension: ``output_dim``, ``filters``,
    ``units1``, ``units2`` and ``units3``.

    Args:
        hp: Hyperparameter container supplied by Keras Tuner; only its
            ``Choice`` method is used.

    Returns:
        A compiled Keras ``Model`` that maps 75 integer-coded inputs to a
        9-way softmax. It is trained with categorical cross-entropy and
        Adam (learning rate 2e-4) and reports ``custom_metric``.
    """
    # The shape must be a tuple: ``(75)`` is just the integer 75.
    conv_inputs = layers.Input(shape=(75,))

    # Embedding layer
    # NOTE(review): input_dim=354 assumes every feature value lies in
    # [0, 353] — confirm against the data.
    hp_outdim = hp.Choice('output_dim', [7, 9, 11])
    embedded = layers.Embedding(
        input_dim=354,
        output_dim=hp_outdim,
        embeddings_regularizer='l2')(conv_inputs)

    # Convolution layer with kernel size 1, then flatten.
    conv_features = layers.Conv1D(
        hp.Choice('filters', [8, 10, 12]),
        1,
        activation='relu')(embedded)
    flat = layers.Flatten()(conv_features)

    # Dense stack with skip connections: each dense layer receives the
    # flattened convolution output concatenated with the outputs of all
    # previous dense layers, with dropout applied to that concatenation.
    block1_in = layers.Dropout(0.33)(flat)
    hp_units_1 = hp.Choice('units1', [28, 32, 36])
    dense1 = tfa.layers.WeightNormalization(layers.Dense(
        units=hp_units_1,
        activation='selu',
        kernel_initializer="lecun_normal"))(block1_in)

    block2_in = layers.Dropout(0.33)(
        layers.Concatenate()([flat, dense1]))
    hp_units_2 = hp.Choice('units2', [28, 32, 36])
    dense2 = tfa.layers.WeightNormalization(layers.Dense(
        units=hp_units_2,
        activation='relu',
        kernel_initializer="he_normal"))(block2_in)

    block3_in = layers.Dropout(0.45)(
        layers.Concatenate()([flat, dense1, dense2]))
    hp_units_3 = hp.Choice('units3', [28, 32, 36])
    dense3 = tfa.layers.WeightNormalization(layers.Dense(
        units=hp_units_3,
        activation='elu',
        kernel_initializer="he_normal"))(block3_in)

    # Final layer: one probability per class.
    conv_outputs = layers.Dense(
        units=9,
        activation='softmax')(dense3)

    # Model instantiation
    model = Model(conv_inputs, conv_outputs)

    # Model compilation. ``metrics`` is passed as a list; the metric keeps
    # the function's name, so callbacks and the tuner objective can still
    # monitor 'val_custom_metric'.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=2e-4),
        metrics=[custom_metric])
    return model

### Keras Tuner Optimization

In [12]:
# Hyperband search over the space declared in conv_model, minimising the
# validation value of custom_metric, with at most 15 epochs per trial.
# A fixed seed makes the tuner's sampling of configurations repeatable;
# weight initialisation and dropout are still unseeded, so scores can vary
# between runs.
# NOTE(review): trial state is written under `directory`; Keras Tuner is
# documented to resume from an existing directory unless overwrite=True —
# confirm this is the intended behaviour on re-runs.
tuner = kt.Hyperband(
    conv_model,
    objective=kt.Objective("val_custom_metric", direction="min"),
    max_epochs=15,
    seed=0,
    directory='simple_neural_network_tuner')

In [13]:
# Show the hyperparameters registered by conv_model and their candidates.
tuner.search_space_summary()

Search space summary
Default search space size: 5
output_dim (Choice)
{'default': 7, 'conditions': [], 'values': [7, 9, 11], 'ordered': True}
filters (Choice)
{'default': 8, 'conditions': [], 'values': [8, 10, 12], 'ordered': True}
units1 (Choice)
{'default': 28, 'conditions': [], 'values': [28, 32, 36], 'ordered': True}
units2 (Choice)
{'default': 28, 'conditions': [], 'values': [28, 32, 36], 'ordered': True}
units3 (Choice)
{'default': 28, 'conditions': [], 'values': [28, 32, 36], 'ordered': True}


In [14]:
%%time
BS = 64
tuner.search(
    X_train,
    y_train,
    batch_size=BS, 
    validation_data=(X_val, y_val),
    callbacks=[es, plateau])

Trial 30 Complete [00h 06m 43s]
val_custom_metric: 1.7432328462600708

Best val_custom_metric So Far: 1.7426027059555054
Total elapsed time: 01h 11m 33s
INFO:tensorflow:Oracle triggered exit
Wall time: 1h 11min 33s


In [15]:
# Get the optimal hyperparameters: request a single configuration from the
# tuner and take the first (only) entry of the returned list.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [16]:
# (label, hyperparameter name) pairs, in the order they are reported.
hp_report = [
    ("Output dimension of Embedding Layer", 'output_dim'),
    ("Number of filters in Convolution Layer", 'filters'),
    ("Number of neurons in first Dense Layer", 'units1'),
    ("Number of neurons in second Dense Layer", 'units2'),
    ("Number of neurons in third Dense Layer", 'units3'),
]
print("Best Hyperparameters")
print("-------------------------------")
for hp_label, hp_name in hp_report:
    print(f"{hp_label} : {best_hps.get(hp_name)}")

Best Hyperparameters
-------------------------------
Output dimension of Embedding Layer : 11
Number of filters in Convolution Layer : 10
Number of neurons in first Dense Layer : 32
Number of neurons in second Dense Layer : 36
Number of neurons in third Dense Layer : 32


### Training the network with the best hyperparameters found

In [17]:
# Reload the raw tables: `train` was overwritten in the data-preparation
# section, and the string target column is needed again here.
train = pd.read_csv('train.csv')
test = pd.read_csv("test.csv")
# One indicator column per class. The dtype is pinned so the targets are
# numeric 0/1 on every pandas version (the default became bool in
# pandas 2.0).
targets = pd.get_dummies(train['target'], dtype=np.uint8)

In [18]:
%%time
oof_NN = np.zeros((train.shape[0], 9))
pred_NN = np.zeros((test.shape[0], 9))
N_FOLDS = 20
SEED = 42
EPOCH = 50
skf = StratifiedKFold(
    n_splits=N_FOLDS, 
    shuffle=True,
    random_state=SEED)
for fold, (tr_idx, ts_idx) in enumerate(skf.split(train, train.iloc[:, -1])):
    print(f"\n ====== TRAINING FOLD {fold} =======\n")
    X_train = train.iloc[:, 1:-1].iloc[tr_idx]
    y_train = targets.iloc[tr_idx]
    X_test = train.iloc[:, 1:-1].iloc[ts_idx]
    y_test = targets.iloc[ts_idx]
    K.clear_session()
    # NN CONV MODEL training
    print("\n-----Convolution model Training----\n")
    # Build the model with the optimal hyperparameters
    model_conv = tuner.hypermodel.build(best_hps)
    model_conv.fit(
        X_train,
        y_train,
        batch_size=256,
        epochs=EPOCH,
        validation_data=(X_test, y_test),
        callbacks=[es, plateau],
        verbose=0)
    # Convolution Model prediction
    pred = model_conv.predict(X_test)
    oof_NN[ts_idx] += pred
    score_NN = log_loss(y_test, pred)
    print(f"\nFOLD {fold} Score convolution model: {score_NN}\n")
    pred_NN += model_conv.predict(test.iloc[:, 1:])/N_FOLDS
score = log_loss(targets, oof_NN)
print(f"\n=== FINAL SCORE CONVOLUTION MODEL : {score}===\n")




-----Convolution model Training----


FOLD 0 Score convolution model: 1.7481772336250172




-----Convolution model Training----


FOLD 1 Score convolution model: 1.744758539665304




-----Convolution model Training----


FOLD 2 Score convolution model: 1.74275841412805




-----Convolution model Training----


FOLD 3 Score convolution model: 1.7472772572143003




-----Convolution model Training----


FOLD 4 Score convolution model: 1.7342940068321302




-----Convolution model Training----


FOLD 5 Score convolution model: 1.7391398285418749




-----Convolution model Training----


FOLD 6 Score convolution model: 1.7458022547552363




-----Convolution model Training----


FOLD 7 Score convolution model: 1.7324220458250492




-----Convolution model Training----


FOLD 8 Score convolution model: 1.7399855792038144




-----Convolution model Training----


FOLD 9 Score convolution model: 1.7417716698246077




-----Convolution model Training----


FOLD 10 Score convolution model:

In [19]:
# Second name for the fold-averaged test predictions; this binds the same
# array as pred_NN (no copy is made).
pred_embedding = pred_NN

In [20]:
# Fill the Class_1 .. Class_9 columns of the sample submission with the
# matching prediction columns, then write the result to disk.
submission = pd.read_csv("sample_submission.csv")
for class_no in range(1, 10):
    submission[f'Class_{class_no}'] = pred_embedding[:, class_no - 1]
submission.to_csv("Purchot_tuned_neural_network.csv", index=False)