# **MLP - Multi Layer Perceptron (ML-CUP22)**

In [None]:
# Install packages
!pip install keras_tuner

In [None]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from statistics import mean, stdev

# MLP
import tensorflow as tf
from tensorflow import keras
from keras import layers

# keras_tuner for GridSearch
import keras_tuner

In [None]:
# Mount Google Drive at /content/drive so the dataset stored on Drive can be read.
# This cell only works inside Google Colab (google.colab is Colab-specific).
# force_remount=True requests a fresh mount even when Drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

**Definition of Functions**

In [None]:
## Definition of loss/MEE plot functions

def loss_plot(history, label_loss, label_val_loss):
  """Plot the per-epoch 'loss' and 'val_loss' curves of a Keras training run.

  Args:
    history: object returned by ``model.fit``; its ``history`` dict must
      contain the 'loss' and 'val_loss' lists.
    label_loss: legend label for the 'loss' curve, e.g. 'Training loss'.
    label_val_loss: legend label for the 'val_loss' curve,
      e.g. 'Validation loss'.

  The curves are always drawn with the labels supplied by the caller, so any
  label pair produces a complete figure (previously only two hard-coded pairs
  were plotted and anything else gave an empty chart).
  """
  loss = history.history['loss']
  val_loss = history.history['val_loss']
  epochs = range(1, len(loss) + 1)

  def _title_part(label):
    # 'Training loss' -> 'Training'; labels without the suffix are kept as is.
    suffix = ' loss'
    return label[:-len(suffix)] if label.lower().endswith(suffix) else label

  plt.figure(figsize=(9, 5))
  plt.plot(epochs, loss, label=label_loss, color='royalblue')
  plt.plot(epochs, val_loss, label=label_val_loss, linestyle='dashed', color='darkorange')
  # Yields 'Training & Validation Loss' / 'Design & Test Loss' for the label
  # pairs used in this notebook.
  plt.title(f'{_title_part(label_loss)} & {_title_part(label_val_loss)} Loss', fontsize=14)
  plt.xlabel('Epochs', fontsize=14)
  plt.ylabel('Loss', fontsize=14)
  plt.xticks(fontsize=12)
  plt.yticks(fontsize=12)
  plt.legend(fontsize=14)
  plt.show()

def MEE_plot(history, label_MEE, label_val_MEE):
  """Plot the per-epoch 'MEE' and 'val_MEE' curves of a Keras training run.

  Args:
    history: object returned by ``model.fit``; its ``history`` dict must
      contain the 'MEE' and 'val_MEE' lists.
    label_MEE: legend label for the 'MEE' curve, e.g. 'Training MEE'.
    label_val_MEE: legend label for the 'val_MEE' curve,
      e.g. 'Validation MEE'.

  The curves are always drawn with the labels supplied by the caller, so any
  label pair produces a complete figure (previously only two hard-coded pairs
  were plotted and anything else gave an empty chart).
  """
  mee_values = history.history['MEE']
  val_mee_values = history.history['val_MEE']
  epochs = range(1, len(mee_values) + 1)

  def _title_part(label):
    # 'Training MEE' -> 'Training'; labels without the suffix are kept as is.
    suffix = ' mee'
    return label[:-len(suffix)] if label.lower().endswith(suffix) else label

  plt.figure(figsize=(9, 5))
  plt.plot(epochs, mee_values, label=label_MEE, color='royalblue')
  plt.plot(epochs, val_mee_values, label=label_val_MEE, linestyle='dashed', color='darkorange')
  # Yields 'Training & Validation MEE' / 'Design & Test MEE' for the label
  # pairs used in this notebook.
  plt.title(f'{_title_part(label_MEE)} & {_title_part(label_val_MEE)} MEE', fontsize=14)
  plt.xlabel('Epochs', fontsize=14)
  plt.ylabel('MEE', fontsize=14)
  plt.xticks(fontsize=12)
  plt.yticks(fontsize=12)
  plt.legend(fontsize=14)
  plt.show()

In [None]:
## Definition of Mean Euclidean Error (MEE): metric used for performance evaluation of the model
def MEE(y_true, y_pred):
  """Mean Euclidean Error between targets and predictions.

  The Euclidean norm of ``y_true - y_pred`` is taken along axis 1 and the
  resulting per-row norms are averaged into a single scalar tensor.
  """
  residuals = y_true - y_pred
  per_row_norm = tf.norm(residuals, ord='euclidean', axis=1)
  return tf.reduce_mean(per_row_norm)

In [None]:
## Definition of MLP model
def build_model(n_hidden_layers, units, activation, kernel_initializer, optimizer, input_dim=None):
  """Build and compile a fully connected network with two linear outputs.

  Args:
    n_hidden_layers: number of hidden Dense layers.
    units: number of units in every hidden layer.
    activation: activation of the hidden layers.
    kernel_initializer: weight initializer of the hidden layers.
    optimizer: Keras optimizer (instance or name) passed to ``compile``.
    input_dim: number of input features. When omitted, the width of the
      notebook-level design matrix ``X`` is used, as before.

  Returns:
    The compiled ``keras.Sequential`` model, using MEE as both loss and metric.
  """
  if input_dim is None:
    # Backward-compatible default: infer the input width from the global X.
    input_dim = X.shape[1]
  # Define the model
  model = keras.Sequential()
  model.add(tf.keras.Input(shape=(input_dim,)))
  for _ in range(n_hidden_layers):
    model.add(layers.Dense(units=units, activation=activation, kernel_initializer=kernel_initializer))
  # Output layer: 2 units, no activation (linear)
  model.add(layers.Dense(units=2))
  # Compile the model; metrics is given as a list, the form documented for
  # Model.compile. The metric is still reported as 'MEE' / 'val_MEE'.
  model.compile(optimizer=optimizer,
                loss=MEE,
                metrics=[MEE])
  return model

---
## **Data Preparation**

In [None]:
# Loading the training dataset ML-CUP22-TR.csv from the mounted Drive folder
path = '/content/drive/MyDrive/data/Data_CUP/ML-CUP22-TR.csv'
# Column names assigned on load: a1..a9 are later used as inputs, x and y as targets
col_names = ['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'x', 'y']

# comment='#' makes pandas ignore everything after a '#' on a line
data = pd.read_csv(path, names=col_names, comment='#')
# Display (rows, columns) of the loaded frame
data.shape

In [None]:
# Show the first 5 rows as a quick sanity check of the loaded columns
data.head()

In [None]:
# Check for NaN values: one boolean per column, True if the column has any missing entry
data.isnull().any()

In [None]:
# Split data into design (85%) and test (15%) sets.
# random_state=42 keeps the shuffled split identical across runs.
design, test = train_test_split(data, test_size=0.15, shuffle=True, random_state=42)
# Label the first set as "design": "train" is reserved in this notebook for the
# training folds carved out of the design set during cross-validation.
print(f'design shape: {design.shape}')
print(f'test shape: {test.shape}')

In [None]:
# Descriptive statistics computed on the design set only (test rows are excluded)
design.describe()

In [None]:
# Separate the design set into inputs and targets
X = design.iloc[:, :-2]        # every column except the last two
y = design.loc[:, ['x', 'y']]  # the two target columns

print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')

In [None]:
# Separate the test set into inputs and targets
X_test = test.iloc[:, :-2]        # every column except the last two
y_test = test.loc[:, ['x', 'y']]  # the two target columns

print(f'X shape: {X_test.shape}')
print(f'y shape: {y_test.shape}')

----
## **Preliminary Experimental Phase by K-Fold Cross Validation**

In [None]:
# Define early stopping: stop a fold when val_loss has not improved for 30 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)

# Validation MEE recorded for each fold
MEE_per_fold = []

# Define the K-fold cross validator. random_state fixes the fold assignment so
# that re-running the cell evaluates the same splits (network weight
# initialisation is not seeded here, so scores can still vary slightly).
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Run a 5-fold cross-validation on the design set
for fold_idx, (train_indices, val_indices) in enumerate(kfold.split(design)):
  print(f"Running fold {fold_idx+1}")

  # Extract the training and validation examples (inputs: all but the last
  # two columns, targets: the last two columns)
  X_train, y_train = design.iloc[train_indices, :-2], design.iloc[train_indices, -2:]
  X_val, y_val = design.iloc[val_indices, :-2], design.iloc[val_indices, -2:]

  # Define a fresh model for this fold
  model = build_model(
    n_hidden_layers=3,
    units=64,
    activation='elu',
    kernel_initializer='RandomUniform',
    optimizer=tf.keras.optimizers.Adam(weight_decay=0.001)
    )

  # Train the model
  history = model.fit(X_train, y_train,
                      validation_data=(X_val, y_val),
                      batch_size=64,
                      epochs=1000,
                      callbacks=[early_stopping],
                      verbose=0)

  # Evaluate the model: keep the lowest validation MEE reached in any epoch
  val_mee = min(history.history['val_MEE'])
  print(f'val_MEE: {val_mee}')
  MEE_per_fold.append(val_mee)

print(f"Mean: {mean(MEE_per_fold)}")
print(f"Stdev: {stdev(MEE_per_fold)}")

----
## **GridSearch**

### **Coarse-grained GridSearch (SGD & Weight decay)**

In [None]:
# Define HyperModel
class MyHyperModel(keras_tuner.HyperModel):
  """Coarse search space: ELU MLP trained with SGD and weight decay.

  Tuned in ``build``: units, learning_rate, momentum, weight_decay, nesterov
  and num_layers. Tuned in ``fit``: batch_size.
  """

  def build(self, hp):
    """Build and compile the model for the hyperparameter values in ``hp``."""
    # Hyperparameters
    units = hp.Choice("units", [16, 64, 256])
    learning_rate = hp.Choice("learning_rate", [1e-3, 1e-2])
    momentum = hp.Choice("momentum", [0.0, 0.9])
    weight_decay = hp.Choice('weight_decay', [1e-4, 1e-3, 1e-2])
    nesterov = hp.Boolean("nesterov")
    # Define the Model: 9 inputs -> num_layers hidden layers -> 2 linear outputs
    model = keras.Sequential()
    model.add(layers.Input(shape=[9,]))
    for _ in range(hp.Int("num_layers", 2, 3, sampling='linear')):
      model.add(layers.Dense(units=units, activation='elu', kernel_initializer='RandomUniform'))
    model.add(layers.Dense(2))
    # Compile the model; metrics is given as a list, the form documented for
    # Model.compile. The metric is still reported as 'MEE' / 'val_MEE'.
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=learning_rate,
                                       momentum=momentum,
                                       weight_decay=weight_decay,
                                       nesterov=nesterov),
        loss=MEE,
        metrics=[MEE]
        )
    return model

  def fit(self, hp, model, *args, **kwargs):
    """Train ``model``, adding the batch size as a tuned hyperparameter."""
    return model.fit(
        *args,
        batch_size=hp.Choice('batch_size', [16, 64, 256]),
        **kwargs
        )

In [None]:
# Define Keras Tuner: exhaustive grid search minimising the validation MEE.
# Every configuration is trained 3 times (executions_per_trial) and the search
# aborts after a single failed trial (max_consecutive_failed_trials=1).
tuner = keras_tuner.GridSearch(
    hypermodel=MyHyperModel(),
    objective=keras_tuner.Objective('val_MEE', direction='min'),
    executions_per_trial=3,
    max_consecutive_failed_trials=1,
    # Dedicated project name so that the other grid searches in this notebook
    # (also created with overwrite=True) do not wipe this search's results.
    project_name='coarse_sgd_weight_decay',
    overwrite=True
)

In [None]:
# Search space summary: list the hyperparameters registered by the hypermodel before searching
tuner.search_space_summary()

In [None]:
%%time
# Define stopping criteria: end a run once val_loss has not improved for 20 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Starting the search on the design set; validation_split=0.2 holds out 20% of (X, y) for validation
tuner.search(X, y, epochs=500, validation_split=0.2, verbose=1, callbacks=[early_stopping])

In [None]:
# Summary results of the grid search just completed (objective: val_MEE, lower is better)
tuner.results_summary()

### **Coarse-grained GridSearch (Adam & Weight decay)**

In [None]:
# Define HyperModel
class MyHyperModel(keras_tuner.HyperModel):
    """Coarse search space: MLP trained with Adam and weight decay.

    Tuned in ``build``: units, activation, kernel_initializer, weight_decay
    and num_layers. Tuned in ``fit``: batch_size.
    """

    def build(self, hp):
        """Build and compile the model for the hyperparameter values in ``hp``."""
        # Hyperparameters
        units = hp.Choice("units", [16, 64, 256])
        activation = hp.Choice("activation", ['relu', 'elu'])
        kernel_initializer = hp.Choice("kernel_initializer", ['GlorotUniform', 'RandomUniform'])
        weight_decay = hp.Choice('weight_decay', [1e-4, 1e-3, 1e-2])
        # Define the model: 9 inputs -> num_layers hidden layers -> 2 linear outputs
        model = keras.Sequential()
        model.add(layers.Input(shape=[9,]))
        for _ in range(hp.Int("num_layers", 2, 3, sampling='linear')):
            model.add(layers.Dense(units=units, activation=activation, kernel_initializer=kernel_initializer))
        model.add(layers.Dense(2))
        # Compile the model; metrics is given as a list, the form documented
        # for Model.compile. The metric is still reported as 'MEE' / 'val_MEE'.
        model.compile(optimizer=keras.optimizers.Adam(weight_decay=weight_decay),
                      loss=MEE,
                      metrics=[MEE])
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model``, adding the batch size as a tuned hyperparameter."""
        return model.fit(
            *args,
            batch_size=hp.Choice('batch_size', [16, 64, 256]),
            **kwargs
        )

In [None]:
# Define Keras Tuner: exhaustive grid search minimising the validation MEE.
# Every configuration is trained 3 times (executions_per_trial) and the search
# aborts after a single failed trial (max_consecutive_failed_trials=1).
tuner = keras_tuner.GridSearch(
    hypermodel=MyHyperModel(),
    objective=keras_tuner.Objective('val_MEE', direction='min'),
    executions_per_trial=3,
    max_consecutive_failed_trials=1,
    # Dedicated project name so that the other grid searches in this notebook
    # (also created with overwrite=True) do not wipe this search's results.
    project_name='coarse_adam_weight_decay',
    overwrite=True
)

In [None]:
# Search space summary: list the hyperparameters registered by the hypermodel before searching
tuner.search_space_summary()

In [None]:
%%time
# Define stopping criteria: end a run once val_loss has not improved for 20 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Starting the search on the design set; validation_split=0.2 holds out 20% of (X, y) for validation
tuner.search(X, y, epochs=500, validation_split=0.2, verbose=1, callbacks=[early_stopping])

In [None]:
# Summary results of the grid search just completed (objective: val_MEE, lower is better)
tuner.results_summary()

### **Coarse-grained GridSearch (SGD & Dropout)**

In [None]:
# Define HyperModel
class MyHyperModel(keras_tuner.HyperModel):
  """Coarse search space: ELU MLP trained with SGD, optionally with dropout.

  Tuned in ``build``: units, dropout (on/off), rate (only when dropout is on),
  learning_rate, momentum and num_layers. Tuned in ``fit``: batch_size.
  """

  def build(self, hp):
    """Build and compile the model for the hyperparameter values in ``hp``."""
    # Hyperparameters
    units = hp.Choice("units", [16, 64, 256])
    dropout = hp.Boolean("dropout")
    # The rate only affects the model when dropout is enabled, so it is
    # registered as a child of "dropout". Otherwise the grid would train the
    # same no-dropout model once for every rate value. When dropout is off
    # the returned value is None and is never used.
    rate = hp.Choice("rate", [0.1, 0.3], parent_name="dropout", parent_values=[True])
    learning_rate = hp.Choice("learning_rate", [1e-3, 1e-2])
    momentum = hp.Choice("momentum", [0.0, 0.9])
    # Define the model: 9 inputs -> num_layers hidden layers -> 2 linear outputs
    model = keras.Sequential()
    model.add(layers.Input(shape=[9,]))
    for _ in range(hp.Int("num_layers", 2, 3, sampling='linear')):
      model.add(layers.Dense(units=units, activation='elu', kernel_initializer='RandomUniform'))
      if dropout:
        model.add(layers.Dropout(rate=rate))
    model.add(layers.Dense(2))
    # Compile the model; metrics is given as a list, the form documented for
    # Model.compile. The metric is still reported as 'MEE' / 'val_MEE'.
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=learning_rate,
                                       momentum=momentum),
        loss=MEE,
        metrics=[MEE]
        )
    return model

  def fit(self, hp, model, *args, **kwargs):
    """Train ``model``, adding the batch size as a tuned hyperparameter."""
    return model.fit(
        *args,
        batch_size=hp.Choice('batch_size', [16, 64, 256]),
        **kwargs)

In [None]:
# Define Keras Tuner: exhaustive grid search minimising the validation MEE.
# Every configuration is trained 3 times (executions_per_trial) and the search
# aborts after a single failed trial (max_consecutive_failed_trials=1).
tuner = keras_tuner.GridSearch(
    hypermodel=MyHyperModel(),
    objective=keras_tuner.Objective('val_MEE', direction='min'),
    executions_per_trial=3,
    max_consecutive_failed_trials=1,
    # Dedicated project name so that the other grid searches in this notebook
    # (also created with overwrite=True) do not wipe this search's results.
    project_name='coarse_sgd_dropout',
    overwrite=True
)

In [None]:
# Search space summary: list the hyperparameters registered by the hypermodel before searching
tuner.search_space_summary()

In [None]:
%%time
# Define stopping criteria: end a run once val_loss has not improved for 20 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Starting the search on the design set; validation_split=0.2 holds out 20% of (X, y) for validation
tuner.search(X, y, epochs=500, validation_split=0.2, verbose=1, callbacks=[early_stopping])

In [None]:
# Summary results of the grid search just completed (objective: val_MEE, lower is better)
tuner.results_summary()

### **Coarse-grained GridSearch (Adam & Dropout)**

In [None]:
# Define HyperModel
class MyHyperModel(keras_tuner.HyperModel):
    """Coarse search space: MLP trained with Adam, optionally with dropout.

    Tuned in ``build``: units, activation, kernel_initializer, dropout
    (on/off), rate (only when dropout is on) and num_layers.
    Tuned in ``fit``: batch_size.
    """

    def build(self, hp):
        """Build and compile the model for the hyperparameter values in ``hp``."""
        # Hyperparameters
        units = hp.Choice("units", [16, 64, 256])
        activation = hp.Choice("activation", ['relu', 'elu'])
        kernel_initializer = hp.Choice("kernel_initializer", ['GlorotUniform', 'RandomUniform'])
        dropout = hp.Boolean("dropout")
        # The rate only affects the model when dropout is enabled, so it is
        # registered as a child of "dropout". Otherwise the grid would train
        # the same no-dropout model once for every rate value. When dropout is
        # off the returned value is None and is never used.
        rate = hp.Choice("rate", [0.1, 0.3, 0.5], parent_name="dropout", parent_values=[True])
        # Define the model: 9 inputs -> num_layers hidden layers -> 2 linear outputs
        model = keras.Sequential()
        model.add(layers.Input(shape=[9,]))
        for _ in range(hp.Int("num_layers", 2, 3, sampling='linear')):
            model.add(layers.Dense(units=units, activation=activation, kernel_initializer=kernel_initializer))
            if dropout:
                model.add(layers.Dropout(rate=rate))
        model.add(layers.Dense(2))
        # Compile the model; metrics is given as a list, the form documented
        # for Model.compile. The metric is still reported as 'MEE' / 'val_MEE'.
        model.compile(optimizer='Adam', loss=MEE, metrics=[MEE])
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model``, adding the batch size as a tuned hyperparameter."""
        return model.fit(
            *args,
            batch_size=hp.Choice('batch_size', [16, 64, 256]),
            **kwargs
        )

In [None]:
# Define Keras Tuner: exhaustive grid search minimising the validation MEE.
# Every configuration is trained 3 times (executions_per_trial) and the search
# aborts after a single failed trial (max_consecutive_failed_trials=1).
tuner = keras_tuner.GridSearch(
    hypermodel=MyHyperModel(),
    objective=keras_tuner.Objective('val_MEE', direction='min'),
    executions_per_trial=3,
    max_consecutive_failed_trials=1,
    # Dedicated project name so that the other grid searches in this notebook
    # (also created with overwrite=True) do not wipe this search's results.
    project_name='coarse_adam_dropout',
    overwrite=True
)

In [None]:
# Search space summary: list the hyperparameters registered by the hypermodel before searching
tuner.search_space_summary()

In [None]:
%%time
# Define stopping criteria: end a run once val_loss has not improved for 20 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Starting the search on the design set; validation_split=0.2 holds out 20% of (X, y) for validation
tuner.search(X, y, epochs=500, validation_split=0.2, verbose=1, callbacks=[early_stopping])

In [None]:
# Summary results of the grid search just completed (objective: val_MEE, lower is better)
tuner.results_summary()

### **Fine-grained GridSearch (SGD & Weight decay)**

In [None]:
# Define HyperModel
class MyHyperModel(keras_tuner.HyperModel):
  """Fine search space: 3-hidden-layer ELU MLP, Nesterov SGD with weight decay.

  Tuned in ``build``: units, momentum and weight_decay. The learning rate
  (0.01), the depth (3) and the batch size (64) are fixed.
  """

  def build(self, hp):
    """Build and compile the model for the hyperparameter values in ``hp``."""
    # Hyperparameters
    units = hp.Choice("units", [16, 32])
    momentum = hp.Choice("momentum", [0.5, 0.9])
    weight_decay = hp.Choice("weight_decay", [0.001, 0.002, 0.004, 0.006, 0.008, 0.01])
    # Define the model: 9 inputs -> 3 hidden layers -> 2 linear outputs
    model = keras.Sequential()
    model.add(layers.Input(shape=[9,]))
    for _ in range(3):
      model.add(layers.Dense(units=units, activation='elu', kernel_initializer='RandomUniform'))
    model.add(layers.Dense(2))
    # Compile the model; metrics is given as a list, the form documented for
    # Model.compile. The metric is still reported as 'MEE' / 'val_MEE'.
    model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.01,
                                                 momentum=momentum,
                                                 weight_decay=weight_decay,
                                                 nesterov=True),
                  loss=MEE,
                  metrics=[MEE])
    return model

  def fit(self, hp, model, *args, **kwargs):
    """Train ``model`` with the fixed batch size of 64."""
    return model.fit(
        *args,
        batch_size=64,
        **kwargs
    )

In [None]:
# Define Keras Tuner: exhaustive grid search minimising the validation MEE.
# Every configuration is trained 3 times (executions_per_trial) and the search
# aborts after a single failed trial (max_consecutive_failed_trials=1).
tuner = keras_tuner.GridSearch(
    hypermodel=MyHyperModel(),
    objective=keras_tuner.Objective('val_MEE', direction='min'),
    executions_per_trial=3,
    max_consecutive_failed_trials=1,
    # Dedicated project name so that the other grid searches in this notebook
    # (also created with overwrite=True) do not wipe this search's results.
    project_name='fine_sgd_weight_decay',
    overwrite=True
)

In [None]:
# Search space summary: list the hyperparameters registered by the hypermodel before searching
tuner.search_space_summary()

In [None]:
%%time
# Define stopping criteria: end a run once val_loss has not improved for 20 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Starting the search on the design set; validation_split=0.2 holds out 20% of
# (X, y) for validation. (Fixed typo: the method is `search`, not `seach`,
# which raised AttributeError.)
tuner.search(X, y, epochs=500, validation_split=0.2, verbose=1, callbacks=[early_stopping])

In [None]:
# Summary results of the grid search just completed (objective: val_MEE, lower is better)
tuner.results_summary()

### **Fine-grained GridSearch (Adam & Weight decay)**

In [None]:
# Define HyperModel
class MyHyperModel(keras_tuner.HyperModel):
  """Fine search space: 3-hidden-layer ELU MLP, Adam with weight decay.

  Tuned in ``build``: units and weight_decay. The depth (3) and the batch
  size (16) are fixed.
  """

  def build(self, hp):
    """Build and compile the model for the hyperparameter values in ``hp``."""
    # Hyperparameters
    units = hp.Choice("units", [128, 256])
    weight_decay = hp.Choice("weight_decay", [0.0001, 0.0002, 0.0004, 0.0007, 0.0008, 0.001])
    # Define the model: 9 inputs -> 3 hidden layers -> 2 linear outputs
    model = keras.Sequential()
    model.add(layers.Input(shape=[9,]))
    for _ in range(3):
      model.add(layers.Dense(units=units, activation='elu', kernel_initializer='RandomUniform'))
    model.add(layers.Dense(2))
    # Compile the model; metrics is given as a list, the form documented for
    # Model.compile. The metric is still reported as 'MEE' / 'val_MEE'.
    model.compile(optimizer=keras.optimizers.Adam(weight_decay=weight_decay),
                  loss=MEE,
                  metrics=[MEE])
    return model

  def fit(self, hp, model, *args, **kwargs):
    """Train ``model`` with the fixed batch size of 16."""
    return model.fit(
        *args,
        batch_size=16,
        **kwargs
    )

In [None]:
# Define Keras Tuner: exhaustive grid search minimising the validation MEE.
# Every configuration is trained 3 times (executions_per_trial) and the search
# aborts after a single failed trial (max_consecutive_failed_trials=1).
tuner = keras_tuner.GridSearch(
    hypermodel=MyHyperModel(),
    objective=keras_tuner.Objective('val_MEE', direction='min'),
    executions_per_trial=3,
    max_consecutive_failed_trials=1,
    # Dedicated project name so that the other grid searches in this notebook
    # (also created with overwrite=True) do not wipe this search's results.
    project_name='fine_adam_weight_decay',
    overwrite=True
)

In [None]:
# Search space summary: list the hyperparameters registered by the hypermodel before searching
tuner.search_space_summary()

In [None]:
%%time
# Define stopping criteria: end a run once val_loss has not improved for 20 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Starting the search on the design set; validation_split=0.2 holds out 20% of
# (X, y) for validation. (Fixed typo: the method is `search`, not `seach`,
# which raised AttributeError.)
tuner.search(X, y, epochs=500, validation_split=0.2, verbose=1, callbacks=[early_stopping])

In [None]:
# Summary results of the grid search just completed (objective: val_MEE, lower is better)
tuner.results_summary()

---
## **K-Fold Cross Validation of the Best Model**

In [None]:
# Validation MEE recorded for each fold
MEE_per_fold = []

# Define the K-fold cross validator. random_state fixes the fold assignment so
# that re-running the cell evaluates the same splits (network weight
# initialisation is not seeded here, so scores can still vary slightly).
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Run a 5-fold cross-validation on the design set
for fold_idx, (train_indices, val_indices) in enumerate(kfold.split(design)):
  print(f"Running fold {fold_idx+1}")

  # Extract the training and validation examples (inputs: all but the last
  # two columns, targets: the last two columns)
  X_train, y_train = design.iloc[train_indices, :-2], design.iloc[train_indices, -2:]
  X_val, y_val = design.iloc[val_indices, :-2], design.iloc[val_indices, -2:]

  # Define a fresh model for this fold with the selected hyperparameters
  model = build_model(
    n_hidden_layers=3,
    units=32,
    activation='elu',
    kernel_initializer='RandomUniform',
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01,
                                      momentum=0.9,
                                      weight_decay=0.002,
                                      nesterov=True))
  # Train the model for the full 500 epochs (no early stopping in this cell)
  history = model.fit(X_train, y_train,
                      validation_data=(X_val, y_val),
                      batch_size=64,
                      epochs=500,
                      verbose=0)

  # Evaluate the model: keep the lowest validation MEE reached in any epoch
  val_mee = min(history.history['val_MEE'])
  print(f'val_MEE: {val_mee}')
  MEE_per_fold.append(val_mee)

print(f"Mean: {mean(MEE_per_fold)}")
print(f"Stdev: {stdev(MEE_per_fold)}")

---
## **Model Assessment**

### **Training & Validation Loss/MEE**

In [None]:
# Define the model with the same hyperparameters as the cross-validated
# configuration above (3 hidden layers of 32 ELU units, Nesterov SGD)
model = build_model(
    n_hidden_layers=3,
    units=32,
    activation='elu',
    kernel_initializer='RandomUniform',
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01,
                                                    momentum=0.9,
                                                    weight_decay=0.002,
                                                    nesterov=True))
# Define stopping criteria: stop once val_loss has not improved for 20 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Fit the model on the design set; validation_split=0.2 holds out 20% of
# (X, y) as the validation set that early stopping monitors
history = model.fit(X,
                    y,
                    validation_split=0.2,
                    batch_size=64,
                    epochs=1000,
                    shuffle=True,
                    callbacks=[early_stopping],
                    verbose=1)

In [None]:
# Training vs. validation loss per epoch for the fit above
loss_plot(history, label_loss='Training loss', label_val_loss='Validation loss')

In [None]:
# Training vs. validation MEE per epoch for the fit above
MEE_plot(history, label_MEE='Training MEE', label_val_MEE='Validation MEE')

In [None]:
# Evaluate the fitted model on the whole design set; the result is indexed as
# [loss, MEE]
results_design = model.evaluate(x=X, y=y)
design_loss = results_design[0]
design_MEE = results_design[1]
# Leading empty item prints a blank line between the Keras output and the summary
print('', f'Design loss: {design_loss}', f'Design MEE: {design_MEE}', sep='\n')

In [None]:
# Evaluate the fitted model on the held-out test set; the result is indexed as
# [loss, MEE]
results_test = model.evaluate(x=X_test, y=y_test)
test_loss = results_test[0]
test_MEE = results_test[1]
# Leading empty item prints a blank line between the Keras output and the summary
print('', f'Test loss: {test_loss}', f'Test MEE: {test_MEE}', sep='\n')

### **Prediction Plots**

In [None]:
# Generate prediction of target variables for the test inputs
test_pred = model.predict(X_test.values, verbose=0)
# Keep the predictions as a float64 tensor; column 0 / column 1 are used in
# the following plots as the predicted x / y
y_pred = tf.convert_to_tensor(test_pred, dtype=tf.double)

In [None]:
# y prediction: compare the true targets with the predicted y coordinate
plt.figure(figsize=(8, 6))
sns.set_theme(style="darkgrid")
# First scatter: true (x, y) targets of the test set
sns.scatterplot(x=y_test['x'], y=y_test['y'], label='y_true')
# Second scatter (red): true x against the predicted y (column 1 of y_pred)
sns.scatterplot(x=y_test['x'], y=y_pred[:, 1], color='r', label='y_pred')
plt.title('Multi Layer Perceptron y prediction', fontsize=14)
plt.xlabel('x', fontsize=14)
plt.ylabel('y', fontsize=14)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.legend()
plt.show()

In [None]:
# x prediction: compare the true targets with the predicted x coordinate
plt.figure(figsize=(8, 6))
sns.set_theme(style="darkgrid")
# First scatter: true (x, y) targets of the test set
sns.scatterplot(x=y_test['x'], y=y_test['y'], label='x_true')
# Second scatter (red): predicted x (column 0 of y_pred) against the true y
sns.scatterplot(x=y_pred[:, 0], y=y_test['y'], color='r', label='x_pred')
plt.title('Multi Layer Perceptron x prediction', fontsize=14)
plt.xlabel('x', fontsize=14)
plt.ylabel('y', fontsize=14)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.legend()
plt.show()

### **Design & Test Loss/MEE**

In [None]:
# Define the model again (fresh weights) with the selected hyperparameters
model = build_model(
    n_hidden_layers=3,
    units=32,
    activation='elu',
    kernel_initializer='RandomUniform',
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01,
                                                    momentum=0.9,
                                                    weight_decay=0.002,
                                                    nesterov=True))
# Fit the model on the full design set for a fixed number of epochs.
# The test set is passed as validation_data, so 'val_loss' / 'val_MEE' in the
# history are test-set curves; no callback is given, so they do not influence
# when training stops.
history = model.fit(X,
                    y,
                    validation_data=(X_test, y_test),
                    batch_size=64,
                    epochs=160, # Previously, the model stopped after 152 epochs
                    shuffle=True,
                    verbose=1)

In [None]:
# Design vs. test loss per epoch for the fit above
loss_plot(history, label_loss='Design loss', label_val_loss='Test loss')

In [None]:
# Design vs. test MEE per epoch for the fit above
MEE_plot(history, label_MEE='Design MEE', label_val_MEE='Test MEE')