# ANNDL Homework 2 - Best Model
**Team**: MercyMain

**Team Members**:

- Azimi Arya
- Belotti Ottavia
- Izzo Riccardo

The best model is an ensemble of three models: two 1D CNNs and one BiLSTM.

In [None]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

# Short aliases used throughout the notebook
tfk = tf.keras
tfkl = tfk.layers
print(tf.__version__)


# Random seed for reproducibility: the same value is fed to every random
# number generator used in the notebook (Python, NumPy, TensorFlow).
seed = 42

# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here
# only affects Python processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed, tf.compat.v1.set_random_seed):
    seed_fn(seed)

In [None]:
# Dataset loading
# `dataset` holds the input samples and `outputs` the corresponding labels.

dataset = np.load('/kaggle/input/dataset-homework-2/x_train.npy')
outputs = np.load('/kaggle/input/dataset-homework-2/y_train.npy')

# Global statistics over every value of the dataset (no axis is given, so a
# single scalar mean and a single scalar standard deviation are printed).
print(f"Mean: {dataset.mean()}")
print(f"Standard Deviation: {dataset.std()}")

# Class name -> integer label
label_mapping = {
    "Wish": 0,
    "Another": 1,
    "Comfortably": 2,
    "Money": 3,
    "Breathe": 4,
    "Time": 5,
    "Brain": 6,
    "Echoes": 7,
    "Wearing": 8,
    "Sorrow": 9,
    "Hey": 10,
    "Shine": 11
}

## Exploratory Data Analysis (EDA)

In [None]:
# Convert to Dataframe to print and plot dataset's properties.
# The 3D dataset is flattened so that every time step of every sample becomes
# one row, with one column per feature.
n_samples, n_steps, n_features = dataset.shape
dataset_res = dataset.reshape([n_samples * n_steps, n_features])
print("Dataset reshaped: " + str(dataset_res.shape))

scale_columns = ['F1', 'F2', 'F3', 'F4', 'F5', 'F6']
df = pd.DataFrame(dataset_res, columns=scale_columns)
out_df = pd.DataFrame(outputs, columns=['target_class'])

# Lengthen the output dataframe to match the input size by repeating each
# sample's class once per time step. The repeat count comes from the dataset
# itself instead of a hard-coded value, and np.repeat replaces the row-by-row
# DataFrame.append (quadratic, and removed in pandas 2.0).
step_labels = np.repeat(np.ravel(outputs), n_steps)
new_out = pd.DataFrame({'target_class': step_labels})

print('df_out.shape ' + str(out_df.shape))
print('new_out.shape ' + str(new_out.shape))

# Build the list of (start, end) row indices (both inclusive) covered by each
# class. A new span starts wherever the label exceeds every label seen so far,
# so this yields one span per class only if the samples are stored sorted by
# increasing class label - NOTE(review): confirm the dataset ordering.
running_max = np.maximum.accumulate(step_labels)
span_starts = np.concatenate(([0], np.flatnonzero(np.diff(running_max) > 0) + 1))
span_ends = np.concatenate((span_starts[1:] - 1, [len(step_labels) - 1]))
classes = [(int(first), int(last)) for first, last in zip(span_starts, span_ends)]

# df.info() prints its report itself and returns None, so it is not wrapped
# in print() (that would add a stray "None" line).
df.info()
print(df.head())
print(df.describe())

In [None]:
# Plot feature distribution over the dataset
# `x` enumerates every row of `df`; the figure is a 2x3 grid of subplots.
x = np.arange(df.shape[0])
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(34, 22))

def boolean_masking(arr, class_idx=0):
    """Tell, for every value in `arr`, whether it falls inside a class span.

    The span is the (start, end) pair stored at `classes[class_idx]` in the
    module-level `classes` list; both bounds are inclusive.

    Args:
        arr: iterable of row indices to test.
        class_idx: position of the span to test against in `classes`.

    Returns:
        A list of Python booleans, one per element of `arr`.
    """
    def _inside_span(row_idx):
        span = classes[class_idx]
        return bool(row_idx >= span[0] and row_idx <= span[1])

    return list(map(_inside_span, arr))

# One line color per feature and one background color per class
colors = ['blue', 'orange', 'green', 'red', 'saddlebrown', 'purple']
class_colors = ['purple', 'green', 'red', 'royalblue', 'yellow', 'sienna', 'pink', 'lime', 'darkorange', 'aqua', 'gold', 'orchid']

# The shaded region of a class does not depend on the feature being plotted,
# so each mask is built once here instead of once per subplot.
class_masks = [boolean_masking(x, k) for k in range(len(class_colors))]

for i, column in enumerate(scale_columns):
    # Features 1-3 go on the first row of the grid, features 4-6 on the second
    axis = axs[i // 3][i % 3]
    axis.plot(df[column], label='Feature ' + str(i+1), color=colors[i], linewidth=2)
    axis.legend()
    # Shade the rows belonging to each class with that class' color
    for mask, shade in zip(class_masks, class_colors):
        axis.fill_between(x, -10000, 50000, where=mask, color=shade, alpha=0.3)

# plt.show() is used (as in the correlation heatmap cell) rather than fig.show()
plt.show()

In [None]:
# Draw heatmap visualization of the correlation matrix
# (Spearman correlation between the feature columns of `df`)
f, ax = plt.subplots(figsize=(10, 8))
corr_matrix = df.corr(method='spearman')
sns.heatmap(
    corr_matrix,
    ax=ax,
    cmap='coolwarm',
    annot=True,
    annot_kws={"size": 11},
    fmt='.2f',
    linewidth=0.4,
)
plt.title("Correlation matrix of training set")
# Same font size on both axes' tick labels
plt.xticks(fontsize=11)
plt.yticks(fontsize=11)
plt.show()

## Data Preprocessing

In [None]:
# Dataset Standardization for CNNs
# (do not run this if building/fitting BiLSTM model)
# A single scalar mean and standard deviation, computed over every value of
# the dataset, are used for all features.
global_mean = dataset.mean()
global_std = dataset.std()
dataset = (dataset - global_mean) / global_std

In [None]:
# Split dataset in training set and held-out set: 15% of the samples go to
# X_test / y_test, the split is made reproducible by the fixed random state.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    dataset,
    outputs,
    test_size=0.15,
    random_state=42,
)

In [None]:
# class_weights for BiLSTM model fit
# Maps every label found in y_train to its "balanced" weight.
from sklearn.utils.class_weight import compute_class_weight

present_labels = np.unique(y_train)
balanced_weights = compute_class_weight(class_weight="balanced", classes=present_labels, y=y_train)
class_weights = dict(zip(present_labels, balanced_weights))

In [None]:
# Convert the sparse labels to categorical (one-hot) values.
# The number of classes is taken from the full label array and passed
# explicitly, so that y_train and y_test always have the same width even if
# one of the two splits happens to miss the highest class.
num_classes = int(np.max(outputs)) + 1
y_train = tfk.utils.to_categorical(y_train, num_classes=num_classes)
y_test = tfk.utils.to_categorical(y_test, num_classes=num_classes)
# Printed explicitly: a bare expression in the middle of a cell shows nothing
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


# Per-sample input shape (batch dimension dropped) and size of the output layer.
# NOTE: this rebinds `classes`, which the EDA cells use as the list of
# per-class row spans; re-run the EDA cell before re-running the EDA plots.
input_shape = X_train.shape[1:]
classes = y_train.shape[-1]
batch_size = 32 # for CNNs - see later for BiLSTM batch size
epochs = 200

## First Model: Best 1D CNN

In [None]:
def build_1DCNN_classifier(input_shape, classes):
    """Build and compile the first 1D CNN classifier.

    Args:
        input_shape: shape of a single input sample (no batch dimension),
            forwarded to the Keras `Input` layer.
        classes: number of units of the final softmax layer.

    Returns:
        The compiled Keras model (categorical cross-entropy loss, Adam
        optimizer with default settings, accuracy metric).
    """
    # Build the neural network layer by layer
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Feature extractor: four Conv1D layers, the first three each followed by
    # max pooling, then global average pooling over the time axis
    cnn = tfkl.Conv1D(128, 3, padding='same', activation='relu')(input_layer)
    cnn = tfkl.MaxPooling1D()(cnn)
    cnn = tfkl.Conv1D(128, 3, padding='same', activation='relu')(cnn)
    cnn = tfkl.MaxPooling1D()(cnn)
    cnn = tfkl.Conv1D(256, 3, padding='same', activation='relu')(cnn)
    cnn = tfkl.MaxPooling1D()(cnn)
    cnn = tfkl.Conv1D(256, 3, padding='same', activation='relu')(cnn)
    gap = tfkl.GlobalAveragePooling1D()(cnn)
    dropout = tfkl.Dropout(.2, seed=seed)(gap)

    # Classifier
    classifier = tfkl.Dense(128, activation='relu')(dropout)
    classifier = tfkl.Dense(64, activation='relu')(classifier)
    output_layer = tfkl.Dense(classes, activation='softmax')(classifier)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # Compile the model. `metrics` is passed as a list, the documented form
    # (a bare string is not accepted by every Keras version); the reported
    # metric names ('accuracy', 'val_accuracy') are unchanged.
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=['accuracy'],
    )

    # Return the model
    return model

## Second Model: Second 1D CNN

In [None]:
def build_1DCNN_classifier_2(input_shape, classes):
    """Build and compile the second 1D CNN classifier.

    Args:
        input_shape: shape of a single input sample (no batch dimension),
            forwarded to the Keras `Input` layer.
        classes: number of units of the final softmax layer.

    Returns:
        The compiled Keras model (categorical cross-entropy loss, Adam
        optimizer with default settings, accuracy metric).
    """
    # Build the neural network layer by layer
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Feature extractor: one Conv1D layer, then two pairs of stacked Conv1D
    # layers; max pooling after the first layer and after the first pair,
    # global average pooling at the end
    cnn = tfkl.Conv1D(64, 3, padding='same', activation='relu')(input_layer)
    cnn = tfkl.MaxPooling1D()(cnn)
    cnn = tfkl.Conv1D(128, 3, padding='same', activation='relu')(cnn)
    cnn = tfkl.Conv1D(128, 3, padding='same', activation='relu')(cnn)
    cnn = tfkl.MaxPooling1D()(cnn)
    cnn = tfkl.Conv1D(256, 3, padding='same', activation='relu')(cnn)
    cnn = tfkl.Conv1D(256, 3, padding='same', activation='relu')(cnn)
    cnn = tfkl.GlobalAveragePooling1D()(cnn)
    cnn = tfkl.Dropout(.25, seed=seed)(cnn)

    # Classifier (with an additional dropout between the two dense layers)
    classifier = tfkl.Dense(128, activation='relu')(cnn)
    classifier = tfkl.Dropout(.5, seed=seed)(classifier)
    classifier = tfkl.Dense(64, activation='relu')(classifier)
    output_layer = tfkl.Dense(classes, activation='softmax')(classifier)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # Compile the model. `metrics` is passed as a list, the documented form
    # (a bare string is not accepted by every Keras version); the reported
    # metric names ('accuracy', 'val_accuracy') are unchanged.
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=['accuracy'],
    )

    # Return the model
    return model


## Third Model: BiLSTM

In [None]:
def build_BiLSTM_classifier(input_shape, classes):
    """Build and compile the bidirectional LSTM classifier.

    Args:
        input_shape: shape of a single input sample (no batch dimension),
            forwarded to the Keras `Input` layer.
        classes: number of units of the final softmax layer.

    Returns:
        The compiled Keras model (categorical cross-entropy loss, Adam
        optimizer with default settings, accuracy metric).
    """
    # Build the neural network layer by layer
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Feature extractor: the first BiLSTM returns the whole sequence so that
    # the second one can consume it; the second returns only its final output
    bilstm = tfkl.Bidirectional(tfkl.LSTM(128, return_sequences=True))(input_layer)
    bilstm = tfkl.Bidirectional(tfkl.LSTM(128))(bilstm)
    dropout = tfkl.Dropout(.5, seed=seed)(bilstm)

    # Classifier. The output width follows the `classes` argument (it used to
    # be hard-coded to 12, silently ignoring the parameter).
    classifier = tfkl.Dense(128, activation='relu')(dropout)
    output_layer = tfkl.Dense(classes, activation='softmax')(classifier)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # Compile the model. `metrics` is passed as a list, the documented form
    # (a bare string is not accepted by every Keras version); the reported
    # metric names ('accuracy', 'val_accuracy') are unchanged.
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=['accuracy'],
    )

    # Return the model
    return model

## Models training

In [None]:
# Training first CNN
model1 = build_1DCNN_classifier(input_shape, classes)
model1.summary()

# Both callbacks watch the validation accuracy: training stops after 15 epochs
# without improvement (restoring the best weights), and the learning rate is
# halved after 7 epochs without improvement, down to a minimum of 1e-5.
cnn1_callbacks = [
    tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=15, restore_best_weights=True),
    tfk.callbacks.ReduceLROnPlateau(monitor='val_accuracy', mode='max', patience=7, factor=0.5, min_lr=1e-5),
]

# Train the model, keeping 10% of the training data for validation
fit_result1 = model1.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=.1,
    callbacks=cnn1_callbacks,
)
history1 = fit_result1.history

model1.save('/kaggle//working/1DCNN')

In [None]:
# Training second CNN
model2 = build_1DCNN_classifier_2(input_shape, classes)
model2.summary()

# Both callbacks watch the validation accuracy: training stops after 15 epochs
# without improvement (restoring the best weights), and the learning rate is
# halved after 7 epochs without improvement, down to a minimum of 1e-5.
cnn2_callbacks = [
    tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=15, restore_best_weights=True),
    tfk.callbacks.ReduceLROnPlateau(monitor='val_accuracy', mode='max', patience=7, factor=0.5, min_lr=1e-5),
]

# Train the model, keeping 10% of the training data for validation
fit_result2 = model2.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=.1,
    callbacks=cnn2_callbacks,
)
history2 = fit_result2.history

model2.save('/kaggle/working/1DCNN_2')

In [None]:
# Training BiLSTM - be aware that BiLSTM doesn't use the standardized training set
# (skip the standardization cell before creating the split used here)
batch_size = 128

model3 = build_BiLSTM_classifier(input_shape, classes)
model3.summary()

# Train the model, keeping 10% of the training data for validation.
# The per-class weights are passed through `class_weight` (singular): that is
# the name of the Model.fit argument, `class_weights` raises a TypeError.
history3 = model3.fit(
    x = X_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    class_weight = class_weights,
    validation_split=.1,
    callbacks = [
        # Stop after 15 epochs without validation-accuracy improvement and
        # restore the best weights seen
        tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=15, restore_best_weights=True),
        # Halve the learning rate after 7 stagnating epochs, down to 1e-5
        tfk.callbacks.ReduceLROnPlateau(monitor='val_accuracy', mode='max', patience=7, factor=0.5, min_lr=1e-5)
    ]
).history

model3.save('/kaggle/working/BiLSTM')

## Ensemble
Once the models have been trained, the ensemble is done at prediction time as follows, for each input:
1. Get the class predictions/probabilities from all the models
2. Compute the class-wise average of the scores across the 3 classification output vectors
3. Take the averaged vector as the classification output and extract the predicted class for the input sample by choosing the most likely one (i.e. _argmax_)

In the code snippet below, we report the procedure implemented in the `Model.py` file placed in the submission folder.

In [None]:
import os
import tensorflow as tf
import numpy as np


class model:
    """Ensemble of the two 1D CNNs and the BiLSTM.

    The three saved models are loaded once at construction time; `predict`
    averages their class scores and returns the most likely class per sample.
    """

    def __init__(self, path):
        """Load the three saved models from `<path>/SubmissionModel`.

        Args:
            path: directory that contains the `SubmissionModel` folder.
        """
        submission_dir = os.path.join(path, 'SubmissionModel')
        self.model_1 = tf.keras.models.load_model(os.path.join(submission_dir, 'firstCnn'))
        self.model_2 = tf.keras.models.load_model(os.path.join(submission_dir, 'secondCnn'))
        self.model_3 = tf.keras.models.load_model(os.path.join(submission_dir, 'bilstm'))

    def predict(self, X):
        """Predict the class of every sample in `X` with the ensemble.

        Args:
            X: batch of raw (non-standardized) input samples; presumably a
                NumPy array shaped like the training data - confirm with the
                caller.

        Returns:
            A tensor with the index of the highest averaged score for each
            sample (result of `tf.argmax` over the last axis).
        """
        # Insert your preprocessing here
        # Standardization - mean and std come from the training dataset
        mean = 25.295960919260704
        std = 658.4301638513654
        X_std = (X - mean) / std

        # Only the CNNs use standardized data
        out_1 = self.model_1.predict(X_std)
        out_2 = self.model_2.predict(X_std)
        out_3 = self.model_3.predict(X)

        # Class-wise average of the prediction scores of the 3 models,
        # computed on the whole arrays at once instead of element by element
        out_avg = (out_1 + out_2 + out_3) / 3

        # Get best class prediction for each input sample
        out = tf.argmax(out_avg, axis=-1)

        return out