# Exercises: neural network overfitting

Exercise on classification using NN. 

Author: Stefano Pagani <stefano.pagani@polimi.it>.

Date: 2023

Course: Scientific computing tools for advanced mathematical modelling.


Exercise on overfitting.
Example adapted from:
[3] TensorFlow tutorials
Data from:
[1] Brunton, S. L., & Kutz, J. N. (2022).
Data-driven science and engineering: Machine learning,
dynamical systems, and control. Cambridge University Press.

In [None]:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from scipy import io
import os
from sklearn import linear_model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import regularizers


In [None]:


# Load the two MATLAB files from the DATA folder and pull out the 'dog' and
# 'cat' arrays (rows are indexed as features, columns as samples below).
dogdata_mat = io.loadmat(os.path.join('DATA', 'dogData.mat'))
catdata_mat = io.loadmat(os.path.join('DATA', 'catData.mat'))
dog, cat = dogdata_mat['dog'], catdata_mat['cat']

# Number of randomly selected rows used as input features.
FEATURES = 50

# Fix the NumPy and TensorFlow seeds so the feature selection and the
# network initialisation are reproducible.
np.random.seed(1)
tf.random.set_seed(1)

# Draw FEATURES random row indices in 0 .. 64*64-1 (repeats are possible).
ind_selected = np.floor(
    1 / (64 * 64) + (64 * 64 - 1) * np.random.rand(FEATURES)
).astype(int)

# Columns 0-59 of each class form the training set, columns 60-79 the test
# set; dogs come first, cats second.
X_train = np.hstack((dog[ind_selected, :60], cat[ind_selected, :60]))
X_test = np.hstack((dog[ind_selected, 60:80], cat[ind_selected, 60:80]))

# Scalar +1 / -1 labels (dog / cat) in the same order as the columns above.
y_train = np.repeat(np.array([1, -1]), 60)
y_test = np.repeat(np.array([1, -1]), 20)

# One-hot rows: [1, 0] for dog, [0, 1] for cat.
y = np.eye(2, dtype=int)

# TensorFlow inputs: one sample per row, values divided by 256.
X_tf_train = tf.constant(X_train.T / 256, dtype=tf.float32)
X_tf_test = tf.constant(X_test.T / 256, dtype=tf.float32)

# One-hot targets repeated to match the 60+60 / 20+20 sample layout.
y_tf_train = tf.constant(np.repeat(y, 60, axis=0), dtype=tf.float32)
y_tf_test = tf.constant(np.repeat(y, 20, axis=0), dtype=tf.float32)



In [None]:


# Inverse-time-decay learning-rate schedule starting at 1e-3; it is the
# schedule handed to the Adam optimizer built by get_optimizer().
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=100,
    decay_rate=1,
    staircase=False,
)

def get_optimizer():
    """Return a new Adam optimizer driven by the module-level `lr_schedule`."""
    adam = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    return adam

def get_callbacks(name):
    """Return the list of Keras callbacks used during training.

    The list holds a single EarlyStopping callback that watches
    'val_binary_crossentropy' and stops after 200 epochs without an
    improvement of at least 0.001.

    `name` is accepted but not used by this function.
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_binary_crossentropy',
        min_delta=0.001,
        patience=200,
    )
    return [early_stopping]

def compile_and_fit(model, name, optimizer=None, max_epochs=1000):
    """Compile `model` for binary cross-entropy training and fit it.

    The model is trained on the module-level tensors `X_tf_train` /
    `y_tf_train` and validated on `X_tf_test` / `y_tf_test` after every epoch.

    Args:
        model: Keras model to compile and train. The loss and the tracked
            cross-entropy metric are built with `from_logits=True`.
        name: Label of the run; forwarded to `get_callbacks`.
        optimizer: Optimizer to use. When None, `get_optimizer()` supplies one.
        max_epochs: Upper bound on the number of training epochs. The
            callbacks returned by `get_callbacks` may stop training earlier.
            The default of 1000 matches the epoch count that was previously
            hard-coded in the call to `fit`.

    Returns:
        The History object returned by `model.fit`; its `history` dict
        contains the keys 'binary_crossentropy' and 'val_binary_crossentropy'.
    """
    if optimizer is None:
        optimizer = get_optimizer()

    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=[
            # Tracked with a Keras Metric object under the name that the
            # EarlyStopping callback and the plotting code look up.
            tf.keras.metrics.BinaryCrossentropy(
                from_logits=True, name='binary_crossentropy'),
            'accuracy',
        ])

    model.summary()

    history = model.fit(
        X_tf_train, y_tf_train,
        # Honour the caller's limit instead of a hard-coded epoch count.
        epochs=max_epochs,
        validation_data=(X_tf_test, y_tf_test),
        callbacks=get_callbacks(name),
        verbose=2)

    return history



Complete the notebook to train and test the following architectures:

1. a tiny FFNN made of
  - a hidden layer of 8 neurons with `elu` activation function,
  - an output layer of 1 neuron with `softmax` activation function;

2. a medium FFNN made of
  - a hidden layer of 8 neurons with `elu` activation function,
  - a hidden layer of 8 neurons with `elu` activation function,
  - an output layer of 1 neuron with `softmax` activation function;

3. a large FFNN made of
  - a hidden layer of 32 neurons with `elu` activation function,
  - a hidden layer of 32 neurons with `elu` activation function,
  - a hidden layer of 32 neurons with `elu` activation function,
  - an output layer of 1 neuron with `softmax` activation function;

4. a large FFNN with $\ell_2$ regularization of the weights (find a sub-optimal regularization coefficient);

5. a large FFNN with dropout regularization of the weights;

6. a large FFNN with both $\ell_2$ and dropout regularization of the weights;

Compare, in a single plot, the trends of the binary cross-entropy (evaluated on the training and test sets) for these different architectures.

In [None]:


# Exercise skeleton: the three Sequential models below are intentionally left
# empty and have to be filled in following the architecture list given in the
# exercise text.
# NOTE(review): compile_and_fit builds its loss with from_logits=True and the
# targets y_tf_train / y_tf_test have two one-hot columns; the output layer
# presumably has to be consistent with both -- confirm against the exercise
# statement before completing the models.

# Tiny network: one hidden layer of 8 `elu` units followed by the output layer.
tiny_model = tf.keras.Sequential([
    # layers.Dense( ),
    # TODO: add the layers of the tiny network
])

# Medium network: two hidden layers of 8 `elu` units followed by the output layer.
medium_model = tf.keras.Sequential([
    # TODO: add the layers of the medium network
])

# Large network: three hidden layers of 32 `elu` units followed by the output layer.
large_model = tf.keras.Sequential([
    # TODO: add the layers of the large network
])



In [None]:


# Train the three architectures one after the other (Tiny, Medium, Large) and
# keep each training history under its size label.
size_histories = {}

for label, model in (
    ('Tiny', tiny_model),
    ('Medium', medium_model),
    ('Large', large_model),
):
    size_histories[label] = compile_and_fit(model, f'sizes/{label}')



In [None]:


# Plot, for every network size, the training (solid line) and validation
# (dashed line) binary cross-entropy against the epoch index on one figure.
legend_entries = []

for size_name in ('Tiny', 'Medium', 'Large'):
    history = size_histories[size_name]

    train_bce = history.history['binary_crossentropy']
    val_bce = history.history['val_binary_crossentropy']

    plt.plot(range(len(train_bce)), train_bce, linewidth=2)
    plt.plot(range(len(val_bce)), val_bce, '--', linewidth=2)

    # Legend entries follow the order in which the curves are drawn.
    legend_entries += [size_name, f'{size_name} val']

plt.title('NN reconstruction')
plt.legend(legend_entries)
plt.show()
