# TP réseaux de neurones

Diane Lingrand (diane.lingrand@univ-cotedazur)

Polytech SI4 - CVML - 2020-21

## imports nécessaires pour la suite

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import ensemble
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, f1_score
from sklearn.utils import shuffle
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation 

In [None]:
# reading the dataset
from tensorflow.keras.datasets import mnist
import tensorflow.keras.utils
# Load MNIST: train / test images together with their digit labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

nbClasses = 10 # 10 digits from 0 to 9
# Flatten every image into a 1-D feature vector. The number of samples and the
# number of pixels are derived from the arrays themselves instead of being
# hard-coded, so this cell keeps working if the dataset is swapped or subsampled.
xTrain = x_train.reshape(len(x_train), -1)
xTest = x_test.reshape(len(x_test), -1)
# ... and normalize the data (grey levels are integers from 0 to 255)
xTrain = xTrain.astype('float32')/255
xTest = xTest.astype('float32')/255

# The original labels are the digits themselves. We transform them into
# categorical (one-hot) labels, one column per class.
yTrain = tensorflow.keras.utils.to_categorical(y_train, nbClasses)
yTest = tensorflow.keras.utils.to_categorical(y_test, nbClasses)

print('shape of yTrain :', yTrain.shape)


In [None]:
# case of binary classification

# you can change the classes
class1 = 4
class2 = 8
nameClass1 = '4'
nameClass2 = '8'

# Labels must be valid class indices (0 or 1) before calling
# to_categorical(..., 2). With -1/+1 labels, -1 is treated as a negative index
# that selects the last column, so both classes would receive the same one-hot
# vector [0, 1] and the problem would become trivial.
# Convention used here: class1 = positives = label 1, class2 = negatives = label 0.

## TRAIN
#class 1: positives
x_train1 = xTrain[y_train==class1,:]
#class 2: negatives
x_train2 = xTrain[y_train==class2,:]
# merging the 2 classes and shuffle (fixed random_state for reproducibility)
x_trainBinaire = np.append(x_train1,x_train2,axis=0)
y_trainBinaire = np.concatenate([np.ones(len(x_train1), dtype=int),
                                 np.zeros(len(x_train2), dtype=int)])
(x_trainBinaire,y_trainBinaire) = shuffle(x_trainBinaire,y_trainBinaire,random_state=0)
# one-hot encoding: column 0 = class2 (negatives), column 1 = class1 (positives)
y_trainBinaire = tensorflow.keras.utils.to_categorical(y_trainBinaire, 2)

## TEST
#class 1: positives
x_test1 = xTest[y_test==class1,:]
#class 2: negatives
x_test2 = xTest[y_test==class2,:]
# merging the 2 classes and shuffle (fixed random_state for reproducibility)
x_testBinaire = np.append(x_test1,x_test2,axis=0)
y_testBinaire = np.concatenate([np.ones(len(x_test1), dtype=int),
                                np.zeros(len(x_test2), dtype=int)])
(x_testBinaire,y_testBinaire) = shuffle(x_testBinaire,y_testBinaire,random_state=0)
# one-hot encoding: column 0 = class2 (negatives), column 1 = class1 (positives)
y_testBinaire = tensorflow.keras.utils.to_categorical(y_testBinaire, 2)


# 1. Un premier MLP

In [None]:
# Binary problem: the output layer needs one neuron per class.
nbClasses = 2

# A simple neural network declared in one go with the Keras Sequential API:
#  - one hidden layer of 4 neurons with the usual sigmoid activation,
#    fed with inputs of size 784 (the size of the data),
#  - a softmax output layer with as many neurons as classes (2 in this case).
model = Sequential([
    Dense(4, input_dim=784, activation='sigmoid'),
    Dense(nbClasses, activation='softmax'),
])
model.summary()

In [None]:
# Training setup: categorical cross-entropy as the loss function, RMSprop as
# the optimisation method, and accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Now train the network for real: only 20 epochs, with mini-batches of 128
# samples taken from the binary train set.
model.fit(
    x_trainBinaire,
    y_trainBinaire,
    batch_size=128,
    epochs=20,
)



Que vaut le score F1?

In [None]:
# Is it good? We know the ground truth (y_testBinaire) and compare it with the
# output of the network.

score = model.evaluate(x_testBinaire,y_testBinaire)
print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

# f1_score is already imported in the first cell of the notebook, so it is not
# imported again here.
# Turn network outputs and one-hot ground truth into class indices.
pred_testBinaire = np.argmax(model.predict(x_testBinaire),axis=1)
true_testBinaire = np.argmax(y_testBinaire,axis=1)
print(pred_testBinaire.shape, true_testBinaire.shape)
# scikit-learn metrics take the ground truth first: f1_score(y_true, y_pred)
print("F1 score: ", f1_score(true_testBinaire, pred_testBinaire))

# 2. Mêmes questions avec les 10 classes


In [None]:
# Back to the full problem: one output neuron per digit.
nbClasses = 10

# A simple neural network declared in one go with the Keras Sequential API:
#  - one hidden layer of 20 neurons with the usual sigmoid activation,
#    fed with inputs of size 784 (the size of the data),
#  - a softmax output layer with as many neurons as classes (10 in this case).
model = Sequential([
    Dense(20, input_dim=784, activation='sigmoid'),
    # Dense(50, activation='sigmoid'),  # optional extra hidden layer to experiment with
    Dense(nbClasses, activation='softmax'),
])
model.summary()

In [None]:
# Training setup: categorical cross-entropy as the loss function, RMSprop as
# the optimisation method, and accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Now train the network for real on the full train set: only 20 epochs, with
# mini-batches of 128 samples.
model.fit(
    xTrain,
    yTrain,
    batch_size=128,
    epochs=20,
)


In [None]:
# Is it good? We know the ground truth (yTest) and compare it with the output
# of the network.

score = model.evaluate(xTest,yTest)
print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

# Turn network outputs and one-hot ground truth into class indices (computed once).
pred_test = np.argmax(model.predict(xTest),axis=1)
true_test = np.argmax(yTest,axis=1)
print(pred_test.shape, true_test.shape)
# scikit-learn metrics take the ground truth first: metric(y_true, y_pred)
print("F1 score: ", f1_score(true_test, pred_test, average=None))
print("F1 score micro: ", f1_score(true_test, pred_test, average='micro'))
print("F1 score macro: ", f1_score(true_test, pred_test, average='macro'))

print('confusion matrix\n',confusion_matrix(true_test, pred_test))



In [None]:
#confusion matrix


# 3. Essayons de faire mieux ...

## A-t-on laissé le temps à l'algorithme de converger ?
Modifiez le nombre d'itérations. Les résultats sont-ils meilleurs ?

## Critère d'arrêt autre que le nombre d'itérations

In this small example, we decided, as a default behavior, to stop after 20 epochs. Of course, this value can be changed. Another way to deal with this is to use an early stopping criterion. All options are described in the Keras documentation. Feel free to experiment with all the options!

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early-stopping callback. It monitors the accuracy on the validation set
# (a part of the train set) and uses a minimum change of 1e-4 with a patience
# of 20. Since restore_best_weights is False, the last weights are kept even
# if the last accuracy value is not the best one.
ourCallback = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.0001,
    patience=20,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
)

# Let's train the network again!
# 20% of the train set is held out as the validation set used to monitor the
# convergence. We do not know when the training will stop, but it will not
# run for more than 2000 epochs.
model.fit(
    xTrain,
    yTrain,
    epochs=2000,
    batch_size=128,
    validation_split=0.2,
    callbacks=[ourCallback],
)


A quel 'epoch' l'algorithme s'est-il arrêté ?

## Evolution de la convergence

In [None]:
def plot_history(history, metric='accuracy'):
    """Plot the evolution of a training metric over the epochs.

    Parameters
    ----------
    history : object
        Any object exposing a ``history`` dict that maps metric names to
        per-epoch lists of values (e.g. the value returned by ``model.fit``).
    metric : str, default 'accuracy'
        Name of the metric to plot. The validation curve is looked up under
        ``'val_' + metric`` and is plotted only when it is present.

    Raises
    ------
    KeyError
        If ``metric`` is not a key of ``history.history``.
    """
    curves = history.history
    plt.plot(curves[metric])
    legend = ['train']
    # The validation curve may be absent from the history; plot it only if present.
    val_key = 'val_' + metric
    if val_key in curves:
        plt.plot(curves[val_key])
        legend.append('val')
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(legend, loc='upper left')
    plt.show()

In [None]:
import os
import datetime
import matplotlib.pyplot as plt
from IPython.display import Image
from tensorflow.keras.callbacks import EarlyStopping

# Early-stopping callback monitoring the validation accuracy
# (min_delta of 1e-4, patience of 20, last weights kept).
ourCallback = EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=20, verbose=0, mode='auto', baseline=None, restore_best_weights=False)

model.compile(optimizer='rmsprop',loss='categorical_crossentropy', metrics=['accuracy'])

# TensorBoard logs go to a timestamped sub-directory of ./logs (relative to the
# current working directory) instead of a hard-coded, user-specific absolute
# path, so the notebook runs on any machine and matches
# `%tensorboard --logdir logs`.
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboardCb = tensorflow.keras.callbacks.TensorBoard(logdir) #, histogram_freq=1)

In [None]:


# Train with both callbacks (TensorBoard logging + early stopping) and keep
# the value returned by fit() so that the curves can be plotted.
historyCNN = model.fit(
    xTrain,
    yTrain,
    epochs=2000,
    batch_size=128,
    validation_split=0.2,
    callbacks=[tensorboardCb, ourCallback],
)
plot_history(historyCNN)
#%tensorboard --logdir logs

## La topologie du réseau convient-elle ?
Ajoutez des neurones à la couche cachée ou bien augmentez le nombre de couches cachées.
Exemple avec 2 couches cachées de 20 neurones (utilisez les lignes en les modifiant et en enlevant les commentaires):


In [None]:
#model.add(Dense(20, input_dim=784, activation='sigmoid'))
#model.add(Dense(20, activation='sigmoid'))
#model.add(Dense(nbClasses, activation='softmax'))

Est-ce qu'augmenter le nombre de couches de neurones augmente les performances?

Est-ce qu'augmenter le nombre de neurones par couche augmente les performances?

### Remplacez également l'activation 'sigmoid' par 'relu'. Observez-vous une différence ?

### Essayez les différents 'optimizer' disponibles. Quels sont-ils ? Observez-vous des différences ?

# 4. Modifiez les données en considérant FMNIST (Fashion MNIST)

In [None]:
# Load Fashion-MNIST into x_train / y_train / x_test / y_test. These are the
# same variable names that hold the MNIST arrays earlier in this notebook, so
# the MNIST arrays are overwritten here.
# NOTE(review): xTrain, xTest, yTrain and yTest are not recomputed by this
# cell; presumably the flatten / normalize / to_categorical preprocessing has
# to be re-applied before training on the new data. Confirm.
from tensorflow.keras.datasets import fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
