<a href="https://colab.research.google.com/github/SamuelBFG/DL-studies/blob/master/IA353/EF1p3_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import os

In [2]:
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [3]:
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

x_train shape: (60000, 28, 28)
x_test shape: (10000, 28, 28)
y_train shape: (60000,)
y_test shape: (10000,)


# BASELINE MODEL

In [4]:
model = tf.keras.models.Sequential([
 tf.keras.layers.Flatten(),
 tf.keras.layers.Dense(512, activation=tf.nn.relu),
 tf.keras.layers.Dropout(0.5),
 tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])


model.compile(optimizer='adam',
 loss='sparse_categorical_crossentropy',
 metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fd2d2a676d0>

In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (32, 784)                 0         
_________________________________________________________________
dense (Dense)                (32, 512)                 401920    
_________________________________________________________________
dropout (Dropout)            (32, 512)                 0         
_________________________________________________________________
dense_1 (Dense)              (32, 10)                  5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [6]:
model.evaluate(x_test, y_test)



[0.0734843909740448, 0.977400004863739]

In [7]:
model_json = model.to_json()
json_file = open("model_MLP.json", "w")
json_file.write(model_json)
json_file.close()
model.save_weights("model_MLP.h5")
print("Model saved to disk")
os.getcwd()

Model saved to disk


'/content'

### Working with multiples executions:

In [8]:
executions = 5
histories = []
evaluations = []

for i in range(executions):
  model = tf.keras.models.Sequential([
                                      tf.keras.layers.Flatten(),
                                      tf.keras.layers.Dense(512, activation=tf.nn.relu),
                                      tf.keras.layers.Dropout(0.5),
                                      tf.keras.layers.Dense(10, activation=tf.nn.softmax)
                                      ])
  
  print('## Training model number: ', i+1)

  model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
  
  histories.append(model.fit(x_train, y_train, epochs=5, verbose=0))

  print('#• Test set:')
  evaluations.append(model.evaluate(x_test, y_test))
  print('\n')



## Training model number:  1
#• Test set:


## Training model number:  2
#• Test set:


## Training model number:  3
#• Test set:


## Training model number:  4
#• Test set:


## Training model number:  5
#• Test set:




In [9]:
histories[0].history.keys()

dict_keys(['loss', 'accuracy'])

In [10]:
histories[-1].history['accuracy'] # Last training acc log per epochs

[0.9218999743461609,
 0.9593166708946228,
 0.9659000039100647,
 0.9710500240325928,
 0.974133312702179]

In [11]:
evaluations[0] # Loss and acc for the first model (LIST)

[0.064346544444561, 0.9801999926567078]

In [12]:
acc_val = []
acc_test = []

for i in range(executions):
  acc_val.append(sum(histories[i].history['accuracy']) / len(histories[i].history['accuracy']))
  acc_test.append(evaluations[i][1])

acc_val

[0.9577700138092041,
 0.9578333258628845,
 0.9582966685295105,
 0.957426655292511,
 0.958459997177124]

In [13]:
acc_test

[0.9801999926567078,
 0.9790999889373779,
 0.9771000146865845,
 0.9793999791145325,
 0.9794999957084656]

#### Average validation accuracy (training set)

In [14]:
avg_acc_val = sum(acc_val)/len(acc_val)
avg_acc_val

0.9579573321342469

#### Average test accuracy

In [15]:
avg_acc_test = sum(acc_test)/len(acc_test)
avg_acc_test

0.9790599942207336

# MODIFIED MODEL

• Goal: Better test accuracy results

### Holdout:

In [16]:
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

x_train shape: (60000, 28, 28)
x_test shape: (10000, 28, 28)
y_train shape: (60000,)
y_test shape: (10000,)


One-hot encoding train and test labels:

In [17]:
from keras.utils.np_utils import to_categorical

y_train = to_categorical(y_train, num_classes = 10)
y_test = to_categorical(y_test, num_classes = 10)

20% for validation set

80% for training set

In [18]:
from sklearn.model_selection import train_test_split
train_data, train_data_val, train_labels, train_labels_val = train_test_split(x_train, y_train, test_size = 0.2, random_state=261663)

In [19]:
print('train_data shape:', x_train.shape)
print('train_data_val shape:', train_data_val.shape)
print('train_labels shape:', y_train.shape)
print('train_labels_val shape:', train_labels_val.shape)
print('x_test shape:', x_test.shape)
print('y_test shape:', y_test.shape)


train_data shape: (60000, 28, 28)
train_data_val shape: (12000, 28, 28)
train_labels shape: (60000, 10)
train_labels_val shape: (12000, 10)
x_test shape: (10000, 28, 28)
y_test shape: (10000, 10)


In [20]:
train_data = tf.expand_dims(train_data, axis=-1)
train_data_val = tf.expand_dims(train_data_val, axis=-1)

x_test = tf.expand_dims(x_test, axis=-1)

### Model

Inspired by [LeNet-5, 1998](https://ieeexplore.ieee.org/document/726791), the architecture follows the structure: conv->pool->conv->pool->fc->output

Modifications:
• Adam with learning rate 0.001
• Mini-batch size of 64

In [21]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D


model = Sequential()

model.add(Conv2D(filters = 16, kernel_size = (5,5), padding = 'Same', 
                 activation ='relu', input_shape = (28,28,1)))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

model.add(Flatten())
model.add(Dense(256, activation = "relu"))

model.add(Dense(10, activation = "softmax"))

In [22]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 16)        416       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 32)        12832     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 1568)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 256)               401664    
_________________________________________________________________
dense_13 (Dense)             (None, 10)               

In [23]:
from keras.optimizers import Adam
model.compile(optimizer = Adam(lr=0.001),
              loss = "categorical_crossentropy",
              metrics=["accuracy"])

In [24]:
history = model.fit(train_data, train_labels,
                    batch_size = 64, 
                    epochs = 5, 
                    validation_data = (train_data_val, train_labels_val), verbose = 2)

Epoch 1/5
750/750 - 5s - loss: 0.1755 - accuracy: 0.9473 - val_loss: 0.0712 - val_accuracy: 0.9774
Epoch 2/5
750/750 - 2s - loss: 0.0514 - accuracy: 0.9830 - val_loss: 0.0523 - val_accuracy: 0.9832
Epoch 3/5
750/750 - 2s - loss: 0.0345 - accuracy: 0.9890 - val_loss: 0.0373 - val_accuracy: 0.9877
Epoch 4/5
750/750 - 2s - loss: 0.0250 - accuracy: 0.9921 - val_loss: 0.0317 - val_accuracy: 0.9899
Epoch 5/5
750/750 - 2s - loss: 0.0200 - accuracy: 0.9934 - val_loss: 0.0328 - val_accuracy: 0.9906


In [25]:
model.evaluate(x_test, y_test)



[0.035248689353466034, 0.9883000254631042]

### Working with multiples executions:

In [28]:
executions = 5
histories = []
evaluations = []

for i in range(executions):
  tf.keras.backend.clear_session()
  model = 0
  model = Sequential()
  model.add(Conv2D(filters = 16, kernel_size = (5,5), padding = 'Same', 
                 activation ='relu', input_shape = (28,28,1)))
  model.add(MaxPool2D(pool_size=(2,2)))
  model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'Same', 
                 activation ='relu'))
  model.add(MaxPool2D(pool_size=(2,2)))
  model.add(Flatten())
  model.add(Dense(256, activation = "relu"))
  model.add(Dense(10, activation = "softmax"))
  
  print('## Training model number: ', i+1)

  model.compile(optimizer = Adam(lr=0.001),
                loss = "categorical_crossentropy",
                metrics=['accuracy'])
  
  histories.append(model.fit(train_data, train_labels,
                             batch_size = 64, 
                             epochs = 5, 
                             validation_data = (train_data_val, train_labels_val), verbose = 0))

  print('#• Test set:')
  evaluations.append(model.evaluate(x_test, y_test))
  print('\n')

## Training model number:  1
#• Test set:


## Training model number:  2
#• Test set:


## Training model number:  3
#• Test set:


## Training model number:  4
#• Test set:


## Training model number:  5
#• Test set:




In [29]:
histories[0].history.keys()

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

In [30]:
evaluations[0]

[0.03155602887272835, 0.989300012588501]

In [31]:
acc_val = []
acc_test = []

for i in range(executions):
  acc_val.append(sum(histories[i].history['val_accuracy']) / len(histories[i].history['val_accuracy']))
  acc_test.append(evaluations[i][1])

acc_val

[0.986733329296112,
 0.9881166577339172,
 0.9878166675567627,
 0.9879333257675171,
 0.9877333402633667]

In [32]:
acc_test

[0.989300012588501,
 0.9919000267982483,
 0.9919999837875366,
 0.9891999959945679,
 0.9872000217437744]

#### Average validation accuracy (validation set)

In [33]:
avg_acc_val = sum(acc_val)/len(acc_val)
avg_acc_val

0.9876666641235351

#### Average test accuracy

In [34]:
avg_acc_test = sum(acc_test)/len(acc_test)
avg_acc_test

0.9899200081825257