<a href="https://colab.research.google.com/github/Al-ameen007/CIFAR_10/blob/main/MNIST_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Importing the data and some preprocessing

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import tensorflow
from tensorflow import keras

In [2]:
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [3]:
# Normalizing the data and one-hot encoding the labels
from tensorflow.keras.utils import to_categorical
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [4]:
# Splitting the data into training and validation sets as we want to save the test set for the evaluation of the best model
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)

In [5]:
from keras import models
from keras import layers
from tensorflow.keras import optimizers

In [6]:
saved_models = []
accuracies = []

#Base Model

In [7]:
model = models.Sequential()
model.add(layers.Conv2D(64, (5, 5), strides=(2, 2),activation='relu', kernel_initializer='truncated_normal', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
model.add(layers.Dense(10, activation='softmax'))

In [8]:
opt = optimizers.SGD(learning_rate=0.0001, momentum=0.9)
opt_1 = optimizers.SGD(learning_rate=0.05, momentum=0.9)
opt_2 = optimizers.SGD(learning_rate=0.001, momentum=0.9)
opt_3 = optimizers.SGD(learning_rate=0.01, momentum=0.9)
opt_4 = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)
opt_5 = optimizers.RMSprop(learning_rate=0.01, rho=0.9, momentum=0.1,)

In [9]:
def evaluate_model(saved_models, X, y):
  best_model, best_model_idx = saved_models[0], 0
  best_accuracy= (best_model.evaluate(X, y, verbose=1))[1]
  for i in range(1, len(saved_models)):
    loss, acc = saved_models[i].evaluate(X, y, verbose=1)
    if acc > best_accuracy:
      best_model, best_model_idx , best_accuracy= saved_models[i], i, acc
  print('model: ' + str(best_model_idx + 1), 'accuracy: ' + str('%.2f'%(best_accuracy * 100) + ' %'))
  return best_model, acc

In [10]:
def summaries(saved_models):
  for model in saved_models:
    model.summary()

#Trying differnt number of epochs

In [11]:
model_1 = keras.models.clone_model(model)
model_1.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_1.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
model_2 = keras.models.clone_model(model)
model_2.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_2.fit(X_train, y_train, epochs=15, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_2)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [13]:
model_3 = keras.models.clone_model(model)
model_3.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_3.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_3)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
best_model, best_accuracy = evaluate_model(saved_models, X_test, y_test)

model: 3 accuracy: 94.86 %


In [15]:
summaries(saved_models)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 12, 12, 64)        1664      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 6, 6, 64)         0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 64)                147520    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 149,834
Trainable params: 149,834
Non-trainable params: 0
__________________________________________________

#Trying different learning rates

Model 3 was our best model so far, using 20 epochs, so we take it and apply different learning rates to see if we can get better results.

In [16]:
model_4 = keras.models.clone_model(best_model)
opt_1 = optimizers.SGD(learning_rate=0.05, momentum=0.9)
model_4.compile(optimizer=opt_1, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_4.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_4)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [17]:
model_5 = keras.models.clone_model(best_model)
model_5.compile(optimizer=opt_2, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_5.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_5)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
model_6 = keras.models.clone_model(best_model)
model_6.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_6.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_6)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [19]:
best_model, best_accuracy = evaluate_model(saved_models, X_test, y_test)

model: 6 accuracy: 98.99 %


In [20]:
summaries(saved_models)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 12, 12, 64)        1664      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 6, 6, 64)         0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 64)                147520    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 149,834
Trainable params: 149,834
Non-trainable params: 0
__________________________________________________

#Trying different architectures

Our last model was our best model, with 20 epochs and a learning rate of 0.01, we will try and explore different architectures.

In [21]:
model_7 = models.Sequential()
model_7.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_7.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_7.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model_7.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_7.add(layers.Flatten())
model_7.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
model_7.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model_7.add(layers.Dense(10, activation='softmax'))

In [22]:
model_7.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_7.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_7)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [23]:
model_8 = models.Sequential()
model_8.add(layers.Conv2D(16, (3, 3), strides = (2, 2),activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_8.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_8.add(layers.Conv2D(32, (3, 3), strides = (2, 2),activation='relu', kernel_initializer='he_uniform'))
model_8.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_8.add(layers.Flatten())
model_8.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
model_8.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model_8.add(layers.Dense(10, activation='softmax'))

In [24]:
model_8.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_8.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_8)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
model_9 = models.Sequential()
model_9.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_9.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_9.add(layers.Conv2D(32, (5, 5), activation='relu', kernel_initializer='he_uniform'))
model_9.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_9.add(layers.Flatten())
model_9.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model_9.add(layers.Dense(10, activation='softmax'))

In [26]:
model_9.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_9.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_9)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [27]:
model_10 = models.Sequential()
model_10.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_10.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_10.add(layers.Flatten())
model_10.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model_10.add(layers.Dense(10, activation='softmax'))

In [28]:
model_10.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_10.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [29]:
best_model, best_accuracy = evaluate_model(saved_models, X_test, y_test)

model: 7 accuracy: 99.20 %


In [30]:
summaries(saved_models)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 12, 12, 64)        1664      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 6, 6, 64)         0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 64)                147520    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 149,834
Trainable params: 149,834
Non-trainable params: 0
__________________________________________________

#Trying different batch sizes

Our last model always seem to be our best model, reaching an accuarcy of 99.3,
we used 20 epochs, a learning rate of 0.01 and our architecture was a single convultion layer containing 128 filters of size (3, 3) and a max pooling layer followed by a densle layer of 32 neurons.
we will countinue with it and decide to change the batch size this time around.

In [31]:
model_11 = keras.models.clone_model(best_model)
model_11.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_11.fit(X_train, y_train, epochs=20, batch_size=16, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_11)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [32]:
model_12 = keras.models.clone_model(best_model)
model_12.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_12.fit(X_train, y_train, epochs=20, batch_size=64, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_12)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
best_model, best_accuracy = evaluate_model(saved_models, X_test, y_test)

model: 7 accuracy: 99.20 %


In [34]:
summaries(saved_models)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 12, 12, 64)        1664      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 6, 6, 64)         0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 64)                147520    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 149,834
Trainable params: 149,834
Non-trainable params: 0
__________________________________________________

#Trying different activation functions

It seems taht we lucked out and started with the perfect batch size, because model 9 is still our best model, we will try and experiment with different activation functions to see if we can get a better result.

In [35]:
model_13 = models.Sequential()
model_13.add(layers.Conv2D(32, (3, 3), activation='tanh', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_13.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_13.add(layers.Conv2D(32, (3, 3), activation='tanh', kernel_initializer='he_uniform'))
model_13.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_13.add(layers.Flatten())
model_13.add(layers.Dense(64, activation='tanh', kernel_initializer='he_uniform'))
model_13.add(layers.Dense(32, activation='tanh', kernel_initializer='he_uniform'))
model_13.add(layers.Dense(10, activation='softmax'))

In [36]:
model_13.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_13.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_13)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
model_14 = models.Sequential()
model_14.add(layers.Conv2D(32, (3, 3), activation='selu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_14.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_14.add(layers.Conv2D(32, (3, 3), activation='selu', kernel_initializer='he_uniform'))
model_14.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_14.add(layers.Flatten())
model_14.add(layers.Dense(64, activation='selu', kernel_initializer='he_uniform'))
model_14.add(layers.Dense(32, activation='selu', kernel_initializer='he_uniform'))
model_14.add(layers.Dense(10, activation='softmax'))

In [38]:
model_14.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_14.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_14)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [39]:
from keras.layers import LeakyReLU
model_15 = models.Sequential()
model_15.add(layers.Conv2D(32, (3, 3), kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_15.add(LeakyReLU(alpha=0.05))
model_15.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_15.add(layers.Conv2D(32, (3, 3), kernel_initializer='he_uniform'))
model_15.add(LeakyReLU(alpha=0.05))
model_15.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_15.add(layers.Flatten())
model_15.add(layers.Dense(64, kernel_initializer='he_uniform'))
model_15.add(LeakyReLU(alpha=0.05))
model_15.add(layers.Dense(32, kernel_initializer='he_uniform'))
model_15.add(LeakyReLU(alpha=0.05))
model_15.add(layers.Dense(10, activation='softmax'))

In [40]:
model_15.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_15.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_15)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [41]:
best_model, best_accuracy = evaluate_model(saved_models, X_test, y_test)

model: 7 accuracy: 99.20 %


In [42]:
summaries(saved_models)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 12, 12, 64)        1664      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 6, 6, 64)         0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 64)                147520    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 149,834
Trainable params: 149,834
Non-trainable params: 0
__________________________________________________

#Trying different optimizers

In [43]:
model_16 = keras.models.clone_model(best_model)
model_16.compile(optimizer=opt_4, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_16.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_16)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [44]:
model_17 = keras.models.clone_model(best_model)
model_17.compile(optimizer=opt_5, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_17.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_17)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [45]:
best_model, best_accuracy = evaluate_model(saved_models, X_test, y_test)

model: 7 accuracy: 99.20 %


In [46]:
summaries(saved_models)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 12, 12, 64)        1664      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 6, 6, 64)         0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 64)                147520    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 149,834
Trainable params: 149,834
Non-trainable params: 0
__________________________________________________

#Trying different dropout rates

Model 7 is still our best model, we try adding dropout layers and see what diiference it will make.

In [50]:
from keras.layers import Dropout

In [51]:
model_18 = models.Sequential()
model_18.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_18.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.4))
model_18.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model_18.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_18.add(layers.Flatten())
model_18.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
model_18.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model.add(Dropout(0.4))
model_18.add(layers.Dense(10, activation='softmax'))

In [52]:
opt_3 = optimizers.SGD(learning_rate=0.01, momentum=0.9)
model_18.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_18.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_18)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [53]:
model_19 = models.Sequential()
model_19.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_19.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_19.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model_19.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_19.add(Dropout(0.4))
model_19.add(layers.Flatten())
model_19.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
model_19.add(Dropout(0.4))
model_19.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model_19.add(Dropout(0.4))
model_19.add(layers.Dense(10, activation='softmax'))

In [54]:
opt_3 = optimizers.SGD(learning_rate=0.01, momentum=0.9)
model_19.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_19.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_19)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [55]:
model_20 = models.Sequential()
model_20.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_20.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_20.add(Dropout(0.75))
model_20.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model_20.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_20.add(layers.Flatten())
model_20.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
model_20.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model_20.add(Dropout(0.75))
model_20.add(layers.Dense(10, activation='softmax'))

In [56]:
opt_3 = optimizers.SGD(learning_rate=0.01, momentum=0.9)
model_20.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_20.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_20)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [57]:
model_21 = models.Sequential()
model_21.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model_21.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_21.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model_21.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model_21.add(Dropout(0.75))
model_21.add(layers.Flatten())
model_21.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
model_21.add(Dropout(0.75))
model_21.add(layers.Dense(32, activation='relu', kernel_initializer='he_uniform'))
model_21.add(Dropout(0.75))
model_21.add(layers.Dense(10, activation='softmax'))

In [58]:
opt_3 = optimizers.SGD(learning_rate=0.01, momentum=0.9)
model_21.compile(optimizer=opt_3, loss='categorical_crossentropy', metrics=['accuracy'])
history = model_21.fit(X_train, y_train, epochs=10, batch_size=32, shuffle=True,validation_data=(X_val, y_val), verbose=1)
saved_models.append(model_21)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [59]:
best_model, best_accuracy = evaluate_model(saved_models, X_test, y_test)

model: 7 accuracy: 99.20 %


In [60]:
summaries(saved_models)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 12, 12, 64)        1664      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 6, 6, 64)         0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 64)                147520    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 149,834
Trainable params: 149,834
Non-trainable params: 0
__________________________________________________

#Models Evaluation

In [61]:
from operator import itemgetter
for i in range(0, len(saved_models)):
  loss, acc = saved_models[i].evaluate(X_test, y_test, verbose = 0)
  accuracies.append(acc)
  print('models: ' + str(i + 1), '%.2f'%(acc * 100) + '%')

models: 1 92.13%
models: 2 93.85%
models: 3 94.86%
models: 4 98.30%
models: 5 98.37%
models: 6 98.99%
models: 7 99.20%
models: 8 97.16%
models: 9 98.97%
models: 10 98.61%
models: 11 98.80%
models: 12 99.10%
models: 13 98.93%
models: 14 99.00%
models: 15 98.91%
models: 16 98.40%
models: 17 95.14%
models: 18 99.05%
models: 19 98.80%
models: 20 97.39%
models: 21 11.35%
