# SWMAL Exercise


## Convolutional Neural Networks (CNNs)


Excercise 9 from [HOML], p496  (slighty modified)::

__"9. Build your own CNN from scratch and try to achieve the highest possible accuracy on MNIST."__

For the journal: 

* write an introduction to CNNs (what are CNNs, what is a convolution layer, etc..), 
* document your experiments towards the end-goal of reaching 'a high accuracy' (what did you try, what work/did not work), 
* document how you use '_generalization_' in your setup (us of simple hold-out/train-test split or k-fold, or etc..),
* produce some sort of '_learning-curve_' that illustrates the drop in cost- or increase in score-function with respect to, say training iteration (for inspiration see fig 4.20, 10-12 or 10.17 in [HOML])
* document the final CNN setup (layers etc., perhaps as a graph/drawing), 
* discus on your iterations towards the end-goal and other findings you had,
* and, as always, write a conclusion.

In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical #This works dont worry


def MNIST_InitData():
    return fetch_openml('mnist_784', return_X_y=True, cache=True, as_frame=False)

def MNIST_GetDataSet(X):
    return (X / 255)

MNIST_X, MNIST_Y = MNIST_InitData()
MNIST_X = MNIST_GetDataSet(MNIST_X)

reshaped_MNIST_X = MNIST_X.reshape(len(MNIST_X), 28, 28)
print(reshaped_MNIST_X.shape)

x_train, x_test, y_train, y_test = train_test_split(reshaped_MNIST_X, MNIST_Y, test_size=0.2, random_state=69)
y_train = to_categorical(y_train, dtype ="uint8")
y_test = to_categorical(y_test, dtype ="uint8")

(70000, 28, 28)


In [2]:
print(x_train.shape)
print(y_test.shape)
print(y_test.shape)

(56000, 28, 28)
(14000, 10)
(14000, 10)


In [70]:
# Array that holds all the compiled models
models = []

In [71]:
import keras
from keras import layers

# BASE
seq = keras.Sequential(name="BASE")

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='sgd', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
),metrics=[keras.metrics.Accuracy()]) # optimizer='SGD', loss='MSE'

models.append(seq)

Model: "BASE"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_53 (Conv2D)          (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d_36 (Avera  (None, 13, 13, 6)        0         
 gePooling2D)                                                    
                                                                 
 conv2d_54 (Conv2D)          (None, 11, 11, 32)        1760      
                                                                 
 average_pooling2d_37 (Avera  (None, 5, 5, 32)         0         
 gePooling2D)                                                    
                                                                 
 flatten_24 (Flatten)        (None, 800)               0         
                                                                 
 dense_67 (Dense)            (None, 120)               96120  

In [72]:
import keras
from keras import layers

# Use Adam & MaxPooling

seq = keras.Sequential(name="ADAM")

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.MaxPooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.MaxPooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='adam', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
),metrics=[keras.metrics.Accuracy()]) # optimizer='SGD', loss='MSE'

models.append(seq)

Model: "ADAM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_55 (Conv2D)          (None, 26, 26, 6)         60        
                                                                 
 max_pooling2d_17 (MaxPoolin  (None, 13, 13, 6)        0         
 g2D)                                                            
                                                                 
 conv2d_56 (Conv2D)          (None, 11, 11, 32)        1760      
                                                                 
 max_pooling2d_18 (MaxPoolin  (None, 5, 5, 32)         0         
 g2D)                                                            
                                                                 
 flatten_25 (Flatten)        (None, 800)               0         
                                                                 
 dense_70 (Dense)            (None, 120)               96120  

In [73]:
import keras
from keras import layers

# Use extra Conv2D

seq = keras.Sequential(name="Conv2D")

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())
seq.add(layers.Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.MaxPooling2D())
seq.add(layers.Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='sgd', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
),metrics=[keras.metrics.Accuracy()]) # optimizer='SGD', loss='MSE'

models.append(seq)

Model: "Conv2D"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_57 (Conv2D)          (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d_38 (Avera  (None, 13, 13, 6)        0         
 gePooling2D)                                                    
                                                                 
 conv2d_58 (Conv2D)          (None, 11, 11, 16)        880       
                                                                 
 max_pooling2d_19 (MaxPoolin  (None, 5, 5, 16)         0         
 g2D)                                                            
                                                                 
 conv2d_59 (Conv2D)          (None, 3, 3, 16)          2320      
                                                                 
 average_pooling2d_39 (Avera  (None, 1, 1, 16)         0    

In [74]:
import keras
from keras import layers

# remove Dense layer

seq = keras.Sequential(name="Dense")

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='sigmoid', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3,3), activation='sigmoid', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='sgd', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
),metrics=[keras.metrics.Accuracy()]) # optimizer='SGD', loss='MSE'

models.append(seq)

Model: "Dense"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_60 (Conv2D)          (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d_40 (Avera  (None, 13, 13, 6)        0         
 gePooling2D)                                                    
                                                                 
 conv2d_61 (Conv2D)          (None, 11, 11, 32)        1760      
                                                                 
 average_pooling2d_41 (Avera  (None, 5, 5, 32)         0         
 gePooling2D)                                                    
                                                                 
 flatten_27 (Flatten)        (None, 800)               0         
                                                                 
 dense_76 (Dense)            (None, 120)               96120 

In [75]:
import time
model_results = {}

total_start_time = time.time()
# For each model fit 10 epochs and for each epoch save the eval for train and test datasets
for model in models:
    train_results = []
    test_results = []
    print("Model %s:" % (model.name))

    for i in range(10):
        print('\tEpoch %d' % (i))
        start_time = time.time()
        model.fit(x_train, y_train, epochs=1, verbose=0)
        train_val = model.evaluate(x_train, y_train, verbose=0)
        train_results.append(train_val)
        test_val = model.evaluate(x_test, y_test, verbose=0)
        test_results.append(test_val)
        print("\tTime elapsed = {0} sec".format(round(time.time()-start_time,2)))

    model_results[model.name] = (train_results, test_results)
    print('Train results:')
    for tval in train_results:
        print("loss: %5.8f \t- acc: %5.8f" % (tval[0], tval[1]))

    print('\nTest results:')
    for tval in test_results:
        print("loss: %5.8f \t- acc: %5.8f" % (tval[0], tval[1]))

print("\tTime elapsed = {0} sec".format(round(time.time()-total_start_time,2)))

Model BASE:
	Epoch 0


  return dispatch_target(*args, **kwargs)


	Time elapsed = 79.0
	Epoch 1
	Time elapsed = 71.0
	Epoch 2
	Time elapsed = 70.0
	Epoch 3
	Time elapsed = 71.0
	Epoch 4
	Time elapsed = 70.0
	Epoch 5
	Time elapsed = 70.0
	Epoch 6
	Time elapsed = 70.0
	Epoch 7
	Time elapsed = 69.0
	Epoch 8
	Time elapsed = 68.0
	Epoch 9
	Time elapsed = 70.0
Train results:
loss: 0.29256228 	- acc: 0.00000000
loss: 0.19548905 	- acc: 0.00000179
loss: 0.14913721 	- acc: 0.00000357
loss: 0.11614167 	- acc: 0.00003214
loss: 0.10271072 	- acc: 0.00003929
loss: 0.09138685 	- acc: 0.00018929
loss: 0.10323255 	- acc: 0.00030536
loss: 0.08153365 	- acc: 0.00051250
loss: 0.07787854 	- acc: 0.00057143
loss: 0.06307453 	- acc: 0.00083036

Test results:
loss: 0.30082771 	- acc: 0.00000000
loss: 0.20379822 	- acc: 0.00000000
loss: 0.15754873 	- acc: 0.00000714
loss: 0.12734379 	- acc: 0.00006429
loss: 0.11655109 	- acc: 0.00006429
loss: 0.10630090 	- acc: 0.00017143
loss: 0.11675653 	- acc: 0.00036429
loss: 0.09722948 	- acc: 0.00054286
loss: 0.09470630 	- acc: 0.0006

In [47]:
print(seq.name)

print('Train results:')
for tval in train_results:
    print("loss: %5.8f \t- acc: %5.8f" % (tval[0], tval[1]))

print('\nTest results:')
for tval in test_results:
    print("loss: %5.8f \t- acc: %5.8f" % (tval[0], tval[1]))

Removed Dense layer
Train results:
loss: 0.27132291 	- acc: 0.00000000
loss: 0.19789621 	- acc: 0.00007679
loss: 0.15082665 	- acc: 0.00000357
loss: 0.12329338 	- acc: 0.00007500
loss: 0.11290929 	- acc: 0.00002143

Test results:
loss: 0.27190727 	- acc: 0.00000000
loss: 0.20477143 	- acc: 0.00009286
loss: 0.15918493 	- acc: 0.00000000
loss: 0.13293028 	- acc: 0.00012143
loss: 0.12209960 	- acc: 0.00005714


In [16]:
results = seq.predict(x_test)
print(results)
print(seq.evaluate(x_test, y_test))

[[9.1470075e-01 1.6290819e-08 1.8611152e-03 ... 2.0238457e-04
  2.5736915e-03 2.9833813e-04]
 [2.1608108e-04 6.2580562e-01 1.6144083e-01 ... 4.3501183e-03
  2.5767220e-02 2.3605800e-03]
 [2.8630038e-04 3.6705823e-03 3.7862506e-02 ... 3.3295149e-04
  3.9914619e-02 1.0040342e-02]
 ...
 [5.2151759e-04 8.4010577e-03 4.9488333e-01 ... 6.8453453e-03
  1.9727719e-01 3.0113619e-03]
 [4.0944951e-04 8.2575264e-07 5.5926446e-02 ... 4.3923959e-05
  7.2406110e-04 6.3879648e-04]
 [7.8199673e-01 1.4679162e-07 1.4951080e-02 ... 5.4814718e-05
  2.7454974e-02 3.6626324e-04]]
[0.6409022212028503, 0.0]


In [76]:
from tensorflow.keras import utils

utils.plot_model(
    seq,
    to_file="model.png",
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    layer_range=None,
    show_layer_activations=False,
)


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


REVISIONS||
---------||
2021-10-20| CEF, initial version, clone from [HOML].
2021-10-26| CEF, added learning curve item.
2022-01-25| CEF, update to SWMAL F22.
