# SWMAL Exercise


## Convolutional Neural Networks (CNNs)


Excercise 9 from [HOML], p496  (slighty modified)::

__"9. Build your own CNN from scratch and try to achieve the highest possible accuracy on MNIST."__

For the journal: 

* write an introduction to CNNs (what are CNNs, what is a convolution layer, etc..), 
* document your experiments towards the end-goal of reaching 'a high accuracy' (what did you try, what work/did not work), 
* document how you use '_generalization_' in your setup (us of simple hold-out/train-test split or k-fold, or etc..),
* produce some sort of '_learning-curve_' that illustrates the drop in cost- or increase in score-function with respect to, say training iteration (for inspiration see fig 4.20, 10-12 or 10.17 in [HOML])
* document the final CNN setup (layers etc., perhaps as a graph/drawing), 
* discus on your iterations towards the end-goal and other findings you had,
* and, as always, write a conclusion.

In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical #This works dont worry


def MNIST_InitData():
    return fetch_openml('mnist_784', return_X_y=True, cache=True, as_frame=False)

def MNIST_GetDataSet(X):
    return (X / 255)

MNIST_X, MNIST_Y = MNIST_InitData()
MNIST_X = MNIST_GetDataSet(MNIST_X)

reshaped_MNIST_X = MNIST_X.reshape(len(MNIST_X), 28, 28)
print(reshaped_MNIST_X.shape)

x_train, x_test, y_train, y_test = train_test_split(reshaped_MNIST_X, MNIST_Y, test_size=0.2, random_state=69)
y_train = to_categorical(y_train, dtype ="uint8")
y_test = to_categorical(y_test, dtype ="uint8")

(70000, 28, 28)


In [2]:
print(x_train.shape)
print(y_test.shape)
print(y_test.shape)

(56000, 28, 28)
(14000, 10)
(14000, 10)


In [3]:
import keras
from keras import layers

# BASE

seq = keras.Sequential()

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='sgd', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
)) # optimizer='SGD', loss='MSE'

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d (AverageP  (None, 13, 13, 6)        0         
 ooling2D)                                                       
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 32)        1760      
                                                                 
 average_pooling2d_1 (Averag  (None, 5, 5, 32)         0         
 ePooling2D)                                                     
                                                                 
 flatten (Flatten)           (None, 800)               0         
                                                                 
 dense (Dense)               (None, 120)               9

In [4]:
import keras
from keras import layers

# Use Adam & MaxPooling

seq = keras.Sequential()

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.MaxPooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.MaxPooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='adam', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
)) # optimizer='SGD', loss='MSE'

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 26, 26, 6)         60        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 6)        0         
 )                                                               
                                                                 
 conv2d_3 (Conv2D)           (None, 11, 11, 32)        1760      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 32)         0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 800)               0         
                                                                 
 dense_3 (Dense)             (None, 120)              

In [5]:
import keras
from keras import layers

# Use extra Conv2D

seq = keras.Sequential()

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())
seq.add(layers.Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.MaxPooling2D())
seq.add(layers.Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='sgd', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
)) # optimizer='SGD', loss='MSE'

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d_2 (Averag  (None, 13, 13, 6)        0         
 ePooling2D)                                                     
                                                                 
 conv2d_5 (Conv2D)           (None, 11, 11, 16)        880       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 5, 5, 16)         0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 3, 3, 16)          2320      
                                                                 
 average_pooling2d_3 (Averag  (None, 1, 1, 16)        

In [6]:
import keras
from keras import layers

# remove Dense layer

seq = keras.Sequential()

seq.add(layers.Conv2D(filters= 6, kernel_size=(3,3), activation='sigmoid', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3,3), activation='sigmoid', input_shape=(28,28,1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

seq.compile(optimizer='sgd', loss=keras.losses.CategoricalCrossentropy(
    from_logits=True,
)) # optimizer='SGD', loss='MSE'

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_7 (Conv2D)           (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d_4 (Averag  (None, 13, 13, 6)        0         
 ePooling2D)                                                     
                                                                 
 conv2d_8 (Conv2D)           (None, 11, 11, 32)        1760      
                                                                 
 average_pooling2d_5 (Averag  (None, 5, 5, 32)         0         
 ePooling2D)                                                     
                                                                 
 flatten_3 (Flatten)         (None, 800)               0         
                                                                 
 dense_9 (Dense)             (None, 120)              

In [7]:
print(x_train.shape)
print(y_train.shape)

(56000, 28, 28)
(56000, 10)


In [8]:
seq.fit(x_train, y_train, epochs=5)

Epoch 1/5


  return dispatch_target(*args, **kwargs)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1f76f393e80>

In [9]:
results = seq.predict(x_test)
print(results)
print(seq.evaluate(x_test, y_test))

[[9.86018777e-01 1.33960454e-10 2.45475414e-04 ... 1.78108949e-05
  5.42441907e-04 5.53429782e-05]
 [6.73981049e-05 5.08157492e-01 4.32523072e-01 ... 4.13141679e-05
  2.12407839e-02 1.03702281e-04]
 [7.83962532e-05 2.85446440e-04 1.32202813e-02 ... 1.02620959e-06
  1.65680367e-02 1.68064435e-04]
 ...
 [1.39931235e-05 3.05276993e-03 8.28408837e-01 ... 2.34307532e-04
  1.43016204e-01 1.56395079e-04]
 [6.51571600e-06 8.95320351e-09 4.39394154e-02 ... 1.09146008e-07
  2.49687087e-04 3.55187649e-06]
 [9.65725243e-01 1.25971999e-09 8.97517719e-04 ... 1.30342869e-06
  1.60870329e-03 9.17784018e-06]]
0.428378164768219


In [10]:
from tensorflow.keras import utils

utils.plot_model(
    seq,
    to_file="model.png",
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    layer_range=None,
    show_layer_activations=False,
)


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


REVISIONS||
---------||
2021-10-20| CEF, initial version, clone from [HOML].
2021-10-26| CEF, added learning curve item.
2022-01-25| CEF, update to SWMAL F22.
