# SWMAL Exercise


## Convolutional Neural Networks (CNNs)


Exercise 9 from [HOML], p. 496 (slightly modified):

__"9. Build your own CNN from scratch and try to achieve the highest possible accuracy on MNIST."__

For the journal: 

* write an introduction to CNNs (what are CNNs, what is a convolution layer, etc..), 
* document your experiments towards the end-goal of reaching 'a high accuracy' (what did you try, what worked/did not work),
* document how you use '_generalization_' in your setup (use of a simple hold-out/train-test split, k-fold, etc.),
* produce some sort of '_learning-curve_' that illustrates the drop in cost- or increase in score-function with respect to, say training iteration (for inspiration see fig 4.20, 10-12 or 10.17 in [HOML])
* document the final CNN setup (layers etc., perhaps as a graph/drawing), 
* discuss your iterations towards the end-goal and other findings you had,
* and, as always, write a conclusion.

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical #This works dont worry


def MNIST_InitData():
    """Fetch the 'mnist_784' dataset from OpenML (using the local cache).

    Returns:
        The ``(data, target)`` pair that ``fetch_openml`` produces when called
        with ``return_X_y=True``. ``as_frame=False`` asks for array output
        instead of pandas objects.
    """
    dataset = fetch_openml(
        'mnist_784',
        return_X_y=True,
        cache=True,
        as_frame=False,
    )
    return dataset

def MNIST_GetDataSet(X):
    """Scale pixel values by dividing them by 255.

    Args:
        X: A number or array-like object supporting true division; values in
            the 0-255 range end up in the 0-1 range.

    Returns:
        The result of ``X / 255``.
    """
    scaled = X / 255
    return scaled

# Load the MNIST samples/labels and scale the pixel values by 1/255.
MNIST_X, MNIST_Y = MNIST_InitData()
MNIST_X = MNIST_GetDataSet(MNIST_X)

# Turn every flat sample into a 28x28 image with an explicit trailing channel
# axis, so the data matches the input_shape=(28, 28, 1) that the Conv2D models
# are declared with instead of relying on implicit rank adjustment by Keras.
reshaped_MNIST_X = MNIST_X.reshape(len(MNIST_X), 28, 28, 1)
print(reshaped_MNIST_X.shape)

# Simple hold-out split: 80 % training / 20 % test, with a fixed seed so the
# split is reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    reshaped_MNIST_X, MNIST_Y, test_size=0.2, random_state=69
)

# One-hot encode the labels for the 10-unit softmax output layer.
# num_classes is pinned so both splits always get the same width, and the
# uint8 cast is done with astype() because newer Keras releases no longer
# accept a dtype argument in to_categorical().
y_train = to_categorical(y_train, num_classes=10).astype("uint8")
y_test = to_categorical(y_test, num_classes=10).astype("uint8")

In [None]:
# Sanity-check the shapes of both splits. The original printed y_test twice
# and never showed the training labels or the test features.
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

In [None]:
import keras
from keras import layers

# BASE: two Conv2D + AveragePooling2D stages followed by a 120-84-10 dense
# head, trained with SGD.

seq = keras.Sequential()

# Only the first layer declares input_shape; later layers infer their input
# from the layer before them.
seq.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

# The output layer already applies softmax, so the loss must treat the model
# output as probabilities (from_logits=False) rather than raw logits.
# Accuracy is tracked because it is the score the exercise asks to maximise.
seq.compile(
    optimizer='sgd',
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
import keras
from keras import layers

# Use Adam & MaxPooling: same layout as the base model, but with
# MaxPooling2D instead of AveragePooling2D and the Adam optimizer.

seq = keras.Sequential()

# Only the first layer declares input_shape; later layers infer their input
# from the layer before them.
seq.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
seq.add(layers.MaxPooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
seq.add(layers.MaxPooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

# The output layer already applies softmax, so the loss must treat the model
# output as probabilities (from_logits=False) rather than raw logits.
# Accuracy is tracked because it is the score the exercise asks to maximise.
seq.compile(
    optimizer='adam',
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
import keras
from keras import layers

# Use extra Conv2D: three convolution stages (6, 16 and 16 filters) with
# average / max / average pooling between them, trained with SGD.

seq = keras.Sequential()

# Only the first layer declares input_shape; later layers infer their input
# from the layer before them.
seq.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
seq.add(layers.AveragePooling2D())
seq.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
seq.add(layers.MaxPooling2D())
seq.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=84, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

# The output layer already applies softmax, so the loss must treat the model
# output as probabilities (from_logits=False) rather than raw logits.
# Accuracy is tracked because it is the score the exercise asks to maximise.
seq.compile(
    optimizer='sgd',
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
import keras
from keras import layers

# remove Dense layer: the 84-unit hidden Dense layer is dropped, leaving a
# 120-10 dense head. Note that this variant also uses sigmoid instead of relu
# in the convolution layers.

seq = keras.Sequential()

# Only the first layer declares input_shape; later layers infer their input
# from the layer before them.
seq.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='sigmoid', input_shape=(28, 28, 1)))
seq.add(layers.AveragePooling2D())

seq.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='sigmoid'))
seq.add(layers.AveragePooling2D())

seq.add(layers.Flatten())

seq.add(layers.Dense(units=120, activation='relu'))
seq.add(layers.Dense(units=10, activation='softmax'))

seq.summary()

# The output layer already applies softmax, so the loss must treat the model
# output as probabilities (from_logits=False) rather than raw logits.
# Accuracy is tracked because it is the score the exercise asks to maximise.
seq.compile(
    optimizer='sgd',
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Show the training feature and label shapes, one per line, before fitting.
print(x_train.shape, y_train.shape, sep="\n")

In [None]:
# Keep the returned History object so the per-epoch loss/metric values can be
# plotted as a learning curve. A fraction of the training data is held out
# for validation, so generalization can be monitored during training without
# touching the test split.
history = seq.fit(x_train, y_train, epochs=5, validation_split=0.1)

In [None]:
# Model output (one row of softmax scores per test image).
results = seq.predict(x_test)
print(results)

# Score the model on the held-out test split and show what evaluate() returns.
test_scores = seq.evaluate(x_test, y_test)
print(test_scores)

In [None]:
from tensorflow.keras import utils

# Draw the current model's layer graph and write it to model.png.
plot_options = dict(
    to_file="model.png",
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",  # top-to-bottom layout
    expand_nested=False,
    dpi=96,
    layer_range=None,
    show_layer_activations=False,
)
utils.plot_model(seq, **plot_options)


REVISIONS||
---------||
2021-10-20| CEF, initial version, clone from [HOML].
2021-10-26| CEF, added learning curve item.
2022-01-25| CEF, update to SWMAL F22.
