In [None]:
# import everything
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.callbacks import TensorBoard
import time


# Pin the global seeds of both random number sources used in this notebook
# (TensorFlow and NumPy) to the same fixed value.
tf.random.set_seed(1)
np.random.seed(1)

In [None]:
# Read the pre-split dataset from the .npy files in ../my_dataset.
# X_* arrays are the model inputs, y_* arrays the matching labels.
X_train, X_test, X_validate = (
    np.load('../my_dataset/X_train.npy'),
    np.load('../my_dataset/X_test.npy'),
    np.load('../my_dataset/X_validate.npy'),
)

y_train, y_test, y_validate = (
    np.load('../my_dataset/y_train.npy'),
    np.load('../my_dataset/y_test.npy'),
    np.load('../my_dataset/y_validate.npy'),
)

## Conclusions from the runs (Iteration 2)

### 1. Failure of the dense layer
The memory capabilities provided by the dense layers have led to much worse results. All of the models with dense layers had less than 40% accuracy. For the optimal model, no dense layers will be used besides the final output layer. <br><br>
<img src="iteration2_image_stats\dense_layer_accuracy_FAIL.svg">

### 2. Best performing models
From the validation accuracy graph we see pretty similar results from the models with no dense layers. Here are the results on the 45th step: <br><br>
<img src="iteration2_image_stats\no_dense_layer_accuracy.svg">
| Model             | Validation accuracy   |
| -----------       | -----------           |
| 4-conv-32-nodes   | 89.38 %               |
| 3-conv-64-nodes   | 88.75 %               |
| 3-conv-48-nodes   | 88.13 %               |
| 3-conv-32-nodes   | 88.13 %               |
| 4-conv-48-nodes   | 87.50 %               |
| 3-conv-16-nodes   | 86.25 %               |
| 4-conv-64-nodes   | 85.00 %               |
| 4-conv-16-nodes   | 78.75 %               |

However, we should not draw conclusions just from the final step, because the models seem to hit peak validation accuracy earlier and then start to overfit. For this reason, let's check the graphs for 3 and 4 convolutional layers separately.

* #### 3 Convolutional layers <br>

##### Accuracy
<img src="iteration2_image_stats\3_conv_layer_accuracy.svg">

##### Loss
<img src="iteration2_image_stats\3_conv_layer_loss.svg">

We can see that the best results for each model occur much earlier and at different points: <br>
| Colour    | Step | Model              | Validation accuracy   |
| --------- | ---- | ------             | -----------           |
| Blue      | 16   | 3-conv-16-nodes    | 89.38 %               |
| Cyan      | 26   | 3-conv-32-nodes    | 90.00 %               |
| Green     | 16   | 3-conv-48-nodes    | 90.00 %               |
| Orange    | 26   | 3-conv-64-nodes    | 89.38 %               |


* #### 4 Convolutional layers <br>

##### Accuracy
<img src="iteration2_image_stats\4_conv_layer_accuracy.svg">

##### Loss
<img src="iteration2_image_stats\4_conv_layer_loss.svg">

We can see that the best results for each model occur much earlier and at different points: <br>
| Colour    | Step | Model              | Validation accuracy   |
| --------- | ---- | ------             | -----------           |
| Orange    | 32   | 4-conv-16-nodes    | 81.88 %               |
| Pink      | 39   | 4-conv-32-nodes    | 90.00 %               |
| Green     | 16   | 4-conv-48-nodes    | 90.00 %               |
| Blue      | 17   | 4-conv-64-nodes    | 89.38 %               |


By grouping the charts by number of convolutional layers we can safely conclude that: <br>
* The best results are with 32 and 48 convolutional nodes
* It is now necessary to implement dropout and early stopping to prevent overfitting


In [None]:
# ---- Hyper-parameter grid for the architecture sweep ----

# Number of passes over the training data for every model
epochs = 45

# Convolutional part: how many Conv2D blocks to stack, and the number of
# filters given to each Conv2D layer
conv_layers = [3, 4]
conv_nodes = [16, 32, 48, 64]

# Dense part: widths tried for the hidden dense layer
dense_nodes = [8, 16, 24]
# Hidden dense layer counts covered by the sweep (none / one)
dense_layers = [0, 1]

# Sweep 1 -- zero dense layers: the convolutional stack is flattened and fed
# straight into the softmax output layer.
for conv_layer in conv_layers:
    for conv_node in conv_nodes:
        # Run name = architecture description + unix timestamp; it is used for
        # both the TensorBoard log directory and the saved-model directory.
        NAME = "{}-conv-{}-convNodes-{}-dense-{}-denseNodes-{}".format(conv_layer, conv_node, 0, 0, int(time.time()))
        print(NAME)

        model = Sequential()

        # One Conv2D -> ReLU -> 2x2 max-pool block per entry. The first block
        # always exists and is the only one that declares the input shape
        # (the shape of one training sample, i.e. X_train without axis 0).
        per_block_kwargs = [{'input_shape': X_train.shape[1:]}] + [{}] * (conv_layer - 1)
        for conv_kwargs in per_block_kwargs:
            model.add(Conv2D(conv_node, (3, 3), **conv_kwargs))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Flatten())

        # Output layer: one softmax unit per distinct label found in y_train
        output_neurons = np.unique(y_train).size
        model.add(tf.keras.layers.Dense(output_neurons, activation=tf.nn.softmax))

        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        # Log the training curves of this run under logs/<NAME>
        tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))
        model.fit(X_train, y_train,
                  validation_data=(X_validate, y_validate),
                  epochs=epochs,
                  callbacks=[tensorboard])

        # Persist the trained model under models/<NAME>
        model.save("models/"+NAME)

# one dense layer
# Sweep 2: same convolutional stacks as the zero-dense sweep, but with one
# hidden dense layer (width taken from dense_nodes) between Flatten and the
# output layer. One model is trained, logged and saved per combination of
# conv depth x conv filters x dense width.

# output layer (as big as the number of words we teach); the label set does
# not change between runs, so it is computed once instead of once per model
output_neurons = len(np.unique(y_train))

for conv_layer in conv_layers:
    for conv_node in conv_nodes:
        for dense_node in dense_nodes:
            # Run name = architecture description + unix timestamp; used for
            # the TensorBoard log directory and the saved-model directory.
            NAME = "{}-conv-{}-convNodes-{}-dense-{}-denseNodes-{}".format(conv_layer, conv_node, 1, dense_node, int(time.time()))
            print(NAME)

            model = Sequential()

            # First conv block declares the input shape (one training sample)
            model.add(Conv2D(conv_node, (3, 3), input_shape=X_train.shape[1:]))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            # Remaining conv blocks: Conv2D -> ReLU -> 2x2 max-pool
            for _ in range(conv_layer - 1):
                model.add(Conv2D(conv_node, (3, 3)))
                model.add(Activation('relu'))
                model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Flatten())

            # smaller dense layer
            # Hidden layers use ReLU. Softmax is reserved for the output layer:
            # applied here it would normalise the dense_node activations to sum
            # to 1 before the classifier ever sees them, which throws away most
            # of the information extracted by the convolutional stack.
            model.add(tf.keras.layers.Dense(dense_node, activation=tf.nn.relu))

            # Output layer: softmax over the classes
            model.add(tf.keras.layers.Dense(output_neurons, activation=tf.nn.softmax))

            # Log the training curves of this run under logs/<NAME>
            tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

            # Integer class labels -> sparse categorical cross-entropy
            model.compile(loss='sparse_categorical_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])

            model.fit(X_train, y_train,
                      epochs=epochs,
                      validation_data=(X_validate, y_validate),
                      callbacks=[tensorboard])

            # save the model
            model.save("models/"+NAME)