In [1]:
# import everything
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.callbacks import TensorBoard
import time


# Fix the global NumPy and TensorFlow seeds to one shared value.
SEED = 1
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Read the pre-saved .npy arrays from ../my_dataset, grouped per split:
# X_* holds the model inputs and y_* the matching labels.

# training split
X_train = np.load('../my_dataset/X_train.npy')
y_train = np.load('../my_dataset/y_train.npy')

# validation split
X_validate = np.load('../my_dataset/X_validate.npy')
y_validate = np.load('../my_dataset/y_validate.npy')

# test split
X_test = np.load('../my_dataset/X_test.npy')
y_test = np.load('../my_dataset/y_test.npy')

## Conclusions from the runs (Iteration 1)

### 1. The accuracy on the training data cannot be used to judge the quality of a model
Since the model is fitted on the training data, it can overfit to those specific samples and reach an accuracy of 100%.
However, when we give the same model different data (the validation data) and ask it to make predictions, we will notice
a significantly lower accuracy, since it has not "memorised" these new samples. For this reason, only the accuracy on the
validation data will be taken into consideration when choosing the best model.

### 2. Best performing models
From the validation accuracy graph we keep the 8 best performing runs out of the 36 (roughly the top quarter) and get the following results: <br> <br>
<img src="iteration1_image_stats/iteration1_accuracy.svg">
| Model                     | Validation accuracy   |
| -----------               | -----------           |
| 3-conv-32-nodes-0-dense   | 88.75 %               |
| 3-conv-16-nodes-1-dense   | 86.25 %               |
| 3-conv-32-nodes-1-dense   | 85.62 %               |
| 3-conv-32-nodes-2-dense   | 85.00 %               |
| 3-conv-16-nodes-0-dense   | 83.75 %               |
| 2-conv-16-nodes-0-dense   | 82.50 %               |
| 3-conv-16-nodes-2-dense   | 81.25 %               |
| 1-conv-16-nodes-0-dense   | 81.25 %               |

* The majority of the best performing models had 3 convolutional layers.<br> 
Does the accuracy increase further if we **increase the number of the convolutional layers**?<br> 

* Models with more nodes generally performed better.<br> 
Does the accuracy increase further if we **increase the number of nodes per layer**? 

* Models with fewer dense layers generally performed better. These layers serve as memory, and it seems that with fewer of them the model memorises less and generalises more. However, it seems that a little bit of memory does help: the model with fewer nodes (16) and 1 dense layer performed better than the same model without a dense layer (3-conv-16-nodes-1-dense (86.25 %) vs. 3-conv-16-nodes-0-dense (83.75 %)). During testing the convolutional layers and the dense layers all had the same number of nodes.<br>
Does the accuracy increase further if we **use a different number of nodes per type of layer** and if we use **fewer nodes for the dense layers than the convolutional layers**?<br> 

### 3. Validation loss graph
From the graph we can see that the loss decreases until the 25th–30th epoch and then slowly starts to increase again. For this reason we will decrease the number of epochs from 50 to 45, and we may adjust further based on the new results.<br> <br> 
<img src="iteration1_image_stats/iteration1_loss.svg">

In [None]:
# Grid search over small CNN architectures: every combination of dense-layer
# count, nodes per layer and conv-block count is trained once, logged to
# TensorBoard under logs/<NAME> and saved under models/<NAME>.
dense_layers = [0, 1, 2]
layer_sizes = [4, 8, 16, 32]
conv_layers = [1, 2, 3]
epochs = 50

# The output layer needs one neuron per distinct label. This depends only on
# y_train, so it is computed once instead of on every grid iteration.
output_neurons = len(np.unique(y_train))


def _build_model(conv_layer, layer_size, dense_layer, input_shape, output_neurons):
    """Build and compile one CNN candidate for the grid search.

    Args:
        conv_layer: Total number of Conv2D/ReLU/MaxPooling2D blocks. The first
            block is always added, so values below 1 still yield one block.
        layer_size: Number of filters per Conv2D layer and units per hidden
            Dense layer (both layer types share the same size).
        dense_layer: Number of hidden Dense/ReLU layers after Flatten.
        input_shape: Shape of a single sample, passed to the first Conv2D.
        output_neurons: Number of units in the softmax output layer.

    Returns:
        A compiled Sequential model (adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    # The first conv block is the one that declares the input shape.
    model.add(Conv2D(layer_size, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Remaining conv blocks (conv_layer counts the first block as well).
    for _ in range(conv_layer - 1):
        model.add(Conv2D(layer_size, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())

    for _ in range(dense_layer):
        model.add(Dense(layer_size))
        model.add(Activation('relu'))

    # Output layer (as big as the number of words we teach). Uses the same
    # Dense class as the hidden layers rather than mixing in tf.keras.layers.
    model.add(Dense(output_neurons, activation='softmax'))

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            # The timestamp suffix keeps log/model directories of repeated runs apart.
            NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
            print(NAME)

            # Many models are created in this loop; drop the Keras global state
            # left by the previous one so memory does not keep growing.
            tf.keras.backend.clear_session()

            model = _build_model(conv_layer, layer_size, dense_layer,
                                 X_train.shape[1:], output_neurons)

            tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

            model.fit(X_train, y_train,
                      epochs=epochs,
                      validation_data=(X_validate, y_validate),
                      callbacks=[tensorboard])

            # save the model
            # NOTE(review): Keras 3 only accepts paths ending in .keras or .h5
            # here; confirm against the installed version before upgrading.
            model.save("models/"+NAME)