In [43]:
# !pip show tensorflow
# !pip show keras

In [44]:
# !pip install tensorflow
# !pip install keras

In [45]:
import numpy as np
import time
import keras
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [46]:
X = np.squeeze(np.load('src/smiley_X.npy'))
y = np.load('src/smiley_y.npy')

In [47]:
X.shape

(144, 9, 9)

# Binary Classification Model


### Test Train Split

In [48]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [49]:
y_test.shape

(29,)

Sigmoid is used for the output layer and there's only 1 output node.

In [50]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[9, 9]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(10, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid")
])

In [51]:
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=keras.optimizers.SGD(),
              metrics=[keras.metrics.binary_accuracy])

In [52]:
model.fit(x_train, y_train, epochs=10)

Epoch 1/10


  output, from_logits = _get_logits(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1f57f7d9420>

### Metrics
Loss and Accuracy

In [53]:
model.evaluate(x_test, y_test)



[0.7136676907539368, 0.37931033968925476]

### Confusion Matrix

In [54]:
#Predict
y_prediction = np.argmax(model.predict(x_test), axis=1)

#Create confusion matrix and normalizes it over predicted (columns)
confusion_matrix(y_test, y_prediction , normalize='pred')



array([[0.62068966, 0.        ],
       [0.37931034, 0.        ]])

# 🔷 Q: I don't get why it's 0 here for the confusion matrix?

# Categorical Classification Model

### One-hot encode y and new test train split

In [55]:
y = keras.utils.to_categorical(y-1, num_classes = 2)

In [56]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [57]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[9, 9]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(10, activation="relu"),
    keras.layers.Dense(2, activation="softmax")
])
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(learning_rate=0.01),
              metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(x_train, y_train, epochs=10)
# The metrics are for the train data

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1f55184cb80>

### Metrics
Loss, Accuracy, TP, FP, TN, FN, Precision, Recall

In [58]:
model.evaluate(x_test, y_test)



[0.6312966346740723,
 0.6896551847457886,
 20.0,
 9.0,
 20.0,
 9.0,
 0.6896551847457886,
 0.6896551847457886]

# Experiment With Various Parameters That Control The Learning
Configuration Suite

In [59]:
layers_list = [[64,16,2],[32,8,2],[16,4,2],[5,3,2]]
η_list = [0.1, 0.01, 0.001, 0.0001, 0.00001]
epochs = [3, 5, 10, 50, 100]
optimisations = [keras.optimizers.SGD, keras.optimizers.RMSprop, keras.optimizers.Adam, keras.optimizers.Nadam]
activations = ["sigmoid", "tanh", "relu", "LeakyReLU"]

In [60]:
run_config_suit = False
file_name = "df_lab10.pkl"

if run_config_suit:
    start = time.time()
    list_of_results = []
    for layer in layers_list:
        for opt in optimisations:
            for η in η_list:
                for act in activations:
                    print("\n####################################################################################")
                    print("Layers      Eta Act.    <Optimisation>")
                    print(f"{layer} {η} {act} {opt}")
                    print("Epoch: [Loss, Accuracy, TP, FP, TN, FN, Precision, Recall]")
                    for epoch in epochs:
                        model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(layer[0], activation=act),
                            keras.layers.Dense(layer[1], activation=act),
                            keras.layers.Dense(2, activation="softmax")
                        ])
                        model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=opt(learning_rate=η),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
                        model.fit(x_train, y_train, epochs=epoch, verbose= 0)
                        result = model.evaluate(x_test, y_test, verbose= 0)
                        config = [layer, opt, η, act, epoch]
                        list_of_results.append(result + config)
                        print(f"\t{epoch}: {result}")

    print("\n\n############## DONE")
    print(time.time() - start)

    labels = ["Loss", "Accuracy", "TP", "FP", "TN", "FN", "Precision", "Recall", "Layers", "optimiser", "η", "activation", "epoch"]
    df = pd.DataFrame(data = list_of_results, columns=labels)
    df.to_pickle(file_name)

else:
    df = pd.read_pickle(file_name)

It took 4414.6s (~1.2hrs) to run the configuration suite on G Drive

Overall 1600 model was generated with different hyperparameters.
* layers_list = 4 variation
* η_list = 5 variation
* epochs = 5 variation
* optimisations = 4 variation
* activations = 4 variation

4 × 5 × 5 × 4 × 4 = 1600.

The dataframe holds the metrics for each configuration and the configuration details.

In [61]:
df

Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
0,0.725694,0.379310,11.0,18.0,11.0,18.0,0.379310,0.379310,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,3
1,0.708019,0.379310,11.0,18.0,11.0,18.0,0.379310,0.379310,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,5
2,0.682639,0.413793,12.0,17.0,12.0,17.0,0.413793,0.413793,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,10
3,0.589108,0.965517,28.0,1.0,28.0,1.0,0.965517,0.965517,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,50
4,0.084866,1.000000,29.0,0.0,29.0,0.0,1.000000,1.000000,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,0.562120,0.758621,22.0,7.0,22.0,7.0,0.758621,0.758621,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,3
1596,0.656760,0.724138,21.0,8.0,21.0,8.0,0.724138,0.724138,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,5
1597,0.812644,0.275862,8.0,21.0,8.0,21.0,0.275862,0.275862,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,10
1598,0.824477,0.620690,18.0,11.0,18.0,11.0,0.620690,0.620690,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,50


Filter for 0 loss
- 5  epochs
- 10 epochs

The lower the loss the better so 0 is the most desirable. Same is true for epochs. The fewer epochs it has to run the better it is.

In [62]:
df[df.Loss == 0 ]

Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
104,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,sigmoid,100
109,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,tanh,100
114,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,relu,100
118,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,50
119,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,100
124,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,sigmoid,100
129,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,tanh,100
134,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,relu,100
139,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,LeakyReLU,100
211,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,5


### Overall Best:

In [63]:
df[df.Loss == 0 ][df.epoch == 5]

  df[df.Loss == 0 ][df.epoch == 5]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
211,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,5
216,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,5


### Second Best:

In [64]:
df[df.Loss == 0 ][df.epoch == 10]

  df[df.Loss == 0 ][df.epoch == 10]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
217,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,10
1017,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,10


### Third:

In [65]:
df[df.Loss == 0 ][df.epoch == 50]

  df[df.Loss == 0 ][df.epoch == 50]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
118,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,50
218,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
618,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
1013,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,50
1018,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
1418,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50


# Conclusion:
The best configuration (for epochs below 51 and with 0 loss):
- __Nodes Per Layers__: `[64, 16, 2]`. This was the most frequent out of the best configurations. It's notable that there was a configuration for `[5, 3, 2]` that reached 0 loss with 50 epochs. There's only a very few nodes in this setting, however, it did reach 0.
- __Optimiser__: `Adam`. This was the most frequent among the best configurations. `RMSprop` was the only other optimiser function that appeared.  `Nadam` and `SGD` didn't reach 0 within 50 epochs.
- __η__: `0.1` was the only one to appear. There were a few cases of `0.01` for 100 epochs.
- __Activation Function__ is either `LeakyReLU` or `ReLU`.

In [None]:
layers_list = [[64,16,2],[32,8,2],[16,4,2]]
η_list = [0.1, 0.01, 0.001]
epochs = [3, 5, 10, 50]
optimisations = [keras.optimizers.SGD, keras.optimizers.RMSprop, keras.optimizers.Adam, keras.optimizers.Nadam]
activations = ["sigmoid", "tanh", "relu", "LeakyReLU"]

In [None]:
from sklearn.model_selection import KFold

run_config_suit = False
file_name = "df2_lab10.pkl"
kf = KFold(n_splits = 10)

if run_config_suit:
    start = time.time()
    list_of_results = []
    list_of_crossedevaluated = []
    for layer in layers_list:
        for opt in optimisations:
            for η in η_list:
                for act in activations:
                    print("\n####################################################################################")
                    print("Layers      Eta Act.    <Optimisation>")
                    print(f"{layer} {η} {act} {opt}")
                    print("Epoch: [Loss, Accuracy, TP, FP, TN, FN, Precision, Recall]")
                    for epoch in epochs:
                        model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(layer[0], activation=act),
                            keras.layers.Dense(layer[1], activation=act),
                            keras.layers.Dense(2, activation="softmax")
                        ])
                        model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=opt(learning_rate=η),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])

                        for train, test in kf.split(X):
                            model.fit(X[train],y[train], epochs=epoch, verbose= 0)
                            result = model.evaluate(X[test],y[test], verbose= 0)
                            list_of_crossedevaluated.append(result)

                        config = [layer, opt, η, act, epoch]

                        loss = 0
                        acc = 0
                        TP = 0
                        FP = 0
                        TN = 0
                        FN = 0
                        Precision = 0
                        Recall = 0
                        for i in list_of_crossedevaluated:
                            loss += i[0]
                            acc += i[1]
                            TP += i[2]
                            FP += i[3]
                            TN += i[4]
                            FN += i[5]
                            Precision += i[6]
                            Recall += i[7]
                        result10 = [loss,acc,TP,FP,TN,FN,Precision,Recall]
                        result = []
                        for i in range(2):
                            result.append(result10[i]/10)
                        result.append(TP)
                        result.append(FP)
                        result.append(TN)
                        result.append(FN)
                        for i in range(6,8):
                            result.append(result10[i]/10)
                        list_of_results.append(result + config)
                        list_of_crossedevaluated.clear()
                        print(f"\t{epoch}: {result}")

    print("\n\n############## DONE")
    print(time.time() - start)

    labels = ["Loss", "Accuracy", "TP", "FP", "TN", "FN", "Precision", "Recall", "Layers", "optimiser", "η", "activation", "epoch"]
    dfcv = pd.DataFrame(data = list_of_results, columns=labels)
    dfcv.to_pickle(file_name)

else:
    dfcv = pd.read_pickle(file_name)

It took 1998.28s (~33.3mins) to run this

Overall 576 models were generated with different hyperparameters.
I've reduced the number of hyperparameters explored to reduce computation time.
* layers_list = 3 variation
* η_list = 3 variation
* epochs = 3 variation
* optimisations = 4 variation
* activations = 4 variation

3 × 3 × 3 × 4 × 4 = 432.

The dataframe holds the metrics for each configuration and the configuration details.

In [None]:
dfcv

Filter for 0 loss
- 5  epochs
- 10 epochs

The lower the loss the better so 0 is the most desirable. Same is true for epochs. The fewer epochs it has to run the better it is.

In [None]:
dfcv[dfcv.Loss == 0 ]

### Overall Best:

In [None]:
dfcv[dfcv.Loss == 0 ][dfcv.epoch == 5]

### Second Best:

In [None]:
dfcv[dfcv.Loss == 0 ][dfcv.epoch == 10]

### Third:

In [None]:
dfcv[dfcv.Loss == 0 ][dfcv.epoch == 50]

# Conclusion:
The best configuration (for epochs below 51 and with 0 loss):
- __Nodes Per Layers__: `[64, 16, 2]`. This was the most frequent out of the best configurations. It's notable that there was a configuration for `[5, 3, 2]` that reached 0 loss with 50 epochs. There's only a very few nodes in this setting, however, it did reach 0.
- __Optimiser__: `Adam`. This was the most frequent among the best configurations. `RMSprop` was the only other optimiser function that appeared.  `Nadam` and `SGD` didn't reach 0 within 50 epochs.
- __η__: `0.1` was the only one to appear. There were a few cases of `0.01` for 100 epochs.
- __Activation Function__ is either `LeakyReLU` or `ReLU`.

Classifier over-fitting experiment

In [None]:
#Classifier 1
#First train test data set with a 0.2 split
split1 = 0.2
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=split1)

#I'm not sure if we need to keep the first train set in the other classifier and only add more data coming from the train set to the test set or if
#we move the data from the train test to the test set and delete it from the train set, we've used the second solution in the rest of the lab
#Classifier 2
#Here we move 30% of the previous train set into the data set
split2 = split1 + (1-split1) * 0.3
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=split2)

#Classifier 3
#Here we move 60% of the previous train set into the data set
split3 = split1 + (1-split1) * 0.3
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=split3)

#List to store the evaluations
list_of_results_overfitting = []

In [None]:
#Evaluating our first classifier on the test set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train1,y_train1, epochs = 5)
resulttest1 = model.evaluate(X_test1,y_test1)
list_of_results_overfitting.append(resulttest1)

In [None]:
#Evaluating our first classifier on the train set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train1,y_train1, epochs = 5)
resulttrain1 = model.evaluate(X_train1,y_train1)
list_of_results_overfitting.append(resulttrain1)

In [None]:
#Evaluating our second classifier on the test set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train2,y_train2, epochs = 5)
resulttest2 = model.evaluate(X_test2,y_test2)
list_of_results_overfitting.append(resulttest2)

In [None]:
#Evaluating our second classifier on the train set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train2,y_train2, epochs = 5)
resulttrain2 = model.evaluate(X_train2,y_train2)
list_of_results_overfitting.append(resulttrain2)

In [None]:
#Evaluating our third classifier on the test set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train3,y_train3, epochs = 5)
resulttest3 = model.evaluate(X_test3,y_test3)
list_of_results_overfitting.append(resulttest3)

In [None]:
#Evaluating our third classifier on the train set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train3,y_train3, epochs = 5)
resulttrain3 = model.evaluate(X_train3,y_train3)
list_of_results_overfitting.append(resulttest3)

In [None]:
labels = ["Loss", "Accuracy", "TP", "FP", "TN", "FN", "Precision", "Recall"]
labels2 = ["Classifier 1 test", "Classifier 1 train", "Classifier 2 test", "Classifier 2 train", "Classifier 3 test", "Classifier 3 train"]
df = pd.DataFrame(data = list_of_results_overfitting, columns=labels, index = labels2)
df

I can't seem to notice anything at least in the table as the results are the same for each one

In [66]:
layers_list = [[64,16,2],[32,8,2],[16,4,2]]
η_list = [0.1, 0.01, 0.001]
epochs = [3, 5, 10, 50]
optimisations = [keras.optimizers.SGD, keras.optimizers.RMSprop, keras.optimizers.Adam, keras.optimizers.Nadam]
activations = ["sigmoid", "tanh", "relu", "LeakyReLU"]

In [67]:
from sklearn.model_selection import KFold

run_config_suit = False
file_name = "df2_lab10.pkl"
kf = KFold(n_splits = 10)

if run_config_suit:
    start = time.time()
    list_of_results = []
    list_of_crossedevaluated = []
    for layer in layers_list:
        for opt in optimisations:
            for η in η_list:
                for act in activations:
                    print("\n####################################################################################")
                    print("Layers      Eta Act.    <Optimisation>")
                    print(f"{layer} {η} {act} {opt}")
                    print("Epoch: [Loss, Accuracy, TP, FP, TN, FN, Precision, Recall]")
                    for epoch in epochs:
                        model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(layer[0], activation=act),
                            keras.layers.Dense(layer[1], activation=act),
                            keras.layers.Dense(2, activation="softmax")
                        ])
                        model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=opt(learning_rate=η),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])

                        for train, test in kf.split(X):
                            model.fit(X[train],y[train], epochs=epoch, verbose= 0)
                            result = model.evaluate(X[test],y[test], verbose= 0)
                            list_of_crossedevaluated.append(result)

                        config = [layer, opt, η, act, epoch]

                        loss = 0
                        acc = 0
                        TP = 0
                        FP = 0
                        TN = 0
                        FN = 0
                        Precision = 0
                        Recall = 0
                        for i in list_of_crossedevaluated:
                            loss += i[0]
                            acc += i[1]
                            TP += i[2]
                            FP += i[3]
                            TN += i[4]
                            FN += i[5]
                            Precision += i[6]
                            Recall += i[7]
                        result10 = [loss,acc,TP,FP,TN,FN,Precision,Recall]
                        result = []
                        for i in range(2):
                            result.append(result10[i]/10)
                        result.append(TP)
                        result.append(FP)
                        result.append(TN)
                        result.append(FN)
                        for i in range(6,8):
                            result.append(result10[i]/10)
                        list_of_results.append(result + config)
                        list_of_crossedevaluated.clear()
                        print(f"\t{epoch}: {result}")

    print("\n\n############## DONE")
    print(time.time() - start)

    labels = ["Loss", "Accuracy", "TP", "FP", "TN", "FN", "Precision", "Recall", "Layers", "optimiser", "η", "activation", "epoch"]
    dfcv = pd.DataFrame(data = list_of_results, columns=labels)
    dfcv.to_pickle(file_name)

else:
    dfcv = pd.read_pickle(file_name)

It took 1998.28s (~33.3mins) to run this

Overall 576 models were generated with different hyperparameters.
I've reduced the number of hyperparameters explored to reduce computation time.
* layers_list = 3 variation
* η_list = 3 variation
* epochs = 3 variation
* optimisations = 4 variation
* activations = 4 variation

3 × 3 × 3 × 4 × 4 = 432.

The dataframe holds the metrics for each configuration and the configuration details.

In [68]:
dfcv

Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
0,0.717583,0.508571,73.0,71.0,73.0,71.0,0.508571,0.508571,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.100,sigmoid,3
1,0.578264,0.665714,95.0,49.0,95.0,49.0,0.665714,0.665714,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.100,sigmoid,5
2,0.394858,0.772857,110.0,34.0,110.0,34.0,0.772857,0.772857,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.100,sigmoid,10
3,0.061830,0.986667,142.0,2.0,142.0,2.0,0.986667,0.986667,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.100,sigmoid,50
4,0.063752,0.980000,141.0,3.0,141.0,3.0,0.980000,0.980000,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.100,tanh,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,0.007844,1.000000,144.0,0.0,144.0,0.0,1.000000,1.000000,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.001,relu,50
572,0.355346,0.924286,133.0,11.0,133.0,11.0,0.924286,0.924286,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.001,LeakyReLU,3
573,0.233274,0.966667,139.0,5.0,139.0,5.0,0.966667,0.966667,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.001,LeakyReLU,5
574,0.117824,0.986667,142.0,2.0,142.0,2.0,0.986667,0.986667,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.001,LeakyReLU,10


Filter for 0 loss
- 5  epochs
- 10 epochs

The lower the loss the better so 0 is the most desirable. Same is true for epochs. The fewer epochs it has to run the better it is.

In [69]:
dfcv[dfcv.Loss == 0 ]

Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
59,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,relu,50
63,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,50
71,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,tanh,50
75,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,relu,50
79,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,LeakyReLU,50
105,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,5
109,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,5
111,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
247,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,tanh,50
251,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,relu,50


### Overall Best:

In [70]:
dfcv[dfcv.Loss == 0 ][dfcv.epoch == 5]

  dfcv[dfcv.Loss == 0 ][dfcv.epoch == 5]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
105,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,5
109,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,5
297,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,5
301,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,5


### Second Best:

In [71]:
dfcv[dfcv.Loss == 0 ][dfcv.epoch == 10]

  dfcv[dfcv.Loss == 0 ][dfcv.epoch == 10]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
302,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,10


### Third:

In [72]:
dfcv[dfcv.Loss == 0 ][dfcv.epoch == 50]

  dfcv[dfcv.Loss == 0 ][dfcv.epoch == 50]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
59,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,relu,50
63,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,50
71,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,tanh,50
75,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,relu,50
79,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,LeakyReLU,50
111,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
247,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,tanh,50
251,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,relu,50
255,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,50
271,0.0,1.0,144.0,0.0,144.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,LeakyReLU,50


# Conclusion:
The best configuration (for epochs below 51 and with 0 loss):
- __Nodes Per Layers__: `[64, 16, 2]`. This was the most frequent out of the best configurations. It's notable that there was a configuration for `[5, 3, 2]` that reached 0 loss with 50 epochs. There's only a very few nodes in this setting, however, it did reach 0.
- __Optimiser__: `Adam`. This was the most frequent among the best configurations. `RMSprop` was the only other optimiser function that appeared.  `Nadam` and `SGD` didn't reach 0 within 50 epochs.
- __η__: `0.1` was the only one to appear. There were a few cases of `0.01` for 100 epochs.
- __Activation Function__ is either `LeakyReLU` or `ReLU`.

Classifier over-fitting experiment

In [73]:
#Classifier 1
#First train test data set with a 0.2 split
split1 = 0.2
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=split1)

#I'm not sure if we need to keep the first train set in the other classifier and only add more data coming from the train set to the test set or if
#we move the data from the train test to the test set and delete it from the train set, we've used the second solution in the rest of the lab
#Classifier 2
#Here we move 30% of the previous train set into the data set
split2 = split1 + (1-split1) * 0.3
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=split2)

#Classifier 3
#Here we move 60% of the previous train set into the data set
split3 = split1 + (1-split1) * 0.3
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=split3)

#List to store the evaluations
list_of_results_overfitting = []

In [74]:
#Evaluating our first classifier on the test set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train1,y_train1, epochs = 5)
resulttest1 = model.evaluate(X_test1,y_test1)
list_of_results_overfitting.append(resulttest1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [75]:
#Evaluating our first classifier on the train set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train1,y_train1, epochs = 5)
resulttrain1 = model.evaluate(X_train1,y_train1)
list_of_results_overfitting.append(resulttrain1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [76]:
#Evaluating our second classifier on the test set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train2,y_train2, epochs = 5)
resulttest2 = model.evaluate(X_test2,y_test2)
list_of_results_overfitting.append(resulttest2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [77]:
#Evaluating our second classifier on the train set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train2,y_train2, epochs = 5)
resulttrain2 = model.evaluate(X_train2,y_train2)
list_of_results_overfitting.append(resulttrain2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [78]:
#Evaluating our third classifier on the test set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train3,y_train3, epochs = 5)
resulttest3 = model.evaluate(X_test3,y_test3)
list_of_results_overfitting.append(resulttest3)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [79]:
#Evaluating our third classifier on the train set
model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(64, activation="LeakyReLU"),
                            keras.layers.Dense(16, activation="LeakyReLU"),
                            keras.layers.Dense(2, activation="softmax")
                        ])
model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=keras.optimizers.Adam(learning_rate=0.1),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(X_train3,y_train3, epochs = 5)
resulttrain3 = model.evaluate(X_train3,y_train3)
list_of_results_overfitting.append(resulttest3)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [85]:
labels = ["Loss", "Accuracy", "TP", "FP", "TN", "FN", "Precision", "Recall"]
labels2 = ["Classifier 1 test", "Classifier 1 train", "Classifier 2 test", "Classifier 2 train", "Classifier 3 test", "Classifier 3 train"]
df = pd.DataFrame(data = list_of_results_overfitting, columns=labels, index = labels2)
df

Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall
Classifier 1 test,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0
Classifier 1 train,2.218047e-06,1.0,115.0,0.0,115.0,0.0,1.0,1.0
Classifier 2 test,0.0,1.0,64.0,0.0,64.0,0.0,1.0,1.0
Classifier 2 train,5.029911e-05,1.0,80.0,0.0,80.0,0.0,1.0,1.0
Classifier 3 test,8.307205e-07,1.0,64.0,0.0,64.0,0.0,1.0,1.0
Classifier 3 train,8.307205e-07,1.0,64.0,0.0,64.0,0.0,1.0,1.0


I can't seem to notice anything at least in the table as the results are the same for each one