In [1]:
# !pip show tensorflow
# !pip show keras

In [2]:
# !pip install tensorflow
# !pip install keras

In [3]:
import numpy as np
import time
import keras
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [4]:
X = np.squeeze(np.load('src/smiley_X.npy'))
y = np.load('src/smiley_y.npy')

In [5]:
X.shape

(144, 9, 9)

# Binary Classification Model


### Test Train Split

In [6]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [7]:
y_test.shape

(29,)

Sigmoid is used for the output layer and there's only 1 output node.

In [8]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[9, 9]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(10, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid")
])

In [9]:
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=keras.optimizers.SGD(),
              metrics=[keras.metrics.binary_accuracy])

In [10]:
model.fit(x_train, y_train, epochs=10)

Epoch 1/10


  output, from_logits = _get_logits(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23224b8b4c0>

### Metrics
Loss and Accuracy

In [11]:
model.evaluate(x_test, y_test)



[0.6089975237846375, 0.6896551847457886]

### Confusion Matrix

In [12]:
#Predict
y_prediction = np.argmax(model.predict(x_test), axis=1)

#Create confusion matrix and normalizes it over predicted (columns)
confusion_matrix(y_test, y_prediction , normalize='pred')



array([[0.62068966, 0.        ],
       [0.37931034, 0.        ]])

# 🔷 Q: I don't get why it's 0 here for the confusion matrix?

# Categorical Classification Model

### One-hot encode y and new test train split

In [13]:
y = keras.utils.to_categorical(y-1, num_classes = 2)

In [14]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [15]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[9, 9]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(10, activation="relu"),
    keras.layers.Dense(2, activation="softmax")
])
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(learning_rate=0.01),
              metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
model.fit(x_train, y_train, epochs=10)
# The metrics are for the train data

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23225eb11e0>

### Metrics
Loss, Accuracy, TP, FP, TN, FN, Precision, Recall

In [16]:
model.evaluate(x_test, y_test)



[0.5640305280685425,
 0.7241379022598267,
 21.0,
 8.0,
 21.0,
 8.0,
 0.7241379022598267,
 0.7241379022598267]

# Experiment With Various Parameters That Control The Learning
Configuration Suite

In [17]:
layers_list = [[64,16,2],[32,8,2],[16,4,2],[5,3,2]]
η_list = [0.1, 0.01, 0.001, 0.0001, 0.00001]
epochs = [3, 5, 10, 50, 100]
optimisations = [keras.optimizers.SGD, keras.optimizers.RMSprop, keras.optimizers.Adam, keras.optimizers.Nadam]
activations = ["sigmoid", "tanh", "relu", "LeakyReLU"]

In [18]:
run_config_suit = False
file_name = "df_lab10.pkl"

if run_config_suit:
    start = time.time()
    list_of_results = []
    for layer in layers_list:
        for opt in optimisations:
            for η in η_list:
                for act in activations:
                    print("\n####################################################################################")
                    print("Layers      Eta Act.    <Optimisation>")
                    print(f"{layer} {η} {act} {opt}")
                    print("Epoch: [Loss, Accuracy, TP, FP, TN, FN, Precision, Recall]")
                    for epoch in epochs:
                        model = keras.models.Sequential([
                            keras.layers.Flatten(input_shape=[9, 9]),
                            keras.layers.Dense(layer[0], activation=act),
                            keras.layers.Dense(layer[1], activation=act),
                            keras.layers.Dense(2, activation="softmax")
                        ])
                        model.compile(loss=keras.losses.categorical_crossentropy,
                                      optimizer=opt(learning_rate=η),
                                      metrics=[keras.metrics.categorical_accuracy, keras.metrics.TruePositives(), keras.metrics.FalsePositives(), keras.metrics.TrueNegatives(), keras.metrics.FalseNegatives(), keras.metrics.Precision(), keras.metrics.Recall()])
                        model.fit(x_train, y_train, epochs=epoch, verbose= 0)
                        result = model.evaluate(x_test, y_test, verbose= 0)
                        config = [layer, opt, η, act, epoch]
                        list_of_results.append(result + config)
                        print(f"\t{epoch}: {result}")

    print("\n\n############## DONE")
    print(time.time() - start)

    labels = ["Loss", "Accuracy", "TP", "FP", "TN", "FN", "Precision", "Recall", "Layers", "optimiser", "η", "activation", "epoch"]
    df = pd.DataFrame(data = list_of_results, columns=labels)
    df.to_pickle(file_name)

else:
    df = pd.read_pickle(file_name)

It took 4414.6s (~1.2hrs) to run the configuration suite on G Drive

Overall 1600 model was generated with different hyperparameters.
* layers_list = 4 variation
* η_list = 5 variation
* epochs = 5 variation
* optimisations = 4 variation
* activations = 4 variation

4 × 5 × 5 × 4 × 4 = 1600.

The dataframe holds the metrics for each configuration and the configuration details.

In [19]:
df

Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
0,0.725694,0.379310,11.0,18.0,11.0,18.0,0.379310,0.379310,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,3
1,0.708019,0.379310,11.0,18.0,11.0,18.0,0.379310,0.379310,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,5
2,0.682639,0.413793,12.0,17.0,12.0,17.0,0.413793,0.413793,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,10
3,0.589108,0.965517,28.0,1.0,28.0,1.0,0.965517,0.965517,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,50
4,0.084866,1.000000,29.0,0.0,29.0,0.0,1.000000,1.000000,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.gradient...,0.10000,sigmoid,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,0.562120,0.758621,22.0,7.0,22.0,7.0,0.758621,0.758621,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,3
1596,0.656760,0.724138,21.0,8.0,21.0,8.0,0.724138,0.724138,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,5
1597,0.812644,0.275862,8.0,21.0,8.0,21.0,0.275862,0.275862,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,10
1598,0.824477,0.620690,18.0,11.0,18.0,11.0,0.620690,0.620690,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.nadam.Na...,0.00001,LeakyReLU,50


Filter for 0 loss
- 5  epochs
- 10 epochs

The lower the loss the better so 0 is the most desirable. Same is true for epochs. The fewer epochs it has to run the better it is.

In [20]:
df[df.Loss == 0 ]

Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
104,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,sigmoid,100
109,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,tanh,100
114,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,relu,100
118,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,50
119,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,100
124,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,sigmoid,100
129,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,tanh,100
134,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,relu,100
139,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.01,LeakyReLU,100
211,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,5


### Overall Best:

In [21]:
df[df.Loss == 0 ][df.epoch == 5]

  df[df.Loss == 0 ][df.epoch == 5]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
211,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,5
216,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,5


### Second Best:

In [22]:
df[df.Loss == 0 ][df.epoch == 10]

  df[df.Loss == 0 ][df.epoch == 10]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
217,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,10
1017,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,10


### Third:

In [23]:
df[df.Loss == 0 ][df.epoch == 50]

  df[df.Loss == 0 ][df.epoch == 50]


Unnamed: 0,Loss,Accuracy,TP,FP,TN,FN,Precision,Recall,Layers,optimiser,η,activation,epoch
118,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.rmsprop....,0.1,LeakyReLU,50
218,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[64, 16, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
618,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[32, 8, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
1013,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,relu,50
1018,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[16, 4, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50
1418,0.0,1.0,29.0,0.0,29.0,0.0,1.0,1.0,"[5, 3, 2]",<class 'keras.optimizers.optimizer_v2.adam.Adam'>,0.1,LeakyReLU,50


# Conclusion:
The best configuration (for epochs below 51 and with 0 loss):
- __Nodes Per Layers__: `[64, 16, 2]`. This was the most frequent out of the best configurations. It's notable that there was a configuration for `[5, 3, 2]` that reached 0 loss with 50 epochs. There's only a very few nodes in this setting, however, it did reach 0.
- __Optimiser__: `Adam`. This was the most frequent among the best configurations. `RMSprop` was the only other optimiser function that appeared.  `Nadam` and `SGD` didn't reach 0 within 50 epochs.
- __η__: `0.1` was the only one to appear. There were a few cases of `0.01` for 100 epochs.
- __Activation Function__ is either `LeakyReLU` or `ReLU`.