In [6]:
import pickle
import random

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

# Loading the data

In [7]:
with open("data/randomized/train_data", "rb") as f:
    train_data = pickle.load(f)

train_features = train_data[0]
train_targets  = train_data[1]

In [8]:
with open("data/randomized/val_data", "rb") as f:
    val_data = pickle.load(f)

val_features = val_data[0]
val_targets  = val_data[1]

In [9]:
with open("data/randomized/test_data", "rb") as f:
    test_data = pickle.load(f)

test_features = test_data[0]
test_targets  = test_data[1]

# Optimzing the Model over the validation data

In [10]:
num_hidden_layers = [2, 3, 4, 5]
h1_neurons = [pow(2, i) for i in range(5, 8)]
h2_neurons = [pow(2, i) for i in range(5, 13)]
h3_neurons = [pow(2, i) for i in range(5, 13)]
h4_neurons = [pow(2, i) for i in range(5, 8)]
h5_neurons = [pow(2, i) for i in range(5, 8)]

hps = dict()
i = 0

for num in num_hidden_layers:
    for h1 in h1_neurons:
        for h2 in h2_neurons:
            for h3 in h3_neurons:
                for h4 in h4_neurons:
                    for h5 in h5_neurons:
                        hps[i] = {
                            "num": num,
                            "h1": h1,
                            "h2": h2,
                            "h3": h3,
                            "h4": h4,
                            "h5": h5
                        }
                        i += 1

In [19]:
valid = random.sample(list(hps.keys()), 40)

df = pd.DataFrame(columns=["accuracy", "F1-score"], index=valid)

for index in valid:
    print(index)

    hp = hps[index]

    mlp_model = Sequential()

    mlp_model.add(layers.Input(shape = (21, 2, ), dtype = "int32"))
    mlp_model.add(layers.Flatten())

    for i in range(hp["num"]):
        mlp_model.add(layers.Dense(hp[f"h{i + 1}"], activation="sigmoid"))

    mlp_model.add(layers.Dense(10, activation="softmax"))

    mlp_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    mlp_model.fit(x=train_features, y=train_targets, epochs=10, verbose=0)

    predictions = mlp_model.predict(val_features)
    predictions = [pred.argmax() for pred in predictions]

    acc = accuracy_score(y_true=val_targets, y_pred=predictions)
    acc = round(acc*100, 2)

    f1score = f1_score(y_true=val_targets, y_pred=predictions, average="macro")
    f1score = round(f1score*100, 2)

    df["accuracy"].loc[index] = acc
    df["F1-score"].loc[index] = f1score

fig = px.line(df.sort_values(by="accuracy", ascending=False).reset_index().drop("index", axis=1), markers=True)
fig.update_layout(yaxis_title="Percentual [%]", legend_title="Métrica")

fig.show()

print(df.sort_values(by="accuracy", ascending=False).reset_index())

# print(hps)

3784
663
6412
109
3748
5303
3996
4063
5076
3594
6025
4657
2650
1585
3292
122
6812
2628
2962
1653
1024
3787
4866
6467
1596
6484
4747
2709
4648
4032
4094
936
4844
2219
231
5016
4356
4300
2534
4244


    index accuracy F1-score
0    2962    82.22    73.11
1    4648    81.78    70.77
2    4063    81.78    70.79
3     663    81.33    71.63
4     231     80.0    71.56
5    2628     80.0    71.43
6    2534    79.56    72.52
7    1653    79.56     73.7
8     122    79.11    71.28
9    3787    78.67    71.95
10   3748    78.67    67.65
11   4844    78.22    73.84
12   1585    78.22    66.08
13   3292    77.78    72.95
14   5016    77.78    73.02
15   1596    77.78    65.01
16   6484    77.78    70.09
17   4356    77.33    72.59
18   4866    77.33    72.36
19   3784    76.89    67.69
20   4657    76.44    67.33
21    936    76.44    66.55
22   2650     76.0    69.71
23   4300     76.0    69.39
24   2219    75.56    60.56
25   2709    75.11    64.68
26    109    73.78    67.89
27   1024    72.89    71.58
28   5076    68.89    57.62
29   4032    66.22     50.5
30   6025    65.33    53.08
31   4094    62.67    47.24
32   6412     56.0    37.69
33   6812    48.44    36.89
34   4244    12.44  

In [21]:
hps[3996]

{'num': 4, 'h1': 32, 'h2': 4096, 'h3': 512, 'h4': 32, 'h5': 32}

In [20]:
print(hps[2962])
print(hps[4468])
print(hps[4063])

{'num': 3, 'h1': 128, 'h2': 64, 'h3': 64, 'h4': 32, 'h5': 64}
{'num': 4, 'h1': 64, 'h2': 2048, 'h3': 32, 'h4': 64, 'h5': 64}
{'num': 4, 'h1': 64, 'h2': 32, 'h3': 256, 'h4': 64, 'h5': 64}


# Training a baseline model

In [39]:
mlp_model = Sequential()

mlp_model.add(layers.Input(shape = (21, 2, ), dtype = "int32"))
mlp_model.add(layers.Flatten())
mlp_model.add(layers.Dense(128, activation="sigmoid"))
mlp_model.add(layers.Dense(64, activation="sigmoid"))
mlp_model.add(layers.Dense(10, activation="softmax"))

mlp_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

baseline_history = mlp_model.fit(x=train_features, y=train_targets, epochs=10, verbose=0)

# Testing the model

In [40]:
predictions = mlp_model.predict(test_features)
predictions = [pred.argmax() for pred in predictions]

In [42]:
len(hps)

6912

# Metrics

In [41]:
acc = accuracy_score(y_true=test_targets, y_pred=predictions)
accuracy = round(acc*100, 2)

print(f"The accuracy is {accuracy}%")

f1 = f1_score(y_true=test_targets, y_pred=predictions, average="macro")
f1 = round(f1*100, 2)

print(f"The F1-Score is {f1}%")

print(classification_report(y_true=test_targets, y_pred=predictions, digits=4))

conf_matrix = confusion_matrix(y_true=test_targets, y_pred=predictions)
px.imshow(conf_matrix, color_continuous_scale="turbo")

The accuracy is 73.42%
The F1-Score is 66.57%
              precision    recall  f1-score   support

           0     0.9725    0.9907    0.9815       107
           1     0.8793    0.4322    0.5795       118
           2     0.9821    1.0000    0.9910       110
           3     0.0000    0.0000    0.0000        99
           4     0.5912    0.7344    0.6551       128
           5     0.2727    0.0632    0.1026        95
           6     1.0000    1.0000    1.0000       145
           7     0.4037    0.9820    0.5722       111
           8     0.9914    1.0000    0.9957       115
           9     0.6716    0.9278    0.7792        97

    accuracy                         0.7342      1125
   macro avg     0.6765    0.7130    0.6657      1125
weighted avg     0.6990    0.7342    0.6885      1125




Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

