In [6]:
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

# Loading the data

In [7]:
with open("data/randomized/train_data", "rb") as f:
    train_data = pickle.load(f)

train_features = train_data[0]
train_targets  = train_data[1]

In [8]:
with open("data/randomized/val_data", "rb") as f:
    val_data = pickle.load(f)

val_features = val_data[0]
val_targets  = val_data[1]

In [9]:
with open("data/randomized/test_data", "rb") as f:
    test_data = pickle.load(f)

test_features = test_data[0]
test_targets  = test_data[1]

# Optimzing the Model over the validation data

In [32]:
num_hidden_layers = [2, 3, 4, 5]
h1_neurons = [pow(2, i) for i in range(5, 8)]
h2_neurons = [pow(2, i) for i in range(5, 13)]
h3_neurons = [pow(2, i) for i in range(5, 13)]
h4_neurons = [pow(2, i) for i in range(5, 8)]
h5_neurons = [pow(2, i) for i in range(5, 8)]

hps = dict()
i = 0

for num in num_hidden_layers:
    for h1 in h1_neurons:
        for h2 in h2_neurons:
            for h3 in h3_neurons:
                for h4 in h4_neurons:
                    for h5 in h5_neurons:
                        hps[i] = {
                            "num": num,
                            "h1": h1,
                            "h2": h2,
                            "h3": h3,
                            "h4": h4,
                            "h5": h5
                        }
                        i += 1

In [33]:
len(hps)

6912

In [35]:
df = pd.DataFrame(columns=["accuracy", "F1-score"], index=[i for i in range(len(hps))])

valid = random.sample(list(hps.keys()), 40)

for index in valid:
    print(index)

    hp = hps[index]

    mlp_model = Sequential()

    mlp_model.add(layers.Input(shape = (21, 2, ), dtype = "int32"))
    mlp_model.add(layers.Flatten())

    for i in range(hp["num"]):
        mlp_model.add(layers.Dense(hp[f"h{i + 1}"], activation="sigmoid"))

    mlp_model.add(layers.Dense(10, activation="softmax"))

    mlp_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    mlp_model.fit(x=train_features, y=train_targets, epochs=10, verbose=0)

    predictions = mlp_model.predict(val_features)
    predictions = [pred.argmax() for pred in predictions]

    acc = accuracy_score(y_true=val_targets, y_pred=predictions)
    acc = round(acc*100, 2)

    f1score = f1_score(y_true=val_targets, y_pred=predictions, average="macro")
    f1score = round(f1score*100, 2)

    df["accuracy"].loc[index] = acc
    df["F1-score"].loc[index] = f1score

print(df)

print(hps)

3405
4429
854
3857
6804
1922
6297
6362
5185
1608
1250
279
6550
3000
3396
742
5384
4004
6287
6682
2245
3198
6891
6111
3287
2505
555
3408
5853
4221
4793
3234
3296
4412
3982
2579
6015
4958
177
3477
     accuracy F1-score
0         NaN      NaN
1         NaN      NaN
2         NaN      NaN
3         NaN      NaN
4         NaN      NaN
...       ...      ...
6907      NaN      NaN
6908      NaN      NaN
6909      NaN      NaN
6910      NaN      NaN
6911      NaN      NaN

[6912 rows x 2 columns]
{0: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, 'h4': 32, 'h5': 32}, 1: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, 'h4': 32, 'h5': 64}, 2: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, 'h4': 32, 'h5': 128}, 3: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, 'h4': 64, 'h5': 32}, 4: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, 'h4': 64, 'h5': 64}, 5: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, 'h4': 64, 'h5': 128}, 6: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, 'h4': 128, 'h5': 32}, 7: {'num': 2, 'h1': 32, 'h2': 32, 'h3': 32, '

In [37]:
df.sort_values(by="F1-score", ascending=False)

Unnamed: 0,accuracy,F1-score
1250,80.0,74.66
4412,80.0,74.33
4793,81.33,72.83
742,76.89,71.67
854,80.0,71.56
...,...,...
6907,,
6908,,
6909,,
6910,,


In [38]:
print(hps[1250])
print(hps[4412])
print(hps[4793])

{'num': 2, 'h1': 128, 'h2': 64, 'h3': 128, 'h4': 128, 'h5': 128}
{'num': 4, 'h1': 64, 'h2': 1024, 'h3': 128, 'h4': 32, 'h5': 128}
{'num': 4, 'h1': 128, 'h2': 128, 'h3': 512, 'h4': 64, 'h5': 128}


# Training a baseline model

In [39]:
mlp_model = Sequential()

mlp_model.add(layers.Input(shape = (21, 2, ), dtype = "int32"))
mlp_model.add(layers.Flatten())
mlp_model.add(layers.Dense(128, activation="sigmoid"))
mlp_model.add(layers.Dense(64, activation="sigmoid"))
mlp_model.add(layers.Dense(10, activation="softmax"))

mlp_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

baseline_history = mlp_model.fit(x=train_features, y=train_targets, epochs=10, verbose=0)

# Testing the model

In [40]:
predictions = mlp_model.predict(test_features)
predictions = [pred.argmax() for pred in predictions]

In [42]:
len(hps)

6912

# Metrics

In [41]:
acc = accuracy_score(y_true=test_targets, y_pred=predictions)
accuracy = round(acc*100, 2)

print(f"The accuracy is {accuracy}%")

f1 = f1_score(y_true=test_targets, y_pred=predictions, average="macro")
f1 = round(f1*100, 2)

print(f"The F1-Score is {f1}%")

print(classification_report(y_true=test_targets, y_pred=predictions, digits=4))

conf_matrix = confusion_matrix(y_true=test_targets, y_pred=predictions)
px.imshow(conf_matrix, color_continuous_scale="turbo")

The accuracy is 73.42%
The F1-Score is 66.57%
              precision    recall  f1-score   support

           0     0.9725    0.9907    0.9815       107
           1     0.8793    0.4322    0.5795       118
           2     0.9821    1.0000    0.9910       110
           3     0.0000    0.0000    0.0000        99
           4     0.5912    0.7344    0.6551       128
           5     0.2727    0.0632    0.1026        95
           6     1.0000    1.0000    1.0000       145
           7     0.4037    0.9820    0.5722       111
           8     0.9914    1.0000    0.9957       115
           9     0.6716    0.9278    0.7792        97

    accuracy                         0.7342      1125
   macro avg     0.6765    0.7130    0.6657      1125
weighted avg     0.6990    0.7342    0.6885      1125




Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

