In [7]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import numpy as np

train_samples = 10000

X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

def expand(i):
    res = [0 for _ in range(0, 10)]
    res[i] = 1 
    return res

y = [expand(int(v)) for v in y]
X_train, X_temp, y_train, y_temp = train_test_split(X, y, train_size=0.8, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [8]:
import sys
import os

sys.path.append(os.path.abspath(".."))

import src.FFNN as ffnn
from src.utils import plot_training_results

In [9]:
# Define neural network parameters
layer_size = [784, 128, 128, 10]
activations = ["sigmoid", "sigmoid", "sigmoid"]

## Training

In [26]:
# Create and train the model
model_ffnn = ffnn.FFNN(layer_sizes=layer_size, activations=activations, loss="bce", weight_initializer="normal", weight_init_args={"seed": 73})
plot_ffnn = model_1.fit(X_train, y_train, 10, 0.05, 50, True, X_val, y_val)

Epoch 1/10, Loss: 0.0750, Val_loss: 0.0737
Epoch 2/10, Loss: 0.0711, Val_loss: 0.0717
Epoch 3/10, Loss: 0.0680, Val_loss: 0.0685
Epoch 4/10, Loss: 0.0655, Val_loss: 0.0653
Epoch 5/10, Loss: 0.0639, Val_loss: 0.0636
Epoch 6/10, Loss: 0.0614, Val_loss: 0.0636
Epoch 7/10, Loss: 0.0599, Val_loss: 0.0615
Epoch 8/10, Loss: 0.0574, Val_loss: 0.0577
Epoch 9/10, Loss: 0.0559, Val_loss: 0.0580
Epoch 10/10, Loss: 0.0546, Val_loss: 0.0568


In [31]:
from sklearn.neural_network import MLPClassifier
model_sk = MLPClassifier(hidden_layer_sizes=layer_size, activation="logistic", learning_rate_init=0.05, batch_size=50, solver='sgd', max_iter=10, verbose=True)
model_sk.fit(X_train, y_train)

Iteration 1, loss = 2.98620681
Iteration 2, loss = 2.93660242
Iteration 3, loss = 3.26023985
Iteration 4, loss = 3.07373092
Iteration 5, loss = 2.65977280
Iteration 6, loss = 2.44568977
Iteration 7, loss = 2.22512595
Iteration 8, loss = 2.07776761
Iteration 9, loss = 2.14787254
Iteration 10, loss = 2.09321414




In [32]:
y_test_labels = np.argmax(y_test, axis=1)

## Classification Report

In [33]:
y_pred = model_ffnn.predict(X_test)

# Convert one-hot predictions to labels
y_pred_labels = np.argmax(y_pred, axis=1)

# Compute evaluation metrics
accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_1_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_1_labels))

Accuracy: 0.1187
F1-score: 0.9154

Confusion Matrix:
[[  0 577   0  77  26   0   0   0   0   5]
 [  0 684   0 134   2   0   0   0   0   0]
 [  0 623   0  79  12   0   0   0   1   1]
 [  0 588   0 119   8   0   0   0   0   0]
 [  0 375   0 252   9   0   0   0   0  23]
 [  0 480   0 133  17   0   0   0   1   3]
 [  0 486   0 181   2   0   0   0   0  20]
 [  0 314   0 380   7   0   0   0   0   9]
 [  0 524   0 111   8   0   0   0   0   0]
 [  0 437   0 267   6   0   0   0   0  19]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       685
           1       0.94      0.98      0.96       820
           2       0.93      0.89      0.91       716
           3       0.90      0.89      0.90       715
           4       0.90      0.93      0.92       659
           5       0.89      0.82      0.85       634
           6       0.93      0.94      0.93       689
           7       0.93      0.95      0.94       710
     

In [34]:
y_pred = model_sk.predict(X_test)

# Convert one-hot predictions to labels
y_pred_labels = np.argmax(y_pred, axis=1)

# Compute evaluation metrics
accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_1_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_1_labels))

Accuracy: 0.3551
F1-score: 0.9154

Confusion Matrix:
[[677   0   0   3   2   0   3   0   0   0]
 [ 90 727   0   1   0   0   0   2   0   0]
 [616  10   0  29   7   0  54   0   0   0]
 [483   2   0 222   0   0   1   7   0   0]
 [263   1   0   0 388   0   1   6   0   0]
 [587   4   0  23   4   0  11   5   0   0]
 [614   0   0   0  15   0  60   0   0   0]
 [277   9   0   1  11   0   0 412   0   0]
 [621   5   0  14   0   0   2   1   0   0]
 [561   0   0   1 155   0   0  12   0   0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       685
           1       0.94      0.98      0.96       820
           2       0.93      0.89      0.91       716
           3       0.90      0.89      0.90       715
           4       0.90      0.93      0.92       659
           5       0.89      0.82      0.85       634
           6       0.93      0.94      0.93       689
           7       0.93      0.95      0.94       710
     