In [6]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import numpy as np

train_samples = 10000

X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

def expand(i):
    res = [0 for _ in range(0, 10)]
    res[i] = 1 
    return res

y = [expand(int(v)) for v in y]
X_train, X_temp, y_train, y_temp = train_test_split(X, y, train_size=0.8, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [7]:
import sys
import os

sys.path.append(os.path.abspath(".."))

import src.FFNN as ffnn
from src.utils import plot_training_results

# Activation

## Sigmoid

In [None]:

layer_size = [784, 128, 128, 64, 10]
activations = ["sigmoid", "sigmoid", "sigmoid", "sigmoid"]

sigmoid = ffnn.FFNN(layer_sizes=layer_size, activations=activations, loss="mse", weight_initializer="normal", weight_init_args={"seed": 73})
plot_sigmoid = sigmoid.fit(X_train, y_train, 100, 0.1, 50, True, X_val, y_val)


Epoch 1/300, Loss: 1.0185, Val_loss: 0.9008
Epoch 2/300, Loss: 0.9000, Val_loss: 0.8996
Epoch 3/300, Loss: 0.8996, Val_loss: 0.8996
Epoch 4/300, Loss: 0.8996, Val_loss: 0.8995
Epoch 5/300, Loss: 0.8995, Val_loss: 0.8995
Epoch 6/300, Loss: 0.8995, Val_loss: 0.8994
Epoch 7/300, Loss: 0.8994, Val_loss: 0.8993
Epoch 8/300, Loss: 0.8993, Val_loss: 0.8992
Epoch 9/300, Loss: 0.8991, Val_loss: 0.8990
Epoch 10/300, Loss: 0.8989, Val_loss: 0.8987
Epoch 11/300, Loss: 0.8986, Val_loss: 0.8982
Epoch 12/300, Loss: 0.8980, Val_loss: 0.8975
Epoch 13/300, Loss: 0.8970, Val_loss: 0.8963
Epoch 14/300, Loss: 0.8952, Val_loss: 0.8940
Epoch 15/300, Loss: 0.8919, Val_loss: 0.8895
Epoch 16/300, Loss: 0.8853, Val_loss: 0.8808
Epoch 17/300, Loss: 0.8730, Val_loss: 0.8648
Epoch 18/300, Loss: 0.8532, Val_loss: 0.8418
Epoch 19/300, Loss: 0.8297, Val_loss: 0.8201
Epoch 20/300, Loss: 0.8115, Val_loss: 0.8054
Epoch 21/300, Loss: 0.7981, Val_loss: 0.7928
Epoch 22/300, Loss: 0.7838, Val_loss: 0.7764
Epoch 23/300, Loss:

KeyboardInterrupt: 

In [None]:
y_pred = sigmoid.predict(X_test)

y_test_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_labels))

In [None]:
sigmoid.plot_gradient_weight()
sigmoid.plot_weight()

## ReLU

In [None]:

layer_size = [784, 128, 128, 64, 10]
activations = ["relu" for i in range(4)]

relu = ffnn.FFNN(layer_sizes=layer_size, activations=activations, loss="mse", weight_initializer="normal", weight_init_args={"seed": 73})
plot_relu = relu.fit(X_train, y_train, 300, 0.1, 50, True, X_val, y_val)


In [None]:
y_pred = relu.predict(X_test)

y_test_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_labels))

In [None]:
relu.plot_gradient_weight()
relu.plot_weight()

## Linear

In [None]:

layer_size = [784, 128, 128, 64, 10]
activations = ["linear" for i in range(4)]

linear = ffnn.FFNN(layer_sizes=layer_size, activations=activations, loss="mse", weight_initializer="normal", weight_init_args={"seed": 73})
plot_linear = linear.fit(X_train, y_train, 300, 0.1, 50, True, X_val, y_val)


In [None]:
y_pred = linear.predict(X_test)

y_test_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_labels))

In [None]:
linear.plot_gradient_weight()
linear.plot_weight()

## Tanh

In [None]:

layer_size = [784, 128, 128, 64, 10]
activations = ["tanh" for i in range(4)]

tanh = ffnn.FFNN(layer_sizes=layer_size, activations=activations, loss="mse", weight_initializer="normal", weight_init_args={"seed": 73})
plot_tanh = tanh.fit(X_train, y_train, 300, 0.1, 50, True, X_val, y_val)


In [None]:
y_pred = tanh.predict(X_test)

y_test_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_labels))

In [None]:
tanh.plot_gradient_weight()
tanh.plot_weight()

## SeLU

In [None]:

layer_size = [784, 128, 128, 64, 10]
activations = ["selu" for i in range(4)]

selu = ffnn.FFNN(layer_sizes=layer_size, activations=activations, loss="mse", weight_initializer="normal", weight_init_args={"seed": 73})
plot_selu = selu.fit(X_train, y_train, 300, 0.1, 50, True, X_val, y_val)


In [None]:
y_pred = selu.predict(X_test)

y_test_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_labels))

In [None]:
selu.plot_gradient_weight()
selu.plot_weight()

## Leaky

In [None]:

layer_size = [784, 128, 128, 64, 10]
activations = ["leaky" for i in range(4)]

leaky = ffnn.FFNN(layer_sizes=layer_size, activations=activations, loss="mse", weight_initializer="normal", weight_init_args={"seed": 73})
plot_leaky = leaky.fit(X_train, y_train, 300, 0.1, 50, True, X_val, y_val)


In [None]:
y_pred = leaky.predict(X_test)

y_test_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
f1 = f1_score(y_test_labels, y_pred_labels, average="weighted")
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_labels))

In [None]:
leaky.plot_gradient_weight()
leaky.plot_weight()

In [None]:
plot_training_results([plot_sigmoid,plot_relu,plot_linear,plot_tanh,plot_selu,plot_leaky])