# Comparison of Prediction Results with the Sklearn MLP Library

Import Libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from FFNN.FFNN import FFNN

Import Dataset

In [3]:
# Fetch the MNIST digits dataset from OpenML as plain arrays (not a DataFrame)
mnist = fetch_openml(name='mnist_784', version=1, as_frame=False)

# Features (X) and labels (y); the labels arrive as strings, so cast them to integers
X = mnist.data
y = mnist.target.astype(np.uint8)

# Report the dataset dimensions
print(f"Shape X: {X.shape}")  # (70000, 784)
print(f"Shape y: {y.shape}")  # (70000,)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f"Train set: {X_train.shape}, Test set: {X_test.shape}")

# Rescale pixel intensities from [0, 255] to [0, 1]
X_train, X_test = X_train / 255.0, X_test / 255.0

# One-hot encode the labels by picking rows out of an identity matrix
num_classes = 10  # MNIST has 10 classes (digits 0-9)
identity = np.eye(num_classes)
y_train_one_hot = identity[y_train]
y_test_one_hot = identity[y_test]

# Confirm the shapes of the one-hot encoded labels
print(f"Shape y_train (one-hot): {y_train_one_hot.shape}")  # (56000, 10)
print(f"Shape y_test (one-hot): {y_test_one_hot.shape}")    # (14000, 10)

Shape X: (70000, 784)
Shape y: (70000,)
Train set: (56000, 784), Test set: (14000, 784)
Shape y_train (one-hot): (56000, 10)
Shape y_test (one-hot): (14000, 10)


### Sklearn Library

In [4]:
# Hyperparameters for the scikit-learn baseline: plain mini-batch SGD with
# momentum switched off, i.e. no optimiser enhancements.
mlp_params = dict(
    hidden_layer_sizes=(128, 64),  # two hidden layers (128 and 64 neurons)
    activation='relu',             # ReLU activation function
    solver='sgd',                  # stochastic gradient descent
    alpha=0.0001,                  # L2 regularisation strength
    batch_size=64,                 # mini-batch size of 64
    learning_rate_init=0.1,        # learning rate 0.1
    max_iter=20,                   # at most 20 epochs
    momentum=0,                    # momentum disabled
    n_iter_no_change=20,           # epochs allowed without improvement (same value as max_iter)
    random_state=42,               # fixed seed
    verbose=True,                  # print the loss after each iteration
)
mlp = MLPClassifier(**mlp_params)

# Fit on the training split (integer labels, not the one-hot version)
mlp.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = mlp.predict(X_test)

# Evaluate accuracy and show the first 20 predictions next to the true labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of MLPClassifier: {accuracy:.4f}")
print(f"y_pred: {y_pred[:20]}")
print(f"y_test: {y_test[:20]}")

Iteration 1, loss = 0.35535325
Iteration 2, loss = 0.15636760
Iteration 3, loss = 0.11103278
Iteration 4, loss = 0.08572238
Iteration 5, loss = 0.06890429
Iteration 6, loss = 0.05620323
Iteration 7, loss = 0.04737141
Iteration 8, loss = 0.04007790
Iteration 9, loss = 0.03450268
Iteration 10, loss = 0.02965972
Iteration 11, loss = 0.02509956
Iteration 12, loss = 0.02102496
Iteration 13, loss = 0.01739772
Iteration 14, loss = 0.01450034
Iteration 15, loss = 0.01197771
Iteration 16, loss = 0.01007774
Iteration 17, loss = 0.00855166
Iteration 18, loss = 0.00700714
Iteration 19, loss = 0.00551464
Iteration 20, loss = 0.00491283
Accuracy of MLPClassifier: 0.9774
y_pred: [8 4 8 7 7 0 6 2 7 4 3 9 9 8 2 5 9 1 7 8]
y_test: [8 4 8 7 7 0 6 2 7 4 3 9 9 8 2 5 9 1 7 8]




### From Scratch

In [5]:
# Build the from-scratch network with a 784-128-64-10 layout
model1 = FFNN()
model1.add_layer(784)  # first layer sized to the 784 input features (no activation given)

# The remaining layers share the same activation, initialiser and seed.
# NOTE(review): the 10-unit output layer also uses ReLU and is trained with MSE,
# whereas sklearn's MLPClassifier optimises log-loss - the comparison is not
# strictly like-for-like; confirm this is intended.
for units in (128, 64, 10):
    model1.add_layer(units, activation_function='relu', initialization_method="he_normal", seed=42)

# Training settings chosen to mirror the sklearn run (lr 0.1, batch 64, 20 epochs, L2 1e-4)
train_settings = dict(
    learning_rate=0.1,
    batch_size=64,
    epochs=20,
    loss_function="MSE",
    l2_lambda=0.0001,
    verbose=1,
    seed=42,
)

# NOTE(review): the test split is passed as the 3rd/4th arguments - presumably
# used as validation data (the run reports a "Val Loss"); verify against FFNN.train.
model1.train(X_train, y_train_one_hot, X_test, y_test_one_hot, **train_settings)

# Predict on the test set and take the highest-scoring output per sample as the label
y_pred = model1.predict(X_test)
y_pred_label = np.argmax(y_pred, axis=1)

# Accuracy against the integer test labels, plus the first 20 predictions
accuracy = accuracy_score(y_test, y_pred_label)
print(f"Accuracy of FFNN: {accuracy:.4f}")
print(f"y_pred: {y_pred_label[:20]}")
print(f"y_test: {y_test[:20]}")

Training completed. Final Train Loss: 0.0701, Val Loss: 0.0838
Accuracy of FFNN: 0.9641
y_pred: [8 4 8 7 7 0 6 2 7 4 3 9 9 8 2 5 9 1 7 8]
y_test: [8 4 8 7 7 0 6 2 7 4 3 9 9 8 2 5 9 1 7 8]
