# Import Libraries
Import necessary libraries including NumPy, Matplotlib, scikit-learn for data, pickle for model saving/loading, networkx for visualization, and tqdm for progress tracking.

In [7]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Training and Evaluation on Dataset
Load the dataset, preprocess it for classification, and train the FFNN model with appropriate hyperparameters. Evaluate model performance using accuracy metrics.

In [8]:
from activation import Linear, ReLU, Sigmoid, Tanh, Softmax, LeakyReLU, ELU
from loss import MeanSquaredError, BinaryCrossEntropy, CategoricalCrossEntropy
from initialization import ZeroInitialization, UniformInitialization, NormalInitialization, XavierInitialization, HeInitialization
from model import FFNN
from rmsnorm import RMSNorm

### Load Dataset & Data Preparation

In [9]:
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
X = mnist.data.astype('float32')
y = mnist.target.astype('int64')

scaler = StandardScaler()
X = scaler.fit_transform(X)

# Label butuh di-encode dengan One Hot
encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y.reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

In [11]:
layer_sizes = [784, 128, 32, 10]  # Fitur input ada 784

activations = [
    ELU(),
    ELU(), 
    Softmax()     
]

loss_function = CategoricalCrossEntropy()

initializations = [
    HeInitialization(seed=42),  
    XavierInitialization(seed=42),  
    HeInitialization(seed=42)   
]

listnormalisasi = [
    None,
    RMSNorm,
    None
]

model_no_norm = FFNN(
    layer_sizes=layer_sizes,
    activations=activations,
    loss=loss_function,
    initializations=initializations
)

history_no_norm = model_no_norm.train(
    x_train=X_train,
    y_train=y_train,
    batch_size=32,
    learning_rate=0.01,
    epochs=20,
    x_y_val=(X_test, y_test),
    verbose=1
)

# Model dengan normalisasi RMS
model_norm = FFNN(
    layer_sizes=layer_sizes,
    activations=activations,
    loss=loss_function,
    initializations=initializations,
    normalization=listnormalisasi
)

history_norm = model_norm.train(
    x_train=X_train,
    y_train=y_train,
    batch_size=32,
    learning_rate=0.01,
    epochs=20,
    x_y_val=(X_test, y_test),
    verbose=1
)


# plot loss tanpa normalisasi 
plt.figure(figsize=(10, 6))
plt.plot(history_no_norm['train_loss'], label='Training Loss')
plt.plot(history_norm['val_loss'], label='Validation Loss')
plt.title('Train and Val Loss without Normalisasi')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# prediksi
y_pred = model_no_norm.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
accuracy = np.mean(y_pred_classes == y_true_classes)
print(f"Test accuracy without Normalization: {accuracy:.4f}")


# plot loss dengan normalisasi RMS
# plot loss
plt.figure(figsize=(10, 6))
plt.plot(history_norm['train_loss'], label='Training Loss')
plt.plot(history_norm['val_loss'], label='Validation Loss')
plt.title('Train and Val Loss With RMS Normalization')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# prediksi
y_pred = model_norm.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
accuracy = np.mean(y_pred_classes == y_true_classes)
print(f"Test accuracy with L1: {accuracy:.4f}")

Epoch 1/20


100%|██████████| 56000/56000 [00:09<00:00, 6100.91it/s, train_loss=0.4252, val_loss=0.2901]


Epoch 2/20


100%|██████████| 56000/56000 [00:08<00:00, 6635.66it/s, train_loss=0.2305, val_loss=0.2353]


Epoch 3/20


100%|██████████| 56000/56000 [00:07<00:00, 7437.65it/s, train_loss=0.1817, val_loss=0.2054]


Epoch 4/20


100%|██████████| 56000/56000 [00:06<00:00, 8373.69it/s, train_loss=0.1515, val_loss=0.1856]


Epoch 5/20


100%|██████████| 56000/56000 [00:06<00:00, 8485.93it/s, train_loss=0.1292, val_loss=0.1729]


Epoch 6/20


100%|██████████| 56000/56000 [00:06<00:00, 8530.95it/s, train_loss=0.1124, val_loss=0.1622]


Epoch 7/20


100%|██████████| 56000/56000 [00:06<00:00, 8270.96it/s, train_loss=0.0993, val_loss=0.1584]


Epoch 8/20


100%|██████████| 56000/56000 [00:06<00:00, 8594.86it/s, train_loss=0.0889, val_loss=0.1468]


Epoch 9/20


100%|██████████| 56000/56000 [00:06<00:00, 8522.56it/s, train_loss=0.0796, val_loss=0.1453]


Epoch 10/20


100%|██████████| 56000/56000 [00:06<00:00, 8587.38it/s, train_loss=0.0716, val_loss=0.1414]


Epoch 11/20


100%|██████████| 56000/56000 [00:06<00:00, 8143.11it/s, train_loss=0.0647, val_loss=0.1382]


Epoch 12/20


100%|██████████| 56000/56000 [00:06<00:00, 8450.28it/s, train_loss=0.0593, val_loss=0.1351]


Epoch 13/20


100%|██████████| 56000/56000 [00:06<00:00, 8090.15it/s, train_loss=0.0539, val_loss=0.1351]


Epoch 14/20


100%|██████████| 56000/56000 [00:06<00:00, 8134.81it/s, train_loss=0.0495, val_loss=0.1340]


Epoch 15/20


100%|██████████| 56000/56000 [00:07<00:00, 7991.82it/s, train_loss=0.0455, val_loss=0.1332]


Epoch 16/20


100%|██████████| 56000/56000 [00:08<00:00, 6448.78it/s, train_loss=0.0418, val_loss=0.1333]


Epoch 17/20


100%|██████████| 56000/56000 [00:07<00:00, 7426.86it/s, train_loss=0.0383, val_loss=0.1310]


Epoch 18/20


100%|██████████| 56000/56000 [00:06<00:00, 8558.51it/s, train_loss=0.0349, val_loss=0.1322]


Epoch 19/20


100%|██████████| 56000/56000 [00:06<00:00, 8328.92it/s, train_loss=0.0326, val_loss=0.1325]


Epoch 20/20


100%|██████████| 56000/56000 [00:06<00:00, 8622.37it/s, train_loss=0.0297, val_loss=0.1344]


TypeError: FFNN.__init__() got an unexpected keyword argument 'normalization'

# Model Visualization
Implement visualization of model architecture, training history, weight distributions, and gradient distributions using matplotlib and networkx.

In [None]:
# Distribusi Bobot di tiap layer
print("Bobot model tanpa normalisasi:")
model_no_norm.plot_weight_distribution()
print("Bobot model dengan normalisasi RMS:")
model_norm.plot_weight_distribution()

# Distribusi gradient di tiap layer
print("Gradient model tanpa normalisasi:")
model_no_norm.plot_gradient_distribution()
print("Gradient model dengan normalisasi RMS:")
model_norm.plot_gradient_distribution()