# MNIST Full Pipeline

End-to-end MNIST digit classification using our from-scratch neural network:

1. Load MNIST
2. Explore the data
3. Build the network & train with Adam
4. Evaluate & visualize
5. Compare with sklearn (sanity check only)

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from pathlib import Path

from src.core.activations import ReLU, Softmax
from src.core.layer import DenseLayer
from src.core.losses import CrossEntropyLoss
from src.core.optimizers import Adam
from src.network.sequential import Sequential
from src.network.model import Model
from src.utils.data_utils import one_hot_encode
from src.utils.metrics import accuracy, confusion_matrix as compute_cm
from src.utils.visualization import (
    plot_training_curves, plot_confusion_matrix, plot_weight_distributions
)

# Every figure this notebook saves goes to ../outputs/plots (relative to the
# current working directory). Create it up front, including missing parents;
# an already-existing directory is not an error.
output_dir = Path('..', 'outputs', 'plots')
output_dir.mkdir(exist_ok=True, parents=True)

## 1. Load MNIST

In [None]:
from examples.mnist_example import load_mnist

# load_mnist() returns four arrays, unpacked in this order:
# train inputs, train labels, test inputs, test labels.
X_train, Y_train_int, X_test, Y_test_int = load_mnist()

# Values for the sanity report below: pixel extremes of the training inputs
# and the distinct labels present in the training split.
pixel_lo = X_train.min()
pixel_hi = X_train.max()
train_classes = np.unique(Y_train_int)

print(f'Train: X={X_train.shape}, Y={Y_train_int.shape}')
print(f'Test:  X={X_test.shape}, Y={Y_test_int.shape}')
print(f'Pixel range: [{pixel_lo:.1f}, {pixel_hi:.1f}]')
print(f'Classes: {train_classes}')

## 2. Explore Data

In [None]:
# Show the first ten training samples in a 2x5 grid. Each sample is reshaped
# to 28x28 for display and titled with its label.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(12, 5))
for panel, pixels, digit in zip(axes.flat, X_train, Y_train_int):
    panel.imshow(pixels.reshape(28, 28), cmap='gray')
    panel.set_title(f'Label: {digit}')
    panel.axis('off')

fig.suptitle('Sample MNIST Digits', fontsize=14)
fig.tight_layout()

# The figure is written to disk and then closed so it does not stay open
# for the rest of the notebook.
fig.savefig(output_dir / 'mnist_samples.png', dpi=150)
plt.close(fig)
print('Saved mnist_samples.png')

## 3. Build & Train Network

Architecture: `784 → 256(ReLU) → 128(ReLU) → 10(Softmax)`

In [None]:
# One-hot encode both label vectors over 10 classes. The integer labels are
# kept as-is because the evaluation cells use them directly.
Y_train, Y_test = (
    one_hot_encode(labels, 10) for labels in (Y_train_int, Y_test_int)
)

# One row per dense layer, in order:
# (input width, output width, activation class, seed passed to the layer).
layer_specs = (
    (784, 256, ReLU, 42),
    (256, 128, ReLU, 43),
    (128, 10, Softmax, 44),
)
network = Sequential(*[
    DenseLayer(n_in, n_out, activation=activation_cls(), seed=layer_seed)
    for n_in, n_out, activation_cls, layer_seed in layer_specs
])

# Bundle the network with its loss and optimizer.
model = Model(network=network, loss_fn=CrossEntropyLoss(), optimizer=Adam(lr=0.001))

print(network.summary())

In [None]:
# Train for 10 epochs in mini-batches of 128, printing progress as it goes.
# NOTE(review): the test split is passed as the validation data. That is fine
# for monitoring, but if Model.fit ever uses validation metrics for early
# stopping or model selection, the test accuracy reported later would be
# optimistic. Confirm against Model.fit, or hold out part of X_train instead.
fit_options = dict(epochs=10, batch_size=128, verbose=True)
history = model.fit(X_train, Y_train, X_val=X_test, Y_val=Y_test, **fit_options)

## 4. Evaluate

In [None]:
# Predicted class for each test sample = index of the largest model output
# along axis 1. This is compared against the integer test labels.
test_outputs = model.predict(X_test)
Y_pred = np.argmax(test_outputs, axis=1)
acc = accuracy(Y_test_int, Y_pred)
print(f'Test Accuracy: {acc:.2%}')

# Training curves, drawn from the history returned by model.fit
curves_png = output_dir / 'nb_mnist_training.png'
plot_training_curves(history, save_path=curves_png, title='MNIST Training')

# Confusion matrix over the 10 digit classes
cm = compute_cm(Y_test_int, Y_pred, n_classes=10)
confusion_png = output_dir / 'nb_mnist_confusion.png'
plot_confusion_matrix(cm, save_path=confusion_png, title='MNIST Confusion Matrix')

# Weight distributions of the trained layers
weights_png = output_dir / 'nb_mnist_weights.png'
plot_weight_distributions(network.layers, save_path=weights_png)

print('All plots saved.')

## 5. Compare with sklearn (Validation Only)

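We use sklearn's `MLPClassifier` as a reference implementation — purely as a
sanity check on our results; it is not part of our framework. For speed, the
sklearn model is trained on only the first 10,000 training samples, so the
comparison is indicative rather than like-for-like.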

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Use a subset for speed: the reference model sees only the first n_sub
# training samples. Our own model was fit on all of X_train, so the two
# accuracies below are not a like-for-like comparison.
n_sub = 10000

# Hidden sizes, activation, solver, learning rate and iteration count mirror
# the settings used for our network. random_state fixes sklearn's own seed.
mlp_settings = {
    'hidden_layer_sizes': (256, 128),
    'activation': 'relu',
    'solver': 'adam',
    'learning_rate_init': 0.001,
    'max_iter': 10,
    'random_state': 42,
    'verbose': True,
}
clf = MLPClassifier(**mlp_settings)
clf.fit(X_train[:n_sub], Y_train_int[:n_sub])

# Score the reference model on the full test split.
sk_pred = clf.predict(X_test)
sk_acc = accuracy_score(Y_test_int, sk_pred)

accuracy_gap = abs(acc - sk_acc)
print(f'\nOur framework accuracy:  {acc:.2%}')
print(f'sklearn MLP accuracy:    {sk_acc:.2%}')
print(f'Difference:              {accuracy_gap:.2%}')

## Summary

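Our from-scratch neural network achieves accuracy on MNIST that is competitive
with sklearn's optimized `MLPClassifier`. Because the sklearn baseline was
trained on a 10,000-sample subset, this is a sanity check rather than a strict
benchmark — but it is good evidence that our backpropagation, activations, and
optimizer implementations are correct.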