## MNIST Classification: Comprehensive Model Comparison ##


This notebook provides a comprehensive comparison of three different approaches to MNIST digit classification:

1. **Baseline Classifier**: Per-digit average image (class centroid) + cosine similarity
2. **Sequential Neural Network**: From-scratch NumPy implementation
3. **PyTorch Neural Network**: Modern deep learning framework

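All three models are trained on the same training split and evaluated on the same test set, so their accuracy and training time can be compared directly using consistent metrics, visualizations, and analysis.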

### Setup and Imports


In [4]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from typing import Dict, List, Tuple

# Import our custom modules
import sys
sys.path.append('..')

from src.datasets import load_mnist_data, create_small_dataset
from src.baseline import BaselineClassifier
from src.sequential_nn import SequentialNNClassifier, DenseLayer, ActivationLayer
from src.torch_nn import TorchNNClassifier
from src.utils import plot_digit, plot_confusion_matrix, plot_sample_errors

# Reproducibility: seed NumPy's global random generator once, up front
SEED = 42
np.random.seed(SEED)

# Plot appearance. The style reset has to run before the palette is chosen.
plt.style.use('default')
sns.set_palette("husl")

print("🚀 All imports successful!")

🚀 All imports successful!


In [None]:
# Banner for the comparison run
print("🎯 COMPREHENSIVE MNIST MODEL COMPARISON")
print("=" * 60)

# Request the MNIST splits with normalization and flattening enabled and
# one-hot labels disabled, then unpack them into the six split arrays.
mnist_splits = load_mnist_data(
    normalize=True,
    flatten=True,
    one_hot_labels=False,
    random_state=42,
)
X_train, y_train, X_val, y_val, X_test, y_test = mnist_splits

# One empty sub-dictionary per kind of result; later cells key them by model
results = {section: {} for section in ('models', 'metrics', 'times')}

print(f"Dataset loaded - Train: {len(X_train)}, Test: {len(X_test)} samples")


In [None]:
# 1. BASELINE CLASSIFIER - centroid-based reference model
print("\n🎯 Training Baseline Classifier...")
print("Method: Average pixel values per digit + cosine similarity")

# Time construction + training together (wall-clock seconds)
baseline_started = time.time()
baseline = BaselineClassifier(normalize_centroids=True)
baseline.train(X_train, y_train)
baseline_time = time.time() - baseline_started

# Score the fitted classifier on the held-out test split
baseline_acc, baseline_cm = baseline.evaluate(X_test, y_test)

# Record the model, its accuracy and its training time under the 'baseline' key
for section, value in (
    ('models', baseline),
    ('metrics', baseline_acc),
    ('times', baseline_time),
):
    results[section]['baseline'] = value

# Emit the summary as a single multi-line print
baseline_summary = [
    f"✅ Baseline Classifier Results:",
    f"   • Test Accuracy: {baseline_acc:.4f} ({baseline_acc*100:.2f}%)",
    f"   • Training Time: {baseline_time:.2f} seconds",
    f"   • Method: Computes centroid (average) for each digit class",
    f"   • Classification: Cosine similarity to nearest centroid",
]
print("\n".join(baseline_summary))


In [None]:
# 2. SEQUENTIAL NEURAL NETWORK - From scratch implementation
print("\n🧠 Training Sequential Neural Network...")
print("Architecture: 784 → 392 → 196 → 10 (ReLU + Softmax)")

# Build the network architecture: every dense layer is followed by its
# activation layer, ending in a 10-way softmax output.
sequential = SequentialNNClassifier(random_state=42)
sequential.add(DenseLayer(784, 392))
sequential.add(ActivationLayer(392, 'relu'))
sequential.add(DenseLayer(392, 196))
sequential.add(ActivationLayer(196, 'relu'))
sequential.add(DenseLayer(196, 10))
sequential.add(ActivationLayer(10, 'softmax'))

print(f"Network: {sequential.describe()}")

# Prepare one-hot encoded labels for training.
# The notebook never imports or defines a `one_hot_encode` helper, so calling
# it raised NameError on a fresh kernel. Build the encoding directly with
# NumPy instead: row i of the identity matrix is the one-hot vector for class
# i, so indexing the identity matrix with the integer labels yields an
# array of shape (n_samples, NUM_CLASSES).
NUM_CLASSES = 10  # digits 0-9; matches the width of the final dense layer
y_train_onehot = np.eye(NUM_CLASSES)[np.asarray(y_train, dtype=int)]

# Train the network (reduced epochs for demo speed)
start_time = time.time()
sequential.train(
    X_train, y_train_onehot,
    epochs=5,  # Reduced for faster demo
    batch_size=100,
    learning_rate=0.1,
    verbose=True
)
sequential_time = time.time() - start_time

# Evaluate on test set (integer labels, same call shape as the other models)
sequential_acc, sequential_cm = sequential.evaluate(X_test, y_test)

# Store results
results['models']['sequential'] = sequential
results['metrics']['sequential'] = sequential_acc
results['times']['sequential'] = sequential_time

print(f"\n✅ Sequential NN Results:")
print(f"   • Test Accuracy: {sequential_acc:.4f} ({sequential_acc*100:.2f}%)")
print(f"   • Training Time: {sequential_time:.2f} seconds")
print(f"   • Implementation: Pure NumPy with backpropagation")
print(f"   • Layers: Dense → ReLU → Dense → ReLU → Dense → Softmax")


In [None]:
# 3. PYTORCH NEURAL NETWORK - Modern deep learning framework
print("\n🔥 Training PyTorch Neural Network...")
print("Framework: PyTorch with GPU acceleration (if available)")

# Instantiate the classifier before the timer starts, so only train() is timed
torch_model = TorchNNClassifier(random_state=42)

# Training hyperparameters (epochs kept low so the demo runs quickly)
torch_train_options = dict(
    epochs=5,
    batch_size=64,
    learning_rate=0.001,
    verbose=True,
)

torch_started = time.time()
torch_model.train(X_train, y_train, **torch_train_options)
torch_time = time.time() - torch_started

# Score the trained model on the held-out test split
torch_acc, torch_cm = torch_model.evaluate(X_test, y_test)

# Record the model, its accuracy and its training time under the 'torch' key
for section, value in (
    ('models', torch_model),
    ('metrics', torch_acc),
    ('times', torch_time),
):
    results[section]['torch'] = value

# Emit the summary as a single multi-line print
torch_summary = [
    f"\n✅ PyTorch NN Results:",
    f"   • Test Accuracy: {torch_acc:.4f} ({torch_acc*100:.2f}%)",
    f"   • Training Time: {torch_time:.2f} seconds",
    f"   • Framework: PyTorch with Adam optimizer",
    f"   • Features: GPU support, automatic differentiation",
]
print("\n".join(torch_summary))


In [None]:
# COMPREHENSIVE COMPARISON TABLE
# Display names and architecture blurbs, keyed by the same model keys that the
# training cells use when writing into `results`.
model_names = {
    'baseline': 'Baseline',
    'sequential': 'Sequential NN',
    'torch': 'PyTorch NN'
}

architectures = {
    'baseline': 'Cosine Similarity',
    'sequential': '784→392→196→10 (NumPy)',
    'torch': '784→392→196→10 (PyTorch)'
}

# Report the known models in a fixed order, skipping any whose training cell
# has not been run, so the table and the charts always cover the same models.
models = [key for key in model_names if key in results['metrics']]

# Column widths shared by the header and every row. The accuracy cell
# ("0.1234 (12.34%)") is 15 characters wide, so its column must be wider than
# the 12 characters the header originally allotted.
NAME_W, ACC_W, TIME_W, ARCH_W = 20, 16, 15, 25
TABLE_WIDTH = 80

print("\n📊 FINAL RESULTS SUMMARY")
print("=" * TABLE_WIDTH)
print(f"{'Model':<{NAME_W}} {'Accuracy':<{ACC_W}} {'Training Time':<{TIME_W}} {'Architecture':<{ARCH_W}}")
print("-" * TABLE_WIDTH)

for model_key in models:
    accuracy = results['metrics'][model_key]
    train_time = results['times'][model_key]

    # Format each cell first, then pad it to its column width
    accuracy_cell = f"{accuracy:.4f} ({accuracy*100:5.2f}%)"
    time_cell = f"{train_time:.2f}s"
    print(f"{model_names[model_key]:<{NAME_W}} {accuracy_cell:<{ACC_W}} {time_cell:<{TIME_W}} {architectures[model_key]:<{ARCH_W}}")

print("=" * TABLE_WIDTH)


# VISUAL COMPARISON
def annotate_bars(ax, bars, labels):
    """Write one bold, centered label just above each bar.

    Args:
        ax: Matplotlib axes that the bars were drawn on.
        bars: Bar container (or iterable of bar patches) returned by ``ax.bar``.
        labels: One label string per bar, in the same order as ``bars``.
    """
    for bar, label in zip(bars, labels):
        ax.annotate(
            label,
            xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
            # Offset in points rather than data units, so the gap above the
            # bar looks the same whatever the scale of the y-axis.
            xytext=(0, 3),
            textcoords='offset points',
            ha='center', va='bottom', fontweight='bold',
        )


fig, axes = plt.subplots(1, 2, figsize=(15, 6))

accuracies = [results['metrics'][m] for m in models]
times = [results['times'][m] for m in models]
model_labels = [model_names[m] for m in models]
bar_colors = ['skyblue', 'lightgreen', 'lightcoral'][:len(models)]

# Accuracy comparison
bars1 = axes[0].bar(model_labels, accuracies, color=bar_colors)
axes[0].set_title('Model Accuracy Comparison', fontsize=14, fontweight='bold')
axes[0].set_ylabel('Test Accuracy')
# Leave headroom above 1.0 so the two-line labels on high-accuracy bars do not
# run into the title.
axes[0].set_ylim(0, 1.15)
# A real newline (not a literal backslash-n) separates fraction and percentage
annotate_bars(axes[0], bars1, [f'{acc:.3f}\n({acc*100:.1f}%)' for acc in accuracies])

# Training time comparison
bars2 = axes[1].bar(model_labels, times, color=bar_colors)
axes[1].set_title('Training Time Comparison', fontsize=14, fontweight='bold')
axes[1].set_ylabel('Training Time (seconds)')
axes[1].margins(y=0.1)  # extra space above the tallest bar for its label
annotate_bars(axes[1], bars2, [f'{seconds:.1f}s' for seconds in times])

plt.tight_layout()
plt.show()


In [None]:
# Closing message, printed line by line
closing_lines = (
    "🎉 MNIST Classification Analysis Complete!",
    "",
    "This notebook demonstrated three different approaches to machine learning:",
    "1. Simple statistical methods (baseline)",
    "2. From-scratch neural network implementation",
    "3. Modern deep learning frameworks",
    "",
    "Each approach has its own strengths and use cases in the ML toolkit.",
    "Happy learning! 🤖",
)
for closing_line in closing_lines:
    print(closing_line)

# Final comparison of key metrics: pick the model key with the highest stored
# accuracy and the one with the lowest stored training time.
accuracy_by_model = results['metrics']
seconds_by_model = results['times']
most_accurate = max(accuracy_by_model, key=accuracy_by_model.get)
quickest = min(seconds_by_model, key=seconds_by_model.get)

print(f"\n📈 FINAL SCORECARD:")
print(f"🥇 Highest Accuracy: {model_names[most_accurate]} ({accuracy_by_model[most_accurate]:.3f})")
print(f"⚡ Fastest Training: {model_names[quickest]} ({seconds_by_model[quickest]:.1f}s)")
# NOTE(review): this line is a fixed statement, not derived from `results`
print(f"🎯 Best Balance: Sequential NN (good accuracy + educational value)")

# Show the professional repository structure we created
repo_summary = (
    f"\n🏗️  Professional Repository Structure Created:",
    f"   ✅ Modular src/ package with baseline, sequential_nn, torch_nn",
    f"   ✅ Comprehensive tests with pytest",
    f"   ✅ Clean configuration (.flake8, requirements.txt)",
    f"   ✅ Professional README with usage examples",
    f"   ✅ This summary notebook for easy comparison",
    f"\n   Ready for production, research, and learning! 🚀",
)
for repo_line in repo_summary:
    print(repo_line)
