# CINIC-10 MLP Training Notebook

This notebook demonstrates training a Multi-Layer Perceptron (MLP) on the CINIC-10 dataset.

## Mathematical Foundation

The MLP processes flattened images through fully connected layers:
- **Input**: 32×32×3 = 3072 features (flattened RGB image)
- **Hidden layers**: Apply linear transformation followed by ReLU activation
- **Forward pass**: each hidden layer computes `h = f(Wx + b)`, where `f` is the ReLU activation
- **Loss**: Cross-entropy loss for multi-class classification
- **Optimization**: Adam optimizer with backpropagation

In [None]:
# Import required libraries
import sys
import os
sys.path.append('..')

import torch
import torch.nn as nn
import yaml
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# Import project modules
from src.models.mlp import MLP
from src.data.dataset import CINIC10DataModule
from src.training.trainer import ModelTrainer
from src.training.evaluator import ModelEvaluator
from src.utils.export import ModelExporter

# Report the runtime environment so later cells can be interpreted in context.
print("Libraries imported successfully!")
torch_version = torch.__version__
print(f"PyTorch version: {torch_version}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")

## 1. Configuration and Setup

In [None]:
# Load the experiment configuration.
# The encoding is explicit so the YAML is decoded the same way on every
# platform; without it open() uses the locale's preferred encoding.
with open('../configs/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Select the compute device: CUDA when it is usable, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Seed the random number generators used in this notebook for reproducibility.
seed = config['seed']
torch.manual_seed(seed)
np.random.seed(seed)
if torch.cuda.is_available():
    # Seed every visible GPU, not only the currently selected one.
    torch.cuda.manual_seed_all(seed)

print("Configuration loaded and random seeds set")

## 2. Data Setup

Load and prepare the CINIC-10 dataset with appropriate preprocessing for MLP models.

In [None]:
# Build the CINIC-10 data module from the dataset and loader settings in the config.
data_dir = config['dataset']['data_dir']
loader_cfg = config['data_loader']
data_module = CINIC10DataModule(
    data_dir=data_dir,
    batch_size=loader_cfg['batch_size'],
    num_workers=loader_cfg['num_workers'],
    pin_memory=loader_cfg['pin_memory'],
    validation_split=loader_cfg['validation_split'],
    seed=config['seed'],
)

# Create the loaders (augmentation requested); later cells index the result
# by 'train', 'val' and 'test'.
print("Setting up data loaders...")
data_loaders = data_module.setup_data_loaders(use_augmentation=True)

# Print every dataset fact except the class list, which gets its own line below.
dataset_info = data_module.get_dataset_info()
print("\nDataset Information:")
for field, detail in dataset_info.items():
    if field == 'class_names':
        continue
    print(f"  {field}: {detail}")

class_list = dataset_info['class_names']
print(f"\nClass names: {class_list}")

In [None]:
# Visualize sample data
# Delegates to the data module's own helper, asking for 8 samples from the
# "train" split. Presumably this renders the images inline — confirm in
# CINIC10DataModule.visualize_samples.
print("Visualizing sample data...")
data_module.visualize_samples(num_samples=8, split="train")

## 3. MLP Model Architecture

Create and examine the MLP model architecture.

In [None]:
# Instantiate the MLP from the 'models.mlp' section of the config.
mlp_cfg = config['models']['mlp']
mlp_model = MLP(
    input_size=32 * 32 * 3,  # flattened CINIC-10 image: 3072 features
    hidden_layers=mlp_cfg['hidden_layers'],
    num_classes=config['dataset']['num_classes'],
    dropout=mlp_cfg['dropout'],
    activation=mlp_cfg['activation'],
)

# Show the model's own architecture summary.
print("MLP Model Architecture:")
print(mlp_model.summary())

# List the general model facts first, leaving the nested
# 'mathematical_foundation' entry for its own section.
print("\nDetailed Model Information:")
model_info = mlp_model.get_model_info()
for field in model_info:
    if field == 'mathematical_foundation':
        continue
    print(f"  {field}: {model_info[field]}")

print("\nMathematical Foundation:")
foundation = model_info['mathematical_foundation']
for topic, detail in foundation.items():
    print(f"  {topic}: {detail}")

## 4. Training Process

Train the MLP model with comprehensive monitoring and visualization.

In [None]:
# Wrap the model in the project trainer under the experiment name "MLP_CINIC10".
mlp_trainer = ModelTrainer(
    model=mlp_model,
    device=device,
    config=config,
    experiment_name="MLP_CINIC10",
)

# Echo the hyperparameters taken from the config's 'training' section.
print("Starting MLP training...")
train_cfg = config['training']
print(f"Training for {train_cfg['epochs']} epochs")
print(f"Learning rate: {train_cfg['learning_rate']}")
print(f"Optimizer: {train_cfg['optimizer']}")
print(f"Scheduler: {train_cfg['scheduler']}")

In [None]:
# Run training on the train/val loaders with checkpoint saving enabled.
train_loader = data_loaders['train']
val_loader = data_loaders['val']
training_history = mlp_trainer.train(
    train_loader=train_loader,
    val_loader=val_loader,
    save_checkpoints=True,
)

# Report the best validation accuracy tracked by the trainer.
print("Training completed!")
best_val_acc = mlp_trainer.best_val_acc
print(f"Best validation accuracy: {best_val_acc:.2f}%")

In [None]:
# Plot training history
# save_plot=True is handed to the trainer. Presumably the figure is also
# written to disk at a location chosen inside ModelTrainer — TODO confirm.
mlp_trainer.plot_training_history(save_plot=True)

## 5. Model Evaluation

Comprehensive evaluation of the trained MLP model.

In [None]:
# Create the evaluator with the data module's class names and an output directory.
evaluator = ModelEvaluator(
    class_names=data_module.class_names,
    device=device,
    save_dir="./mlp_evaluation_results",
)

# Score the model on the test loader.
print("Evaluating MLP model...")
mlp_results = evaluator.evaluate_model(
    model=mlp_model,
    test_loader=data_loaders['test'],
    model_name="MLP",
)

# Print the headline metrics, one "<label>: <value>%" line per entry.
print("\nMLP Evaluation Results:")
overall = mlp_results['overall_metrics']
headline_metrics = (
    ("Overall Accuracy", 'accuracy'),
    ("Top-2 Accuracy", 'top2_accuracy'),
    ("Top-3 Accuracy", 'top3_accuracy'),
    ("Macro F1-Score", 'macro_f1'),
    ("Weighted F1-Score", 'weighted_f1'),
)
for label, metric_key in headline_metrics:
    print(f"{label}: {overall[metric_key]:.2f}%")

In [None]:
# Plot confusion matrix
# Passes the evaluation results with normalize=True and save_plot=True.
# Presumably the matrix is row-normalised and the figure is also saved by
# the evaluator — TODO confirm in ModelEvaluator.plot_confusion_matrix.
evaluator.plot_confusion_matrix(mlp_results, normalize=True, save_plot=True)

In [None]:
# Print one table row per class from the parallel per-class metric lists.
print("Per-Class Performance:")
per_class = mlp_results['per_class_metrics']

for idx, label in enumerate(per_class['class_names']):
    # Build the row's columns first, then join them with the " | " separator.
    columns = [
        f"Acc={per_class['accuracy'][idx]:5.1f}%",
        f"Prec={per_class['precision'][idx]:5.1f}%",
        f"Rec={per_class['recall'][idx]:5.1f}%",
        f"F1={per_class['f1_score'][idx]:5.1f}%",
        f"Support={per_class['support'][idx]:4d}",
    ]
    print(f"{label:12s}: " + " | ".join(columns))

## 6. Model Export for Deployment

Export the trained model in multiple formats for production deployment.

In [None]:
# Initialize the exporter; artifacts go under ../exported_models.
exporter = ModelExporter(export_dir="../exported_models")

# Export the model in every supported format, requesting output verification
# for the ONNX and TorchScript exports.
print("Exporting MLP model...")
export_results = exporter.export_all_formats(
    model=mlp_model,
    model_name="MLP",
    input_shape=(1, 3, 32, 32),
    config={
        'onnx': {'opset_version': 11, 'verify': True},
        'torchscript': {'method': 'trace', 'verify': True},
        'state_dict': {'include_metadata': True}
    }
)

# Display export results, one line per format plus a verification line
# whenever the exporter reported a verification outcome.
print("\nExport Results:")
for format_name, result in export_results['exports'].items():
    if 'error' in result:
        print(f"{format_name.upper():15s}: ✗ Failed - {result['error']}")
        continue

    print(f"{format_name.upper():15s}: ✓ Success - {result['file_size_mb']:.2f} MB")
    if 'verification' not in result:
        # No verification outcome was reported for this format.
        continue

    verification = result['verification']
    max_diff = verification.get('max_difference', 'N/A')
    if verification.get('verified'):
        print(f"{'':15s}  Verification: ✓ Outputs match (max diff: {max_diff})")
    else:
        # Surface a failed check instead of leaving only the "Success" line,
        # which would hide that the exported model was not verified.
        print(f"{'':15s}  Verification: ✗ Not verified (max diff: {max_diff})")

## 7. Analysis and Insights

Analyze the MLP model's performance and characteristics.

In [None]:
# Model analysis: recap the architecture, headline metrics and per-class extremes.
print("MLP Model Analysis:")
print("=" * 50)

print("\n1. Architecture Characteristics:")
print("   - Input features: 3,072 (32×32×3 flattened)")
print(f"   - Hidden layers: {config['models']['mlp']['hidden_layers']}")
print(f"   - Total parameters: {mlp_model.count_parameters():,}")
print(f"   - Model size: {mlp_model.get_parameter_size_mb():.2f} MB")

print("\n2. Performance Summary:")
print(f"   - Test accuracy: {mlp_results['overall_metrics']['accuracy']:.2f}%")
print(f"   - Best validation accuracy: {mlp_trainer.best_val_acc:.2f}%")
print(f"   - Training epochs: {len(training_history['train_loss'])}")

# Inference timing is optional in the results; print it only when present.
if 'inference_time' in mlp_results:
    print(f"   - Average inference time: {mlp_results['inference_time']['mean_ms']:.2f} ms")

print("\n3. Mathematical Insights:")
print("   - Flattens spatial information into 1D vector")
print("   - Relies on global patterns rather than local features")
print("   - Uses ReLU activation for non-linearity")
print("   - Dropout regularization prevents overfitting")

# Find best and worst performing classes.
# Labels are read from the same per-class metrics dict as the accuracies so
# that an index into one is guaranteed to line up with the other.
per_class_metrics = mlp_results['per_class_metrics']
accuracies = per_class_metrics['accuracy']
class_labels = per_class_metrics['class_names']
best_class_idx = np.argmax(accuracies)
worst_class_idx = np.argmin(accuracies)
best_class_acc = accuracies[best_class_idx]
worst_class_acc = accuracies[worst_class_idx]

print("\n4. Class Performance:")
print(f"   - Best class: {class_labels[best_class_idx]} ({best_class_acc:.1f}%)")
print(f"   - Worst class: {class_labels[worst_class_idx]} ({worst_class_acc:.1f}%)")
print(f"   - Performance range: {best_class_acc - worst_class_acc:.1f}%")

## 8. Save Results and Summary

Save all results for comparison with the CNN model.

In [None]:
# Ask the evaluator for a report over the MLP results (save_report=True) and print it.
report = evaluator.generate_report([mlp_results], save_report=True)
print("Comprehensive evaluation report:")
print(report)

# Print the trainer's summary, leaving out its 'config' entry.
training_summary = mlp_trainer.get_summary()
print("\nTraining Summary:")
for field, detail in training_summary.items():
    if field == 'config':
        continue
    print(f"  {field}: {detail}")

# Closing banner.
banner = "=" * 50
print("\n" + banner)
print("MLP training and evaluation completed successfully!")
print("Next: Run CNN training notebook for comparison")
print(banner)