# MNIST Model Training with NeuroScribe

This notebook demonstrates how to train a simple convolutional neural network (CNN) on the MNIST dataset using the NeuroScribe framework.

## Overview
- **Dataset**: MNIST (handwritten digits)
- **Framework**: NeuroScribe
- **Model**: Convolutional Neural Network (CNN) with LeakyReLU activations
- **Optimizer**: Adam
- **Loss Function**: Mean Squared Error (MSE)
- **Device**: CUDA

## Steps
1. **Data Preparation**: Load and preprocess the MNIST dataset.
2. **Model Definition**: Define the CNN architecture using NeuroScribe.
3. **Training**: Train the model on the training dataset.
4. **Evaluation**: Evaluate the model's accuracy on the test dataset.
5. **Visualization**: Plot training loss and accuracy over epochs.
6. **Saving the Model**: Save the trained model for future use.

## Prerequisites
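- Ensure you have installed `NeuroScribe` and `CuPy` (the code below imports `cupy` directly).
- A CUDA-compatible GPU (the model and both data loaders are moved to `'cuda'`).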

## Usage
Follow the steps in this notebook to train a CNN on the MNIST dataset and visualize the results.


In [None]:
import time
import cupy as cp
import neuroscribe as ns
import neuroscribe.nn as nn
import neuroscribe.optim as optim
from neuroscribe.utils.data import data_loader, datasets
from neuroscribe.utils.data.transforms import Compose, Normalize
from neuroscribe.utils.metrics import accuracy_score
from neuroscribe.utils.plot import plot_loss, plot_accuracy


class MNIST(nn.Module):
    """Small CNN classifier: two 3x3 convolutions followed by two linear layers.

    Each hidden layer is followed by a LeakyReLU. The forward pass flattens
    the convolutional activations to rows of ``32 * 28 * 28`` features before
    the fully connected head, which ends in 10 raw output scores (no final
    activation is applied).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor (1 -> 16 -> 32 channels).
        self.conv1 = nn.Conv2d(
            1, 16, kernel_size=3, stride=1, padding=1, bias=True
        )
        self.relu1 = nn.LeakyReLU()
        self.conv2 = nn.Conv2d(
            16, 32, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.relu2 = nn.LeakyReLU()

        # Fully connected head (flattened features -> 512 -> 10).
        self.fc1 = nn.Linear(32 * 28 * 28, 512, bias=True)
        self.relu3 = nn.LeakyReLU()
        self.fc2 = nn.Linear(512, 10, bias=False)

    def forward(self, x):
        """Run ``x`` through both conv stages, flatten, and apply the linear head."""
        features = self.relu1(self.conv1(x))
        features = self.relu2(self.conv2(features))
        flattened = features.reshape((-1, 32 * 28 * 28))
        hidden = self.relu3(self.fc1(flattened))
        return self.fc2(hidden)


def one_hot_encode(labels, num_classes=10):
    """Build a one-hot tensor on the CUDA device from a tensor of class labels.

    ``labels.data`` is flattened and cast to ``int32``; those indices then
    select rows of a ``num_classes`` x ``num_classes`` identity matrix, so
    each label becomes one row with a single 1.
    """
    class_indices = labels.data.reshape(-1).astype('int32')
    identity = cp.eye(num_classes)
    one_hot_rows = identity[class_indices]
    return ns.tensor(one_hot_rows, requires_grad=True, device='cuda')


def evaluate_accuracy(model, data_loader):
    """Return the mean per-batch accuracy of ``model`` over ``data_loader``, scaled by 100.

    The model is put into evaluation mode, every batch is reshaped to
    ``(-1, 1, 28, 28)``, and the score returned by ``accuracy_score`` for the
    batch is accumulated. The result is the unweighted mean over batches, so
    a smaller final batch counts as much as a full one.

    Args:
        model: Callable module exposing ``eval()``.
        data_loader: Iterable yielding ``(inputs, targets)`` pairs. Note that
            this parameter shadows the imported ``data_loader`` module inside
            this function; the name is kept for backward compatibility.

    Returns:
        The averaged score multiplied by 100, or ``0.0`` when the loader
        yields no batches (previously this raised ``ZeroDivisionError``).
    """
    score = 0.0
    num_batches = 0
    model.eval()
    for inputs, targets in data_loader:
        inputs = inputs.reshape((-1, 1, 28, 28))
        targets = one_hot_encode(targets)
        outputs = model(inputs)

        # Compare the highest-scoring class with the position of the 1 in
        # the one-hot target row.
        predicted = outputs.argmax(dim=1)
        actual = targets.argmax(dim=1)
        local_score = accuracy_score(actual, predicted)
        score += local_score.item()
        num_batches += 1

    if num_batches == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0

    return 100.0 * score / num_batches


def train(model, optimizer, criterion, inputs, targets):
    """Run a single optimisation step on one batch and return its loss.

    Order of operations: switch the model to training mode, clear the
    optimizer's gradients, run the forward pass, compute the loss with
    ``criterion``, back-propagate, then apply the optimizer update.
    """
    model.train()
    optimizer.zero_grad()

    batch_loss = criterion(model(inputs), targets)
    batch_loss.backward()

    optimizer.step()
    return batch_loss


def main(*, num_epochs: int = 8, batch_size: int = 128, learning_rate: float = 0.003) -> None:
    """Train the MNIST CNN on CUDA, report per-epoch metrics, plot them, and save the model.

    The defaults reproduce the previously hard-coded configuration, so
    calling ``main()`` with no arguments behaves exactly as before.

    Args:
        num_epochs: Number of passes over the training data.
        batch_size: Batch size used for both the training and test loaders.
        learning_rate: Learning rate passed to the Adam optimizer.
    """
    # Set up data transformation
    # NOTE(review): (0.1307, 0.3081) are presumably the MNIST mean/std — confirm.
    transform = Compose([Normalize((0.1307,), (0.3081,))])

    # Load datasets
    training_data = datasets.MNIST(root="data", train=True, download=True, transform=transform)
    test_data = datasets.MNIST(root="data", train=False, download=True, transform=transform)

    # Set up data loaders (only the training loader shuffles)
    train_data_loader = data_loader.DataLoader(training_data, batch_size=batch_size, shuffle=True, train=True)
    test_data_loader = data_loader.DataLoader(test_data, batch_size=batch_size, shuffle=False, train=False)

    train_data_loader.to('cuda')
    test_data_loader.to('cuda')

    # Initialize model, optimizer, and loss function
    model = MNIST()
    model.to('cuda')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    # Training loop
    train_losses = []
    accuracies = []
    # The timer spans the whole loop, so it includes the per-epoch
    # evaluation on the test set as well as the training steps.
    start_time = time.perf_counter()

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for inputs, targets in train_data_loader:
            inputs = inputs.reshape((-1, 1, 28, 28))
            # MSE is computed against one-hot targets rather than class indices.
            targets = one_hot_encode(targets, num_classes=10)
            loss = train(model, optimizer, criterion, inputs, targets)
            epoch_loss += loss.item()

        # Mean batch loss for this epoch.
        average_loss = epoch_loss / len(train_data_loader)
        train_losses.append(average_loss)
        accuracy = evaluate_accuracy(model, test_data_loader)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1}/{num_epochs} - Loss: {average_loss:.4f} - Accuracy on test data: {accuracy:.2f}%')

    end_time = time.perf_counter()
    print(f"Training on {model._device} took: {end_time - start_time:.2f}s")

    # Plot loss and accuracy
    plot_loss(train_losses, title="Training Loss")
    plot_accuracy(accuracies, title="Accuracy")

    # Save the trained model
    ns.save(model, filename="mnist")


# Run the training pipeline only when this file is executed as a script
# (or as a notebook cell), not when it is imported.
if __name__ == "__main__":
    main()