In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets,transforms
%matplotlib inline

# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.1307 and std 0.3081.
transformation = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST train/test splits, stored under (and downloaded to, if missing) 'data/'.
train_dataset = datasets.MNIST(
    root='data/',
    train=True,
    transform=transformation,
    download=True,
)
test_dataset = datasets.MNIST(
    root='data/',
    train=False,
    transform=transformation,
    download=True,
)

# Mini-batch iterators; both splits are shuffled and served 32 samples at a time.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=True,
)

class Net(nn.Module):
    """
    A simple convolutional neural network (CNN) model for image classification.

    The network consists of two 5x5 convolutional layers (each followed by
    2x2 max-pooling and ReLU) and two fully connected layers, with dropout
    after the second convolution and after the first fully connected layer.

    The first fully connected layer expects 320 features per sample, which
    is what the convolutional stack produces for 1x28x28 inputs
    (28 -> 24 -> 12 -> 8 -> 4 spatially, 20 channels: 20 * 4 * 4 = 320).

    Methods:
        forward(x):
            Defines the forward pass of the network.
    """

    def __init__(self):
        """
        Initializes the network layers.
        """

        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5, stride=1, padding=0)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """
        Defines the forward pass of the network.

        Args:
            x (torch.Tensor): Input tensor with shape (N, 1, 28, 28), where N
                              is the batch size.

        Returns:
            torch.Tensor: The output tensor with shape (N, 10), representing the log probabilities for each class.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield 320
                          features per sample after the convolutional stack.
        """

        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten per sample, keeping the batch dimension fixed. Reshaping
        # with view(-1, 320) would silently change the batch size for inputs
        # of an unexpected spatial size; this way fc1 raises a clear shape
        # error instead.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told the mode explicitly.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

def fit(epoch, model, data_loader, phase='training', optimizer=None):
    """
    Trains or evaluates the model for one epoch.

    Depending on the phase, the function either trains the model (forward,
    backward and optimizer step per batch) or evaluates it with gradients
    disabled. A summary line with the loss and accuracy is printed.

    Args:
        epoch (int): The current epoch number (not used in the computation).
        model (torch.nn.Module): The model to be trained or evaluated. Its output is fed to ``F.nll_loss``, so it must produce log-probabilities.
        data_loader (torch.utils.data.DataLoader): DataLoader providing the dataset for the current phase.
        phase (str, optional): Specifies the phase of operation. 'training' for training and 'validation' for evaluation. Defaults to 'training'.
        optimizer (torch.optim.Optimizer, optional): Optimizer used in the 'training' phase. When omitted, the module-level variable named ``optimizer`` is used, which keeps existing four-argument calls working.

    Returns:
        tuple: A tuple containing:
            - loss (float): The average per-sample loss for the epoch.
            - accuracy (float): The accuracy percentage for the epoch.

    Raises:
        NameError: If phase is 'training' and no optimizer was passed and no module-level ``optimizer`` exists.
    """

    is_training = phase == 'training'
    is_validation = phase == 'validation'

    if is_training:
        model.train()
        if optimizer is None:
            # Backward compatibility: fall back to the global optimizer that
            # callers historically had to define before calling fit().
            optimizer = globals().get('optimizer')
        if optimizer is None:
            raise NameError(
                "name 'optimizer' is not defined; pass optimizer=... to fit()"
            )
    if is_validation:
        model.eval()

    running_loss = 0.0
    running_correct = 0  # plain int so the result is valid even with no batches

    with torch.no_grad() if is_validation else torch.enable_grad():
        for data, target in data_loader:
            if is_training:
                optimizer.zero_grad()

            output = model(data)
            batch_loss = F.nll_loss(output, target)
            # batch_loss is the mean over the batch; weight it by the batch
            # size so that dividing by the dataset size below yields a true
            # per-sample average (the last batch may be smaller).
            running_loss += batch_loss.item() * target.size(0)
            preds = output.argmax(dim=1)
            running_correct += (preds == target).sum().item()

            if is_training:
                batch_loss.backward()
                optimizer.step()

    num_samples = len(data_loader.dataset)
    loss = running_loss / num_samples
    accuracy = 100.0 * running_correct / num_samples

    print(f'{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{num_samples} {accuracy:{10}.{4}}')
    return loss, accuracy

# Build the network and a plain SGD optimizer (with momentum) over its parameters.
model = Net()
optimizer = optim.SGD(params=model.parameters(), momentum=0.5, lr=0.01)

# Per-epoch history, consumed by the plotting code further down.
train_losses, train_accuracy = [], []
val_losses, val_accuracy = [], []

# range(1, 20) yields epochs 1 through 19.
for epoch in range(1, 20):
    # One full pass over the training data, then one over the held-out data.
    train_epoch_loss, train_epoch_accuracy = fit(
        epoch, model, train_loader, phase='training'
    )
    val_epoch_loss, val_epoch_accuracy = fit(
        epoch, model, test_loader, phase='validation'
    )

    train_losses.append(train_epoch_loss)
    val_losses.append(val_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_accuracy.append(val_epoch_accuracy)

# Loss curves. Each plot gets its own figure: without an explicit
# plt.figure(), both plots land on the same implicit axes, so the loss and
# accuracy curves overlay each other, the second title overwrites the first
# and the legend lists every label twice.
plt.figure()
plt.plot(range(1, len(train_losses) + 1), train_losses, 'bo', label='training')
plt.plot(range(1, len(val_losses) + 1), val_losses, 'r', label='validation')
plt.title('Loss')
plt.legend()

# Accuracy curves, on a separate figure.
plt.figure()
plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, 'bo', label='training')
plt.plot(range(1, len(val_accuracy) + 1), val_accuracy, 'r', label='validation')
plt.title('Accuracy')
plt.legend()
