In [None]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Mini-batch size shared by both loaders (kept in one place so it is easy to tune).
BATCH_SIZE = 10

# MNIST train / test splits, downloaded into ./data on first use and converted
# to tensors by ToTensor.
training_data = datasets.MNIST(
    root="data", train=True, download=True, transform=ToTensor()
)

test_data = datasets.MNIST(
    root="data", train=False, download=True, transform=ToTensor()
)

# Training batches are reshuffled every epoch.
train_dataloader = torch.utils.data.DataLoader(
    training_data, batch_size=BATCH_SIZE, shuffle=True
)
# An averaged evaluation loss does not depend on sample order, so the test
# loader is left unshuffled to keep evaluation deterministic.
test_dataloader = torch.utils.data.DataLoader(
    test_data, batch_size=BATCH_SIZE, shuffle=False
)

In [None]:
# Fetch the (image, label) pair at index 1 of the training set and draw the
# image in grayscale on an axes with ticks and frame hidden.
img, label = training_data[1]
fig, ax = plt.subplots()
ax.set_axis_off()
ax.imshow(img.squeeze(), cmap="gray")

In [None]:
# Shape of the sample tensor fetched above (presumably channel-first 1x28x28 for MNIST after ToTensor — confirm from the output).
img.shape

In [None]:
# Class names exposed by the dataset; the prediction cells further down index this list with the predicted class id.
training_data.classes

In [None]:
# `train_labels` is a deprecated alias that emits a warning and forwards to
# `targets`; read the label tensor through the supported attribute instead.
train_labels = training_data.targets

In [None]:
# Count how many training samples carry each distinct label.
labels_counts = np.unique(train_labels.numpy(), return_counts=True)
distinct_labels, samples_per_label = labels_counts

# String x-values make matplotlib draw one categorical bar per label.
x_values = [str(digit) for digit in distinct_labels]

print(x_values)
plt.bar(x_values, samples_per_label, color="red")
plt.xlabel("Label")
plt.ylabel("Number of samples")
plt.title("MNIST Dataset Analysis")

## Building a neural network

In [None]:
# Report which accelerator backends this PyTorch build can use. The placeholder
# has to be filled with str.format; passing the value as a second print()
# argument would output the literal "{}" followed by the value.
print("GPU: {}".format(torch.cuda.is_available()))
print("MPS: {}".format(torch.backends.mps.is_available()))

Choose the right device for the computations.

In [None]:
# Pick the best backend that is actually available instead of hard-coding
# Apple's MPS, so the notebook also runs on CUDA machines and on CPU-only ones.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps:0")
else:
    device = torch.device("cpu")
device

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    """Small convolutional classifier producing 10 class logits per image.

    Two 5x5 convolution + 2x2 max-pool stages feed three fully connected
    layers. The last layer returns raw logits (no softmax is applied), which
    is the input ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # For a 28x28 input: conv5 -> 24x24, pool -> 12x12, conv5 -> 8x8,
        # pool -> 4x4, with 16 channels, hence 16 * 4 * 4 input features.
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute class logits for ``x``.

        Args:
            x: Single-channel image tensor; a batch ``(N, 1, 28, 28)`` is the
                expected layout given the layer sizes above.

        Returns:
            Tensor of shape ``(N, 10)`` with one row of logits per image.
        """
        # Move the input to wherever THIS module's weights live rather than to
        # a notebook-level global: an instance kept on the CPU then keeps
        # working even when the global device is an accelerator.
        x = x.to(self.conv1.weight.device)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten to (rows, 256); -1 lets the row count follow the batch size.
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Build the network, then place its parameters on the selected device.
model = NeuralNetwork()
model = model.to(device)

In [None]:
# Loss: cross-entropy computed on the raw logits returned by the network.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Optimiser: plain SGD over every parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

In [None]:
# Two independent, initially empty lists for loss values.
train_loss, test_losses = [], []

In [None]:
def train_one_epoch(epoch_index, tb_writer, log_every=1000):
    """Run one optimisation pass over ``train_dataloader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer``,
    ``device`` and ``train_dataloader``.

    Args:
        epoch_index: Zero-based epoch number; only used to compute the global
            step under which the loss is logged.
        tb_writer: Object exposing ``add_scalar(tag, value, step)``, e.g. a
            TensorBoard ``SummaryWriter``.
        log_every: Number of consecutive batches averaged into each reported
            loss value.

    Returns:
        Mean per-batch loss of the last reported window. When the loader
        yields fewer than ``log_every`` batches, the mean over all of them
        (0.0 for an empty loader).
    """
    running_loss = 0.0
    last_loss = 0.0
    batches_seen = 0
    reported = False

    for i, (inputs, labels) in enumerate(train_dataloader):
        # Tensor.to() returns a new tensor instead of moving in place, so the
        # result must be rebound. The labels have to sit on the same device as
        # the model outputs for the loss to be computable.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = criterion(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        running_loss += loss.item()
        batches_seen += 1
        if i % log_every == log_every - 1:
            last_loss = running_loss / log_every  # loss per batch
            print("Batch {}, loss: {}".format(i + 1, last_loss))
            tb_x = epoch_index * len(train_dataloader) + i + 1
            tb_writer.add_scalar("Loss/train", last_loss, tb_x)
            running_loss = 0.0
            reported = True

    # No full window was ever completed: running_loss still covers every batch
    # seen, so report their mean instead of a misleading 0.0.
    if not reported and batches_seen:
        last_loss = running_loss / batches_seen

    return last_loss

In [None]:
from torch.utils.tensorboard import SummaryWriter
import datetime

# Timestamp used to give this run's checkpoint files unique names.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
writer = SummaryWriter()
epoch_number = 0

EPOCHS = 11

# Any real loss is below infinity, so the first epoch always writes a checkpoint.
best_vloss = float("inf")

train_losses_history = []
validation_losses_history = []

for epoch in range(EPOCHS):
    print("EPOCH: {}".format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch_number, writer)
    train_losses_history.append(avg_loss)

    # Switch to evaluation mode for the validation pass.
    model.eval()
    running_vloss = 0.0

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        # Validate on the held-out test split, not on the training data.
        for vdata in test_dataloader:
            vinputs, vlabels = vdata
            # The labels must be on the same device as the model outputs.
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            voutputs = model(vinputs)
            vloss = criterion(voutputs, vlabels)
            # Accumulate a plain float so history, logging and printing do not
            # receive (possibly device-resident) tensors.
            running_vloss += vloss.item()

    avg_vloss = running_vloss / len(test_dataloader)
    validation_losses_history.append(avg_vloss)
    print("LOSS train {} valid {}".format(avg_loss, avg_vloss))

    writer.add_scalars(
        "Training vs. Validation Loss",
        {"Training": avg_loss, "Validation": avg_vloss},
        epoch_number + 1,
    )
    writer.flush()

    # Keep a checkpoint whenever the validation loss improves; `model_path`
    # therefore always names the best checkpoint written so far.
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = "model_{}_{}".format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1

In [None]:
# One entry per epoch: the value returned by train_one_epoch (mean loss of its last reported window of batches).
print(train_losses_history)

In [None]:
# Reload the checkpoint written by the training loop. `model_path` names the
# most recent checkpoint, i.e. the epoch with the lowest validation loss; a
# hard-coded timestamped file name would only exist on the machine and run that
# produced it. Point `checkpoint_path` elsewhere to inspect an older run.
checkpoint_path = model_path
saved_model = NeuralNetwork()
# map_location="cpu" lets a checkpoint saved from an accelerator load on any
# machine; the freshly built model above lives on the CPU as well.
saved_model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))

In [None]:
# Put the reloaded network into evaluation mode before inference; the module returned by eval() is shown as the cell output.
saved_model.eval()

In [None]:
# Redisplay the sample `img` loaded earlier; the next cell asks the reloaded model to classify it.
plt.imshow(img.squeeze(), cmap="gray")

In [None]:
# Classify `img` with the reloaded model. Inference needs no autograd graph,
# and reading the index through Tensor.item() works on whichever device the
# logits live (Tensor.numpy() raises for tensors that are not on the CPU).
with torch.no_grad():
    logits = saved_model(img)
training_data.classes[logits.argmax().item()]

In [None]:
# Fetch the (image, label) pair at index 2 of the training set (rebinding
# `label`) and draw the image in grayscale.
img2, label = training_data[2]
fig, ax = plt.subplots()
ax.imshow(img2.squeeze(), cmap="gray")

In [None]:
# Classify `img2` with the reloaded model. Inference needs no autograd graph,
# and reading the index through Tensor.item() works on whichever device the
# logits live (Tensor.numpy() raises for tensors that are not on the CPU).
with torch.no_grad():
    logits = saved_model(img2)
training_data.classes[logits.argmax().item()]