<a href="https://colab.research.google.com/github/RoyEHamlin/PyTorch-Lightning-Practice-01/blob/main/MNIST_nn_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Practice MNIST
#### from https://www.youtube.com/watch?v=OMDn66kM9Qc
#### NOTE: In Colab, select "Runtime" → "Change runtime type" → "GPU" before running the following code.

In [1]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [2]:
# https://youtu.be/OMDn66kM9Qc?t=1781
# Tensor, living in the GPU.
# Falls back to the CPU when no CUDA device is visible, so this cell no longer
# raises on a CPU-only runtime.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.randn(5).to(device)

tensor([-1.1561, -1.2523, -0.0602,  0.1842, -1.5902], device='cuda:0')

In [3]:
# Train, Val split
# The split is seeded so that every Restart & Run All produces the same
# train/validation partition; the validation size is named instead of being
# repeated as a magic number.
SPLIT_SEED = 42
N_VAL = 5000
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())
train, val = random_split(
    train_data,
    [len(train_data) - N_VAL, N_VAL],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Reshuffle the training samples every epoch so SGD does not see the same
# batches in the same order each time; validation order is irrelevant.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=32)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [4]:
# Defining model
# Plain feed-forward stack: 784 inputs -> 64 -> 64 -> 10 logits.
n_inputs, n_hidden, n_classes = 28 * 28, 64, 10
layers = [
    nn.Linear(n_inputs, n_hidden),
    nn.ReLU(),
    nn.Linear(n_hidden, n_hidden),
    nn.ReLU(),
    # light regularisation, in case we're overfitting https://youtu.be/OMDn66kM9Qc?t=1965
    nn.Dropout(0.1),
    nn.Linear(n_hidden, n_classes),
]
model = nn.Sequential(*layers)

In [5]:
# Define a more flexible model
class ResNet(nn.Module):
    """Small fully-connected classifier with one skip connection.

    Two 64-unit ReLU layers feed a 10-way linear head; the output of the first
    hidden layer is added back onto the output of the second before dropout.
    """

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_classes = 28 * 28, 64, 10
        self.l1 = nn.Linear(n_inputs, n_hidden)
        self.l2 = nn.Linear(n_hidden, n_hidden)
        self.l3 = nn.Linear(n_hidden, n_classes)
        self.do = nn.Dropout(0.1)

    def forward(self, x):
        """Return the raw logits for `x`, whose last dimension must be 28 * 28."""
        relu = nn.functional.relu
        first = relu(self.l1(x))
        second = relu(self.l2(first))
        # residual sum: second hidden activation plus the first one
        summed = self.do(second + first)
        return self.l3(summed)

# move from CPU -> GPU when one is available; otherwise the model stays on the CPU
# instead of raising  # https://youtu.be/OMDn66kM9Qc?t=1984
model = ResNet().to('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Define my optimiser
# model.parameters() returns a one-shot generator. Materialise it so `params`
# can still be iterated after the optimiser has consumed it (e.g. to inspect
# gradients) instead of silently yielding nothing.
params = list(model.parameters())
optimiser = optim.SGD(params, lr=1e-2)

In [7]:
# Define loss
# Cross-entropy on raw logits, averaged over the batch (the default reduction, spelled out).
loss = nn.CrossEntropyLoss(reduction='mean')

### Both the model and the optimiser can zero the gradients: `model.zero_grad()` or `optimiser.zero_grad()`.
#### https://youtu.be/OMDn66kM9Qc?t=712

In [11]:
# Training and validation Loop
def _run_epoch(model, loader, loss_fn, optimiser=None):
    """Run one pass over `loader` and return `(mean_loss, mean_accuracy)` as floats.

    Parameters
    ----------
    model : nn.Module
        Network to evaluate; batches are moved to whatever device its
        parameters live on.
    loader : iterable of (x, y) batches
        `x` is flattened to `(batch, -1)` before the forward pass.
    loss_fn : callable
        Called as `loss_fn(logits, y)`. It is assumed to return the *mean* loss
        over the batch (the default reduction of `nn.CrossEntropyLoss`).
    optimiser : torch.optim.Optimizer, optional
        When given, the model is put in train mode and updated after every
        batch. When omitted, the model is put in eval mode and the whole pass
        runs without gradient tracking.

    Both statistics are weighted by batch size, so a shorter final batch does
    not skew the epoch average. An empty loader yields `(nan, nan)`.
    """
    training = optimiser is not None
    model.train(training)  # Because 'Dropout' used: active in train mode only
    # Follow the model's device rather than hard-coding .cuda()
    # https://youtu.be/OMDn66kM9Qc?t=2013
    device = next(model.parameters()).device

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    # No gradient bookkeeping during validation  # https://youtu.be/OMDn66kM9Qc?t=1617
    with torch.set_grad_enabled(training):
        for x, y in loader:
            # x: b x 1 x 28 x 28  ->  b x 784
            b = x.size(0)  # b = number of rows
            x = x.view(b, -1).to(device)
            y = y.to(device)  # https://youtu.be/OMDn66kM9Qc?t=2016

            # 1 forward
            logits = model(x)

            # 2 compute objective function
            J = loss_fn(logits, y)

            if training:
                # 3 cleaning the gradient  # https://youtu.be/OMDn66kM9Qc?t=1235
                model.zero_grad()

                # 4 accumulate the partial derivatives of J wrt params
                # https://youtu.be/OMDn66kM9Qc?t=1339
                J.backward()

                # 5 step in the opposite direction of the gradient
                # long hand: params = params - eta * params.grad
                # https://youtu.be/OMDn66kM9Qc?t=796 (logic)
                optimiser.step()

            # .item() gives a plain Python number; keeping the tensor would keep
            # its graph alive and run out of memory
            # https://youtu.be/OMDn66kM9Qc?t=1557
            loss_sum += J.item() * b
            n_correct += (logits.argmax(dim=1) == y).sum().item()
            n_seen += b

    if n_seen == 0:
        return float('nan'), float('nan')
    return loss_sum / n_seen, n_correct / n_seen


nb_epochs = 5
for epoch in range(nb_epochs):
    train_loss, train_accuracy = _run_epoch(model, train_loader, loss, optimiser)
    print(f'Epoch {epoch + 1}', end=', ')
    print(f'training loss: {train_loss:.2f}', end=', ')
    print(f'training accuracy: {train_accuracy:.2f}')

    val_loss, val_accuracy = _run_epoch(model, val_loader, loss)
    print(f'Epoch {epoch + 1}', end=', ')
    print(f'validation loss: {val_loss:.2f}', end=', ')
    print(f'validation accuracy: {val_accuracy:.2f}')


        

Epoch 1, training loss: 0.83, training accuracy: 0.79
Epoch 1, validation loss: 0.41, validation accuracy: 0.89
Epoch 2, training loss: 0.37, training accuracy: 0.89
Epoch 2, validation loss: 0.33, validation accuracy: 0.90
Epoch 3, training loss: 0.30, training accuracy: 0.91
Epoch 3, validation loss: 0.28, validation accuracy: 0.92
Epoch 4, training loss: 0.26, training accuracy: 0.92
Epoch 4, validation loss: 0.24, validation accuracy: 0.93
Epoch 5, training loss: 0.23, training accuracy: 0.93
Epoch 5, validation loss: 0.22, validation accuracy: 0.94
