<a href="https://colab.research.google.com/github/MichalBalcerak/ML24-25/blob/main/Homework10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Training LeNet5 model

In [1]:
import torch
import torchvision
from matplotlib import pyplot

transform = torchvision.transforms.Compose(
    [ torchvision.transforms.ToTensor(), #Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
      torchvision.transforms.Normalize((0.1307), (0.3081))])

trainset = torchvision.datasets.MNIST(root='./data',
                                      train=True,
                                      download=True,
                                      transform=transform)

trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=2048,
                                          shuffle=True)   #we do shuffle it to give more randomizations to training epochs

testset = torchvision.datasets.MNIST(root='./data',
                                     train=False,
                                     download=True,
                                     transform=transform)

testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=1,
                                         shuffle=False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 11.5MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 339kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 2.69MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.12MB/s]


In [2]:
import torch.nn as nn
import torch.nn.functional as F

class LeNet5(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = torch.nn.Conv2d(in_channels= 1, out_channels= 6, kernel_size = 5)
        self.conv2 = torch.nn.Conv2d(in_channels= 6, out_channels= 16, kernel_size = 5)
        self.conv3 = torch.nn.Conv2d(in_channels= 16, out_channels= 120, kernel_size = 4)

        self.relu = torch.nn.ReLU()

        self.avgpool = torch.nn.AvgPool2d(kernel_size= 2, stride = 2)

        self.linear1 = torch.nn.Linear(120, 80)
        self.linear2 = torch.nn.Linear(80,10)

        self.dropout = torch.nn.Dropout(0.05)

    def forward(self, x):

        x = self.relu(self.conv1(x))     # B, 6, 24, 24
        x = self.avgpool(x)               # B, 6, 12, 12
        x = self.relu(self.conv2(x))      # B, 16, 8, 8
        x = self.avgpool(x)               # B, 16, 4, 4
        x = self.relu(self.conv3(x))      # B, 120, 1, 1

        x = x.squeeze(-1).squeeze(-1)     # B, 120

        x = self.relu(self.linear1(x))    # B, 80
        x = self.linear2(x)               # B, 10


        x = self.dropout(x)
        return x

In [3]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Working on {device}")

net = LeNet5().to(device)
optimizer = torch.optim.Adam(net.parameters(), 0.001)   #initial and fixed learning rate of 0.001.

net.train()    #it notifies the network layers (especially batchnorm or dropout layers, which we don't use in this example) that we are doing traning
for epoch in range(16):  #  an epoch is a training run through the whole data set

    for batch, data in enumerate(trainloader):
        batch_inputs, batch_labels = data

        batch_inputs = batch_inputs.to(device)  #explicitly moving the data to the target device
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()

        batch_outputs = net(batch_inputs)   #this line calls the forward(self, x) method of the LeNet5 object. Please note,
                                            # the nonlinear activation after the last layer is NOT applied
        loss = torch.nn.functional.cross_entropy(batch_outputs, batch_labels, reduction = "mean") #instead, nonlinear softmax is applied internally in THIS loss function
        print("epoch:", epoch, "batch:", batch, "current batch loss:", loss.item())
        loss.backward()       #this computes gradients as we have seen in previous workshops
        optimizer.step()     #but this line in fact updates our neural network.
                                ####You can experiment - comment this line and check, that the loss DOE

Working on cpu
epoch: 0 batch: 0 current batch loss: 2.3044567108154297
epoch: 0 batch: 1 current batch loss: 2.2964251041412354
epoch: 0 batch: 2 current batch loss: 2.2890684604644775
epoch: 0 batch: 3 current batch loss: 2.2800509929656982
epoch: 0 batch: 4 current batch loss: 2.2729737758636475
epoch: 0 batch: 5 current batch loss: 2.259610652923584
epoch: 0 batch: 6 current batch loss: 2.242342948913574
epoch: 0 batch: 7 current batch loss: 2.224454641342163
epoch: 0 batch: 8 current batch loss: 2.202317237854004
epoch: 0 batch: 9 current batch loss: 2.1775426864624023
epoch: 0 batch: 10 current batch loss: 2.1499242782592773
epoch: 0 batch: 11 current batch loss: 2.1157116889953613
epoch: 0 batch: 12 current batch loss: 2.0781326293945312
epoch: 0 batch: 13 current batch loss: 2.0337820053100586
epoch: 0 batch: 14 current batch loss: 1.9712457656860352
epoch: 0 batch: 15 current batch loss: 1.9188660383224487
epoch: 0 batch: 16 current batch loss: 1.8578848838806152
epoch: 0 batc

In [4]:
good = 0
wrong = 0

net.eval()              #it notifies the network layers (especially batchnorm or dropout layers, which we don't use in this example) that we are doing evaluation
with torch.no_grad():   #it prevents that the net learns during evalution. The gradients are not computed, so this makes it faster, too
    for batch, data in enumerate(testloader): #batches in test are of size 1
        datapoint, label = data

        prediction = net(datapoint.to(device))                  #prediction has values representing the "prevalence" of the corresponding class
        classification = torch.argmax(prediction)    #the class is the index of maximal "prevalence"

        if classification.item() == label.item():
            good += 1
        else:
            wrong += 1

print("accuracy = ", good/(good+wrong))

accuracy =  0.9786
