<a href="https://colab.research.google.com/github/ChiaraSolito/ComputerVision/blob/main/exercises1_lab02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab 02 - 1 #

## Image classification on the Fashion-MNIST dataset using a ResNet-18 ##

**1**

* Create a custom `FMnistResNet18` class in which:
    * Download the pre-trained ResNet-18
    * Change the first and last layers

Specifically, the input and output layers of a pre-trained ResNet-18 need to be changed, since ResNet was originally designed for the ImageNet competition, which was a color (3-channel) image classification task with 1000 classes. Fashion-MNIST, on the other hand, only contains 10 classes, and its images are grayscale (*i.e.*, 1-channel).

In [1]:
import torch
import torch.optim
import torchvision

import torch.nn as nn
import torchvision.models as models

# from tqdm import tqdm
from tqdm.notebook import tqdm
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.models import ResNet18_Weights


# Hyperparameters.
LR = 3e-4
EPOCH = 5
BATCH_SIZE = 50

# Device selection: keep using the second GPU ("cuda:1") when the machine
# actually has one, otherwise fall back to the first GPU and finally to the
# CPU. Requesting "cuda:1" on a single-GPU host would fail as soon as a tensor
# or module is moved to the device.
if torch.cuda.device_count() > 1:
    DEVICE = torch.device("cuda:1")
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

In [2]:
class FMnistResNet18(nn.Module):
    """ImageNet pre-trained ResNet-18 with its first and last layers replaced.

    The first convolution is swapped for one that accepts ``in_channels``
    input channels, and the final fully connected layer is swapped for one
    that outputs ``num_classes`` scores. Both replacement layers are newly
    created, so they do not carry pre-trained weights; every other layer
    keeps the ImageNet weights.

    Args:
        in_channels: Number of channels of the input images (default: 1,
            i.e. grayscale).
        num_classes: Number of output classes (default: 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        # Load the pre-trained ResNet-18 model from torchvision.models.
        self.model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

        # Change the input layer to take `in_channels`-channel images instead
        # of RGB images (same kernel size, stride and padding as before).
        self.model.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)

        # Change the output layer to output `num_classes` scores instead of
        # the 1000 ImageNet classes.
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Run the wrapped ResNet-18 on the batch ``x`` and return its output."""
        return self.model(x)

# Test the network, and verify the layers.
test_my_resnet = FMnistResNet18()
# [N, C, H, W]: batch N, channels C, height H, width W.
# 224x224 matches the size the images are resized to in the data pipeline.
dummy_input = torch.randn((32, 1, 224, 224))
# Only the output shape is inspected, so no autograd graph is needed.
with torch.no_grad():
    output = test_my_resnet(dummy_input)
print(output.shape)

# print(test_my_resnet)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

torch.Size([32, 10])


**2**

* Create DataLoaders
    * Hint: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
* Define the model
* Define the loss function
* Define the optimizer

In [3]:
# Dataset: raw training images as a float tensor. It is only used to compute
# the normalization statistics (and its length, i.e. the number of training
# samples).
# NOTE: `.data` replaces the deprecated `.train_data` alias, which emits a warning.
fashion_mnist = torchvision.datasets.FashionMNIST(download=True,
                                                  train=True,
                                                  root=".").data.float()

# Per-pixel mean / std of the training set, divided by 255 because ToTensor
# rescales the images before Normalize is applied. Converted to plain Python
# floats so Normalize receives ordinary numbers rather than 0-d tensors.
pixel_mean = (fashion_mnist.mean() / 255).item()
pixel_std = (fashion_mnist.std() / 255).item()

# Transformations.
data_transform = transforms.Compose([transforms.Resize((224, 224)),
                                     transforms.ToTensor(),
                                     transforms.Normalize((pixel_mean,), (pixel_std,))])

# DataLoaders.
# The reported sizes come from the datasets themselves: `len(loader) * BATCH_SIZE`
# would over-count whenever the dataset size is not a multiple of BATCH_SIZE.
train_set = torchvision.datasets.FashionMNIST(download=True, root=".", transform=data_transform, train=True)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
print(f"Fashion-MNIST - train: {len(train_set)}")

test_set = torchvision.datasets.FashionMNIST(download=True, root=".", transform=data_transform, train=False)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)
print(f"Fashion-MNIST - test: {len(test_set)}")

# Model: build the adapted ResNet-18, then move it to the selected device.
model = FMnistResNet18()
model = model.to(DEVICE)

# Loss function: cross-entropy over the class scores.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with learning rate LR.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/train-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw





Fashion-MNIST - train: 60000
Fashion-MNIST - test: 10000


**3**

* Write the training step
    * Hint: https://pytorch.org/tutorials/beginner/introyt/trainingyt.html

In [None]:
# Average training loss of each epoch, in order.
losses = []
num_train_samples = len(train_loader.dataset)

# Training step.
print(f"Start training on {DEVICE} [...]")

for e in range(EPOCH):
    # Put the model in training mode at every epoch: if this cell is re-run
    # after the evaluation cell, the model would otherwise still be in eval
    # mode (which changes the behavior of layers such as batch norm).
    model.train()
    e_loss = 0.0

    tepoch = tqdm(train_loader, unit="batch", desc=f"Epoch {e}")
    for x, y in tepoch:
        x, y = x.to(DEVICE), y.to(DEVICE)

        # Training step for the single batch.
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # Print statistics. The batch loss is a mean, so weight it by the
        # batch size to accumulate the sum of the per-sample losses.
        batch_loss = loss.item()
        e_loss += batch_loss * x.shape[0]
        tepoch.set_postfix(loss=batch_loss)

    epoch_avg_loss = e_loss / num_train_samples
    losses.append(epoch_avg_loss)
    print(f"Epoch: {e} - loss: {epoch_avg_loss}")

Start training on cpu [...]


  0%|          | 0/1200 [00:00<?, ?batch/s]

**4**

* Write the evaluation step

In [None]:
# Evaluation step.
t_loss = 0.0
correct = 0
num_test_samples = len(test_loader.dataset)

# Evaluation mode + no gradient tracking: the model is only queried here.
model.eval()
with torch.no_grad():
    for x, y in tqdm(test_loader, unit="batch", desc="Validation"):
        x, y = x.to(DEVICE), y.to(DEVICE)

        # This gets the prediction from the network.
        output = model(x)

        # Sum up batch loss. `criterion` returns the MEAN loss over the batch,
        # so it is multiplied by the batch size; dividing the total by the
        # number of samples afterwards then yields a true per-sample average.
        t_loss += criterion(output, y).item() * x.shape[0]

        # Get the index of the highest score, i.e. the predicted class.
        pred = output.argmax(dim=1)
        correct += (pred == y).sum().item()

t_loss /= num_test_samples

print('AVG loss: {:.4f}, ACC: {}/{} ({:.0f}%)'.format(
      t_loss, correct, num_test_samples,
      100. * correct / num_test_samples))