In [None]:
"""
this is a duplicated notebook of 03a_PyTorch_Computer_Vision
with minimal change to apply to CIFAR10, instead of FashionMNIST
"""

# PyTorch Computer Vision - Cifar10

* See reference online book - https://www.learnpytorch.io/03_pytorch_computer_vision/
* This page may be helpful - https://github.com/rasbt/deeplearning-models/blob/master/pytorch_ipynb/transfer/transferlearning-vgg16-cifar10-1.ipynb

In [None]:
import os
import pathlib
import time

import torch
from torch import nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

from torchinfo import summary

from tqdm.auto import tqdm
import matplotlib.pyplot as plt

print(torch.__version__)
print(torchvision.__version__)

In [3]:
# parameters
EPOCHS = 20
BATCH_SIZE = 256

In [None]:
# device check
# prefer an NVIDIA GPU (CUDA), then Apple-silicon GPU (MPS), and fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device

In [None]:
# setup training and test data
# both splits share every setting except `train`:
#   root             -> folder the dataset is downloaded to
#   download         -> fetch the files if they are not in `root` yet
#   transform        -> convert each image to a tensor
#   target_transform -> labels are left untouched
train_data, test_data = (
    datasets.CIFAR10(
        root="data",
        train=is_train,   # True -> training split, False -> test split
        download=True,
        transform=ToTensor(),
        target_transform=None
    )
    for is_train in (True, False)
)

In [None]:
len(train_data), len(test_data)

In [None]:
# see the first training example
image, label = train_data[0]
image.shape, label

In [None]:
class_names = train_data.classes
class_names

In [None]:
class_to_idx = train_data.class_to_idx
class_to_idx

In [None]:
# check the shape of our image
print(f"image shape: {image.shape}, -> [channel, height, width]")
print(f"image label: {label}")

In [None]:
# visualize the first image

plt.figure()
plt.imshow(torch.permute(image, (1, 2, 0)))
plt.title(class_names[label])
plt.axis(False)
plt.show()

In [None]:
# plot more images
torch.manual_seed(42)

fig = plt.figure(figsize=(9, 9))
rows, cols = 3, 3
# rows, cols = 4, 4
for i in range(1, rows*cols+1):
    random_idx = torch.randint(0, len(train_data), size=[1]).item()
    img, label = train_data[random_idx]

    fig.add_subplot(rows, cols, i)
    plt.imshow(torch.permute(img, (1, 2, 0)))
    plt.title(class_names[label])
    plt.axis(False)


In [None]:
train_data, test_data

In [None]:
# prepare dataloader
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True   # shuffle to learn the pattern in data instead of the order of data
)

test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    shuffle=False   # test is only used for evaluation not training, so shuffle=False is ok
)

train_dataloader, test_dataloader

In [None]:
print(f"DataLoaders: {train_dataloader, test_dataloader}")
print(f"Length of train_dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}...")
print(f"Length of test_dataloader :  {len(test_dataloader)} batches of {BATCH_SIZE}...")

In [None]:
# check out what's inside the training dataloader

train_features_batch, train_labels_batch = next(iter(train_dataloader))
train_features_batch.shape, train_labels_batch.shape

In [None]:
# show a sample
torch.manual_seed(42)

random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
img, label = train_features_batch[random_idx], train_labels_batch[random_idx]

plt.imshow(torch.permute(img, (1, 2, 0)))
plt.title(class_names[label])
plt.axis(False)

print(f"Image size: {img.shape}")
print(f"Label: {label}, label size: {label.shape}")

In [None]:
# create a flatten layer
flatten_model = nn.Flatten()

# get a single sample and add a batch dimension
# nn.Flatten keeps dim 0 (start_dim=1), so a bare [3, 32, 32] image would come out
# as [3, 1024] (one row per channel) instead of a single 3072-long feature vector
x = train_features_batch[0].unsqueeze(dim=0)

# flatten the sample
output = flatten_model(x)

print(f"shape before flattening: {x.shape}")
print(f"shape after flattening : {output.shape}")

### Model 0: MLP classifier

In [19]:
class MLP(nn.Module):
    """
    Multi-layer perceptron with one hidden layer.

    The input is flattened (everything after the batch dimension), passed through
    a linear layer with ReLU activation and projected to `output_shape` logits.

    Args:
        input_shape: number of features per sample after flattening.
        hidden_units: width of the hidden layer.
        output_shape: number of output logits.
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # the linear layers are created in forward order so that seeded
        # initialisation stays reproducible
        hidden_layer = nn.Linear(in_features=input_shape, out_features=hidden_units)
        output_layer = nn.Linear(in_features=hidden_units, out_features=output_shape)
        self.layers_stack = nn.Sequential(
            nn.Flatten(),
            hidden_layer,
            nn.ReLU(),
            output_layer
        )

    def forward(self, x: torch.Tensor):
        logits = self.layers_stack(x)
        return logits

In [None]:
torch.manual_seed(42)

model_0 = MLP(
    input_shape=32 * 32 * 3,
    hidden_units=1024,
    output_shape=len(class_names)
)

print(summary(model_0, input_size=(BATCH_SIZE, 3, 32, 32)))

In [None]:
dummy_x = torch.rand([1, 3, 32, 32])
model_0(dummy_x)

In [22]:
# accuracy metric
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predicted labels to be compared to the truth labels.

    Returns:
        float: Accuracy as a percentage between 0 and 100, e.g. 78.45.
            Returns 0.0 when y_pred is empty instead of dividing by zero.
    """
    num_samples = len(y_pred)
    if num_samples == 0:
        # nothing to score; avoid ZeroDivisionError
        return 0.0
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / num_samples) * 100

In [None]:
# model_0 was already created above; show which device its parameters live on
torch.manual_seed(42)
print(next(model_0.parameters()).device)
# summary of the model
print(summary(model_0, input_size=(BATCH_SIZE, 3, 32, 32)))
# move the model to the target device
model_0 = model_0.to(device)

In [24]:
# from helper_functions import accuracy_fn
loss_fn = nn.CrossEntropyLoss()   # measure how wrong our model is
# optimizer = torch.optim.SGD(
#     params=model_0.parameters(),
#     lr=.1
# )
optimizer = torch.optim.Adam(
    params=model_0.parameters(),
    lr=.001
)

In [25]:
def train_step(
    model: torch.nn.Module,
    data_loader: torch.utils.data.DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    accuracy_fn,
    device: torch.device=device
):
    """
    Runs one pass over data_loader, updating the model after every batch.

    Args:
        model: model to train (switched to training mode here).
        data_loader: yields (inputs, labels) batches.
        optimizer: optimizer that updates the model parameters.
        loss_fn: called as loss_fn(logits, labels).
        accuracy_fn: called as accuracy_fn(y_true=..., y_pred=...) with predicted labels.
        device: device each batch is moved to before the forward pass.

    Returns:
        (train_loss, train_acc): per-batch loss and accuracy, averaged over the
        number of batches.
    """
    model.train()

    running_loss = 0.
    running_acc = 0.
    for features, targets in data_loader:
        # put the batch on the target device
        features = features.to(device)
        targets = targets.to(device)

        # forward pass and loss for this batch
        logits = model(features)
        batch_loss = loss_fn(logits, targets)

        # accumulate the metrics (logits -> prediction labels via argmax)
        running_loss += batch_loss.item()
        running_acc += accuracy_fn(y_true=targets, y_pred=logits.argmax(dim=1))

        # reset gradients, backpropagate, update parameters
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    # average the accumulated metrics over the number of batches
    num_batches = len(data_loader)
    return running_loss / num_batches, running_acc / num_batches

In [26]:
def test_step(
    model: torch.nn.Module,
    data_loader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    accuracy_fn,
    device: torch.device=device
):
    """
    Evaluates the model on every batch of data_loader without updating it.

    Args:
        model: model to evaluate (switched to eval mode here).
        data_loader: yields (inputs, labels) batches.
        loss_fn: called as loss_fn(logits, labels).
        accuracy_fn: called as accuracy_fn(y_true=..., y_pred=...) with predicted labels.
        device: device each batch is moved to before the forward pass.

    Returns:
        (test_loss, test_acc): per-batch loss and accuracy, averaged over the
        number of batches.
    """
    model.eval()

    loss_total = 0.
    acc_total = 0.
    # inference mode: no gradients are tracked during evaluation
    with torch.inference_mode():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            logits = model(features)

            loss_total += loss_fn(logits, targets).item()
            acc_total += accuracy_fn(y_true=targets, y_pred=logits.argmax(dim=1))

    # average the accumulated metrics over the number of batches
    num_batches = len(data_loader)
    return loss_total / num_batches, acc_total / num_batches

In [27]:
def train(
    model: torch.nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    test_dataloader: torch.utils.data.DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    accuracy_fn,
    epochs: int=5,
    device: torch.device=device,
    print_model: bool=True
):
    """
    Trains model on train_dataloader and evaluates it on test_dataloader
    after every epoch.

    Args:
        model: model to train; it is moved to `device` first.
        train_dataloader: batches used by train_step.
        test_dataloader: batches used by test_step.
        optimizer: optimizer passed on to train_step.
        loss_fn: loss passed on to train_step and test_step.
        accuracy_fn: metric passed on to train_step and test_step.
        epochs: number of epochs to run.
        device: device used for the model and the batches.
        print_model: when True, print a torchinfo summary for an input of
            size (BATCH_SIZE, 3, 32, 32) before training.

    Returns:
        dict with the keys "epoch", "train_loss", "train_acc", "test_loss" and
        "test_acc", each holding one entry per epoch.
    """
    if print_model:
        print(summary(model, input_size=(BATCH_SIZE, 3, 32, 32)))

    model = model.to(device)

    # one list per tracked quantity, filled epoch by epoch
    metric_names = ("epoch", "train_loss", "train_acc", "test_loss", "test_acc")
    hist_dict = {name: [] for name in metric_names}

    started_at = time.perf_counter()
    for epoch in range(1, epochs+1):
        train_loss, train_acc = train_step(
            model=model,
            data_loader=train_dataloader,
            optimizer=optimizer,
            loss_fn=loss_fn,
            accuracy_fn=accuracy_fn,
            device=device
        )
        test_loss, test_acc = test_step(
            model=model,
            data_loader=test_dataloader,
            loss_fn=loss_fn,
            accuracy_fn=accuracy_fn,
            device=device
        )

        # record this epoch's metrics
        epoch_values = (epoch, train_loss, train_acc, test_loss, test_acc)
        for name, value in zip(metric_names, epoch_values):
            hist_dict[name].append(value)

        # report progress; the elapsed time is measured from the start of training
        elps = time.perf_counter() - started_at
        print(
            f"epoch: {epoch}/{epochs}, "
            f"train_loss: {train_loss:.4f}, train_acc: {train_acc:.2f}, "
            f"test_loss: {test_loss:.4f}, test_acc: {test_acc:.2f}, "
            f"elapsed: {elps:.2f} sec"
        )

    return hist_dict

In [None]:
torch.manual_seed(42)

# train the model
hist_dict_model_0 = train(
    model=model_0,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

In [None]:
# plot the results

plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
plt.plot(hist_dict_model_0["train_loss"], label="train_loss")
plt.plot(hist_dict_model_0["test_loss"], label="test_loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_dict_model_0["train_acc"], label="train_acc")
plt.plot(hist_dict_model_0["test_acc"], label="test_acc")
plt.legend()

plt.tight_layout()
plt.show()

In [30]:
def eval_model(
    model: torch.nn.Module,
    data_loader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    accuracy_fn,
    device=device
):
    """
    Returns a dictionary containing the results of model predicting on data_loader.

    Args:
        model: model to evaluate (switched to eval mode here); it is not moved,
            so it must already be on `device`.
        data_loader: yields (inputs, labels) batches.
        loss_fn: called as loss_fn(logits, labels).
        accuracy_fn: called as accuracy_fn(y_true=..., y_pred=...) with predicted labels.
        device: device each batch is moved to before the forward pass.

    Returns:
        dict with the keys
            "model_name": class name of the model,
            "model_loss": loss averaged over the batches (float),
            "model_acc": accuracy averaged over the batches,
            "model_inference_time": forward-pass time in seconds, averaged over
                the batches (previously only the last batch was reported).

    Raises:
        ValueError: if data_loader yields no batches.
    """
    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader is empty, nothing to evaluate")

    total_loss, total_acc, total_time = 0., 0., 0.
    model.eval()
    with torch.inference_mode():
        for X, y in tqdm(data_loader):
            # make our data device agnostic
            X, y = X.to(device), y.to(device)

            # time the forward pass only
            # NOTE(review): GPU backends may run asynchronously, in which case this
            # can under-report the real compute time - confirm before relying on it
            t0 = time.perf_counter()
            y_pred = model(X)
            total_time += time.perf_counter() - t0

            # accumulate plain floats rather than tensors
            total_loss += loss_fn(y_pred, y).item()
            total_acc += accuracy_fn(
                y_true=y,
                y_pred=y_pred.argmax(dim=1)
            )

    return {
        "model_name": model.__class__.__name__,
        "model_loss": total_loss / num_batches,
        "model_acc": total_acc / num_batches,
        "model_inference_time": total_time / num_batches
    }

In [None]:
# get model_0 results dictionary
model_0_results = eval_model(
    model=model_0,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)

model_0_results

### Model 1: TinyVGG (Simple Convolutional Neural Network)

Original VGG has 16 / 19 trainable layers (known as VGG16 / VGG19).

We implement a simplified structure, which we call TinyVGG, with 5 trainable layers.

To find out what's happening inside a CNN, see this website: https://poloclub.github.io/cnn-explainer/

In [32]:
# create a convolutional neural network
class TinyVGG(nn.Module):
    """
    Model architecture that replicates the TinyVGG from CNN explainer website.

    Two convolutional blocks (conv -> ReLU -> conv -> ReLU -> 2x2 max-pool)
    followed by a linear classifier on the flattened feature maps.

    Args:
        input_shape: number of input channels (1 for B&W, 3 for colour images).
        hidden_units: number of feature maps produced by every conv layer.
        output_shape: number of output logits (classes).
        image_size: height/width of the square input images. Used to size the
            classifier input; defaults to 32 (CIFAR10).

    Raises:
        ValueError: if image_size is too small to survive the two pooling steps.
    """
    def __init__(
        self,
        input_shape: int,
        hidden_units: int,
        output_shape: int,
        image_size: int = 32
    ):
        super().__init__()
        self.conv_block_1 = self._conv_block(input_shape, hidden_units)
        self.conv_block_2 = self._conv_block(hidden_units, hidden_units)

        # the 3x3 convs with padding=1 keep height/width unchanged, while each
        # MaxPool2d(2) halves them (rounding down), e.g. 32 -> 16 -> 8
        pooled_size = (image_size // 2) // 2
        if pooled_size < 1:
            raise ValueError(f"image_size must be at least 4, got {image_size}")

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden_units * pooled_size * pooled_size,
                out_features=output_shape
            )
        )

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Builds conv -> ReLU -> conv -> ReLU -> 2x2 max-pool."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3, stride=1, padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=out_channels,
                out_channels=out_channels,
                kernel_size=3, stride=1, padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x):
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        return self.classifier(x)

In [None]:
torch.manual_seed(42)

model_1 = TinyVGG(
    input_shape=3,   # 1 for B&W image, 3 for color image
    hidden_units=10,
    output_shape=len(class_names)
)

# print(model_1)
print(summary(model_1, input_size=(BATCH_SIZE, 3, 32, 32)))
model_1 = model_1.to(device)

In [None]:
plt.imshow(torch.permute(image, (1, 2, 0)))

In [None]:
rand_image_tensor = torch.randn(size=(3, 32, 32))
rand_image_tensor = rand_image_tensor.unsqueeze(0)
model_1(rand_image_tensor.to(device))

In [None]:
torch.manual_seed(42)

# create a batch of images
images = torch.randn(size=(32, 3, 64, 64))
test_image = images[0]

print(f"Image batch shape: {images.shape}")
print(f"Single image shape: {test_image.shape}")
print(f"Test image: \n {test_image}")

In [None]:
test_image.shape

In [None]:
# create a single conv2d layer
# recall we are replicating https://poloclub.github.io/cnn-explainer/
conv_layer = nn.Conv2d(
    in_channels=3,
    out_channels=10,
    kernel_size=3,   # you can also write kernel_size=(3, 3)
    stride=1,        # how many jump you make in pixels
    padding=1        # extra pixels around the edges
)

# pass the data through the convolutional layer
conv_output = conv_layer(test_image)
conv_output.shape

In [None]:
print(f"shape before anything: {test_image.shape}")

test_image_through_conv = conv_layer(test_image)
print(f"shape after going through conv: {test_image_through_conv.shape}")

In [None]:
test_image.shape

In [None]:
# create a single pool layer
pool_layer = nn.MaxPool2d(kernel_size=2)

print(f"shape before anything: {test_image.shape}")

test_image_through_pool = pool_layer(test_image)
print(f"shape after going through pool: {test_image_through_pool.shape}")

test_image_through_pool_and_conv = pool_layer(test_image_through_conv)
print(f"shape after going through pool and conv: {test_image_through_pool_and_conv.shape}")

In [42]:
# from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(
#     params=model_1.parameters(),
#     lr=.1
# )
optimizer = torch.optim.Adam(
    params=model_1.parameters(),
    lr=.001,   # .1 / .01 did not work
)

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# train the model
hist_dict_model_1 = train(
    model=model_1,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

In [None]:
plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
plt.plot(hist_dict_model_1["train_loss"], label="train_loss")
plt.plot(hist_dict_model_1["test_loss"], label="test_loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_dict_model_1["train_acc"], label="train_acc")
plt.plot(hist_dict_model_1["test_acc"], label="test_acc")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# get model_1 results dictionary
model_1_results = eval_model(
    model=model_1,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
model_1_results

### Model 2: Training VGG16 from random initialization

Let's try the original VGG16 architecture.

In [None]:
# load VGG16
model_2 = torchvision.models.vgg16(weights=None)
print("--- old ---")
print(summary(model_2, input_size=(BATCH_SIZE, 3, 32, 32)))

In [None]:
# replace the final layer
model_2.classifier[6] = nn.Linear(in_features=4096, out_features=len(class_names))

print("\n --- new ---")
print(summary(model_2, input_size=(BATCH_SIZE, 3, 32, 32)))
model_2 = model_2.to(device)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model_2.parameters(),
    lr=.001
)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

# train the model
hist_dict_model_2 = train(
    model=model_2,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

In [None]:
plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
plt.plot(hist_dict_model_2["train_loss"], label="train_loss")
plt.plot(hist_dict_model_2["test_loss"], label="test_loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_dict_model_2["train_acc"], label="train_acc")
plt.plot(hist_dict_model_2["test_acc"], label="test_acc")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# get model_2 results dictionary
model_2_results = eval_model(
    model=model_2,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
model_2_results

### Model 3: Fine tuning VGG16

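This is the fine-tuning approach discussed in the PyTorch transfer learning tutorial (https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html): we start from the pre-trained weights, replace the final layer, and keep training the whole network.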

In [None]:
# load pre-trained model
# for more detail, see: https://pytorch.org/vision/stable/models.html
# for VGG16, see: https://pytorch.org/vision/main/models/generated/torchvision.models.vgg16.html

model_3 = torchvision.models.vgg16(weights="IMAGENET1K_V1")
print("\n --- old ---")
summary(model_3, input_size=(BATCH_SIZE, 3, 32, 32))

In [None]:
# model is not frozen, so we train the whole model, which is kinda expensive
# replace the final layer with 10 class classifier
model_3.classifier[6] = nn.Linear(in_features=4096, out_features=len(class_names))

print("\n --- new ---")
print(summary(model_3, input_size=(BATCH_SIZE, 3, 32, 32)))
model_3 = model_3.to(device)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model_3.parameters(),
    lr=.001
)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

# train the model
hist_dict_model_3 = train(
    model=model_3,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

In [None]:
plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
plt.plot(hist_dict_model_3["train_loss"], label="train_loss")
plt.plot(hist_dict_model_3["test_loss"], label="test_loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_dict_model_3["train_acc"], label="train_acc")
plt.plot(hist_dict_model_3["test_acc"], label="test_acc")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# get model_3 results dictionary
model_3_results = eval_model(
    model=model_3,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
model_3_results

### Model 4: VGG16 as frozen feature extractor

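This is the frozen-feature-extractor approach discussed in the PyTorch transfer learning tutorial (https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html): the pre-trained parameters are frozen and only the newly added classifier head is trained.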

In [None]:
model_4 = torchvision.models.vgg16(weights="IMAGENET1K_V1")
for param in model_4.parameters():
    param.requires_grad = False   # freeze
print("\n --- old ---")
# print(model_4)   # print to see the structure
print(summary(model_4, input_size=(BATCH_SIZE, 3, 32, 32)))

In [None]:
print(model_4)

In [None]:
# adjust the output shape
# replace the final layer, so you have a very tiny trainable part (computationally efficient)
# model_4.classifier[6] = nn.Sequential(
#     nn.Linear(in_features=4096, out_features=1024, bias=True),
#     nn.ReLU(inplace=True),
#     nn.Dropout(p=0.5, inplace=False),
#     nn.Linear(in_features=1024, out_features=len(class_names), bias=True)
# )   # switch the final layer to 10 class classifier

# or replace the whole classifier head
model_4.classifier = nn.Sequential(
    nn.Linear(in_features=25088, out_features=4096, bias=True),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=4096, out_features=256, bias=True),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=256, out_features=len(class_names), bias=True)
)   # switch the final layer to 10 class classifier

print("\n --- new ---")
# print(model_4)   # print to see the structure
print(summary(model_4, input_size=(BATCH_SIZE, 3, 32, 32)))
model_4 = model_4.to(device)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model_4.parameters(),
    lr=.001
)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

# train the model
hist_dict_model_4 = train(
    model=model_4,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

In [None]:
plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
plt.plot(hist_dict_model_4["train_loss"], label="train_loss")
plt.plot(hist_dict_model_4["test_loss"], label="test_loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_dict_model_4["train_acc"], label="train_acc")
plt.plot(hist_dict_model_4["test_acc"], label="test_acc")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# get model_4 results dictionary
model_4_results = eval_model(
    model=model_4,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
model_4_results

In [None]:
# fine tuning vgg19
model_5 = torchvision.models.vgg19(weights="IMAGENET1K_V1")
model_5.classifier[6] = nn.Linear(in_features=4096, out_features=len(class_names))
model_5 = model_5.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model_5.parameters(),
    lr=.001
)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

hist_dict_model_5 = train(
    model=model_5,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

model_5_results = eval_model(
    model=model_5,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
print(model_5_results)

In [None]:
# fine tuning resnet18
model_6 = torchvision.models.resnet18(weights="IMAGENET1K_V1")
model_6.fc = nn.Linear(in_features=512, out_features=len(class_names))
model_6 = model_6.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model_6.parameters(),
    lr=.001
)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

hist_dict_model_6 = train(
    model=model_6,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

model_6_results = eval_model(
    model=model_6,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
print(model_6_results)

In [None]:
# fine tuning resnet50
model_7 = torchvision.models.resnet50(weights="IMAGENET1K_V2")
model_7.fc = nn.Linear(in_features=2048, out_features=len(class_names))
model_7 = model_7.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model_7.parameters(),
    lr=.001
)

torch.manual_seed(42)
torch.cuda.manual_seed(42)

hist_dict_model_7 = train(
    model=model_7,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    epochs=EPOCHS,
    device=device
)

model_7_results = eval_model(
    model=model_7,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
print(model_7_results)

## Compare model results and inference time

In [None]:
import pandas as pd
df_results_comparison = pd.DataFrame(
    [
        model_0_results,
        model_1_results,
        model_2_results,
        model_3_results,
        model_4_results,
        model_5_results,
        model_6_results,
        model_7_results,
    ]
)
# eval_model names every entry after the model's class, so all VGG variants are
# called "VGG" and both ResNets "ResNet"; use unique, descriptive labels instead
# (same order as the rows above)
df_results_comparison["model_name"] = [
    "MLP",
    "TinyVGG",
    "VGG16 (not pre-trained)",
    "VGG16 (pre-trained, fine-tuned)",
    "VGG16 (pre-trained, frozen feature extractor)",
    "VGG19 (pre-trained, fine-tuned)",
    "ResNet18 (pre-trained, fine-tuned)",
    "ResNet50 (pre-trained, fine-tuned)",
]
df_results_comparison

In [None]:
# save the model

# directory to save the model
PATH = pathlib.Path("./models")
if PATH.exists():
    print(f"path '{PATH}' exists, nothing to do...")
else:
    print(f"path '{PATH}' does not exist, creating path...")
    PATH.mkdir(parents=True, exist_ok=True)

# save
MODEL_NAME = "fine_tuned_vgg16.pt"
print(f"saving model to {PATH/MODEL_NAME}")
torch.save(model_3.state_dict(), PATH/MODEL_NAME)

In [None]:
# load the model

# create an instance with the same architecture as model_3
loaded_model = torchvision.models.vgg16(weights=None)
loaded_model.classifier[6] = nn.Linear(in_features=4096, out_features=len(class_names))

# load
# map_location remaps the saved tensors onto the current device, so a checkpoint
# written on one accelerator can still be loaded on a machine without it
print(f"loading model from {PATH/MODEL_NAME}")
loaded_model.load_state_dict(torch.load(PATH/MODEL_NAME, map_location=device))
loaded_model = loaded_model.to(device)

# evaluate the in-memory model and the reloaded copy on the same test data
print("evaluating model_3")
model_3_results = eval_model(
    model=model_3,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
print(f"results of model_3: {model_3_results}")

print("evaluating loaded_model")
loaded_model_results = eval_model(
    model=loaded_model,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
print(f"results of loaded_model: {loaded_model_results}")

In [68]:
# are they close enough?
# compare with a small tolerance: exact float equality is brittle
assert abs(model_3_results["model_loss"] - loaded_model_results["model_loss"]) <= 1e-6 \
    and abs(model_3_results["model_acc"] - loaded_model_results["model_acc"]) <= 1e-6, \
    "model_3 and loaded_model are not the same"

In [None]:
# add inference time to the comparison dataframe
# NOTE: the dataframe was built from these same result dicts, so the column
# already exists; this overwrites it with the current values (model_3 was
# re-evaluated in the load-model cell above)

df_results_comparison["model_inference_time"] = [
    model_0_results["model_inference_time"],
    model_1_results["model_inference_time"],
    model_2_results["model_inference_time"],
    model_3_results["model_inference_time"],
    model_4_results["model_inference_time"],
    model_5_results["model_inference_time"],
    model_6_results["model_inference_time"],
    model_7_results["model_inference_time"],
]
df_results_comparison

In [None]:
# plot the results (test loss and test accuracy)

# one (legend label, training history) pair per model, in model order
histories = [
    ("MLP", hist_dict_model_0),
    ("TinyVGG", hist_dict_model_1),
    ("VGG16 (not pre-trained)", hist_dict_model_2),
    ("VGG16 (pre-trained, fine-tuned)", hist_dict_model_3),
    ("VGG16 (pre-trained, frozen feature extractor)", hist_dict_model_4),
    ("VGG19 (pre-trained, fine-tuned)", hist_dict_model_5),
    ("ResNet18 (pre-trained, fine-tuned)", hist_dict_model_6),
    ("ResNet50 (pre-trained, fine-tuned)", hist_dict_model_7),
]

plt.figure(figsize=(14, 5))
panels = [("test_loss", "loss"), ("test_acc", "accuracy")]
for position, (metric, ylabel) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for model_label, hist in histories:
        # use the recorded epoch numbers (1-based) on the x axis, not the list index
        plt.plot(hist["epoch"], hist[metric], alpha=.7, label=model_label)
    plt.xlabel("epoch")
    plt.ylabel(ylabel)
    if metric == "test_loss":
        # line colours are shared between the panels, so one legend is enough
        plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# visualize our model results

plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
df_results_comparison.set_index("model_name")["model_acc"].plot(kind="barh")
plt.xlabel("accuracy (%)")
plt.ylabel("model")

plt.subplot(1, 2, 2)
df_results_comparison.set_index("model_name")["model_inference_time"].plot(kind="barh")
plt.xlabel("inference time (sec)")
plt.ylabel("model")

plt.tight_layout()
plt.show()

NOTE:
* model 0: MLP classifier
* model 1: TinyVGG
* model 2: VGG16 (random initialization)
* model 3: VGG16 (fine tuned)
* model 4: VGG16 (frozen feature extractor)
* model 5: VGG19 (fine tuned)
* model 6: ResNet18 (fine tuned)
* model 7: ResNet50 (fine tuned)

## Make and evaluate predictions on random test samples with the trained models

In [72]:
def make_predictions(
    model: torch.nn.Module,
    data: list,
    device: torch.device = device
):
    """
    Predicts class probabilities for each sample in data, one sample at a time.

    Args:
        model: model used for the predictions (switched to eval mode here); it
            is not moved, so it must already be on `device`.
        data: samples without a batch dimension.
        device: device each sample is moved to before the forward pass.

    Returns:
        torch.Tensor: the per-sample probability vectors stacked along a new
        first dimension, on the CPU.
    """
    model.eval()
    probabilities = []
    with torch.inference_mode():
        for sample in data:
            # add a batch dimension and move the sample to the target device
            batch_of_one = sample.unsqueeze(dim=0).to(device)

            # forward pass: the model outputs raw logits
            logits = model(batch_of_one).squeeze()

            # logits -> probabilities, moved to the CPU for further calculations
            probabilities.append(torch.softmax(logits, dim=0).cpu())

    # turn the list of vectors into a single tensor
    return torch.stack(probabilities)

In [None]:
# first test example: the dataset returns an (image, label) pair
img, label = test_data[0]
img.shape, label

In [None]:
import random
# random.seed(42)
test_samples = []
test_labels = []
# draw 9 random indices and load only those samples, instead of materialising
# the whole test set with list(test_data) just to pick 9 images
for random_idx in random.sample(range(len(test_data)), k=9):
    sample, label = test_data[random_idx]
    test_samples.append(sample)
    test_labels.append(label)

# view the first sample shape
test_samples[0].shape

In [None]:
plt.imshow(torch.permute(test_samples[0], (1, 2, 0)))
plt.title(class_names[test_labels[0]])

In [None]:
# make predicitons
pred_probs = make_predictions(
    model=model_2,
    data=test_samples
)

# view first two predictions probabilities
pred_probs[:2]

In [None]:
# convert pred probabilities to labels
pred_classes = pred_probs.argmax(dim=1)
print(pred_classes)
print(test_labels)

In [None]:
# make predicitons
pred_probs = make_predictions(
    model=model_1,
    data=test_samples
)

# convert pred probabilities to labels
pred_classes = pred_probs.argmax(dim=1)
print(pred_classes)
print(test_labels)

plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
    plt.subplot(nrows, ncols, i+1)
    plt.imshow(torch.permute(sample, (1, 2, 0)))
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | truth: {truth_label}"

    if pred_label == truth_label:
        plt.title(title_text, fontsize=10, c="g")
    else:
        plt.title(title_text, fontsize=10, c="r")
    plt.axis(False)

In [None]:
# make predicitons
pred_probs = make_predictions(
    model=model_2,
    data=test_samples
)

# convert pred probabilities to labels
pred_classes = pred_probs.argmax(dim=1)
print(pred_classes)
print(test_labels)

plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
    plt.subplot(nrows, ncols, i+1)
    plt.imshow(torch.permute(sample, (1, 2, 0)))
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | truth: {truth_label}"

    if pred_label == truth_label:
        plt.title(title_text, fontsize=10, c="g")
    else:
        plt.title(title_text, fontsize=10, c="r")
    plt.axis(False)

In [None]:
# make predicitons
pred_probs = make_predictions(
    model=model_3,
    data=test_samples
)

# convert pred probabilities to labels
pred_classes = pred_probs.argmax(dim=1)
print(pred_classes)
print(test_labels)

plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
    plt.subplot(nrows, ncols, i+1)
    plt.imshow(torch.permute(sample, (1, 2, 0)))
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | truth: {truth_label}"

    if pred_label == truth_label:
        plt.title(title_text, fontsize=10, c="g")
    else:
        plt.title(title_text, fontsize=10, c="r")
    plt.axis(False)

In [None]:
# make predicitons
pred_probs = make_predictions(
    model=model_4,
    data=test_samples
)

# convert pred probabilities to labels
pred_classes = pred_probs.argmax(dim=1)
print(pred_classes)
print(test_labels)

plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
    plt.subplot(nrows, ncols, i+1)
    plt.imshow(torch.permute(sample, (1, 2, 0)))
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | truth: {truth_label}"

    if pred_label == truth_label:
        plt.title(title_text, fontsize=10, c="g")
    else:
        plt.title(title_text, fontsize=10, c="r")
    plt.axis(False)