EVALUATION OF ASSIGNMENT 2 - Daniele Gotti


This notebook contains the code to evaluate the final trained model. To ensure the reproducibility and correctness of the submission, I have verified the pipeline by downloading the model weights directly from my Hugging Face repository. Additionally, since the test set is not available, I performed a test run using the training dataset to confirm that the code executes without errors.

In [4]:
# import all the libraries
import torch
import torch.nn as nn
import torchvision

from huggingface_hub import hf_hub_download
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, datasets
from torchvision import models
from google.colab import drive

In [5]:
# constant used during training
IMG_SIZE = 224
NUM_CLASSES=9
BATCH_SIZE = 16
NUM_PATCHES=7*7
EMBED_DIM=256
NUM_HEADS=8
NUM_LAYERS=1

# define the test transforms
test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# I replaced 'test' with 'train' to see if the code works; the professor will use 'test'
# drive.mount('/content/drive')
# test_path = '/content/drive/MyDrive/train/'
test_path = '/content/test/'

testset = datasets.ImageFolder(root=test_path, transform=test_transform)

testloader = DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    drop_last=False
)

Mounted at /content/drive


In [6]:
class ResnetTransformer(nn.Module):
    def __init__(self, embed_dim, num_patches, num_heads, num_layers, num_classes=9):
        super().__init__()

        # get a pre-trained ResNet-18 backbone
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        self.backbone = nn.Sequential(*list(self.backbone.children())[:-2]) # remove the last two layers
        resnet_feat_dim = 512 # number of features

        # add a projection layer between resnet and transformer
        self.proj_layer = nn.Linear(resnet_feat_dim, embed_dim)

        # positional embedding
        self.pos_embedding = nn.Parameter(
            torch.randn(1, 1 + num_patches, embed_dim),
            requires_grad=True
        )

        # CLS token
        self.class_token = nn.Parameter(
            torch.randn(1, 1, embed_dim),
            requires_grad=True
        )

        # transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=embed_dim*4,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        )

        # classification head
        self.class_head = nn.Sequential(
            nn.LayerNorm(embed_dim),
            nn.Linear(embed_dim, num_classes)
        )

    def forward(self, x):
        bs = x.shape[0]

        # extract features from Resnet
        feat = self.backbone(x) # shape (bs, 512, 7, 7)

        feat = feat.flatten(2) # flatten to tokens of shape (bs, 512, 49)

        feat = feat.transpose(1, 2) # (bs, 512, 49) -> (bs, 49, 512)

        # apply the projection layer
        feat = self.proj_layer(feat) # (bs, 49, 512) -> (bs, 49, embed_dim)

        # forward pass
        class_token = self.class_token.expand(bs, -1, -1)
        x = torch.cat((class_token, feat), dim=1)
        x = x + self.pos_embedding
        x = self.transformer(x)
        cls_out = x[:, 0, :]
        out = self.class_head(cls_out)

        return out

# initialize the model
model = ResnetTransformer(
    embed_dim=EMBED_DIM,
    num_patches=NUM_PATCHES,
    num_heads=NUM_HEADS,
    num_layers=NUM_LAYERS,
    num_classes=NUM_CLASSES
)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 198MB/s]


In [7]:
# download the checkpoint from HF repo
file_path = hf_hub_download(
    repo_id="daniele-gotti/Waste_Classifier",
    filename="resnet_transformer.pth",
    local_dir="/content",
    local_dir_use_symlinks=False
)

print("Downloaded to:", file_path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


resnet_transformer.pth:   0%|          | 0.00/48.5M [00:00<?, ?B/s]

Downloaded to: /content/resnet_transformer.pth


In [8]:
# load the checkpoint
state_dict = torch.load(file_path, map_location="cpu")
model.load_state_dict(state_dict)

<All keys matched successfully>

In [9]:
# code for evaluation
correct = 0
total = 0
model.eval()

# check device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

with torch.no_grad():
  for images, labels in testloader:
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs, dim=1)
    correct += (predicted == labels).sum().item()
    total += images.shape[0]

accuracy = correct / total * 100.0
print(f'The accuracy on the test set: {accuracy:.4f}')

The accuracy on the test set: 99.8796
