# DINOv2 model fine-tuning

This notebook describes the process of fine-tuning a DINOv2 model on a modified ImageNet-100 dataset (ImageNet-200).

## Import libraries

In [1]:
# Ignore warning while running the code
import warnings
warnings.filterwarnings('ignore', category=UserWarning)

In [2]:
# Handling path
import os
from pathlib import Path

# PyTorch
import torch
from torch import nn, optim
from torchvision import datasets, transforms

# DINOv2 ViT model
from dinov2.models.vision_transformer import vit_small

## Dataset path

First, populate the training and validation datasets by creating two folders, one named `train` and one named `val`. In each folder, group the images by class, placing each class's images in a subfolder named after that class.

In [5]:
# Working directory of the kernel at the time this cell runs
local_directory = os.getcwd()

# Locations of the training and validation image folders. Both are relative
# paths, so they are resolved against the current working directory when used.
train_dataset_dir, valid_dataset_dir = (
    Path("../../data/train"),
    Path("../../data/val"),
)

## Image Resizing

Here we define a resizing method to make sure that the size of each image fits our model.

In [6]:
class ResizeAndPad:
    """
    Callable transform: resize an image, then pad it so that its width and
    height are both whole multiples of a given number.
    """

    def __init__(self, target_size, multiple):
        """
        Store the settings used on every call

        target_size: size passed unchanged to transforms.Resize
        multiple: value the padded width and height must be divisible by
        """
        self.target_size = target_size
        self.multiple = multiple

    def __call__(self, img):
        """
        Resize the image and pad each axis up to the next multiple

        img: image accepted by transforms.Resize; the resized result must
             expose .width and .height (e.g. a PIL image)
        Returns the resized and padded image
        """
        resized = transforms.Resize(self.target_size)(img)

        # Pixels missing on each axis before the side is a whole multiple;
        # the outer modulo turns an already aligned side into 0
        step = self.multiple
        extra_w = (step - resized.width % step) % step
        extra_h = (step - resized.height % step) % step

        # Split the padding between both sides; when the amount is odd the
        # right / bottom side receives the extra pixel
        left, top = extra_w // 2, extra_h // 2
        right, bottom = extra_w - left, extra_h - top

        # transforms.Pad takes a 4-tuple as (left, top, right, bottom)
        return transforms.Pad((left, top, right, bottom))(resized)

In [7]:
# Side length, in pixels, that images are resized to before padding
IMAGE_SIZE = 256

# Square size derived from IMAGE_SIZE
TARGET_SIZE = (IMAGE_SIZE,) * 2

In [8]:
# Preprocessing / augmentation pipelines, keyed by dataset split
DATA_TRANSFORM = dict(
    train=transforms.Compose(
        [
            # Resize to TARGET_SIZE, then pad both sides up to a multiple of 14
            # (presumably the ViT patch size -- TODO confirm against the model)
            ResizeAndPad(TARGET_SIZE, 14),
            # Augmentation: random rotation with angle drawn from [-360, 360]
            # degrees, followed by random horizontal and vertical flips
            transforms.RandomRotation(360),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            # Convert to a tensor, then normalize each channel with the given
            # mean / std (the commonly used ImageNet statistics)
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    ),
)

In [12]:
# Define the DATASETS, DATALOADERS and CLASSES for the training split

# Images are read from one sub-folder per class under train_dataset_dir
DATASETS = {}
DATASETS["train"] = datasets.ImageFolder(
    root=train_dataset_dir,
    transform=DATA_TRANSFORM["train"],
)

# Mini-batches of 8 images, reshuffled on every pass over the dataset
DATALOADERS = {}
DATALOADERS["train"] = torch.utils.data.DataLoader(
    dataset=DATASETS["train"],
    batch_size=8,
    shuffle=True,
)

# Class names as discovered by ImageFolder
CLASSES = DATASETS["train"].classes

In [13]:
# Device used for training: the GPU when CUDA is available, otherwise the CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## DINOv2 Classification Model

In [None]:
class DINOClassificationModel(nn.Module):
    def __index__(self):
        """
        Load the pretrained DINOv2 Classification Model
        """
        super(DINOClassificationModel, self).__init__()