# Parameter Efficient Fine Tuning (PEFT)

In [1]:
! pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Looking in indexes: https://download.pytorch.org/whl/cu126
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu126/torchaudio-2.7.1%2Bcu126-cp312-cp312-win_amd64.whl.metadata (6.8 kB)
Collecting torch
  Downloading https://download.pytorch.org/whl/cu126/torch-2.7.1%2Bcu126-cp312-cp312-win_amd64.whl.metadata (27 kB)
Downloading https://download.pytorch.org/whl/cu126/torchaudio-2.7.1%2Bcu126-cp312-cp312-win_amd64.whl (4.2 MB)
   ---------------------------------------- 0.0/4.2 MB ? eta -:--:--
   ------------------- -------------------- 2.1/4.2 MB 9.8 MB/s eta 0:00:01
   ---------------------------------------  4.2/4.2 MB 11.0 MB/s eta 0:00:01
   ---------------------------------------- 4.2/4.2 MB 10.1 MB/s eta 0:00:00
Downloading https://download.pytorch.org/whl/cu126/torch-2.7.1%2Bcu126-cp312-cp312-win_amd64.whl (2716.9 MB)
   ---------------------------------------- 0.0/2.7 GB ? eta -:--:--
   ---------------------------------------- 0.0/2.7 GB 9.8 MB/s eta 0:04:37


В этой домашке будем дообучать DinoV2 (у которого архитекура ViT)
1. Просто влоб собираем классификатор на DinoV2, добавляю к первому CLS токену, линейную голову. Учим только голову
2. Линейная голова \+ Прикручиваем к Query и Value весам LoRA адаптеры. Учим LoRA + голову
3. Учим всю сетку.

In [2]:
# Сначала ставим pytorch, потом
! pip install transformers pandas



In [6]:
! pip install numpy



In [9]:
%cd ..

c:\Users\rpd-g\CV_Rocket\HomeWork_3\dino_tune_hw_ready\dino_tune_hw\dino_tune_hw


In [4]:
! nvcc -V

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Fri_Jun_14_16:44:19_Pacific_Daylight_Time_2024
Cuda compilation tools, release 12.6, V12.6.20
Build cuda_12.6.r12.6/compiler.34431801_0


In [10]:
%pwd

'c:\\Users\\rpd-g\\CV_Rocket\\HomeWork_3\\dino_tune_hw_ready\\dino_tune_hw\\dino_tune_hw'

### Скачиваем датасет

В `resources` (лежит в архиве) запустить bash tinyimagenet.sh и скачать датасет.

В скаченную папку с датасетом закинуть csv с путями для трейна и валидации (`train.csv`, `validation.csv`).

In [14]:
from tiny_imagenet.tiny_imagenet_dataset import CsvDataset, get_valid_transform, MEAN, STD
from tiny_imagenet.tiny_imagenet_evaluate import ImageNetEvaluate
import torch
from torch import nn
from transformers import Dinov2Model, Dinov2PreTrainedModel
from transformers import AutoModel
from torch.utils.data import DataLoader
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm

### Собираем линейный классификатор на DinoV2

In [15]:
class Dinov2Classifier(nn.Module):

    def __init__(self):
        super().__init__()
        self.dinov2 = AutoModel.from_pretrained('facebook/dinov2-small')
        # Замораживаем веса  DinoV2
        for param in self.dinov2.parameters():
            param.requires_grad = False
        # Добавляем линейный классификатор
        self.classifier = nn.Linear(384, 1000)
    
    def forward(self, pixel_values, output_hidden_states=False, output_attentions=False):
        outputs = self.dinov2(
            pixel_values,
            output_hidden_states=output_hidden_states,
            output_attentions=output_attentions,
        )
        # Достать из выхода CLS токен
        cls_token = outputs.last_hidden_state[:, 0, :]

        logits = self.classifier(cls_token)
        return logits


LOSS_LIST = []


def train_epoch(dataloader, model, optimizer, loss_fn):
    model.train()
    running_loss = 0.0
    total_batches = 0
    
    with tqdm(dataloader, unit="batch") as tepoch:
        for data, target in tepoch:
            target = target.type(torch.LongTensor) 
            data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            outputs = model(data)
            loss = loss_fn(outputs, target)
            loss.backward()
            optimizer.step()
            running_loss = running_loss * 0.9 + loss.item() * 0.1
            total_batches += 1

            LOSS_LIST.append(running_loss)
            tepoch.set_postfix(loss=running_loss)


def draw_loss_plot(losses):
    """
    Function to draw a loss plot from a list of losses.
    
    Args:
        losses (list): List of loss values (floats or integers) to plot.
    """
    if not losses:
        print("The list of losses is empty.")
        return
    plt.figure(figsize=(10, 6))
    plt.plot(losses, label='Loss', color='blue', linewidth=2)
    plt.title('Loss vs. Iterations', fontsize=16)
    plt.xlabel('Iteration/Epoch', fontsize=14)
    plt.ylabel('Loss', fontsize=14)
    plt.grid(True)
    plt.legend()
    plt.show()


dataset = CsvDataset('./resources/tiny-imagenet-200/train.csv', './resources/tiny-imagenet-200/', get_valid_transform(224, MEAN, STD))

### Дообучаем на датасете только веса головы
учим одну эпоху

За это получаем +20 баллов

In [16]:
model = Dinov2Classifier().cuda();
optimizer = torch.optim.AdamW(model.parameters())
loss_fn = nn.CrossEntropyLoss()
dataloader = DataLoader(dataset, batch_size=64)
train_epoch(dataloader, model, optimizer, loss_fn)
validation_dataset = CsvDataset('./resources/tiny-imagenet-200/validation.csv', './resources/tiny-imagenet-200/', get_valid_transform(224, MEAN, STD))
validation_dataloader = DataLoader(validation_dataset, batch_size=64)
validator = ImageNetEvaluate(validation_dataloader, model, 'cuda')
validator.evaluate()

100%|██████████| 1563/1563 [14:32<00:00,  1.79batch/s, loss=0.778]
batch accuracy 0.79080: 100%|██████████| 157/157 [01:28<00:00,  1.77it/s]


0.7908

### Соберём LoRA адаптер

In [17]:
class LoRA(nn.Module):

    def __init__(self, orig_linear, n=384, r=8):
        super().__init__()
        # LoRA состоит из двух линейных слоёв
        # Веса второого линейного слоя нужно инициализовать нулями! 
        # TODO
        self.orig_linear = orig_linear

        self.linear_in = nn.Linear(n, r, bias=False)
        self.linear_out = nn.Linear(r, n, bias=False)
        nn.init.zeros_(self.linear_out.weight)

        for param in self.orig_linear.parameters():
            param.requires_grad = False
        

    def forward(self, x):
        orig_x = self.orig_linear(x)
        lora_x = self.linear_out(self.linear_in(x))
        return orig_x + lora_x


model = Dinov2Classifier().cuda();
for l in model.dinov2.encoder.layer:
    # Меняем только query и Value
    l.attention.attention.query = LoRA(l.attention.attention.query)
    l.attention.attention.value = LoRA(l.attention.attention.value)
    
model.cuda()
optimizer = torch.optim.AdamW(model.parameters())
loss_fn = nn.CrossEntropyLoss()
dataloader = DataLoader(dataset, batch_size=64)
train_epoch(dataloader, model, optimizer, loss_fn)
validation_dataset = CsvDataset('./resources/tiny-imagenet-200/validation.csv', './resources/tiny-imagenet-200/', get_valid_transform(224, MEAN, STD))
validation_dataloader = DataLoader(validation_dataset, batch_size=64)
validator = ImageNetEvaluate(validation_dataloader, model, 'cuda')
validator.evaluate()

100%|██████████| 1563/1563 [11:34<00:00,  2.25batch/s, loss=0.674]
batch accuracy 0.81220: 100%|██████████| 157/157 [00:38<00:00,  4.07it/s]


0.8122

### Сравниваем с дообучением всей сети

Мы получили результаты лучше, добавив паметров к сети. 

Может если мы выучим всю сеть, то точность будет ещё выше?

За итоговое сравнение Head vs LoRA vs all получаете 30 баллов

In [18]:
model = Dinov2Classifier().cuda();
for p in model.dinov2.parameters():
    p.requires_grad = True
optimizer = torch.optim.AdamW(model.parameters())
loss_fn = nn.CrossEntropyLoss()
dataloader = DataLoader(dataset, batch_size=64)
train_epoch(dataloader, model, optimizer, loss_fn)
validation_dataset = CsvDataset('./resources/tiny-imagenet-200/validation.csv', './resources/tiny-imagenet-200/', get_valid_transform(224, MEAN, STD))
validation_dataloader = DataLoader(validation_dataset, batch_size=64)
validator = ImageNetEvaluate(validation_dataloader, model, 'cuda')
validator.evaluate()

100%|██████████| 1563/1563 [13:55<00:00,  1.87batch/s, loss=4.73]
batch accuracy 0.05990: 100%|██████████| 157/157 [00:36<00:00,  4.28it/s]


0.0599