In [1]:
import torch
import torch.nn as nn
import torchvision

from sklearn.metrics import accuracy_score

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import os

import pandas as pd
import cv2

In [4]:
import albumentations
from albumentations.pytorch import ToTensorV2

In [7]:
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip

--2024-08-26 11:17:19--  http://cs231n.stanford.edu/tiny-imagenet-200.zip
Resolving cs231n.stanford.edu (cs231n.stanford.edu)... 171.64.64.64
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cs231n.stanford.edu/tiny-imagenet-200.zip [following]
--2024-08-26 11:17:19--  https://cs231n.stanford.edu/tiny-imagenet-200.zip
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.64.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248100043 (237M) [application/zip]
Saving to: ‘tiny-imagenet-200.zip’


2024-08-26 11:17:28 (26.5 MB/s) - ‘tiny-imagenet-200.zip’ saved [248100043/248100043]



In [8]:
!unzip tiny-imagenet-200.zip >> out.txt

In [9]:
# train_transform = albumentations.Compose(
#     [
#         albumentations.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
#         albumentations.RandomCrop(height=256, width=256),
#         albumentations.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
#         albumentations.RandomBrightnessContrast(p=0.5),
#         albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
#         ToTensorV2(),
#     ]
# )

# val_transform = albumentations.Compose(
#     [
#         albumentations.CenterCrop(height=256, width=256),
#         albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
#         ToTensorV2(),
#     ]
# )

In [10]:
!pip install timm

Collecting timm
  Downloading timm-1.0.9-py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->timm)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->timm)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->timm)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->timm)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->timm)
  

In [11]:
import timm

In [12]:
class NNModel(torch.nn.Module):
  """Implements custom ResNet18 model."""

  def __init__(
          self,
          model_name: str,
          pretrained: bool,
          exportable: bool,
          num_classes: int,
          global_pool: str,
          out_features: int,
    ) -> None:
    """
    Wrapper of the timm model.
    """
    super().__init__()
    self._backbone = timm.create_model(
      model_name=model_name,
      pretrained=pretrained,
      exportable=exportable,
      num_classes=num_classes,
      global_pool=global_pool,
    )
    in_features = self._backbone.get_classifier().in_features

    # выключить встроенную классификационную часть (оставить ТОЛЬКО сверточные слои)
    self._backbone.reset_classifier(num_classes=0)

    # самостоятельно реализовать классификационную часть (она же `классификационная голова`)
    self.fc = torch.nn.Sequential(
        torch.nn.Linear(
          in_features=in_features,
          out_features=out_features,
          bias=False,
        ),
        torch.nn.BatchNorm1d(num_features=out_features),
        torch.nn.ReLU(inplace=True),
        torch.nn.Linear(
          in_features=out_features,
          out_features=num_classes,
        )
    )

  def forward(self, tensor: torch.Tensor) -> torch.Tensor:
    """
    Perform forward pass over input tensors.

    Parameters:
      tensor: Batch of images with shape (B, C, H, W).

    Returns:
      Raw logits with shape (B, config.num_classes).
    """
    return self.fc(self._backbone(tensor))

In [13]:
class DatasetCustom(torch.utils.data.Dataset):
    def __init__(self, root, ordered_classes):
        self.files = sorted(os.listdir(os.path.join(root, "images")))
        self.path = os.path.join(root, "images")

        df = pd.read_csv(os.path.join(root,'val_annotations.txt'),
                         sep='\t',
                         header=None)
        df = df[[0,1]]
        self.idx_to_class = {k:v for k,v
                             in zip(df.values[:,0], df.values[:,1])}
        self.label_dict = {j:i for i,j
                           in enumerate(ordered_classes)}

    def __getitem__(self, idx):
        img = cv2.imread(os.path.join(self.path, self.files[idx]))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (128,128), interpolation = cv2.INTER_AREA)/255.

        class_id = self.idx_to_class[self.files[idx]]
        label = self.label_dict[class_id]
        return (
            torch.tensor(np.transpose(img, [2,0,1]), dtype=torch.float32),
            torch.tensor(label),
        )

    def __len__(self):
        return len(self.files)

In [14]:
train_dataset = torchvision.datasets.ImageFolder(
    './tiny-imagenet-200/train/',
    transform=torchvision.transforms.Compose(
        [
            torchvision.transforms.Resize((128,128)),
            torchvision.transforms.ToTensor()
        ]
    )
)

In [15]:
# train_dataset.class_to_idx

{'n01443537': 0,
 'n01629819': 1,
 'n01641577': 2,
 'n01644900': 3,
 'n01698640': 4,
 'n01742172': 5,
 'n01768244': 6,
 'n01770393': 7,
 'n01774384': 8,
 'n01774750': 9,
 'n01784675': 10,
 'n01855672': 11,
 'n01882714': 12,
 'n01910747': 13,
 'n01917289': 14,
 'n01944390': 15,
 'n01945685': 16,
 'n01950731': 17,
 'n01983481': 18,
 'n01984695': 19,
 'n02002724': 20,
 'n02056570': 21,
 'n02058221': 22,
 'n02074367': 23,
 'n02085620': 24,
 'n02094433': 25,
 'n02099601': 26,
 'n02099712': 27,
 'n02106662': 28,
 'n02113799': 29,
 'n02123045': 30,
 'n02123394': 31,
 'n02124075': 32,
 'n02125311': 33,
 'n02129165': 34,
 'n02132136': 35,
 'n02165456': 36,
 'n02190166': 37,
 'n02206856': 38,
 'n02226429': 39,
 'n02231487': 40,
 'n02233338': 41,
 'n02236044': 42,
 'n02268443': 43,
 'n02279972': 44,
 'n02281406': 45,
 'n02321529': 46,
 'n02364673': 47,
 'n02395406': 48,
 'n02403003': 49,
 'n02410509': 50,
 'n02415577': 51,
 'n02423022': 52,
 'n02437312': 53,
 'n02480495': 54,
 'n02481823': 55,
 '

In [16]:
val_dataset = DatasetCustom(root='./tiny-imagenet-200/val', ordered_classes=train_dataset.class_to_idx)

In [17]:
model = NNModel(
    model_name='resnet18',
    pretrained=True,
    exportable=True,
    num_classes=200,
    global_pool="avg",
    out_features=512, # меняем
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

In [18]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    pin_memory=True,
    num_workers=2,
)

# disable shuffling
test_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    pin_memory=True,
    num_workers=2
)

In [19]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5) # меняем lr

In [20]:
N_EPOCH = 10 # на деле достаточно было 4

model.cuda()

NNModel(
  (_backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act1): ReLU(inplace=True)
        (aa): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1,

In [21]:
import warnings
warnings.filterwarnings("ignore")

In [22]:
def accuracy_score_gpu(y_true, y_pred):
    return ((y_true==y_pred).sum()/y_true.shape[0]).item()

In [23]:
epochs = 10
for i in range(epochs):
    model.train()

    for j, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()
        x = x.cuda()
        y = y.cuda()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        acc = accuracy_score_gpu(y, y_pred.detach().argmax(dim=1))
        if j%100 == 0:
            print(f"Epoch {i:3}\t iter {j:3}\t loss {loss:1.4}\t acc {acc:1.4}")
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        for j, (x, y) in enumerate(test_loader):
            x = x.cuda()
            y = y.cuda()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            acc = accuracy_score_gpu(y, y_pred.argmax(dim=1))
            if j%20==0:
                print(f"Epoch {i:3}\t iter {j:3}\t val_loss {loss:1.4}\t val_acc {acc:1.4}")

Epoch   0	 iter   0	 loss 5.388	 acc 0.007812
Epoch   0	 iter 100	 loss 5.031	 acc 0.02344
Epoch   0	 iter 200	 loss 4.602	 acc 0.1797
Epoch   0	 iter 300	 loss 4.036	 acc 0.3438
Epoch   0	 iter 400	 loss 3.691	 acc 0.4297
Epoch   0	 iter 500	 loss 3.56	 acc 0.375
Epoch   0	 iter 600	 loss 3.262	 acc 0.4141
Epoch   0	 iter 700	 loss 2.581	 acc 0.5781
Epoch   0	 iter   0	 val_loss 2.97	 val_acc 0.4531
Epoch   0	 iter  20	 val_loss 2.878	 val_acc 0.4609
Epoch   0	 iter  40	 val_loss 3.031	 val_acc 0.4609
Epoch   0	 iter  60	 val_loss 3.108	 val_acc 0.3906
Epoch   1	 iter   0	 loss 2.41	 acc 0.6172
Epoch   1	 iter 100	 loss 2.357	 acc 0.5938
Epoch   1	 iter 200	 loss 2.003	 acc 0.6172
Epoch   1	 iter 300	 loss 2.082	 acc 0.5859
Epoch   1	 iter 400	 loss 2.059	 acc 0.6016
Epoch   1	 iter 500	 loss 1.733	 acc 0.6719
Epoch   1	 iter 600	 loss 1.919	 acc 0.5547
Epoch   1	 iter 700	 loss 1.731	 acc 0.5938
Epoch   1	 iter   0	 val_loss 2.193	 val_acc 0.5156
Epoch   1	 iter  20	 val_loss 2.084	 

KeyboardInterrupt: 

In [24]:
y_preds = []
y_trues = []

model.eval()
for iter, (x, y) in enumerate(test_loader):
    with torch.no_grad():
        x = x.cuda(non_blocking=True)
        y_trues.extend(y.tolist())
        y = y.long().cuda(non_blocking=True)
        y_pred = model(x).argmax(1).tolist()
        y_preds.extend(y_pred)
        print(f"Iter {iter}/{len(test_loader)}")

Iter 0/79
Iter 1/79
Iter 2/79
Iter 3/79
Iter 4/79
Iter 5/79
Iter 6/79
Iter 7/79
Iter 8/79
Iter 9/79
Iter 10/79
Iter 11/79
Iter 12/79
Iter 13/79
Iter 14/79
Iter 15/79
Iter 16/79
Iter 17/79
Iter 18/79
Iter 19/79
Iter 20/79
Iter 21/79
Iter 22/79
Iter 23/79
Iter 24/79
Iter 25/79
Iter 26/79
Iter 27/79
Iter 28/79
Iter 29/79
Iter 30/79
Iter 31/79
Iter 32/79
Iter 33/79
Iter 34/79
Iter 35/79
Iter 36/79
Iter 37/79
Iter 38/79
Iter 39/79
Iter 40/79
Iter 41/79
Iter 42/79
Iter 43/79
Iter 44/79
Iter 45/79
Iter 46/79
Iter 47/79
Iter 48/79
Iter 49/79
Iter 50/79
Iter 51/79
Iter 52/79
Iter 53/79
Iter 54/79
Iter 55/79
Iter 56/79
Iter 57/79
Iter 58/79
Iter 59/79
Iter 60/79
Iter 61/79
Iter 62/79
Iter 63/79
Iter 64/79
Iter 65/79
Iter 66/79
Iter 67/79
Iter 68/79
Iter 69/79
Iter 70/79
Iter 71/79
Iter 72/79
Iter 73/79
Iter 74/79
Iter 75/79
Iter 76/79
Iter 77/79
Iter 78/79


In [25]:
from sklearn.metrics import accuracy_score
accuracy_score(y_trues, y_preds)

0.5442

In [26]:
torch.save(model.state_dict(), 'my_resnet.pt')