In [1]:
!pip install timm
!pip install peft
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from PIL import Image
import torchvision
from torch import nn, optim
import timm
import imageio.v2 as imageio
from torchvision.transforms import v2
from glob import glob
from sklearn.preprocessing import LabelEncoder

Collecting peft
  Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.10.0-py3-none-any.whl (199 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.10.0


In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
metadata = pd.read_csv('/kaggle/input/ham1000/dataverse_files — копия/HAM10000_metadata.csv')
metadata.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern


In [4]:
first_part = glob('/kaggle/input/ham1000/dataverse_files — копия/HAM10000_images_part_1/*')
second_part = glob('/kaggle/input/ham1000/dataverse_files — копия/HAM10000_images_part_2/*')
first_part.extend(second_part)
first_part_id = [x.split('/')[-1].split('.')[0] for x in first_part]

df_paths = pd.DataFrame({'id': first_part_id, 'path': first_part})
df_paths.head()

Unnamed: 0,id,path
0,ISIC_0028933,/kaggle/input/ham1000/dataverse_files — копия/...
1,ISIC_0028394,/kaggle/input/ham1000/dataverse_files — копия/...
2,ISIC_0027799,/kaggle/input/ham1000/dataverse_files — копия/...
3,ISIC_0028100,/kaggle/input/ham1000/dataverse_files — копия/...
4,ISIC_0027960,/kaggle/input/ham1000/dataverse_files — копия/...


In [5]:
metadata = metadata.merge(df_paths, left_on='image_id', right_on='id').drop(columns=['id'])

In [6]:
class ClassificationDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, transform=None):
        self.data = dataframe.values
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        img = np.array(imageio.imread(img_path, pilmode='RGB'))
        img = torchvision.transforms.ToTensor()(img)
        img = self.transform(img)
        return img, label

In [7]:
transforms_train = v2.Compose([
    v2.ToDtype(torch.float32, scale=True),
    v2.Resize(size=(518, 518)),
    v2.RandomRotation(15),
    v2.RandomHorizontalFlip(p=0.5),
    v2.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

transforms_test = v2.Compose([
    v2.ToDtype(torch.float32, scale=True),
    v2.Resize(size=(518, 518)),
    v2.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [8]:
le = LabelEncoder()
metadata['dx_encoded'] = le.fit_transform(metadata['dx'])

In [9]:
train_idx, valid_idx = train_test_split(np.arange(metadata.shape[0]), test_size=0.3,
                                            random_state=0)
trainset = ClassificationDataset(metadata.iloc[train_idx][['path', 'dx_encoded']],
                                 transform=transforms_train)
valset = ClassificationDataset(metadata.iloc[valid_idx][['path', 'dx_encoded']],
                               transform=transforms_test)

def collate_fn(data):
    images, labels = zip(*data)
    images = torch.stack(images)
    labels = torch.tensor(labels)

    return images.float(), labels.long()

train_loader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                           shuffle=True, num_workers=2, collate_fn = collate_fn)
val_loader = torch.utils.data.DataLoader(valset, batch_size=32,
                                         shuffle=False, num_workers=2, collate_fn = collate_fn)

In [10]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


## ResNet

In [14]:
model = timm.create_model('resnet50', pretrained=True)
num_classes = 7

num_features = model.fc.in_features

model.fc = nn.Sequential(
    nn.Linear(num_features, num_classes)
)
model=model.to(device)

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

In [32]:
def test(model, loader):
    loss_log = []
    acc_log = []
    model.eval()

    for data, target in loader:
        data = data.to(device)
        target = target.to(device)

        with torch.no_grad():
            predictions = model(data)
            loss = nn.functional.cross_entropy(predictions, target)

            loss_log.append(loss.item())

            acc = sum(torch.argmax(predictions.cpu(), dim=1) ==
                   target.cpu()) / target.cpu().shape[0]

            acc_log.append(acc.item())

    return np.mean(loss_log), np.mean(acc_log)

def train_epoch(model, optimizer, train_loader):
    loss_log = []
    acc_log = []
    model.train()

    for data, target in train_loader:
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        predictions = model(data)
        loss = nn.functional.cross_entropy(predictions, target)
        loss.backward()
        optimizer.step()

        loss_log.append(loss.item())

        with torch.no_grad():
            acc = sum(torch.argmax(predictions.cpu(), dim=1) ==
                   target.cpu()) / target.cpu().shape[0]

        acc_log.append(acc.item())

    return loss_log, acc_log

def train(model, optimizer, n_epochs, train_loader, val_loader, scheduler=None):
    train_loss_log, train_acc_log, val_loss_log, val_acc_log = [], [], [], []

    for epoch in range(n_epochs):
        train_loss, train_acc = train_epoch(model, optimizer, train_loader)
        val_loss, val_acc = test(model, val_loader)

        train_loss_log.extend(train_loss)
        train_acc_log.extend(train_acc)

        val_loss_log.append(val_loss)
        val_acc_log.append(val_acc)

        print(f"Epoch {epoch}")
        print(f" train loss: {np.mean(train_loss)}, train acc: {np.mean(train_acc)}")
        print(f" val loss: {val_loss}, val acc: {val_acc}\n")

        if scheduler is not None:
            scheduler.step(val_acc)

    return train_loss_log, train_acc_log, val_loss_log, val_acc_log

In [12]:
optimizer = optim.Adam(model.parameters())
train_loss_log, train_acc_log, val_loss_log, val_acc_log = train(model, optimizer,
                                                                 10, train_loader, val_loader)



Epoch 0
 train loss: 0.8001652205532247, train acc: 0.7207386363636363
 val loss: 0.6571267729109906, val acc: 0.7714026963457148





Epoch 1
 train loss: 0.5933692026544701, train acc: 0.7894886363636363
 val loss: 0.8654627473430431, val acc: 0.7222349599320838





Epoch 2
 train loss: 0.5173066445372322, train acc: 0.8166193181818182
 val loss: 0.6269754699253022, val acc: 0.7900884994801055





Epoch 3
 train loss: 0.4336710686033422, train acc: 0.845028409090909
 val loss: 0.5195487839110354, val acc: 0.8166154623031616





Epoch 4
 train loss: 0.36045251884074375, train acc: 0.8663352272727273
 val loss: 0.500482254047343, val acc: 0.8332721939746369





Epoch 5
 train loss: 0.2980596147900955, train acc: 0.8897727272727273
 val loss: 0.48118413223865186, val acc: 0.8250641968656094





Epoch 6
 train loss: 0.2733974726870656, train acc: 0.9012784090909091
 val loss: 0.565740058238202, val acc: 0.8293172228843608





Epoch 7
 train loss: 0.25113705433905126, train acc: 0.9102272727272728
 val loss: 0.47533258890852015, val acc: 0.8456071165013821





Epoch 8
 train loss: 0.1957782373509624, train acc: 0.9315340909090909
 val loss: 0.5422119561662065, val acc: 0.8310482394188008





Epoch 9
 train loss: 0.16190014078112488, train acc: 0.9420454545454545
 val loss: 0.5167277663787628, val acc: 0.8509950479294391



## ViT base

In [17]:
model = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
num_classes = 7

num_features = model.head.in_features

model.head = nn.Sequential(
    nn.Linear(num_features, num_classes)
)
model=model.to(device)

In [None]:
optimizer = optim.Adam(model.parameters())
train_loss_log, train_acc_log, val_loss_log, val_acc_log = train(model, optimizer,
                                                                 10, train_loader, val_loader)

Epoch 0
 train loss: 1.2499654177576303, train acc: 0.6526988636363636
 val loss: 1.0815122165578477, val acc: 0.6688715149747565

Epoch 1
 train loss: 0.9677933809432117, train acc: 0.6616477272727272
 val loss: 1.1011183198462142, val acc: 0.6416452679228275

Epoch 2
 train loss: 0.9465008036656813, train acc: 0.6703125
 val loss: 0.903980958651989, val acc: 0.6789480923338139

Epoch 3
 train loss: 0.8703149643980644, train acc: 0.6822443181818182
 val loss: 0.8716304536829603, val acc: 0.6802090977100615

Epoch 4
 train loss: 0.8335374092852528, train acc: 0.6938920454545454
 val loss: 0.8588976143522465, val acc: 0.6823069516648638

Epoch 5
 train loss: 0.8618105301802809, train acc: 0.6855113636363637
 val loss: 0.9298524117850243, val acc: 0.643972395582402



## Base dinov2

In [15]:
model = timm.create_model('vit_base_patch14_dinov2.lvd142m', pretrained=True)
model.eval()
model = model.to(device)

KeyboardInterrupt: 

In [37]:
from tqdm.notebook import tqdm
embeddings = []
labels = []
for i in tqdm(range(len(trainset))):
    data, label = trainset[i]
    embeddings.append(model(data.unsqueeze(0).to(device)).cpu().detach().numpy())
    labels.append(label)

  0%|          | 0/7010 [00:00<?, ?it/s]

In [47]:
np.squeeze(np.array(embeddings), 1).shape

(7010, 768)

In [48]:
from sklearn import svm

clf = svm.SVC(gamma='scale')

clf.fit(np.squeeze(np.array(embeddings), 1), labels)

In [50]:
predictions = clf.predict(np.squeeze(np.array(embeddings), 1))

In [51]:
sum(predictions==np.array(labels))/predictions.shape[0]

0.846077032810271

In [52]:
embeddings_test = []
labels_test = []
for i in tqdm(range(len(valset))):
    data, label = valset[i]
    embeddings_test.append(model(data.unsqueeze(0).to(device)).cpu().detach().numpy())
    labels_test.append(label)

  0%|          | 0/3005 [00:00<?, ?it/s]

In [53]:
predictions_test = clf.predict(np.squeeze(np.array(embeddings_test), 1))

In [54]:
sum(predictions_test==np.array(labels_test))/predictions_test.shape[0]

0.8066555740432613

In [16]:
model = timm.create_model('vit_large_patch14_dinov2.lvd142m', pretrained=True)
model.eval()
model = model.to(device)

## Large dinov2

In [20]:
from tqdm.notebook import tqdm
embeddings = []
labels = []

trainset_dino = ClassificationDataset(metadata.iloc[train_idx][['path', 'dx_encoded']],
                                 transform=transforms_test)
train_loader_dino = torch.utils.data.DataLoader(trainset_dino, batch_size=64,shuffle=False,
                                           collate_fn = collate_fn)

for data, target in tqdm(train_loader_dino):
    data = data.to(device)

    with torch.no_grad():
        embeddings.extend(model(data).cpu().numpy())
        
    labels.extend(target)
    
    

  0%|          | 0/110 [00:00<?, ?it/s]

In [35]:
from sklearn import svm

clf = svm.SVC(gamma='scale')

clf.fit(np.array(embeddings), labels)

In [27]:
predictions = clf.predict(np.array(embeddings))
sum(predictions==np.array(labels))/predictions.shape[0]

0.8626248216833096

In [37]:
predictions_val = clf.predict(np.array(embeddings_val))
sum(predictions_val==np.array(labels_val))/predictions_val.shape[0]

0.8133111480865225

In [28]:
from catboost import CatBoostClassifier
clf = CatBoostClassifier()

clf.fit(np.array(embeddings), labels)

Learning rate set to 0.087389
0:	learn: 1.7132590	total: 1.24s	remaining: 20m 41s
1:	learn: 1.5625689	total: 2.08s	remaining: 17m 17s
2:	learn: 1.4502644	total: 2.9s	remaining: 16m 4s
3:	learn: 1.3642465	total: 3.72s	remaining: 15m 27s
4:	learn: 1.2939101	total: 4.53s	remaining: 15m 1s
5:	learn: 1.2338379	total: 5.34s	remaining: 14m 44s
6:	learn: 1.1852548	total: 6.19s	remaining: 14m 38s
7:	learn: 1.1394666	total: 7.03s	remaining: 14m 31s
8:	learn: 1.1001411	total: 7.89s	remaining: 14m 28s
9:	learn: 1.0650195	total: 8.72s	remaining: 14m 23s
10:	learn: 1.0333502	total: 9.55s	remaining: 14m 18s
11:	learn: 1.0057806	total: 10.4s	remaining: 14m 15s
12:	learn: 0.9822913	total: 11.3s	remaining: 14m 14s
13:	learn: 0.9612714	total: 12.1s	remaining: 14m 11s
14:	learn: 0.9407298	total: 12.9s	remaining: 14m 9s
15:	learn: 0.9230102	total: 13.8s	remaining: 14m 7s
16:	learn: 0.9048600	total: 14.6s	remaining: 14m 5s
17:	learn: 0.8879767	total: 15.5s	remaining: 14m 3s
18:	learn: 0.8722224	total: 16.3s

<catboost.core.CatBoostClassifier at 0x79d5ed94bbe0>

In [32]:
predictions = np.squeeze(clf.predict(np.array(embeddings)))
sum(predictions==np.array(labels))/predictions.shape[0]

0.9971469329529244

In [31]:
np.squeeze(clf.predict(np.array(embeddings)))

array([5, 5, 5, ..., 5, 0, 1])

In [33]:
embeddings_val = []
labels_val = []

for data, target in tqdm(val_loader_dino):
    data = data.to(device)

    with torch.no_grad():
        embeddings_val.extend(model(data).cpu().numpy())
        
    labels_val.extend(target)

  0%|          | 0/47 [00:00<?, ?it/s]

In [34]:
predictions = np.squeeze(clf.predict(np.array(embeddings_val)))
sum(predictions==np.array(labels_val))/predictions.shape[0]

0.8126455906821963

## Swinv2

In [12]:
model = timm.create_model('swinv2_base_window8_256.ms_in1k', pretrained=True)
model.eval()
model = model.to(device)

model.safetensors:   0%|          | 0.00/354M [00:00<?, ?B/s]

In [19]:
transforms_256 = v2.Compose([
    v2.ToDtype(torch.float32, scale=True),
    v2.Resize(size=(256, 256)),
    v2.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
trainset_256 = ClassificationDataset(metadata.iloc[train_idx][['path', 'dx_encoded']],
                                 transform=transforms_256)
valset_256 = ClassificationDataset(metadata.iloc[valid_idx][['path', 'dx_encoded']],
                                 transform=transforms_256)
train_loader_256 = torch.utils.data.DataLoader(trainset_256, batch_size=64,
                                                      shuffle=False,
                                           collate_fn = collate_fn)
val_loader_256 = torch.utils.data.DataLoader(valset_256, batch_size=64,
                                         shuffle=False, collate_fn = collate_fn)

In [20]:
from tqdm.notebook import tqdm
embeddings = []
labels = []

for data, target in tqdm(train_loader_256):
    data = data.to(device)

    with torch.no_grad():
        embeddings.extend(model(data).cpu().numpy())
        
    labels.extend(target)
    

  0%|          | 0/110 [00:00<?, ?it/s]

In [21]:
from sklearn import svm

clf = svm.SVC(gamma='scale')

clf.fit(np.array(embeddings), labels)

In [22]:
predictions = clf.predict(np.array(embeddings))
sum(predictions==np.array(labels))/predictions.shape[0]

0.8114122681883025

In [23]:
embeddings_val = []
labels_val = []

for data, target in tqdm(val_loader_256):
    data = data.to(device)

    with torch.no_grad():
        embeddings_val.extend(model(data).cpu().numpy())
        
    labels_val.extend(target)

  0%|          | 0/47 [00:00<?, ?it/s]

In [24]:
predictions = np.squeeze(clf.predict(np.array(embeddings_val)))
sum(predictions==np.array(labels_val))/predictions.shape[0]

0.762063227953411

## Base dinov2 + linear layer and peft training

In [30]:
model = timm.create_model('vit_base_patch14_dinov2.lvd142m', pretrained=True)
for param in model.parameters():
    param.requires_grad = False
    
num_classes = 7

num_features = 768

model.head = nn.Sequential(
    nn.Linear(num_features, num_classes)
)
model=model.to(device)    

for name, param in model.named_parameters():
    print(name,param.requires_grad)

cls_token False
pos_embed False
patch_embed.proj.weight False
patch_embed.proj.bias False
blocks.0.norm1.weight False
blocks.0.norm1.bias False
blocks.0.attn.qkv.weight False
blocks.0.attn.qkv.bias False
blocks.0.attn.proj.weight False
blocks.0.attn.proj.bias False
blocks.0.ls1.gamma False
blocks.0.norm2.weight False
blocks.0.norm2.bias False
blocks.0.mlp.fc1.weight False
blocks.0.mlp.fc1.bias False
blocks.0.mlp.fc2.weight False
blocks.0.mlp.fc2.bias False
blocks.0.ls2.gamma False
blocks.1.norm1.weight False
blocks.1.norm1.bias False
blocks.1.attn.qkv.weight False
blocks.1.attn.qkv.bias False
blocks.1.attn.proj.weight False
blocks.1.attn.proj.bias False
blocks.1.ls1.gamma False
blocks.1.norm2.weight False
blocks.1.norm2.bias False
blocks.1.mlp.fc1.weight False
blocks.1.mlp.fc1.bias False
blocks.1.mlp.fc2.weight False
blocks.1.mlp.fc2.bias False
blocks.1.ls2.gamma False
blocks.2.norm1.weight False
blocks.2.norm1.bias False
blocks.2.attn.qkv.weight False
blocks.2.attn.qkv.bias False
bloc

In [36]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["head.0"],
    lora_dropout=0.1,
    bias="none",
    modules_to_save=["classifier"],
)
model = get_peft_model(model, config)
model=model.to(device)
model.print_trainable_parameters()

trainable params: 12,400 || all params: 86,597,495 || trainable%: 0.014319120893739478


In [37]:
optimizer = optim.Adam(model.parameters())
train_loss_log, train_acc_log, val_loss_log, val_acc_log = train(model, optimizer,
                                                                 10, train_loader, val_loader)

Epoch 0
 train loss: 0.6071464641527696, train acc: 0.7732954545454546
 val loss: 0.6786566928346106, val acc: 0.7531869037354246

Epoch 1
 train loss: 0.5782031683081931, train acc: 0.7869318181818182
 val loss: 0.6222907472798165, val acc: 0.7734661590545735

Epoch 2
 train loss: 0.5487538021756336, train acc: 0.7940340909090909
 val loss: 0.6230489059965661, val acc: 0.7677801726980412

Epoch 3
 train loss: 0.5157719948075035, train acc: 0.8063920454545455
 val loss: 0.5665523105479301, val acc: 0.791418286714148

Epoch 4
 train loss: 0.49756884944032537, train acc: 0.8142045454545455
 val loss: 0.5741238365782068, val acc: 0.7847349599320838

Epoch 5
 train loss: 0.4863752846013416, train acc: 0.81875
 val loss: 0.5801024725462528, val acc: 0.778086023761871

Epoch 6
 train loss: 0.4727133125574751, train acc: 0.8247159090909091
 val loss: 0.5689197141439357, val acc: 0.7910514492937859

Epoch 7
 train loss: 0.46172675061970947, train acc: 0.8276988636363637
 val loss: 0.5651326715