In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader 
 
from torchvision import datasets
from torchvision import transforms
from torchvision.io import read_image
from torchvision.transforms import ToTensor

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')
from IPython.display import clear_output

**Преобразуем тестовую выборку**

In [2]:
class MedicalImageDataset(Dataset):
    """Image dataset read from disk, with labels loaded from a ``labels.pt`` file.

    Files are looked up using the following layout::

        {img_dir}/{data}/labels.pt
        {img_dir}/{data}/images/image({idx}).jpg

    Args:
        img_dir: Root directory that contains one sub-directory per split.
        data: Name of the split sub-directory (``'train'`` by default).
        transform: Optional callable applied to each image tensor after it
            has been scaled by ``1 / 255``.
    """

    def __init__(self, img_dir, data='train', transform=None):
        super().__init__()
        # Stored as given (a plain path, not a tuple) so paths can be built from it directly.
        self.img_dir = img_dir
        self.data = data
        self.labels = torch.load(f'{img_dir}/{data}/labels.pt')
        self.transform = transform

    def __len__(self):
        """Return the number of samples, i.e. the number of loaded labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        # read_image output is divided by 255 before any transform is applied.
        image = read_image(f'{self.img_dir}/{self.data}/images/image({idx}).jpg') / 255
        # Compare against None explicitly so a transform object that happens to be
        # falsy (e.g. an empty container module) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return (image, self.labels[idx])

In [3]:
# Preprocessing pipeline applied to every test image:
# resize, centre-crop to 224, then per-channel normalisation.
# NOTE(review): the mean/std values match the statistics commonly used with
# ImageNet-trained torchvision models — confirm they match what was used in training.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Test split of the dataset, read from the notebook's input directory.
test_dataset = MedicalImageDataset(
    '../input/image-preprocessing-to-nn',
    data='test',
    transform=preprocess,
)

In [4]:
BATCH_SIZE = 64

# Evaluation only: keep the samples in their stored order.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Sanity check: look at the first batch only and report its shapes / label dtype.
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")

Shape of X [N, C, H, W]: torch.Size([64, 3, 224, 224])
Shape of y: torch.Size([64]) torch.float32


**Функция вывода метрик на тестовой выборке**

In [5]:
from torchmetrics import Accuracy, Precision, Recall, F1Score, AUROC, AveragePrecision

# Seed PyTorch's global RNG so any random torch operations that follow are repeatable.
torch.manual_seed(123)


def test_metrics(dataloader, model, task='multiclass', num_classes=23):
    """Evaluate ``model`` on ``dataloader`` and print aggregated classification metrics.

    The model is switched to eval mode and run without gradient tracking. Metric
    state is accumulated over all batches and the final values are printed in two
    groups:

    * micro-averaged: Accuracy, Precision, Recall, F1 Score
    * macro-averaged: Accuracy, Precision, Recall, F1 Score, AUC-ROC, AUC-PR

    AUC-ROC and AUC-PR are reported with macro averaging only.
    NOTE(review): torchmetrics' multiclass AUROC / AveragePrecision do not appear
    to accept ``average='micro'`` — confirm against the installed version.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module called as ``model(inputs)``; its output is passed to the
            metrics as predictions.
        task: ``task`` argument forwarded to every torchmetrics metric.
        num_classes: ``num_classes`` argument forwarded to every metric.

    Returns:
        None. Results are printed with two decimal places.
    """
    model.eval()

    def _make(metric_cls, average):
        # All metrics share the same task / class-count configuration.
        return metric_cls(task=task, num_classes=num_classes, average=average)

    # (printed label, metric class) pairs, in the order they are reported.
    label_metrics = (
        ('Accuracy', Accuracy),
        ('Precision', Precision),
        ('Recall', Recall),
        ('F1 Score', F1Score),
    )
    score_metrics = (
        ('AUC-ROC', AUROC),
        ('AUC-PR', AveragePrecision),
    )

    micro = {label: _make(cls, 'micro') for label, cls in label_metrics}
    macro = {label: _make(cls, 'macro') for label, cls in label_metrics + score_metrics}
    all_metrics = (*micro.values(), *macro.values())

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            predictions = model(X_batch)
            # Labels may be stored as floats; the AUC metrics need integer targets,
            # so cast once and feed the same targets to every metric.
            targets = y_batch.long()
            # update() only accumulates state; the per-batch metric values are not
            # needed because only the aggregate over the whole loader is reported.
            for metric in all_metrics:
                metric.update(predictions, targets)

    print('Result metrics for test dataset')
    for title, group in (('MICRO', micro), ('MACRO', macro)):
        print(f'\n>>> Average - {title}')
        for label, metric in group.items():
            heading = f'{label}:'
            value = float(metric.compute())
            # Pad the heading to a fixed width so the values line up in one column.
            print(f'\t{heading:<11}{value:.2f}')

**Смотрим метрики**

In [6]:
from torchvision.models import resnet50

**Model ResNet-50 with Adam (lr=3e-4)**

In [7]:
# Build a ResNet-50 and swap its final layer for a 23-output linear head,
# so the architecture matches the checkpoint loaded below.
model_adam = resnet50()
model_adam.fc = nn.Linear(in_features=2048, out_features=23)

# Checkpoint saved from the Adam run; tensors are mapped onto the CPU while loading.
model_path = '../input/resnet-50-for-dermnet/pytorch/adam-3e-4-epochs-30/1/model-Adam-3e-4-epochs-30.pth'
model_adam.load_state_dict(
    torch.load(model_path, map_location='cpu')
)

test_metrics(dataloader=test_dataloader, model=model_adam)

Result metrics for test dataset

>>> Average - MICRO
	Accuracy:  0.61
	Precision: 0.61
	Recall:    0.61
	F1 Score:  0.61

>>> Average - MACRO
	Accuracy:  0.59
	Precision: 0.58
	Recall:    0.59
	F1 Score:  0.58
	AUC-ROC:   0.94
	AUC-PR:    0.62


**Model ResNet-50 with RMSprop (lr=1e-4, momentum=0.9)** 

In [8]:
# Build a ResNet-50 and swap its final layer for a 23-output linear head,
# so the architecture matches the checkpoint loaded below.
model_rms = resnet50()
model_rms.fc = nn.Linear(in_features=2048, out_features=23)

# Checkpoint saved from the RMSprop run; tensors are mapped onto the CPU while loading.
model_path = '../input/resnet-50-for-dermnet/pytorch/rmsprop-1e-4-epochs-30/1/model-RMSprop-1e-4-epochs-30.pth'
model_rms.load_state_dict(
    torch.load(model_path, map_location='cpu')
)

test_metrics(dataloader=test_dataloader, model=model_rms)

Result metrics for test dataset

>>> Average - MICRO
	Accuracy:  0.52
	Precision: 0.52
	Recall:    0.52
	F1 Score:  0.52

>>> Average - MACRO
	Accuracy:  0.47
	Precision: 0.50
	Recall:    0.47
	F1 Score:  0.47
	AUC-ROC:   0.91
	AUC-PR:    0.50
