In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

from torchvision.models import resnet18, ResNet18_Weights
import torch
import PIL

Попробуем построить модель для датасета пород собак. Сделаем Feature Extract через предобученный ResNet18 и обучим SVM.

In [2]:
import numpy as np
from PIL import Image

DATASET_PATH = '/kaggle/input/stanford-dogs-dataset/images/Images'

data = {'img': [], 'breed': []}

for dirname, _, filenames in os.walk(DATASET_PATH):
    for filename in filenames:
        data['img'].append(os.path.join(dirname, filename))
        data['breed'].append(os.path.basename(dirname).split('-', 1)[1])

df = pd.DataFrame.from_dict(data)

In [3]:
df

Unnamed: 0,img,breed
0,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound
1,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound
2,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound
3,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound
4,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound
...,...,...
20575,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound
20576,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound
20577,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound
20578,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound


In [4]:
df['breed'].unique()
breed_labels_map = {}

unique_breeds = df['breed'].unique()
for i in range(len(unique_breeds)):
    breed_labels_map[unique_breeds[i]] = i

In [5]:
df['breed_label'] = df['breed'].apply(lambda x: breed_labels_map[x])
df

Unnamed: 0,img,breed,breed_label
0,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound,0
1,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound,0
2,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound,0
3,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound,0
4,/kaggle/input/stanford-dogs-dataset/images/Ima...,otterhound,0
...,...,...,...
20575,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound,119
20576,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound,119
20577,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound,119
20578,/kaggle/input/stanford-dogs-dataset/images/Ima...,bloodhound,119


In [6]:
from torch.utils.data import DataLoader, Dataset

class Dataset(Dataset):
    def __init__(self, data, transform):
        self.data = data
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        image = PIL.Image.open(self.data.loc[idx, "img"]).convert('RGB')
        image = self.transform(image)
        label = torch.tensor(self.data.loc[idx, "breed_label"])
        return image, label

In [7]:
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True

In [8]:
from torchvision.transforms import v2
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df["img"], df["breed_label"], random_state=42)

X_train.index = np.arange(len(X_train))
y_train.index = np.arange(len(y_train))
X_test.index = np.arange(len(X_test))
y_test.index = np.arange(len(y_test))

In [9]:
transform_train = v2.Compose([
    v2.RandomResizedCrop(size=(224, 224), antialias=True),
    v2.RandomHorizontalFlip(p=0.5),
    v2.PILToTensor(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transform_test = v2.Compose([
    v2.Resize(size=(224, 224)),
    v2.PILToTensor(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [10]:
train_ds = Dataset(pd.concat([X_train, y_train], axis=1), transform_train)
test_ds = Dataset(pd.concat([X_test, y_test], axis=1), transform_test)

train_loader = DataLoader(train_ds, batch_size=32, num_workers=4)
test_loader = DataLoader(test_ds, batch_size=32, num_workers=4)

In [18]:
class Extraction:
    def __init__(self, network):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.network = network.eval().to(self.device)
        
    def extract(self, loader):
        data_tmp = []
        label_tmp = []

        with torch.no_grad():
            for x, y in loader:
                x = x.to(self.device)
            
                outputs = self.network(x)
                data_tmp.append(outputs.view(-1, 512).cpu().numpy())
                
                label_tmp.append(y.cpu().numpy())
                
        return np.vstack(data_tmp), np.hstack(label_tmp)

In [12]:
model = resnet18(weights=ResNet18_Weights.DEFAULT)
feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 187MB/s]


In [19]:
ext = Extraction(feature_extractor)

train_feature, train_label = ext.extract(train_loader)
test_feature, test_label = ext.extract(test_loader)

In [15]:
train_feature.shape

(15435, 512)

In [20]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV

param = {
    "kernel": ['linear', 'poly', 'rbf', 'sigmoid'],
    #'C': [0.1, 0.5, 1, 2, 5, 10]
}

svc = svm.SVC()
svm_grid = GridSearchCV(svc, param_grid=param, verbose=2, n_jobs=-1)

svm_grid.fit(train_feature, train_label)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


[CV] END ......................................kernel=linear; total time=  50.7s
[CV] END ......................................kernel=linear; total time=  51.1s
[CV] END ........................................kernel=poly; total time=  55.2s
[CV] END .........................................kernel=rbf; total time= 1.3min
[CV] END .....................................kernel=sigmoid; total time= 1.3min
[CV] END ......................................kernel=linear; total time=  52.4s
[CV] END ........................................kernel=poly; total time=  56.8s
[CV] END ........................................kernel=poly; total time=  53.6s
[CV] END .........................................kernel=rbf; total time= 1.3min
[CV] END .....................................kernel=sigmoid; total time= 1.3min
[CV] END ......................................kernel=linear; total time=  52.7s
[CV] END ........................................kernel=poly; total time=  56.5s
[CV] END ...................

In [27]:
y_pred_svm_grid = svm_grid.predict(test_feature)

In [28]:
from sklearn.metrics import accuracy_score, multilabel_confusion_matrix

accuracy_score(test_label, y_pred_svm_grid)

0.7904761904761904

In [43]:
labels = [i for i in range(120)]

def get_multilabel_accuracies(y_true, y_pred, labels):
    cm = multilabel_confusion_matrix(test_label, y_pred, labels=labels)
    total_count = len(y_true)
    accuracies = []
    precisions = []
    recalls = []
    for i in range(len(labels)):
        true_positive_count = np.sum(cm[i,1,1]).item()
        true_negative_count = np.sum(cm[i,0,0]).item()
        false_positive_count = np.sum(cm[i,0,1]).item()
        false_negative_count = np.sum(cm[i,1,0]).item()
        
        accuracy = (true_positive_count + true_negative_count) / total_count
        precision = true_positive_count/(true_positive_count + false_positive_count)
        recall = true_negative_count/(true_negative_count + false_positive_count)
        
        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
    return accuracies, precisions, recalls


In [44]:
acs, precs, recs = get_multilabel_accuracies(test_label, y_pred_svm_grid, labels)
print(f'multiclass metrics\naverage accuracy: {np.mean(acs)}\naverage precision: {np.mean(precs)}\naverage recall: {np.mean(recs)}')

multiclass metrics
average accuracy: 0.9965079365079366
average precision: 0.7939936658352342
average recall: 0.9982388498709386
