In [1]:
import os
import sys
import pandas as pd
import numpy as np
import cv2

import torch
import torch.nn as nn
import torchvision.models as models

from multiprocessing import Pool
from tqdm.notebook import tqdm

from torch.utils.data import Dataset

from albumentations import Compose, Resize, HorizontalFlip, Normalize, RandomBrightnessContrast, \
    RandomGamma, GaussNoise, ShiftScaleRotate, ImageCompression, CoarseDropout
from albumentations.pytorch import ToTensorV2
from sklearn import preprocessing

from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [2]:
path = '/kaggle/input/vk-made-sports-image-classification'

train = pd.read_csv(os.path.join(path, 'train.csv'))
train['path'] = train.image_id.apply(lambda x: os.path.join(path, 'train', x.split('.')[0] + '.jpeg'))

test = pd.read_csv(os.path.join(path, 'test.csv'))
test['path'] = test.image_id.apply(lambda x: os.path.join(path, 'test', x.split('.')[0] + '.jpeg'))

In [3]:
df = train
df.head()

Unnamed: 0,image_id,label,path
0,46514481-2d8b-4d49-8991-012e1bfd34f6.jpeg,swimming,/kaggle/input/vk-made-sports-image-classificat...
1,ec66e513-adac-4a30-b6a9-3d647ee6e46b.jpeg,greco-Roman_wrestling,/kaggle/input/vk-made-sports-image-classificat...
2,4d60732e-d680-4bfd-9067-70ff8137f537.jpeg,running,/kaggle/input/vk-made-sports-image-classificat...
3,93327011-8e3d-4f0d-849d-a26ddaf6488b.jpeg,football,/kaggle/input/vk-made-sports-image-classificat...
4,b6853478-48c1-48b2-b104-74903730c831.jpeg,sailing,/kaggle/input/vk-made-sports-image-classificat...


In [4]:
df.shape, test.shape

((45373, 3), (19446, 2))

### Распределение по классам

In [5]:
df[~df['label'].isna()].groupby('label').image_id.count()

label
alpinism                 1858
archery                   982
arm_wrestling            1392
badminton                1140
basketball               2519
boating                  1753
boxing                   1801
fencing                  1421
football                 2150
golf                     1393
greco-Roman_wrestling    1298
gymnastics               1415
handball                 1509
hockey                   2151
horseback_riding         1568
javelin-throwing          811
pole_vault               1047
rugby                    1967
running                  1991
sailing                  1181
sambo                    1264
skating                  1957
ski_race                 1335
surfing                  1420
swimming                 1283
taekwondo                1110
tennis                   1735
velo                     1216
volleyball               1856
water_polo                850
Name: image_id, dtype: int64

##### Итог - классы распределены относительно равномерно

### Для нормализации значений посчитаю mean и std по датасету

In [15]:
def get_mean_std(filepath: str) -> dict:
    image = cv2.imread(filepath)
    img_mean = np.array([np.mean(image[:,:,2]), np.mean(image[:,:,1]), np.mean(image[:,:,0])])
    img_std = np.array([np.std(image[:,:,2]), np.std(image[:,:,1]), np.std(image[:,:,0])])
    return {
        'mean': img_mean,
        'std': img_std,
    }

In [16]:
filepaths = [p for p in df[~df.label.isna()]['path'].values]

with Pool(12) as p:
    samples = list(tqdm(iterable=p.imap_unordered(get_mean_std, filepaths), total=len(filepaths)))

In [17]:
mean_ = list(pd.DataFrame(samples)['mean'].mean() / 255)
std_ = list(pd.DataFrame(samples)['std'].mean() / 255)

In [19]:
# mean_ = [0.4740159605137972, 0.46442562157460615, 0.4598326978744472]
# std_ = [0.25182542253038637, 0.2385384052725654, 0.24216548789726602]

In [20]:
class Imagewoof(Dataset):
    def __init__(self, dataframe, labels, path, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.labels = labels
        self.path = path

    def __getitem__(self, idx):
        filepath = self.dataframe.iloc[idx]['path']
        label = self.labels[idx]

        image = cv2.imread(os.path.join(self.path, filepath))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image, label

    def __len__(self):
        return self.dataframe.shape[0]

In [21]:
def get_train_augmentations(image_size_h, image_size_w):
    return Compose([
        Resize(image_size_h, image_size_w),
        HorizontalFlip(p=0.5),
        RandomBrightnessContrast(p=0.4, brightness_limit=0.25, contrast_limit=0.3),
        RandomGamma(p=0.4),
        CoarseDropout(p=0.1, max_holes=8, max_height=8, max_width=8),
        GaussNoise(p=0.1, var_limit=(5.0, 50.0)),
        ShiftScaleRotate(shift_limit=0.1, scale_limit=0.15, rotate_limit=45, p=0.8),
        ImageCompression(quality_lower=80, quality_upper=100, p=0.4),
        Normalize(
            mean=mean_,
            std=std_,
        ),
        ToTensorV2(),
    ])


def get_val_augmentations(image_size_h, image_size_w):
    return Compose([
        Resize(image_size_h, image_size_w),
        Normalize(
            mean=mean_,
            std=std_,
        ),
        ToTensorV2()
    ])

def preprocess_data(df):
    train_df, val_df = train_test_split(df, random_state=128, test_size=0.2)
    le = preprocessing.LabelEncoder()
    le = le.fit(train_df['label'].values)
    train_labels = le.transform(train_df['label'].values)
    val_labels = le.transform(val_df['label'].values)
    return train_df, val_df, train_labels, val_labels, le

In [22]:
n_classes = df.label.nunique()
n_classes

30

In [25]:
def train_up():
    BATCH_SIZE = 64
    NUM_WORKERS = 8
    IMAGE_SIZE_H, IMAGE_SIZE_W = 350, 350
    N_EPOCHS = 20
    device_ids = [0, 1]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_train_loader():
        albumentations_transform = get_train_augmentations(IMAGE_SIZE_H, IMAGE_SIZE_W)
        train_data = Imagewoof(dataframe=train_df,
                               labels=train_labels,
                               path='input',
                               transform=albumentations_transform)
        train_loader = DataLoader(dataset=train_data,
                                  batch_size=BATCH_SIZE,
                                  num_workers=NUM_WORKERS,
                                  shuffle=True,
                                  drop_last=True)
        return train_loader
    
    albumentations_transform = get_train_augmentations(IMAGE_SIZE_H, IMAGE_SIZE_W)
    albumentations_transform_validate = get_val_augmentations(IMAGE_SIZE_H, IMAGE_SIZE_W)
    train_df, val_df, train_labels, val_labels, le = preprocess_data(df[:])

    train_data = Imagewoof(dataframe=train_df,
                           labels=train_labels,
                           path='input',
                           transform=albumentations_transform)
    train_loader = DataLoader(dataset=train_data,
                              batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS,
                              shuffle=True,
                              drop_last=True)

    validate_data = Imagewoof(dataframe=val_df[:],
                              labels=val_labels[:],
                              path='input',
                              transform=albumentations_transform_validate)
    validate_loader = DataLoader(dataset=validate_data,
                                 batch_size=BATCH_SIZE,
                                 num_workers=NUM_WORKERS,
                                 shuffle=False,
                                 drop_last=False)

    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(2048, n_classes)
#     model = nn.DataParallel(model, device_ids=device_ids, output_device=device)
    model.to(device)
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0003)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.05)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=200)

    best_acc_val = 0
    for epoch in range(N_EPOCHS):
        model.train()
        train_loss = 0
        train_acc = 0

        train_loader = get_train_loader()
        train_len = len(train_loader)
        for i, (imgs, labels) in tqdm(enumerate(train_loader), total=train_len):
            imgs = imgs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            output = model(imgs)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            pred = torch.argmax(torch.softmax(output, 1), 1).cpu().detach().numpy()
            true = labels.cpu().numpy()
            train_acc += f1_score(true, pred, average='micro')
            scheduler.step(epoch + i / train_len)

        model.eval()
        val_loss = 0
        acc_val = 0
        val_len = len(validate_loader)
        for i, (imgs, labels) in tqdm(enumerate(validate_loader), total=val_len):
            with torch.no_grad():
                imgs_vaild, labels_vaild = imgs.to(device), labels.to(device)
                output_test = model(imgs_vaild)
                val_loss += criterion(output_test, labels_vaild).item()
                pred = torch.argmax(torch.softmax(output_test, 1), 1).cpu().detach().numpy()
                true = labels.cpu().numpy()
                acc_val += f1_score(true, pred, average='micro')

        avg_val_acc = acc_val / val_len

        print(
            f'Epoch {epoch}/{N_EPOCHS}  train_loss {train_loss / train_len} train_f1 {train_acc / train_len}  val_loss {val_loss / val_len}  val_f1 {avg_val_acc}')

        torch.save(model.state_dict(), f'weight_{epoch}.pth')
        if avg_val_acc > best_acc_val:
            best_acc_val = avg_val_acc
            torch.save(model.state_dict(), f'weight_best.pth')

    return le

le = train_up()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 0/2  train_loss 2.8371068636576333 train_f1 0.2604166666666667  val_loss 2.256637454032898  val_f1 0.41276041666666663


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1/2  train_loss 1.5582587122917175 train_f1 0.6536458333333334  val_loss 2.2271311283111572  val_f1 0.3910590277777778


In [None]:
test['label'] = train.label.values[:test.shape[0]]
test

In [None]:
paths = ['/kaggle/working/weight_15.pth',
        '/kaggle/working/weight_16.pth',
         '/kaggle/working/weight_17.pth',
         '/kaggle/working/weight_18.pth',
         '/kaggle/working/weight_19.pth',
         '/kaggle/working/weight_20.pth',
         '/kaggle/working/weight_21.pth',
         '/kaggle/working/weight_best.pth'
        ]

for path_weight_model in paths:
    BATCH_SIZE = 64
    NUM_WORKERS = 8
    IMAGE_SIZE_H, IMAGE_SIZE_W = 350, 350
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device_ids = [0, 1]

    val_df = test[:]
    val_labels = le.transform(val_df['label'].values)

    albumentations_transform_validate = get_val_augmentations(IMAGE_SIZE_H, IMAGE_SIZE_W)

    validate_data = Imagewoof(dataframe=val_df,
                              labels=val_labels,
                              path='input',
                              transform=albumentations_transform_validate)
    validate_loader = DataLoader(dataset=validate_data,
                                 batch_size=BATCH_SIZE,
                                 num_workers=NUM_WORKERS,
                                 shuffle=False,
                                 drop_last=False)


    model = models.resnet50(pretrained=False)
    model.fc = nn.Linear(2048, n_classes)
    # model = nn.DataParallel(model, device_ids=device_ids, output_device=device)
    checkpoint = torch.load(path_weight_model, map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint)
    model.to(device)


    pred_all = []
    model.eval()
    val_len = len(validate_loader)
    for i, (imgs, labels) in tqdm(enumerate(validate_loader), total=val_len):
        with torch.no_grad():
            imgs_vaild, labels_vaild = imgs.to(device), labels.to(device)
            output_test = model(imgs_vaild)
            pred = torch.argmax(torch.softmax(output_test, 1), 1).cpu().detach().numpy()
            pred_all.extend(list(pred))

    test_ = test.copy()[:]
    test_['label'] = le.inverse_transform(pred_all)
    test_[['image_id', 'label']].to_csv(f'sub_{path_weight_model.split("/")[-1][:-4]}_resnet50_22ep_64b_0003lr_350r_8020_128r.csv', index=False)

### Для блендинга

In [None]:
# import pandas as pd
# import sys
# import os


# df = pd.DataFrame()

# for sub in os.listdir('submit'):
#     df = df.append(pd.read_csv('submit/' + sub))

# tmp = df.groupby('image_id', sort=False).label.agg(list).reset_index()
# tmp.label = tmp.label.apply(lambda x: max(x, key=x.count))
# tmp.to_csv('sub_blend_31.csv', index=False)