# Name: Roman_Kaderov_529383613


### Установка зависимостей

In [None]:
import torch
import numpy as np

# Detect once whether CUDA can be used and report where training will run.
train_on_gpu = torch.cuda.is_available()

print('CUDA is available!  Training on GPU ...' if train_on_gpu
      else 'CUDA is not available.  Training on CPU ...')

In [None]:
# Print the installed Pillow version.
import PIL
print(PIL.__version__)

In [None]:
# Notebook shell escape: run `nvidia-smi` in the session's shell.
!nvidia-smi
import torch
# Shown as the cell output: whether PyTorch can use CUDA.
torch.cuda.is_available()

In [None]:
import pickle
import pandas as pd
import numpy as np
import random
import os
import copy
from skimage import io
from tqdm.autonotebook import tqdm, trange
from PIL import Image
from pathlib import Path

from torchvision import datasets, models, transforms
import torch.nn.functional as F
from torch.optim import lr_scheduler
import torch.optim as optim
from multiprocessing.pool import ThreadPool
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

import matplotlib.patches as patches
from matplotlib.font_manager import FontProperties
from matplotlib import colors, pyplot as plt
%matplotlib inline
import time

import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)


In [None]:

# Dataset modes accepted by SimpsonsDataset.
DATA_MODES = ['train', 'val', 'test']

# Side length (in pixels) every image is resized to.
RESCALE_SIZE = 224

# Fall back to the CPU when CUDA is unavailable, so the first `.to(DEVICE)`
# does not crash on a machine without a GPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

https://jhui.github.io/2018/02/09/PyTorch-Data-loading-preprocess_torchvision/


In [None]:
class SimpsonsDataset(Dataset):
    """Image dataset that loads pictures from class-named folders.

    Every sample is resized to ``RESCALE_SIZE`` x ``RESCALE_SIZE``, scaled to
    [0, 1], converted to a float32 tensor and normalised per channel.

    Args:
        files: iterable of ``pathlib.Path`` objects pointing at image files.
            Outside of ``'test'`` mode the parent directory name of each file
            is used as its label.
        mode: one of ``DATA_MODES``.

    Raises:
        NameError: if ``mode`` is not one of ``DATA_MODES``.

    Side effects:
        Outside of ``'test'`` mode the fitted label encoder is pickled to
        ``label_encoder.pkl`` in the current directory (overwriting it).
    """

    def __init__(self, files, mode):
        super().__init__()
        # list of files to load
        self.files = sorted(files)
        # working mode
        self.mode = mode

        if self.mode not in DATA_MODES:
            message = f"{self.mode} is not correct; correct modes: {DATA_MODES}"
            print(message)
            raise NameError(message)

        self.len_ = len(self.files)

        self.label_encoder = LabelEncoder()

        # Built once here instead of on every __getitem__ call.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        if self.mode != 'test':
            self.labels = [path.parent.name for path in self.files]
            self.label_encoder.fit(self.labels)
            # Encode all labels up front; calling the encoder per sample is slow.
            self.label_ids = self.label_encoder.transform(self.labels)

            with open('label_encoder.pkl', 'wb') as le_dump_file:
                pickle.dump(self.label_encoder, le_dump_file)

    def __len__(self):
        return self.len_

    def load_sample(self, file):
        """Open ``file`` and return it as a fully loaded RGB PIL image."""
        image = Image.open(file)
        image.load()
        # Grayscale / RGBA / palette files would otherwise not match the
        # three-channel normalisation applied in __getitem__.
        return image.convert('RGB')

    def __getitem__(self, index):
        """Return the tensor for ``index`` (plus its integer label unless in test mode)."""
        x = self.load_sample(self.files[index])
        x = self._prepare_sample(x)
        # ToTensor does not rescale float arrays, hence the explicit division.
        x = np.array(x / 255, dtype='float32')
        x = self.transform(x)
        if self.mode == 'test':
            return x
        return x, int(self.label_ids[index])

    def _prepare_sample(self, image):
        """Resize ``image`` to the network input size and return it as an array."""
        image = image.resize((RESCALE_SIZE, RESCALE_SIZE))
        return np.array(image)

In [None]:
def imshow(inp, title=None, plt_ax=plt, default=False):
    """Display a normalised image tensor.

    Args:
        inp: CPU tensor in (channels, height, width) layout with three
            channels, normalised with the mean/std used below.
        title: optional title to put above the image.
        plt_ax: drawing target, either a matplotlib ``Axes`` or the ``pyplot``
            module itself (the default).
        default: unused; kept so existing calls keep working.
    """
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Undo the normalisation and clamp to the displayable range.
    inp = np.clip(std * inp + mean, 0, 1)
    plt_ax.imshow(inp)
    if title is not None:
        # Axes objects expose set_title(); the pyplot module only has title().
        set_title = getattr(plt_ax, 'set_title', None) or plt_ax.title
        set_title(title)
    plt_ax.grid(False)

In [None]:
# Directories with the training and the test files
TRAIN_DIR = Path('/kaggle/input/journey-springfield/train/')
TEST_DIR = Path('/kaggle/input/journey-springfield/testset')

In [None]:
# Every .jpg below each directory, in sorted order (sorted() already returns
# a list, so no intermediate list() is needed).
train_val_files = sorted(TRAIN_DIR.rglob('*.jpg'))
test_files = sorted(TEST_DIR.rglob('*.jpg'))

In [None]:
from sklearn.model_selection import train_test_split

# The label of an image is the name of the folder that contains it; the labels
# drive a stratified 75/25 train/validation split.
train_val_labels = [image_path.parent.name for image_path in train_val_files]
train_files, val_files = train_test_split(
    train_val_files,
    test_size=0.25,
    stratify=train_val_labels,
)

In [None]:
# Validation dataset built from the validation part of the split.
val_dataset = SimpsonsDataset(val_files, mode='val')

Давайте посмотрим на наших героев внутри датасета.

In [None]:
# Show a 3x3 grid of random validation images with their character names.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(8, 8),
                       sharey=True, sharex=True)
plt.subplots_adjust(wspace=0.5, hspace=0.5)
for fig_x in ax.flatten():
    # Draw the index from the real dataset size: a hard-coded 0..999 range
    # fails on smaller sets and never reaches samples beyond the first 1000.
    random_characters = int(np.random.randint(len(val_dataset)))
    im_val, label = val_dataset[random_characters]
    character = val_dataset.label_encoder.inverse_transform([label])[0]
    # "homer_simpson" -> "Homer Simpson"
    img_label = " ".join(word.capitalize() for word in character.split('_'))
    imshow(im_val.data.cpu(), title=img_label, plt_ax=fig_x)

Можете добавить ваши любимые сцены и классифицировать их. (веселые результаты можно кидать в чат)

### Построение нейросети

Запустить данную сеть будет вашим мини-заданием на первую неделю, чтобы было проще участвовать в соревновании.

Данная архитектура будет очень простой и нужна для того, чтобы установить базовое понимание и получить простенький сабмит на Kaggle

<!-- Здесь вам предлагается дописать сверточную сеть глубины 4/5.  -->

*Описание слоев*:



1. размерность входа: 3x224x224
2. размерности после слоя: 8x111x111
3. 16x54x54
4. 32x26x26
5. 64x12x12
6. выход: 96x5x5


In [None]:
# A very simple network
class SimpleCnn(nn.Module):
    """Five conv/ReLU/max-pool stages followed by a single linear layer.

    The linear layer expects the convolutional stack to end with a 96x5x5
    feature map, which is what a 3x224x224 input produces.
    """

    @staticmethod
    def _stage(in_channels, out_channels):
        """Return a 3x3 convolution followed by ReLU and 2x2 max pooling."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def __init__(self, n_classes):
        super().__init__()
        self.conv1 = self._stage(3, 8)
        self.conv2 = self._stage(8, 16)
        self.conv3 = self._stage(16, 32)
        self.conv4 = self._stage(32, 64)
        self.conv5 = self._stage(64, 96)

        self.out = nn.Linear(96 * 5 * 5, n_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)

        # Flatten everything except the batch dimension.
        flat = x.view(x.size(0), -1)
        return self.out(flat)

In [None]:
def fit_epoch(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: network to optimise; its inputs are moved to ``DEVICE``.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping the model parameters.

    Returns:
        ``(train_loss, train_acc)``: the batch-size-weighted average loss and
        the fraction of correctly classified samples.
    """
    # Evaluation switches the model to eval mode; without switching back every
    # epoch after the first would train with dropout/batch-norm disabled.
    model.train()

    running_loss = 0.0
    running_corrects = 0
    processed_data = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        preds = torch.argmax(outputs, 1)
        # Weight the batch loss by the batch size so the last, possibly
        # smaller, batch is not over-represented.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        processed_data += inputs.size(0)

    train_loss = running_loss / processed_data
    train_acc = running_corrects.cpu().numpy() / processed_data
    return train_loss, train_acc

In [None]:
def eval_epoch(model, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Returns:
        ``(val_loss, val_acc)``: the batch-size-weighted average loss (float)
        and the fraction of correct predictions (a tensor).
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(inputs)
            batch_loss = criterion(outputs, labels)
            preds = torch.argmax(outputs, 1)

            batch_len = inputs.size(0)
            loss_sum += batch_loss.item() * batch_len
            correct += torch.sum(preds == labels.data)
            seen += batch_len

    return loss_sum / seen, correct.double() / seen

In [None]:
def train(train_files, val_files, model, epochs, batch_size):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch metrics.

    Args:
        train_files: training data, either a ``Dataset`` instance or a list
            of image paths (which is wrapped into a ``SimpsonsDataset``).
        val_files: validation data, in the same two accepted forms.
        model: network to train.
        epochs: number of passes over the training data.
        batch_size: batch size for both loaders.

    Returns:
        List with one ``(train_loss, train_acc, val_loss, val_acc)`` tuple per
        epoch.
    """
    # Use the data that was actually passed in rather than whatever the
    # module-level ``train_dataset`` / ``val_dataset`` happen to be.
    train_data = (train_files if isinstance(train_files, Dataset)
                  else SimpsonsDataset(train_files, mode='train'))
    val_data = (val_files if isinstance(val_files, Dataset)
                else SimpsonsDataset(val_files, mode='val'))

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

    history = []
    log_template = "\nEpoch {ep:03d} train_loss: {t_loss:0.4f} \
    val_loss {v_loss:0.4f} train_acc {t_acc:0.4f} val_acc {v_acc:0.4f}"

    opt = torch.optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    with tqdm(desc="epoch", total=epochs) as pbar_outer:
        for epoch in range(epochs):
            train_loss, train_acc = fit_epoch(model, train_loader, criterion, opt)
            print("loss", train_loss)

            val_loss, val_acc = eval_epoch(model, val_loader, criterion)
            history.append((train_loss, train_acc, val_loss, val_acc))

            pbar_outer.update(1)
            tqdm.write(log_template.format(ep=epoch+1, t_loss=train_loss,
                                           v_loss=val_loss, t_acc=train_acc, v_acc=val_acc))

    return history

In [None]:
def predict(model, test_loader):
    """Return class probabilities for every sample yielded by ``test_loader``.

    Args:
        model: network producing one row of logits per sample.
        test_loader: iterable of input batches (no labels).

    Returns:
        NumPy array of softmax probabilities, one row per sample.
    """
    # Switch to inference mode once rather than on every batch.
    model.eval()
    with torch.no_grad():
        logits = [model(inputs.to(DEVICE)).cpu() for inputs in test_loader]

    probs = F.softmax(torch.cat(logits), dim=-1).numpy()
    return probs

In [None]:
# The number of distinct labels is the number of network outputs.
n_classes = len(np.unique(train_val_labels))
model = simple_cnn = SimpleCnn(n_classes).to(DEVICE)
print(f"we will classify :{n_classes}")
print(model)

Запустим обучение сети.

In [None]:
# Recreate the validation dataset only if it has been reset to None.
if val_dataset is None:
    val_dataset = SimpsonsDataset(val_files, mode='val')

# Training dataset built from the training part of the split.
train_dataset = SimpsonsDataset(train_files, mode='train')

In [None]:
# Train the simple CNN for 5 epochs with batches of 64 samples.
history = train(train_dataset, val_dataset, model=simple_cnn, epochs=5, batch_size=64)

Построим кривые обучения

In [None]:
# Transpose the list of per-epoch tuples into one sequence per metric.
loss, acc, val_loss, val_acc = zip(*history)

In [None]:
# Training and validation loss per epoch on a single figure.
plt.figure(figsize=(15, 9))
for curve, curve_name in ((loss, "train_loss"), (val_loss, "val_loss")):
    plt.plot(curve, label=curve_name)
plt.legend(loc='best')
plt.xlabel("epochs")
plt.ylabel("loss")
plt.show()

Хорошо бы понять, как сделать сабмит.
У нас есть сеть и её метод eval, который переводит сеть в режим предсказания. Стоит понимать, что модель возвращает логиты (softmax в последнем слое нет); чтобы получить вектор вероятностей того, что объект относится к тому или иному классу, к выходу сети нужно применить softmax. Давайте воспользуемся этим.

In [None]:
def predict_one_sample(model, inputs, device=DEVICE):
    """Return the class probabilities for a single image batch.

    ``inputs`` is moved to ``device``, passed through ``model`` in eval mode
    without gradient tracking, and the logits are turned into probabilities
    with a softmax over the last dimension.
    """
    with torch.no_grad():
        on_device = inputs.to(device)
        model.eval()
        scores = model(on_device).cpu()
        return torch.nn.functional.softmax(scores, dim=-1).numpy()

In [None]:
# Pick the sample from the real dataset size instead of a hard-coded 0..999
# range, which raises IndexError on smaller validation sets.
random_characters = int(np.random.randint(len(val_dataset)))
ex_img, true_label = val_dataset[random_characters]
# unsqueeze(0) adds the batch dimension the network expects.
probs_im = predict_one_sample(simple_cnn, ex_img.unsqueeze(0))

In [None]:
# 20 random validation indices, bounded by the real dataset size rather than
# a hard-coded 1000; kept as plain ints.
idxs = np.random.randint(0, len(val_dataset), size=20).tolist()
# `idx` instead of `id`, so the builtin is not shadowed.
imgs = [val_dataset[idx][0].unsqueeze(0) for idx in idxs]

probs_ims = predict(simple_cnn, imgs)

In [None]:
# Load the label encoder saved by SimpsonsDataset; the context manager closes
# the file handle, which a bare open() inside the call never does.
with open("label_encoder.pkl", 'rb') as le_dump_file:
    label_encoder = pickle.load(le_dump_file)

In [None]:
# Most probable class id for every sampled image.
y_pred = np.argmax(probs_ims, axis=-1)

# Ground-truth class ids of the same samples.
actual_labels = [val_dataset[sample_idx][1] for sample_idx in idxs]

# Predicted class names.
preds_class = [label_encoder.classes_[class_id] for class_id in y_pred]

Обратите внимание, что метрика, которую необходимо оптимизировать в конкурсе --- f1-score. Вычислим целевую метрику на валидационной выборке.

In [None]:
# Micro-averaged F1 between the true and the predicted class ids.
f1 = f1_score(actual_labels, y_pred, average='micro')

print("F1-оценка:", f1)


### Submit на Kaggle

In [None]:
# Run the simple CNN over the test images in their stored (unshuffled) order.
test_dataset = SimpsonsDataset(test_files, mode="test")
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)
probs = predict(simple_cnn, test_loader)


# Map the most probable class id back to the class name.
preds = label_encoder.inverse_transform(np.argmax(probs, axis=1))
# File names in the same order as the predictions.
test_filenames = [path.name for path in test_dataset.files]


In [None]:
#my_submit = pd.DataFrame({'Id': test_filenames, 'Expected': preds})
#my_submit.to_csv('/kaggle/working/simple_nn_submission.csv', index=False)

# efficientnet_b2

In [None]:
# Release cached, currently unused GPU memory held by PyTorch.
torch.cuda.empty_cache()

Посмотрим на сбалансированность классов

In [None]:
# Number of images per class label.
count = pd.Series(train_val_labels).value_counts()
print(count)

Видно, что классы сильно несбалансированы. Требуется сделать аугментацию, будем использовать
* RandomHorizontalFlip (отзеркаливание картинки)
* RandomRotation (поворот картинки)
* CenterCrop + RandomCrop (вырезание фрагмента картинки)



Посчитаем сколько картинок для каждого класса нам нужно добавить.
* В data['to_add'] будет лежать количество картинок, которое нужно добавить в класс
* В data['one_iteration'] будет лежать количество картинок, которое нужно создать из одного исходного изображения за одну итерацию

In [None]:
# Per-class augmentation plan: how many images are missing to reach 1400 and
# how many copies to derive from each existing image.
data = count.to_frame('count')
data['to_add'] = (1400 - data['count']).clip(lower=0)
data['one_iteration'] = data['to_add'] // data['count']

In [None]:
# Display the table as the cell output.
data

In [None]:
def load_image(file):
    """Open the image stored at ``file`` and force its pixel data to be read."""
    picture = Image.open(file)
    picture.load()
    return picture

In [None]:
# Augmentations available for creating extra images, keyed by name.
augmenters = {
    # Centre crop of size 300 followed by a random crop of size 250.
    'Crop': transforms.Compose([
                                transforms.CenterCrop(size=300),
                                transforms.RandomCrop(250)
                                ]),
    # Random rotation by an angle between -25 and 25 degrees.
    'Rotate': transforms.RandomRotation(degrees=(-25, 25)),
    # Horizontal mirror applied with probability 1.
    'HFlip': transforms.RandomHorizontalFlip(p=1)
}

Скопируем данные в выходную директорию, чтобы мы могли сохранять картинки

In [None]:
# Notebook shell escape: copy the dataset into the current working directory.
!cp -r "../input/journey-springfield" "./"

In [None]:
TRAIN_DIR = Path('/kaggle/working/journey-springfield/train/simpsons_dataset')
train_val_files = sorted(TRAIN_DIR.rglob('*.jpg'))

# Materialise the augmenter list once instead of once per generated image.
augmenter_choices = list(augmenters.values())

for image_path in tqdm(train_val_files):
    path = image_path.parents[0]
    character = image_path.parent.name

    remaining = int(data.loc[character, 'to_add'])
    if remaining <= 0:
        continue
    # If one iteration would create more images than are still missing for
    # this class, create only the missing amount.
    iter_size = min(int(data.loc[character, 'one_iteration']), remaining)
    # Write back through a single .loc call: the chained form
    # data.loc[character]['to_add'] -= ... only modifies a temporary copy.
    data.loc[character, 'to_add'] = remaining - iter_size
    if iter_size == 0:
        continue

    # Load the image only when copies are actually going to be created.
    img = load_image(image_path)
    for i in range(iter_size):
        augmenter = random.choice(augmenter_choices)
        aug_img = augmenter(img)
        # Augmented copies are stored next to the source image.
        aug_img.save(f"{path}/{image_path.name.split('.')[0]}_{i}.jpg")

In [None]:
# Re-scan the working copy of the training directory and recount the images
# per class.
TRAIN_DIR = Path('/kaggle/working/journey-springfield/train/simpsons_dataset')
aug_files = list(TRAIN_DIR.rglob('*.jpg'))
aug_labels = [path.parent.name for path in aug_files]
aug_counts = pd.Series(aug_labels).value_counts()
print(aug_counts)

Не идеально, но лучше, чем ничего

# Обучение

In [None]:
# Stratified 75/25 split of every .jpg found under TRAIN_DIR.
# NOTE(review): aug_files is split after the directory scan, so if augmented
# copies live in the same folders, copies of one source image can land in both
# parts and inflate the validation score. Confirm, and consider splitting
# before augmenting.
train_files_aug, val_files_aug = train_test_split(aug_files, test_size=0.25, \
                                          stratify=aug_labels)
train_dataset_aug = SimpsonsDataset(train_files_aug, mode='train')
val_dataset_aug = SimpsonsDataset(val_files_aug, mode='val')
BATCH_SIZE = 32

In [None]:
# Batch loaders with two worker processes each; only the training loader shuffles.
train_dataloader = torch.utils.data.DataLoader(train_dataset_aug , shuffle=True, batch_size=BATCH_SIZE, num_workers=2)
val_dataloader = torch.utils.data.DataLoader(val_dataset_aug, shuffle=False, batch_size=BATCH_SIZE, num_workers=2)

In [None]:
# Datasets keyed by phase name.
# NOTE(review): this rebinds `datasets`, which hides the `torchvision.datasets`
# module imported at the top of the notebook.
datasets = {'train': train_dataset_aug,
            'val': val_dataset_aug}

In [None]:
# Data loaders keyed by phase name.
loaders = {'train': train_dataloader,
           'val': val_dataloader}

In [None]:
def fit_model(model, criterion, optimizer, sheduler, n_epochs):
    """Train ``model`` on ``loaders['train']`` and validate on ``loaders['val']``.

    After training, the weights of the epoch with the highest validation
    accuracy are loaded back into ``model``.

    Args:
        model: network to train; it is moved to ``DEVICE``.
        criterion: loss called as ``criterion(outputs, labels)``; assumed to
            return the batch mean (the ``nn.CrossEntropyLoss`` default).
        optimizer: optimizer stepping the model parameters.
        sheduler: learning-rate scheduler, stepped once per epoch.
        n_epochs: number of epochs to run.

    Returns:
        ``(model, (losses, acc))`` where ``losses`` and ``acc`` map
        ``'train'`` / ``'val'`` to lists of per-epoch, per-sample averages.
    """
    model.to(DEVICE)

    losses = {"train": [], "val": []}
    acc = {"train": [], "val": []}

    best_acc = 0
    best_model_state = None

    # Started once, before the first epoch, so the final report covers the
    # whole run (and is defined even when n_epochs is 0).
    since = time.time()

    for epoch_num in trange(n_epochs, desc="Epoch:"):
        print('Epoch {}/{}'.format(epoch_num+1, n_epochs))

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # Switch train/eval mode once per phase instead of once per batch.
            model.train(is_train)
            # Use the size of the dataset the loader iterates over.
            n_samples = len(loaders[phase].dataset)

            epoch_loss = 0
            epoch_acc = 0

            for batch, labels in loaders[phase]:
                batch = batch.to(DEVICE)
                labels = labels.to(DEVICE)

                if is_train:
                    model.zero_grad()
                    outp = model(batch)
                    loss = criterion(outp, labels)
                    loss.backward()
                    optimizer.step()
                else:
                    with torch.no_grad():
                        outp = model(batch)
                        loss = criterion(outp, labels)

                # The loss is a batch mean: weight it by the batch size before
                # dividing by the dataset size, otherwise the reported value
                # is too small by roughly a factor of the batch size.
                epoch_loss += loss.item() * batch.size(0) / n_samples
                y_pred = torch.argmax(outp, -1)
                epoch_acc += int(torch.sum(y_pred == labels).cpu()) / n_samples

            losses[phase].append(epoch_loss)
            acc[phase].append(epoch_acc)

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_state = copy.deepcopy(model.state_dict())

            if phase == 'train':
                print('Train loss: {:.4f} Train acc: {:.4f}'.format(epoch_loss, epoch_acc))

            if phase == 'val':
                print('Val loss: {:.4f} Val acc: {:.4f}'.format(epoch_loss, epoch_acc))
                print('-' * 10)
        sheduler.step()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # No snapshot exists if no epoch ever beat the initial accuracy of 0.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, (losses, acc)

In [None]:
# Alternative backbones:
#model_extractor = models.vgg19(pretrained=True)
#model_extractor = models.resnet34(weights=None)
model_extractor = models.efficientnet_b2(pretrained=True)
n_classes = len(np.unique(train_val_labels))


# Number of trailing blocks of `features` that stay trainable.
layers_to_unfreeze = 6

# Disable gradients for the layers that will not be trained
for param in model_extractor.features[:-layers_to_unfreeze].parameters():
    param.requires_grad = False


# Replace the fully-connected head. Its input width is read from the original
# classifier and its output width from the data, instead of hard-coding
# 1408 and 42.
in_features = model_extractor.classifier[-1].in_features
model_extractor.classifier = nn.Sequential(
    nn.Linear(in_features, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Linear(256, n_classes)
)

# Same device as the batches, rather than an unconditional .cuda().
model_extractor = model_extractor.to(DEVICE)

# Cross-entropy is used as the loss function
loss_fn = nn.CrossEntropyLoss()

# Optimise the new head together with the unfrozen feature blocks; frozen
# parameters are left out of the optimizer.
optimizer = torch.optim.AdamW(
    [param for param in model_extractor.parameters() if param.requires_grad],
    lr=0.001,
)

# Multiplies the learning rate by 0.1 every 7 epochs (a heuristic)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# Fine-tune the network for 20 epochs.
model, metrics = fit_model(model_extractor, loss_fn, optimizer, exp_lr_scheduler, 20)

In [None]:
# Load the label encoder saved by SimpsonsDataset; the context manager closes
# the file handle, which a bare open() inside the call never does.
with open("label_encoder.pkl", 'rb') as le_dump_file:
    label_encoder = pickle.load(le_dump_file)

In [None]:
def predict(model, test_loader):
    """Return class probabilities for every sample yielded by ``test_loader``.

    Args:
        model: network producing one row of logits per sample.
        test_loader: iterable of input batches (no labels).

    Returns:
        NumPy array of softmax probabilities, one row per sample.
    """
    # Switch to inference mode once rather than on every batch.
    model.eval()
    with torch.no_grad():
        logits = [model(inputs.to(DEVICE)).cpu() for inputs in test_loader]

    probs = F.softmax(torch.cat(logits), dim=-1).numpy()
    return probs

In [None]:
# Run the trained model over the test images in their stored (unshuffled) order.
test_dataset = SimpsonsDataset(test_files, mode="test")
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)
probs = predict(model, test_loader)


# Map the most probable class id back to the class name.
preds = label_encoder.inverse_transform(np.argmax(probs, axis=1))
# File names in the same order as the predictions.
test_filenames = [path.name for path in test_dataset.files]

In [None]:
# Write the submission file: one row per test image with its predicted class.
my_submit = pd.DataFrame({'Id': test_filenames, 'Expected': preds})
my_submit.to_csv('/kaggle/working/homework_simpsons.csv', index=False)

# Итоговая точность 0.99787
# Name: Roman_Kaderov_529383613
