In [11]:
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as tt
from torchvision.io import read_image
from sklearn.preprocessing import LabelEncoder

from PIL import Image
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.models import ResNet18_Weights
from tqdm.notebook import tqdm
from torchvision import models as vision_models
import timm
from torchvision.utils import make_grid
from torchvision.io import decode_image
from pathlib import Path
import torchvision.transforms.functional as F
from torch.optim.lr_scheduler import StepLR


# Зафиксируем сиды, чтобы обучение было воспроизводимым.
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(1001)

In [12]:
import torch

if torch.cuda.is_available():
    print("Все хорошо, установлена версия с поддержкой видеокарт")
else:
    print("Что-то не так, стоит torch с поддержкой только CPU (если у вас MacOS или так и задумано, то все нормально.")
device='cuda'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Для обучения выбран девайс {}".format(device))

Все хорошо, установлена версия с поддержкой видеокарт
Для обучения выбран девайс cuda


In [13]:
import os
import shutil
import pandas as pd


source_dir = r"journey-springfield\train"
target_dir = os.path.join(os.path.dirname(source_dir), "dataset", "images")
csv_path = os.path.join(os.path.dirname(source_dir), "dataset", "labels.csv")

os.makedirs(target_dir, exist_ok=True)

data = []


for class_name in os.listdir(source_dir):
    class_path = os.path.join(source_dir, class_name)

    if os.path.isdir(class_path): 
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            new_img_path = os.path.join(target_dir, img_name)


            if os.path.exists(new_img_path):
                base, ext = os.path.splitext(img_name)
                new_img_name = f"{base}_{class_name}{ext}"
                new_img_path = os.path.join(target_dir, new_img_name)


            shutil.move(img_path, new_img_path)


            data.append([os.path.relpath(new_img_path, os.path.dirname(csv_path)), class_name])


df = pd.DataFrame(data, columns=["image_path", "class"])
df.to_csv(csv_path, index=False)

print(f"Готово! Все изображения перемещены в {target_dir}, CSV создан по пути {csv_path}.")


Готово! Все изображения перемещены в journey-springfield\dataset\images, CSV создан по пути journey-springfield\dataset\labels.csv.


In [14]:
le1 = LabelEncoder()
data = pd.read_csv(r'D:\data science\IOAI_DZ\Lesson 5, Homework\ioai-journey-to-springfield\journey-springfield\dataset\labels.csv')
data['class_id'] = le1.fit_transform(data['class'])
data['class_id'] = data['class_id'].astype('int64')
train, val = train_test_split(data, test_size=0.1, random_state=1, stratify=data['class'])

train = train.reset_index(drop=True)
val = val.reset_index(drop=True)

print(train.shape, val.shape)

(18839, 3) (2094, 3)


In [15]:
images_path = r'journey-springfield\dataset'

In [16]:
data['unified_class'] = data['class']

In [17]:
train['unified_class'] = train['class']

In [18]:
train = train.drop('class',axis = 1)

In [19]:
train

Unnamed: 0,image_path,class_id,unified_class
0,images\pic_0432_lisa_simpson.jpg,20,lisa_simpson
1,images\pic_0689_chief_wiggum.jpg,7,chief_wiggum
2,images\pic_0161_apu_nahasapeemapetilon.jpg,2,apu_nahasapeemapetilon
3,images\pic_0867_marge_simpson.jpg,22,marge_simpson
4,images\pic_0891_marge_simpson.jpg,22,marge_simpson
...,...,...,...
18834,images\pic_0551_marge_simpson.jpg,22,marge_simpson
18835,images\pic_0437_principal_skinner.jpg,32,principal_skinner
18836,images\pic_0113_charles_montgomery_burns.jpg,6,charles_montgomery_burns
18837,images\pic_0262_homer_simpson.jpg,15,homer_simpson


In [20]:
val['unified_class'] = val['class']

In [21]:
val = val.drop('class',axis = 1)

In [22]:
val

Unnamed: 0,image_path,class_id,unified_class
0,images\pic_0772_moe_szyslak.jpg,27,moe_szyslak
1,images\pic_0609_moe_szyslak.jpg,27,moe_szyslak
2,images\pic_0219_milhouse_van_houten.jpg,25,milhouse_van_houten
3,images\pic_0366_moe_szyslak.jpg,27,moe_szyslak
4,images\pic_0319.jpg,0,abraham_grampa_simpson
...,...,...,...
2089,images\pic_0398_bart_simpson.jpg,4,bart_simpson
2090,images\pic_0044_carl_carlson.jpg,5,carl_carlson
2091,images\pic_0230_kent_brockman.jpg,16,kent_brockman
2092,images\pic_0474_lisa_simpson.jpg,20,lisa_simpson


In [23]:
class EfficientNet(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        self.model = vision_models.efficientnet_b1(vision_models.EfficientNet_B1_Weights.DEFAULT)
        self.model.classifier[1] = torch.nn.Linear(self.model.classifier[1].in_features, num_classes)

    def forward(self, batch):
        inputs, _ = batch
        return self.model(inputs)

In [24]:
class SimpDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, path_to_images: Path, transforms: tt.Compose) -> None:
        self.df = dataframe
        self.path_to_images = path_to_images
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        # print(row)
        image = Image.open(self.path_to_images + '\\' + row["image_path"]).convert('RGB')
        # print(image)
        if self.transforms is not None:
            image = self.transforms(image)
        return image, row["class_id"]

In [25]:
def show(imgs):
    if not isinstance(imgs, list):
        imgs = [imgs]
    fig, axs = plt.subplots(ncols=len(imgs), squeeze=False)
    for i, img in enumerate(imgs):
        img = img.detach()
        img = F.to_pil_image(img)
        axs[0, i].imshow(np.asarray(img))
        axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

In [26]:
idx = 1
img = read_image(images_path +'\\'+ data.iloc[idx]["image_path"])

In [29]:
train_transform = tt.Compose([
    # tt.RandomGrayscale(p=0.5),
    tt.RandomHorizontalFlip(),
    tt.RandomCrop(224),
    tt.RandomRotation((-5, 5)),
    tt.Resize((int(244 * 1.25), int(244 * 1.25))),
    tt.ToTensor(),
    tt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_transform = tt.Compose([
    tt.RandomHorizontalFlip(),
    tt.RandomCrop(224),
    tt.RandomRotation((-5, 5)),
    tt.Resize((int(244 * 1.25), int(244 * 1.25))),
    tt.ToTensor(),
    tt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = SimpDataset(train, images_path, transforms=train_transform)
val_dataset = SimpDataset(val, images_path, transforms=val_transform)


train_dataloader = DataLoader(train_dataset, batch_size=64, num_workers=0, shuffle=True)
valid_dataloader = DataLoader(val_dataset, batch_size=64, num_workers=0, shuffle=False)
train_dataloader = DataLoader(train_dataset, batch_size=64,  shuffle=True)
valid_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False)

In [30]:
next(iter(train_dataloader))[1]

tensor([ 6,  0, 20, 28,  5, 27, 15, 32, 32, 15,  5, 16, 17,  4, 32,  6,  4,  9,
        22, 25, 20,  9, 20,  2, 29, 25,  9, 28,  6,  6,  0, 27, 15, 35, 27, 28,
        32,  6, 20,  6, 28,  6, 17,  4, 27, 29, 25, 26,  5, 20,  4, 25, 15, 25,
         6, 20,  4, 17,  7, 20, 28, 15, 32,  4])

In [31]:
torch.cuda.empty_cache()

In [32]:
# Напишем код для обучения нашей нейронной сети:
# model = ResNet18(num_classes=data["unified_class"].nunique()).to(device)
model = EfficientNet(num_classes=data["unified_class"].nunique()).to(device)
# model = Megadescriptor(num_classes=data["unified_class"].nunique()).to(device)


# Инициализируем функцию потерь (loss/criterion), а так же оптимизатор, который будет регулировать обновление весов нашей модели
optimizer = optim.AdamW(model.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()

# Переменные для визуализации метрик и функции потерь
train_losses = []
val_losses = []

# Для удобства оценивать качество модели будем той же метрику, что на лидерборде - F1 score
train_f1_scores = []
val_f1_scores = []

best_val_f1 = 0.0
best_model_path = 'best_model.pth'

# Определим, сколько раз мы пройдёмся по всему датасету, прежде, чем закончим обучение модели и выберем лучшую версию
num_epochs = 50

# Шаговое уменьшение (StepLR)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)  # Каждые 5 эпох уменьшать lr в 10 раз

# Напишем свой train_loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    train_true = []
    train_pred = []

    for batch in tqdm(train_dataloader):
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model((inputs, labels))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        preds = torch.argmax(outputs, dim=1)
        # train_true.extend(labels.cpu().numpy())
        # train_pred.extend(preds.cpu().numpy())

    # train_f1 = f1_score(train_true, train_pred, average='macro')
    # train_losses.append(running_loss / len(train_dataloader))
    # train_f1_scores.append(train_f1)

    scheduler.step()
    model.eval()
    val_running_loss = 0.0
    val_true = []
    val_pred = []

    # валидационный цикл, когда мы оцениваем качество работы модели на отложенной выборке
    with torch.no_grad():
        for batch in tqdm(valid_dataloader):
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model((inputs, labels))
            loss = criterion(outputs, labels)

            val_running_loss += loss.item()

            preds = torch.argmax(outputs, dim=1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(preds.cpu().numpy())

    val_f1 = f1_score(val_true, val_pred, average='macro')
    val_losses.append(val_running_loss / len(valid_dataloader))
    val_f1_scores.append(val_f1)

    # если получившаяся модель лучше предыдущей, сохраним чекпоинт
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), best_model_path)
        print(f'New best model saved with F1: {best_val_f1:.4f}')


    # выведем в консоль получившиеся результаты на отдельной эпохе
    print(f'Epoch [{epoch+1}/{num_epochs}], '
        #   f'Train Loss: {train_losses[-1]:.4f}, Train F1: {train_f1:.4f}, '
          f'Val Loss: {val_losses[-1]:.4f}, Val F1: {val_f1:.4f}')



  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.4957
Epoch [1/50], Val Loss: 0.6631, Val F1: 0.4957


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.6491
Epoch [2/50], Val Loss: 0.4568, Val F1: 0.6491


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.7064
Epoch [3/50], Val Loss: 0.4282, Val F1: 0.7064


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.7773
Epoch [4/50], Val Loss: 0.3843, Val F1: 0.7773


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [5/50], Val Loss: 0.4036, Val F1: 0.7734


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.8137
Epoch [6/50], Val Loss: 0.3056, Val F1: 0.8137


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.8185
Epoch [7/50], Val Loss: 0.3436, Val F1: 0.8185


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.8416
Epoch [8/50], Val Loss: 0.3042, Val F1: 0.8416


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [9/50], Val Loss: 0.3153, Val F1: 0.8151


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [10/50], Val Loss: 0.3282, Val F1: 0.7935


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [11/50], Val Loss: 0.3485, Val F1: 0.7867


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [12/50], Val Loss: 0.3202, Val F1: 0.8046


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [13/50], Val Loss: 0.3026, Val F1: 0.8186


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [14/50], Val Loss: 0.3267, Val F1: 0.8357


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [15/50], Val Loss: 0.3495, Val F1: 0.8039


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.8422
Epoch [16/50], Val Loss: 0.2941, Val F1: 0.8422


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [17/50], Val Loss: 0.3252, Val F1: 0.8234


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [18/50], Val Loss: 0.3150, Val F1: 0.8230


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

New best model saved with F1: 0.8527
Epoch [19/50], Val Loss: 0.3026, Val F1: 0.8527


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [20/50], Val Loss: 0.3122, Val F1: 0.8335


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [21/50], Val Loss: 0.3010, Val F1: 0.8261


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

Epoch [22/50], Val Loss: 0.3435, Val F1: 0.8085


  0%|          | 0/295 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [33]:
sample = pd.read_csv(r"D:\data science\IOAI_DZ\Lesson 5, Homework\ioai-journey-to-springfield\journey-springfield\sample_submission.csv")


In [34]:
sample

Unnamed: 0,Id,Expected
0,img0.jpg,bart_simpson
1,img1.jpg,bart_simpson
2,img2.jpg,bart_simpson
3,img3.jpg,bart_simpson
4,img4.jpg,bart_simpson
...,...,...
986,img986.jpg,bart_simpson
987,img987.jpg,bart_simpson
988,img988.jpg,bart_simpson
989,img989.jpg,bart_simpson


In [35]:
sample['image_name'] = r'journey-springfield/testset/'+sample['Id']
sample

Unnamed: 0,Id,Expected,image_name
0,img0.jpg,bart_simpson,journey-springfield/testset/img0.jpg
1,img1.jpg,bart_simpson,journey-springfield/testset/img1.jpg
2,img2.jpg,bart_simpson,journey-springfield/testset/img2.jpg
3,img3.jpg,bart_simpson,journey-springfield/testset/img3.jpg
4,img4.jpg,bart_simpson,journey-springfield/testset/img4.jpg
...,...,...,...
986,img986.jpg,bart_simpson,journey-springfield/testset/img986.jpg
987,img987.jpg,bart_simpson,journey-springfield/testset/img987.jpg
988,img988.jpg,bart_simpson,journey-springfield/testset/img988.jpg
989,img989.jpg,bart_simpson,journey-springfield/testset/img989.jpg


In [38]:
class InferenceDataset(Dataset):
    def __init__(self, image_paths, transforms=None):
        self.image_paths = image_paths
        self.transforms = transforms

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        if self.transforms is not None:
            image = self.transforms(image)
        return image, image_path


# Тут важно не ошибиться и не использовать тренировочные трансформы
infer_transform = tt.Compose([
    tt.RandomHorizontalFlip(),
    tt.RandomRotation((-5, 5)),
    tt.Resize((int(244 * 1.25), int(244 * 1.25))),
    tt.ToTensor(),
    tt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Найдем все тестовые картинки
test_image_paths = sample.image_name.tolist()

infer_dataset = InferenceDataset(test_image_paths, transforms=infer_transform)
infer_dataloader = DataLoader(infer_dataset, batch_size=1, shuffle=False)


# Инициализируем нашу модель и загрузим в неё лучшие после эксперимента веса
# model = ResNet18(num_classes=data["unified_class"].nunique()).to(device)
# model = EfficientNet(num_classes=data["unified_class"].nunique()).to(device)

best_model_path = r'D:\data science\IOAI_DZ\Lesson 5, Homework\ioai-journey-to-springfield\best_model.pth'
model.load_state_dict(torch.load(best_model_path))

# Не забудем перевести модель в режим предсказания, а не обучения.
model.eval()

# Для ускорения инференса будем подавать в модель картинки батчами (по несколько картинок за раз) и сохраним предсказанные метки классов.
results = []
for images, image_names in tqdm(infer_dataloader):
    images = images.to(device)

    with torch.no_grad():
        outputs = model((images, None)) #для не хагина
        preds = torch.argmax(outputs, dim=1).cpu().numpy()

        # outputs = model(images) # для хагина
        # preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
        
        results.append(preds[0])


# Для удобства объединим все пары "имя файла - предсказанный класс" в датафрейм (таблицу) с колонками image_name, predicted_class
sample['predicted_class'] = results

# Вывод DataFrame
sample

  model.load_state_dict(torch.load(best_model_path))


  0%|          | 0/991 [00:00<?, ?it/s]

Unnamed: 0,Id,Expected,image_name,predicted_class
0,img0.jpg,bart_simpson,journey-springfield/testset/img0.jpg,29
1,img1.jpg,bart_simpson,journey-springfield/testset/img1.jpg,4
2,img2.jpg,bart_simpson,journey-springfield/testset/img2.jpg,24
3,img3.jpg,bart_simpson,journey-springfield/testset/img3.jpg,29
4,img4.jpg,bart_simpson,journey-springfield/testset/img4.jpg,20
...,...,...,...,...
986,img986.jpg,bart_simpson,journey-springfield/testset/img986.jpg,37
987,img987.jpg,bart_simpson,journey-springfield/testset/img987.jpg,29
988,img988.jpg,bart_simpson,journey-springfield/testset/img988.jpg,6
989,img989.jpg,bart_simpson,journey-springfield/testset/img989.jpg,6


In [39]:
sample['Expected'] = le1.inverse_transform(sample['predicted_class'])


In [40]:
sample

Unnamed: 0,Id,Expected,image_name,predicted_class
0,img0.jpg,nelson_muntz,journey-springfield/testset/img0.jpg,29
1,img1.jpg,bart_simpson,journey-springfield/testset/img1.jpg,4
2,img2.jpg,mayor_quimby,journey-springfield/testset/img2.jpg,24
3,img3.jpg,nelson_muntz,journey-springfield/testset/img3.jpg,29
4,img4.jpg,lisa_simpson,journey-springfield/testset/img4.jpg,20
...,...,...,...,...
986,img986.jpg,sideshow_bob,journey-springfield/testset/img986.jpg,37
987,img987.jpg,nelson_muntz,journey-springfield/testset/img987.jpg,29
988,img988.jpg,charles_montgomery_burns,journey-springfield/testset/img988.jpg,6
989,img989.jpg,charles_montgomery_burns,journey-springfield/testset/img989.jpg,6


In [41]:
sample.drop(['image_name','predicted_class'],axis=1,inplace=True)

In [42]:
sample

Unnamed: 0,Id,Expected
0,img0.jpg,nelson_muntz
1,img1.jpg,bart_simpson
2,img2.jpg,mayor_quimby
3,img3.jpg,nelson_muntz
4,img4.jpg,lisa_simpson
...,...,...
986,img986.jpg,sideshow_bob
987,img987.jpg,nelson_muntz
988,img988.jpg,charles_montgomery_burns
989,img989.jpg,charles_montgomery_burns


In [43]:
sample.to_csv('otv.csv',index = False)