In [1]:
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as tt
from torchvision.io import read_image
from sklearn.preprocessing import LabelEncoder

from PIL import Image
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.models import ResNet18_Weights
from tqdm.notebook import tqdm
from torchvision import models as vision_models
import timm
from torchvision.utils import make_grid
from torchvision.io import decode_image
from pathlib import Path
import torchvision.transforms.functional as F
from torch.optim.lr_scheduler import StepLR

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [None]:
import os
import shutil
import pandas as pd

source_dir = r"journey-springfield\train"
target_dir = os.path.join(os.path.dirname(source_dir), "dataset", "images")
csv_path = os.path.join(os.path.dirname(source_dir), "dataset", "labels.csv")

# Создаем целевую директорию, если её нет
os.makedirs(target_dir, exist_ok=True)

# Массив для хранения данных
data = []

# Перебираем классы в исходной директории
for class_name in os.listdir(source_dir):
    class_path = os.path.join(source_dir, class_name)
    
    if not os.path.isdir(class_path): 
        continue

    # Перебираем изображения в каждом классе
    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        new_img_path = os.path.join(target_dir, img_name)

        # Проверяем, существует ли уже файл с таким именем
        if os.path.exists(new_img_path):
            base, ext = os.path.splitext(img_name)
            new_img_name = f"{base}_{class_name}{ext}"
            new_img_path = os.path.join(target_dir, new_img_name)

        # Перемещаем изображение в целевую директорию
        shutil.move(img_path, new_img_path)

        # Добавляем путь к изображению и класс в список
        rel_path = os.path.relpath(new_img_path, os.path.dirname(csv_path))
        data.append([rel_path, class_name])

# Создаем DataFrame и сохраняем его в CSV
df = pd.DataFrame(data, columns=["image_path", "class"])
df.to_csv(csv_path, index=False)

print(f"Готово! Все изображения перемещены в {target_dir}, CSV создан по пути {csv_path}.")

In [3]:
le1 = LabelEncoder()
data = pd.read_csv(r'journey-springfield\dataset\labels.csv')
data['class_id'] = le1.fit_transform(data['class'])
data['class_id'] = data['class_id'].astype('int64')
train, val = train_test_split(data, test_size=0.15, random_state=1, stratify=data['class'])

train = train.reset_index(drop=True)
val = val.reset_index(drop=True)

print(train.shape, val.shape)

(17793, 3) (3140, 3)


In [4]:
images_path = r'journey-springfield\dataset'
data['unified_class'] = data['class']
train['unified_class'] = train['class']
train = train.drop('class',axis = 1)
val['unified_class'] = val['class']
val = val.drop('class',axis = 1)

In [5]:
class EfficientNet(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        self.model = vision_models.efficientnet_b1(vision_models.EfficientNet_B1_Weights.DEFAULT)
        self.model.classifier[1] = torch.nn.Linear(self.model.classifier[1].in_features, num_classes)

    def forward(self, batch):
        inputs, _ = batch
        return self.model(inputs)

In [6]:
class SimpDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, path_to_images: Path, transforms: tt.Compose) -> None:
        self.df = dataframe
        self.path_to_images = path_to_images
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        # print(row)
        image = Image.open(self.path_to_images + '\\' + row["image_path"]).convert('RGB')
        # print(image)
        if self.transforms is not None:
            image = self.transforms(image)
        return image, row["class_id"]

In [7]:
idx = 1
img = read_image(images_path +'\\'+ data.iloc[idx]["image_path"])

In [8]:
rescale_size = 244
# Imagenet mean and standard (are calculated from all of images)
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])

In [9]:
train_transform = tt.Compose([
    tt.Resize((int(rescale_size * 1.25), int(rescale_size * 1.25))),
    tt.RandomCrop(rescale_size),
    tt.RandomHorizontalFlip(),
    tt.ToTensor(),
    tt.Normalize(imagenet_mean, imagenet_std)
])

val_transform = tt.Compose([
    tt.Resize((int(rescale_size * 1.05), int(rescale_size * 1.05))),
    tt.CenterCrop(rescale_size),
    tt.ToTensor(),
    tt.Normalize(imagenet_mean, imagenet_std)
])

train_dataset = SimpDataset(train, images_path, transforms=train_transform)
val_dataset = SimpDataset(val, images_path, transforms=val_transform)


train_dataloader = DataLoader(train_dataset, batch_size=64, num_workers=0, shuffle=True)
valid_dataloader = DataLoader(val_dataset, batch_size=64, num_workers=0, shuffle=False)
train_dataloader = DataLoader(train_dataset, batch_size=64,  shuffle=True)
valid_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False)

In [10]:
next(iter(train_dataloader))[1]

tensor([20, 18, 26, 15, 32, 39, 22, 27,  6,  4,  4, 36, 17, 22, 16, 37, 25,  4,
        25, 29, 32, 15, 20, 20, 15, 20, 27,  4,  6, 24,  4, 22, 25,  6, 20, 28,
        22, 17,  0,  3, 16, 32,  4, 17, 24, 37, 22, 32, 18, 22, 17, 28, 32, 15,
        15, 28,  4, 15, 27, 37,  0, 15,  4, 32])

In [11]:
model = EfficientNet(num_classes=data["unified_class"].nunique()).to(device)


# Инициализируем функцию потерь (loss/criterion), а так же оптимизатор, который будет регулировать обновление весов нашей модели
optimizer = optim.AdamW(model.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()

# Переменные для визуализации метрик и функции потерь
train_losses = []
val_losses = []

# Для удобства оценивать качество модели будем той же метрику, что на лидерборде - F1 score
train_f1_scores = []
val_f1_scores = []

best_val_f1 = 0.0
best_model_path = 'best_model.pth'

# Определим, сколько раз мы пройдёмся по всему датасету, прежде, чем закончим обучение модели и выберем лучшую версию
num_epochs = 25

# Шаговое уменьшение (StepLR)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)  # Каждые 5 эпох уменьшать lr в 10 раз

# Напишем свой train_loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    train_true = []
    train_pred = []

    for batch in tqdm(train_dataloader):
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model((inputs, labels))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        preds = torch.argmax(outputs, dim=1)

    scheduler.step()
    model.eval()
    val_running_loss = 0.0
    val_true = []
    val_pred = []

    # валидационный цикл, когда мы оцениваем качество работы модели на отложенной выборке
    with torch.no_grad():
        for batch in tqdm(valid_dataloader):
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model((inputs, labels))
            loss = criterion(outputs, labels)

            val_running_loss += loss.item()

            preds = torch.argmax(outputs, dim=1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(preds.cpu().numpy())

    val_f1 = f1_score(val_true, val_pred, average='macro')
    val_losses.append(val_running_loss / len(valid_dataloader))
    val_f1_scores.append(val_f1)

    # если получившаяся модель лучше предыдущей, сохраним чекпоинт
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), best_model_path)
        print(f'New best model saved with F1: {best_val_f1:.4f}')


    # выведем в консоль получившиеся результаты на отдельной эпохе
    print(f'Epoch [{epoch+1}/{num_epochs}], '
          f'Val Loss: {val_losses[-1]:.4f}, Val F1: {val_f1:.4f}')



  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.6996
Epoch [1/25], Val Loss: 0.2245, Val F1: 0.6996


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.8477
Epoch [2/25], Val Loss: 0.1652, Val F1: 0.8477


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.8604
Epoch [3/25], Val Loss: 0.1338, Val F1: 0.8604


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.8996
Epoch [4/25], Val Loss: 0.0986, Val F1: 0.8996


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.9166
Epoch [5/25], Val Loss: 0.1089, Val F1: 0.9166


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.9264
Epoch [6/25], Val Loss: 0.0989, Val F1: 0.9264


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [7/25], Val Loss: 0.0943, Val F1: 0.9254


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.9274
Epoch [8/25], Val Loss: 0.0960, Val F1: 0.9274


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [9/25], Val Loss: 0.0987, Val F1: 0.9038


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [10/25], Val Loss: 0.0985, Val F1: 0.9261


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [11/25], Val Loss: 0.0983, Val F1: 0.9269


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.9289
Epoch [12/25], Val Loss: 0.0949, Val F1: 0.9289


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [13/25], Val Loss: 0.0961, Val F1: 0.9275


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.9318
Epoch [14/25], Val Loss: 0.0956, Val F1: 0.9318


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [15/25], Val Loss: 0.0951, Val F1: 0.9012


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [16/25], Val Loss: 0.0958, Val F1: 0.9056


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [17/25], Val Loss: 0.0956, Val F1: 0.9281


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [18/25], Val Loss: 0.0986, Val F1: 0.9272


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [19/25], Val Loss: 0.0947, Val F1: 0.9284


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [20/25], Val Loss: 0.0974, Val F1: 0.9309


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [21/25], Val Loss: 0.0943, Val F1: 0.9291


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

New best model saved with F1: 0.9336
Epoch [22/25], Val Loss: 0.0955, Val F1: 0.9336


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [23/25], Val Loss: 0.0962, Val F1: 0.9264


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [24/25], Val Loss: 0.0967, Val F1: 0.9283


  0%|          | 0/279 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [25/25], Val Loss: 0.0983, Val F1: 0.9272


In [12]:
sample = pd.read_csv(r"journey-springfield\sample_submission.csv")
sample['image_name'] = r'journey-springfield/testset/'+sample['Id']

In [13]:
class InferenceDataset(Dataset):
    def __init__(self, image_paths, transforms=None):
        self.image_paths = image_paths
        self.transforms = transforms

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        if self.transforms is not None:
            image = self.transforms(image)
        return image, image_path


# Тут важно не ошибиться и не использовать тренировочные трансформы
infer_transform = tt.Compose([
    tt.RandomHorizontalFlip(),
    tt.RandomRotation((-5, 5)),
    tt.Resize((int(244 * 1.25), int(244 * 1.25))),
    tt.ToTensor(),
    tt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Найдем все тестовые картинки
test_image_paths = sample.image_name.tolist()

infer_dataset = InferenceDataset(test_image_paths, transforms=infer_transform)
infer_dataloader = DataLoader(infer_dataset, batch_size=1, shuffle=False)

best_model_path = r'best_model.pth'
model.load_state_dict(torch.load(best_model_path))

# Не забудем перевести модель в режим предсказания, а не обучения.
model.eval()

# Для ускорения инференса будем подавать в модель картинки батчами (по несколько картинок за раз) и сохраним предсказанные метки классов.
results = []
for images, image_names in tqdm(infer_dataloader):
    images = images.to(device)

    with torch.no_grad():
        outputs = model((images, None)) #для не хагина
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        
        results.append(preds[0])


# Для удобства объединим все пары "имя файла - предсказанный класс" в датафрейм (таблицу) с колонками image_name, predicted_class
sample['predicted_class'] = results

# Вывод DataFrame
sample

  model.load_state_dict(torch.load(best_model_path))


  0%|          | 0/991 [00:00<?, ?it/s]

Unnamed: 0,Id,Expected,image_name,predicted_class
0,img0.jpg,bart_simpson,journey-springfield/testset/img0.jpg,29
1,img1.jpg,bart_simpson,journey-springfield/testset/img1.jpg,4
2,img2.jpg,bart_simpson,journey-springfield/testset/img2.jpg,24
3,img3.jpg,bart_simpson,journey-springfield/testset/img3.jpg,29
4,img4.jpg,bart_simpson,journey-springfield/testset/img4.jpg,20
...,...,...,...,...
986,img986.jpg,bart_simpson,journey-springfield/testset/img986.jpg,37
987,img987.jpg,bart_simpson,journey-springfield/testset/img987.jpg,29
988,img988.jpg,bart_simpson,journey-springfield/testset/img988.jpg,28
989,img989.jpg,bart_simpson,journey-springfield/testset/img989.jpg,6


In [14]:
sample['Expected'] = le1.inverse_transform(sample['predicted_class'])
sample.drop(['image_name','predicted_class'],axis=1,inplace=True)

In [15]:
sample.to_csv('submit.csv',index = False)