In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torchvision
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from torchvision.io import read_image
from torchmetrics.classification import BinaryAUROC

from PIL import Image
from IPython.display import clear_output

from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

In [2]:
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

PATH = '/kaggle/input/aaa-comp/avito-auto-moderation'
TRAIN_FILE = 'train_v2.csv'
SAMPLE_SUB_FILE = 'sample_submission_v2.csv'
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  
print(DEVICE)

train = pd.read_csv(os.path.join(PATH, TRAIN_FILE))
sample_submission = pd.read_csv(os.path.join(PATH, SAMPLE_SUB_FILE))

cuda:0


In [3]:
print(train.shape)
train.head(3)

(1239, 2)


Unnamed: 0,image,label
0,1798.jpg,0
1,372.jpg,1
2,124.jpg,0


In [4]:
f'{train.label.mean():.4f}'

'0.2163'

In [5]:
sample_submission.head(3)

Unnamed: 0,image,score
0,474.jpg,0.5
1,1052.jpg,0.5
2,63.jpg,0.5


In [6]:
# submission.to_csv('submission.csv', index=False)

In [7]:
model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 1)
model.to(DEVICE);

MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])
SIZE = 224

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 189MB/s]


In [8]:
class SmokeDataset(Dataset):

    def __init__(self, image_folder, images, labels, mode='predict'):
        self.folder = image_folder
        self.images = images
        self.labels = labels
        self.is_train = (mode == 'fit')
        if self.is_train:
            self.aug_list = [
                T.RandomHorizontalFlip(p=0.5),  # случайный горизонтальный флип
                T.RandomVerticalFlip(p=0.2),    # редкий вертикальный флип
                T.RandomRotation(degrees=15),   # небольшие повороты
                T.ColorJitter(brightness=0.2,   # изменение яркости
                              contrast=0.2,     #           контраста
                              saturation=0.2,   #           насыщенности
                              hue=0.05),        #           оттенка
                T.RandomResizedCrop(SIZE, scale=(0.8, 1.0), ratio=(0.9, 1.1)),  # случайный кроп
            ]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.folder, self.images[idx])
        image = Image.open(img_path).convert('RGB')  # всегда RGB

        # Padding чтобы сделать квадрат
        width, height = image.size
        pad_size = abs(width - height)
        if width > height:
            padding = (0, pad_size // 2, 0, pad_size - pad_size // 2)  # (left, top, right, bottom)
        else:
            padding = (pad_size // 2, 0, pad_size - pad_size // 2, 0)

        # Базовые преобразования
        transform_list = [T.Pad(padding), T.Resize(SIZE)]

        if self.is_train:
            # Аугментации для обучения
            transform_list = transform_list + self.aug_list

        # Финальные преобразования в тензор и нормализация
        transform_list += [
            T.ToTensor(),
            T.Normalize(MEAN, STD)
        ]

        transform = T.Compose(transform_list)
        image = transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float)
        return image, label

In [9]:
X_train, X_val, y_train, y_val = train_test_split(train.image, train.label, test_size=0.1, stratify=train.label)

In [10]:
train_dataset = SmokeDataset(image_folder=PATH, images=X_train.values, labels=y_train.values, mode='fit')
val_dataset = SmokeDataset(image_folder=PATH, images=X_val.values, labels=y_val.values, mode='predict')

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)

In [11]:
def schedule(step_number, n_steps, breaking_step1=125, wait_steps=300):
    factor = 0.005 ** (1 / (n_steps - breaking_step1 - wait_steps))
    if step_number < breaking_step1:
        return step_number / breaking_step1
    elif step_number < breaking_step1 + wait_steps:
        return 1
    else:
        return (factor ** (step_number - breaking_step1 - wait_steps))

In [12]:
def train_model(model, optim, sched, criterion, train_loader, val_loader, n_epochs):
    train_auc_metric = BinaryAUROC().to(DEVICE)
    val_auc_metric = BinaryAUROC().to(DEVICE)

    for epoch in range(n_epochs):
        # Эпоха
        train_loss = 0
        train_auc_metric.reset()
    
        model.train()
        for i, (x_train, y_train) in enumerate(train_loader):
            optim.zero_grad()
    
            x_train = x_train.to(DEVICE)
            y_train = y_train.to(DEVICE)
    
            outputs = model(x_train).squeeze()  # (batch_size,) - логиты
            loss = criterion(outputs, y_train)
            
            loss.backward()
            optim.step()
            sched.step()
    
            # так как softmax это монотонное преобразование,
            # то, если конечно я не ошибаюсь, его необязательно делать перед подачей в ROC-AUC
            train_auc_metric.update(outputs.detach(), y_train)
            train_loss += loss.item()
    
        train_auc = train_auc_metric.compute()
        train_loss /= len(train_dataloader)
    
        # Валидация
    
        val_loss = 0
        val_auc_metric.reset()
        
        model.eval()
        with torch.no_grad():
            for i, (x_val, y_val) in enumerate(val_loader):
                x_val = x_val.to(DEVICE)
                y_val = y_val.to(DEVICE)
    
                outputs = model(x_val).squeeze()
    
                val_auc_metric.update(outputs, y_val)
                val_loss += criterion(outputs, y_val).item()
    
            val_loss /= len(val_dataloader)
            val_auc = val_auc_metric.compute()
    
        print(f'epoch: {epoch}, TRAIN: [ loss={train_loss:.4f}, auc={train_auc:.4f} ], VAL: [ loss={val_loss:.4f}, auc={val_auc:.4f} ]')

In [13]:
model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Sequential(
    nn.Dropout(0.4),
    nn.Linear(model.fc.in_features, 1)
)
model.to(DEVICE);

for param in model.parameters():
    param.requires_grad = False

# Разморозить только классификатор
for param in model.fc.parameters():
    param.requires_grad = True

# первая стадия
optimizer = torch.optim.AdamW(
    model.fc.parameters(),
    lr=1e-3,
    weight_decay=1e-4
)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

criterion = nn.BCEWithLogitsLoss()


train_model(
    model,
    optimizer,
    scheduler,
    criterion,
    train_dataloader,
    val_dataloader,
    n_epochs=10
)

# вторая стадия
for param in model.layer4.parameters():
    param.requires_grad = True

optimizer = torch.optim.AdamW(
    [
        {"params": model.layer4.parameters(), "lr": 1e-4},
        {"params": model.fc.parameters(), "lr": 5e-4},
    ],
    weight_decay=1e-4
)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)

train_model(
    model,
    optimizer,
    scheduler,
    criterion,
    train_dataloader,
    val_dataloader,
    n_epochs=15
)


epoch: 0, TRAIN: [ loss=0.5987, auc=0.4758 ], VAL: [ loss=0.6042, auc=0.2776 ]
epoch: 1, TRAIN: [ loss=0.5324, auc=0.5677 ], VAL: [ loss=0.5527, auc=0.4166 ]
epoch: 2, TRAIN: [ loss=0.5184, auc=0.6026 ], VAL: [ loss=0.5235, auc=0.5788 ]
epoch: 3, TRAIN: [ loss=0.5085, auc=0.6452 ], VAL: [ loss=0.4926, auc=0.6869 ]
epoch: 4, TRAIN: [ loss=0.4697, auc=0.7267 ], VAL: [ loss=0.4706, auc=0.7499 ]
epoch: 5, TRAIN: [ loss=0.4599, auc=0.7510 ], VAL: [ loss=0.4511, auc=0.7892 ]
epoch: 6, TRAIN: [ loss=0.4529, auc=0.7442 ], VAL: [ loss=0.4445, auc=0.8102 ]
epoch: 7, TRAIN: [ loss=0.4610, auc=0.7342 ], VAL: [ loss=0.4287, auc=0.8316 ]
epoch: 8, TRAIN: [ loss=0.4159, auc=0.8130 ], VAL: [ loss=0.4123, auc=0.8419 ]
epoch: 9, TRAIN: [ loss=0.4236, auc=0.7978 ], VAL: [ loss=0.4016, auc=0.8496 ]
epoch: 0, TRAIN: [ loss=0.3630, auc=0.8646 ], VAL: [ loss=0.3643, auc=0.9003 ]
epoch: 1, TRAIN: [ loss=0.3105, auc=0.9006 ], VAL: [ loss=0.3076, auc=0.9198 ]
epoch: 2, TRAIN: [ loss=0.2514, auc=0.9446 ], VAL: [

In [14]:
# Сохраняем модель после обучения
torch.save(model.state_dict(), 'smoke_resnet18_final.pth')
print("Модель сохранена в 'smoke_resnet18_final.pth'")

Модель сохранена в 'smoke_resnet18_final.pth'


In [15]:
# Подготавливаем тестовый датасет (без меток)
class TestSmokeDataset(Dataset):
    def __init__(self, image_folder, images):
        self.folder = image_folder
        self.images = images

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.folder, self.images[idx])
        image = Image.open(img_path).convert('RGB')

        # Тот же padding, что и раньше
        width, height = image.size
        pad_size = abs(width - height)
        if width > height:
            padding = (0, pad_size // 2, 0, pad_size - pad_size // 2)
        else:
            padding = (pad_size // 2, 0, pad_size - pad_size // 2, 0)

        transform = T.Compose([
            T.Pad(padding),
            T.Resize(SIZE),
            T.ToTensor(),
            T.Normalize(MEAN, STD)
        ])

        image = transform(image)
        return image, self.images[idx]  # возвращаем также имя файла


# Создаём даталоадер для теста
test_images = sample_submission['image'].values
test_dataset = TestSmokeDataset(image_folder=PATH, images=test_images)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)


# Предсказание
model.eval()
all_scores = []
all_image_names = []

with torch.no_grad():
    for images, img_names in tqdm(test_dataloader, desc="Inference"):
        images = images.to(DEVICE)
        outputs = model(images)                  # логиты
        probs = torch.sigmoid(outputs).view(-1) # вероятности [0,1]
        
        all_scores.extend(probs.cpu().numpy())
        all_image_names.extend(img_names)


# Заполняем submission
sample_submission['score'] = all_scores

# Проверяем, что всё на месте
print(sample_submission.head(3))
print(f"Всего предсказаний: {len(sample_submission)}")

# Сохраняем
sample_submission.to_csv('submission.csv', index=False)
print("Файл submission.csv успешно сохранён!")

Inference:   0%|          | 0/6 [00:00<?, ?it/s]

      image     score
0   474.jpg  0.923482
1  1052.jpg  0.000395
2    63.jpg  0.020401
Всего предсказаний: 381
Файл submission.csv успешно сохранён!
