In [3]:
!pip install -q albumentations

# 1. Общие параметры

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.cuda.amp import GradScaler, autocast
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchmetrics.classification import MulticlassAccuracy
from sklearn.model_selection import StratifiedShuffleSplit

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 256
EPOCHS = 20
LR = 3e-4
IMG_SIZE = 48       
NUM_CLASSES = 26           # A–Z
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm


# 2. Загрузка и валидационный сплит

In [2]:
images = np.load('./mipt-dl-captcha-2025/mds-misis-dl-captchan/images.npy')      # (20000, 48, 48, 3)
labels = np.load('./mipt-dl-captcha-2025/mds-misis-dl-captchan/labels.npy')      # (20000,)

# Разделим 10 % под валидацию
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.10, random_state=SEED)
train_idx, val_idx = next(sss.split(images, labels))

train_imgs, val_imgs = images[train_idx], images[val_idx]
train_lbls, val_lbls = labels[train_idx], labels[val_idx]

# 3. Аугментации и Dataset

In [4]:
train_tfms = A.Compose([
    A.Rotate(limit=15, p=0.5),
    A.Perspective(scale=(0.05,0.15), p=0.5),
    A.MotionBlur(p=0.2),
    A.RandomBrightnessContrast(p=0.5),
    A.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5)),
    ToTensorV2()
])

val_tfms = A.Compose([
    A.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5)),
    ToTensorV2()
])

In [6]:
class CaptchaDataset(Dataset):
    def __init__(self, imgs, lbls=None, tfms=None):
        self.imgs  = imgs
        self.lbls  = lbls
        self.tfms  = tfms            # сохраняем аугментации

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img = self.imgs[idx]

        # --- корректное применение трансформаций ---
        if self.tfms is not None:
            img = self.tfms(image=img)["image"]          # Albumentations → Tensor
        else:
            img = torch.from_numpy(img).permute(2, 0, 1).float() / 255.

        if self.lbls is not None:
            label = torch.tensor(self.lbls[idx], dtype=torch.long)
            return img, label

        return img

# 4. DataLoaders

In [7]:
train_ds = CaptchaDataset(train_imgs, train_lbls, train_tfms)
val_ds   = CaptchaDataset(val_imgs,   val_lbls,   val_tfms)

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=4, pin_memory=True)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)

# 5. Модель

In [8]:
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)  # адаптация под 48×48
model.maxpool = nn.Identity()
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

# 6. Оптимизатор, scheduler, метрика

In [10]:
optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2)
criterion = nn.CrossEntropyLoss(label_smoothing=0.05)
metric = MulticlassAccuracy(num_classes=NUM_CLASSES, average='micro').to(DEVICE)
scaler = GradScaler(enabled=(DEVICE=="cuda"))

# 7. Цикл обучения

In [11]:
for epoch in range(EPOCHS):
    model.train()
    for imgs, lbls in train_dl:
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
        optimizer.zero_grad()
        with autocast(enabled=(DEVICE=="cuda")):
            logits = model(imgs)
            loss   = criterion(logits, lbls)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step(epoch + (len(train_dl) / len(train_dl)))
    
    # валидация
    model.eval(); metric.reset()
    val_loss = 0
    with torch.no_grad(), autocast(enabled=(DEVICE=="cuda")):
        for imgs, lbls in val_dl:
            imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
            logits = model(imgs)
            val_loss += criterion(logits, lbls).item() * imgs.size(0)
            preds = torch.argmax(logits, dim=1)
            metric.update(preds, lbls)
    val_acc = metric.compute().item()
    print(f'Epoch {epoch+1}/{EPOCHS} '
          f'Val loss: {val_loss/len(val_ds):.4f} | Val accuracy: {val_acc:.4%}')


Epoch 1/20 Val loss: 0.7565 | Val accuracy: 87.9000%
Epoch 2/20 Val loss: 0.5825 | Val accuracy: 93.9500%
Epoch 3/20 Val loss: 0.5354 | Val accuracy: 95.2000%
Epoch 4/20 Val loss: 0.5166 | Val accuracy: 95.5000%
Epoch 5/20 Val loss: 0.5757 | Val accuracy: 93.5500%
Epoch 6/20 Val loss: 0.5086 | Val accuracy: 95.3500%
Epoch 7/20 Val loss: 0.4903 | Val accuracy: 96.2500%
Epoch 8/20 Val loss: 0.4851 | Val accuracy: 96.4000%
Epoch 9/20 Val loss: 0.4774 | Val accuracy: 96.3500%
Epoch 10/20 Val loss: 0.4614 | Val accuracy: 97.3000%
Epoch 11/20 Val loss: 0.4552 | Val accuracy: 97.1000%
Epoch 12/20 Val loss: 0.4500 | Val accuracy: 97.0000%
Epoch 13/20 Val loss: 0.4417 | Val accuracy: 97.4500%
Epoch 14/20 Val loss: 0.4416 | Val accuracy: 97.7000%
Epoch 15/20 Val loss: 0.5105 | Val accuracy: 95.2500%
Epoch 16/20 Val loss: 0.4936 | Val accuracy: 95.8500%
Epoch 17/20 Val loss: 0.4737 | Val accuracy: 96.6000%
Epoch 18/20 Val loss: 0.4798 | Val accuracy: 96.3000%
Epoch 19/20 Val loss: 0.4640 | Val ac

# 8. Инференс на тесте и сабмит

In [12]:
test_imgs = np.load('./mipt-dl-captcha-2025/mds-misis-dl-captchan/images_sub.npy')
test_ds   = CaptchaDataset(test_imgs, tfms=val_tfms)
test_dl   = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)

model.eval()
all_preds = []
with torch.no_grad(), autocast(enabled=(DEVICE=="cuda")):
    for imgs in test_dl:
        imgs = imgs.to(DEVICE)
        logits = model(imgs)
        preds = torch.argmax(logits, dim=1).cpu().numpy()
        all_preds.append(preds)
all_preds = np.concatenate(all_preds)

# создаём файл сабмита
sub = pd.read_csv('./mipt-dl-captcha-2025/mds-misis-dl-captchan/sample_submission.csv')
sub['Category'] = all_preds
sub.to_csv('submission.csv', index=False)
print('submission.csv готов!')

submission.csv готов!
