In [1]:
!pip install segmentation_models_pytorch


Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl.metadata (32 kB)
Collecting efficientnet-pytorch>=0.6.1 (from segmentation_models_pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pretrainedmodels>=0.7.1 (from segmentation_models_pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting munch (from pretrainedmodels>=0.7.1->segmentation_models_pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl (121 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.3/121.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading munch-4.0.0-py2.py3-none-any.w

In [2]:
import os
import gc
import random
import cv2
import torch
import numpy as np
import pandas as pd
import segmentation_models_pytorch as smp
from tqdm.notebook import tqdm
from torch.nn import Parameter
import albumentations as A
import torch.nn.functional as F
import torch.nn as nn
import timm
from torch.amp import GradScaler
from torch.autograd import Variable
from skimage.metrics import structural_similarity as ssim
from albumentations.pytorch import ToTensorV2
import cv2
from torch.optim.lr_scheduler import CosineAnnealingLR

import torch
from torch.utils.data import Dataset, DataLoader

  check_for_updates()


In [3]:
#Data augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2

train_transform = A.Compose([
    # Пространственные аугментации
    A.HorizontalFlip(p=0.5),  # Отражение по горизонтали
    A.VerticalFlip(p=0.5),    # Отражение по вертикали
    A.RandomRotate90(p=0.5),  # Случайный поворот на 90°
    A.Transpose(p=0.5),       # Транспонирование изображения

    # Изменения яркости и контраста для реалистичности
    A.RandomBrightnessContrast(p=0.2),  # Случайное изменение яркости и контраста
    A.RandomGamma(p=0.3),                # Случайная гамма коррекция

    # Сложные искажения
    A.ElasticTransform(alpha=1.0, sigma=50, p=0.5),   # Эластичные искажения
    A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),  # Сетка для более сложных искажений
    A.OpticalDistortion(distort_limit=0.3, shift_limit=0.1, p=0.5),  # Оптические искажения

    # Изменение масштаба и положения
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.5),

    # Аугментации, имитирующие текстуры
    A.CoarseDropout(max_holes=8, max_height=16, max_width=16, p=0.5),  # Случайные блоки пропадания для текстурных аугментаций
    A.MotionBlur(p=0.2),  # Эффект размытия от движения (для создания бархатного эффекта)

    # Нормализация (предобученные веса для ImageNet)
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    
    # Конвертирование в Tensor
    ToTensorV2(),
])

test_transform = A.Compose([
    # Нормализация для тестов
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    
    # Конвертирование в Tensor
    ToTensorV2(),
])


class SegmentationDataset(Dataset):
    def __init__(self, image_paths, masks, transform=None):
        self.image_paths = image_paths
        self.masks = masks
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = cv2.imread(self.image_paths[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = self.masks[idx]
        
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented['image'], augmented['mask']
        
        return image, mask.unsqueeze(0)

In [4]:
class UNetModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.unet = smp.Unet(
            encoder_name='efficientnet-b2',
            encoder_weights='imagenet',
            classes=1,
            decoder_channels=[512, 256, 128, 64, 32]
        )
    
    def forward(self, x):
        return self.unet(x)

In [5]:
def shift_mask(mask, n, m):
    """
    Сместить маску на n пикселей вверх и на m пикселей вправо.
    
    Parameters:
    - mask: Исходная маска.
    - n: Смещение вверх (отрицательное значение для смещения вниз).
    - m: Смещение вправо (отрицательное значение для смещения влево).
    
    Returns:
    - shifted_mask: Маска, смещенная на указанные значения.
    """
    h, w = mask.shape
    shifted_mask = np.zeros_like(mask)
    
    # Сместите пиксели маски
    for y in range(h):
        for x in range(w):
            new_y = y - n
            new_x = x - m
            
            # Проверьте, чтобы новые координаты были в пределах маски
            if 0 <= new_y < h and 0 <= new_x < w:
                shifted_mask[new_y, new_x] = mask[y, x]
    
    return shifted_mask


In [6]:
train_masks = np.load('/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/msk_array.npy')
train_masks = np.array([shift_mask(mask, 23, 25) for mask in tqdm(train_masks)])

  0%|          | 0/545 [00:00<?, ?it/s]

In [7]:
# Hyperparameters
batch_size = 10
epochs = 20
lr = 3e-4
clip_grad_norm = 10.0
device = 'cuda'

train_images = [f'/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/train/{img}' for img in sorted(os.listdir('/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/train'))]
test_images = [f'/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/test/{img}' for img in sorted(os.listdir('/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/test'))]
test_masks = np.zeros((len(test_images), train_masks.shape[1], train_masks.shape[2]))

# Dataloaders
train_loader = DataLoader(SegmentationDataset(train_images[:-50], train_masks[:-50], train_transform), batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(SegmentationDataset(train_images[-50:], train_masks[-50:], test_transform), batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(SegmentationDataset(test_images, test_masks, test_transform), batch_size=batch_size, shuffle=False, num_workers=2)

# Model, Loss, Optimizer, Scheduler
model = UNetModel().to(device)
loss_func = smp.losses.DiceLoss(mode="binary", smooth=1.)
optimizer = torch.optim.AdamW(model.parameters(), lr)
scheduler = CosineAnnealingLR(optimizer, len(train_loader) * epochs, 1e-6)
scaler = GradScaler()

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b2-8bb594d6.pth
100%|██████████| 35.1M/35.1M [00:00<00:00, 62.8MB/s]


In [8]:
# Training loop
best_val_loss = float('inf')  # Инициализируйте на бесконечность
best_model_path = "best_model.pth"

for epoch in range(epochs):
    model.train()
    avg_train_loss = 0
    loop = tqdm(train_loader, total=len(train_loader))
    for img, target in loop:
        img, target = img.to(device), target.to(device)
        
        optimizer.zero_grad()
        
        with torch.cuda.amp.autocast():
            outputs = model(img)
            loss = loss_func(outputs, target)
        
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        
        avg_train_loss += loss.item()
        loop.set_postfix(loss=avg_train_loss / (loop.n + 1), epoch=epoch)
    
    # Validation step
    model.eval()
    avg_val_loss = 0
    with torch.no_grad():
        val_loop = tqdm(val_loader, total=len(val_loader))
        for img, target in val_loop:
            img, target = img.to(device), target.to(device)
            with torch.cuda.amp.autocast():
                outputs = model(img)
                loss = loss_func(outputs, target)
            
            avg_val_loss += loss.item()
            val_loop.set_postfix(val_loss=avg_val_loss / (val_loop.n + 1), epoch=epoch)

    avg_train_loss /= len(train_loader)
    avg_val_loss /= len(val_loader)

    print(f"Epoch {epoch+1}/{epochs} - Train Loss: {avg_train_loss} - Val Loss: {avg_val_loss}")

    # Сохраните лучшую модель
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), best_model_path)
        print(f"🔥 New best model saved with val_loss {best_val_loss}")

# Загрузите лучшую модель
print("✅ Loading best model from disk...")
model.load_state_dict(torch.load(best_model_path))
model.to(device)
model.eval()


  0%|          | 0/50 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast():


  0%|          | 0/5 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast():


Epoch 1/20 - Train Loss: 0.49521334409713746 - Val Loss: 0.3791393280029297
🔥 New best model saved with val_loss 0.3791393280029297


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 2/20 - Train Loss: 0.3586007046699524 - Val Loss: 0.3028324723243713
🔥 New best model saved with val_loss 0.3028324723243713


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 3/20 - Train Loss: 0.3051437473297119 - Val Loss: 0.2766440749168396
🔥 New best model saved with val_loss 0.2766440749168396


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 4/20 - Train Loss: 0.28245217323303223 - Val Loss: 0.25283644199371336
🔥 New best model saved with val_loss 0.25283644199371336


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 5/20 - Train Loss: 0.25931600689888 - Val Loss: 0.23868210315704347
🔥 New best model saved with val_loss 0.23868210315704347


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 6/20 - Train Loss: 0.2526634097099304 - Val Loss: 0.2371431350708008
🔥 New best model saved with val_loss 0.2371431350708008


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 7/20 - Train Loss: 0.240291268825531 - Val Loss: 0.2364608883857727
🔥 New best model saved with val_loss 0.2364608883857727


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 8/20 - Train Loss: 0.23808629870414733 - Val Loss: 0.23914530277252197


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 9/20 - Train Loss: 0.23395177364349365 - Val Loss: 0.2287617325782776
🔥 New best model saved with val_loss 0.2287617325782776


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 10/20 - Train Loss: 0.23043610215187071 - Val Loss: 0.23179572820663452


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 11/20 - Train Loss: 0.2247859311103821 - Val Loss: 0.227635395526886
🔥 New best model saved with val_loss 0.227635395526886


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 12/20 - Train Loss: 0.22405753016471863 - Val Loss: 0.22807669639587402


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 13/20 - Train Loss: 0.21699921131134034 - Val Loss: 0.22457226514816284
🔥 New best model saved with val_loss 0.22457226514816284


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 14/20 - Train Loss: 0.21317266583442687 - Val Loss: 0.22155948877334594
🔥 New best model saved with val_loss 0.22155948877334594


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 15/20 - Train Loss: 0.21428906321525573 - Val Loss: 0.22451857328414918


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 16/20 - Train Loss: 0.2134494400024414 - Val Loss: 0.22569278478622437


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 17/20 - Train Loss: 0.20989515900611877 - Val Loss: 0.22460584640502929


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 18/20 - Train Loss: 0.20691142082214356 - Val Loss: 0.22383421659469604


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 19/20 - Train Loss: 0.21189252853393556 - Val Loss: 0.2230844020843506


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 20/20 - Train Loss: 0.2034302508831024 - Val Loss: 0.223030161857605
✅ Loading best model from disk...


  model.load_state_dict(torch.load(best_model_path))


UNetModel(
  (unet): Unet(
    (encoder): EfficientNetEncoder(
      (_conv_stem): Conv2dStaticSamePadding(
        3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
        (static_padding): ZeroPad2d((0, 1, 0, 1))
      )
      (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_blocks): ModuleList(
        (0): MBConvBlock(
          (_depthwise_conv): Conv2dStaticSamePadding(
            32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
            (static_padding): ZeroPad2d((1, 1, 1, 1))
          )
          (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
          (_se_reduce): Conv2dStaticSamePadding(
            32, 8, kernel_size=(1, 1), stride=(1, 1)
            (static_padding): Identity()
          )
          (_se_expand): Conv2dStaticSamePadding(
            8, 32, kernel_size=(1, 1), stride=(1, 1)
            (static_padding): Identity

In [9]:
# TTA & Post-processing (без TTA)
model.eval()
preds = []
with torch.no_grad():
    for img, _ in tqdm(test_loader):
        img = img.to(device)
        with torch.cuda.amp.autocast():
            pred = model(img).sigmoid().cpu().numpy()
            pred = (pred > 0.5).astype(np.uint8)  # Thresholding
            preds.append(pred)

preds = np.concatenate(preds)[:, 0, ...]


  0%|          | 0/15 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast():


In [10]:
def rle_encode(x, fg_val=1):
    """
    Args:
        x:  numpy array of shape (height, width), 1 - mask, 0 - background
    Returns: run length encoding as list
    """

    dots = np.where(
        x.T.flatten() == fg_val)[0]  # .T sets Fortran order down-then-right
    run_lengths = []
    prev = -2
    for b in dots:
        if b > prev + 1:
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths


def list_to_string(x):
    """
    Converts list to a string representation
    Empty list returns '-'
    """
    if x: # non-empty list
        s = str(x).replace("[", "").replace("]", "").replace(",", "")
    else:
        s = '-'
    return s
true_list = [list_to_string(rle_encode(ans)) for ans in preds]

predict_df = pd.DataFrame()
predict_df['Id'] = [f'{x:03d}.jpg' for x in range(150)]
predict_df['Target'] = true_list
predict_df.to_csv('submission.csv', index = None)