In [52]:
!pip install -q segmentation-models-pytorch==0.3.0 albumentations==1.3.0

# 1) Imports and basic config
import os, gc, random, math
from pathlib import Path
from tqdm import tqdm
import numpy as np, pandas as pd, cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp

# Filesystem layout: read-only Kaggle input dataset and the writable working dir.
DATA_DIR = Path("/kaggle/input/severstal-steel-defect-detection")
TRAIN_CSV = DATA_DIR.joinpath("train.csv")
IMG_DIR = DATA_DIR.joinpath("train_images")
WORK_DIR = Path("/kaggle/working")
WORK_DIR.mkdir(exist_ok=True)

# Data dimensions and training hyper-parameters.
H = 256          # mask/image height used for the memmap layout
W = 1600         # mask/image width used for the memmap layout
NUM_CLASSES = 4  # one mask channel (and one model output channel) per ClassId
BATCH_SIZE = 2   # set 2 (if OOM -> 1); increase if more memory is available
EPOCHS = 30
LR = 1e-4
SEED = 42
NUM_WORKERS = 4
USE_AMP = True   # enables mixed precision (autocast + GradScaler) in training

# Repro
def seed_everything(seed=SEED):
    """Seed the Python, NumPy and PyTorch (CPU + all CUDA devices) RNGs.

    Also switches cuDNN to deterministic mode and disables its autotuner
    (`benchmark`).

    Args:
        seed: integer seed applied to every generator; defaults to SEED.
    """
    for apply_seed in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Seed once at notebook start so everything below runs from a fixed state.
seed_everything()

In [53]:

# 2) RLE decode and helper
def rle_decode(mask_rle, shape=(256,1600)):
    """Decode a run-length-encoded string into a binary uint8 mask.

    The string is a whitespace-separated sequence of `start length` pairs,
    where `start` is 1-based and pixels are numbered in column-major order.

    Args:
        mask_rle: RLE string; any non-string value (e.g. NaN) or '' yields an
            all-zero mask.
        shape: (height, width) of the returned mask.

    Returns:
        np.ndarray of dtype uint8 and the given shape, 1 inside runs, 0 elsewhere.
    """
    if isinstance(mask_rle, str) and mask_rle != '':
        tokens = np.array(mask_rle.split(), dtype=int)
        run_begin = tokens[0::2] - 1          # convert 1-based starts to 0-based
        run_end = run_begin + tokens[1::2]    # exclusive end of each run
        flat = np.zeros(shape[0]*shape[1], dtype=np.uint8)
        for begin, end in zip(run_begin, run_end):
            flat[begin:end] = 1
        # Column-major reshape matches the pixel numbering of the encoding.
        return flat.reshape(shape, order='F')
    return np.zeros(shape, dtype=np.uint8)

In [54]:

# 3) Build memmap masks (one-time)
df = pd.read_csv(TRAIN_CSV)
image_ids = df['ImageId'].unique()
N = len(image_ids)
# Raw np.memmap buffer of shape (N, H, W, NUM_CLASSES); it is written and read
# with np.memmap, not np.save/np.load, despite the .npy suffix.
MASK_MEMMAP = WORK_DIR / "masks_memmap.npy"
# Image ids in memmap row order; written last, so its presence marks a finished build.
IDS_PATH = WORK_DIR / "image_ids.npy"

cache_ready = MASK_MEMMAP.exists() and IDS_PATH.exists()
if cache_ready:
    print("Memmap exists, skipping build")
else:
    print("Building mask memmap... (takes a few minutes)")
    memmap = np.memmap(str(MASK_MEMMAP), dtype=np.uint8, mode='w+', shape=(N, H, W, NUM_CLASSES))
    ids = []
    for i, img_id in enumerate(tqdm(image_ids)):
        ids.append(img_id)
        rows = df.loc[df['ImageId'] == img_id]
        for channel in range(NUM_CLASSES):
            # ClassId is 1-based in the CSV, channels are 0-based in the memmap.
            encoded = rows.loc[rows['ClassId'] == channel + 1, 'EncodedPixels'].values
            rle = encoded[0] if len(encoded) > 0 else ''
            memmap[i, :, :, channel] = rle_decode(rle, shape=(256,1600)).astype(np.uint8)
    memmap.flush()
    del memmap
    np.save(str(IDS_PATH), np.array(ids))
    print("Saved memmap and ids")


Memmap exists, skipping build


In [55]:

# 4) Dataset + transforms
# Channel statistics shared by the training and validation pipelines.
_NORMALIZE_KWARGS = dict(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225))

# Training pipeline: random flip, photometric jitter, small affine jitter and
# noise/blur, followed by normalisation and conversion to a tensor.
_train_steps = [
    A.HorizontalFlip(p=0.5),
    A.OneOf([A.RandomBrightnessContrast(), A.CLAHE()], p=0.6),
    A.ShiftScaleRotate(shift_limit=0.02, scale_limit=0.05, rotate_limit=10, p=0.6),
    A.OneOf([A.GaussNoise(), A.Blur(blur_limit=3)], p=0.3),
    A.Normalize(**_NORMALIZE_KWARGS),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

# Validation pipeline: no augmentation, only normalisation and tensor conversion.
_valid_steps = [
    A.Normalize(**_NORMALIZE_KWARGS),
    ToTensorV2(),
]
valid_transform = A.Compose(_valid_steps)

class SeverstalDataset(Dataset):
    """Image + 4-channel mask dataset backed by the precomputed mask memmap.

    Images are read from disk with OpenCV on every access; masks are sliced
    out of a read-only np.memmap whose row order is given by the id list
    stored at IDS_PATH.

    Args:
        ids: image file names served by this dataset; each must appear in the
            id list stored at IDS_PATH.
        img_dir: directory containing the image files.
        memmap_path: path of the raw uint8 memmap with shape
            (num_ids, H, W, NUM_CLASSES).
        transforms: optional callable invoked as `transforms(image=..., mask=...)`
            that returns a dict with 'image' and 'mask'.

    Raises:
        KeyError: if any entry of `ids` is missing from the stored id list.
    """

    def __init__(self, ids, img_dir, memmap_path, transforms=None):
        self.ids = ids
        self.img_dir = str(img_dir)
        self.transforms = transforms
        # Load the global id list once; it defines both the memmap row order
        # and the number of rows.
        self.global_ids = np.load(str(IDS_PATH)).tolist()
        self.index_map = {img: i for i, img in enumerate(self.global_ids)}
        # Fail fast here rather than with a KeyError inside a DataLoader worker.
        missing = [img for img in self.ids if img not in self.index_map]
        if missing:
            raise KeyError(
                f"{len(missing)} image id(s) not found in {IDS_PATH}, e.g. {missing[0]!r}"
            )
        # open memmap read-only
        self.masks = np.memmap(
            str(memmap_path),
            dtype=np.uint8,
            mode='r',
            shape=(len(self.global_ids), H, W, NUM_CLASSES),
        )

    def __len__(self):
        """Number of images served by this dataset."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return `(image, mask)` for position `idx`.

        With transforms, the image is whatever the pipeline returns and a
        tensor mask is moved to channel-first layout if needed. Without
        transforms, both are float tensors in (C, H, W) layout and the image
        is scaled to [0, 1].

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_id = self.ids[idx]
        # load image
        img_path = os.path.join(self.img_dir, img_id)
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # load mask from memmap using the global row index
        gidx = self.index_map[img_id]
        mask = self.masks[gidx].astype('float32')  # (H, W, NUM_CLASSES) copy
        if self.transforms:
            augmented = self.transforms(image=img, mask=mask)
            img = augmented['image']
            mask = augmented['mask']
            # The mask may come back channel-last; move channels first when
            # the leading dimension is not the class dimension.
            if isinstance(mask, torch.Tensor):
                if mask.ndim == 3 and mask.shape[0] != NUM_CLASSES:
                    mask = mask.permute(2,0,1)
        else:
            img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0)
            mask = torch.from_numpy(mask.transpose(2,0,1)).float()
        return img, mask

  original_init(self, **validated_kwargs)


In [56]:

# 5) Train/val split and dataloaders
all_ids = np.load(str(IDS_PATH)).tolist()
from sklearn.model_selection import train_test_split
# 90/10 random hold-out split, seeded so it is reproducible.
train_ids, val_ids = train_test_split(all_ids, test_size=0.1, random_state=SEED)

train_ds = SeverstalDataset(
    ids=train_ids, img_dir=IMG_DIR, memmap_path=MASK_MEMMAP, transforms=train_transform
)
val_ds = SeverstalDataset(
    ids=val_ids, img_dir=IMG_DIR, memmap_path=MASK_MEMMAP, transforms=valid_transform
)

# Options common to both loaders.
_loader_kwargs = dict(num_workers=NUM_WORKERS, pin_memory=True)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_ds, batch_size=1, shuffle=False, **_loader_kwargs)

print("Train:", len(train_ds), "Val:", len(val_ds))


Train: 5999 Val: 667


In [57]:

# 6) Model — segmentation_models_pytorch Unet with resnet34
ENCODER = "resnet34"
ENCODER_WEIGHTS = "imagenet"

# Use the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# U-Net emitting one raw logit map per class; no output activation because
# the losses apply the sigmoid themselves.
_unet_kwargs = dict(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    in_channels=3,
    classes=NUM_CLASSES,
    activation=None,
)
model = smp.Unet(**_unet_kwargs)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 285MB/s]


In [58]:

# 7) Losses, metrics
# Element-wise binary cross-entropy on raw logits (mean reduction).
bce_loss = nn.BCEWithLogitsLoss()

def dice_loss_logits(logits, targets, eps=1e-6):
    """Soft Dice loss computed from raw logits.

    Dice is evaluated per (sample, channel) map over dims 2 and 3 and then
    averaged over all maps.

    Args:
        logits: raw model outputs; dims 2 and 3 are reduced.
        targets: tensor broadcastable with `logits` holding the target mask.
        eps: smoothing term added to numerator and denominator.

    Returns:
        Scalar tensor `1 - mean(dice)`.
    """
    spatial_dims = (2, 3)
    probs = torch.sigmoid(logits)
    overlap = (probs * targets).sum(dim=spatial_dims)
    total = probs.sum(dim=spatial_dims) + targets.sum(dim=spatial_dims)
    dice = (2.0 * overlap + eps) / (total + eps)
    return 1.0 - dice.mean()

def combined_loss(logits, targets, bce_w=0.5):
    """Weighted sum of BCE-with-logits and soft Dice loss.

    Args:
        logits: raw model outputs.
        targets: target masks with the same shape as `logits`.
        bce_w: weight of the BCE term; the Dice term gets `1 - bce_w`.

    Returns:
        Scalar loss tensor.
    """
    bce_term = bce_loss(logits, targets)
    dice_term = dice_loss_logits(logits, targets)
    return bce_term * bce_w + dice_term * (1.0 - bce_w)

@torch.no_grad()
def mean_dice_metric(logits, targets, thr=0.5):
    """Hard Dice score averaged over every (sample, channel) map.

    Args:
        logits: raw model outputs; dims 2 and 3 are reduced.
        targets: target masks with the same shape as `logits`.
        thr: probability threshold applied after the sigmoid.

    Returns:
        Python float with the mean Dice. A map whose prediction and target
        are both empty scores 1.0 because of the smoothing term.
    """
    smooth = 1e-6
    axes = (2, 3)
    binary = (torch.sigmoid(logits) > thr).float()
    intersection = (binary * targets).sum(dim=axes)
    cardinality = binary.sum(dim=axes) + targets.sum(dim=axes)
    scores = (2.0 * intersection + smooth) / (cardinality + smooth)
    return scores.mean().item()


In [59]:


# 8) Optimizer, scheduler, amp scaler
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',    # stepped with validation Dice, where higher is better
    factor=0.5,    # halve the learning rate on a plateau
    patience=3,
    verbose=True,
)
# Loss scaler for mixed-precision training; inert when USE_AMP is False.
scaler = torch.cuda.amp.GradScaler(enabled=USE_AMP)


  scaler = torch.cuda.amp.GradScaler(enabled=USE_AMP)


In [60]:

# 9) Training / validation loops
def train_one_epoch(model, loader, optimizer, device, scaler):
    """Run one optimisation pass over `loader` and return the mean loss.

    Each batch is moved to `device`, forwarded under autocast (controlled by
    the module-level USE_AMP flag), and the optimiser is stepped through the
    gradient scaler.

    Args:
        model: network to train; switched to train mode.
        loader: iterable yielding `(imgs, masks)` batches.
        optimizer: optimiser updating `model`'s parameters.
        device: device the batches are moved to.
        scaler: gradient scaler exposing `scale`, `step` and `update`.

    Returns:
        Sample-weighted mean training loss as a float; 0.0 when the loader
        yields no batches (same guard as `validate`).
    """
    model.train()
    running_loss = 0.0
    num_samples = 0
    pbar = tqdm(loader, desc="Train", leave=False)
    for imgs, masks in pbar:
        imgs = imgs.to(device, dtype=torch.float32, non_blocking=True)
        masks = masks.to(device, dtype=torch.float32, non_blocking=True)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=USE_AMP):
            logits = model(imgs)
            loss = combined_loss(logits, masks)
        # Scale the loss before backward; the scaler steps the optimiser
        # and then updates its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        batch_size = imgs.size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size
        pbar.set_postfix({'loss': running_loss / num_samples})
    # Guard against an empty loader instead of raising ZeroDivisionError.
    return running_loss / max(1, num_samples)

def validate(model, loader, device):
    """Evaluate `model` on `loader` and return mean loss and mean Dice.

    Both metrics are weighted by batch size, so the result is a true
    per-sample average for any batch size.

    Args:
        model: network to evaluate; switched to eval mode.
        loader: iterable yielding `(imgs, masks)` batches.
        device: device the batches are moved to.

    Returns:
        Tuple `(mean_loss, mean_dice)` of floats; both 0.0 when the loader
        yields no batches.
    """
    model.eval()
    running_loss = 0.0
    num = 0
    dice_sum = 0.0
    pbar = tqdm(loader, desc="Val", leave=False)
    with torch.no_grad():
        for imgs, masks in pbar:
            imgs = imgs.to(device, dtype=torch.float32, non_blocking=True)
            masks = masks.to(device, dtype=torch.float32, non_blocking=True)
            logits = model(imgs)
            loss = combined_loss(logits, masks)
            batch_size = imgs.size(0)
            running_loss += loss.item() * batch_size
            # mean_dice_metric returns a batch mean; weight it by the batch
            # size so dividing by the sample count below stays correct.
            dice_sum += mean_dice_metric(logits, masks) * batch_size
            num += batch_size
            pbar.set_postfix({'v_loss': running_loss / num, 'v_dice': dice_sum / num})
    return running_loss / max(1, num), dice_sum / max(1, num)


In [61]:

# 10) Run training with checkpointing
best_dice = 0.0
best_path = WORK_DIR / "best_model.pth"

for epoch in range(1, EPOCHS+1):
    print(f"\nEpoch {epoch}/{EPOCHS}")
    train_loss = train_one_epoch(model, train_loader, optimizer, device, scaler)
    val_loss, val_dice = validate(model, val_loader, device)
    # The plateau scheduler tracks validation Dice (mode='max').
    scheduler.step(val_dice)
    print(f"Train loss: {train_loss:.5f} | Val loss: {val_loss:.5f} | Val mean Dice: {val_dice:.5f}")

    # Keep only the checkpoint with the highest validation Dice so far.
    improved = val_dice > best_dice
    if improved:
        best_dice = val_dice
        torch.save(
            dict(model_state_dict=model.state_dict(), epoch=epoch, val_dice=val_dice),
            str(best_path),
        )
        print(f"Saved best model: {best_path} (Dice {val_dice:.5f})")

    # Release Python garbage and cached CUDA blocks between epochs.
    gc.collect()
    torch.cuda.empty_cache()

print("Training complete. Best val dice:", best_dice)


Epoch 1/30


  with torch.cuda.amp.autocast(enabled=USE_AMP):
                                                                                  

Train loss: 0.50481 | Val loss: 0.44589 | Val mean Dice: 0.49701
Saved best model: /kaggle/working/best_model.pth (Dice 0.49701)

Epoch 2/30


                                                                                  

Train loss: 0.45052 | Val loss: 0.43776 | Val mean Dice: 0.69832
Saved best model: /kaggle/working/best_model.pth (Dice 0.69832)

Epoch 3/30


                                                                                  

Train loss: 0.44413 | Val loss: 0.43649 | Val mean Dice: 0.66999

Epoch 4/30


                                                                                  

Train loss: 0.44012 | Val loss: 0.43679 | Val mean Dice: 0.73069
Saved best model: /kaggle/working/best_model.pth (Dice 0.73069)

Epoch 5/30


                                                                                  

Train loss: 0.43755 | Val loss: 0.43411 | Val mean Dice: 0.62649

Epoch 6/30


                                                                                  

Train loss: 0.43557 | Val loss: 0.43559 | Val mean Dice: 0.49872

Epoch 7/30


                                                                                  

Train loss: 0.43361 | Val loss: 0.42827 | Val mean Dice: 0.51838

Epoch 8/30


                                                                                  

Train loss: 0.43226 | Val loss: 0.42702 | Val mean Dice: 0.53546

Epoch 9/30


                                                                                  

Train loss: 0.42769 | Val loss: 0.42379 | Val mean Dice: 0.69867

Epoch 10/30


                                                                                  

Train loss: 0.42641 | Val loss: 0.42321 | Val mean Dice: 0.65352

Epoch 11/30


                                                                                  

Train loss: 0.42546 | Val loss: 0.42666 | Val mean Dice: 0.72157

Epoch 12/30


                                                                                  

Train loss: 0.42426 | Val loss: 0.42510 | Val mean Dice: 0.55937

Epoch 13/30


                                                                                  

Train loss: 0.42216 | Val loss: 0.42162 | Val mean Dice: 0.75660
Saved best model: /kaggle/working/best_model.pth (Dice 0.75660)

Epoch 14/30


                                                                                  

Train loss: 0.42117 | Val loss: 0.42268 | Val mean Dice: 0.74305

Epoch 15/30


                                                                                  

Train loss: 0.42085 | Val loss: 0.42286 | Val mean Dice: 0.72271

Epoch 16/30


                                                                                  

Train loss: 0.42002 | Val loss: 0.42452 | Val mean Dice: 0.70005

Epoch 17/30


                                                                                  

Train loss: 0.41990 | Val loss: 0.42541 | Val mean Dice: 0.69392

Epoch 18/30


                                                                                  

Train loss: 0.41854 | Val loss: 0.42494 | Val mean Dice: 0.72863

Epoch 19/30


                                                                                  

Train loss: 0.41815 | Val loss: 0.42202 | Val mean Dice: 0.76565
Saved best model: /kaggle/working/best_model.pth (Dice 0.76565)

Epoch 20/30


                                                                                  

Train loss: 0.41759 | Val loss: 0.42194 | Val mean Dice: 0.74083

Epoch 21/30


                                                                                  

Train loss: 0.41695 | Val loss: 0.42150 | Val mean Dice: 0.77048
Saved best model: /kaggle/working/best_model.pth (Dice 0.77048)

Epoch 22/30


                                                                                  

Train loss: 0.41714 | Val loss: 0.41867 | Val mean Dice: 0.72212

Epoch 23/30


                                                                                  

Train loss: 0.41677 | Val loss: 0.42588 | Val mean Dice: 0.69315

Epoch 24/30


                                                                                  

Train loss: 0.41678 | Val loss: 0.42115 | Val mean Dice: 0.75965

Epoch 25/30


                                                                                  

Train loss: 0.41637 | Val loss: 0.42446 | Val mean Dice: 0.64389

Epoch 26/30


                                                                                  

Train loss: 0.41579 | Val loss: 0.42172 | Val mean Dice: 0.75841

Epoch 27/30


                                                                                  

Train loss: 0.41559 | Val loss: 0.41991 | Val mean Dice: 0.78175
Saved best model: /kaggle/working/best_model.pth (Dice 0.78175)

Epoch 28/30


                                                                                  

Train loss: 0.41540 | Val loss: 0.42650 | Val mean Dice: 0.74966

Epoch 29/30


                                                                                  

Train loss: 0.41455 | Val loss: 0.42006 | Val mean Dice: 0.77842

Epoch 30/30


                                                                                  

Train loss: 0.41499 | Val loss: 0.42097 | Val mean Dice: 0.76330
Training complete. Best val dice: 0.7817511032859067


In [63]:

# 11) Postprocessing + RLE export (inference on test images, if you have test set)
# Example post-process function used on Segformer code earlier
def mask_to_rle(img):
    """Run-length encode a 2D binary mask in column-major pixel order.

    Args:
        img: 2D numpy array (H, W) holding a binary mask.

    Returns:
        Space-separated `start length` pairs with 1-based starts; an empty
        string when the mask has no foreground pixels.
    """
    column_major = img.T.flatten()
    # Zero sentinels on both sides so every run has a rising and a falling edge.
    padded = np.concatenate([[0], column_major, [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each falling-edge position into a run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

def post_process(probability, threshold=0.5, min_size=3500):
    """Threshold a probability map and drop small connected components.

    Args:
        probability: 2D array of per-pixel probabilities.
        threshold: pixels strictly above this value count as foreground.
        min_size: a component is kept only if it has more than this many pixels.

    Returns:
        uint8 array of the same shape with 1 on kept components, 0 elsewhere.
    """
    binary = (probability > threshold).astype(np.uint8)
    n_labels, labels = cv2.connectedComponents(binary)
    # Pixel count per label; label 0 is the background.
    sizes = np.bincount(labels.ravel(), minlength=n_labels)
    keep = sizes > min_size
    keep[0] = False  # the background is never part of the prediction
    return keep[labels].astype(np.uint8)


In [72]:
# PREDICTIONS
import os
import cv2
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm

# ==== SETTINGS ====
# Directory holding the test images; change if the dataset is mounted elsewhere.
TEST_DIR = "/kaggle/input/severstal-steel-defect-detection/test_images"
# Network input size; must match the size used for training.
IMG_HEIGHT = 256
IMG_WIDTH = 1600
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# ==== RLE ENCODING ====
def rle_encode(mask):
    """Run-length encode a binary mask in column-major pixel order.

    Args:
        mask: numpy array holding a binary mask.

    Returns:
        Space-separated `start length` pairs with 1-based starts; an empty
        string when the mask has no foreground pixels.
    """
    flat = mask.flatten(order='F')
    # Zero sentinels guarantee an even number of transitions.
    bounded = np.concatenate([[0], flat, [0]])
    changed = bounded[1:] != bounded[:-1]
    runs = np.where(changed)[0] + 1
    # Every second entry is a run end; convert it to a run length.
    runs[1::2] -= runs[::2]
    return " ".join([str(value) for value in runs])

# ==== PREDICTION LOOP ====
# Predict with the best checkpoint saved by the training loop rather than the
# weights left in memory by the final epoch.
if best_path.exists():
    checkpoint = torch.load(str(best_path), map_location=DEVICE)
    model.load_state_dict(checkpoint["model_state_dict"])

submission = []
model.eval()

test_image_names = sorted(os.listdir(TEST_DIR))  # get all test image filenames

with torch.no_grad():
    for img_name in tqdm(test_image_names):
        img_path = os.path.join(TEST_DIR, img_name)

        # Read & preprocess image
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None for unreadable files; fail with the path
            # instead of an opaque error from cvtColor.
            raise FileNotFoundError(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
        # Use the same Normalize + ToTensorV2 pipeline as validation. Scaling
        # by 1/255 alone would feed inputs with different statistics from
        # those the model was trained on.
        image = valid_transform(image=image)["image"].unsqueeze(0).to(DEVICE)

        # Predict per-class probabilities, shape (classes, H, W)
        probs = torch.sigmoid(model(image)).cpu().numpy()[0]

        # One submission row per defect class; ClassId is 1-based
        for cls, class_prob in enumerate(probs):
            mask = (class_prob > 0.5).astype(np.uint8)  # binary threshold
            rle = rle_encode(mask) if mask.sum() > 0 else ""
            submission.append([f"{img_name}_{cls+1}", rle])

# ==== SAVE CSV ====
# NOTE: this rebinds the notebook-level `df` that earlier held train.csv.
df = pd.DataFrame(submission, columns=["ImageId_ClassId", "EncodedPixels"])
df.to_csv("submission.csv", index=False)

print("✅ submission.csv created successfully!")

100%|██████████| 5506/5506 [04:34<00:00, 20.03it/s]


✅ submission.csv created successfully!
