In [1]:
pip install torch torchvision pandas pillow tqdm




In [2]:
# Mount Google Drive at /content/drive so the dataset folder referenced by
# TOP_ROOT (set in a later cell) is reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Paste this into a Colab cell (after drive.mount('/content/drive'))
# train_severstal_unet_auto_find.py (run in Colab)
import os, sys, glob, time
import numpy as np, pandas as pd
from PIL import Image
from tqdm import tqdm

import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

# -----------------------
# USER: set the top-level folder you gave earlier (do NOT add train.csv)
# -----------------------
# This folder is searched recursively for train.csv and for .jpg images.
# NOTE(review): "Pttern" looks like a typo of "Pattern" - confirm the path
# matches the actual Drive folder name character for character.
TOP_ROOT = r"/content/drive/MyDrive/Pttern Assignments/Severstal Kaggle dataset/severstal-steel-defect-detection (1)"

# -----------------------
# Utility: find files / image dir recursively
# -----------------------
def find_file_recursive(root, filename):
    """Return the path of ``filename`` located anywhere under ``root``.

    Args:
        root: Directory to search. It is treated as a literal path: glob
            metacharacters in it (``[``, ``]``, ``*``, ``?``) are escaped.
        filename: File name (or glob pattern) to look for at any depth.

    Returns:
        The matching path, or ``None`` when nothing matches. When several
        files match, the shallowest one wins and ties are broken
        alphabetically, so the result does not depend on filesystem order.
    """
    # Escape only the root: callers may still pass a pattern as `filename`.
    pattern = os.path.join(glob.escape(root), "**", filename)
    matches = glob.glob(pattern, recursive=True)
    if not matches:
        return None
    return min(matches, key=lambda p: (p.count(os.sep), p))

def find_image_dir_recursive(root):
    """Locate the directory holding the training ``.jpg`` images under ``root``.

    Directories with a conventional training-folder name are preferred, in
    the order listed below, provided they directly contain at least one
    ``.jpg`` file. Only if none qualifies does the function fall back to the
    directory of the first ``.jpg`` found anywhere under ``root``. Without
    this preference an unrelated folder such as ``test_images`` could be
    returned.

    Args:
        root: Directory to search; treated as a literal path (glob
            metacharacters in it are escaped).

    Returns:
        Path of the chosen image directory, or ``None`` if no ``.jpg`` file
        exists under ``root``.
    """
    safe_root = glob.escape(root)

    def _shallow_first(paths):
        # Deterministic order: fewer path components first, then alphabetical.
        return sorted(paths, key=lambda p: (p.count(os.sep), p))

    # Prefer commonly named training folders that actually contain images.
    for cand in ["train_images", "train", "images", "train_images_1600x256"]:
        hits = glob.glob(os.path.join(safe_root, "**", cand), recursive=True)
        for directory in _shallow_first(hits):
            if not os.path.isdir(directory):
                continue
            if glob.glob(os.path.join(glob.escape(directory), "*.jpg")):
                return directory

    # Fallback: any folder that contains jpg files.
    jpgs = glob.glob(os.path.join(safe_root, "**", "*.jpg"), recursive=True)
    if not jpgs:
        return None
    return os.path.dirname(_shallow_first(jpgs)[0])

# -----------------------
# locate train.csv and images
# -----------------------
# Resolve the annotation CSV and the image folder beneath TOP_ROOT.
train_csv_path = find_file_recursive(TOP_ROOT, "train.csv")
img_dir = find_image_dir_recursive(TOP_ROOT)

# Report what was discovered before validating it.
print("\n".join(
    f"{label} {value}"
    for label, value in (
        ("TOP_ROOT:", TOP_ROOT),
        ("Found train.csv:", train_csv_path),
        ("Detected image directory:", img_dir),
    )
))

# Stop early, with a hint, when either part of the dataset is missing.
if train_csv_path is None:
    print("\nERROR: train.csv not found under TOP_ROOT. Please confirm where you extracted the Kaggle zip.")
    print("You can list files with: !ls -la '{}'".format(TOP_ROOT))
    sys.exit(1)
if img_dir is None:
    print("\nERROR: No .jpg images found under TOP_ROOT. Please confirm image folder exists.")
    sys.exit(1)

# -----------------------
# Pipeline defined below: RLE decoding, dataset, UNet, losses/metrics,
# and the training loop.
# -----------------------

# --- RLE decode + build_mask_for_image ---
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded string into a binary ``(h, w)`` mask.

    Args:
        mask_rle: Space-separated ``start length start length ...`` string
            with 1-based starts counted in column-major order. Any
            non-string value (e.g. NaN) yields an all-zero mask.
        shape: ``(h, w)`` of the mask to produce.

    Returns:
        ``np.uint8`` array of shape ``(h, w)`` with 1 inside the runs.
    """
    height, width = shape
    if not isinstance(mask_rle, str):
        return np.zeros((height, width), dtype=np.uint8)

    tokens = mask_rle.strip().split()
    # Even positions are 1-based run starts, odd positions are run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)

    flat = np.zeros(height * width, dtype=np.uint8)
    for begin, count in zip(run_starts, run_lengths):
        flat[begin:begin + count] = 1

    # Pixels are numbered down columns first, hence the column-major reshape.
    return flat.reshape((height, width), order="F")

def build_mask_for_image(image_id, df, shape):
    """Assemble the 4-channel defect mask for one image.

    Args:
        image_id: Value matched against the ``ImageId`` column of ``df``.
        df: DataFrame with ``ImageId``, ``ClassId`` (1-based) and
            ``EncodedPixels`` columns.
        shape: ``(h, w)`` of the image.

    Returns:
        ``np.uint8`` array of shape ``(4, h, w)``; channel ``c`` is the union
        of all decoded masks whose ``ClassId`` equals ``c + 1``. Rows whose
        ``EncodedPixels`` is not a string contribute nothing.
    """
    height, width = shape
    stacked = np.zeros((4, height, width), dtype=np.uint8)
    matching = df[df["ImageId"] == image_id]
    for class_label, encoded in zip(matching["ClassId"], matching["EncodedPixels"]):
        channel = int(class_label) - 1
        if not isinstance(encoded, str):
            continue
        decoded = rle_decode(encoded, (height, width))
        stacked[channel] = np.maximum(stacked[channel], decoded)
    return stacked

# --- Dataset ---
class SeverstalDataset(Dataset):
    """Severstal steel-defect segmentation dataset.

    Reads the annotation CSV, splits the sorted unique image ids 90/10 into
    train/validation, and yields ``(image, mask, image_id)`` triples where
    ``mask`` is a float32 tensor with one channel per defect class.

    Args:
        csv_path: Path to the annotation CSV. Both the
            ``ImageId, ClassId, EncodedPixels`` layout and the legacy
            ``ImageId_ClassId, EncodedPixels`` layout are accepted.
        img_dir: Directory containing the image files named by ``ImageId``.
        transforms: Optional callable applied to the PIL image. When given,
            the mask is resized to ``img_size`` with nearest-neighbour
            sampling; otherwise image and mask keep their native size.
        mode: ``"train"`` selects the first 90% of ids; any other value
            selects the remaining 10%.
        img_size: ``(height, width)`` the mask is resized to when
            ``transforms`` is set; it should match what ``transforms``
            produces for the image.
    """

    def __init__(self, csv_path, img_dir, transforms=None, mode="train", img_size=(256,512)):
        self.df = pd.read_csv(csv_path)
        if "ImageId_ClassId" in self.df.columns:
            # Legacy layout "<image file>_<class id>": split on the LAST
            # underscore only, so image names containing "_" stay intact.
            parts = self.df["ImageId_ClassId"].str.rsplit("_", n=1, expand=True)
            self.df["ImageId"] = parts[0]
            self.df["ClassId"] = parts[1]
            self.df = self.df[["ImageId", "ClassId", "EncodedPixels"]]

        all_ids = sorted(self.df["ImageId"].unique())
        cut = int(len(all_ids) * 0.9)
        self.image_ids = all_ids[:cut] if mode == "train" else all_ids[cut:]

        # Row positions per image, computed once, so __getitem__ does not
        # scan the whole DataFrame for every sample.
        self._rows_by_image = self.df.groupby("ImageId").indices

        self.img_dir = img_dir
        self.transforms = transforms
        self.img_size = img_size

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        path = os.path.join(self.img_dir, image_id)
        # Close the file handle deterministically; convert() returns a
        # fully loaded copy that outlives the context manager.
        with Image.open(path) as raw:
            img = raw.convert("RGB")
        w, h = img.size

        rows = self.df.iloc[self._rows_by_image[image_id]]
        mask_multi = build_mask_for_image(image_id, rows, (h, w))

        if self.transforms:
            img = self.transforms(img)
            # Resize the masks with nearest-neighbour so labels stay binary.
            # This relies on PIL accepting an (H, W, 4) uint8 array, i.e. it
            # assumes exactly four mask channels.
            mask_pil = Image.fromarray(np.transpose(mask_multi, (1, 2, 0)))
            mask_pil = mask_pil.resize((self.img_size[1], self.img_size[0]), resample=Image.NEAREST)
            mask_arr = np.transpose(np.array(mask_pil), (2, 0, 1))
            mask_tensor = torch.from_numpy(mask_arr.astype(np.float32))
        else:
            img = T.ToTensor()(img)
            mask_tensor = torch.from_numpy(mask_multi.astype(np.float32))
        return img, mask_tensor, image_id

# --- UNet (simple) ---
def conv_block(in_ch, out_ch):
    """Build a double ``Conv3x3 -> BatchNorm -> ReLU`` stage.

    Args:
        in_ch: Channels of the incoming feature map.
        out_ch: Channels produced by both convolutions.

    Returns:
        ``nn.Sequential`` of six layers; ``padding=1`` keeps the spatial
        size unchanged.
    """
    layers = []
    current = in_ch
    for _ in range(2):
        layers.append(nn.Conv2d(current, out_ch, kernel_size=3, padding=1))
        layers.append(nn.BatchNorm2d(out_ch))
        layers.append(nn.ReLU(inplace=True))
        current = out_ch  # the second convolution consumes the first one's output
    return nn.Sequential(*layers)

class UNet(nn.Module):
    """Plain U-Net encoder/decoder with skip connections.

    Args:
        n_channels: Channels of the input image.
        n_classes: Channels of the output logit map.
        features: Channel width of each encoder level, shallow to deep. The
            bottleneck uses twice the deepest width. Widths do not have to
            double from level to level.

    Raises:
        ValueError: If ``features`` is empty.
    """

    def __init__(self, n_channels=3, n_classes=4, features=(64, 128, 256, 512)):
        super().__init__()
        features = list(features)
        if not features:
            raise ValueError("features must contain at least one channel width")

        self.pool = nn.MaxPool2d(2, 2)

        # Encoder: one conv block per level.
        self.downs = nn.ModuleList()
        prev = n_channels
        for f in features:
            self.downs.append(conv_block(prev, f))
            prev = f

        self.bottleneck = conv_block(prev, prev * 2)

        # Decoder: alternating (transposed conv, conv block) pairs.
        self.ups = nn.ModuleList()
        up_in = prev * 2
        for f in reversed(features):
            self.ups.append(nn.ConvTranspose2d(up_in, f, kernel_size=2, stride=2))
            # The conv block sees skip (f channels) + upsampled (f channels).
            # Using 2*f rather than `up_in` keeps this correct when the
            # widths in `features` do not double at every level.
            self.ups.append(conv_block(2 * f, f))
            up_in = f

        self.final_conv = nn.Conv2d(features[0], n_classes, kernel_size=1)

    def forward(self, x):
        """Map an ``(N, n_channels, H, W)`` batch to ``(N, n_classes, H, W)`` logits."""
        skips = []
        for down in self.downs:
            x = down(x)
            skips.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)

        for level, skip in enumerate(reversed(skips)):
            up_conv = self.ups[2 * level]
            conv = self.ups[2 * level + 1]
            x = up_conv(x)
            # Odd input sizes lose a pixel when pooled; resize back so the
            # spatial dims match the skip tensor before concatenating.
            if x.shape[2:] != skip.shape[2:]:
                x = nn.functional.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
            x = conv(torch.cat((skip, x), dim=1))
        return self.final_conv(x)

# --- losses & metrics ---
def dice_loss_logits(inputs, targets, eps=1e-6):
    """Soft Dice loss computed from raw logits.

    Args:
        inputs: Logits of shape ``(N, C, H, W)``.
        targets: Ground-truth masks with the same shape as ``inputs``.
        eps: Added to the denominator to avoid division by zero.

    Returns:
        Scalar tensor: ``1 - dice`` averaged over every (sample, channel) pair.
    """
    spatial = (2, 3)
    probabilities = inputs.sigmoid()
    overlap = (probabilities * targets).sum(dim=spatial)
    total = probabilities.sum(dim=spatial) + targets.sum(dim=spatial) + eps
    dice = 2 * overlap / total
    return (1 - dice).mean()

def bce_dice_loss(inputs, targets, bce_weight=0.5):
    """Weighted sum of binary cross-entropy and soft Dice loss on logits.

    Args:
        inputs: Raw logits.
        targets: Ground-truth masks, same shape as ``inputs``.
        bce_weight: Weight of the BCE term; the Dice term gets
            ``1 - bce_weight``.

    Returns:
        Scalar loss tensor.
    """
    criterion = nn.BCEWithLogitsLoss()
    bce_term = criterion(inputs, targets) * bce_weight
    dice_term = dice_loss_logits(inputs, targets) * (1 - bce_weight)
    return bce_term + dice_term

def compute_iou(inputs, targets, thresh=0.5, eps=1e-6):
    """Mean intersection-over-union of thresholded predictions.

    Args:
        inputs: Logits of shape ``(N, C, H, W)``.
        targets: Ground-truth masks with the same shape.
        thresh: Probability above which a pixel counts as predicted positive.
        eps: Added to the union to avoid division by zero.

    Returns:
        Python float: IoU averaged over every (sample, channel) pair.
    """
    spatial = (2, 3)
    hard = torch.sigmoid(inputs).gt(thresh).float()
    overlap = hard * targets
    inter = overlap.sum(dim=spatial)
    union = (hard + targets - overlap).sum(dim=spatial) + eps
    return (inter / union).mean().item()

# --- training / validation loops ---
def train_one_epoch(model, loader, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network mapping an image batch to logits.
        loader: Iterable of ``(images, masks, ids)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou)``, each weighted by batch size.
    """
    model.train()
    loss_sum = 0.0
    iou_sum = 0.0
    seen = 0
    for images, targets, _ids in loader:
        images = images.to(device)
        targets = targets.to(device)

        logits = model(images)
        loss = bce_dice_loss(logits, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The metric is computed on detached tensors; it takes no part in backprop.
        batch_iou = compute_iou(logits.detach(), targets.detach())
        batch_size = images.size(0)
        loss_sum += loss.item() * batch_size
        iou_sum += batch_iou * batch_size
        seen += batch_size
    return loss_sum / seen, iou_sum / seen

def validate(model, loader, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network mapping an image batch to logits.
        loader: Iterable of ``(images, masks, ids)`` batches.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou)``, each weighted by batch size.
    """
    model.eval()
    loss_sum = 0.0
    iou_sum = 0.0
    seen = 0
    with torch.no_grad():
        for images, targets, _ids in loader:
            images = images.to(device)
            targets = targets.to(device)

            logits = model(images)
            loss = bce_dice_loss(logits, targets)
            batch_iou = compute_iou(logits, targets)

            batch_size = images.size(0)
            loss_sum += loss.item() * batch_size
            iou_sum += batch_iou * batch_size
            seen += batch_size
    return loss_sum / seen, iou_sum / seen

# -----------------------
# hyperparams
# -----------------------
SEED = 42  # fixed seed so weight init and shuffling are repeatable across runs
torch.manual_seed(SEED)
np.random.seed(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
IMG_SIZE = (256,512)  # (height, width) fed to the network
BATCH_SIZE = 8
NUM_EPOCHS = 8
LR = 1e-3
NUM_WORKERS = 2
CHECKPOINT_PATH = "/content/best_unet.pth"

# -----------------------
# build dataloaders & model
# -----------------------
# Pinned host memory only helps host-to-GPU copies; on CPU it just triggers a warning.
PIN_MEMORY = DEVICE.type == "cuda"

transforms = T.Compose([T.Resize((IMG_SIZE[0], IMG_SIZE[1])), T.ToTensor()])
train_ds = SeverstalDataset(train_csv_path, img_dir, transforms=transforms, mode="train", img_size=IMG_SIZE)
val_ds   = SeverstalDataset(train_csv_path, img_dir, transforms=transforms, mode="val",   img_size=IMG_SIZE)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

model = UNet(n_channels=3, n_classes=4).to(DEVICE)
opt = optim.Adam(model.parameters(), lr=LR)

# Start below any reachable IoU so the first epoch always writes a checkpoint,
# even if validation IoU is exactly 0.
best_iou = float("-inf")
for epoch in range(1, NUM_EPOCHS+1):
    t0 = time.time()
    tr_loss, tr_iou = train_one_epoch(model, train_loader, opt, DEVICE)
    val_loss, val_iou = validate(model, val_loader, DEVICE)
    print(f"Epoch {epoch}/{NUM_EPOCHS} | TrainLoss {tr_loss:.4f} IoU {tr_iou:.4f} | ValLoss {val_loss:.4f} IoU {val_iou:.4f} | Time {time.time()-t0:.1f}s")
    if val_iou > best_iou:
        best_iou = val_iou
        torch.save(model.state_dict(), CHECKPOINT_PATH)
        print(f"Saved best model to {CHECKPOINT_PATH}")

print("Done. Best Val IoU:", best_iou)


TOP_ROOT: /content/drive/MyDrive/Pttern Assignments/Severstal Kaggle dataset/severstal-steel-defect-detection (1)
Found train.csv: None
Detected image directory: /content/drive/MyDrive/Pttern Assignments/Severstal Kaggle dataset/severstal-steel-defect-detection (1)/test_images

ERROR: train.csv not found under TOP_ROOT. Please confirm where you extracted the Kaggle zip.
You can list files with: !ls -la '/content/drive/MyDrive/Pttern Assignments/Severstal Kaggle dataset/severstal-steel-defect-detection (1)'


SystemExit: 1