# ResUNet Weld Seam Segmentation Training Pipeline
This notebook handles:
1. Extracting the `U-Net_model` v4 dataset (a Roboflow PNG-mask semantic-segmentation export) from a zip file already uploaded to session storage.
2. Defining the pure Deep Residual UNet (ResUNet) architecture.
3. Setting up data loaders with Image Augmentation (Albumentations).
4. Training the model with Mixed Precision, BCE+Dice Loss, and Cosine Annealing.
5. Running predictions on the test set and exporting `best_resunet_seam.pth`.

In [None]:
!pip install -q roboflow albumentations
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from roboflow import Roboflow
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/94.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.6/94.6 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m73.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m111.0 MB/s[0m eta [36m0:00:00[0m
[?25hUsing device: cuda


## 1. Download Dataset

In [None]:
import os
import zipfile

# The dataset archive is expected to already sit in session storage.
zip_path = "/content/U-Net_model.v4i.png-mask-semantic.zip"

# Unpack the whole archive under /content/dataset.
with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall("/content/dataset")

# Sanity check: print at most five files from the top level of the extraction
# directory only (sub-directories are not descended into).
top_dir, _subdirs, top_files = next(os.walk("/content/dataset"), ("", [], []))
for file_name in top_files[:5]:
    print(os.path.join(top_dir, file_name))

# Despite the name, this is the dataset root directory, not a YAML file.
DATA_YAML_PATH = "/content/dataset"
print("\nDataset ready at:", DATA_YAML_PATH)

/content/dataset/README.roboflow.txt
/content/dataset/README.dataset.txt

Dataset ready at: /content/dataset


In [5]:
import os
import shutil
import zipfile

zip_path = "/content/U-Net_model.v4i.png-mask-semantic.zip"

# Single source of truth for the extraction directory. Despite the name, this
# is the dataset root directory, not a YAML file.
DATA_YAML_PATH = "/content/dataset"

# Clean up any previous attempts. shutil.rmtree replaces the notebook-only
# `!rm -rf` shell escape so the cell is valid Python and works without a shell;
# ignore_errors=True mirrors `-f` (a missing directory is not an error).
shutil.rmtree(DATA_YAML_PATH, ignore_errors=True)
os.makedirs(DATA_YAML_PATH, exist_ok=True)

# Extract
with zipfile.ZipFile(zip_path, 'r') as z:
    z.extractall(DATA_YAML_PATH)

# Print the folder structure deeply
print("Dataset Directory Structure:")
for root, dirs, files_list in os.walk(DATA_YAML_PATH):
    # Depth below the dataset root: 0 for the root itself, 1 for its children, ...
    rel_path = os.path.relpath(root, DATA_YAML_PATH)
    level = 0 if rel_path == os.curdir else rel_path.count(os.sep) + 1
    indent = " " * 4 * level
    print(f"{indent}{os.path.basename(root)}/")
    if level < 2:  # Print a couple of files to verify
        subindent = " " * 4 * (level + 1)
        for f in files_list[:2]:
            print(f"{subindent}{f}")


Dataset Directory Structure:
dataset/
    README.roboflow.txt
    README.dataset.txt
    train/
        12_jpg.rf.d76e19964deace7e928253e04407e333.jpg
        53_jpg.rf.2edb65eedb0e09e9820d04c2af88ba61_mask.png
    valid/
        N_32_jpg.rf.930edbe76382470c7bc3e71cfa9cb103_mask.png
        41_jpg.rf.d8e1bdafa108596afc14bc3289fcb65f.jpg
    test/
        211_jpg.rf.48d47f24d85c8a9f111a4baf69ea315c_mask.png
        107_jpg.rf.df9bb748e81be3e3c9fd48203429591d_mask.png


## 2. Deep Residual UNet (ResUNet) Architecture
Instead of plain convolution blocks, ResUNet uses residual blocks. The skip connection inside each block lets gradients flow more easily through the network and mitigates vanishing gradients, which matters when segmenting thin structures such as 1-pixel-wide weld seams.

In [7]:
class WeldSeamDataset(Dataset):
    """Paired image/mask dataset for binary weld-seam segmentation.

    Every ``<name>.jpg`` file in ``folder_dir`` is treated as an input image
    and is expected to have a sibling mask called ``<name>_mask.png`` in the
    same folder.

    Args:
        folder_dir: Directory holding both the images and their masks.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries.

    Each item is an ``(image, mask)`` pair. The mask is a float tensor of
    zeros and ones with a leading channel dimension added. If the image or
    mask is still a NumPy array after the optional transform, it is converted
    to a tensor (the image is also moved from HWC to CHW) so the return types
    are consistent.
    """

    def __init__(self, folder_dir, transform=None):
        self.folder_dir = folder_dir
        # Keep only the original images. Masks are '*_mask.png' and therefore
        # never match '.jpg'; a stray '*_mask.jpg' is excluded explicitly.
        # Sorted because os.listdir returns entries in arbitrary order, which
        # would make dataset indices differ between runs/machines.
        self.images = sorted(
            f for f in os.listdir(folder_dir)
            if f.endswith('.jpg') and not f.endswith('_mask.jpg')
        )
        self.transform = transform

    def __len__(self):
        """Return the number of images found in the folder."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, binarize and (optionally) transform the pair at ``idx``.

        Raises:
            FileNotFoundError: If the expected mask file does not exist.
            OSError: If OpenCV cannot decode the image or the mask.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.folder_dir, img_name)

        # Swap only the extension: str.replace would also rewrite a '.jpg'
        # that happens to appear in the middle of the file name.
        stem, _ext = os.path.splitext(img_name)
        mask_name = f"{stem}_mask.png"
        mask_path = os.path.join(self.folder_dir, mask_name)

        if not os.path.exists(mask_path):
            raise FileNotFoundError(f"Missing mask: Expected {mask_name} in {self.folder_dir}")

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(img_path)
        if image is None:
            raise OSError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise OSError(f"Could not read mask: {mask_path}")
        # Binarize it: any pixel > 0 becomes 1
        mask = (mask > 0).astype(np.float32)

        if self.transform is not None:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # Without a tensor-producing transform both values are still NumPy
        # arrays; convert them so that unsqueeze below works and the image is
        # channel-first.
        if isinstance(image, np.ndarray):
            image = torch.from_numpy(np.ascontiguousarray(image)).permute(2, 0, 1)
        if isinstance(mask, np.ndarray):
            mask = torch.from_numpy(np.ascontiguousarray(mask))

        return image, mask.unsqueeze(0)

# Preprocessing constants shared by the training and validation pipelines.
# The mean/std values match the commonly used ImageNet channel statistics.
IMG_SIZE = 512
NORM_MEAN = (0.485, 0.456, 0.406)
NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: resize, random flips / 90-degree rotations and a mild
# colour jitter, then normalisation and conversion to tensors.
train_transform = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, p=0.3),
    A.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ToTensorV2(),
])

# Validation pipeline: the same resize/normalise steps with no random augmentation.
val_transform = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ToTensorV2(),
])

# Dataset root produced by the extraction cell.
DATA_YAML_PATH = "/content/dataset"
train_dir = os.path.join(DATA_YAML_PATH, 'train')
val_dir = os.path.join(DATA_YAML_PATH, 'valid')

# If either split cannot be opened, report the error and fall back to two empty
# datasets so the rest of the notebook still runs (training is then skipped).
try:
    train_dataset = WeldSeamDataset(train_dir, transform=train_transform)
    val_dataset = WeldSeamDataset(val_dir, transform=val_transform)
except Exception as e:
    print("Error loading datasets:", e)
    train_dataset, val_dataset = [], []

batch_size = 8

# An empty split gets an empty list in place of a DataLoader.
if len(train_dataset) > 0:
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
else:
    train_loader = []

if len(val_dataset) > 0:
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
else:
    val_loader = []

print(f"Train images: {len(train_dataset)}, Val images: {len(val_dataset)}")


Train images: 226, Val images: 65


## 4. Loss Function
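We use BCE + Dice Loss. Dice Loss is especially helpful for thin lines because it measures the overlap between the prediction $P$ and the ground truth $G$, $\mathrm{Dice} = \frac{2\,|P \cap G|}{|P| + |G|}$, rather than per-pixel accuracy, so it is not dominated by the much larger background class. (Dice is closely related to, but not the same as, intersection over union.)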

In [None]:
class DiceBCELoss(nn.Module):
    """Sum of binary cross-entropy (on logits) and soft Dice loss.

    The Dice term is computed over all elements of the batch at once (the
    tensors are flattened), not per sample.
    """

    def forward(self, logits, targets, smooth=1.0):
        """Return ``BCE(logits, targets) + (1 - soft_dice)`` as a scalar tensor.

        Args:
            logits: Raw (pre-sigmoid) model outputs.
            targets: Ground-truth tensor with the same shape as ``logits``,
                holding values in [0, 1].
            smooth: Constant added to the Dice numerator and denominator so the
                ratio stays defined when both prediction and target are empty.
        """
        bce_loss = F.binary_cross_entropy_with_logits(logits, targets)

        probs = torch.sigmoid(logits)
        # reshape (not view): view(-1) raises on non-contiguous tensors such as
        # transposed or channels-last outputs; reshape copies only when needed.
        probs_flat = probs.reshape(-1)
        targets_flat = targets.reshape(-1)

        intersection = (probs_flat * targets_flat).sum()
        dice_loss = 1 - (2. * intersection + smooth) / (probs_flat.sum() + targets_flat.sum() + smooth)

        return bce_loss + dice_loss

def compute_dice_coeff(logits, targets, eps=1e-6):
    """Return the hard Dice coefficient of thresholded predictions as a scalar tensor.

    Predictions are ``sigmoid(logits) > 0.5`` and targets are binarised with
    ``targets > 0.5``; the score is computed over all elements at once.

    Args:
        logits: Raw (pre-sigmoid) model outputs.
        targets: Ground-truth tensor with the same shape as ``logits``.
        eps: Small constant added to numerator and denominator. With it, an
            empty prediction for an empty target scores 1 (a perfect match)
            instead of 0, while non-empty cases change only negligibly.
    """
    pred_mask = torch.sigmoid(logits) > 0.5
    true_mask = targets > 0.5
    intersection = (pred_mask & true_mask).sum().float()
    # Count both masks after binarisation so the denominator is consistent
    # with the intersection.
    total = pred_mask.sum().float() + true_mask.sum().float()
    return (2. * intersection + eps) / (total + eps)

## 5. Training Loop
Trains the ResUNet model using Mixed Precision for speed and reduced memory.

In [None]:
# Training configuration. num_epochs is defined first so the cosine schedule
# length always matches the number of epochs actually run.
num_epochs = 50

# Mixed precision only makes sense on CUDA; on CPU the scaler and autocast are
# explicitly disabled instead of relying on PyTorch to warn and turn them off.
use_amp = device.type == 'cuda'

model = ResUNet(in_c=3, out_c=1).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
criterion = DiceBCELoss()
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

best_val_dice = 0.0

# Per-epoch history, consumed by the plotting cell.
train_losses, val_losses, val_dices = [], [], []

if len(train_loader) > 0:
    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()
        epoch_loss, train_seen = 0.0, 0

        for imgs, masks in train_loader:
            imgs, masks = imgs.to(device), masks.to(device)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast(enabled=use_amp):
                preds = model(imgs)
                loss = criterion(preds, masks)

            # Scale the loss before backward, then let the scaler perform the
            # optimizer step and update its scale factor.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Weight by batch size so a short final batch does not count as
            # much as a full one in the epoch average.
            n = imgs.size(0)
            epoch_loss += loss.item() * n
            train_seen += n

        train_losses.append(epoch_loss / train_seen)
        scheduler.step()

        # ---- Validation ----
        model.eval()
        val_loss, val_dice, val_seen = 0.0, 0.0, 0
        with torch.no_grad():
            for imgs, masks in val_loader:
                imgs, masks = imgs.to(device), masks.to(device)
                with torch.cuda.amp.autocast(enabled=use_amp):
                    preds = model(imgs)
                    loss = criterion(preds, masks)
                    n = imgs.size(0)
                    val_loss += loss.item() * n
                    val_dice += compute_dice_coeff(preds, masks).item() * n
                    val_seen += n

        if val_seen > 0:
            val_loss /= val_seen
            val_dice /= val_seen
        else:
            # No validation data: record NaN rather than dividing by zero, so
            # the history lists stay aligned with train_losses.
            val_loss = val_dice = float('nan')

        val_losses.append(val_loss)
        val_dices.append(val_dice)

        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_losses[-1]:.4f} | Val Loss: {val_losses[-1]:.4f} | Val Dice: {val_dice:.4f}")

        if val_seen == 0:
            # Without a validation signal there is no "best" epoch; keep the
            # latest weights so the export step still has a file to download.
            torch.save(model.state_dict(), "best_resunet_seam.pth")
        elif val_dice > best_val_dice:
            best_val_dice = val_dice
            torch.save(model.state_dict(), "best_resunet_seam.pth")
            print("--> Saved new best model")

    print("Training Complete. Best Val Dice:", best_val_dice)

## 6. Plotting Results and Exporting

In [None]:
import os

# Plot the training history only if at least one epoch was recorded.
if train_losses:
    plt.figure(figsize=(10, 4))

    # Left panel: training vs. validation loss per epoch.
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.xlabel('Epoch')
    plt.legend()
    plt.title('Loss Curves')

    # Right panel: validation Dice coefficient per epoch.
    plt.subplot(1, 2, 2)
    plt.plot(val_dices, label='Val Dice Coeff')
    plt.xlabel('Epoch')
    plt.legend()
    plt.title('Validation Accuracy (Dice)')

    plt.tight_layout()
    plt.show()

# To download the weights locally (only possible inside Google Colab):
checkpoint_path = 'best_resunet_seam.pth'
try:
    from google.colab import files
except ImportError:
    # Not running in Colab. Only this expected case is handled, so real
    # failures are no longer silently swallowed by a bare `except`.
    print(f"google.colab is not available; weights (if any) remain at {checkpoint_path}")
else:
    if os.path.exists(checkpoint_path):
        files.download(checkpoint_path)
    else:
        print(f"No checkpoint found at {checkpoint_path}; nothing to download.")