In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import pandas as pd
import numpy as np
import cv2
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torchvision import transforms
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import matplotlib.pyplot as plt

In [10]:

class CustomDataset(Dataset):
    """Paired image / binary-mask dataset backed by two directories of PNG files.

    Images and masks are paired by position after sorting the ``.png``
    filenames of each directory, so both directories must contain the same
    number of PNG files and sort into the same order.

    Args:
        img_dir: Directory holding the input images.
        mask_dir: Directory holding the masks.
        resize: Size tuple handed to ``cv2.resize`` for both image and mask.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries; the
            returned mask must support ``unsqueeze``.

    Raises:
        ValueError: If the two directories contain a different number of PNG
            files (positional pairing would silently mismatch samples).
    """

    def __init__(self, img_dir, mask_dir, resize=(480, 480), transform=None):
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.resize = resize
        self.transform = transform
        self.image_files = sorted([f for f in os.listdir(self.img_dir) if f.endswith('.png')])
        self.mask_files = sorted([f for f in os.listdir(self.mask_dir) if f.endswith('.png')])

        # Pairing is positional, so unequal counts can only produce wrong
        # pairs or a late IndexError; fail early with a clear message instead.
        if len(self.image_files) != len(self.mask_files):
            raise ValueError(
                f"Found {len(self.image_files)} images in {self.img_dir!r} but "
                f"{len(self.mask_files)} masks in {self.mask_dir!r}"
            )

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load, resize and (optionally) transform the pair at position ``idx``.

        Returns:
            ``(image, mask)``. With a transform, the mask gets a leading channel
            axis via ``unsqueeze(0)``; without one, the resized RGB array and the
            0/1 ``uint8`` mask array are returned unchanged.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image or the mask file.
        """
        img_path = os.path.join(self.img_dir, self.image_files[idx])
        mask_path = os.path.join(self.mask_dir, self.mask_files[idx])

        image = cv2.imread(img_path)
        if image is None:  # cv2.imread signals failure by returning None
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, self.resize)

        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_path}")
        mask = cv2.resize(mask, self.resize)
        mask = (mask > 127).astype(np.uint8)  # binary: white -> 1, black -> 0

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask'].unsqueeze(0)  # Add channel dimension: (1, H, W)

        return image, mask

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

class ConvBlock(nn.Module):
    """3x3 convolution (padding 1), then batch normalisation, then in-place ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Submodule names double as state_dict key prefixes; keep them stable.
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU and return the activated features."""
        return self.relu(self.bn(self.conv(x)))
class UNetPlusPlus(nn.Module):
    """UNet++ style nested decoder on top of a torchvision ResNet-50 encoder.

    Node ``xIJ`` sits at encoder depth ``I`` and nested-decoder column ``J``.
    Each decoder node concatenates all earlier nodes of its own depth with the
    node one level deeper (resized to match) and feeds them through a
    ``ConvBlock``.

    Args:
        num_classes: Number of output channels of every prediction head.
        H: Height every head output is resized to.
        W: Width every head output is resized to.
        deep_supervision: When True, ``forward`` returns the list
            ``[out1, out2, out3, out4]`` (heads on x01..x04); otherwise only
            ``out4`` is returned.

    Note:
        Outputs are always resized to ``(H, W)``, independent of the spatial
        size of the input batch.
    """

    def __init__(self, num_classes=1, H=480, W=480, deep_supervision=True):
        super(UNetPlusPlus, self).__init__()
        self.H = H
        self.W = W
        self.deep_supervision = deep_supervision
        resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

        # Encoder: ResNet-50 stem and its four residual stages.
        self.x00 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool)
        self.x10 = resnet.layer1
        self.x20 = resnet.layer2
        self.x30 = resnet.layer3
        self.x40 = resnet.layer4
        # Kept as a public attribute for backward compatibility; forward() now
        # resizes to the skip connection's exact size instead (see _resize_like).
        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)

        # Decoder column 1: each node fuses its encoder feature with the next deeper one.
        self.conv01 = ConvBlock(64 + 256, 64)
        self.conv11 = ConvBlock(256 + 512, 256)
        self.conv21 = ConvBlock(512 + 1024, 512)
        self.conv31 = ConvBlock(1024 + 2048, 1024)

        # Decoder column 2.
        self.conv02 = ConvBlock(64 + 64 + 256, 64)
        self.conv12 = ConvBlock(256 + 256 + 512, 256)
        self.conv22 = ConvBlock(512 + 512 + 1024, 512)

        # Decoder column 3.
        self.conv03 = ConvBlock(64 + 64 + 64 + 256, 64)
        self.conv13 = ConvBlock(256 + 256 + 256 + 512, 256)

        # Decoder column 4.
        self.conv04 = ConvBlock(64 + 64 + 64 + 64 + 256, 64)

        # Final prediction layers (deep supervision heads)
        self.final_1 = nn.Conv2d(64, num_classes, kernel_size=1)
        self.final_2 = nn.Conv2d(64, num_classes, kernel_size=1)
        self.final_3 = nn.Conv2d(64, num_classes, kernel_size=1)
        self.final_4 = nn.Conv2d(64, num_classes, kernel_size=1)

    @staticmethod
    def _resize_like(x, ref):
        """Bilinearly resize ``x`` to the spatial size of ``ref``.

        Matching the skip connection's size (rather than blindly doubling)
        keeps ``torch.cat`` valid when a stage's size is not exactly half of
        the previous one. With ``align_corners=True`` this is intended to give
        the same result as the former x2 upsampling whenever sizes do halve
        exactly.
        """
        return F.interpolate(x, size=ref.shape[2:], mode="bilinear", align_corners=True)

    def _predict(self, head, features):
        """Run a 1x1 prediction head and resize its logits to ``(H, W)``."""
        return F.interpolate(head(features), size=(self.H, self.W), mode="bilinear", align_corners=True)

    def forward(self, x):
        """Compute segmentation logits for a batch of images.

        Returns:
            ``[out1, out2, out3, out4]`` when ``deep_supervision`` is True,
            otherwise the single tensor ``out4``. Values are raw logits
            (no sigmoid is applied here).
        """
        x00 = self.x00(x)
        x10 = self.x10(x00)
        x20 = self.x20(x10)
        x30 = self.x30(x20)
        x40 = self.x40(x30)

        # x00 and x10 are concatenated directly (no resize), as in the original design.
        x01 = self.conv01(torch.cat([x00, x10], dim=1))
        x11 = self.conv11(torch.cat([x10, self._resize_like(x20, x10)], dim=1))
        x21 = self.conv21(torch.cat([x20, self._resize_like(x30, x20)], dim=1))
        x31 = self.conv31(torch.cat([x30, self._resize_like(x40, x30)], dim=1))

        x02 = self.conv02(torch.cat([x00, x01, x11], dim=1))
        x12 = self.conv12(torch.cat([x10, x11, self._resize_like(x21, x10)], dim=1))
        x22 = self.conv22(torch.cat([x20, x21, self._resize_like(x31, x20)], dim=1))

        x03 = self.conv03(torch.cat([x00, x01, x02, x12], dim=1))
        x13 = self.conv13(torch.cat([x10, x11, x12, self._resize_like(x22, x10)], dim=1))

        x04 = self.conv04(torch.cat([x00, x01, x02, x03, x13], dim=1))

        out4 = self._predict(self.final_4, x04)
        if not self.deep_supervision:
            # Skip the auxiliary heads entirely: their outputs would be discarded.
            return out4  # final only

        # Deep supervision outputs, shallowest column first.
        out1 = self._predict(self.final_1, x01)
        out2 = self._predict(self.final_2, x02)
        out3 = self._predict(self.final_3, x03)
        return [out1, out2, out3, out4]


class StructureLoss(nn.Module):
    """Locally weighted BCE-with-logits plus a soft IoU term, averaged over the batch.

    Pixels whose mask value differs from the 31x31 local mean of the mask get
    a weight of up to ``1 + 5``; the BCE term is the weight-normalised sum per
    sample. The IoU term is computed on sigmoid probabilities with +1 smoothing.
    """

    def __init__(self):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, pred, mask):
        """Return the scalar loss for logits ``pred`` against target ``mask``.

        ``pred`` is first resized to the spatial size of ``mask``; both are
        indexed as 4-D tensors (reductions run over dims 2 and 3).
        """
        spatial_dims = (2, 3)
        target = mask.float()
        logits = F.interpolate(pred, size=mask.shape[2:], mode='bilinear', align_corners=True)

        # Weight map: large where a pixel disagrees with its neighbourhood average.
        neighbourhood_mean = F.avg_pool2d(target, kernel_size=31, stride=1, padding=15)
        weit = 1 + 5 * torch.abs(neighbourhood_mean - target)

        per_pixel_bce = self.bce(logits, target)
        weighted_bce = (weit * per_pixel_bce).sum(dim=spatial_dims) / weit.sum(dim=spatial_dims)

        # Soft IoU on probabilities (uses the mask in its incoming dtype).
        probs = torch.sigmoid(logits)
        inter = (probs * mask).sum(dim=spatial_dims)
        union = (probs + mask).sum(dim=spatial_dims)
        soft_iou = 1 - (inter + 1) / (union - inter + 1)

        return (weighted_bce + soft_iou).mean()


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import os
import cv2
import numpy as np
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Training-time augmentation: random flips and mild photometric jitter, then
# normalisation with the mean/std used for the pretrained encoder, then tensor conversion.
train_transform = A.Compose([
    A.HorizontalFlip(p=0.4),
    A.VerticalFlip(p=0.4),
    A.RandomGamma(gamma_limit=(70, 130), p=0.2),
    A.RGBShift(p=0.3, r_shift_limit=10, g_shift_limit=10, b_shift_limit=10),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
img_dir = "/kaggle/input/datacv/drive-download-20250407T081859Z-001/TrainDataset/TrainDataset/image"  # update this
mask_dir = "/kaggle/input/datacv/drive-download-20250407T081859Z-001/TrainDataset/TrainDataset/mask"
# === Parameters ===
lr = 3e-4
batch_size = 16
in_channels = 3
out_channels = 1
H, W = 480, 480
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Epoch budget per phase; the log messages derive their totals from these so
# they can no longer drift out of sync with the loops.
PHASE1_EPOCHS = 150
PHASE2_EPOCHS = 50
TOTAL_EPOCHS = PHASE1_EPOCHS + PHASE2_EPOCHS
# Per-head weights for deep supervision, ordered like the model's output list.
DEEP_SUPERVISION_WEIGHTS = [0.1, 0.2, 0.3, 0.4]

train_dataset = CustomDataset(img_dir=img_dir, mask_dir=mask_dir, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)

loss_fn = StructureLoss()


def compute_training_loss(outputs, masks):
    """Return the loss for one batch.

    A list of outputs (deep supervision) is reduced to a weighted sum of the
    per-head losses; a single tensor is scored directly.
    """
    if isinstance(outputs, list):
        # Weighted deep supervision loss
        return sum(w * loss_fn(o, masks) for w, o in zip(DEEP_SUPERVISION_WEIGHTS, outputs))
    return loss_fn(outputs, masks)


def train_one_epoch(model, optimizer, scaler, desc):
    """Run one mixed-precision pass over ``train_loader`` and return the mean batch loss."""
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(train_loader, desc=desc, leave=False)

    for images, masks in progress_bar:
        images = images.to(device, non_blocking=True)
        masks = masks.to(device, non_blocking=True)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            outputs = model(images)
            loss = compute_training_loss(outputs, masks)

        # Scale the loss before backward and let the scaler drive the optimizer step.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

    return running_loss / len(train_loader)


# === Phase 1: Train without deep supervision ===
model = UNetPlusPlus(num_classes=out_channels, H=H, W=W, deep_supervision=False)
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.75, patience=5)
scaler = torch.cuda.amp.GradScaler()

for epoch in range(PHASE1_EPOCHS):
    epoch_loss = train_one_epoch(
        model, optimizer, scaler, desc=f"Phase 1 - Epoch {epoch+1}/{PHASE1_EPOCHS}"
    )
    print(f"[Phase 1] Epoch {epoch+1}/{PHASE1_EPOCHS} | Loss: {epoch_loss:.4f}")
    if (epoch + 1) % 15 == 0:
        torch.save(model.state_dict(), f"/kaggle/working/model_phase1_epoch{epoch+1}.pth")
    # The plateau scheduler watches the mean training loss of the epoch.
    scheduler.step(epoch_loss)

# === Save final Phase 1 weights ===
torch.save(model.state_dict(), "/kaggle/working/model_phase1_final.pth")

# === Phase 2: Load weights and train with deep supervision ===
model = UNetPlusPlus(num_classes=out_channels, H=H, W=W, deep_supervision=True)
model.load_state_dict(torch.load("/kaggle/working/model_phase1_final.pth"))
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)  # lower LR
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.75, patience=5)
scaler = torch.cuda.amp.GradScaler()

# Epoch numbering continues after phase 1 (151..200 with the defaults above).
for epoch in range(PHASE1_EPOCHS + 1, TOTAL_EPOCHS + 1):
    epoch_loss = train_one_epoch(
        model, optimizer, scaler, desc=f"Phase 2 - Epoch {epoch}/{TOTAL_EPOCHS}"
    )
    print(f"[Phase 2] Epoch {epoch}/{TOTAL_EPOCHS} | Loss: {epoch_loss:.4f}")
    if epoch % 10 == 0:
        torch.save(model.state_dict(), f"/kaggle/working/model_phase2_epoch{epoch}.pth")
    scheduler.step(epoch_loss)

# === Save final Phase 2 weights ===
torch.save(model.state_dict(), "/kaggle/working/model_phase2_final.pth")
 

In [None]:
# Save the current `model` weights under the filename that the inference cell loads.
torch.save(model.state_dict(), "/kaggle/working/model_weights.pth")

In [None]:
import matplotlib.pyplot as plt
# Use the GPU when one is available; fall back to CPU so the cell still runs without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def infer(model, image_path, device):
    """Predict a binary mask for one image and plot it next to the original.

    The image is resized to 480x480 and normalised before being passed to
    ``model``. If the model returns a list/tuple, its last element is used.
    Logits are turned into probabilities with a sigmoid and thresholded at 0.5.

    Args:
        model: Segmentation network; it is switched to eval mode by this call.
        image_path: Path of the image file to read with OpenCV.
        device: Device the input tensor is moved to.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.

    Returns:
        None. The result is shown with matplotlib.
    """
    model.eval()
    image = cv2.imread(image_path)
    if image is None:  # cv2.imread returns None instead of raising on failure
        raise FileNotFoundError(f"Could not read image file: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    transformed = A.Compose([
        A.Resize(480, 480),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])(image=image)

    # Add the batch dimension expected by the model.
    input_tensor = transformed['image'].unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor)
        if isinstance(output, (list, tuple)):
            output = output[-1]
        output = torch.sigmoid(output)
        prediction = (output > 0.5).float().squeeze().cpu().numpy()

    binary_mask = (prediction * 255).astype(np.uint8)  # 0 for background, 255 for object

    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title("Original Image")
    plt.axis("off")

    plt.subplot(1, 2, 2)
    plt.imshow(binary_mask, cmap="gray")
    plt.title("Binary Mask")
    plt.axis("off")
    plt.show()
# Rebuild the network in single-output mode, restore the saved weights and
# visualise the prediction for one training image.
checkpoint_path = "/kaggle/working/model_weights.pth"
sample_image_path = "/kaggle/input/datacv/drive-download-20250407T081859Z-001/TrainDataset/TrainDataset/image/1.png"

model = UNetPlusPlus(num_classes=1, deep_supervision=False)
model = model.to(device)
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)
model.eval()
infer(model, sample_image_path, device)

In [18]:
import torch
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm
import os

# Requires UNetPlusPlus and CustomDataset from the earlier cells.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Evaluation preprocessing: normalisation and tensor conversion only
# (resizing is handled inside CustomDataset).
val_transform = A.Compose([
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

# Kvasir test split, served one sample at a time in a fixed order.
test_dataset = CustomDataset(
    img_dir='/kaggle/input/comvsdataprime/TestDataset/TestDataset/Kvasir/images',
    mask_dir='/kaggle/input/comvsdataprime/TestDataset/TestDataset/Kvasir/masks',
    resize=(480, 480),
    transform=val_transform,
)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Single-output model restored from the phase-2 checkpoint.
# NOTE(review): torch.load unpickles the file and this checkpoint comes from an
# external input dataset; consider weights_only=True if it is a plain state_dict.
model = UNetPlusPlus(num_classes=1, deep_supervision=False)
checkpoint = torch.load("/kaggle/input/comvsdataprime/UnetPlusPlus-phase2.pth", map_location=device)
model.load_state_dict(checkpoint)
model.to(device)
model.eval()

# Helper functions
def compute_iou(pred, mask, eps=1e-6):
    """Intersection-over-union of ``pred`` and ``mask`` after binarising both at 0.5.

    ``eps`` is added to numerator and denominator, so two empty maps score 1.
    """
    pred_bin = pred.gt(0.5).float()
    mask_bin = mask.gt(0.5).float()
    overlap = (pred_bin * mask_bin).sum()
    combined = pred_bin.sum() + mask_bin.sum() - overlap
    return (overlap + eps) / (combined + eps)

def compute_dice(pred, mask, eps=1e-6):
    """Dice coefficient of ``pred`` and ``mask`` after binarising both at 0.5.

    ``eps`` is added to numerator and denominator, so two empty maps score 1.
    """
    pred_bin = pred.gt(0.5).float()
    mask_bin = mask.gt(0.5).float()
    overlap = (pred_bin * mask_bin).sum()
    return (2 * overlap + eps) / (pred_bin.sum() + mask_bin.sum() + eps)

# Score every test sample on its own, then average the per-sample scores.
ious = []
dices = []

with torch.no_grad():
    for images, masks in tqdm(test_loader, desc="Evaluating"):
        images, masks = images.to(device), masks.to(device).float()
        probs = torch.sigmoid(model(images))

        # Walk the batch dimension so each sample contributes exactly one score.
        for pred, true_mask in zip(probs, masks):
            ious.append(compute_iou(pred, true_mask).item())
            dices.append(compute_dice(pred, true_mask).item())

macro_iou, macro_dice = np.mean(ious), np.mean(dices)

print(f"📊 Macro IoU: {macro_iou:.4f}")
print(f"📊 Macro Dice: {macro_dice:.4f}")


  model.load_state_dict(torch.load("/kaggle/input/comvsdataprime/UnetPlusPlus-phase2.pth", map_location='cuda' if torch.cuda.is_available() else 'cpu'))
Evaluating: 100%|██████████| 100/100 [00:07<00:00, 13.73it/s]

📊 Macro IoU: 0.8212
📊 Macro Dice: 0.8748





In [19]:
import torch
from torch.utils.data import DataLoader, ConcatDataset
import numpy as np
from tqdm import tqdm
import os
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset

# CustomDataset class (as you already defined)

# Resolve the device first so the checkpoint is mapped straight onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transforms: normalisation and tensor conversion only
# (resizing is handled inside CustomDataset).
val_transform = A.Compose([
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

# List of dataset subfolders
dataset_names = ['CVC-300', 'CVC-ClinicDB', 'CVC-ColonDB', 'ETIS-LaribPolypDB', 'Kvasir']
base_path = '/kaggle/input/comvsdataprime/TestDataset/TestDataset'

# Collect all datasets. Built in a comprehension so the per-dataset paths stay
# local and do not overwrite the notebook-level `img_dir` / `mask_dir` that the
# training cell defines.
all_datasets = [
    CustomDataset(
        img_dir=os.path.join(base_path, name, 'images'),
        mask_dir=os.path.join(base_path, name, 'masks'),
        resize=(480, 480),
        transform=val_transform,
    )
    for name in dataset_names
]

# Combine datasets
full_test_dataset = ConcatDataset(all_datasets)
test_loader = DataLoader(full_test_dataset, batch_size=1, shuffle=False)

# Load model
# NOTE(review): torch.load unpickles the file and this checkpoint comes from an
# external input dataset; consider weights_only=True if it is a plain state_dict.
model = UNetPlusPlus(num_classes=1, deep_supervision=False)
model.load_state_dict(torch.load("/kaggle/input/comvsdataprime/UnetPlusPlus-phase2.pth", map_location=device))
model.to(device)
model.eval()

# Metrics
def compute_iou(pred, mask, eps=1e-6):
    """Return the IoU of two maps, each thresholded at 0.5 first.

    The ``eps`` smoothing term keeps the ratio defined (and equal to 1) when
    both maps are empty.
    """
    hard_pred, hard_mask = [(t > 0.5).float() for t in (pred, mask)]
    shared = (hard_pred * hard_mask).sum()
    covered = hard_pred.sum() + hard_mask.sum() - shared
    return (shared + eps) / (covered + eps)

def compute_dice(pred, mask, eps=1e-6):
    """Return the Dice score of two maps, each thresholded at 0.5 first.

    The ``eps`` smoothing term keeps the ratio defined (and equal to 1) when
    both maps are empty.
    """
    hard_pred, hard_mask = [(t > 0.5).float() for t in (pred, mask)]
    shared = (hard_pred * hard_mask).sum()
    return (2 * shared + eps) / (hard_pred.sum() + hard_mask.sum() + eps)

# Score every sample of the combined test set on its own.
ious = []
dices = []

with torch.no_grad():
    for images, masks in tqdm(test_loader, desc="Evaluating all datasets"):
        images, masks = images.to(device), masks.to(device).float()
        probs = torch.sigmoid(model(images))

        # Walk the batch dimension so each sample contributes exactly one score.
        for pred, true_mask in zip(probs, masks):
            ious.append(compute_iou(pred, true_mask).item())
            dices.append(compute_dice(pred, true_mask).item())

# Final results: unweighted mean over all samples of all datasets.
macro_iou, macro_dice = np.mean(ious), np.mean(dices)

print(f"📊 Macro IoU (All datasets): {macro_iou:.4f}")
print(f"📊 Macro Dice (All datasets): {macro_dice:.4f}")


  model.load_state_dict(torch.load("/kaggle/input/comvsdataprime/UnetPlusPlus-phase2.pth", map_location='cuda' if torch.cuda.is_available() else 'cpu'))
Evaluating all datasets: 100%|██████████| 798/798 [00:55<00:00, 14.27it/s]

📊 Macro IoU (All datasets): 0.5969
📊 Macro Dice (All datasets): 0.6507



