In [None]:
!ls /kaggle/input/prompt-based-segmentaion/final_dataset

In [1]:
"""ClipSeg fine-tuning script — updated to handle HF output shape mismatch and logits attribute name.

Key fixes:
- Use CLIPSegForImageSegmentation.from_pretrained(...) instead of AutoModelForImageSegmentation
- Read logits from outputs.logits or outputs.conditional_logits (defensive)
- Coerce logits to 4D (N, C, H, W) before F.interpolate
- Defensive runtime checks and a single debug print (commented by default)

Run this in the same environment as your original script. """

import os
import numpy as np
import pandas as pd
import cv2
import ast
import time
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.amp import autocast, GradScaler

import albumentations as A
from albumentations.pytorch import ToTensorV2

# Transformers import
try:
    from transformers import AutoProcessor
    from transformers import CLIPSegForImageSegmentation
except ImportError:
    print("transformers not found. Please install it: !pip install -q transformers")
    raise

# ============================================================================
# CONFIGURATION
# ============================================================================
class Config:
    # Paths
    DATASET_DIR = "/kaggle/input/prompt-based-segmentaion/final_dataset"
    SAVE_DIR = "clipseg_models"

    # Model
    MODEL_NAME = "CIDAS/clipseg-rd64-refined"
    IMAGE_SIZE = 352

    # Training
    BATCH_SIZE = 8
    NUM_EPOCHS = 30
    LEARNING_RATE = 5e-5
    WEIGHT_DECAY = 1e-2
    NUM_WORKERS = 2

    # Early stopping
    PATIENCE = 5
    MIN_DELTA = 0.001

    # Mixed precision
    USE_AMP = True
    GRADIENT_ACCUMULATION_STEPS = 4
    PYTORCH_CUDA_ALLOC_CONF = "expandable_segments:True"
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

    # Device
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

config = Config()

# ============================================================================
# SETUP: Create save directory
# ============================================================================
os.makedirs(config.SAVE_DIR, exist_ok=True)
print(f"✓ Models will be saved to {config.SAVE_DIR}")

# ============================================================================
# Model Freezing Utility
# ============================================================================

def freeze_clip_backbone(model):
    """Freeze all parameters in the CLIP backbone"""
    for name, param in model.named_parameters():
        if "clip" in name:
            param.requires_grad = False
    print("✓ Froze CLIP text and vision backbones.")

# ============================================================================
# Dataset
# ============================================================================
class PromptSegDataset(Dataset):
    """Dataset for prompt-based segmentation (Image, Text, Mask)"""

    def __init__(self, csv_path, base_dir, transform=None):
        try:
            self.data = pd.read_csv(csv_path, dtype=str)
        except Exception as e:
            print(f"Error reading CSV: {e}")
            self.data = pd.read_csv(csv_path, dtype=str, encoding='utf-8')

        self.base_dir = base_dir
        self.transform = transform

        print(f"✓ Loaded {len(self.data)} samples from {csv_path}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]

        # Load image and mask
        img_path = os.path.join(self.base_dir, row['image_path'])
        mask_path = os.path.join(self.base_dir, row['mask_path'])

        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Image not found: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Mask not found: {mask_path}")
        mask = (mask > 0).astype(np.uint8)

        # Get text prompt
        try:
            prompts = ast.literal_eval(row['prompts'])
            text_prompt = prompts[0] if prompts else "object"
        except Exception:
            text_prompt = "object"

        # Apply augmentation
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']
        else:
            # Ensure returned types are tensors
            image = ToTensorV2()(image=image)['image']
            mask = torch.from_numpy(mask).unsqueeze(0).float()

        return {
            'image': image,
            'mask': mask.float(),  # Ensure mask is float for loss
            'prompt': text_prompt,
            'class_name': row.get('class_name', 'object')
        }

# ============================================================================
# Data Augmentation
# ============================================================================

def get_transforms(is_train=True):
    """Get augmentation pipeline for ClipSeg"""
    CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
    CLIP_STD = [0.26862954, 0.26130258, 0.27577711]

    if is_train:
        return A.Compose([
            A.Resize(height=config.IMAGE_SIZE, width=config.IMAGE_SIZE),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.3),
            A.RandomBrightnessContrast(p=0.4),
            A.GaussNoise(p=0.2),
            A.GaussianBlur(blur_limit=3, p=0.2),
            A.Affine(translate_percent=0.1, scale=(0.9, 1.1), rotate=(-15, 15), p=0.5),
            A.Normalize(mean=CLIP_MEAN, std=CLIP_STD),
            ToTensorV2()
        ])
    else:
        return A.Compose([
            A.Resize(height=config.IMAGE_SIZE, width=config.IMAGE_SIZE),
            A.Normalize(mean=CLIP_MEAN, std=CLIP_STD),
            ToTensorV2()
        ])

# ============================================================================
# Custom Collate Function
# ============================================================================
class ClipSegCollator:
    """Custom collate function to handle text tokenization."""
    def __init__(self, processor):
        self.processor = processor

    def __call__(self, batch):
        pixel_values = torch.stack([item['image'] for item in batch])
        masks = torch.stack([item['mask'] for item in batch])

        texts = [item['prompt'] for item in batch]

        inputs = self.processor(
            text=texts,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            'pixel_values': pixel_values,
            'input_ids': inputs.input_ids,
            'attention_mask': inputs.attention_mask,
            'masks': masks,
            'class_names': [item['class_name'] for item in batch]
        }

# ============================================================================
# Loss Functions
# ============================================================================
class FocalDiceLoss(nn.Module):
    """Combined Focal + Dice Loss"""
    def __init__(self, alpha=0.25, gamma=2.0, dice_weight=1.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.dice_weight = dice_weight

    def focal_loss(self, pred, target):
        bce = F.binary_cross_entropy_with_logits(pred, target, reduction='none')
        pt = torch.exp(-bce)
        focal = self.alpha * (1 - pt) ** self.gamma * bce
        return focal.mean()

    def dice_loss(self, pred, target, smooth=1.0):
        pred = torch.sigmoid(pred)
        pred_flat = pred.view(pred.shape[0], -1)
        target_flat = target.view(target.shape[0], -1)

        intersection = (pred_flat * target_flat).sum(dim=1)
        union = pred_flat.sum(dim=1) + target_flat.sum(dim=1)

        dice = (2.0 * intersection + smooth) / (union + smooth)
        return 1 - dice.mean()

    def forward(self, pred, target):
        focal = self.focal_loss(pred, target)
        dice = self.dice_loss(pred, target)
        return focal + self.dice_weight * dice, {
            'focal': focal.item(),
            'dice': dice.item()
        }

# ============================================================================
# Metrics
# ============================================================================
def compute_iou(pred, target, threshold=0.5):
    pred = (torch.sigmoid(pred) > threshold).float()
    intersection = (pred * target).sum()
    union = pred.sum() + target.sum() - intersection
    return (intersection / (union + 1e-6)).item()


def compute_dice(pred, target, threshold=0.5):
    pred = (torch.sigmoid(pred) > threshold).float()
    intersection = (pred * target).sum()
    dice = (2.0 * intersection) / (pred.sum() + target.sum() + 1e-6)
    return dice.item()

# ============================================================================
# Training Functions
# ============================================================================

def _coerce_logits_to_4d(logits, masks, debug=False):
    """Coerce logits tensor into shape (N, C, H, W) before interpolation."""
    # Optional debug print (comment out in production)
    if debug:
        print("DEBUG logits.shape (before reshape):", tuple(logits.shape))

    # If outputs are a HF BatchEncoding-like object, convert to tensor
    if isinstance(logits, (list, tuple)):
        logits = logits[0]

    # Ensure tensor
    if not torch.is_tensor(logits):
        logits = torch.tensor(logits)

    # Typical cases:
    # - (N, H, W) -> add channel dim -> (N,1,H,W)
    # - (N, C, H, W) -> already fine
    # - (N, S) or flattened -> not handled automatically

    if logits.dim() == 4:
        return logits

    if logits.dim() == 3:
        # If shape matches masks spatial dims, assume (N, H, W)
        N, A, B = logits.shape
        if A == masks.shape[-2] and B == masks.shape[-1]:
            logits = logits.unsqueeze(1)  # (N,1,H,W)
        else:
            # Generic: add channel dim
            logits = logits.unsqueeze(1)
        return logits

    if logits.dim() == 2:
        # ambiguous: treat as (N, S) -> try to reshape to (N,1,H,W) if possible
        N, S = logits.shape
        H = masks.shape[-2]
        W = masks.shape[-1]
        if S == H * W:
            logits = logits.view(N, 1, H, W)
            return logits
        else:
            # fallback: add two dims
            logits = logits.unsqueeze(1).unsqueeze(-1)
            return logits

    raise RuntimeError(f"Unable to coerce logits to 4D; got shape {tuple(logits.shape)}")


def train_epoch(model, loader, criterion, optimizer, scaler, epoch, config):
    model.train()

    total_loss = 0
    total_iou = 0
    total_dice = 0

    pbar = tqdm(loader, desc=f'Epoch {epoch} [Train]')

    for batch_idx, batch in enumerate(pbar):
        pixel_values = batch['pixel_values'].to(config.DEVICE)
        input_ids = batch['input_ids'].to(config.DEVICE)
        attention_mask = batch['attention_mask'].to(config.DEVICE)
        masks = batch['masks'].to(config.DEVICE).unsqueeze(1)  # [B,1,H,W]

        with autocast(device_type='cuda', dtype=torch.float16, enabled=config.USE_AMP):
            outputs = model(
                pixel_values=pixel_values,
                input_ids=input_ids,
                attention_mask=attention_mask
            )

            # Defensive retrieval of logits
            logits = None
            for attr in ("logits", "conditional_logits"):
                if hasattr(outputs, attr):
                    logits = getattr(outputs, attr)
                    break

            if logits is None:
                # try dict-like access
                try:
                    outputs_dict = dict(outputs)
                    # pick first tensor-like value
                    for v in outputs_dict.values():
                        if torch.is_tensor(v):
                            logits = v
                            break
                except Exception:
                    pass

            if logits is None:
                # Final debug dump
                try:
                    print("Model outputs keys:", list(outputs.keys()))
                except Exception:
                    pass
                raise RuntimeError("No logits-like attribute found on CLIPSeg output.")

            # Coerce logits to 4D
            logits = _coerce_logits_to_4d(logits, masks, debug=False)

            pred_masks = F.interpolate(
                logits,
                size=masks.shape[-2:],
                mode='bilinear',
                align_corners=False
            )

            loss, loss_dict = criterion(pred_masks, masks)
            loss = loss / config.GRADIENT_ACCUMULATION_STEPS

        scaler.scale(loss).backward()

        if (batch_idx + 1) % config.GRADIENT_ACCUMULATION_STEPS == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        with torch.no_grad():
            iou = compute_iou(pred_masks, masks)
            dice = compute_dice(pred_masks, masks)

        total_loss += loss.item() * config.GRADIENT_ACCUMULATION_STEPS
        total_iou += iou
        total_dice += dice

        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'iou': f'{iou:.4f}',
            'dice': f'{dice:.4f}'
        })

    n = len(loader)
    return total_loss / n, total_iou / n, total_dice / n


@torch.no_grad()
def validate(model, loader, criterion, config):
    model.eval()

    total_loss = 0
    total_iou = 0
    total_dice = 0

    for batch in tqdm(loader, desc='Validating'):
        pixel_values = batch['pixel_values'].to(config.DEVICE)
        input_ids = batch['input_ids'].to(config.DEVICE)
        attention_mask = batch['attention_mask'].to(config.DEVICE)
        masks = batch['masks'].to(config.DEVICE).unsqueeze(1)

        outputs = model(
            pixel_values=pixel_values,
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        logits = None
        for attr in ("logits", "conditional_logits"):
            if hasattr(outputs, attr):
                logits = getattr(outputs, attr)
                break

        if logits is None:
            try:
                outputs_dict = dict(outputs)
                for v in outputs_dict.values():
                    if torch.is_tensor(v):
                        logits = v
                        break
            except Exception:
                pass

        if logits is None:
            try:
                print("Model outputs keys:", list(outputs.keys()))
            except Exception:
                pass
            raise RuntimeError("No logits-like attribute found on CLIPSeg output.")

        logits = _coerce_logits_to_4d(logits, masks, debug=False)

        pred_masks = F.interpolate(
            logits,
            size=masks.shape[-2:],
            mode='bilinear',
            align_corners=False
        )

        loss, _ = criterion(pred_masks, masks)
        iou = compute_iou(pred_masks, masks)
        dice = compute_dice(pred_masks, masks)

        total_loss += loss.item()
        total_iou += iou
        total_dice += dice

    n = len(loader)
    return total_loss / n, total_iou / n, total_dice / n

# ============================================================================
# MAIN EXECUTION
# ============================================================================
if __name__ == "__main__":
    print("=" * 70)
    print("CLIPSEG FINE-TUNING")
    print("=" * 70)

    print(f"\nLoading model: {config.MODEL_NAME}")
    processor = AutoProcessor.from_pretrained(config.MODEL_NAME)

    model = CLIPSegForImageSegmentation.from_pretrained(config.MODEL_NAME)
    model.to(config.DEVICE)

    # Freeze the backbone
    freeze_clip_backbone(model)

    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())

    print(f"✓ Trainable params: {trainable:,} ({100*trainable/total:.2f}%)")
    print(f"✓ Frozen params: {total-trainable:,}")

    # Create datasets
    train_dataset = PromptSegDataset(
        os.path.join(config.DATASET_DIR, 'train.csv'),
        config.DATASET_DIR,
        transform=get_transforms(True)
    )

    valid_dataset = PromptSegDataset(
        os.path.join(config.DATASET_DIR, 'valid.csv'),
        config.DATASET_DIR,
        transform=get_transforms(False)
    )

    # Create collator
    collator = ClipSegCollator(processor)

    # Create dataloaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.BATCH_SIZE,
        shuffle=True,
        num_workers=config.NUM_WORKERS,
        collate_fn=collator,
        pin_memory=True
    )

    valid_loader = DataLoader(
        valid_dataset,
        batch_size=config.BATCH_SIZE,
        shuffle=False,
        num_workers=config.NUM_WORKERS,
        collate_fn=collator,
        pin_memory=True
    )

    print(f"\n✓ Train batches: {len(train_loader)}")
    print(f"✓ Valid batches: {len(valid_loader)}")

    # Setup training
    criterion = FocalDiceLoss()
    optimizer = optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=config.LEARNING_RATE,
        weight_decay=config.WEIGHT_DECAY
    )
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=5, T_mult=2
    )
    scaler = GradScaler(device='cuda', enabled=config.USE_AMP)

    # Training loop
    best_iou = 0
    patience_counter = 0
    history = {'train_loss': [], 'train_iou': [], 'valid_loss': [], 'valid_iou': []}

    print("\n" + "=" * 70)
    print("STARTING TRAINING")
    print("=" * 70)

    for epoch in range(1, config.NUM_EPOCHS + 1):
        train_loss, train_iou, train_dice = train_epoch(
            model, train_loader, criterion, optimizer, scaler, epoch, config
        )
        valid_loss, valid_iou, valid_dice = validate(
            model, valid_loader, criterion, config
        )

        scheduler.step()

        history['train_loss'].append(train_loss)
        history['train_iou'].append(train_iou)
        history['valid_loss'].append(valid_loss)
        history['valid_iou'].append(valid_iou)

        print(f"\nEpoch {epoch}")
        print(f"Train: Loss={train_loss:.4f}, IoU={train_iou:.4f}, Dice={train_dice:.4f}")
        print(f"Valid: Loss={valid_loss:.4f}, IoU={valid_iou:.4f}, Dice={valid_dice:.4f}")

        # Save best model
        if valid_iou > best_iou + config.MIN_DELTA:
            best_iou = valid_iou
            patience_counter = 0

            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'valid_iou': valid_iou,
            }, os.path.join(config.SAVE_DIR, 'best_model.pth'))

            processor.save_pretrained(config.SAVE_DIR)

            print(f"✅ Best model saved! IoU: {valid_iou:.4f}")
        else:
            patience_counter += 1

        if patience_counter >= config.PATIENCE:
            print(f"\n⚠️ Early stopping triggered!")
            break

    print("\n" + "=" * 70)
    print("TRAINING COMPLETE!")
    print("=" * 70)
    print(f"Best validation IoU: {best_iou:.4f}")
    print(f"Model saved to: {config.SAVE_DIR}/best_model.pth")


2025-10-29 11:36:26.875251: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761737786.906731     988 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761737786.914648     988 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


✓ Models will be saved to clipseg_models
CLIPSEG FINE-TUNING

Loading model: CIDAS/clipseg-rd64-refined


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


✓ Froze CLIP text and vision backbones.
✓ Trainable params: 1,127,009 (0.75%)
✓ Frozen params: 149,620,737
✓ Loaded 8590 samples from /kaggle/input/prompt-based-segmentaion/final_dataset/train.csv
✓ Loaded 1074 samples from /kaggle/input/prompt-based-segmentaion/final_dataset/valid.csv

✓ Train batches: 1074
✓ Valid batches: 135

STARTING TRAINING


Epoch 1 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.13it/s, loss=0.2012, iou=0.2351, dice=0.3806]
Validating: 100%|██████████| 135/135 [00:48<00:00,  2.80it/s]



Epoch 1
Train: Loss=0.8133, IoU=0.1645, Dice=0.2748
Valid: Loss=0.7321, IoU=0.2251, Dice=0.3595
✅ Best model saved! IoU: 0.2251


Epoch 2 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.13it/s, loss=0.1672, iou=0.1953, dice=0.3268]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 2
Train: Loss=0.7306, IoU=0.2255, Dice=0.3576
Valid: Loss=0.6867, IoU=0.2668, Dice=0.4112
✅ Best model saved! IoU: 0.2668


Epoch 3 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.2035, iou=0.1183, dice=0.2116]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 3
Train: Loss=0.7090, IoU=0.2480, Dice=0.3851
Valid: Loss=0.6741, IoU=0.2733, Dice=0.4193
✅ Best model saved! IoU: 0.2733


Epoch 4 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1987, iou=0.1095, dice=0.1975]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 4
Train: Loss=0.7002, IoU=0.2577, Dice=0.3984
Valid: Loss=0.6698, IoU=0.2806, Dice=0.4273
✅ Best model saved! IoU: 0.2806


Epoch 5 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.08it/s, loss=0.2221, iou=0.0625, dice=0.1177]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 5
Train: Loss=0.6940, IoU=0.2603, Dice=0.4006
Valid: Loss=0.6667, IoU=0.2848, Dice=0.4320
✅ Best model saved! IoU: 0.2848


Epoch 6 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.09it/s, loss=0.1750, iou=0.2512, dice=0.4015]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 6
Train: Loss=0.6934, IoU=0.2610, Dice=0.4015
Valid: Loss=0.6565, IoU=0.2854, Dice=0.4328


Epoch 7 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1370, iou=0.3256, dice=0.4913]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 7
Train: Loss=0.6838, IoU=0.2699, Dice=0.4130
Valid: Loss=0.6465, IoU=0.2988, Dice=0.4489
✅ Best model saved! IoU: 0.2988


Epoch 8 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.10it/s, loss=0.1691, iou=0.2670, dice=0.4214]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 8
Train: Loss=0.6768, IoU=0.2745, Dice=0.4181
Valid: Loss=0.6384, IoU=0.3147, Dice=0.4657
✅ Best model saved! IoU: 0.3147


Epoch 9 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.10it/s, loss=0.1441, iou=0.2502, dice=0.4003]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 9
Train: Loss=0.6706, IoU=0.2808, Dice=0.4258
Valid: Loss=0.6325, IoU=0.3198, Dice=0.4720
✅ Best model saved! IoU: 0.3198


Epoch 10 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1794, iou=0.1365, dice=0.2402]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 10
Train: Loss=0.6627, IoU=0.2820, Dice=0.4278
Valid: Loss=0.6269, IoU=0.3277, Dice=0.4803
✅ Best model saved! IoU: 0.3277


Epoch 11 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1359, iou=0.3877, dice=0.5588]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 11
Train: Loss=0.6603, IoU=0.2871, Dice=0.4329
Valid: Loss=0.6264, IoU=0.3244, Dice=0.4771


Epoch 12 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1823, iou=0.2859, dice=0.4447]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 12
Train: Loss=0.6562, IoU=0.2887, Dice=0.4339
Valid: Loss=0.6222, IoU=0.3307, Dice=0.4841
✅ Best model saved! IoU: 0.3307


Epoch 13 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1884, iou=0.2346, dice=0.3800]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 13
Train: Loss=0.6561, IoU=0.2904, Dice=0.4367
Valid: Loss=0.6224, IoU=0.3284, Dice=0.4819


Epoch 14 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1653, iou=0.1918, dice=0.3219]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 14
Train: Loss=0.6548, IoU=0.2910, Dice=0.4382
Valid: Loss=0.6218, IoU=0.3313, Dice=0.4847


Epoch 15 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1693, iou=0.2341, dice=0.3793]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 15
Train: Loss=0.6543, IoU=0.2897, Dice=0.4366
Valid: Loss=0.6216, IoU=0.3310, Dice=0.4844


Epoch 16 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1625, iou=0.6047, dice=0.7537]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 16
Train: Loss=0.6523, IoU=0.2945, Dice=0.4416
Valid: Loss=0.6201, IoU=0.3324, Dice=0.4858
✅ Best model saved! IoU: 0.3324


Epoch 17 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1557, iou=0.2111, dice=0.3486]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 17
Train: Loss=0.6524, IoU=0.2939, Dice=0.4410
Valid: Loss=0.6139, IoU=0.3384, Dice=0.4919
✅ Best model saved! IoU: 0.3384


Epoch 18 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.13it/s, loss=0.1815, iou=0.1958, dice=0.3275]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 18
Train: Loss=0.6483, IoU=0.3002, Dice=0.4484
Valid: Loss=0.6091, IoU=0.3433, Dice=0.4981
✅ Best model saved! IoU: 0.3433


Epoch 19 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1523, iou=0.4625, dice=0.6325]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 19
Train: Loss=0.6418, IoU=0.3025, Dice=0.4509
Valid: Loss=0.6111, IoU=0.3470, Dice=0.5011
✅ Best model saved! IoU: 0.3470


Epoch 20 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1230, iou=0.2802, dice=0.4377]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 20
Train: Loss=0.6392, IoU=0.3096, Dice=0.4591
Valid: Loss=0.6031, IoU=0.3531, Dice=0.5086
✅ Best model saved! IoU: 0.3531


Epoch 21 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.13it/s, loss=0.1873, iou=0.1246, dice=0.2215]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 21
Train: Loss=0.6356, IoU=0.3095, Dice=0.4592
Valid: Loss=0.6004, IoU=0.3558, Dice=0.5111
✅ Best model saved! IoU: 0.3558


Epoch 22 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.2065, iou=0.1517, dice=0.2635]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 22
Train: Loss=0.6350, IoU=0.3099, Dice=0.4598
Valid: Loss=0.6010, IoU=0.3522, Dice=0.5081


Epoch 23 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.11it/s, loss=0.1775, iou=0.1841, dice=0.3109]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 23
Train: Loss=0.6315, IoU=0.3164, Dice=0.4669
Valid: Loss=0.5961, IoU=0.3627, Dice=0.5183
✅ Best model saved! IoU: 0.3627


Epoch 24 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1620, iou=0.2995, dice=0.4609]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 24
Train: Loss=0.6258, IoU=0.3185, Dice=0.4692
Valid: Loss=0.5936, IoU=0.3632, Dice=0.5197


Epoch 25 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.12it/s, loss=0.1137, iou=0.3848, dice=0.5557]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 25
Train: Loss=0.6252, IoU=0.3207, Dice=0.4720
Valid: Loss=0.5963, IoU=0.3615, Dice=0.5175


Epoch 26 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.13it/s, loss=0.1476, iou=0.3423, dice=0.5100]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 26
Train: Loss=0.6274, IoU=0.3179, Dice=0.4684
Valid: Loss=0.5923, IoU=0.3652, Dice=0.5217
✅ Best model saved! IoU: 0.3652


Epoch 27 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1173, iou=0.5555, dice=0.7143]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.83it/s]



Epoch 27
Train: Loss=0.6231, IoU=0.3169, Dice=0.4677
Valid: Loss=0.5905, IoU=0.3672, Dice=0.5234
✅ Best model saved! IoU: 0.3672


Epoch 28 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1579, iou=0.2207, dice=0.3616]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 28
Train: Loss=0.6236, IoU=0.3220, Dice=0.4729
Valid: Loss=0.5901, IoU=0.3643, Dice=0.5211


Epoch 29 [Train]: 100%|██████████| 1074/1074 [02:31<00:00,  7.11it/s, loss=0.1441, iou=0.2540, dice=0.4051]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 29
Train: Loss=0.6233, IoU=0.3190, Dice=0.4701
Valid: Loss=0.5887, IoU=0.3665, Dice=0.5232


Epoch 30 [Train]: 100%|██████████| 1074/1074 [02:30<00:00,  7.11it/s, loss=0.2031, iou=0.1443, dice=0.2522]
Validating: 100%|██████████| 135/135 [00:47<00:00,  2.82it/s]



Epoch 30
Train: Loss=0.6223, IoU=0.3242, Dice=0.4750
Valid: Loss=0.5878, IoU=0.3684, Dice=0.5254
✅ Best model saved! IoU: 0.3684

TRAINING COMPLETE!
Best validation IoU: 0.3684
Model saved to: clipseg_models/best_model.pth


In [None]:
!pip install transformers