In [None]:
!pip install -q segmentation-models-pytorch albumentations

In [None]:
import os
import cv2
import collections
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# PyTorch & related libraries
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
import segmentation_models_pytorch as smp

# Other utilities
import albumentations as albu
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [None]:
# Define paths
train_df = pd.read_csv('../input/understanding_cloud_organization/train.csv')
train_path = '../input/understanding_cloud_organization/train_images/'
test_path = '../input/understanding_cloud_organization/test_images/'

# Preprocess DataFrame: 'Image_Label' is split on '_' into the image id and the class name.
train_df['ImageId'] = train_df['Image_Label'].apply(lambda x: x.split('_')[0])
train_df['ClassId'] = train_df['Image_Label'].apply(lambda x: x.split('_')[1])
train_df['hasMask'] = ~train_df['EncodedPixels'].isna()

# Number of annotated classes per image. Only 'hasMask' is aggregated: summing
# every column would also try to "sum" the string columns (some of which hold
# NaN), which raises on current pandas versions.
mask_count_df = (
    train_df.groupby('ImageId', as_index=False)['hasMask']
    .sum()
    .sort_values('hasMask', ascending=False)
)

# Split data, stratified on the number of masks per image
train_ids, val_ids = train_test_split(
    mask_count_df['ImageId'].values, random_state=42, stratify=mask_count_df['hasMask'], test_size=0.1
)

In [None]:
def rle2mask(rle, input_shape):
    """Decode a run-length-encoded string into a binary mask.

    The encoding is a space-separated sequence of ``start length`` pairs.
    ``start`` is a 1-indexed position in the column-major (top-to-bottom,
    then left-to-right) flattening of the mask, which is the convention
    ``mask2rle`` in this file produces.

    Args:
        rle: String such as ``"1 3 10 5"``. An empty string yields an empty mask.
        input_shape: Sequence whose first two entries are ``(height, width)``
            of the mask to build.

    Returns:
        ``np.uint8`` array of shape ``(height, width)`` with 1 inside the runs
        and 0 elsewhere.

    Raises:
        ValueError: If ``rle`` contains an odd number of values.
    """
    height, width = input_shape[:2]
    mask = np.zeros(height * width, dtype=np.uint8)

    values = np.asarray([int(x) for x in rle.split()], dtype=np.int64)
    if values.size % 2:
        raise ValueError("RLE string must contain an even number of values")

    # RLE positions are 1-indexed; numpy slices are 0-indexed.
    starts = values[0::2] - 1
    lengths = values[1::2]
    for start, length in zip(starts, lengths):
        mask[start:start + length] = 1

    # The flat buffer is column-major: view it as (width, height), then transpose.
    return mask.reshape(width, height).T

def mask2rle(img):
    """Run-length encode a 2-D mask.

    The mask is flattened column by column (transpose, then flatten) and every
    position where the value changes is recorded, using 1-indexed positions.

    Args:
        img: 2-D array; intended for binary (0/1) masks.

    Returns:
        Space-separated ``start length`` pairs as a string; an all-zero mask
        gives the empty string.
    """
    flat = img.T.flatten()
    # Pad with zeros so runs touching either end still produce a transition.
    padded = np.concatenate([[0], flat, [0]])
    transitions = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Every second entry is a run end; turn it into a run length.
    transitions[1::2] -= transitions[::2]
    return ' '.join(map(str, transitions))

In [None]:
class CloudDataset(Dataset):
    """Dataset yielding ``(image, mask)`` tensor pairs for cloud segmentation.

    Args:
        df: DataFrame with one row per (image, class). ``__getitem__`` reads the
            columns ``'ImageId'``, ``'ClassId'`` and ``'EncodedPixels'``.
        image_ids: Sequence of image file names; one dataset item per entry.
        data_folder: Directory containing the image files.
        img_size: ``(height, width)`` that both image and mask are resized to
            before the transforms run.
        n_classes: Number of mask channels.
        transforms: Optional callable invoked as ``transforms(image=..., mask=...)``
            that returns a dict with ``'image'`` and ``'mask'`` keys.
    """

    # Channel order of the mask; index in this tuple == mask channel.
    CLASS_NAMES = ('Fish', 'Flower', 'Gravel', 'Sugar')
    # (height, width) the RLE annotations are decoded at.
    RLE_SHAPE = (1400, 2100)

    def __init__(self, df, image_ids, data_folder, img_size=(320, 640),
                 n_classes=4, transforms=None):
        self.df = df
        self.data_folder = data_folder
        self.img_size = img_size
        self.n_classes = n_classes
        self.transforms = transforms
        self.image_ids = image_ids

    def __len__(self):
        """Return the number of images in this dataset."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load one image and build its multi-channel mask.

        Returns:
            Tuple ``(image, mask)`` of float32 tensors in ``(C, H, W)`` layout.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_id = self.image_ids[idx]
        target_h, target_w = self.img_size

        # Load image (OpenCV reads BGR; convert to RGB)
        image_path = os.path.join(self.data_folder, image_id)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Bring the image to the same spatial size as the mask. The augmentation
        # pipeline receives both together, and recent albumentations versions
        # reject image/mask pairs of different height/width.
        if image.shape[:2] != (target_h, target_w):
            image = cv2.resize(image, (target_w, target_h))

        # Create mask: one channel per class, left at zero when the class is absent
        mask = np.zeros((target_h, target_w, self.n_classes), dtype=np.float32)
        df_image = self.df[self.df['ImageId'] == image_id]

        for _, row in df_image.iterrows():
            class_id = self.CLASS_NAMES.index(row['ClassId'])
            rle = row['EncodedPixels']
            # A missing annotation is stored as NaN (a float), not a string.
            if isinstance(rle, str):
                class_mask = rle2mask(rle, self.RLE_SHAPE)
                class_mask = cv2.resize(class_mask, (target_w, target_h))
                mask[..., class_id] = class_mask

        # Apply augmentations
        if self.transforms:
            augmented = self.transforms(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # Transpose for PyTorch format (C, H, W)
        image = image.transpose(2, 0, 1).astype(np.float32)
        mask = mask.transpose(2, 0, 1).astype(np.float32)

        return torch.from_numpy(image), torch.from_numpy(mask)

In [None]:
# Single source of truth for the network input size, shared by the resize
# transforms and the datasets so the two cannot drift apart.
IMG_SIZE = (320, 640)  # (height, width)

# Albumentations pipeline
train_transforms = albu.Compose([
    albu.VerticalFlip(p=0.5),
    albu.HorizontalFlip(p=0.5),
    # cv2.BORDER_CONSTANT is the named form of border_mode=0
    albu.ShiftScaleRotate(scale_limit=0.2, rotate_limit=10, shift_limit=0.1, p=0.5,
                          border_mode=cv2.BORDER_CONSTANT),
    albu.GridDistortion(p=0.5),
    albu.Resize(*IMG_SIZE),
    albu.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

val_transforms = albu.Compose([
    albu.Resize(*IMG_SIZE),
    albu.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Create datasets
train_dataset = CloudDataset(df=train_df, image_ids=train_ids, data_folder=train_path,
                             img_size=IMG_SIZE, transforms=train_transforms)
val_dataset = CloudDataset(df=train_df, image_ids=val_ids, data_folder=train_path,
                           img_size=IMG_SIZE, transforms=val_transforms)

# Create dataloaders
BATCH_SIZE = 8
# os.cpu_count() may return None when the count cannot be determined;
# fall back to loading in the main process in that case.
NUM_WORKERS = os.cpu_count() or 0
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [None]:
def dice_coef(y_pred, y_true, smooth=1.0):
    """Smoothed Dice coefficient over all elements of the two inputs.

    Both inputs are flattened, so the score is computed globally rather than
    per sample or per channel. Works with any array type offering ``flatten``
    and ``sum`` (the notebook passes both torch tensors and numpy arrays).

    Args:
        y_pred: Predicted probabilities or a binary mask.
        y_true: Ground-truth mask with the same number of elements.
        smooth: Added to numerator and denominator; keeps the score defined
            (equal to 1) when both inputs are empty.

    Returns:
        ``(2 * |pred . true| + smooth) / (|pred| + |true| + smooth)``.
    """
    flat_pred, flat_true = y_pred.flatten(), y_true.flatten()
    overlap = (flat_pred * flat_true).sum()
    total = flat_pred.sum() + flat_true.sum()
    return (2. * overlap + smooth) / (total + smooth)

def bce_dice_loss(y_pred, y_true):
    """Sum of binary cross-entropy and Dice loss.

    Args:
        y_pred: Raw model outputs (logits).
        y_true: Target mask tensor of the same shape as ``y_pred``.

    Returns:
        Scalar tensor ``BCE(logits, target) + (1 - dice(sigmoid(logits), target))``.
    """
    # The logits form of BCE is used for numerical stability (mean reduction).
    bce_term = nn.functional.binary_cross_entropy_with_logits(y_pred, y_true)
    # Dice is defined on probabilities, hence the explicit sigmoid.
    probabilities = y_pred.sigmoid()
    dice_term = 1 - dice_coef(probabilities, y_true)
    return bce_term + dice_term

In [None]:
# Create U-Net model with a ResNet34 backbone
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=4,
)

# Setup device, optimizer, loss function, and scheduler
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

LEARNING_RATE = 1e-3
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
criterion = bce_dice_loss
# mode='max': the scheduler is stepped with the validation Dice score, which
# should increase. The `verbose` argument is deprecated in recent PyTorch
# releases and is therefore not passed; the current learning rate can be read
# from optimizer.param_groups[0]['lr'].
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

In [None]:
# Training configuration
EPOCHS = 15
BEST_MODEL_PATH = 'best_model.pth'
best_dice = 0.0

# Per-epoch metrics, consumed by the plotting cell
history = {'train_loss': [], 'val_loss': [], 'val_dice': []}

for epoch in range(EPOCHS):
    print(f"--- Epoch {epoch+1}/{EPOCHS} ---")

    # ---- Training phase: one optimizer step per batch ----
    model.train()
    train_batch_losses = []
    for batch_images, batch_masks in tqdm(train_loader, desc="Training"):
        batch_images = batch_images.to(device)
        batch_masks = batch_masks.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_masks)
        batch_loss.backward()
        optimizer.step()

        train_batch_losses.append(batch_loss.item())

    # Mean of the per-batch losses
    avg_train_loss = sum(train_batch_losses) / len(train_loader)
    history['train_loss'].append(avg_train_loss)

    # ---- Validation phase: no gradients, no parameter updates ----
    model.eval()
    val_batch_losses = []
    val_batch_dices = []
    with torch.no_grad():
        for batch_images, batch_masks in tqdm(val_loader, desc="Validation"):
            batch_images = batch_images.to(device)
            batch_masks = batch_masks.to(device)
            logits = model(batch_images)
            val_batch_losses.append(criterion(logits, batch_masks).item())
            # Dice is computed on probabilities, so apply the sigmoid first
            val_batch_dices.append(dice_coef(logits.sigmoid(), batch_masks).item())

    avg_val_loss = sum(val_batch_losses) / len(val_loader)
    avg_val_dice = sum(val_batch_dices) / len(val_loader)
    history['val_loss'].append(avg_val_loss)
    history['val_dice'].append(avg_val_dice)

    print(f"Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Dice: {avg_val_dice:.4f}")

    # ---- Scheduler and checkpointing, both driven by validation Dice ----
    scheduler.step(avg_val_dice)
    if avg_val_dice > best_dice:
        print(f"Dice score improved from {best_dice:.4f} to {avg_val_dice:.4f}. Saving model...")
        best_dice = avg_val_dice
        torch.save(model.state_dict(), BEST_MODEL_PATH)

In [None]:
# Two side-by-side panels: losses on the left, validation Dice on the right
fig, (loss_ax, dice_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(history['train_loss'], label='Train Loss')
loss_ax.plot(history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss History')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

dice_ax.plot(history['val_dice'], label='Validation Dice Score')
dice_ax.set_title('Dice Score History')
dice_ax.set_xlabel('Epoch')
dice_ax.set_ylabel('Dice Score')
dice_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Load the best performing model.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on GPU can still be loaded in a CPU-only session.
model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=device))
model.to(device)

def predict(model, loader, device):
    """Run the model over a loader and collect sigmoid probabilities.

    Args:
        model: Network producing logits; switched to eval mode here.
        loader: Iterable yielding ``(images, targets)`` batches; the targets
            are ignored.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        Numpy array with all batch outputs concatenated along axis 0, in
        loader order.
    """
    model.eval()
    batch_probs = []
    with torch.no_grad():
        for inputs, _targets in tqdm(loader, desc="Predicting"):
            logits = model(inputs.to(device))
            batch_probs.append(logits.sigmoid().cpu().numpy())
    return np.concatenate(batch_probs, axis=0)

# Predict on validation set to find thresholds
val_preds = predict(model, val_loader, device)

# Load the ground-truth masks exactly once. Indexing val_dataset inside the
# search loops would re-read and re-decode every image for every
# (class, threshold, min_size) combination. val_loader is not shuffled, so the
# order matches val_preds.
# NOTE(review): masks are stored as booleans, which assumes they are binary
# (0/1) as produced by rle2mask - confirm if soft masks are ever introduced.
val_true = np.concatenate(
    [masks.numpy().astype(bool) for _, masks in tqdm(val_loader, desc="Loading masks")],
    axis=0,
)
assert val_true.shape == val_preds.shape, "ground truth and predictions are misaligned"

THRESHOLDS = [t / 100 for t in range(0, 100, 5)]
MIN_SIZES = [1000, 5000, 10000, 15000, 20000]
DICE_SMOOTH = 1.0  # same smoothing as dice_coef's default

# Find best thresholds
class_params = {}
for class_id in range(4):
    print(f"Processing class {class_id+1}/4...")
    true_c = val_true[:, class_id]
    true_area = true_c.sum(axis=(1, 2)).astype(np.float64)

    attempts = []
    for t in THRESHOLDS:
        pred_c = val_preds[:, class_id] > t
        pred_area = pred_c.sum(axis=(1, 2)).astype(np.float64)
        overlap = (pred_c & true_c).sum(axis=(1, 2)).astype(np.float64)
        for ms in MIN_SIZES:
            # Post-processing: a predicted mask with fewer than `ms` positive
            # pixels is replaced by an empty mask, i.e. it contributes zero
            # overlap and zero predicted area.
            keep = pred_area >= ms
            dices = (2.0 * overlap * keep + DICE_SMOOTH) / (pred_area * keep + true_area + DICE_SMOOTH)
            attempts.append((t, ms, dices.mean()))

    attempts_df = pd.DataFrame(attempts, columns=['threshold', 'min_size', 'dice'])
    attempts_df = attempts_df.sort_values('dice', ascending=False)
    best_params = attempts_df.iloc[0]
    class_params[class_id] = {'threshold': best_params['threshold'], 'min_size': best_params['min_size']}
    print(f"Best params for class {class_id+1}: {class_params[class_id]}")

In [None]:
# Channel order of the model output / CloudDataset masks
CLASS_NAMES = ['Fish', 'Flower', 'Gravel', 'Sugar']
# (height, width) of the masks written to the submission.
# NOTE(review): the competition evaluates masks downscaled to 350x525 rather
# than the native 1400x2100 - confirm against the competition's evaluation page.
SUBMISSION_SHAPE = (350, 525)

# Create test dataset and loader
test_df = pd.read_csv('../input/understanding_cloud_organization/sample_submission.csv')
test_df['ImageId'] = test_df['Image_Label'].apply(lambda x: x.split('_')[0])
# CloudDataset reads row['ClassId'] for every row and raises KeyError without it.
test_df['ClassId'] = test_df['Image_Label'].apply(lambda x: x.split('_')[1])
# Test images have no ground truth; blank the RLE column so the dataset does
# not spend time decoding whatever placeholder values the sample file carries.
test_df['EncodedPixels'] = np.nan
test_image_ids = test_df['ImageId'].unique()

test_dataset = CloudDataset(df=test_df, image_ids=test_image_ids, data_folder=test_path, transforms=val_transforms)
# os.cpu_count() may return None; fall back to main-process loading then.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=os.cpu_count() or 0)

# Predict on test set
test_preds = predict(model, test_loader, device)

# Generate RLEs for submission, keyed by Image_Label so the result does not
# depend on the row order of the sample submission file.
rle_by_label = {}
for i, image_id in enumerate(tqdm(test_image_ids)):
    for class_id, class_name in enumerate(CLASS_NAMES):
        params = class_params[class_id]
        pred_mask = (test_preds[i, class_id, :, :] > params['threshold']).astype(np.uint8)
        # Same post-processing as in the threshold search
        if pred_mask.sum() < params['min_size']:
            pred_mask = np.zeros_like(pred_mask)

        # Rescale mask to the submission resolution and encode RLE
        # (cv2.resize takes the target size as (width, height))
        submission_mask = cv2.resize(pred_mask, (SUBMISSION_SHAPE[1], SUBMISSION_SHAPE[0]))
        rle_by_label[f"{image_id}_{class_name}"] = mask2rle(submission_mask)

# Create submission file
submission_df = pd.read_csv('../input/understanding_cloud_organization/sample_submission.csv')
missing_labels = set(submission_df['Image_Label']) - set(rle_by_label)
assert not missing_labels, f"no prediction for {len(missing_labels)} submission rows"
submission_df['EncodedPixels'] = submission_df['Image_Label'].map(rle_by_label)
submission_df.to_csv('submission.csv', index=False)
print("Submission file created successfully!")
submission_df.head()