In [None]:
!pip install -q segmentation-models-pytorch albumentations

In [None]:
import os
import json
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# PyTorch & related libraries
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import segmentation_models_pytorch as smp

# Other utilities
import albumentations as albu
from skimage.exposure import adjust_gamma
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

In [None]:
def post_process(probability, threshold, min_size):
    """
    Binarize one predicted probability map and drop small connected components.

    Args:
        probability: 2-D single-channel array of scores, in a dtype accepted by
            `cv2.threshold`.
        threshold: pixels strictly greater than this value become foreground.
        min_size: a connected component is kept only if its pixel count is
            strictly greater than this value.

    Returns:
        (predictions, num): `predictions` is a float32 array with the same
        height/width as `probability`, holding 1 on kept components and 0
        elsewhere; `num` is the number of kept components.
    """
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    # Size the output from the label image instead of a hard-coded (350, 525),
    # so the function works for any input resolution.
    predictions = np.zeros(component.shape, np.float32)
    num = 0
    # Label 0 is the background, so real components start at 1.
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num

def np_resize(img, input_shape):
    """
    Resize `img` to `input_shape`, given in numpy order (height, width).

    `cv2.resize` expects its target size as (width, height), so the two
    values are swapped before the call.
    """
    target_h, target_w = input_shape
    cv2_size = (target_w, target_h)
    return cv2.resize(img, cv2_size)

def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of "start length" pairs.
    Pixels are traversed column by column (the array is transposed before
    flattening) and starts are 1-based.
    '''
    column_major = img.T.flatten()
    # Zero sentinels on both ends so runs touching the borders are detected.
    padded = np.concatenate([[0], column_major, [0]])
    # Positions (1-based) where the value changes: run start, run end, ...
    change_points = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Convert every "end" entry into a run length.
    change_points[1::2] -= change_points[::2]
    return ' '.join(map(str, change_points))

def rle2mask(rle, input_shape):
    """
    Decode a run-length encoded string into a binary mask.

    This is the inverse of `mask2rle`: the string holds space-separated
    "start length" pairs, starts are 1-based, and pixels are numbered column
    by column (top to bottom, then left to right).

    Args:
        rle: run-length encoded string; an empty string yields an all-zero mask.
        input_shape: (height, width[, ...]) of the mask to produce; only the
            first two entries are used.

    Returns:
        uint8 array of shape (height, width) with 1 on encoded pixels.

    Raises:
        ValueError: if `rle` contains an odd number of values.
    """
    height, width = input_shape[:2]
    mask = np.zeros(height * width, dtype=np.uint8)

    values = np.asarray([int(x) for x in rle.split()], dtype=np.int64)
    if values.size % 2:
        raise ValueError("RLE string must contain start/length pairs")

    # Starts are 1-based in the encoding; shift them to 0-based array offsets.
    starts = values[0::2] - 1
    lengths = values[1::2]
    for start, length in zip(starts, lengths):
        mask[start:start + length] = 1

    # The flat buffer is in column-major order: view it as (width, height)
    # and transpose to obtain the (height, width) image.
    return mask.reshape(width, height).T

def build_masks(rles, input_shape, reshape=None):
    """
    Stack the decoded masks of several RLE strings into one array.

    Args:
        rles: sequence of run-length encoded strings; entries that are not of
            type `str` (e.g. NaN for a missing mask) leave their channel zero.
        input_shape: shape passed to `rle2mask` when decoding each entry.
        reshape: optional (height, width); when given, every decoded mask is
            resized to it with `np_resize`.

    Returns:
        Array of shape (*reshape, len(rles)) if `reshape` is given, otherwise
        (*input_shape, len(rles)).
    """
    out_shape = input_shape if reshape is None else reshape
    masks = np.zeros((*out_shape, len(rles)))

    for channel, rle in enumerate(rles):
        if type(rle) is not str:
            # No mask for this class: keep the all-zero channel.
            continue
        decoded = rle2mask(rle, input_shape)
        if reshape is not None:
            decoded = np_resize(decoded, reshape)
        masks[:, :, channel] = decoded

    return masks

def build_rles(masks, reshape=None):
    """
    Run-length encode every channel of a 3-D mask stack.

    Args:
        masks: array whose last axis indexes the individual masks.
        reshape: optional (height, width); when truthy, each mask is cast to
            float32, resized with `np_resize` and cast to int64 before encoding.

    Returns:
        List with one RLE string (from `mask2rle`) per channel.
    """
    _, _, depth = masks.shape

    def _encode(channel):
        # Encode a single channel, resizing first when requested.
        mask = masks[:, :, channel]
        if reshape:
            mask = np_resize(mask.astype(np.float32), reshape).astype(np.int64)
        return mask2rle(mask)

    return [_encode(channel) for channel in range(depth)]

In [None]:
# Training labels: one row per (image, class) pair.
train_df = pd.read_csv('../input/understanding_cloud_organization/train.csv')
# `Image_Label` is split on '_' into the image id (first part) and the class id (second part).
train_df['ImageId'] = train_df['Image_Label'].apply(lambda x: x.split('_')[0])
train_df['ClassId'] = train_df['Image_Label'].apply(lambda x: x.split('_')[1])
train_df['hasMask'] = ~ train_df['EncodedPixels'].isna()

# Number of non-empty masks per image, most-annotated images first.
# Only `hasMask` is aggregated: summing the whole frame with `agg(np.sum)` also
# tries to add up the string/NaN object columns, which raises on pandas >= 2.0.
mask_count_df = (
    train_df.groupby('ImageId', as_index=False)['hasMask']
    .sum()
    .sort_values('hasMask', ascending=False)
)

# Submission template and the list of unique test images derived from it.
sub_df = pd.read_csv('../input/understanding_cloud_organization/sample_submission.csv')
sub_df['ImageId'] = sub_df['Image_Label'].apply(lambda x: x.split('_')[0])
test_imgs = pd.DataFrame(sub_df['ImageId'].unique(), columns=['ImageId'])

In [None]:
class CloudDataset(Dataset):
    """
    Dataset that loads an image from disk, preprocesses it and, unless in
    'predict' mode, also builds its stack of masks.

    Args:
        df: DataFrame with an 'ImageId' column (and an 'EncodedPixels' column
            when masks are requested).
        list_IDs: positional row indices into `df`; item `i` of the dataset
            uses the image at `df['ImageId'].iloc[list_IDs[i]]`.
        mode: 'predict' returns only the image tensor; any other value returns
            `(image, masks)`.
        base_path: directory that contains the image files.
        dim: shape passed to `build_masks` as the size of the encoded masks.
        reshape: optional (height, width) the image and masks are resized to.
        gamma: optional gamma passed to `adjust_gamma`; skipped when falsy.
        augment_fn: stored on the instance; not applied by `__getitem__`.
        n_classes: stored on the instance; not used by this class's methods.
    """

    def __init__(self, df, list_IDs, mode='fit',
                 base_path='../input/understanding_cloud_organization/train_images',
                 dim=(1400, 2100), reshape=(320, 480), gamma=0.8,
                 augment_fn=None, n_classes=4):
        self.dim = dim
        self.df = df
        self.mode = mode
        self.base_path = base_path
        self.list_IDs = list_IDs
        self.reshape = reshape
        self.gamma = gamma
        self.augment_fn = augment_fn
        self.n_classes = n_classes

    def __len__(self):
        """Return the number of samples (length of `list_IDs`)."""
        return len(self.list_IDs)

    def _load_image(self, img_path):
        """Read `img_path` and return it as a float32 (C, H, W) tensor scaled by 1/255."""
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.reshape:
            # cv2.resize takes (width, height); self.reshape is (height, width).
            img = cv2.resize(img, (self.reshape[1], self.reshape[0]))
        if self.gamma:
            img = adjust_gamma(img, gamma=self.gamma)

        # Normalize and transpose for PyTorch (C, H, W)
        img = img.astype(np.float32) / 255.0
        img = img.transpose(2, 0, 1)
        return torch.from_numpy(img)

    def __getitem__(self, index):
        """
        Return the image tensor for `index`, plus its mask tensor unless the
        dataset is in 'predict' mode.
        """
        img_id = self.list_IDs[index]
        im_name = self.df['ImageId'].iloc[img_id]
        img_tensor = self._load_image(f"{self.base_path}/{im_name}")

        # Handle prediction case (no labels)
        if self.mode == 'predict':
            return img_tensor

        # One mask channel per row of `df` that belongs to this image.
        image_df = self.df[self.df['ImageId'] == im_name]
        rles = image_df['EncodedPixels'].values
        # build_masks distinguishes "no resize" by `None`, so map any falsy
        # reshape value (consistent with the image path above) to None.
        masks = build_masks(rles, input_shape=self.dim, reshape=self.reshape or None)
        masks = masks.transpose(2, 0, 1)  # (H, W, C) -> (C, H, W)
        masks_tensor = torch.from_numpy(masks.astype(np.float32))

        return img_tensor, masks_tensor

In [None]:
# Select the compute device: GPU when available, CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build a U-Net with an EfficientNet-B4 encoder (ImageNet weights),
# 3 input channels and 4 output classes, and place it on the device
model = smp.Unet(
    classes=4,
    in_channels=3,
    encoder_weights="imagenet",
    encoder_name="efficientnet-b4",
).to(device)
print(f"Model loaded on {device}")

In [None]:
# The original notebook used: model.load_weights("../input/cloudmodels/EfficientNetB4.h5")
# Below is the PyTorch counterpart; the path must point to a valid .pth state dict.
# --- YOU MUST PROVIDE A PYTORCH-COMPATIBLE .pth WEIGHT FILE HERE ---
weights_path = "../input/your-pytorch-weights/efficientnet-b4-unet.pth"
try:
    # NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
    model.load_state_dict(torch.load(weights_path, map_location=device))
except FileNotFoundError:
    # Deliberate best-effort: keep going with the model as constructed.
    print("Warning: PyTorch weights file not found. Using pre-trained ImageNet weights for the encoder.")
else:
    print(f"Loaded weights from {weights_path}")

In [None]:
# Prediction parameters (unchanged)
minsizes = [20000, 20000, 22500, 10000]  # per-class `min_size` handed to post_process
sigmoid = lambda x: 1 / (1 + np.exp(-x))  # kept for compatibility; the loop uses Tensor.sigmoid()
pred_threshold = 0.6            # probability cut-off handed to post_process
submission_shape = (350, 525)   # (height, width) the masks are encoded at

# `num_cores` was used below without being defined anywhere in the notebook,
# which raised NameError on a fresh kernel. os.cpu_count() may return None,
# in which case 0 makes the DataLoader load data in the main process.
num_cores = os.cpu_count() or 0

# Create test dataset and dataloader
test_dataset = CloudDataset(
    df=test_imgs,
    list_IDs=list(range(test_imgs.shape[0])),
    mode='predict',
    base_path='../input/understanding_cloud_organization/test_images',
    reshape=(320, 480),
    gamma=0.8
)

test_loader = DataLoader(
    test_dataset,
    batch_size=4, # You can increase batch_size for faster inference on GPU
    shuffle=False,  # order must match test_imgs: rows are looked up by position below
    num_workers=num_cores
)

# Prediction loop
test_df_list = []
model.eval() # Set model to evaluation mode
with torch.no_grad():
    for i, batch_images in enumerate(tqdm(test_loader)):
        batch_images = batch_images.to(device)
        batch_preds = model(batch_images)
        # Apply sigmoid and move to CPU
        batch_preds = batch_preds.sigmoid().cpu().numpy()

        for j in range(batch_preds.shape[0]):
            # Position of this sample in test_imgs (the loader is not shuffled)
            original_idx = i * test_loader.batch_size + j
            filename = test_imgs['ImageId'].iloc[original_idx]
            image_df = sub_df[sub_df['ImageId'] == filename].copy()

            # Transpose back to (H, W, C) for cv2/numpy functions
            pred_masks = batch_preds[j,].transpose(1, 2, 0)
            # cv2.resize takes dsize as (width, height)
            pred_masks = cv2.resize(
                pred_masks,
                dsize=(submission_shape[1], submission_shape[0]),
                interpolation=cv2.INTER_LINEAR,
            )

            n_channels = len(minsizes)
            processed_masks = np.zeros((*submission_shape, n_channels), dtype=np.float32)
            for t in range(n_channels):
                cleaned, _ = post_process(pred_masks[:, :, t], pred_threshold, minsizes[t])
                processed_masks[:, :, t] = cleaned

            # One RLE per channel, assigned to this image's submission rows in order
            pred_rles = build_rles(processed_masks)
            image_df['EncodedPixels'] = pred_rles
            test_df_list.append(image_df)

final_sub_df = pd.concat(test_df_list)

In [None]:
# Keep only the two submission columns, write the CSV and preview the first rows
submission_columns = ['Image_Label', 'EncodedPixels']
final_sub_df = final_sub_df.loc[:, submission_columns]
final_sub_df.to_csv('submission.csv', index=False)
display(final_sub_df.head(10))