In [2]:
pip install segmentation_models_pytorch

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Importing the libraries
import matplotlib.pyplot as plt
import os
import numpy as np
import cv2

# Import segmentation_models_pytorch as smp
import math
import torch
from torch.utils.data import DataLoader, Subset, distributed, Dataset

from typing import Dict, List
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

import segmentation_models_pytorch as smp

In [4]:
# Report whether PyTorch can see a CUDA device (torch.cuda.is_available()).
print('GPU available' if torch.cuda.is_available() else 'GPU not available')

GPU available


In [6]:
# Image dimensions; MarsSurface stores them as [IMAGE_HEIGHT, IMAGE_WIDTH].
IMAGE_WIDTH, IMAGE_HEIGHT = 128, 64

### encode_label_from_label_map

- **Input**:  
  - `label` (np.ndarray): A color image (H, W, 3) representing segmentation labels.  
  - `label_map` (Dict): A dictionary mapping class names to their corresponding RGB colors.  
- **Output**:  
  - `np.ndarray`: A one-hot encoded mask of shape (H, W, n_classes).  
- **Usage**: Converts a color-coded label into a one-hot encoded mask for semantic segmentation.  


### decode_mask_from_color_map

- **Input**:  
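  - `mask` (np.ndarray or torch.Tensor): A one-hot encoded mask. NumPy arrays are expected as (H, W, n_classes); tensors are expected as (n_classes, H, W) and are permuted to channel-last internally.  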
  - `color_map` (Dict): A dictionary mapping class names to their corresponding RGB colors.  
- **Output**:  
  - `np.ndarray`: A decoded color image of shape (H, W, 3).  
- **Usage**: Converts a one-hot encoded mask back into a color-coded label for visualization.  

In [27]:
def encode_label_from_label_map(label: np.ndarray, label_map: Dict) -> np.ndarray:
    """One-hot encode a label image according to ``label_map``.

    Args:
        label: Label image of shape (H, W) or (H, W, C).
        label_map: Mapping from class name to the value (or list of
            per-channel values) that marks that class in ``label``. The
            channel order of the output follows the dictionary's iteration
            order.

    Returns:
        A float32 array of shape (H, W, n_classes) holding 1 where the pixel
        equals the class value on every channel and 0 elsewhere. Pixels that
        match no class stay all-zero.
    """
    h, w = label.shape[:2]
    mask = np.zeros((h, w, len(label_map)), dtype=np.float32)

    for idx, color in enumerate(label_map.values()):
        matches = label == color
        if matches.ndim == 3:
            # A pixel belongs to the class only if every channel matches.
            # Reducing over the last axis works for any channel count, not
            # just the three-channel case.
            matches = matches.all(axis=-1)
        mask[matches, idx] = 1

    return mask

def decode_mask_from_color_map(mask, color_map: Dict):
    """Turn a one-hot mask into a colour image using ``color_map``.

    Args:
        mask: Either a NumPy array laid out as (H, W, n_classes) or a
            torch.Tensor laid out as (n_classes, H, W).
        color_map: Mapping from class name to the colour written for that
            class. Entry ``i`` (in iteration order) is applied to channel
            ``i`` of the mask.

    Returns:
        A uint8 array of shape (H, W, 3). Pixels whose mask value is not
        exactly 1 in any channel stay zero.
    """
    if isinstance(mask, torch.Tensor):
        # detach()/cpu() keep the conversion working for tensors that track
        # gradients or live on a GPU; permute moves channels last.
        mask = mask.detach().cpu().permute(1, 2, 0).numpy()

    h, w = mask.shape[:2]
    label = np.zeros((h, w, 3), dtype=np.uint8)

    for idx, color in enumerate(color_map.values()):
        label[mask[..., idx] == 1] = color

    return label

# Class name -> label value. MarsSurface passes this mapping to
# encode_label_from_label_map to one-hot encode the label maps.
label_map_dict = {f'Terrain{value + 1}': [value] for value in range(5)}

# Class name -> RGB triplet. MarsSurface passes this mapping to
# decode_mask_from_color_map to render one-hot masks as colour images.
color_map_dict = dict(
    Terrain1=[0, 0, 0],
    Terrain2=[254, 0, 254],
    Terrain3=[0, 253, 0],
    Terrain4=[0, 0, 252],
    Terrain5=[250, 250, 250],
)

# MarsSurface Class

`__init__(self, mode: str, X, y, label_map_dict, color_map_dict, train_transforms, valid_transforms)`
Initializes the dataset class for the Mars surface segmentation task.

- **Input**:
  - `mode` (str): Specifies the mode of the dataset, one of ["train", "valid", "test", "debug"].
  - `X`: The input images (array or list-like structure).
  - `y`: The corresponding labels for the images.
  - `label_map_dict` (dict): A dictionary mapping class names to their labels.
  - `color_map_dict` (dict): A dictionary mapping class names to their RGB color representation.
  - `train_transforms` (callable): Transformations to be applied during training.
  - `valid_transforms` (callable): Transformations to be applied during validation/testing.

- **Output**: None


`_read_data(self, item: int)`
Reads and preprocesses a single data point from the dataset.

- **Input**:
  - `item` (int): Index of the data point to read.

- **Output**:
  - `image` (np.ndarray): The preprocessed image.
  - `label` (np.ndarray): The corresponding label encoded using `label_map_dict`.


`_decode_label(self, label_map)`
Encodes the label map into a one-hot encoded mask.

- **Input**:
  - `label_map` (np.ndarray): The label map for the given image.

- **Output**:
  - `mask` (np.ndarray): One-hot encoded mask of shape (H, W, n_classes).


`__len__(self)`
Returns the total number of samples in the dataset.

- **Input**: None

- **Output**:
  - (int): Number of samples.


`__getitem__(self, item: int)`
Fetches a data point, applies transformations, and prepares it for training or evaluation.

- **Input**:
  - `item` (int): Index of the data point.

- **Output**:
  - `image` (torch.Tensor): The transformed image tensor of shape (3, H, W).
  - `mask` (torch.Tensor): The one-hot encoded mask tensor of shape (n_classes, H, W).
  - `zero_mask` (torch.Tensor): The complement of the first class channel (`1 - mask[0]`), i.e. 1 wherever the pixel is not assigned to the first class, repeated to shape (n_classes, H, W).
  - `_label` (torch.Tensor): The decoded RGB mask tensor of shape (3, H, W).

In [29]:
class MarsSurface(Dataset):
    """Dataset pairing Mars surface images with one-hot segmentation masks.

    Each item is returned as ``(image, mask, zero_mask, label)`` tensors; see
    ``__getitem__`` for their layout.
    """

    def __init__(self, mode: str, X, y, label_map_dict, color_map_dict, train_transforms = None, valid_transforms = None):
        """Store the data and the class mappings.

        Args:
            mode: One of "train", "valid", "test" or "debug".
            X: Indexable collection of images.
            y: Indexable collection of label maps, aligned with ``X``.
            label_map_dict: Class name -> label value, used to one-hot encode
                the label maps.
            color_map_dict: Class name -> colour, used to render the masks.
            train_transforms: Kept in ``self.transforms`` when ``mode`` is
                "train" or "debug". Defaults to None so that defining the
                class does not depend on a module-level ``train_transforms``.
            valid_transforms: Kept in ``self.transforms`` for the other modes.
                Defaults to None for the same reason.

        Note:
            ``self.transforms`` is only stored; ``__getitem__`` does not apply it.
        """
        assert mode in ["train", "valid", "test", "debug"], "Invalid value for self.mode, type 'train', 'valid', 'test' or 'debug'"
        self.label_map_dict = label_map_dict
        self.color_map_dict = color_map_dict
        self.mode = mode
        self.transforms = train_transforms if self.mode in ["train", "debug"] else valid_transforms

        self.img_size = [IMAGE_HEIGHT, IMAGE_WIDTH]
        self.X = X
        self.y = y

    def _read_data(self, item: int):
        """Return the image and one-hot label stored at index ``item``.

        Images whose maximum exceeds 1 are divided by 255 and cast to float32;
        other images are returned unchanged.
        """
        image = self.X[item]
        if image.max() > 1:
            image = (image / 255).astype("float32")
        label = self._decode_label(self.y[item])

        return image, label

    def _decode_label(self, label_map):
        """One-hot encode ``label_map`` with ``self.label_map_dict``.

        Despite its name this method encodes: it delegates to
        ``encode_label_from_label_map``.
        """
        return encode_label_from_label_map(label_map, self.label_map_dict)

    def __len__(self):
        """Number of samples, taken from ``len(self.X)``."""
        return len(self.X)

    def __getitem__(self, item: int):
        """Build the tensors for sample ``item``.

        Returns:
            image: Float tensor, channels first. 2-D images are repeated onto
                three channels before the permute.
            mask: Float one-hot mask of shape (n_classes, H, W).
            zero_mask: ``1 - mask[0]`` repeated once per class channel, so it
                has the same shape as ``mask``.
            _label: Float tensor of shape (3, H, W) holding the mask rendered
                with ``self.color_map_dict``.
        """
        image, mask = self._read_data(item)
        if image.ndim == 2:
            image = np.repeat(image[..., np.newaxis], repeats=3, axis=2)

        _label = decode_mask_from_color_map(mask, self.color_map_dict)

        mask = torch.from_numpy(mask).permute(2, 0, 1).float()
        image = torch.from_numpy(image).permute(2, 0, 1).float()
        _label = torch.from_numpy(_label).permute(2, 0, 1).float()

        # hide values on mask which aren't annotated
        zero_mask = 1 - mask[0, ...]
        # One copy per class channel instead of a hard-coded 5, so the shape
        # follows the mask for any number of classes
        zero_mask = torch.stack([zero_mask] * mask.shape[0])

        return image, mask, zero_mask, _label

In [None]:
# Loads some semantic segmentation models with different encoder architectures to later perform ensemble.

base_path = '/kaggle/input/models'

model_name = ['/mit_b2.pt', '/mit_b4.pt', '/mobileone_s4.pt']

# Chosen once up front: every model is moved to this device and the dataset
# builders send their inputs to it.
infer_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _load_inference_model(architecture, encoder_name, checkpoint_path, device, n_classes = 5):
    """Build a segmentation model, load its checkpoint and put it in eval mode.

    Args:
        architecture: Model constructor, e.g. ``smp.PSPNet`` or ``smp.FPN``.
        encoder_name: Encoder identifier passed to the constructor.
        checkpoint_path: Path of the state-dict file to load.
        device: Device the finished model is moved to.
        n_classes: Number of output classes.

    Returns:
        The model in eval mode on ``device``.
    """
    net = architecture(
        encoder_name = encoder_name,
        classes = n_classes,
        encoder_weights = None,
        in_channels = 3,
        activation = None
    )
    # The checkpoint is mapped to the CPU while loading; the model is moved afterwards
    net.load_state_dict(
        torch.load(checkpoint_path, weights_only=True, map_location='cpu')
    )
    net.eval()
    return net.to(device)


model_infer1 = _load_inference_model(smp.PSPNet, 'mit_b2', base_path + model_name[0], infer_device)
model_infer2 = _load_inference_model(smp.PSPNet, 'mit_b4', base_path + model_name[1], infer_device)
model_infer3 = _load_inference_model(smp.FPN, 'mobileone_s4', base_path + model_name[2], infer_device)

models = [model_infer1, model_infer2, model_infer3]

In [17]:
# Read the archive that holds both the training and the test arrays
data = np.load('/kaggle/input/mars-no-outliers/mars_no_outliers.npz')
training_set = data['training_set']

# Index 0 along axis 1 is later used as the images (X), index 1 as the labels (y)
X, y = training_set[:, 0], training_set[:, 1]

X_test = data['test_set']

### `create_dataset_training`

### Purpose
Generates a processed training dataset by running ensemble model predictions on the input data (`X`, `y`) and saving the results along with their masks.

---

### Steps
1. **Dataset and Dataloader**:  
   Initializes a `MarsSurface` dataset in validation mode and processes it with a `DataLoader`.

2. **Model Inference**:  
   Runs predictions using an ensemble of models, scaling outputs to a 0–255 range. Predictions are concatenated and stored.

3. **Save Results**:  
   Saves the processed predictions (`final_dataset`) and ground truth masks (`final_masks`) to an `.npz` file with `np.savez` (which does not compress the arrays).

---

### Output
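- File: `ensembled_dataset_train.npz` containing:
  - `dataset`: Predictions with shape `(N, 1, C, H, W)` — each sample keeps its batch dimension of size 1, and `C` is the number of ensemble models.
  - `masks`: Ground-truth class-index masks with shape `(N, H, W)`.

Inference runs under `torch.no_grad()`.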


In [21]:
def create_dataset_trianing(X, y):
    """Run the ensemble over (X, y) and save predictions plus ground truth.

    Every model in the module-level ``models`` list predicts a class map for
    each sample. The class indices are scaled to the 0-255 range and the
    per-model maps are concatenated along the channel axis. The result is
    written to ``ensembled_dataset_train.npz`` under the keys ``dataset``
    (predictions) and ``masks`` (class-index ground truth).

    Args:
        X: Images, indexable per sample.
        y: Label maps aligned with ``X``.
    """
    dataset = MarsSurface('valid', X, y, label_map_dict, color_map_dict)

    loader = DataLoader(
        dataset,
        batch_size = 1,
        num_workers = 1,
        shuffle = False,
        drop_last = False,
        pin_memory = True
    )

    final_dataset = []
    final_masks = []

    with torch.no_grad():
        for inputs, masks, _zero_mask, _label in tqdm(loader, total = len(loader), dynamic_ncols=True):
            inputs = inputs.to(infer_device)
            preds = []
            for model in models:
                logits = model(inputs)
                # Highest class index, derived from the class-channel count
                # (4.0 for the five-class models); guarded against a single channel
                max_class_index = float(max(logits.shape[1] - 1, 1))
                class_map = torch.argmax(logits, dim = 1).unsqueeze(1)
                preds.append((class_map / max_class_index) * 255.0)

            # Move each result to host memory right away so device tensors
            # do not pile up over the whole dataset
            final_dataset.append(torch.cat(preds, dim=1).cpu().numpy())

            # One-hot (n_classes, H, W) -> class-index (H, W)
            one_hot = torch.squeeze(masks, dim=0).numpy()
            final_masks.append(np.argmax(one_hot, axis=0))

    # NOTE(review): with batch_size = 1 each entry keeps its batch axis, so the
    # stacked predictions are (N, 1, n_models, H, W) — confirm that downstream
    # code expects this rather than (N, n_models, H, W).
    final_dataset = np.array(final_dataset)
    final_masks = np.array(final_masks)  # Shape: (N, H, W)
    np.savez('ensembled_dataset_train.npz', dataset = final_dataset, masks = final_masks)

    print('Training dataset created')

In [19]:
# Similar to create_dataset_trianing(X, y) but without adding ground truth to the dataset.
def create_dataset_testing(X):
    """Run the ensemble over X and save the predictions.

    Every model in the module-level ``models`` list predicts a class map for
    each sample. The class indices are scaled to the 0-255 range and the
    per-model maps are concatenated along the channel axis. The result is
    written to ``ensembled_dataset_testing.npz`` under the key ``dataset``.

    Args:
        X: Images, indexable per sample.
    """
    # MarsSurface requires labels; the images are passed as placeholders and
    # the resulting masks are ignored below.
    dataset = MarsSurface('valid', X, X, label_map_dict, color_map_dict)

    loader = DataLoader(
        dataset,
        batch_size = 1,
        num_workers = 1,  # was `num_worker`, which DataLoader rejects with a TypeError
        shuffle = False,
        drop_last = False,
        pin_memory = True,
    )

    final_dataset = []

    with torch.no_grad():
        for inputs, _masks, _zero_mask, _label in tqdm(loader, total=len(loader), dynamic_ncols=True):
            inputs = inputs.to(infer_device)
            preds = []
            for model in models:
                logits = model(inputs)
                # Highest class index, derived from the class-channel count
                # (4.0 for the five-class models); guarded against a single channel
                max_class_index = float(max(logits.shape[1] - 1, 1))
                class_map = torch.argmax(logits, dim=1).unsqueeze(1)
                preds.append((class_map / max_class_index) * 255.0)

            # Move each result to host memory right away so device tensors
            # do not pile up over the whole dataset
            final_dataset.append(torch.cat(preds, dim=1).cpu().numpy())

    # NOTE(review): with batch_size = 1 each entry keeps its batch axis, so the
    # stacked predictions are (N, 1, n_models, H, W) — confirm that downstream
    # code expects this rather than (N, n_models, H, W).
    final_dataset = np.array(final_dataset)
    np.savez('ensembled_dataset_testing.npz', dataset=final_dataset)

    print("Test dataset created")

In [None]:
# Build and save both ensemble-prediction datasets: the training split (with
# ground-truth masks) and the test split (predictions only).
create_dataset_trianing(X, y)
create_dataset_testing(X_test)