# Kaggler: SenNet + HOA - Hacking the Human Vasculature in 3D

<hr>
<b>Team:</b> Kaggler <br>
<b>Team Members:</b> Hyunji Cha, Minjae Jeong, Min Joh, Jin You <br><br>

> <b>Competition Information:</b><br>
Title: SenNet + HOA - Hacking the Human Vasculature in 3D <br>
Host: SenNet + HOA <br>
Platform: Kaggle <br>
Final Submission Deadline: February 6, 2024 <br>
Link: https://www.kaggle.com/competitions/blood-vessel-segmentation <br>

<hr>

## 0. Setups

In [1]:
import os
import time
import sys
import datetime
from glob import glob

import numpy as np
import pandas as pd
from skimage import io, transform, exposure
from tqdm import tqdm
import albumentations as Alb

import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, Subset, random_split
from torchinfo import summary

## 1. Configuration

In [2]:
class CFG:
    """
    Configuration class for the blood vessel segmentation project.
    """
    # ============== Data Paths =============
    DATASET_DIRECTORY = os.path.abspath(os.path.join("..", "..", "kaggle", "input", "blood-vessel-segmentation"))
    TRAIN_DATASET_NAME = "kidney_1_dense"
    # DATASET_DIRECTORY = os.path.abspath(os.path.join("..", "blood-vessel-segmentation"))
    TRAIN_DATASET_DIRECTORY = os.path.join(DATASET_DIRECTORY, "train", TRAIN_DATASET_NAME)
    TEST_DATASET_NAME = "kidney_2"
    TEST_DATASET_DIRECTORY = os.path.join(DATASET_DIRECTORY, "train", TEST_DATASET_NAME)

    # ============== Model Configuration =============
    MODEL_NAME = 'AUNet'
    IN_CHANNEL = 1  # Number of input channels (e.g., 1 for grayscale images, 3 for RGB images)
    OUT_CHANNEL = 1 # Number of output channels

    # ============== Model Paths =============
    #CHECKPOINT_TEST_PATH = os.path.abspath("..")
    # List of paths to trained model weights
    CHECKPOINT_PATH = os.path.abspath(os.path.join("..", "..", "kaggle", "working", "checkpoints"))
    TRAINED_MODEL = f"{MODEL_NAME}_checkpoint.pth"
    #TRAINED_MODEL = f"{MODEL_NAME}_kidney_1_dense_checkpoint.pth"
    #/kaggle/input/attention-unet/pytorch/real/{version}
    MODEL_PATH = os.path.abspath(os.path.join("..", "..", "kaggle", "input", "attention-unet", "pytorch", "real", "1", "checkpoints"))
    #MODEL_PATH = None

    # ============== Image Processing Settings =============
    INPUT_IMAGE_SIZE = (512, 512)  # Size of the input images (height x width)
    GAMMA = 1
    NUM_IMAGES = None # 'None' for all images
    BINARY_CHECK = False

    # ============== Training and Validation Parameters =============
    IF_TRAIN = False
    IF_TEST = False
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    #VALID_ID = 1  # ID for validation dataset or fold (e.g., 1 first fold is being used as validation set)
    BATCH_SIZE = 1  # Batch size for model training
    THRESHOLD_PERCENTILE = 0.0014109  # Threshold for post-processing
    LEARNING_RATE = 1e-4
    NUM_EPOCHS = 30
    NUM_WORKERS = 4 # Number of processes
    TRAIN_TEST_SPLIT_RATIO = 0.2 # for test subset
    
    # ============== Training and Validation Parameters =============
    IF_SUBMISSION = True

In [3]:
print(CFG.DEVICE)

cuda


## 2. Load Dataset

### 2.1 Custom Dataset Class

In [4]:
class CustomDataset(Dataset):
    def __init__(self, image_path, mask_path=None, target_image_size=(512, 512), gamma=None, image_format=".tif"):
        # File IO
        self.gamma = gamma if gamma is not None else 1
        self.image_path = image_path
        self.image_format = image_format
        self.image_filenames = sorted([os.path.join(self.image_path, filename) for filename in os.listdir(self.image_path) if filename.endswith(self.image_format)])
        print(f"{len(self.image_filenames)} images are loaded.")

        # If masks (labels) are given
        self.mask_path = mask_path
        if self.mask_path:
            self.mask_filenames = sorted([os.path.join(self.mask_path, filename) for filename in os.listdir(self.mask_path) if filename.endswith(self.image_format)])
            if not len(self.mask_filenames) == len(self.image_filenames):
                self.image_filenames, self.mask_filenames = self.match_images_and_labels(self.image_filenames, self.mask_filenames)
            print(f"{len(self.mask_filenames)} labels are loaded.")

        # Image processing
        self.original_image_size = None
        self.target_image_size = target_image_size
        self.gamma = gamma

        # Process and print the size of the first image if convert_size is True
        if self.target_image_size and len(self.image_filenames) > 0:
            first_image = io.imread(self.image_filenames[0])
            resized_image = self.resize_and_pad(first_image, self.target_image_size)
            self.original_image_size = first_image.shape
            print(f"Original size: {self.original_image_size}, Converted size: {resized_image.shape}")

    def match_images_and_labels(self, image_filenames, mask_filenames):
      matched_images = []
      matched_labels = []

      image_basenames = {os.path.basename(path).split('.')[0] for path in image_filenames}
      label_basenames = {os.path.basename(path).split('.')[0] for path in mask_filenames}

      common_basenames = image_basenames.intersection(label_basenames)

      for basename in common_basenames:
        matched_images.append(os.path.join(self.image_path, basename + self.image_format))
        matched_labels.append(os.path.join(self.mask_path, basename + self.image_format))

      return sorted(matched_images), sorted(matched_labels)

    def __getitem__(self, index):
        # Load image and convert to float32
        # imread returns 2d numpy array (grayscale) and 3d numpy array (h,w,c) (rgb)
        image = io.imread(self.image_filenames[index]).astype(np.float32)

        # Resize and pad the image if convert_size is True
        if self.target_image_size:
            image = self.resize_and_pad(image, self.target_image_size, self.gamma)

        # Extract the filename
        # os.path.basename extract the base name (i.e., the final part) of a pathname
        image_filename = os.path.basename(self.image_filenames[index])

        # If masks (labels) are given
        if self.mask_path:
            mask = io.imread(self.mask_filenames[index]).astype(np.uint8) # np.uint8 is used for binary masks due to its memory efficiency
            mask = self.resize_and_pad(mask, self.target_image_size, is_mask=True)

            # Convert mask from [0, 255] to [0, 1]
            mask = (mask > 0).astype(np.uint8)  # This converts all non-zero values to 1

            return image, mask, image_filename

        return image, image_filename

    def __len__(self):
        return len(self.image_filenames)

    def resize_and_pad(self, image, target_image_size, is_mask=False):

        gamma = self.gamma

        """Resize an image or mask and add padding to keep aspect ratio."""
        # Calculate scale and padding
        h, w = image.shape[:2]
        scale = min(target_image_size[0] / h, target_image_size[1] / w)
        new_h, new_w = int(h * scale), int(w * scale)
        pad_h = (target_image_size[0] - new_h) // 2
        pad_w = (target_image_size[1] - new_w) // 2

        # Set parameters for resizing based on whether it's an image or a mask
        if is_mask:
            # Use nearest-neighbor interpolation for masks
            order = 0
            anti_aliasing = False
        else:
            # Use bilinear interpolation (order=1) for images
            order = 1
            anti_aliasing = True # minimize visual distortions when reducing the image size; averaging the colors of the pixels at the edges of contrasting areas (blurs edges)

        # Resize image with preserve_range set to True
        image_resized = transform.resize(
            image,
            (new_h, new_w),
            anti_aliasing=anti_aliasing,
            mode='constant',
            preserve_range=True,  # Preserve the original image's intensity range
            order=order
        )

        # Apply gamma correction to images only
        if not is_mask and gamma != 1:
            image_resized = exposure.adjust_gamma(image_resized, gamma)

        # Add a channel dimension to grayscale images if necessary
        if len(image.shape) == 2:  # Grayscale image
            image_resized = image_resized[..., np.newaxis]

        # Initialize padded image
        padded_image = np.zeros((target_image_size[0], target_image_size[1], image_resized.shape[2]), dtype=image_resized.dtype)

        # Insert the resized image into the padded image
        padded_image[pad_h:pad_h+new_h, pad_w:pad_w+new_w, :] = image_resized

        return padded_image

In [5]:
class PipelineDataset(Dataset):
    def __init__(self, image_filenames, target_image_size=(512, 512), gamma=None, image_format=".tif"):
        # File IO
        self.gamma = gamma if gamma is not None else 1
        self.image_format = image_format
        
        # if filenames are given
        self.image_filenames = image_filenames

        # Image processing
        self.original_image_size = None
        self.target_image_size = target_image_size
        self.gamma = gamma

        # Process and print the size of the first image if convert_size is True
        if self.target_image_size and len(self.image_filenames) > 0:
            first_image = io.imread(self.image_filenames[0])
            resized_image = self.resize_and_pad(first_image, self.target_image_size)
            self.original_image_size = first_image.shape
            print(f"Original size: {self.original_image_size}, Converted size: {resized_image.shape}")

    def __getitem__(self, index):
        # Load image and convert to float32
        # imread returns 2d numpy array (grayscale) and 3d numpy array (h,w,c) (rgb)
        image = io.imread(self.image_filenames[index]).astype(np.float32)

        # Resize and pad the image if convert_size is True
        if self.target_image_size:
            image = self.resize_and_pad(image, self.target_image_size, self.gamma)

        # Extract the filename
        # os.path.basename extract the base name (i.e., the final part) of a pathname
        image_filename = os.path.basename(self.image_filenames[index])

        return image, image_filename

    def __len__(self):
        return len(self.image_filenames)

    def resize_and_pad(self, image, target_image_size, is_mask=False):

        gamma = self.gamma

        """Resize an image or mask and add padding to keep aspect ratio."""
        # Calculate scale and padding
        h, w = image.shape[:2]
        scale = min(target_image_size[0] / h, target_image_size[1] / w)
        new_h, new_w = int(h * scale), int(w * scale)
        pad_h = (target_image_size[0] - new_h) // 2
        pad_w = (target_image_size[1] - new_w) // 2

        # Set parameters for resizing based on whether it's an image or a mask
        if is_mask:
            # Use nearest-neighbor interpolation for masks
            order = 0
            anti_aliasing = False
        else:
            # Use bilinear interpolation (order=1) for images
            order = 1
            anti_aliasing = True # minimize visual distortions when reducing the image size; averaging the colors of the pixels at the edges of contrasting areas (blurs edges)

        # Resize image with preserve_range set to True
        image_resized = transform.resize(
            image,
            (new_h, new_w),
            anti_aliasing=anti_aliasing,
            mode='constant',
            preserve_range=True,  # Preserve the original image's intensity range
            order=order
        )

        # Apply gamma correction to images only
        if not is_mask and gamma != 1:
            image_resized = exposure.adjust_gamma(image_resized, gamma)

        # Add a channel dimension to grayscale images if necessary
        if len(image.shape) == 2:  # Grayscale image
            image_resized = image_resized[..., np.newaxis]

        # Initialize padded image
        padded_image = np.zeros((target_image_size[0], target_image_size[1], image_resized.shape[2]), dtype=image_resized.dtype)

        # Insert the resized image into the padded image
        padded_image[pad_h:pad_h+new_h, pad_w:pad_w+new_w, :] = image_resized

        return padded_image

### 2.2 Display Function

In [6]:
def display_image(images, titles=None, max_cols=4):
    # Wrap single image in a list
    if not isinstance(images, list):
        images = [images]
        titles = [titles] if titles is not None else titles

    if titles is not None and not isinstance(titles, list):
        raise TypeError("Titles should be provided as a list or a single title for a single image.")

    if titles and len(images) != len(titles):
        raise ValueError("Every image should have a corresponding title.")

    num_images = len(images)
    cols = min(num_images, max_cols)
    rows = num_images // cols + (num_images % cols > 0)
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 4, rows * 4))
    if rows == 1 or cols == 1:
        axes = np.array([axes])

    axes = axes.flatten()[:num_images]  # Limit to the number of images to avoid blank subplots

    for ax, img, title in zip(axes, images, titles or [None]*num_images):
        if isinstance(img, torch.Tensor):
            img = img.cpu().numpy()
            if img.ndim == 3 and img.shape[0] in [1, 3, 4]:
                img = img.transpose(1, 2, 0)
            if img.ndim == 3 and img.shape[2] == 1:
                img = img.squeeze(2)

        ax.imshow(img, cmap='gray')
        if title:
            ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

### 2.3 Load Dataset and Display Sample Image

In [7]:
# Get dataset paths
image_path = os.path.join(CFG.TRAIN_DATASET_DIRECTORY, "images")
mask_path = os.path.join(CFG.TRAIN_DATASET_DIRECTORY, "labels")

# Print current working directory and full paths for debugging
print("Current working directory:", os.getcwd())
print("Full image path:", os.path.abspath(image_path))
print("Full mask path:", os.path.abspath(mask_path))
print("Checkpoint path:", os.path.abspath(CFG.CHECKPOINT_PATH))
if CFG.MODEL_PATH:
    print("Model path:", os.path.abspath(CFG.MODEL_PATH))

if not CFG.IF_SUBMISSION: # Initialize dataset
    dataset = CustomDataset(image_path, mask_path, target_image_size=CFG.INPUT_IMAGE_SIZE, gamma=CFG.GAMMA)

Current working directory: /kaggle/working
Full image path: /kaggle/input/blood-vessel-segmentation/train/kidney_1_dense/images
Full mask path: /kaggle/input/blood-vessel-segmentation/train/kidney_1_dense/labels
Checkpoint path: /kaggle/working/checkpoints
Model path: /kaggle/input/attention-unet/pytorch/real/1/checkpoints


In [8]:
if not CFG.IF_SUBMISSION:
    index = 600
    original_image = io.imread(os.path.join(CFG.TRAIN_DATASET_DIRECTORY, "images", dataset[index][2]))
    display_image([original_image, dataset[index][0], dataset[index][1]], [f"Image {dataset[index][2]}", "Processed", f"Mask {dataset[index][2]}"])

### 2.4 Train-Test-Split and DataLoader

In [9]:
if not CFG.IF_SUBMISSION: 
    # Slice dataset
    if not CFG.NUM_IMAGES:
        pass
    elif CFG.NUM_IMAGES < 6:
        raise("Number of samples is not sufficient. Raise number over 6.")
    else:
        # Create a subset of the dataset
        indices = torch.randperm(len(dataset)).tolist()
        subset_indices = indices[:CFG.NUM_IMAGES]
        subset_dataset = Subset(dataset, subset_indices)
        dataset = subset_dataset

    # Calculate the sizes of train and test sets
    total_size = len(dataset)
    test_size = int(total_size * CFG.TRAIN_TEST_SPLIT_RATIO)
    train_size = total_size - test_size

    # Split the dataset into train and test datasets
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Further split the train dataset into train and validation datasets
    val_size = int(train_size * CFG.TRAIN_TEST_SPLIT_RATIO)
    new_train_size = train_size - val_size
    train_dataset, val_dataset = random_split(train_dataset, [new_train_size, val_size])

    # Create DataLoaders for train, validation, and test datasets
    train_loader = DataLoader(train_dataset, batch_size=CFG.BATCH_SIZE, shuffle=True, num_workers=CFG.NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=CFG.NUM_WORKERS)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=CFG.NUM_WORKERS)

    print("Numbers of batches in train, valid, test data loader are:")
    print(f"Train: {len(train_loader)}, Validation: {len(val_loader)}, Test: {len(test_loader)}")

    print("Numbers of examples in train, valid, test data loader are:")
    print(f"Train: {len(train_loader.sampler)}, Validation: {len(val_loader.sampler)}, Test: {len(test_loader.sampler)}")

In [10]:
if not CFG.IF_SUBMISSION:
    data, target, filename = next(iter(train_loader))
    print("Shape of the data: ", data.shape)  # Expected: [batch_size, 1, height, width]

In [11]:
def check_target_masks(data_loader):
    for batch_index, (data, target, filename) in enumerate(tqdm(data_loader)):
        target_np = target.cpu().numpy()

        if not np.all(np.isin(target_np, [0, 1])):
            unique_values = np.unique(target_np)
            return False, f"Non-binary values found in batch {batch_index}. Unique values: {unique_values}"

    return True, "All target masks are binary (contain only 0s and 1s)"

if CFG.BINARY_CHECK:
  is_binary, message = check_target_masks(train_loader)
  print(message)
  is_binary, message = check_target_masks(val_loader)
  print(message)
  is_binary, message = check_target_masks(test_loader)
  print(message)

## 3. Image Augmentation

In [12]:
def augement_image(image, mask):

    # convert np array to float type before converting into tensor
    image_dense = torch.from_numpy(image.astype(np.float32))
    mask_dense = torch.from_numpy(mask.astype(np.float32))

    if len(image_dense.shape) == 2:
        image_dense = image_dense.unsqueeze(0) # unsqueeze add new dimention at the specified index = 0 (at the beginning)

    image_np = image_dense.permute(1, 2, 0).numpy() # permute to rearrange the dimensions of tensor
    mask_np = mask_dense.numpy()

    image_list = [None, None, None, None]
    mask_list = [None, None, None, None]

    transform_rotate_90 = Alb.Compose([
        Alb.Rotate(limit=90, p=0.5)
    ])

    # Original image
    image_list[0] = image_np
    mask_list[0] = mask_np

    # Original image with 90-degree rotation
    augmented_rotate_90 = transform_rotate_90(image=image_np, mask=mask_np)
    image_list[1], mask_list[1] = augmented_rotate_90['image'], augmented_rotate_90['mask']

    # Original image with 180-degree rotation
    augmented_rotate_180 = transform_rotate_90(image=image_list[1], mask=mask_list[1])
    image_list[2], mask_list[2] = augmented_rotate_180['image'], augmented_rotate_180['mask']

    # Original image with 270-degree rotation
    augmented_rotate_270 = transform_rotate_90(image=image_list[2], mask=mask_list[2])
    image_list[3], mask_list[3] = augmented_rotate_270['image'], augmented_rotate_270['mask']

    # Making the mask dimensions 3
    for i in range(4):
        if mask_list[i].ndim == 2:
            mask_list[i] = mask_list[i][..., np.newaxis]

    augmented_data_zip = list(zip(*[image_list, mask_list]))

    return augmented_data_zip

## 4. Model

In [13]:
# SSL certificate expired for se_resnext50_32x4d encoder
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

### 4.1 Attention U-Net


In [14]:
class ConvolutionalBlock(nn.Module):
    # filter/kernel slides over the input image and produce a feature map (3x3 or 5x5 pixels)
    def __init__(self, in_c, out_c):
        super().__init__()

        self.conv = nn.Sequential(
            # extracts features by applying filters
            nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
            # normalizes features, stablizees and speed up training
            nn.BatchNorm2d(out_c),
            # introduces non-linearity, allows network to learn more complex patterns
            nn.ReLU(inplace=True),
            nn.Conv2d(out_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv(x)

class EncoderBlock(nn.Module):
    # use convolutional and pooling layers to capture the context of the image
    def __init__(self, in_c, out_c):
        super().__init__()

        # extract feature masps
        self.conv = ConvolutionalBlock(in_c, out_c)
        # select most significant pixel value in each filter patch of the feature map
        # down-sampels feature maps
        self.pool = nn.MaxPool2d((2, 2))

    def forward(self, x):
        s = self.conv(x)
        p = self.pool(s)
        return s, p

class AttentionBlock(nn.Module):
    # useful when the area of interest occupies a relatively small part of the image
    # suppress irrelevant regions in input while highlighting features useful for a specific task
    def __init__(self, in_c, out_c):
        super().__init__()

        # Wg = weights for gating signal; from a deeper layer (high-level features)
        # gating signal helps to identify region of interest
        self.Wg = nn.Sequential(
            nn.Conv2d(in_c[0], out_c, kernel_size=1, padding=0),
            nn.BatchNorm2d(out_c)
        )

        # Ws = weights ofr skip connection signal; from an earlier layer (low-level features)
        # refirned skip connection signal ensures that the network retains crucial spatial details
        self.Ws = nn.Sequential(
            nn.Conv2d(in_c[1], out_c, kernel_size=1, padding=0),
            nn.BatchNorm2d(out_c)
        )
        self.relu = nn.ReLU(inplace=True)
        self.output = nn.Sequential(
            nn.Conv2d(out_c, out_c, kernel_size=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, g, s):
        Wg = self.Wg(g)
        Ws = self.Ws(s)
        # integrate contextual info by Wg with spatial details from Ws and filter out negative activations
        out = self.relu(Wg + Ws)
        out = self.output(out)
        return out * s

class DecoderBlock(nn.Module):
    # use convolutional and up-sampling layers to enable precise localization
    def __init__(self, in_c, out_c):
        super().__init__()

        # increase spatial dimensions
        self.up = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)
        self.ag = AttentionBlock(in_c, out_c)
        self.c1 = ConvolutionalBlock(in_c[0]+out_c, out_c)

    def forward(self, x, s):
        x = self.up(x)
        s = self.ag(x, s)
        x = torch.cat([x, s], axis=1)
        x = self.c1(x)
        return x

class attention_unet(nn.Module):
    def __init__(self):
        super().__init__()

        # Adusted for 1 input channel
        self.e1 = EncoderBlock(1, 64)  # Adjusted for 1 input channel
        self.e2 = EncoderBlock(64, 128)
        self.e3 = EncoderBlock(128, 256)
        # Add more encoder blocks if needed to accommodate the larger input size

        self.b1 = ConvolutionalBlock(256, 512)

        self.d1 = DecoderBlock([512, 256], 256)
        self.d2 = DecoderBlock([256, 128], 128)
        self.d3 = DecoderBlock([128, 64], 64)
        # Add more decoder blocks symmetrically if more encoder blocks are added

        self.output = nn.Conv2d(64, 1, kernel_size=1, padding=0)  # Output for 1 channel

    def forward(self, x):
        s1, p1 = self.e1(x)
        s2, p2 = self.e2(p1)
        s3, p3 = self.e3(p2)

        b1 = self.b1(p3)

        d1 = self.d1(b1, s3)
        d2 = self.d2(d1, s2)
        d3 = self.d3(d2, s1)

        output = self.output(d3)
        return output

### 4.2 Model Summary

In [15]:
model = attention_unet()
# summary(model, input_size = (CFG.BATCH_SIZE, 1, CFG.INPUT_IMAGE_SIZE[0], CFG.INPUT_IMAGE_SIZE[1]))
summary(model, input_size = (1, 1, CFG.INPUT_IMAGE_SIZE[0], CFG.INPUT_IMAGE_SIZE[1]))

Layer (type:depth-idx)                   Output Shape              Param #
attention_unet                           [1, 1, 512, 512]          --
├─EncoderBlock: 1-1                      [1, 64, 512, 512]         --
│    └─ConvolutionalBlock: 2-1           [1, 64, 512, 512]         --
│    │    └─Sequential: 3-1              [1, 64, 512, 512]         37,824
│    └─MaxPool2d: 2-2                    [1, 64, 256, 256]         --
├─EncoderBlock: 1-2                      [1, 128, 256, 256]        --
│    └─ConvolutionalBlock: 2-3           [1, 128, 256, 256]        --
│    │    └─Sequential: 3-2              [1, 128, 256, 256]        221,952
│    └─MaxPool2d: 2-4                    [1, 128, 128, 128]        --
├─EncoderBlock: 1-3                      [1, 256, 128, 128]        --
│    └─ConvolutionalBlock: 2-5           [1, 256, 128, 128]        --
│    │    └─Sequential: 3-3              [1, 256, 128, 128]        886,272
│    └─MaxPool2d: 2-6                    [1, 256, 64, 64]          --
├

## 5. Evaluation

In [16]:
# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels = img.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    rle = ' '.join(str(x) for x in runs)
    if rle == '':
      rle = '1 0'
    return rle

def rle_decode(mask_rle, shape):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return
    Returns numpy array, 1 - mask, 0 - background

    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

### 5.1 Run-Length Encode and Decode

Run-Length Encoding (RLE) is a simple form of data compression where runs of data (i.e., sequences in which the same data value occurs in many consecutive data elements) are stored as a single data value and count. It is particularly effective for data with many such runs, as it can significantly reduce the size of the data.

In the context of image segmentation, such as in a Kaggle competition for blood vessel segmentation from CT images, RLE is often used to encode binary masks. In a binary mask, each pixel is either part of the object of interest (e.g., a blood vessel) or the background. RLE can efficiently represent these masks, especially when large regions of pixels are the same (all vessel or all background).

The encoding typically works as follows:

1. The binary mask is flattened into a one-dimensional array (e.g., by taking each row of pixels in turn).
2. The algorithm goes through this array and counts the number of consecutive pixels with the same value.
3. Each run of pixels is then represented by two numbers: the start position in the flattened array and the length of the run.

For example, the array <code>[0, 0, 1, 1, 1, 0, 0]</code> would be encoded as <code>[2, 3]</code> in RLE, meaning that starting from the third element, there are three consecutive 1s.

### 5.2 Loss Functions

#### 5.2.1. DICE Loss
Dice loss is particularly useful for data with imbalanced classes. It measures the overlap between the predicted segmentation and the ground truth.

In [17]:
class DiceLoss(nn.Module):
    def forward(self, inputs, targets, smooth=1):
        inputs = inputs.sigmoid()
        intersection = (inputs * targets).sum()
        dice = (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)
        return 1 - dice

#### 5.2.2. Jaccard/Intersection over Union (IoU) Loss
Similar to Dice loss, IoU is another common metric for the evaluation of object detection algorithms, such as image segmentation.

In [18]:
class IoULoss(nn.Module):
    def forward(self, inputs, targets, smooth=1):
        inputs = inputs.sigmoid()
        intersection = (inputs * targets).sum()
        total = (inputs + targets).sum()
        union = total - intersection
        IoU = (intersection + smooth) / (union + smooth)
        return 1 - IoU

#### 5.2.3 Focal Loss
This loss function is designed to address class imbalance by down-weighting well-classified examples.

In [19]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, logits=True, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduce=False)
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduce=False)
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss

### 5.4 Metrics

In [20]:
def calculate_metrics(y_pred, y_true):
    """
    Calculates IOU, F1 score, precision, recall, and accuracy for U-Net predictions using NumPy.

    Arguments:
    y_true -- the ground truth labels (a binary array of 0s and 1s)
    y_pred -- the predicted labels (a binary array of 0s and 1s)

    Returns:
    A dictionary containing the IOU, F1 score, precision, recall, and accuracy.
    """
    with torch.no_grad():
        # Convert probability output to binary predictions
        y_pred = (y_pred > 0.5).float()

        # Flatten the arrays
        y_true = y_true.detach().cpu().numpy().flatten()
        y_pred = y_pred.detach().cpu().numpy().flatten()

        # Calculate true positives, false positives, and false negatives
        tp = np.sum((y_true == 1) & (y_pred == 1))
        fp = np.sum((y_true == 0) & (y_pred == 1))
        fn = np.sum((y_true == 1) & (y_pred == 0))

        # Very small number to prevent infinity when dividing zero
        eps = 1e-8

        # Calculate IOU
        iou = tp / (tp + fp + fn + eps)

        # Calculate precision
        precision = tp / (tp + fp + eps)

        # Calculate recall
        recall = tp / (tp + fn + eps)

        # Calculate F1 score
        f1_score = 2 * (precision * recall) / (precision + recall + eps)

        # Calculate accuracy
        accuracy = np.sum(y_true == y_pred) / len(y_true)

    # Return a dictionary containing the metrics
    metrics = {"IoU_Score": iou, "F1_Score": f1_score, "Precision": precision, "Recall": recall, "Accuracy": accuracy}
    return metrics

## 6. Train

### 6.1 Trainer Class

In [21]:
class Trainer:
    def __init__(self, model, train_loader, val_loader, optimizer, criterion, device, checkpoint_path, model_name):
        self.device = device
        self.model = model.to(self.device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.optimizer = optimizer
        self.criterion = criterion
        self.checkpoint_path = checkpoint_path
        self.model_name = model_name
        self.best_valid_loss = float('inf')

    def train_epoch(self):
        self.model.train()
        total_train_loss = 0

        for batch_index, batch in enumerate(tqdm(self.train_loader)):
            self.optimizer.zero_grad()
            data, target, _ = batch

            data = data.to(self.device, dtype=torch.float32).permute(0, 3, 1, 2)
            target = target.to(self.device, dtype=torch.float32).permute(0, 3, 1, 2)

            output = self.model(data)
            train_loss = self.criterion(output, target)
            train_loss.backward()
            self.optimizer.step()

            total_train_loss += train_loss.item()

        average_train_loss = total_train_loss / len(self.train_loader)
        return average_train_loss

    def validate(self):
        self.model.eval()
        total_val_loss = 0

        with torch.no_grad():
            for batch_index, batch in enumerate(tqdm(self.val_loader)):
                data, target, _ = batch
                data = data.to(self.device, dtype=torch.float32).permute(0, 3, 1, 2)
                target = target.to(self.device, dtype=torch.float32).permute(0, 3, 1, 2)

                output = self.model(data)
                val_loss = self.criterion(output, target).item()
                total_val_loss += val_loss

        average_val_loss = total_val_loss / len(self.val_loader)
        return average_val_loss

    def train(self, epochs, resume_from_path=None):
        # Check if the checkpoint directory exists
        if not os.path.exists(self.checkpoint_path):
            os.makedirs(self.checkpoint_path, exist_ok=True)
            print(f"Checkpoint directory created at {self.checkpoint_path}")
        else:
            print(f"Checkpoint directory already exists at {self.checkpoint_path}")
        
        training_log_path = os.path.join(self.checkpoint_path, f"{self.model_name}_train_results.csv")
        
        # Resume training if true
        if resume_from_path:
            print(f"Resuming training from checkpoint: {resume_from_path}")
            start_epoch, train_df = self.load_model(resume_from_path)
        else:
            print("Starting training from scratch")
            start_epoch = 0
            train_df = pd.DataFrame(columns=["Epoch", "Train_Loss", "Valid_Loss", "Time_Per_Iteration"])
        
        # Training iteration
        self.save_config(CFG)
        for epoch in range(epochs):
            print(f"\nEpoch # {start_epoch + epoch + 1}")
            start_time = time.time()
            train_loss = self.train_epoch()
            val_loss = self.validate()
            iteration_time = (time.time() - start_time) / len(self.train_loader)

            result_df = pd.DataFrame({
                "Epoch": [start_epoch + epoch + 1],
                "Train_Loss": [train_loss],
                "Valid_Loss": [val_loss],
                "Time_Per_Iteration": [iteration_time]
            })
            train_df = pd.concat([train_df, result_df], ignore_index=True)

            train_df.to_csv(training_log_path)

            if val_loss < self.best_valid_loss:
                print(f"Improved Validation Loss from {self.best_valid_loss:.6f} to {val_loss:.6f}")
                self.best_valid_loss = val_loss
                self.save_model(self.model, self.optimizer, (start_epoch + epoch + 1), self.best_valid_loss, self.checkpoint_path, self.model_name)
                print(f"Saved model at: {os.path.join(self.checkpoint_path, self.model_name)}")
            else:
                print(f"Train Loss: {train_loss:.6f}, Validation Loss: {val_loss:.6f}")

        return self.model, train_df
    
    def load_model(self, resume_from_path):
        model_path = os.path.join(resume_from_path, self.model_name)
        if not os.path.isfile(model_path):
            raise FileNotFoundError(f"No trained model has been found at {model_path}.")

        checkpoint = torch.load(model_path)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.best_valid_loss = checkpoint.get('best_valid_loss', float('inf'))

        training_log_path = os.path.join(resume_from_path, f"{self.model_name}_train_results.csv")
        if os.path.isfile(training_log_path):
            train_df = pd.read_csv(training_log_path, index_col=0)
            start_epoch = train_df['Epoch'].iloc[-1]
            print(f"Model states loaded, training will resume from epoch # {start_epoch}.")
        else:
            raise FileNotFoundError(f"No training results have been found at {training_log_path}.")

        return start_epoch, train_df

    def save_model(self, model, optimizer, epoch, best_valid_loss, path, filename):
        if not os.path.exists(path):
            os.makedirs(path)

        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_valid_loss': best_valid_loss
        }, os.path.join(path, filename))

    def save_config(self, config):
        config_filename = f"{self.model_name}_config.txt"
        config_path = os.path.join(self.checkpoint_path, config_filename)
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

        with open(config_path, 'w') as file:
            file.write(f"Config Saved on: {current_time}\n\n")
            for attribute in dir(config):
                if not attribute.startswith("__") and not callable(getattr(config, attribute)):
                    file.write(f"{attribute}: {getattr(config, attribute)}\n")

### 6.2 Train Model

In [22]:
if CFG.IF_TRAIN:
    # Initialize trainer class
    trainer = Trainer(model,
                    train_loader,
                    val_loader,
                    optimizer=optim.Adam(model.parameters(),lr=CFG.LEARNING_RATE),
                    criterion=DiceLoss(),
                    device=CFG.DEVICE,
                    checkpoint_path=CFG.CHECKPOINT_PATH,
                    model_name=CFG.TRAINED_MODEL
    )

    # Train the model
    print(f"{CFG.TRAIN_DATASET_NAME} is in training.")
    model, train_df = trainer.train(
        epochs=CFG.NUM_EPOCHS,
        resume_from_path=CFG.MODEL_PATH
    )

### 6.3 Results in Training

In [23]:
if CFG.IF_TRAIN:
    # Load training results
    train_df = pd.read_csv(os.path.join(CFG.CHECKPOINT_PATH, f"{CFG.TRAINED_MODEL}_train_results.csv"))

    # Show line plots of loss values
    sns.set_theme()
    ax = sns.lineplot(data=train_df, x="Epoch", y="Train_Loss", label="Train Loss")
    ax = sns.lineplot(data=train_df, x="Epoch", y="Valid_Loss", label="Valid Loss")

    ax.set_xlabel("Epoch Numbers")
    ax.set_ylabel("Losses")
    ax.set_title("Valid and Train Losses")

## 7. Test

### 7.1 Tester Class

In [24]:
class Tester:
    def __init__(self, model, test_loader, criterion, device, checkpoint_path, model_name):
        self.test_loader = test_loader
        self.criterion = criterion
        self.device = device
        self.checkpoint_path = checkpoint_path
        self.model_name = model_name
        self.model = model.to(self.device)

        # Load model state dictionary
        self.load_model()

    def load_model(self):
        model_path = os.path.join(self.checkpoint_path, self.model_name)
        if os.path.isfile(model_path):
            checkpoint = torch.load(model_path)
            if "model_state_dict" in checkpoint:
                self.model.load_state_dict(checkpoint["model_state_dict"])
                print(f"Model state loaded from {model_path}")
            else:
                print("The checkpoint does not contain a model state dictionary.")
        else:
            print("Model file not found. Please check the path and model_name.")

    def test(self):
        self.model.eval()
        test_loss = 0
        test_df = pd.DataFrame(columns=["Batch", "Filename", "Loss", "Time_Per_Iteration", "IoU_Score", "F1_Score", "Precision", "Recall", "Accuracy", "rle"])

        with torch.no_grad():
            for batch_index, (data, target, filenames) in enumerate(tqdm(self.test_loader)):
                data = data.to(self.device, dtype=torch.float32).permute(0, 3, 1, 2)
                target = target.to(self.device, dtype=torch.float32).permute(0, 3, 1, 2)

                batch_start_time = time.time()
                output = self.model(data)
                loss = self.criterion(output, target).item()
                test_loss += loss

                batch_time = time.time() - batch_start_time

                # Calculate and store metrics
                metrics = self.calculate_metrics(output, target)

                for i in range(data.size(0)):
                    # Process each image in the batch for RLE encoding
                    pred_mask = output[i].cpu().numpy().squeeze()  # Assuming binary mask output
                    pred_mask = (pred_mask > 0.5).astype(np.uint8)  # Thresholding

                    # RLE encode the mask
                    rle_encoded = self.rle_encode(pred_mask)

                    # Extract the first element from filename if it's a tuple
                    filename_str = filenames[i][0] if isinstance(filenames[i], tuple) else filenames[i]

                    # Append all information to test_df
                    batch_df = pd.DataFrame({
                        "Batch": [batch_index + 1],
                        "Filename": [filename_str],
                        "Loss": [loss],
                        "Time_Per_Iteration": [batch_time],
                        "IoU_Score": [metrics["IoU_Score"]],
                        "F1_Score": [metrics["F1_Score"]],
                        "Precision": [metrics["Precision"]],
                        "Recall": [metrics["Recall"]],
                        "Accuracy": [metrics["Accuracy"]],
                        "rle": [rle_encoded]
                    })
                    test_df = pd.concat([test_df, batch_df], ignore_index=True)

        test_loss /= len(self.test_loader)
        print(f"Test set: Average loss: {test_loss:.4f}")

        # Save test_df with all information
        test_df.to_csv(os.path.join(self.checkpoint_path, f"{self.model_name}_test_results.csv"), index=False)

        return self.model, test_df

    def rle_encode(self, img):
        '''
        img: numpy array, 1 - mask, 0 - background
        Returns run length as string formated
        '''
        pixels = img.flatten()
        pixels = np.concatenate([[0], pixels, [0]])
        runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
        runs[1::2] -= runs[::2]
        rle = ' '.join(str(x) for x in runs)
        if rle == '':
          rle = '1 0'
        return rle

    def calculate_metrics(self, y_pred, y_true):
        with torch.no_grad():
            y_pred = (y_pred > 0.5).float()
            y_true = y_true.view_as(y_pred)

            y_true = y_true.detach().cpu().numpy().flatten()
            y_pred = y_pred.detach().cpu().numpy().flatten()

            tp = np.sum((y_true == 1) & (y_pred == 1))
            fp = np.sum((y_true == 0) & (y_pred == 1))
            fn = np.sum((y_true == 1) & (y_pred == 0))

            eps = 1e-8

            iou = tp / (tp + fp + fn + eps)
            precision = tp / (tp + fp + eps)
            recall = tp / (tp + fn + eps)
            f1_score = 2 * (precision * recall) / (precision + recall + eps)
            accuracy = np.sum(y_true == y_pred) / len(y_true)

            return {
                "IoU_Score": iou,
                "F1_Score": f1_score,
                "Precision": precision,
                "Recall": recall,
                "Accuracy": accuracy
            }

### 7.2 Test Model

In [25]:
if CFG.IF_TEST:
    # Initialize tester class
    tester = Tester(model=attention_unet(),
                    test_loader=test_loader,
                    criterion=DiceLoss(),
                    device=CFG.DEVICE,
                    checkpoint_path = CFG.CHECKPOINT_PATH,
                    model_name=CFG.TRAINED_MODEL
    )

    # Test the model
    model, test_df = tester.test()

### 7.3 Results in Test

In [26]:
 if CFG.IF_TEST:
    # Load test results
    test_df = pd.read_csv(os.path.join(CFG.CHECKPOINT_PATH, f"{CFG.TRAINED_MODEL}_test_results.csv"))

    # Calculate the average of each metric
    average_scores = test_df[["IoU_Score", "F1_Score", "Precision", "Recall", "Accuracy"]].mean()

    # Print the averaged results of scores
    print("Averaged Results of Scores:")
    print(average_scores)

    # Calculate and print the average and total time per iteration
    average_time_per_iteration = test_df["Time_Per_Iteration"].mean()
    total_time = test_df["Time_Per_Iteration"].sum()

    print("\nAverage Time Per Iteration: {:.4f} seconds".format(average_time_per_iteration))
    print("Total Time for All Iterations: {:.4f} seconds".format(total_time))

    # Divide required items from the dataset to show them only
    melted_data = test_df.melt(id_vars=["Filename", "Time_Per_Iteration"],
                               value_vars=["IoU_Score", "F1_Score", "Precision", "Recall", "Accuracy"],
                               var_name="Types of Metrics", value_name="Score")

    sns.set(style="whitegrid")

    plt.figure(figsize=(20, 6))
    sns.violinplot(x="Types of Metrics", y="Score", data=melted_data,
                   inner="quartile",
                   order=["IoU_Score", "F1_Score", "Precision", "Recall", "Accuracy"])

    # Remove side boundaries
    sns.despine(left=True)
    plt.show()

### 7.4 Illustrate predictions

In [27]:
if CFG.IF_TEST:
    # Choose index
    index = 0
    if index < 0 or index >= len(test_df):
        raise ValueError("Index out of range.")

    # Extract data
    data_row = test_df.iloc[index]
    filename = data_row["Filename"]
    rle = data_row['rle']

    # Get images
    original_image = io.imread(os.path.join(CFG.TRAIN_DATASET_DIRECTORY, "images", filename))
    label_image = io.imread(os.path.join(CFG.TRAIN_DATASET_DIRECTORY, "labels", filename))

    # Check if RLE data exists and is valid
    if pd.isna(rle):
        print(f"Missing RLE data at index {index}. Filename: {filename}")
        images = [original_image, label_image]
        titles = [filename, "Label"]
    else:
        predicted_mask = rle_decode(rle, CFG.INPUT_IMAGE_SIZE)
        images = [original_image, label_image, predicted_mask]
        titles = [filename, "Label", "Predicted Mask"]

    # Use the display function
    display_image(images, titles)

## 8. Submission

In [28]:
def revert_resize_and_pad(mask_pred, original_image_size, target_image_size):
    # Assuming mask_pred is a 2D array for a single-channel grayscale image
    # Adjustments are needed if mask_pred comes with an extra singleton dimension
    
    # Ensure mask_pred is 2D (height, width) by removing any singleton dimensions
    if mask_pred.ndim > 2:
        mask_pred = mask_pred.squeeze()

    original_height, original_width = original_image_size[:2]
    
    # Calculate the scale and padding used during the initial resize
    scale = min(target_image_size[0] / original_height, target_image_size[1] / original_width)
    resized_height, resized_width = int(original_height * scale), int(original_width * scale)
    
    pad_h = (target_image_size[0] - resized_height) // 2
    pad_w = (target_image_size[1] - resized_width) // 2

    # Crop the padded area from the mask
    cropped_mask = mask_pred[pad_h:pad_h+resized_height, pad_w:pad_w+resized_width]

    # Resize the cropped mask back to the original image size
    reverted_mask = transform.resize(
        cropped_mask,
        (original_height, original_width),
        order=0,
        preserve_range=True,
        anti_aliasing=False
    ).astype(np.uint8)

    return reverted_mask

In [29]:
def get_output(model, model_path, test_dataset, device, dataset_name):
    if os.path.isfile(model_path):
        checkpoint = torch.load(model_path)
        if "model_state_dict" in checkpoint:
            model.load_state_dict(checkpoint["model_state_dict"])
            model.to(device)
            print(f"Model state loaded from {model_path}")
        else:
            raise("The checkpoint does not contain a model state dictionary.")
    else:
        raise("Model file not found. Please check the path and model_name.")
        
    model.eval()  # Ensure the model is in evaluation mode
    submission_list = []

    # Iterate over the test dataset
    for images, filenames in tqdm(DataLoader(test_dataset, batch_size=1)):
        images = images.to(device, dtype=torch.float32).permute(0, 3, 1, 2)

        # Initialize variables for storing ensemble predictions
        ensemble_pred = torch.zeros_like(images, device=device)

        # Process each axis for the weighted ensemble
        for axis in range(3):
            if axis > 0:
                # Rotate images if axis is 1 or 2
                images = torch.rot90(images, 1, [2, 3])

            # Forward pass through the model
            with torch.no_grad():
                preds = model(images)
                
            # If axis was rotated, rotate predictions back to original orientation
            if axis > 0:
                preds = torch.rot90(preds, -1, [2, 3])

            # Aggregate predictions
            ensemble_pred += preds / 3

        # Apply threshold to the aggregated predictions
        threshold = CFG.THRESHOLD_PERCENTILE
        mask_pred = (ensemble_pred > threshold).cpu().numpy().astype(np.uint8)
        #print(f"Converted: {mask_pred.shape}")
        mask_pred = revert_resize_and_pad(mask_pred, test_dataset.original_image_size, test_dataset.target_image_size)
        #print(f"Reverted: {mask_pred.shape}")
        mask_pred = np.squeeze(mask_pred)

        # RLE encode the mask
        rle = rle_encode(mask_pred)
        
        # ID
        base_filename = os.path.splitext(os.path.basename(filenames[0]))[0]
        identifier = f"{dataset_name}_{base_filename}"
        
        submission_list.append({"id": identifier, "rle": rle})

    # Convert submission list to DataFrame and sort
    submission_df = pd.DataFrame(submission_list)
    submission_df = submission_df.sort_values(by='id')
    return submission_df

In [30]:
if CFG.IF_SUBMISSION:
    # Use glob to create a list of image directories within the 'test' directory
    #image_dirs = glob(os.path.join(CFG.DATASET_DIRECTORY, "test", "*", "images"))
    image_dirs = [os.path.join(CFG.DATASET_DIRECTORY, "train", CFG.TEST_DATASET_NAME, "images")]
    print(image_dirs)
    
    # Initialize an empty list to store individual submission DataFrames
    submission_dfs = []

    for image_dir in image_dirs:
        # Get all image file paths from the current directory
        images = glob(os.path.join(image_dir, "*.tif"))

        # Load dataset
        dataset = PipelineDataset(images, target_image_size=CFG.INPUT_IMAGE_SIZE, gamma=CFG.GAMMA)

        # Get the output in the desired format
        current_df = get_output(
            attention_unet(),
            os.path.join(CFG.MODEL_PATH, CFG.TRAINED_MODEL),
            dataset,
            CFG.DEVICE,
            os.path.basename(os.path.dirname(image_dir)),
        )

        # Append the current DataFrame to the list of submission DataFrames
        submission_dfs.append(current_df)

    # Concatenate all individual submission DataFrames into a single DataFrame
    submission_df = pd.concat(submission_dfs)

    # Save the results to a CSV file
    submission_df.to_csv("submission.csv", index=False)
    print("Submission Dataframe has been created")

['/kaggle/input/blood-vessel-segmentation/train/kidney_2/images']
Original size: (1041, 1511), Converted size: (512, 512, 1)
Model state loaded from /kaggle/input/attention-unet/pytorch/real/1/checkpoints/AUNet_checkpoint.pth


100%|██████████| 2217/2217 [13:13<00:00,  2.79it/s]


Submission Dataframe has been created


In [31]:
submission_df.head(6)

Unnamed: 0,id,rle
1362,kidney_2_0000,1 0
877,kidney_2_0001,1 0
1163,kidney_2_0002,1 0
2077,kidney_2_0003,1 0
1884,kidney_2_0004,1 0
768,kidney_2_0005,89607 3 91118 3 92629 3 336656 3 338167 3 3396...
