In [None]:
# FP16 (mixed precision) switch, disabled by default.
# It selects the batch size, the `fp16` parameters passed to `runner.train`,
# and which traced model is produced and loaded at the end of the notebook.
is_fp16_used = False

In [None]:
# Catalyst
!pip install catalyst==22.4

# for augmentations
!pip install albumentations==0.4.3

# for pretrained segmentation models for PyTorch
!pip install segmentation-models-pytorch

# for TTA
!pip install ttach==0.0.2

# for tensorboard
!pip install tensorflow

# if Your machine support Apex FP16, uncomment this 3 lines below
# !git clone https://github.com/NVIDIA/apex
# !pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
# is_fp16_used = True

In [None]:
from typing import Callable, List, Tuple

import os
import torch
import catalyst
from catalyst import utils

# Report the library versions this run is using.
print("torch: {}, catalyst: {}".format(torch.__version__, catalyst.__version__))

# os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # "" - CPU, "0" - 1 GPU, "0,1" - MultiGPU

# Seed everything once, then request deterministic cuDNN behaviour.
SEED = 42
utils.set_global_seed(SEED)
utils.prepare_cudnn(deterministic=True)

In [None]:
from pathlib import Path
from typing import List, Optional
import os
import cv2

ROOT = Path("Dataset/")


def _list_sorted(directory: str) -> List[str]:
    """Return the entry names found in `directory`, sorted."""
    return sorted(os.listdir(directory))


train_image_path = os.path.join(ROOT, "train/images")
train_mask_path = os.path.join(ROOT, "train/masks")
test_image_path = os.path.join(ROOT, "test/images")
test_mask_path = os.path.join(ROOT, "test/masks")
valid_image_path = os.path.join(ROOT, "val/images")
valid_mask_path = os.path.join(ROOT, "val/masks")

ALL_IMAGES = _list_sorted(train_image_path)
ALL_MASKS = _list_sorted(train_mask_path)
ALL_TEST_IMAGES = _list_sorted(test_image_path)
ALL_VALID_IMAGES = _list_sorted(valid_image_path)
ALL_VALID_MASKS = _list_sorted(valid_mask_path)
ALL_TEST_MASKS = _list_sorted(test_mask_path)

# Images and masks are paired purely by their position in these sorted
# listings, so a count mismatch would silently misalign the pairs.
for _split, _images, _masks in (
    ("train", ALL_IMAGES, ALL_MASKS),
    ("val", ALL_VALID_IMAGES, ALL_VALID_MASKS),
    ("test", ALL_TEST_IMAGES, ALL_TEST_MASKS),
):
    if len(_images) != len(_masks):
        raise ValueError(
            f"{_split}: {len(_images)} images but {len(_masks)} masks; "
            "image/mask pairs cannot be aligned by index"
        )

print(f"Number of train images/masks: {len(ALL_IMAGES)}")
print(f"Number of test images: {len(ALL_TEST_IMAGES)}")
print(f"Number of valid images/masks: {len(ALL_VALID_IMAGES)}")


In [None]:
import random
import matplotlib.pyplot as plt
import numpy as np
from skimage.io import imread as gif_imread
from catalyst import utils


def show_examples(name: str, image: np.ndarray, mask: np.ndarray):
    """Draw `image` and `mask` side by side in a single matplotlib figure.

    Both panels are titled with `name`, prefixed by "Image" and "Mask".
    """
    plt.figure(figsize=(10, 14))

    panels = (("Image", image), ("Mask", mask))
    for position, (label, picture) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture)
        plt.title(f"{label}: {name}")


def show(index: int, images: List[Path], masks: List[Path], transforms=None) -> None:
    """Read the train image/mask pair at `index` and display it.

    The entries of `images` / `masks` are appended to the train image and
    mask directory prefixes. When `transforms` is given it is called as
    `transforms(image=..., mask=...)` and the "image" / "mask" entries of
    its result are displayed instead of the raw arrays.
    """
    name = images[index]
    image = gif_imread("Dataset/train/images/" + name)
    mask = gif_imread("Dataset/train/masks/" + masks[index])

    if transforms is not None:
        augmented = transforms(image=image, mask=mask)
        image, mask = augmented["image"], augmented["mask"]

    show_examples(name, image, mask)

def show_random(images: List[Path], masks: List[Path], transforms=None) -> None:
    """Display one image/mask pair picked uniformly at random from `images`."""
    last_index = len(images) - 1
    show(random.randint(0, last_index), images, masks, transforms)

In [None]:
from torch.utils.data import Dataset


class SegmentationDataset(Dataset):
    """Image (and optional mask) files read from `Dataset/<data_type>/`.

    Args:
        images: file names, appended to `Dataset/<data_type>/images/`.
        masks: file names, appended to `Dataset/<data_type>/masks/`; paired
            with `images` by index. `None` yields samples without a mask.
        transforms: optional callable invoked as `transforms(**sample)` and
            expected to return a dict containing at least "image".
        data_type: sub-directory of `Dataset/` to read from.
    """

    def __init__(
        self,
        images: List[Path],
        masks: Optional[List[Path]] = None,
        transforms=None,
        data_type: str = "train",
    ) -> None:
        self.images = images
        self.masks = masks
        self.transforms = transforms
        self.data_type = data_type

    def __len__(self) -> int:
        return len(self.images)

    @staticmethod
    def _to_channels_first(image):
        """Return a numpy HWC (or HW) image as CHW; pass anything else through."""
        if isinstance(image, np.ndarray):
            if image.ndim == 2:
                # single-channel image: add the missing channel axis
                return image[np.newaxis, ...]
            if image.ndim == 3:
                return np.transpose(image, (2, 0, 1))
        return image

    def __getitem__(self, idx: int) -> dict:
        """Return a dict with "image", "filename" and, if masks were given, "mask"."""
        image_path = self.images[idx]
        image = gif_imread("Dataset/" + self.data_type + "/images/" + image_path)
        result = {"image": image}

        if self.masks is not None:
            mask = gif_imread("Dataset/" + self.data_type + "/masks/" + self.masks[idx])
            # NOTE(review): the mask is passed on exactly as read - confirm its
            # shape/dtype is what the loss functions expect.
            result["mask"] = mask

        # The transforms must see the image in its original height-width-channel
        # layout; transposing first would make resize/crop act on the wrong axes.
        if self.transforms is not None:
            result = self.transforms(**result)

        # Convert to channels-first only afterwards, and only when the pipeline
        # has not already produced a non-numpy object (e.g. a tensor).
        result["image"] = self._to_channels_first(result["image"])
        result["filename"] = image_path

        return result

In [None]:
import albumentations as albu
from albumentations.pytorch import ToTensorV2 as ToTensor


def pre_transforms(image_size=128):
    """Return a one-element list with an always-applied square resize."""
    resize = albu.Resize(image_size, image_size, p=1)
    return [resize]


def hard_transforms():
    """Return the list of heavier augmentations (rotation, cutout, colour, distortion)."""
    rotate = albu.RandomRotate90()
    cutout = albu.Cutout()
    brightness_contrast = albu.RandomBrightnessContrast(
        brightness_limit=0.2, contrast_limit=0.2, p=0.3
    )
    grid_distortion = albu.GridDistortion(p=0.3)
    hue_saturation = albu.HueSaturationValue(p=0.3)

    return [rotate, cutout, brightness_contrast, grid_distortion, hue_saturation]
  

def resize_transforms(image_size=128):
    """Return a one-element list that maps an image to `image_size` x `image_size`.

    One of three strategies is picked at random per sample:
      * scale the shortest side to `image_size`, then random-crop;
      * plain resize to the target square;
      * scale the longest side to `image_size`, pad to the target square,
        then random-crop.
    """
    BORDER_CONSTANT = 0  # numeric value of cv2.BORDER_CONSTANT
    pre_size = image_size

    random_crop = albu.Compose([
      albu.SmallestMaxSize(pre_size, p=1),
      albu.RandomCrop(
          image_size, image_size, p=1
      )

    ])

    rescale = albu.Compose([albu.Resize(image_size, image_size, p=1)])

    random_crop_big = albu.Compose([
      albu.LongestMaxSize(pre_size, p=1),
      # After LongestMaxSize the shorter side of a non-square image is smaller
      # than image_size, and RandomCrop raises when asked for a crop larger
      # than the image - so pad up to the crop size first.
      albu.PadIfNeeded(
          min_height=image_size,
          min_width=image_size,
          border_mode=BORDER_CONSTANT,
          p=1
      ),
      albu.RandomCrop(
          image_size, image_size, p=1
      )

    ])

    # Converts the image to a square of size image_size x image_size
    result = [
      albu.OneOf([
          random_crop,
          rescale,
          random_crop_big
      ], p=1)
    ]

    return result
  
def post_transforms():
    """Return the final pipeline steps: normalization, then tensor conversion."""
    # we use ImageNet image normalization
    normalize = albu.Normalize()
    # and convert it to torch.Tensor
    to_tensor = ToTensor()
    return [normalize, to_tensor]
  
def compose(transforms_to_compose):
    """Flatten a list of transform lists into one `albu.Compose` pipeline."""
    flattened = []
    for group in transforms_to_compose:
        flattened.extend(group)
    return albu.Compose(flattened)

In [None]:
import torchvision.transforms as transforms
# NOTE(review): this torchvision pipeline is not referenced anywhere else in the
# visible notebook; the datasets below use the albumentations pipelines.
transform = transforms.Compose([
    transforms.Resize((128, 128)),    # Resize images to 128x128 pixels
    transforms.ToTensor(),            # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize using ImageNet statistics
])

# Training: random resize/crop and augmentations, then normalize and convert
# to a tensor so the model receives float input.
train_transforms = compose([
    resize_transforms(),
    hard_transforms(),
    post_transforms()
])
# Validation: deterministic resize only, so metrics are comparable across epochs.
valid_transforms = compose([pre_transforms(), post_transforms()])

# Preview pipeline: no normalization, so the images stay viewable.
show_transforms = compose([resize_transforms(), hard_transforms()])

In [None]:
# Preview one random training pair after the preview augmentations.
show_random(images=ALL_IMAGES, masks=ALL_MASKS, transforms=show_transforms)

In [None]:
# Imported here so this cell runs on a fresh kernel: nothing earlier in the
# notebook brings `collections` or `DataLoader` into scope.
import collections

from torch.utils.data import DataLoader

# FP16 leaves room for a larger batch.
batch_size = 32 if is_fp16_used else 8

print(f"batch_size: {batch_size}")
train_dataset = SegmentationDataset(
    images=ALL_IMAGES,
    masks=ALL_MASKS,
    transforms=train_transforms,
    data_type="train",
)
valid_dataset = SegmentationDataset(
    images=ALL_VALID_IMAGES,
    masks=ALL_VALID_MASKS,
    transforms=valid_transforms,
    data_type="val",
)
test_dataset = SegmentationDataset(
    images=ALL_TEST_IMAGES,
    masks=ALL_TEST_MASKS,
    transforms=valid_transforms,
    data_type="test",
)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    # keep the final partial batch: dropping it would silently exclude
    # validation samples from the reported metrics
    drop_last=False,
)
print("len(train_dataset):", len(train_dataset))
print("len(valid_dataset):", len(valid_dataset))
print("len(train_loader):", len(train_loader))
print("len(valid_loader):", len(valid_loader))

# Loader names ("train" / "valid") are the keys the runner iterates over.
loaders = collections.OrderedDict()
loaders["train"] = train_loader
loaders["valid"] = valid_loader

In [None]:
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.utils.losses import DiceLoss
# We will use Feature Pyramid Network with pre-trained ResNeXt50 backbone
model = smp.FPN(encoder_name="resnext50_32x4d", classes=1)


from torch import nn


# we have multiple criterions
criterion = {
    # The model output is treated as raw logits everywhere else in this
    # notebook (BCEWithLogitsLoss here, an explicit sigmoid at inference), so
    # the Dice loss has to apply the sigmoid itself to work on probabilities.
    "dice": DiceLoss(activation="sigmoid"),
    "bce": nn.BCEWithLogitsLoss()
}

In [None]:
from torch import optim


# Base step size, and a smaller one for the encoder.
encoder_learning_rate = 0.0005
learning_rate = 0.001

# Since we use a pre-trained encoder, we will reduce the learning rate on it.
layerwise_params = {
    "encoder*": {"lr": encoder_learning_rate, "weight_decay": 0.00003},
}


In [None]:
from catalyst.dl import SupervisedRunner

# Training length and where logs/checkpoints are written.
num_epochs = 3
logdir = "./logs/segmentation"

device = utils.get_device()
print(f"device: {device}")

# params for FP16; None when FP16 is switched off
fp16_params = dict(opt_level="O1") if is_fp16_used else None

print(f"FP16 params: {fp16_params}")


# by default SupervisedRunner uses "features" and "targets",
# in our case we get "image" and "mask" keys in dataset __getitem__
runner = SupervisedRunner(input_key="image", target_key="mask")

In [None]:
# Load the TensorBoard notebook extension and point it at the training log directory.
%load_ext tensorboard
%tensorboard --logdir {logdir}

In [None]:
from catalyst.dl import \
  CriterionCallback, MetricAggregationCallback


from catalyst.callbacks.metrics.segmentation import DiceCallback,IOUCallback

callbacks = [
    # Each criterion is calculated separately, comparing the model output
    # ("logits") with the target mask. Using the mask as both input and target
    # would make the loss independent of the model.
    CriterionCallback(
        input_key="logits",
        target_key="mask",
        metric_key="loss_dice",
        criterion_key="dice",
    ),

    CriterionCallback(
        input_key="logits",
        target_key="mask",
        metric_key="loss_bce",
        criterion_key="bce",
    ),

    # And only then we aggregate everything into one loss.
    MetricAggregationCallback(
        metric_key="loss",
        mode="weighted_sum", # can be "sum", "weighted_sum" or "mean"
        # because we want weighted sum, we need to add scale for each loss;
        # only keys produced by the criterion callbacks above may appear here
        metrics={"loss_dice": 1.0, "loss_bce": 0.8},
    ),

    # metrics, computed on the model output rather than on the input image
    # NOTE(review): these receive raw logits - confirm whether a `threshold`
    # should be set so the scores are computed on binarized predictions.
    DiceCallback(input_key="logits", target_key="mask"),
    IOUCallback(input_key="logits", target_key="mask"),

]
# NOTE(review): `layerwise_params` is defined in an earlier cell but is not
# used here; every parameter is trained with `learning_rate`.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    # our dataloaders
    loaders=loaders,
    # We can specify the callbacks list for the experiment;
    callbacks=callbacks,
    # path to save logs
    logdir=logdir,
    num_epochs=num_epochs,
    # Model selection: Dice on the "valid" loader, which needs to be maximized.
    valid_loader="valid",
    valid_metric="dice",
    minimize_valid_metric=False,
    # for FP16. It uses the variable from the very first cell
    fp16=fp16_params,
    # prints train logs
    verbose=True,
)

In [None]:
from torch.utils.data import DataLoader

# `test_image_path` is a plain string (built with os.path.join), so it is
# wrapped in Path before globbing. The dataset appends file *names* to its own
# directory prefix, hence only the names are kept.
TEST_IMAGES = sorted(path.name for path in Path(test_image_path).glob("*.jpg"))

# create test dataset (no masks); read from the test split, not the default "train"
test_dataset = SegmentationDataset(
    TEST_IMAGES,
    transforms=valid_transforms,
    data_type="test",
)

num_workers: int = 4

infer_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers
)

# this get predictions for the whole loader
predictions = np.vstack(list(map(
    lambda x: x["logits"].cpu().numpy(),
    runner.predict_loader(loader=infer_loader, resume=f"{logdir}/checkpoints/best.pth")
)))

print(type(predictions))
print(predictions.shape)

In [None]:
from itertools import islice

threshold = 0.5
max_count = 5

# islice stops after exactly `max_count` samples; checking `i >= max_count`
# after drawing would display one sample too many.
for features, logits in islice(zip(test_dataset, predictions), max_count):
    image = utils.tensor_to_ndimage(features["image"])

    # logits -> probabilities -> binary mask
    mask_ = torch.from_numpy(logits[0]).sigmoid()
    mask = utils.detach(mask_ > threshold).astype("float")

    show_examples(name="", image=image, mask=mask)

In [None]:
# Take one validation batch as the example input for tracing.
batch = next(iter(loaders["valid"]))
# saves to `logdir` and returns a `ScriptModule` class
# NOTE(review): confirm `runner.trace` is available in the pinned catalyst==22.4.
runner.trace(model=model, batch=batch, logdir=logdir, fp16=is_fp16_used)

# List the contents of the trace directory.
!ls {logdir}/trace/

In [None]:
from catalyst.utils import trace

# Pick the traced file, device and extra options matching the FP16 setting.
if is_fp16_used:
    trace_device = "cuda"
    trace_file = f"{logdir}/trace/traced-forward-opt_O1.pth"
    trace_options = {"opt_level": "O1"}
else:
    trace_device = "cpu"
    trace_file = f"{logdir}/trace/traced-forward.pth"
    trace_options = {}

model = trace.load_traced_model(trace_file, device=trace_device, **trace_options)

# Run the traced model on the example batch, on the same device.
model_input = batch["image"].to(trace_device)
model(model_input)