In [1]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf

from sys import getsizeof
import os

from PIL import Image
import cv2

import matplotlib.pyplot as plt
from glob import glob

from sklearn.model_selection import train_test_split
import numpy as np
import math
import pandas as pd
import mlflow

# Detect GPUs with the non-deprecated API; tf.test.is_gpu_available() emits a
# deprecation notice pointing to tf.config.list_physical_devices('GPU').
# NOTE(review): CUDA_VISIBLE_DEVICES is normally read when CUDA initialises, so
# setting it after the device query may have no effect — confirm.
if tf.config.list_physical_devices('GPU'):
    os.environ["CUDA_VISIBLE_DEVICES"]="0"
    print('cuda connected')

import segmentation_models as sm
# Select the tf.keras backend for segmentation_models before any model is built.
sm.set_framework('tf.keras')
# Query the active framework; the return value is not used or displayed here.
sm.framework()

import albumentations as A

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
cuda connected
Segmentation Models: using `keras` framework.


2022-10-02 14:00:13.705320: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-02 14:00:14.678412: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-02 14:00:14.737661: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-02 14:00:14.737949: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [2]:
def rle_decode(mask_rle, shape):
    '''
    Decode a run-length-encoded mask into a 2-D binary array.

    mask_rle: run-length string formatted as "start length start length ...",
        where starts are 1-based positions in the flattened (row-major) image.
        A missing value (None or float NaN, which is how pandas represents an
        empty cell) is decoded as an all-background mask.
    shape: (height, width) of array to return
    Returns numpy uint8 array of the given shape, 1 - mask, 0 - background

    '''
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    # Rows without an annotation arrive as None/NaN rather than a string.
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        return flat.reshape(shape)

    s = np.asarray(mask_rle.split(), dtype=int)
    starts = s[0::2] - 1  # convert the 1-based starts to 0-based offsets
    ends = starts + s[1::2]
    for lo, hi in zip(starts, ends):
        flat[lo:hi] = 1
    # The default (row-major) reshape matches the direction the runs were encoded in.
    return flat.reshape(shape)

# TIS inference

In [3]:
def round_clip_0_1(x, **kwargs):
    """Round ``x`` to the nearest integer, then clamp it to the range [0, 1].

    Extra keyword arguments are accepted and ignored.
    """
    rounded = x.round()
    return rounded.clip(0, 1)

# define heavy augmentations
def get_training_augmentation():
    """Build the heavy augmentation pipeline used for training samples.

    The pipeline applies, in order: geometric transforms ending in a 320x320
    crop, one brightness-type transform, one sharpen/blur transform, one
    contrast/colour transform, and finally a mask clean-up step that rounds
    and clips the mask values to {0, 1}.

    Returns:
        albumentations.Compose
    """
    # NOTE(review): IAAPerspective, IAASharpen, RandomBrightness and
    # RandomContrast are deprecated in newer albumentations releases —
    # confirm they exist in the installed version.
    geometric = [
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),
        A.PadIfNeeded(min_height=320, min_width=320, always_apply=True, border_mode=0),
        A.RandomCrop(height=320, width=320, always_apply=True),
        A.IAAPerspective(p=0.5),
    ]

    # Each OneOf group fires with probability 0.9 and picks one member.
    brightness = A.OneOf(
        [A.CLAHE(p=1), A.RandomBrightness(p=1), A.RandomGamma(p=1)],
        p=0.9,
    )
    sharpness = A.OneOf(
        [A.IAASharpen(p=1), A.Blur(blur_limit=3, p=1), A.MotionBlur(blur_limit=3, p=1)],
        p=0.9,
    )
    colour = A.OneOf(
        [A.RandomContrast(p=1), A.HueSaturationValue(p=1)],
        p=0.9,
    )

    # Applied to the mask only: round and clip its values back to 0/1.
    binarize_mask = A.Lambda(mask=round_clip_0_1)

    return A.Compose(geometric + [brightness, sharpness, colour, binarize_mask])


def get_validation_augmentation():
    """Pad validation samples up to at least 384x480 (both divisible by 32)."""
    return A.Compose([A.PadIfNeeded(384, 480)])

def get_preprocessing(preprocessing_fn):
    """Wrap a normalization function in an albumentations pipeline.

    Args:
        preprocessing_fn (callable): data normalization function, applied to
            the image only (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose

    """
    normalize_image = A.Lambda(image=preprocessing_fn)
    return A.Compose([normalize_image])

In [4]:
# Project root: the kernel's working directory, used below as the base for the
# image paths.
DIRECTORY = os.getcwd()
DIRECTORY

'/home/ares/work/TIS'

In [5]:
# Read the annotation table relative to the project root rather than through a
# machine-specific absolute path.
data = pd.read_csv(os.path.join(DIRECTORY, 'train.csv'))
# Keep only the rows that actually carry a segmentation.
data = data[data['segmentation'].notna()]
data['class'].value_counts()

large_bowel    14085
small_bowel    11201
stomach         8627
Name: class, dtype: int64

In [6]:
# Resolve every annotation id to the single image file it refers to.
# presumably ids look like "case123_day20_slice_0065" — verify against train.csv
all_paths_to_imgs = []
for sample_id in data['id']:
    id_parts = sample_id.split('_')
    local_path = DIRECTORY
    for part in id_parts:
        if part == 'slice':
            # File-name prefix "slice_<last id token>"; the remainder of the
            # file name is matched by the glob below.
            local_path = os.path.join(local_path, f"{part}_{id_parts[-1]}")
            break
        elif "day" in part:
            # <case>/<case>_<day>/scans
            case_name = os.path.basename(local_path)
            local_path = os.path.join(local_path, f"{case_name}_{part}", 'scans')
        else:
            local_path = os.path.join(local_path, "train", part)
    pattern = local_path + "_*"
    matches = glob(pattern)
    # Exactly one file per id is required; anything else would misalign the
    # resolved paths with the rows of `data`.
    if len(matches) != 1:
        raise FileNotFoundError(
            f"expected exactly one image for id {sample_id!r} "
            f"matching {pattern!r}, found {len(matches)}"
        )
    all_paths_to_imgs.append(matches[0])
all_paths_to_imgs = np.array(all_paths_to_imgs)

In [7]:
# Copy of the annotation table with ids replaced by the resolved image paths
# and the index renumbered from 0.
refreshed_data = data.assign(id=all_paths_to_imgs).reset_index(drop=True)
refreshed_data

Unnamed: 0,id,class,segmentation
0,/home/ares/work/TIS/train/case123/case123_day2...,stomach,28094 3 28358 7 28623 9 28889 9 29155 9 29421 ...
1,/home/ares/work/TIS/train/case123/case123_day2...,stomach,27561 8 27825 11 28090 13 28355 14 28620 15 28...
2,/home/ares/work/TIS/train/case123/case123_day2...,stomach,15323 4 15587 8 15852 10 16117 11 16383 12 166...
3,/home/ares/work/TIS/train/case123/case123_day2...,stomach,14792 5 15056 9 15321 11 15587 11 15852 13 161...
4,/home/ares/work/TIS/train/case123/case123_day2...,stomach,14526 6 14789 12 15054 14 15319 16 15584 17 15...
...,...,...,...
33908,/home/ares/work/TIS/train/case30/case30_day0/s...,small_bowel,22540 1 22804 5 23069 7 23334 10 23600 11 2386...
33909,/home/ares/work/TIS/train/case30/case30_day0/s...,large_bowel,18746 7 19009 23 19038 7 19273 44 19537 49 198...
33910,/home/ares/work/TIS/train/case30/case30_day0/s...,small_bowel,23079 1 23343 6 23608 9 23874 11 24139 13 2440...
33911,/home/ares/work/TIS/train/case30/case30_day0/s...,large_bowel,18746 2 19010 8 19040 3 19274 25 19302 12 1953...


In [8]:
# helper function for data visualization
def visualize(**images):
    """Plot images in one row.

    Each keyword argument is one panel: the keyword (underscores turned into
    spaces, title-cased) becomes the panel title and its value is the image.
    """
    panel_count = len(images)
    plt.figure(figsize=(16, 5))
    for position, (label, picture) in enumerate(images.items(), start=1):
        plt.subplot(1, panel_count, position)
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace('_', ' ').title())
        plt.imshow(picture)
    plt.show()
    
# helper function for data visualization    
def denormalize(x):
    """Rescale an image to the range 0..1 so that it plots correctly.

    The 2nd and 98th percentiles of ``x`` are mapped to 0 and 1; values that
    fall outside that span are clipped.
    """
    low = np.percentile(x, 2)
    high = np.percentile(x, 98)
    scaled = (x - low) / (high - low)
    return scaled.clip(0, 1)


# classes for data loading and preprocessing
class Dataset:
    """Image/mask dataset backed by a DataFrame of image paths and RLE masks.

    Reads an image from disk, decodes its run-length-encoded mask, then applies
    the optional augmentation and preprocessing pipelines.

    Args:
        data (pd.DataFrame): table with an ``id`` column holding image file
            paths and a ``segmentation`` column holding RLE strings
        augmentation (albumentations.Compose): data transformation pipeline
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing
            (e.g. normalization, shape manipulation, etc.)

    """

    def __init__(
            self,
            data: pd.DataFrame,
            augmentation=None,
            preprocessing=None,
    ):
        self.images_fps = data['id'].values
        self.masks_rle_encode = data['segmentation'].values

        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, i):
        """Return the ``(image, mask)`` pair at position ``i``.

        The image is an RGB uint8 array; the mask is a float32 array of shape
        ``(height, width, 1)``.

        Raises:
            OSError: if the image file cannot be read.
        """
        path = self.images_fps[i]

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(path)
        if image is None:
            raise OSError(f"cv2.imread could not read image: {path!r}")
        # NOTE(review): with default flags cv2.imread yields 8-bit data; if the
        # scans are stored with a higher bit depth this loses range — confirm.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.uint8)

        # Decode the RLE string to the image's (height, width) and add a
        # trailing channel axis.
        mask = rle_decode(self.masks_rle_encode[i], image.shape[:2]).astype(np.float32)
        mask = np.expand_dims(mask, axis=2)

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        """Number of samples (rows of the source DataFrame)."""
        return self.images_fps.shape[0]
    
    
class Dataloader(tf.keras.utils.Sequence):
    """Load data from dataset and form batches

    Args:
        dataset: instance of Dataset class for image loading and preprocessing.
        batch_size: Integer number of images in batch.
        shuffle: Boolean, if `True` shuffle image indexes each epoch.
    """

    def __init__(self, dataset, batch_size=1, shuffle=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.dataset))

        # Produce the initial ordering (a permutation when shuffle is on).
        self.on_epoch_end()

    def __getitem__(self, i):
        """Return batch ``i`` as a list of stacked arrays, one per sample field."""
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        # Look samples up through self.indexes so that the permutation made in
        # on_epoch_end actually changes which samples land in each batch.
        data = [self.dataset[self.indexes[j]] for j in range(start, stop)]
        # transpose list of (image, mask) samples into [images, masks]
        batch = [np.stack(samples, axis=0) for samples in zip(*data)]

        return batch

    def __len__(self):
        """Denotes the number of batches per epoch (a trailing partial batch is dropped)"""
        return len(self.indexes) // self.batch_size

    def on_epoch_end(self):
        """Callback function to shuffle indexes each epoch"""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)

## CREATE MODEL

In [9]:
# Model identity and checkpoint location.
BACKBONE = 'efficientnetb0'
MODEL_NAME = "unet"
PATH_TO_MODEL = f"./models/{MODEL_NAME}.h5"
# When True, a later cell loads weights from PATH_TO_MODEL before training.
LOAD_MODEL = True

# Training hyper-parameters.
BATCH_SIZE = 5
EPOCHS = 100
LR = 0.01

# Parameters collected for experiment tracking; later cells add more keys.
base_model_params = dict(backbone=BACKBONE, batch_size=BATCH_SIZE, epochs=EPOCHS)

# Input normalization function matching the chosen backbone.
preprocess_input = sm.get_preprocessing(BACKBONE)

In [10]:
# A fixed seed keeps the validation set identical across kernel restarts; this
# matters because training can resume from previously saved weights.
SPLIT_SEED = 42
target_col_name = "class"
# NOTE(review): rows are split individually, so images that share a path
# prefix (same case/day folder) can land on both sides of the split —
# confirm whether a grouped split is wanted.
train_data, val_data, y_train, y_val = train_test_split(
    refreshed_data.drop(target_col_name, axis=1),
    refreshed_data[target_col_name],
    test_size=0.2,
    stratify=refreshed_data[target_col_name],
    random_state=SPLIT_SEED,
)

In [11]:
# Training samples get the heavy augmentation pipeline; validation samples are
# only padded. Both are normalized with the backbone's preprocessing function.
train_dataset = Dataset(
    data=train_data.join(y_train),
    augmentation=get_training_augmentation(),
    preprocessing=get_preprocessing(preprocess_input),
)

val_dataset = Dataset(
    data=val_data.join(y_val),
    augmentation=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocess_input),
)

# Request reshuffling of the training indexes between epochs; validation keeps
# a fixed order.
train_data_loader = Dataloader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_data_loader = Dataloader(val_dataset, batch_size=BATCH_SIZE)

In [12]:
# define network parameters
n_classes = 3  # case for binary and multiclass segmentation
activation = 'sigmoid' if n_classes == 1 else 'softmax'
# NOTE(review): Dataset.__getitem__ yields single-channel masks (H, W, 1) while
# this model emits n_classes channels, and the `class` column is never turned
# into a channel index — confirm the targets match the model output.

# create model
model = sm.Unet(BACKBONE, classes=n_classes, activation=activation)
MODEL_NAME = "Unet"

if LOAD_MODEL:
    # Resume from the checkpoint written by a previous run.
    model.load_weights(PATH_TO_MODEL)
    print('weights loaded')

# Record the architecture for experiment tracking whether or not weights were
# loaded (previously these were only set inside the LOAD_MODEL branch).
base_model_params['model-name'] = MODEL_NAME
base_model_params['backbone-name'] = BACKBONE

2022-09-29 11:47:11.458720: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-29 11:47:11.459064: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-29 11:47:11.459314: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-29 11:47:11.459900: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-29 11:47:11.460164: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from S

weights loaded


2022-09-29 11:47:12.834786: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 17989632 exceeds 10% of free system memory.


In [13]:
# define optimizer
optim = tf.keras.optimizers.Adam(LR)

# Segmentation-models losses can be combined with '+' and scaled by a number;
# here the total loss is a weighted sum of Dice loss and focal loss.
dice_coef, focal_coef = 0.6, 0.4
dice_loss = sm.losses.DiceLoss()
if n_classes == 1:
    focal_loss = sm.losses.BinaryFocalLoss()
else:
    focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss*dice_coef + (1 * focal_loss)*focal_coef
# The library also ships ready-made combinations, e.g.
# sm.losses.binary_focal_dice_loss or sm.losses.categorical_focal_dice_loss.

# Record optimizer name and loss weights for experiment tracking.
base_model_params.update({
    'optimizer': optim.get_config()['name'],
    'dice loss coef': dice_coef,
    'focal loss coef': focal_coef,
})

# IoU and F-score, both computed on predictions thresholded at 0.5.
metrics = [sm.metrics.IOUScore(threshold=0.5), sm.metrics.FScore(threshold=0.5)]

# learning rate schedule
def step_decay(epoch, *, initial_lrate=0.001, drop=0.9, epochs_drop=10.0):
    """Step-wise exponential learning-rate schedule.

    The rate starts at ``initial_lrate`` and is multiplied by ``drop`` once for
    every ``epochs_drop`` epochs completed, i.e.
    ``initial_lrate * drop ** floor((1 + epoch) / epochs_drop)``.

    Args:
        epoch: epoch index as passed by the scheduler callback.
        initial_lrate: learning rate before any decay step (keyword-only).
        drop: multiplicative factor applied at each decay step (keyword-only).
        epochs_drop: number of epochs between decay steps (keyword-only).

    Returns:
        float: learning rate to use for ``epoch``.
    """
    # The tuning knobs are keyword-only on purpose: a scheduler that tries to
    # pass the current learning rate as a second positional argument must not
    # have it mistaken for initial_lrate.
    steps_taken = math.floor((1 + epoch) / epochs_drop)
    return initial_lrate * math.pow(drop, steps_taken)

# compile keras model with defined optimizer, loss and metrics
model.compile(optim, total_loss, metrics)

# Make sure the output directories exist before the callbacks write to them.
os.makedirs(os.path.dirname(PATH_TO_MODEL) or '.', exist_ok=True)
os.makedirs('./logging', exist_ok=True)

# NOTE(review): LearningRateScheduler sets the learning rate from step_decay at
# every epoch, so the LR value given to the optimizer is overridden — confirm
# which starting rate is intended.
callbacks = [
    # Keep only the weights with the lowest validation loss seen so far.
    tf.keras.callbacks.ModelCheckpoint(PATH_TO_MODEL, monitor='val_loss', save_weights_only=True, save_best_only=True, mode='min'),
    tf.keras.callbacks.CSVLogger('./logging/log.csv'),
    tf.keras.callbacks.LearningRateScheduler(step_decay)
]

In [14]:
# train model
# Model.fit accepts keras Sequence objects directly; fit_generator is deprecated.
history = model.fit(
    train_data_loader,
    steps_per_epoch=len(train_data_loader),
    epochs=EPOCHS,
    callbacks=callbacks,
    validation_data=val_data_loader,
    validation_steps=len(val_data_loader),
)

Epoch 1/100


2022-09-29 11:47:44.361758: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8400
2022-09-29 11:47:47.061736: W tensorflow/core/common_runtime/bfc_allocator.cc:275] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.08GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2022-09-29 11:47:47.062486: W tensorflow/core/common_runtime/bfc_allocator.cc:275] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.08GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2022-09-29 11:47:47.231953: W tensorflow/core/common_runtime/bfc_allocator.cc:275] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.09GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory

Epoch 2/100


2022-09-29 12:18:23.297255: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 17989632 exceeds 10% of free system memory.


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


2022-09-29 14:12:30.291203: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 17989632 exceeds 10% of free system memory.


Epoch 7/100
Epoch 8/100
Epoch 9/100

In [None]:
loss_history = history.history['loss']
base_model_params['loss'] = loss_history
# The context manager ends the run on exit, so no explicit end_run() is needed.
with mlflow.start_run():
    # Params hold the scalar settings of the run; the per-epoch loss curve is a
    # series and is therefore logged as a metric, one point per epoch.
    mlflow.log_params({key: value for key, value in base_model_params.items() if key != 'loss'})
    for epoch, loss_value in enumerate(loss_history):
        mlflow.log_metric('loss', float(loss_value), step=epoch)

In [4]:
# Inspect what has been recorded in the MLflow tracking store.
# NOTE(review): this import sits outside the notebook's import cell, and the
# execution counts from here on restart — confirm the cells run top to bottom.
from  mlflow.tracking import MlflowClient
client = MlflowClient()
experiments = client.list_experiments() # returns a list of mlflow.entities.Experiment

In [6]:
# Display the first experiment returned by the client.
experiments[0]

<Experiment: artifact_location='file:///home/ares/work/TIS/mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>

In [12]:
# Fetch one run by its id; the id is hard-coded, so this only works against the
# tracking store that contains that run.
run = client.get_run('0207c13cc40a4c2e99fa251bd6166f82')

## Famous models preparation