## Defining initial modules

In [None]:
import sys

# Extend the module search path with the parent directory and the current
# directory (presumably so the project-local packages imported in later
# cells can be resolved — depends on where the notebook is started from).
sys.path.extend(["../", "./"])

In [None]:
import os
import pandas
import albumentations 
import matplotlib.pyplot as plt 
from PIL import Image
import numpy
import typing
import warnings
import torch 
from torch import optim

from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Defining data paths (image and mask directories)

In [None]:
# Default image / mask directories (size-400 variant).
train_images_path = "../data/train/train/images_size_400/"
train_image_masks_path = "../data/train/train/masks_size_400/"

# Every experiment is described by lists of (images_dir, masks_dir) tuples,
# one tuple per image size.

# First experiment: train on sizes 400-600, validate on the '_extra' set.
first_exp_train_urls = [
    (
        "../data/train/train/images_size_%d/" % size,
        "../data/train/train/masks_size_%d/" % size,
    )
    for size in (400, 500, 600)
]

first_exp_validation_urls = [
    ("../data/train/_extra/images_size_500/", "../data/train/_extra/masks_size_500/"),
]

# Second experiment: train on sizes 500-700.
second_exp_train_urls = [
    (
        "../data/train/train/images_size_%d/" % size,
        "../data/train/train/masks_size_%d/" % size,
    )
    for size in (500, 600, 700)
]

# NOTE(review): this directory pair is also part of 'second_exp_train_urls',
# so the second experiment validates on data it was trained on — confirm
# whether a '_extra' directory was intended here.
second_exp_validation_urls = [
    ("../data/train/train/images_size_600/", "../data/train/train/masks_size_600/"),
]

## Loading small batch of images and masks for initial analysis

In [None]:
def load_files(file_path: str) -> typing.List[str]:
    """
    Function collects paths of the image files stored in the 'file_path' directory

    Args:
        - file_path - path, pointing to the files directory

    Returns:
        list with 'file_path' joined to the name of every directory entry
        whose extension is 'jpeg', 'png' or 'jpg' (compared case-insensitively),
        sorted by file name

    Raises:
        FileNotFoundError - when 'file_path' does not exist (raised by os.listdir)
    """
    supported_extensions = ('jpeg', 'png', 'jpg')
    # os.listdir yields entries in arbitrary order. Sorting makes the result
    # deterministic, so lists built from an image directory and a mask
    # directory with matching file names line up index by index.
    return [
        os.path.join(file_path, name)
        for name in sorted(os.listdir(file_path))
        if name.split(".")[-1].lower() in supported_extensions
    ]

In [None]:
def load_experiment_data(urls: typing.Iterable[typing.Tuple[str, str]]):
    """
    Function loads data for experiment

    Args:
        - urls - iterable of (img_url, mask_url) tuples, each holding the
          directory with images and the directory with the corresponding
          masks for one resolution

    Returns:
        (imgs, masks) - two lists of file paths accumulated over all
        directory pairs, in the order the pairs were given

    Raises:
        ValueError - when an image directory and its mask directory
        contain a different number of files
    """
    imgs, masks = [], []
    for img_path, mask_path in urls:
        # Read the directories of the current pair (not the global defaults).
        # Sorting keeps image i next to mask i; this assumes an image and its
        # mask share the same file name — TODO confirm against the data set.
        current_imgs = sorted(load_files(file_path=img_path))
        current_masks = sorted(load_files(file_path=mask_path))

        if len(current_imgs) != len(current_masks):
            raise ValueError(
                'number of images (%d) in %s differs from number of masks (%d) in %s'
                % (len(current_imgs), img_path, len(current_masks), mask_path)
            )

        # Separate per-pair lists keep the accumulators from being overwritten.
        imgs.extend(current_imgs)
        masks.extend(current_masks)
    return imgs, masks

# Loading training and validation sets for first experiment

In [None]:
%%time

f_train_imgs, f_train_masks = load_experiment_data(first_exp_train_urls)
f_validation_imgs, f_validation_masks = load_experiment_data(first_exp_validation_urls)

In [None]:
# One row per sample: path of the image and path of its mask.
first_exp_train_info = pandas.DataFrame(
    data={'image': f_train_imgs, 'mask': f_train_masks}
)

first_exp_validation_info = pandas.DataFrame(
    data={'image': f_validation_imgs, 'mask': f_validation_masks}
)

# Loading training and validation sets for second experiment

In [None]:
# File paths of the second experiment. These must come from the
# 'second_exp_*' url lists; reading 'first_exp_*' here would silently
# repeat the first experiment.
s_train_imgs, s_train_masks = load_experiment_data(second_exp_train_urls)
s_validation_imgs, s_validation_masks = load_experiment_data(second_exp_validation_urls)

In [None]:
%%time

second_exp_train_info = pandas.DataFrame(
    {
        'image': s_train_imgs,
        'mask': s_train_masks
    }
)

second_exp_validation_info = pandas.DataFrame(
    {
        'image': s_validation_imgs,
        'mask': s_validation_masks
    }
)

# Visualizing images and masks

In [None]:
import typing

def visualize_set_of_images(imgs: typing.List, masks: typing.List, n_samples: int = 5):
    """
    Function visualizes images next to their masks for a given
    set of data

    Args:
        - imgs - paths of the image files
        - masks - paths of the mask files; masks[i] is drawn beside imgs[i]
        - n_samples - maximum number of image / mask pairs to draw

    One grid row is drawn per pair (image on the left, mask in grayscale on
    the right). Nothing is drawn when there is no pair to show.
    """
    n_rows = min(n_samples, len(imgs), len(masks))
    if n_rows <= 0:
        return

    # 'figsize' is handed to subplots directly: a separate plt.figure(...)
    # call would open a second, empty figure instead of resizing this grid.
    # squeeze=False keeps 'ax' two-dimensional even when only one row exists.
    _, ax = plt.subplots(ncols=2, nrows=n_rows, figsize=(30, 30), squeeze=False)

    for idx in range(n_rows):
        # imshow converts the PIL image into an array when it is called,
        # so both files can be closed as soon as the row has been drawn.
        with Image.open(imgs[idx]) as img, Image.open(masks[idx]) as mask:
            ax[idx, 0].imshow(img)
            ax[idx, 1].imshow(mask, cmap='gray')

## Visualizing images and masks for training set (first experiment)

In [None]:
# Preview of training images with their masks (first experiment).
visualize_set_of_images(imgs=f_train_imgs, masks=f_train_masks)

## Visualizing images and masks for validation set (first experiment)

In [None]:
# Preview of validation images with their masks (first experiment).
visualize_set_of_images(imgs=f_validation_imgs, masks=f_validation_masks)

## Defining Augmentation Pipeline

In [None]:
# Colour augmentation for training samples: a OneOf group holding a
# ColorJitter (brightness / saturation / hue limits of 0.15) and a FancyPCA
# transform with its default settings.
train_color_transformations = albumentations.Compose([
    albumentations.OneOf([
        albumentations.ColorJitter(brightness=0.15, saturation=0.15, hue=0.15),
        albumentations.FancyPCA(),
    ]),
])

## Defining Datasets for first experiment

In [None]:
from dataset.dataset import SegmentationDataset 

# Training set of the first experiment: image / mask paths plus the colour
# augmentation pipeline.
f_train_dataset = SegmentationDataset(
    color_transformations=train_color_transformations,
    imgs=list(first_exp_train_info['image']),
    masks=list(first_exp_train_info['mask']),
)

# Validation set of the first experiment: no colour transformations are passed.
f_validation_dataset = SegmentationDataset(
    imgs=list(first_exp_validation_info['image']),
    masks=list(first_exp_validation_info['mask']),
)

## Defining datasets for second experiment

In [None]:
# Training set of the second experiment: image / mask paths plus the colour
# augmentation pipeline.
s_train_dataset = SegmentationDataset(
    color_transformations=train_color_transformations,
    imgs=list(second_exp_train_info['image']),
    masks=list(second_exp_train_info['mask']),
)

# Validation set of the second experiment: no colour transformations are passed.
s_validation_dataset = SegmentationDataset(
    imgs=list(second_exp_validation_info['image']),
    masks=list(second_exp_validation_info['mask']),
)

# Beginning of experiments.

### Defining loss functions and evaluation metrics for experiments

In [None]:
from losses.losses import DiceLoss, FocalLoss, ComboLoss

# One gamma value is shared by the standalone focal loss and by the focal
# part of the combined loss.
focal_gamma = 2

focal_loss = FocalLoss(gamma=focal_gamma)
dice_loss = DiceLoss()

# Combined loss configured with a 0.4 dice share and a 0.6 focal share.
combo_loss = ComboLoss(focal_gamma=focal_gamma, focal_prop=0.6, dice_prop=0.4)

### Defining network for experiment

In [None]:
from networks import unet
# Network instance shared by the optimizers and the trainer defined in later
# cells; built with the constructor defaults of unet.UNet.
network = unet.UNet()

### Defining optimizers and learning rate 

In [None]:
# Starting learning rate shared by both optimizers.
learning_rate = 3e-6

# Both optimizers are bound to the parameters of the same 'network'.
adam_opt = optim.Adam(
    lr=learning_rate,
    weight_decay=1e-5,
    params=network.parameters(),
)
sgd_opt = optim.SGD(
    lr=learning_rate,
    params=network.parameters(),
)

### Defining lr scheduling techniques

In [None]:
from torch.optim import lr_scheduler

# NOTE(review): StepLR multiplies the learning rate by 'reduction_gamma' once
# every 'step_size' scheduler steps, so 0.0001 shrinks it by four orders of
# magnitude in a single step — confirm this value is intended.
reduction_gamma = 0.0001
step_size = 5
patience_epochs = 5

# Lower bound for the plateau schedulers. It has to be smaller than the
# starting learning rate: with min_lr equal to 'learning_rate' a plateau
# scheduler can never reduce anything. The value below allows two
# reductions by 'factor=0.1'.
min_learning_rate = learning_rate * 1e-2

# Step schedulers: fixed decay every 'step_size' steps.
adam_step_scheduler = lr_scheduler.StepLR(
    optimizer=adam_opt,
    step_size=step_size, 
    gamma=reduction_gamma
)

sgd_step_scheduler = lr_scheduler.StepLR(
    optimizer=sgd_opt,
    step_size=step_size, 
    gamma=reduction_gamma
)

# Plateau schedulers: scale the learning rate by 'factor' once the monitored
# value (mode='min') has not improved for 'patience_epochs' steps.
adam_plateau_scheduler = lr_scheduler.ReduceLROnPlateau(
    adam_opt, mode='min', 
    factor=0.1,
    patience=patience_epochs, 
    min_lr=min_learning_rate
)

sgd_plateau_scheduler = lr_scheduler.ReduceLROnPlateau(
    sgd_opt, mode='min', 
    factor=0.1,
    patience=patience_epochs, 
    min_lr=min_learning_rate
)

In [None]:
import math

def pick_best_batch_size(
    model_size: float,
    forward_backward_size: float,
    available_memory: typing.Optional[float] = None,
    device: int = 0,
):
    """
    Function returns maximum batch size,
    that can be formed based on existing
    network and computational capabilities

    Following formula is applied: 
    max_batch_size = (available_memory - model_size) / (forward_backward_size)
    and the result is rounded down to the nearest power of two.

    Parameters:
    -----------

    model_size - size of the model in bytes
    forward_backward_size - size of the forward and backward passes
        for a single sample in bytes
    available_memory - memory budget in bytes; when omitted it is taken as
        the total memory of the CUDA device minus the memory currently
        allocated on it by torch
    device - index of the CUDA device to query when 'available_memory'
        is omitted

    Returns:
    --------

    int - largest power of two that does not exceed max_batch_size

    Raises:
    -------

    ValueError - when 'forward_backward_size' is not positive or when not
        even a single sample fits into the available memory
    """
    if forward_backward_size <= 0:
        raise ValueError('forward_backward_size must be positive, got %s' % forward_backward_size)

    if available_memory is None:
        # Both torch values are reported in bytes.
        available_memory = (
            torch.cuda.get_device_properties(device).total_memory
            - torch.cuda.memory_allocated(device)
        )

    max_batch = (available_memory - model_size) / forward_backward_size
    if max_batch < 1:
        raise ValueError(
            'not enough memory for a single sample: available=%s, model=%s, per sample=%s'
            % (available_memory, model_size, forward_backward_size)
        )

    # The highest set bit of the integer part is the largest power of two
    # that does not exceed max_batch (exact, no floating point logarithm).
    return 1 << (math.floor(max_batch).bit_length() - 1)

### Importing network trainer

In [None]:
from network_trainer.network_trainer import NetworkTrainer

In [None]:
# Trainer for the run below: Adam optimizer with its step scheduler and the
# focal loss, on the 'cuda' device.
trainer = NetworkTrainer(
    network=network,
    loss_function=focal_loss,
    optimizer=adam_opt,
    lr_scheduler=adam_step_scheduler,
    batch_size=32,
    max_epochs=10,
    early_stopping_patience=5,
    train_device='cuda',
)

In [None]:
%%time
train_loss, train_history = trainer.train(f_train_dataset)

In [None]:
# Report the loss returned by the training run.
print('training loss for first trainer: %s' % (str(f_train_loss),))

In [None]:
# Training history plotted against its position index (0, 1, 2, ...).
plt.figure(figsize=(10, 10))
plt.plot(range(len(f_train_history)), f_train_history)
plt.title("Training loss for the first experiment")

In [None]:
%%time
# Evaluate the trained network on the validation set of the first experiment;
# the result is printed a few cells below as an f1-score.
f_eval_metric = trainer.evaluate(f_validation_dataset)

In [None]:
# Grid of 5 validation samples: predicted mask on the left, mask taken from
# the dataset on the right.
fg, ax = plt.subplots(ncols=2, nrows=5, figsize=(20, 30))
ax[0, 0].set_title('predicted mask')
ax[0, 1].set_title('ground-truth mask')
for idx in range(5):
    # Index with 'idx' (not a constant 0) so every row shows a different sample.
    img, mask = f_validation_dataset[idx]
    # predict receives a one-element batch; its first result is taken.
    predicted_mask = trainer.predict([img])[0]
    # Two leading axes are squeezed away before plotting (presumably batch
    # and channel — TODO confirm against NetworkTrainer.predict).
    ax[idx, 0].imshow(predicted_mask.squeeze(0).squeeze(0), cmap='gray')
    ax[idx, 1].imshow(mask, cmap='gray')

In [None]:
# Report the metric returned by trainer.evaluate.
print('evaluation metric f1-score for first trainer: %s' % (str(f_eval_metric),))

## Converting model to ONNX Format

In [None]:
# Destination of the exported ONNX file.
model_path = "../final_model/network.onnx"
# NOTE(review): 'best_input' is not defined in any visible cell of this
# notebook, so this line raises NameError on a fresh kernel. Presumably it
# should be an example input for the export — confirm against
# NetworkTrainer.save_network and define it before this call.
trainer.save_network(model_path, best_input)