### The training workflow below is based on the following references

#### data preprocessing & batch data generator:
https://github.com/lyakaap/Kaggle-Carvana-3rd-Place-Solution

#### model architecture: 
U-Net, a convolutional network for biomedical image segmentation proposed by Ronneberger et al. (https://arxiv.org/abs/1505.04597) <br />
Implementation of U-Net in Keras (https://github.com/kimoktm/U-Net)
___

In [5]:
import functools
import os
import threading
import warnings

import numpy as np
import pandas as pd

from IPython.display import display

from skimage.io import imread
from skimage.transform import resize

from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import Input, Conv2D,  MaxPooling2D, Dropout, concatenate, UpSampling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from loss import dice_coef, bce_dice_loss

warnings.filterwarnings("ignore")

### Data Preprocessing & Batch Data Generator

In [2]:
# Spatial size that every image and mask is resized to by the batch generators;
# the same two values define the model's input shape.
WIDTH = 1024
HEIGHT = 1024
# Number of samples per batch; also used to derive the number of steps per epoch.
BATCH_SIZE = 32

class ThreadSafeIterator:
    """Iterator wrapper that advances the wrapped iterator under a lock.

    Attributes:
        it: the wrapped iterator.
        lock: ``threading.Lock`` held while the wrapped iterator is advanced,
            so only one caller at a time can pull the next item.
    """

    def __init__(self, it):
        self.lock = threading.Lock()
        self.it = it

    def __iter__(self):
        # The wrapper is itself the iterator.
        return self

    def __next__(self):
        self.lock.acquire()
        try:
            return next(self.it)
        finally:
            # Always release, including when the wrapped iterator raises
            # StopIteration or any other exception.
            self.lock.release()


def threadsafe_generator(f):
    """
    A decorator that takes a generator function and makes it thread-safe.

    Args:
        f: a generator function (or any callable returning an iterator).

    Returns:
        A function with the same call signature as ``f`` whose result is the
        iterator produced by ``f`` wrapped in a ``ThreadSafeIterator``.
    """

    # Preserve the wrapped function's name and docstring so the decorated
    # generators stay identifiable in tracebacks and with `obj?`.
    @functools.wraps(f)
    def g(*args, **kwargs):
        return ThreadSafeIterator(f(*args, **kwargs))

    return g


@threadsafe_generator
def train_generator(df):
    """Yield shuffled, randomly flipped training batches forever.

    Args:
        df: pandas object supporting ``.iloc`` whose values are image ids;
            each id is formatted into ``data/train/{id}.jpg`` and
            ``data/train_masks/{id}_mask.gif``.

    Yields:
        Tuple ``(x_batch, y_batch)`` of float32 arrays divided by 255, holding
        at most ``BATCH_SIZE`` samples (the last batch of a pass may be smaller).
    """
    while True:
        # A fresh permutation on every pass so each epoch sees a new order.
        shuffle_indices = np.random.permutation(len(df))

        for start in range(0, len(df), BATCH_SIZE):
            x_batch = []
            y_batch = []

            end = min(start + BATCH_SIZE, len(df))
            indices_batch = df.iloc[shuffle_indices[start:end]]

            for index in indices_batch.values:
                # skimage's resize takes (rows, cols), i.e. (HEIGHT, WIDTH),
                # matching the model's Input((HEIGHT, WIDTH, 3)).
                img = imread('data/train/{}.jpg'.format(index))
                img = resize(img, (HEIGHT, WIDTH), preserve_range=True)

                mask = imread('data/train_masks/{}_mask.gif'.format(index))
                mask = resize(mask, (HEIGHT, WIDTH), preserve_range=True)
                # NOTE(review): if the mask loads as a 2-D array, y_batch has no
                # channel axis while the model outputs (H, W, 1) — confirm the
                # loss in `loss.py` handles this.

                # === You can add data augmentations here. === #
                if np.random.random() < 0.5:
                    # Random horizontal flip. Reverse axis 1 (width) on both
                    # arrays so image and mask stay aligned whether the mask
                    # is (H, W) or (H, W, C).
                    img, mask = img[:, ::-1], mask[:, ::-1]

                x_batch.append(img)
                y_batch.append(mask)

            x_batch = np.array(x_batch, np.float32) / 255.
            y_batch = np.array(y_batch, np.float32) / 255.

            yield x_batch, y_batch


@threadsafe_generator
def valid_generator(df):
    """Yield validation batches forever, in a fixed order and without augmentation.

    Args:
        df: pandas object supporting ``.iloc`` whose values are image ids;
            each id is formatted into ``data/train/{id}.jpg`` and
            ``data/train_masks/{id}_mask.gif``.

    Yields:
        Tuple ``(x_batch, y_batch)`` of float32 arrays divided by 255, holding
        at most ``BATCH_SIZE`` samples (the last batch of a pass may be smaller).
    """
    while True:
        for start in range(0, len(df), BATCH_SIZE):
            x_batch = []
            y_batch = []

            end = min(start + BATCH_SIZE, len(df))
            indices_batch = df.iloc[start:end]

            for index in indices_batch.values:
                # skimage's resize takes (rows, cols), i.e. (HEIGHT, WIDTH),
                # matching the model's Input((HEIGHT, WIDTH, 3)).
                img = imread('data/train/{}.jpg'.format(index))
                img = resize(img, (HEIGHT, WIDTH), preserve_range=True)

                mask = imread('data/train_masks/{}_mask.gif'.format(index))
                mask = resize(mask, (HEIGHT, WIDTH), preserve_range=True)

                x_batch.append(img)
                y_batch.append(mask)

            x_batch = np.array(x_batch, np.float32) / 255.
            y_batch = np.array(y_batch, np.float32) / 255.

            yield x_batch, y_batch

### Model Architecture

In [3]:
# Keyword arguments shared by every ReLU convolution in the network.
CONV_KWARGS = dict(activation='relu', padding='same', kernel_initializer='he_normal')

inputs = Input((HEIGHT, WIDTH, 3))

# --- Encoder: two 3x3 convolutions per level, then 2x2 max pooling ---
conv1 = Conv2D(16, 3, **CONV_KWARGS)(inputs)
conv1 = Conv2D(16, 3, **CONV_KWARGS)(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

conv2 = Conv2D(32, 3, **CONV_KWARGS)(pool1)
conv2 = Conv2D(32, 3, **CONV_KWARGS)(conv2)
drop2 = Dropout(0.5)(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(drop2)

# --- Bottleneck ---
conv3 = Conv2D(64, 3, **CONV_KWARGS)(pool2)
conv3 = Conv2D(64, 3, **CONV_KWARGS)(conv3)
drop3 = Dropout(0.5)(conv3)

# --- Decoder: upsample, 2x2 convolution, concatenate with the encoder
# --- feature map of the same resolution, then two 3x3 convolutions ---
upsampled4 = UpSampling2D(size=(2, 2))(drop3)
up4 = Conv2D(32, 2, **CONV_KWARGS)(upsampled4)
# The skip connection uses conv2 (before dropout), not drop2.
concat4 = concatenate([conv2, up4], axis=3)
conv4 = Conv2D(32, 3, **CONV_KWARGS)(concat4)
conv4 = Conv2D(32, 3, **CONV_KWARGS)(conv4)

upsampled5 = UpSampling2D(size=(2, 2))(conv4)
up5 = Conv2D(16, 2, **CONV_KWARGS)(upsampled5)
concat5 = concatenate([conv1, up5], axis=3)
conv5 = Conv2D(16, 3, **CONV_KWARGS)(concat5)
conv5 = Conv2D(16, 3, **CONV_KWARGS)(conv5)

# --- Head: 2-filter 3x3 convolution, then a 1x1 sigmoid convolution that
# --- produces a single-channel output ---
conv6 = Conv2D(2, 3, **CONV_KWARGS)(conv5)
conv7 = Conv2D(1, 1, activation='sigmoid')(conv6)

model = Model(inputs=inputs, outputs=conv7)

# Training hyper-parameters; the decay passed to Adam is learning_rate / epochs.
epochs = 50
learning_rate = 1e-4
decay_rate = learning_rate / epochs

# NOTE(review): `lr` and `decay` look like legacy Adam arguments; newer
# TensorFlow releases may expect `learning_rate` and a schedule — confirm
# against the installed version.
optimizer = Adam(lr=learning_rate, decay=decay_rate)
model.compile(optimizer=optimizer,
              loss=bce_dice_loss,
              metrics=[dice_coef])

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1024, 1024, 3 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 1024, 1024, 1 448         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 1024, 1024, 1 2320        conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 512, 512, 16) 0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (

### Model Training

In [None]:
# Both callbacks watch the validation Dice coefficient, where higher is better.
early_stopping = EarlyStopping(
    monitor='val_dice_coef',
    mode='max',
    min_delta=1e-4,
    patience=5,
    verbose=1,
)

# Only write to 'model_weights.hdf5' when the monitored metric improves.
model_checkpoint = ModelCheckpoint(
    filepath='model_weights.hdf5',
    monitor='val_dice_coef',
    mode='max',
    save_best_only=True,
)

callbacks = [early_stopping, model_checkpoint]

In [None]:
# Image ids are the file names in the 'img' column with the extension removed.
indices = pd.read_csv('data/train_masks.csv')['img'].map(lambda s: s.split('.')[0])

# Hold out 10% of the ids for validation; the fixed seed keeps the split reproducible.
indices_train, indices_valid = train_test_split(indices, test_size=0.1, random_state=42)

# One epoch covers every sample once, including a final partial batch.
# np.ceil returns a float, so convert to int: Keras documents step counts as integers.
steps_per_epoch = int(np.ceil(len(indices_train) / BATCH_SIZE))
validation_steps = int(np.ceil(len(indices_valid) / BATCH_SIZE))

model.fit_generator(generator=train_generator(indices_train),
                    steps_per_epoch=steps_per_epoch,
                    epochs=epochs,
                    verbose=2,
                    callbacks=callbacks,
                    validation_data=valid_generator(indices_valid),
                    validation_steps=validation_steps)

### Result Analysis