## LinkNet: A Keras implementation for the Kaggle Carvana Image Masking Challenge 


Date created: Dec 20, 2017   
Last modified: Jan 19, 2018  
Tags: LinkNet, Keras, semantic segmentation


## Import libraries

In [1]:
import cv2
import numpy as np
import pandas as pd


from keras.models import Model
from keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D
from keras.layers import Input, Activation, BatchNormalization, concatenate
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.optimizers import RMSprop
from keras.losses import binary_crossentropy
import keras.backend as K

from sklearn.model_selection import train_test_split
import h5py
import random

import linknet
import losses
import augmentation

# Root directory of the competition data; the cells below read
# train_masks.csv, train/ and train_masks_png/ relative to it.
DATAPATH = "../data"

Using TensorFlow backend.
  return f(*args, **kwds)


## LinkNet Model

In [2]:
# Instantiate LinkNet for 512 x 512 three-channel inputs and print its layer table.
model = linknet.build_LinkNet(
    input_shape=(512, 512, 3),
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
initial__conv (Conv2D)          (None, 256, 256, 64) 9472        input_1[0][0]                    
__________________________________________________________________________________________________
initial__bn (BatchNormalization (None, 256, 256, 64) 256         initial__conv[0][0]              
__________________________________________________________________________________________________
initial_act (Activation)        (None, 256, 256, 64) 0           initial__bn[0][0]                
__________________________________________________________________________________________________
max_poolin

## Data, Preprocessing, Augmentation

#### Data
The training dataset consists of 5088 images. Each car is photographed from 16 fixed angles. The ground-truth training masks were converted from *.gif* to *.png* format so that they can be read with the *OpenCV* library.

The images and ground truth masks can be found in the *train* and *train_masks* folders respectively. The test data was not used.

In [4]:
# Load the table of training masks; its `img` column is used below to derive image ids.
df_train = pd.read_csv('{}/train_masks.csv'.format(DATAPATH))

In [5]:
# Drop the first two rows of the table, then preview what remains.
# NOTE: this reassigns df_train, so re-running the cell drops two more rows.
df_train = df_train.iloc[2:]
df_train.head()

In [6]:
# Image id = file name up to (not including) its first '.'.
ids_train = df_train['img'].map(lambda fname: fname.partition('.')[0])

In [7]:
# Hold out 20% of the ids for validation; the fixed random_state keeps the split repeatable.
ids_train_split, ids_valid_split = train_test_split(
    ids_train,
    test_size=0.2,
    random_state=42,
)

#### Preprocessing -- rescaling (input and mask data)
The original image resolution of 1918 x 1280 was downsampled to 512 x 512 using the *OpenCV* library functions.

In [3]:
# Resolution (pixels) that images and masks are resized to before training.
input_width, input_height = 512, 512
# Resolution (pixels) of the original images.
orig_width, orig_height = 1918, 1280
# Number of epochs passed to the training call.
max_epochs = 10
# NOTE(review): not referenced by the cells shown here; presumably the cut-off
# for binarising predicted masks -- confirm.
threshold = 0.5

In [None]:
# Cache every training image and its mask in memory, already resized to the
# network input resolution and keyed by image id.
all_imgs  = {}
all_masks = {}

for img_id in ids_train:
    img_path  = DATAPATH+'/train/{}.jpg'.format(img_id)
    mask_path = DATAPATH+'/train_masks_png/{}_mask.png'.format(img_id)

    # cv2.imread returns None (it does not raise) when a file is missing or
    # unreadable; fail here with the offending path instead of letting
    # cv2.resize raise an opaque error.
    img = cv2.imread(img_path)
    if img is None:
        raise IOError('Could not read image file: {}'.format(img_path))
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise IOError('Could not read mask file: {}'.format(mask_path))

    img  = cv2.resize(img, (input_width, input_height))
    mask = cv2.resize(mask, (input_width, input_height))
    all_imgs[img_id]  = img
    all_masks[img_id] = mask

#### Augmentation

The following transformations, implemented with the *OpenCV* library, were applied:
* Hue, Saturation, Value using randomHueSaturationValue
* Shift, Scale, Rotate using randomShiftScaleRotate
* Horizontal flips using randomHorizontalFlip

## Training

In [None]:
def train_generator(train_batch_size):
    """Yield shuffled, augmented training batches forever.

    At the start of every pass the ids in ``ids_train_split`` are reshuffled
    and cut into consecutive batches of ``train_batch_size`` (the last batch
    of a pass may be smaller). Images and masks are taken from the in-memory
    ``all_imgs`` / ``all_masks`` caches and every pair is run through the
    ``augmentation`` helpers before being batched.

    Parameters
    ----------
    train_batch_size : int
        Maximum number of samples per batch; must be positive.

    Yields
    ------
    (x_batch, y_batch) : tuple of np.ndarray
        float32 arrays of the augmented images and masks, both divided by
        255. Each mask gets an extra axis inserted at position 2.

    Raises
    ------
    ValueError
        On first iteration, if ``train_batch_size`` is not positive or
        ``ids_train_split`` is empty.
    """
    if train_batch_size <= 0:
        raise ValueError('train_batch_size must be positive, got {}'.format(train_batch_size))
    while True:
        shuffled_ids = random.sample(list(ids_train_split), len(ids_train_split))
        if not shuffled_ids:
            # Without this guard an empty split would loop forever without
            # ever yielding a batch.
            raise ValueError('ids_train_split is empty; there is nothing to train on')
        for start in range(0, len(shuffled_ids), train_batch_size):
            x_batch = []
            y_batch = []
            # Slicing past the end is clipped, so the final batch is simply shorter.
            for img_id in shuffled_ids[start:start + train_batch_size]:
                img  = all_imgs[img_id]
                mask = all_masks[img_id]
                # Colour jitter is applied to the image only.
                img = augmentation.randomHueSaturationValue(img,
                                               hue_shift_limit=(-50, 50),
                                               sat_shift_limit=(-5, 5),
                                               val_shift_limit=(-15, 15))
                # The remaining helpers receive image and mask together.
                img, mask = augmentation.randomShiftScaleRotate(img, mask,
                                                   shift_limit=(-0.0625, 0.0625),
                                                   scale_limit=(-0.1, 0.1),
                                                   rotate_limit=(-0, 0))
                img, mask = augmentation.randomHorizontalFlip(img, mask)
                mask = np.expand_dims(mask, axis=2)
                x_batch.append(img)
                y_batch.append(mask)
            x_batch = np.array(x_batch, np.float32) / 255
            y_batch = np.array(y_batch, np.float32) / 255
            yield x_batch, y_batch

In [None]:
def valid_generator(val_batch_size):
    """Yield validation batches forever, in a fixed order and without augmentation.

    The ids in ``ids_valid_split`` are walked in order, in consecutive
    batches of ``val_batch_size`` (the last batch of a pass may be smaller).
    Images and masks are taken from the in-memory ``all_imgs`` /
    ``all_masks`` caches.

    Parameters
    ----------
    val_batch_size : int
        Maximum number of samples per batch; must be positive.

    Yields
    ------
    (x_batch, y_batch) : tuple of np.ndarray
        float32 arrays of the images and masks, both divided by 255. Each
        mask gets an extra axis inserted at position 2.

    Raises
    ------
    ValueError
        On first iteration, if ``val_batch_size`` is not positive or
        ``ids_valid_split`` is empty.
    """
    if val_batch_size <= 0:
        raise ValueError('val_batch_size must be positive, got {}'.format(val_batch_size))
    while True:
        # Materialise the ids as a plain list so batches are always cut by
        # position, whatever index labels the split carries.
        valid_ids = list(ids_valid_split)
        if not valid_ids:
            # Without this guard an empty split would loop forever without
            # ever yielding a batch.
            raise ValueError('ids_valid_split is empty; there is nothing to validate on')
        for start in range(0, len(valid_ids), val_batch_size):
            # Slicing past the end is clipped, so the final batch is simply shorter.
            batch_ids = valid_ids[start:start + val_batch_size]
            x_batch = [all_imgs[img_id] for img_id in batch_ids]
            y_batch = [np.expand_dims(all_masks[img_id], axis=2) for img_id in batch_ids]
            x_batch = np.array(x_batch, np.float32) / 255
            y_batch = np.array(y_batch, np.float32) / 255
            yield x_batch, y_batch

In [None]:
# Samples per batch for the training and validation generators.
train_batch_size, val_batch_size = 6, 16

In [None]:
# Both callbacks watch `val_dice_coef` and treat larger values as better (mode='max'):
# the first lowers the learning rate when it plateaus, the second keeps only the best weights.
# NOTE(review): this assumes the model was compiled with a `dice_coef` metric so that
# `val_dice_coef` is logged -- the compile step is not visible in this notebook; confirm.
callbacks = [ReduceLROnPlateau(monitor='val_dice_coef',
                               factor=0.2,
                               patience=3,
                               verbose=1,
                               epsilon=1e-4,
                               mode='max'),
             ModelCheckpoint(monitor='val_dice_coef',
                             filepath='../weights/best_weights_linknet_1.hdf5',
                             save_best_only=True,
                             save_weights_only=True,
                             mode='max')]

# One epoch = one full pass over each split. Keras documents the step counts as
# integers, and np.ceil returns a float, so cast explicitly.
train_steps = int(np.ceil(float(len(ids_train_split)) / float(train_batch_size)))
valid_steps = int(np.ceil(float(len(ids_valid_split)) / float(val_batch_size)))

history = model.fit_generator(generator=train_generator(train_batch_size),
                    steps_per_epoch=train_steps,
                    epochs=max_epochs,
                    verbose=2,
                    callbacks=callbacks,
                    validation_data=valid_generator(val_batch_size),
                    validation_steps=valid_steps)


<h3> References and Further Reading </h3>

<a name="ref1"></a>[1] [Chaurasia, Abhishek, Culurciello, Eugenio. "LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation." arXiv:1707.03718v1 [cs.CV]](https://arxiv.org/pdf/1707.03718.pdf)

<div style="background-color: #FAAC58; margin-left: 0px; margin-right: 20px; padding-bottom: 8px; padding-left: 8px; padding-right: 8px; padding-top: 8px;">


Author:  Meena Mani  <br>
email:   meenas.mailbag@gmail.com   <br> 
Twitter: @meena_uvaca    <br>
</div>