In [None]:
import os
import functools

import numpy as np
import pandas as pd
import matplotlib
matplotlib.rcParams['axes.grid'] = False
matplotlib.rcParams['figure.figsize'] = (12,12)
import matplotlib.pyplot as plt
import matplotlib.image as mplimg

from sklearn.model_selection import train_test_split  #function to split datalist into train and test set
from PIL import Image

import csv

import tensorflow as tf
import tensorflow.contrib as tfcontrib
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import models
from tensorflow.keras import backend as K

# Explore the Data
Let us look at the Carvana Image data.

In [None]:
# Folder names (relative to the notebook) holding the training photos and their masks
train_dir, mask_dir = "train", "train_masks"

In [None]:
# Read the mask listing and derive each image ID: the file name up to its first '.'
train_df = pd.read_csv("train_masks.csv")
train_ids = train_df['img'].apply(lambda fname: fname.partition('.')[0])

In [None]:
# Show example of a car and its 16 images (the first 16 entries of train_ids)
plt.figure(figsize=(70, 40))
for i, img_id in enumerate(train_ids[:16]):
    # read from the configured training directory rather than a repeated literal
    img_path = os.path.join(train_dir, img_id + '.jpg')
    img = mplimg.imread(img_path)

    # one panel per view, filled left-to-right in a 6x8 grid
    plt.subplot(6, 8, i + 1)
    plt.imshow(img)
    plt.title("Id: {}".format(img_id), fontsize=20)

plt.suptitle("16 images of a car taken at 22.5° rotation",y=0.94,fontsize=48)
#plt.savefig("sample car 16 images")

# Create Training and Validation sets
We'll need to split the training image and labels into training and validation sets

Validation set is just to make sure our model isn't overfitting

In [None]:
# Build parallel lists of photo paths and mask paths from the image ids.
# Comprehensions are used so that no loop variable leaks into the notebook namespace
# (a leaked variable named `id` would shadow the builtin `id()` in every later cell).
x_train_filenames = [os.path.join(train_dir, img_id + ".jpg") for img_id in train_ids]         # training image files
y_train_filenames = [os.path.join(mask_dir, img_id + "_mask.gif") for img_id in train_ids]    # matching mask files

# using sklearn's training/validation data splitter; fixed seed keeps the split reproducible
seed = 42
x_train, x_val, y_train, y_val = train_test_split(x_train_filenames, y_train_filenames, test_size=0.2, random_state=seed)

num_train_examples = len(x_train)
num_val_examples = len(x_val)

print('Number of training samples: ' + str(num_train_examples))
print('Number of validation samples: ' + str(num_val_examples))

Visualize some Images and Masks

In [None]:
display_num = 4    # number of images to display

# draw distinct indices: sampling with replacement could show the same picture twice
r_choices = np.random.choice(len(x_train), min(display_num, len(x_train)), replace=False)

plt.figure(figsize=(20, 15))

for pair_idx, img_num in enumerate(r_choices):
    x_pathname = x_train[img_num]
    y_pathname = y_train[img_num]

    # each sample occupies two neighbouring panels: photo first, then its mask
    plt.subplot(display_num, 4, 2 * pair_idx + 1)
    plt.imshow(mplimg.imread(x_pathname))
    plt.title("Original Image")

    plt.subplot(display_num, 4, 2 * pair_idx + 2)
    plt.imshow(mplimg.imread(y_pathname))
    plt.title("Mask")

plt.suptitle("Examples of Images and their Masks", y=0.94, fontsize=24)
#plt.savefig("sample car & mask")

# Building an input pipeline to generate datasets with tf.data

To feed data into our model, we make use of `tf.data`'s dataset pipelining capabilities with functional APIs

Steps in the pipeline:

1. Read (load) image files from path
2. Decode `jpeg` and `gif` into tensors
3. Apply Image augmentation to help model generalize better
4. Shuffle the data, batch the data, fetch


## Load & decode images from path

In [None]:
def _load_images(x_path, y_path):
    '''load function that loads and decodes image files from their path'''
    # decode the original jpeg image
    x_file = tf.read_file(x_path)
    x_img = tf.image.decode_jpeg(x_file, channels=3)
    
    # decode the mask .gif images
    y_file = tf.read_file(y_path)
    
    # decoding a .gif file is more complicated
    # decode_gif returns a tensor [frame_number, height, width, channel]
    y_img = tf.image.decode_gif(y_file)[0]    # since this is not an animated gif, we take the first and only frame
    y_img = y_img[:,:,0]                      # We take the first channel only
    
    # add an additional dimension to the tensor so that it has the same shape as x_img
    y_img = tf.expand_dims(y_img, axis=-1)
    
    return x_img, y_img
    

## Data Augmentation

There are many commonly used types of data augmentation. Given the data, we will only perform the following:

1. **Resize** - needed so that the image dimensions match the model input, and also because of hardware restrictions (outlined below)
2. **Pixel Scaling** - image values are between 0-255; rescaling them to \[0,1\] makes training easier.
3. **Image translations (shifts)** - Random horizontal & vertical translations
4. **Hue delta** - Randomly adjust RGB values to provide hue variations

### Image translation

In [None]:
def shift_img(x_img, y_img, w_shift_range, h_shift_range):
    """Randomly translate an image and its mask by the same offset.

    x_img, y_img: image and mask tensors laid out as [..., height, width, channels].
    w_shift_range: maximum horizontal shift as a fraction of the image width
        (0 or None disables horizontal shifting).
    h_shift_range: maximum vertical shift as a fraction of the image height
        (0 or None disables vertical shifting).

    Returns (x_img, y_img); both are returned untouched when neither range is set.
    """
    if not (w_shift_range or h_shift_range):
        return x_img, y_img

    # Take the size from the tensor itself instead of a notebook-level global,
    # so the function works for whatever size the image currently has.
    dims = tf.to_float(tf.shape(x_img))
    height, width = dims[-3], dims[-2]

    # A direction without a range stays at zero; leaving it undefined would raise
    # NameError whenever only one of the two ranges is given.
    w_shift = tf.constant(0.0)
    h_shift = tf.constant(0.0)
    if w_shift_range:
        w_shift = tf.random_uniform([], -width * w_shift_range, width * w_shift_range)
    if h_shift_range:
        h_shift = tf.random_uniform([], -height * h_shift_range, height * h_shift_range)

    # the same [dx, dy] offset is applied to both so the mask stays aligned with the image
    offsets = tf.stack([w_shift, h_shift])
    x_img = tfcontrib.image.translate(x_img, offsets)
    y_img = tfcontrib.image.translate(y_img, offsets)

    return x_img, y_img

### Combine augmentations into a function

In [None]:
def _augment(x_img,
             y_img,
             resize=None,       # resize to [h,w]
             scale=1/255,       # Scale pixel values of image
             hue_delta=0,       # Adjust hue by a random factor
             w_shift_range=0,   # random horizontal translation
             h_shift_range=0):  # random vertical translation
    
    # Resize
    if resize is not None:
        x_img = tf.image.resize_images(x_img, resize)
        y_img = tf.image.resize_images(y_img, resize)
    
    # hue shift
    if hue_delta:
        x_img = tf.image.random_hue(x_img, hue_delta)
    
    # image shift
    x_img, y_img = shift_img(x_img, y_img, w_shift_range, h_shift_range)
    
    # pixel scaling
    x_img = tf.to_float(x_img) * scale
    y_img = tf.to_float(y_img) * scale
    
    return x_img, y_img
        

## Connect Pipeline

Now we stitch the functions into a pipeline that generates dataset

In [None]:
def get_dataset(x_filenames,
               y_filenames,
               preproc_fn=functools.partial(_augment),
               threads=5,
               batch_size=10,
               shuffle=True):
    """Build an endlessly repeating, batched tf.data pipeline of (image, mask) pairs.

    x_filenames, y_filenames: parallel lists of image and mask file paths.
    preproc_fn: callable mapping (x_img, y_img) -> (x_img, y_img), applied after decoding.
    threads: number of parallel calls used for decoding and preprocessing.
    batch_size: number of pairs per batch.
    shuffle: whether to shuffle the samples on every pass over the data.

    The dataset repeats forever, so consumers must bound each epoch themselves
    (e.g. with steps_per_epoch / validation_steps).
    """
    num_x = len(x_filenames)

    # create the filename queue
    dataset = tf.data.Dataset.from_tensor_slices((x_filenames, y_filenames))

    # Shuffle while the elements are still file names: a full-size shuffle buffer of
    # decoded images would keep the whole decoded dataset in memory, whereas a buffer
    # of path strings is tiny and gives the same uniform shuffle.
    if shuffle and num_x > 0:
        dataset = dataset.shuffle(num_x)

    # add file reading and decoding to the queue
    dataset = dataset.map(_load_images, num_parallel_calls=threads)

    # add any preprocessing to the queue
    dataset = dataset.map(preproc_fn, num_parallel_calls=threads)

    # repeat data for all epochs, then batch
    dataset = dataset.repeat().batch(batch_size)

    # prepare the next batch while the current one is being consumed
    dataset = dataset.prefetch(1)

    return dataset

# Build the model
Now that we've defined our pipeline, it's time to build our model with building blocks.

A U-Net consists of stacking **Convolutional Layers** into **Encoder Blocks** and **Decoder Blocks**
Which we will define as functions that generate tensor graphs

In [None]:
# use functional API of Keras to define and build building blocks

# Basic building unit: two identical Conv2D -> BatchNorm -> ReLU stages in a row.
def conv_block(input_tensor, num_filters=32, kernel_size=(3,3)):
    """Run `input_tensor` through two 'same'-padded convolution stages.

    Each stage is a Conv2D with `num_filters` filters of size `kernel_size`,
    followed by batch normalisation and a ReLU activation.
    Returns the output tensor of the second stage.
    """
    features = input_tensor
    for _ in range(2):
        features = layers.Conv2D(num_filters, kernel_size, padding='same')(features)
        features = layers.BatchNormalization()(features)
        features = layers.Activation('relu')(features)

    return features

# Encoder stage: one convolution block followed by 2x2 max pooling.
def encoder_block(input_tensor, num_filters, kernel_size):
    """Return (pooled, skip) for one encoder stage.

    `skip` is the convolution block output before pooling; it is returned so the
    matching decoder stage can concatenate it. `pooled` is `skip` after 2x2 max
    pooling with stride 2.
    """
    skip = conv_block(input_tensor, num_filters, kernel_size)
    pooled = layers.MaxPooling2D((2,2), strides=(2,2))(skip)

    return pooled, skip

# Decoder stage: upsample, merge with the encoder skip tensor, then convolve.
def decoder_block(input_tensor, concat_tensor, num_filters=32, kernel_size=(3,3)):
    """Return the output tensor of one decoder stage.

    input_tensor: tensor coming from the deeper stage; upsampled by a stride-2
        transpose convolution with `num_filters` filters.
    concat_tensor: pre-pooling encoder tensor, concatenated with the upsampled
        tensor along the last axis.
    The merged tensor is batch-normalised, passed through ReLU and then through
    a convolution block with `num_filters` filters of size `kernel_size`.
    """
    # transpose convolution for upsampling
    upsampler = layers.Conv2DTranspose(num_filters,(2,2),strides=(2,2),padding='same')
    upsampled = upsampler(input_tensor)

    # skip connection: encoder features first, upsampled features second
    merged = layers.concatenate([concat_tensor, upsampled], axis=-1)
    merged = layers.BatchNormalization()(merged)
    merged = layers.Activation('relu')(merged)

    return conv_block(merged, num_filters, kernel_size)

In [None]:
# Build UNet Segmentation CNN Architecture
def u_net(img_shape):
    """Wire up the U-Net graph and return its (inputs, outputs) tensors.

    img_shape: shape passed to the Keras Input layer.

    Layout: five encoder stages (32, 64, 128, 256, 512 filters), each halving the
    spatial size; a 1024-filter centre block at 1/32 of the input size; five
    decoder stages (512 down to 32 filters), each doubling the size and
    concatenating the encoder output of the same resolution; finally a 1x1
    sigmoid convolution producing a single-channel mask.
    """
    inputs = layers.Input(shape=img_shape)

    # contracting path: remember every pre-pooling tensor for the skip connections
    skips = []
    tensor = inputs
    for num_filters in (32, 64, 128, 256, 512):
        tensor, skip = encoder_block(tensor, num_filters, (3,3))
        skips.append(skip)

    # centre block at the lowest resolution (img_dim/32)
    tensor = conv_block(tensor, 1024, (3,3))

    # expanding path: consume the skip tensors deepest-first
    for num_filters in (512, 256, 128, 64, 32):
        tensor = decoder_block(tensor, skips.pop(), num_filters, (3,3))

    # output segmentation pixel generation
    outputs = layers.Conv2D(1, (1,1), activation='sigmoid')(tensor)

    return inputs, outputs

# Define loss functions
For Image Segmentation problems the dice score is usually used. This score measures the segmentation overlaps and works better for **imbalanced** problems.

Dice loss is the loss function version of dice score.

We'll use a custom loss function combining **binary cross entropy** with **dice loss**. This is based on empirical tries of other contestants

In [None]:
def dice_coef(y_true, y_pred):
    """Smoothed dice coefficient between two tensors of the same shape.

    Both tensors are flattened, so the score is computed over all elements at
    once. A smoothing constant of 1.0 is added to numerator and denominator,
    which keeps the ratio defined when both inputs sum to zero.
    """
    smooth = 1.0

    # flatten so the sums below run over every element
    truth = tf.reshape(y_true, [-1])
    prediction = tf.reshape(y_pred, [-1])

    overlap = tf.reduce_sum(truth * prediction)
    total = tf.reduce_sum(truth) + tf.reduce_sum(prediction)

    return (2.0 * overlap + smooth) / (total + smooth)

In [None]:
# dice loss is the complement of the dice score, so a higher score means a lower loss
def dice_loss(y_true, y_pred):
    """Return 1 - dice_coef(y_true, y_pred)."""
    return 1 - dice_coef(y_true, y_pred)

In [None]:
def bce_dice_loss(y_true, y_pred):
    """Training objective: binary cross-entropy plus dice loss of the same pair."""
    cross_entropy = losses.binary_crossentropy(y_true, y_pred)
    overlap_penalty = dice_loss(y_true, y_pred)

    return cross_entropy + overlap_penalty

# Define model and compile
Now that we've defined our pipeline, model and loss functions, it's time to instantiate them as objects

## Setup and Generate train and validation datasets
This UNet architecture requires the size of image be evenly divisible by 32, as downsampling by factor of 2 happens 5 times.

Given my machine limitation, dimension of 128x192 is used. Higher resolution is better, resize to a larger dimension divisible by 32 if your machine supports it.

Alternatively tweak the batch size.

In [None]:
# training parameters
img_shape = (128, 192, 3)    # (height, width, channels) of the network input
batch_size, epochs = 3, 50

In [None]:
# Training stream: resize + pixel scaling plus random hue and random shifts
train_cfg = dict(
    resize=[img_shape[0], img_shape[1]],
    scale=1/255.,
    hue_delta=0.1,
    w_shift_range=0.1,
    h_shift_range=0.1,
)
train_preproc_fn = functools.partial(_augment, **train_cfg)

# Validation stream: resize + pixel scaling only, no random augmentation
val_cfg = dict(
    resize=[img_shape[0], img_shape[1]],
    scale=1/255.,
)
val_preproc_fn = functools.partial(_augment, **val_cfg)

# Dataset objects fed to model.fit
train_dataset = get_dataset(x_train, y_train, preproc_fn=train_preproc_fn, batch_size=batch_size)
val_dataset = get_dataset(x_val, y_val, preproc_fn=val_preproc_fn, batch_size=batch_size)

### Test our pipeline and visualize augmentations

In [None]:
# Pull a single batch of two augmented samples (unshuffled) and display it
temp_ds = get_dataset(x_train, y_train, preproc_fn=train_preproc_fn, batch_size=2, shuffle=False)

data_aug_iter = temp_ds.make_one_shot_iterator()
next_element = data_aug_iter.get_next()

with tf.Session() as sess:
    batch_of_imgs, label = sess.run(next_element)

    plt.figure(figsize=(14,10))

    # one row per sample: augmented image on the left, its mask on the right
    for sample_idx in range(2):
        plt.subplot(2, 2, 2 * sample_idx + 1)
        plt.imshow(batch_of_imgs[sample_idx])

        plt.subplot(2, 2, 2 * sample_idx + 2)
        plt.imshow(label[sample_idx, :, :, 0])

    plt.suptitle("Sample Training Data",y=0.94,fontsize=20)

## Create & Compile Model Object

In [None]:
# define model object given network definition earlier:
# u_net only wires up tensors, models.Model wraps them into a trainable Keras model
model_inputs, model_outputs = u_net(img_shape)
model = models.Model(inputs=[model_inputs], outputs=[model_outputs])

# compile with training params: Adam optimizer, combined BCE + dice loss as the objective;
# dice loss and dice score are also tracked as metrics (reported as 'dice_loss' / 'dice_coef')
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[dice_loss, dice_coef])

# print the layer-by-layer summary
model.summary()

# Train the model
Once all objects have been defined, we can start training our model

In [None]:
# define logs & model saving objects
# Keras does not create the checkpoint directory for us, so make sure it exists
# before training starts (a no-op when it is already there)
save_model_path = './tmp/train1/weights-{epoch:02d}-{val_dice_loss:.2f}.hdf5'
os.makedirs(os.path.dirname(save_model_path), exist_ok=True)

# callback object to save keras model, save best model based on validation dice loss
keras_cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path,
                                              monitor='val_dice_loss',
                                              save_best_only=True,
                                              verbose=1)

# tensorboard checkpoint to allow graph visualizations and parameter histograms
tensorboard_cp = tf.keras.callbacks.TensorBoard(log_dir='./logs/train6',
                                                histogram_freq=1,
                                                batch_size=batch_size)


In [None]:
# Train the model. The datasets built by get_dataset repeat forever, so the number of
# batches per epoch is given explicitly: enough steps to cover every sample once,
# rounding up so that a final partial batch is included.
history = model.fit(x=train_dataset,
                    epochs=epochs,
                    verbose=2,
                    callbacks=[keras_cp,tensorboard_cp],
                    validation_data=val_dataset,
                    steps_per_epoch=int(np.ceil(num_train_examples / batch_size)),
                    validation_steps=int(np.ceil(num_val_examples / batch_size))
                   )


## Visualize the training history

In [None]:
# history of custom binary-cross-entropy + dice loss
# NOTE: this list must not be called `losses` - that name is the tensorflow.keras.losses
# module which bce_dice_loss looks up at call time; overwriting it would break any later
# compile / training / model loading in the same kernel.
train_losses = history.history['loss']
val_losses = history.history['val_loss']

# history of dice loss
dice_losses = history.history['dice_loss']
val_dice_losses = history.history['val_dice_loss']

# history of dice score
dice_scores = history.history['dice_coef']
val_dice_scores = history.history['val_dice_coef']

# x axis: one point per recorded epoch
epochs_range = range(len(train_losses))

# (title, training curve, validation curve, legend position, y-axis limits) per panel
panels = [
    ('Training and Validation BCE + Dice Loss', train_losses, val_losses, 'upper right', [0,0.15]),
    ('Training and Validation Dice Loss', dice_losses, val_dice_losses, 'upper right', [0,0.08]),
    ('Training and Validation Dice Scores', dice_scores, val_dice_scores, 'lower right', [0.94,1]),
]

plt.figure(figsize=(20, 5))
for panel_idx, (title, train_curve, val_curve, legend_loc, y_limits) in enumerate(panels, start=1):
    plt.subplot(1, 3, panel_idx)
    plt.plot(epochs_range, train_curve, label='Train')
    plt.plot(epochs_range, val_curve, label='Val')
    plt.legend(loc=legend_loc)
    plt.title(title)
    # fixed y range chosen to zoom in on the late-training values
    plt.gca().set_ylim(y_limits)

plt.suptitle("Training Metrices",y=0.98,fontsize=18)
plt.savefig('50 epoch loss')
plt.show()

## Visualize some predictions

In [None]:
# Alternative (disabled): load the complete saved model instead of only its weights.
# The custom loss functions then have to be supplied through custom_objects.
#pred_model = models.load_model("./tmp/weights-03-1.00.hdf5",
#                              custom_objects={
#                                  'bce_dice_loss': bce_dice_loss,
#                                  'dice_loss': dice_loss
#                              })
# Load a checkpoint written by the ModelCheckpoint callback into the existing model.
# The file name encodes epoch and validation dice loss, so it differs between runs.
model.load_weights("./tmp/train1/weights-45-0.01.hdf5")    # key in the correct model name and path

In [None]:
# Draw a few validation batches and compare prediction against ground truth
data_aug_iter = val_dataset.make_one_shot_iterator()
next_element = data_aug_iter.get_next()

# one row per batch: image, actual mask, predicted mask
plt.figure(figsize=(15, 15))
num_to_show = 4
for i in range(num_to_show):
    # fetch the next batch through the session Keras is already using
    batch_of_imgs, masks = tf.keras.backend.get_session().run(next_element)

    # only the first sample of each batch is shown; rounding turns the
    # sigmoid outputs into a hard 0/1 mask
    predicted_mask = model.predict(batch_of_imgs)[0]
    predicted_mask_np = np.round(np.array(predicted_mask), 0)

    img = batch_of_imgs[0]
    row_panels = (
        ("Image", img, None),
        ("Actual Mask", masks[0, :, :, 0], 'gray'),
        ("Predicted Mask", predicted_mask_np[:, :, 0], 'gray'),
    )
    for col, (panel_title, picture, colour_map) in enumerate(row_panels, start=1):
        plt.subplot(num_to_show, 3, 3 * i + col)
        plt.imshow(picture, cmap=colour_map)
        plt.title(panel_title)

plt.suptitle("Samples of Validation Image, Actual Mask, and Predicted Mask",y=0.94,fontsize=18)
plt.savefig('50 epoch sample valid data result')

# Test our model with test images
Let's test our model on test image and change the input size to see how well it performs with scale variations.

Keep in mind the input dimensions need to be evenly divisible by 32

In [None]:
# Candidate input shapes as (height, width, channels); every height and width is a
# multiple of 32, as the five pooling stages of the network require.
test_shapes = [(128, 192, 3), (256, 384, 3), (512, 768,3), (1024, 1536,3), (1280,1920,3)]
# pick the shape to evaluate with (index 0 is the shape used for training)
test_img_shape = test_shapes[0]

# redefine the model with new input image size
t_inputs, t_outputs = u_net(test_img_shape)
test_model = models.Model(inputs=[t_inputs], outputs=[t_outputs])

# load model weights (the model is only used for prediction, so it is not compiled)
test_model.load_weights("./tmp/train1/weights-45-0.01.hdf5")    # load correct model name and path

In [None]:
# sample of test data colours: light-bronze, white, black, red, light-blue
test_img_ids = ['00b6aee52419', '0bb87dac6ad9', '0a0e3fb8f782', '0a63454298b8','0b9d03ec6720']
# file-name suffixes of the 16 views of each car: '_01' ... '_16'
sample_num = ['_{:02d}'.format(view) for view in range(1, 17)]

for test_img_id in test_img_ids:
    # Start a fresh figure for every car. With a single shared figure each car would be
    # drawn on top of the previous one before being saved.
    plt.figure(figsize=(50, 30))
    figure_fname = '50 epoch test samples ' + test_img_id + '@{}x{}'.format(test_img_shape[1],test_img_shape[0])

    for i, suffix in enumerate(sample_num):
        test_img_path = os.path.join('test', test_img_id + suffix + '.jpg')
        test_img_original = mplimg.imread(test_img_path)

        # right pad so width from 1918 to 1920
        test_img = np.pad(test_img_original, [(0,0),(0,2),(0,0)], 'constant')
        # resize; PIL expects (width, height)
        test_img = Image.fromarray(test_img).resize((test_img_shape[1],test_img_shape[0]),Image.BILINEAR)
        # scale pixels & expand_dimension to fit model
        test_img = np.expand_dims(np.array(test_img)*1/255, axis=0)

        # prediction: raw sigmoid output and its rounded 0/1 version
        predicted_mask = test_model.predict(test_img)[0]
        predicted_mask_np = np.round(np.array(predicted_mask), 0)

        # 6x8 grid: views 0-7 use rows 1-3, views 8-15 use rows 4-6;
        # within each group the rows are image / raw mask / rounded mask
        first_panel = i + 1 if i < 8 else i + 17

        # show the image
        plt.subplot(6, 8, first_panel)
        plt.imshow(test_img[0])
        plt.title("Image", fontsize=20)

        # show the predicted mask
        plt.subplot(6, 8, first_panel + 8)
        plt.imshow(predicted_mask[:, :, 0])
        plt.title("Predicted Mask Raw", fontsize=20)

        # show the rounded predicted mask
        plt.subplot(6, 8, first_panel + 16)
        plt.imshow(predicted_mask_np[:, :, 0])
        plt.title("Predicted Mask Rounded", fontsize=20)

    plt.suptitle("Prediction on test sample {} @{}x{}".format(test_img_id, test_img_shape[1],test_img_shape[0]),y=0.92,fontsize=48)
    plt.savefig(figure_fname)
    plt.show()

We see that if our model is trained on images that are scaled down too much, it performs poorly on larger-scale images.

# Make submission to Kaggle
If our model is trained on smaller images, we'll just have to resize the prediction mask into full-scale image for Kaggle submission.

It's ugly, but it works!

In [None]:
# run length encoder to encode our predictions for Kaggle submission
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns a space-separated string of "start length" pairs, where start is the
    1-based position of a run of mask pixels in the flattened (row-major) array.
    An all-background mask gives an empty string.
    '''
    # a zero on each side guarantees every run has both a start and an end transition
    padded = np.concatenate([[0], img.flatten(), [0]])

    # 1-based positions where the value changes: run start, run end, run start, ...
    boundaries = np.where(padded[1:] != padded[:-1])[0] + 1

    # turn every end position into a run length (end - start)
    boundaries[1::2] -= boundaries[::2]

    return ' '.join(map(str, boundaries))

In [None]:
# (height, width, channels) the network is rebuilt with for the submission run
img_shape = (128, 192, 3) # input image shape

# define model object given network definition earlier
model_inputs, model_outputs = u_net(img_shape)
model = models.Model(inputs=[model_inputs], outputs=[model_outputs])
# restore trained weights from a saved checkpoint; adjust the path to the checkpoint to submit
model.load_weights("./tmp/train1/weights-45-0.01.hdf5")

In [None]:
# Only .jpg files are test images; sorting makes the submission order reproducible
test_fnames = sorted(fname for fname in os.listdir('test') if fname.lower().endswith('.jpg'))
test_num = len(test_fnames)

with open('submission2.csv', mode='w', newline='') as submission_file:
    submission_writer = csv.writer(submission_file, delimiter=',')
    # write header row
    submission_writer.writerow(['img','rle_mask'])

    for count, test_fname in enumerate(test_fnames, start=1):
        test_img_path = os.path.join('test', test_fname)
        test_img = mplimg.imread(test_img_path)

        # input image preprocessing
        # right pad so width from 1918 to 1920
        test_img = np.pad(test_img, [(0,0),(0,2),(0,0)], 'constant')
        # resize to the network input; PIL takes a 2-tuple (width, height), whereas
        # img_shape is (height, width, channels), so it cannot be passed as-is
        test_img = Image.fromarray(test_img).resize((img_shape[1], img_shape[0]),Image.BILINEAR)
        # scale pixels & expand_dimension to fit model
        test_img = np.expand_dims(np.array(test_img)*1/255, axis=0)

        # prediction
        predicted_mask = model.predict(test_img)[0]

        # predicted mask post-processing
        # scale pixels up to use PIL Image object
        predicted_mask = predicted_mask * 255
        # convert datatype
        predicted_mask = predicted_mask.astype('uint8')
        # resize with PIL back to the padded full-size resolution
        predicted_mask = Image.fromarray(predicted_mask[:,:,0], mode="L").resize((1920,1280),Image.BILINEAR)
        # scale pixels back down to [0,1]
        predicted_mask = np.array(predicted_mask) * 1/255.0
        # round to 0 or 1 and drop the two padded columns again
        predicted_mask = np.round(predicted_mask, 0)[:,:1918]

        # encode to rle and write to file
        encoded = rle_encode(predicted_mask)
        submission_writer.writerow([test_fname,encoded])

        # since we'll be processing 100,064 images, this is just to check that the code is still running
        if count % 100 == 0:
            print('Predicted & encoded ' + str(count) + ' files of ' + str(test_num))
