## Road segmentation according to kaggle

In this notebook, we try to implement the CNN for the road segmentation for the ML course project, a files is provided, but here, we try another approach form an another source.

## Package importation

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import os,sys
from PIL import Image
import tensorflow.python.platform

In [3]:
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math

from sklearn.model_selection import train_test_split

In [4]:
import glob
import os
import math
import operator
import functools
import random
import datetime
from scipy.ndimage.interpolation import rotate, shift, zoom


## Useful parameters

In [40]:
NUM_CHANNELS = 3 # RGB images
PIXEL_DEPTH = 255
NUM_LABELS = 2
TRAINING_SIZE = 20
VALIDATION_SIZE = 5  # Size of the validation set.
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 16 # 64
NUM_EPOCHS = TRAINING_ITERS
RESTORE_MODEL = False # If True, restore existing model instead of training a new one
RECORDING_STEP = 20
IMG_PATCH_SIZE = 16

## Loading Data

In [6]:
data_folder = '../data/'
data_dir = data_folder + 'training/'
train_data_filename = data_dir + 'images/' #input X
train_labels_filename = data_dir + 'groundtruth/' #output Y

In [7]:
# Extract patches from a given image
def img_crop(im, w, h):
    list_patches = []
    imgwidth = im.shape[0]
    imgheight = im.shape[1]
    is_2d = len(im.shape) < 3
    for i in range(0,imgheight,h):
        for j in range(0,imgwidth,w):
            if is_2d:
                im_patch = im[j:j+w, i:i+h]
            else:
                im_patch = im[j:j+w, i:i+h, :]
            list_patches.append(im_patch)
    return list_patches

def extract_data(filename, num_images):
    """Extract the images into a 4D tensor [image index, y, x, channels].
    Values are rescaled from [0, 255] down to [-0.5, 0.5].
    """
    imgs = []
    for i in range(1, num_images+1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            print ('Loading ' + image_filename)
            img = mpimg.imread(image_filename)
            imgs.append(img)
        else:
            print ('File ' + image_filename + ' does not exist')

    num_images = len(imgs)
    IMG_WIDTH = imgs[0].shape[0]
    IMG_HEIGHT = imgs[0].shape[1]
    N_PATCHES_PER_IMAGE = (IMG_WIDTH/IMG_PATCH_SIZE)*(IMG_HEIGHT/IMG_PATCH_SIZE)

    img_patches = [img_crop(imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)]
    data = [img_patches[i][j] for i in range(len(img_patches)) for j in range(len(img_patches[i]))]

    return np.asarray(data)
        
# Assign a label to a patch v
def value_to_class(v):
    foreground_threshold = 0.25 # percentage of pixels > 1 required to assign a foreground label to a patch
    df = np.sum(v)
    if df > foreground_threshold:
        return [0, 1]
    else:
        return [1, 0]

# Extract label images
def extract_labels(filename, num_images):
    """Extract the labels into a 1-hot matrix [image index, label index]."""
    gt_imgs = []
    for i in range(1, num_images+1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            print ('Loading ' + image_filename)
            img = mpimg.imread(image_filename)
            gt_imgs.append(img)
        else:
            print ('File ' + image_filename + ' does not exist')

    num_images = len(gt_imgs)
    gt_patches = [img_crop(gt_imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)]
    data = np.asarray([gt_patches[i][j] for i in range(len(gt_patches)) for j in range(len(gt_patches[i]))])
    labels = np.asarray([value_to_class(np.mean(data[i])) for i in range(len(data))])

    # Convert to dense 1-hot representation.
    return labels.astype(np.float32)


In [8]:
train_data = extract_data(train_data_filename, TRAINING_SIZE) #Input, real images
train_labels = extract_labels(train_labels_filename, TRAINING_SIZE) #Output, black and white image

Loading data/training/images/satImage_001.png
Loading data/training/images/satImage_002.png
Loading data/training/images/satImage_003.png
Loading data/training/images/satImage_004.png
Loading data/training/images/satImage_005.png
Loading data/training/images/satImage_006.png
Loading data/training/images/satImage_007.png
Loading data/training/images/satImage_008.png
Loading data/training/images/satImage_009.png
Loading data/training/images/satImage_010.png
Loading data/training/images/satImage_011.png
Loading data/training/images/satImage_012.png
Loading data/training/images/satImage_013.png
Loading data/training/images/satImage_014.png
Loading data/training/images/satImage_015.png
Loading data/training/images/satImage_016.png
Loading data/training/images/satImage_017.png
Loading data/training/images/satImage_018.png
Loading data/training/images/satImage_019.png
Loading data/training/images/satImage_020.png
Loading data/training/groundtruth/satImage_001.png
Loading data/training/groundt

To be consistent and not always change the variable name, we will use the same denomination as [Kaggle](https://www.kaggle.com/pouryaayria/convolutional-neural-networks-tutorial-tensorflow) . But we don't need to split the data since they seems already to be split into a train set and the dataset is really small.

In [10]:
# Shapes of training set
print("Training set (images) shape: {shape}".format(shape=train_data.shape))
print("Training set (labels) shape: {shape}".format(shape=train_labels.shape))

Training set (images) shape: (12500, 16, 16, 3)
Training set (labels) shape: (12500, 2)


The datasets contains a sample of 12500 images, which seems not to be large, then a splitting won't be necessary. The dataset is a list of matrix of size 16x16 representing the pixels we have chosen and 3 represents the number of colors RGB (number of channels).

# Configuration of Neural Network.

Some important variables have already been defined in the section parameters, we put some here and add other relevant parameters. We add only the parameters ``LEARNING_RATE`` which can be useful later.

In [28]:
TRAINING_SIZE = 20
NUM_LABELS = 2
BATCH_SIZE = 16 # 64
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
TRAINING_ITERS = 40000
DROPOUT = 0.5

We try to balance our dataset so that the learning phase is more easier.

In [12]:
c0 = 0
c1 = 0
for i in range(len(train_labels)):
        if train_labels[i][0] == 1:
            c0 = c0 + 1
        else:
            c1 = c1 + 1
print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

print ('Balancing training data...')
min_c = min(c0, c1)
idx0 = [i for i, j in enumerate(train_labels) if j[0] == 1]
idx1 = [i for i, j in enumerate(train_labels) if j[1] == 1]
new_indices = idx0[0:min_c] + idx1[0:min_c]
print (len(new_indices))
print (train_data.shape)
train_data = train_data[new_indices,:,:,:]
train_labels = train_labels[new_indices]

Number of data points per class: c0 = 9450 c1 = 3050
Balancing training data...
6100
(12500, 16, 16, 3)


In [13]:
train_size = train_labels.shape[0]

c0 = 0
c1 = 0
for i in range(len(train_labels)):
    if train_labels[i][0] == 1:
        c0 = c0 + 1
    else:
        c1 = c1 + 1
print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

Number of data points per class: c0 = 3050 c1 = 3050


## Same padding

In [21]:
train_data_node = tf.placeholder(
    tf.float32,
    shape=(BATCH_SIZE, IMG_PATCH_SIZE, IMG_PATCH_SIZE, NUM_CHANNELS))
train_labels_node = tf.placeholder(tf.float32,
                                       shape=(BATCH_SIZE, NUM_LABELS))
train_all_data_node = tf.constant(train_data)
keep_prob = tf.placeholder(tf.float32)
print('Shape of placeholder',train_data_node.shape, train_labels_node.shape)

Shape of placeholder (16, 16, 16, 3) (16, 2)


In [15]:
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

## Max pool

In [16]:
def maxpool2d(x, k=2):
    return tf.nn.max_pool(x,
                              ksize=[1, k, k, 1],
                              strides=[1, k, k, 1],
                              padding='SAME')

In [17]:
weights = {
    'wc1': tf.Variable(tf.truncated_normal([5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
                                stddev=0.1,
                                seed=SEED)),
    'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64],
                                stddev=0.1,
                                seed=SEED)),
    'wd1': tf.Variable(  # fully connected, depth 512.
    tf.truncated_normal([int(IMG_PATCH_SIZE / 4 * IMG_PATCH_SIZE / 4 * 64), 512],
                                stddev=0.1,
                                seed=SEED)),
    'out': tf.Variable(
    tf.truncated_normal([512, NUM_LABELS],
                                stddev=0.1,
                                seed=SEED)),
}

biases = {
    'bc1':  tf.Variable(tf.zeros([32])),
    'bc2':  tf.Variable(tf.constant(0.1, shape=[64])),
    'bd1':  tf.Variable(tf.constant(0.1, shape=[512])),
    'out':  tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))
}

In [18]:
def conv_net(x, weights, biases, dropout):
    #Convolution layer 1
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max pooling
    conv1 = maxpool2d(conv1, k=2)
    
    #Convolution layer 2
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max pooling
    conv2 = maxpool2d(conv2, k=2)
    
    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    conv2_shape = conv2.get_shape().as_list()
    fc1 = tf.reshape(conv2, shape = [conv2_shape[0], conv2_shape[1] * conv2_shape[2] * conv2_shape[3]])
    fc1 = tf.add(tf.matmul(fc1,weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)#layer
    fc1 = tf.nn.dropout(fc1, dropout)
    
    out = tf.add(tf.matmul(fc1,weights['out']), biases['out'])
    return out

# Optimization method

We need a cost function to minimize. In [Kaggle](https://www.kaggle.com/pouryaayria/convolutional-neural-networks-tutorial-tensorflow), they propose to use AdamOptimizer which is an advanced form of Gradient Descent.

In [19]:
x = train_data_node
y = train_labels_node

In [37]:
# Create the model
model = conv_net(x, weights, biases, keep_prob)
print(model)
#Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

# Evaluate model
y_true_cls = tf.argmax(y,1)
y_pred_cls = tf.argmax(model,1)
# This is a vector of booleans whether the predicted 
#class equals the true class of each image.
correct_model = tf.equal(y_pred_cls, y_true_cls)
# This calculates the classification accuracy by first type-casting 
#the vector of booleans to floats, so that False becomes 0 and True becomes 1,
#and then calculating the average of these numbers.
accuracy = tf.reduce_mean(tf.cast(correct_model, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

Tensor("Add_5:0", shape=(16, 2), dtype=float32)


## Get Batch

Define the number of samples that goings to be propagated through the network, the batch size is the number of training examples present in a single batch. We can't pass the entire dataset into neural net at once, then we divide the dataset into number of batches or sets or parts.

In [24]:
def getBatch(X, Y, batchSize=16):
    """
    Creates a list of random minibatches from (X, Y)
    
    Arguments:
    X -- input train/test 
    Y --input label train/test
    
    Returns:
    mini_batches -- tuple of synchronous (mini_batch_X, mini_batch_Y)
    
    """
    arrayLength = X.shape[0]
    count = 0 
    
    while count < arrayLength/batchSize:
        random.seed(datetime.datetime.now())
        randstart = random.randint(0, arrayLength-batchSize-1)
#         print(randstart)
        count += 1
        yield (X[randstart:randstart+batchSize], Y[randstart:randstart+batchSize]) 


In [41]:
loss_t = []
steps_t = []
acc_t = []

with tf.Session() as sess:
    sess.run(init)
    step = 1   
#     Keep training until reach max iterations
    while step * BATCH_SIZE < NUM_EPOCHS:
        a = getBatch(train_data, train_labels, BATCH_SIZE)
        batch_x, batch_y = next(a)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: DROPOUT})
        if step % RECORDING_STEP == 0:
            print('*'*15)
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                              y: batch_y,
                                                              keep_prob: 1.})
            print("Epoch " + str(step*BATCH_SIZE) + ", Loss= " + \
                  "{:.3f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))
            loss_t.append(loss)
            steps_t.append(step*BATCH_SIZE)
            acc_t.append(acc)
        step += 1

***************
Epoch 320, Loss= 0.493, Training Accuracy= 0.750
***************
Epoch 640, Loss= 1.399, Training Accuracy= 0.000
***************
Epoch 960, Loss= 0.176, Training Accuracy= 1.000
***************
Epoch 1280, Loss= 1.108, Training Accuracy= 0.000
***************
Epoch 1600, Loss= 0.552, Training Accuracy= 0.750
***************
Epoch 1920, Loss= 0.503, Training Accuracy= 0.875
***************
Epoch 2240, Loss= 0.922, Training Accuracy= 0.125
***************
Epoch 2560, Loss= 0.423, Training Accuracy= 0.938
***************
Epoch 2880, Loss= 0.484, Training Accuracy= 0.875
***************
Epoch 3200, Loss= 0.898, Training Accuracy= 0.312
***************
Epoch 3520, Loss= 0.444, Training Accuracy= 1.000
***************
Epoch 3840, Loss= 0.586, Training Accuracy= 0.625
***************
Epoch 4160, Loss= 0.328, Training Accuracy= 1.000
***************
Epoch 4480, Loss= 0.812, Training Accuracy= 0.125
***************
Epoch 4800, Loss= 0.601, Training Accuracy= 0.562
*************

***************
Epoch 39680, Loss= 0.462, Training Accuracy= 1.000
