In [1]:
"""
Baseline for machine learning project on road segmentation.
This simple baseline consits of a CNN with two convolutional+pooling layers with a soft-max loss
Credits: Aurelien Lucchi, ETH Zürich
"""



import gzip
import os
import sys
import urllib
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image

import code

import tensorflow.python.platform

import numpy 
import tensorflow as tf

NUM_CHANNELS = 3 # RGB images
PIXEL_DEPTH = 255
NUM_LABELS = 2
TRAINING_SIZE = 100
VALIDATION_SIZE = 5  # Size of the validation set.
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 16 # 64
NUM_EPOCHS = 5
RESTORE_MODEL = False # If True, restore existing model instead of training a new one
RECORDING_STEP = 1000

# Set image patch size in pixels
# IMG_PATCH_SIZE should be a multiple of 4
# image size should be an integer multiple of this number!
IMG_PATCH_SIZE = 16

#tf.app.flags.DEFINE_string('train_dir', '/tmp/mnist',
#                           """Directory where to write event logs """
#                           """and checkpoint.""")
#FLAGS = tf.app.flags.FLAGS

  from ._conv import register_converters as _register_converters


In [2]:
# Extract patches from a given image
def img_crop(im, w, h):
    list_patches = []
    imgwidth = im.shape[0]
    imgheight = im.shape[1]
    is_2d = len(im.shape) < 3
    for i in range(0,imgheight,h):
        for j in range(0,imgwidth,w):
            if is_2d:
                im_patch = im[j:j+w, i:i+h]
            else:
                im_patch = im[j:j+w, i:i+h, :]
            list_patches.append(im_patch)
    return list_patches

In [3]:
def extract_data(filename, num_images):
    """Extract the images into a 4D tensor [image index, y, x, channels].
    Values are scaled from [0, 1] instead of [0,255].
    """
    imgs = []
    for i in range(1, num_images+1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            print ('Loading ' + image_filename)
            img = mpimg.imread(image_filename)
            imgs.append(img)
        else:
            print ('File ' + image_filename + ' does not exist')

    num_images = len(imgs)
    IMG_WIDTH = imgs[0].shape[0]
    IMG_HEIGHT = imgs[0].shape[1]

    img_patches = [img_crop(imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)]
    data = [img_patches[i][j] for i in range(len(img_patches)) for j in range(len(img_patches[i]))]

    return numpy.asarray(data)

In [4]:
# Assign a label to a patch v
def value_to_class(v):
    foreground_threshold = 0.25 # percentage of pixels > 1 required to assign a foreground label to a patch
    df = numpy.sum(v)
    if df > foreground_threshold:
        return [0, 1]
    else:
        return [1, 0]

In [5]:
# Extract label images
def extract_labels(filename, num_images):
    """Extract the labels into a 1-hot matrix [image index, label index]."""
    gt_imgs = []
    for i in range(1, num_images+1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            print ('Loading ' + image_filename)
            img = mpimg.imread(image_filename)
            gt_imgs.append(img)
        else:
            print ('File ' + image_filename + ' does not exist')

    num_images = len(gt_imgs)
    gt_patches = [img_crop(gt_imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)]
    data = numpy.asarray([gt_patches[i][j] for i in range(len(gt_patches)) for j in range(len(gt_patches[i]))])
    labels = numpy.asarray([value_to_class(numpy.mean(data[i])) for i in range(len(data))])

    # Convert to dense 1-hot representation.
    return labels.astype(numpy.float32)

In [6]:
data_dir = "../data/training/"
train_data_filename = data_dir + 'images/'
train_labels_filename = data_dir + 'groundtruth/' 

# Extract it into numpy arrays.
train_data = extract_data(train_data_filename, TRAINING_SIZE)
train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)

num_epochs = NUM_EPOCHS

c0 = 0
c1 = 0
for i in range(len(train_labels)):
    if train_labels[i][0] == 1:
        c0 = c0 + 1
    else:
        c1 = c1 + 1
print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

print ('Balancing training data...')
min_c = min(c0, c1)
idx0 = [i for i, j in enumerate(train_labels) if j[0] == 1]
idx1 = [i for i, j in enumerate(train_labels) if j[1] == 1]
new_indices = idx0[0:min_c] + idx1[0:min_c]
print (len(new_indices))
print (train_data.shape)
train_data = train_data[new_indices,:,:,:]
train_labels = train_labels[new_indices]


train_size = train_labels.shape[0]

c0 = 0
c1 = 0
for i in range(len(train_labels)):
    if train_labels[i][0] == 1:
        c0 = c0 + 1
    else:
        c1 = c1 + 1
print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

Loading ../data/training/images/satImage_001.png
Loading ../data/training/images/satImage_002.png
Loading ../data/training/images/satImage_003.png
Loading ../data/training/images/satImage_004.png
Loading ../data/training/images/satImage_005.png
Loading ../data/training/images/satImage_006.png
Loading ../data/training/images/satImage_007.png
Loading ../data/training/images/satImage_008.png
Loading ../data/training/images/satImage_009.png
Loading ../data/training/images/satImage_010.png
Loading ../data/training/images/satImage_011.png
Loading ../data/training/images/satImage_012.png
Loading ../data/training/images/satImage_013.png
Loading ../data/training/images/satImage_014.png
Loading ../data/training/images/satImage_015.png
Loading ../data/training/images/satImage_016.png
Loading ../data/training/images/satImage_017.png
Loading ../data/training/images/satImage_018.png
Loading ../data/training/images/satImage_019.png
Loading ../data/training/images/satImage_020.png
Loading ../data/trai

Number of data points per class: c0 = 46309 c1 = 16191
Balancing training data...
32382
(62500, 16, 16, 3)
Number of data points per class: c0 = 16191 c1 = 16191


# Prepare the CNN steps

1) Define the functions as conv2d, pooling, activation, flattening, dense (for the full connected steps), batch for train

2) Define a CNN class

In [7]:
def prepare_batches(idxsize, batchsize):
    training_indices = range(idxsize)
    perm_indices = numpy.random.permutation(training_indices)

    res = []
    for i in range(0, idxsize, batchsize):
        res.append(perm_indices[i:i+batchsize])
    return res

In [8]:
def conv2d(layer, filters_size, kernel_size, s, padding, activation='relu'):
    if activation=='relu':
        activation=tf.nn.relu
    return tf.layers.conv2d(layer, filters=filters_size,  kernel_size=[kernel_size, kernel_size], strides=[s, s], padding=padding, activation=activation)

def pooling(layer, k=2, s=2, pool_type='max'):
    
    if pool_type=='max':
        return tf.layers.max_pooling2d(layer, pool_size=[k,k], strides=s)
    
def dense(layer, inputs_size, outputs_size, he_std=0.1):
    weights = tf.Variable(tf.truncated_normal([inputs_size, outputs_size],stddev=he_std))
    biases = tf.Variable(tf.constant(he_std, shape=[outputs_size]))
    layer = tf.matmul(layer,weights) + biases  
    return layer

def flattening_layer(layer):
        #make it single dimensional
        input_size = layer.get_shape().as_list()
        new_size = input_size[-1] * input_size[-2] * input_size[-3]
        return tf.reshape(layer, [-1, new_size]),new_size
    

def activation(layer, activation='relu'):
    if activation=='relu':
        return tf.nn.relu(layer)
    elif activation=='soft_max':
        return tf.nn.softmax(layer)
    
def optimizer_choice(name='GD', lr=0.003):
    if name=='GD':
        return tf.train.GradientDescentOptimizer(lr)
    elif name=='adam':
        return tf.train.AdamOptimizer(lr)

In [14]:
class CNN:
    def __init__(self):
        print('init done')
        self.X = None
        self.y = None
        self.network = None
        self.training_op = None
        self.out = None
        self.session = None
    def model(self, conv_params, fc_params, optimizer='GD', learning_rate=0.003):

        # init the graph 
        graph = tf.Graph()
        graph.seed = 1


        with graph.as_default():
            
            # init the placeholder (None is given for more flexibility in batch_size)
            # 16,16 will be put as parameters in a further version
            self.X = tf.placeholder(
                    tf.float32,
                    shape=[None, 16, 16, 3], name='X')

            self.y = tf.placeholder(tf.float32, shape=[None, 2], name='y')


            he_init = tf.contrib.layers.variance_scaling_initializer()

            # Initialize first convolution step 
            network = conv2d(self.X, 
                             conv_params['params1']['filter_size'], 
                             conv_params['params1']['kernel_size'], 
                             conv_params['params1']['strides'],
                             conv_params['params1']['padding'], 
                             conv_params['params1']['activation'])

            network = pooling(network)

            # for loop to allow different sizes of convolotional steps
            for i in range(2,len(conv_params)+1, 1):
                conv_par = conv_params['params'+str(i)]
                print(conv_par)
                network = conv2d(network, 
                             conv_par['filter_size'], 
                             conv_par['kernel_size'], 
                             conv_par['strides'],
                             conv_par['padding'], 
                             conv_par['activation'])

                network = pooling(network)

            # flatten last convolution step for full connected NN
            network, flatten_size = flattening_layer(network)

            
            # Initialize first full connected step
            network = dense(network, 
                         flatten_size, 
                         fc_params['params1']['output_size'])

            network = activation(network)

            # for loop to allow different sizes of full connected NN
            for i in range(2,len(fc_params)+1, 1):
                fc_par = fc_params['params'+str(i)]
                print(fc_par)
                network = dense(network, 
                             fc_par['input_size'], 
                             fc_par['output_size'])

                network = activation(network, fc_par['activation'])
            
            # Outputs, probability if last activation is a softmax
            self.out = network


            # Init the loss function (in a further version we will allow 
            # different losses (find the best to minimize F1-Score)
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.out, labels=self.y))

            ## Initialize the choosen Optimizer
            optimizer = optimizer_choice('GD', learning_rate)
            self.training_op = optimizer.minimize(loss)
            
            # Initialize all Variables
            init = tf.global_variables_initializer()
            
        self.session = tf.Session(config=None, graph=graph)
        self.session.run(init)

    def train(self, train_data, train_labels, number_epochs=3, batchsize=16):
        train_size=train_data.shape[0]

        for iepoch in range(number_epochs):

            batchset = prepare_batches(train_size, batchsize)

            for batch_indices in batchset:
                batch_data = train_data[batch_indices, :, :, :]
                batch_labels = train_labels[batch_indices]

                feed_dict = {
                    self.X: batch_data,
                    self.y: batch_labels,
                   }
                self.session.run(self.training_op, feed_dict=feed_dict)

    def predict(self, test):
        prediction = self.session.run(self.out, feed_dict={self.X: test})
        return prediction

## Easy example

In [10]:
conv_params = {
    'params1': {'filter_size': 32 ,'kernel_size': 5, 'strides': 1, 'padding': 'SAME', 'activation': tf.nn.relu},
    'params2': {'filter_size': 64 ,'kernel_size': 5, 'strides': 1, 'padding': 'SAME', 'activation': tf.nn.relu},
}

fc_params = {
    'params1': {'input_size': 4*4*64, 'output_size': 512, 'activation': 'relu'},
    'params2': {'input_size': 512, 'output_size': 2, 'activation': 'soft_max'},
}



In [15]:
cnn1 = CNN()
cnn1.model(conv_params, fc_params)
cnn1.train(train_data, train_labels)

init done
{'filter_size': 64, 'kernel_size': 5, 'strides': 1, 'padding': 'SAME', 'activation': <function relu at 0x12b8e1950>}
{'input_size': 512, 'output_size': 2, 'activation': 'soft_max'}


In [16]:
prediction = cnn1.predict(train_data)

In [17]:
def accuracy(pred, labels):
    idx0 = numpy.where(pred[:,0] <= 0.5)
    idx1 = numpy.where(pred[:,0] > 0.5)

    pred[idx0] = 0
    pred[idx1] = 1

    return (pred[:,0] == labels[:,0]).mean()

accuracy(prediction, train_labels)

0.6320795503674881