# Final project
By Thomas Sandfeld Nielsen and Lennart J. Pedersen

Choose: Cell > Run All.

### WARNING: 
The model consumes upward of 10 GB of RAM. To run locally, swap the '#' comment mark between the two `block_layer_sizes` lines
directly above the call to create_network(), just before the section 'Loss, training and performance'.

In [None]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import utils

# Start from an empty default graph so re-running this cell does not pile up ops.
tf.reset_default_graph()

# Number of label channels and the fixed input geometry used by the placeholders.
num_classes = 12
height = width = 224
nchannels = 3
padding = 'same'

# Input images and the per-pixel one-hot targets fed at run time.
x_pl = tf.placeholder(dtype=tf.float32,
                      shape=[None, height, width, nchannels],
                      name='xPlaceholder')
y_pl = tf.placeholder(dtype=tf.float32,
                      shape=[None, height, width, num_classes],
                      name='yPlaceholder')

# Running counters used by the builder functions to generate unique scope names.
DENSE_BLOCK_I = TD_I = TU_I = CONCAT_I = 1

# Base channel multiplier and number of output channels used by the network builder.
OUTPUT_SIZE = 12

## Parts to build the model

In [None]:
# L2 penalty attached to convolution weights.
l2 = slim.l2_regularizer(0.0001)
# Separate L2 penalties for the batch-norm offset ("beta") and scale ("gamma").
regularizers = {param: slim.l2_regularizer(0.0001) for param in ("beta", "gamma")}

def concatenate(x, net):
    """Concatenate ``x`` and ``net`` along the last axis.

    This is the two-tensor case of ``concatenateN``: the op is created inside
    a ``concat<CONCAT_I>`` scope and the global ``CONCAT_I`` counter is
    advanced by one.
    """
    return concatenateN([x], net)

def concatenateN(xs, net):
    """Concatenate every tensor in ``xs`` followed by ``net`` along the last axis.

    The op is created inside a ``concat<CONCAT_I>`` scope and the global
    ``CONCAT_I`` counter is advanced by one.
    """
    global CONCAT_I
    scope_name = 'concat%d' % CONCAT_I
    with tf.variable_scope(scope_name, reuse=True):
        tensors = list(xs) + [net]
        CONCAT_I += 1
        return tf.concat(tensors, axis=-1)

def denseBlock(n, net, num_features, is_training):
    """Build a dense block of ``n`` layers on top of ``net``.

    Each layer is batch-norm + ReLU, a 3x3 convolution producing
    ``num_features`` channels, and dropout (keep probability 0.8). The input
    of every layer is the previous layer's input concatenated with that
    layer's output. The returned tensor is the concatenation of all ``n``
    layer outputs; the block input itself is not included.

    Scopes are named after the global ``DENSE_BLOCK_I`` counter, which is
    incremented once the block has been built.
    """
    global DENSE_BLOCK_I

    def bn_conv_dropout(inputs):
        # One dense-block layer: BN+ReLU -> 3x3 conv -> dropout.
        activated = slim.batch_norm(inputs, activation_fn=tf.nn.relu, param_regularizers=regularizers)
        convolved = slim.conv2d(inputs=activated, kernel_size=[3,3], num_outputs=num_features, padding='same', weights_initializer=tf.contrib.keras.initializers.he_uniform(), weights_regularizer=l2, activation_fn=None)
        return slim.dropout(inputs=convolved, keep_prob=0.8, is_training=is_training)

    block_id = DENSE_BLOCK_I
    with tf.variable_scope('Denseblock_%d' % block_id):
        stacked_input = net
        layer_outputs = []

        # Layers 1 .. n-1: keep each output and grow the running input.
        for layer_no in range(1, n):
            with tf.variable_scope('db_layer%d_%d' % (block_id, layer_no)):
                layer_out = bn_conv_dropout(stacked_input)
                layer_outputs.append(layer_out)
                stacked_input = concatenate(stacked_input, layer_out)

        # Layer n: its output is only joined with the earlier layer outputs.
        with tf.variable_scope('db_layer%d_%d' % (block_id, n)):
            last_out = bn_conv_dropout(stacked_input)

        block_out = concatenateN(layer_outputs, last_out)

    DENSE_BLOCK_I += 1

    return block_out

def transition_down(net, num_features, is_training):
    """Build a transition-down unit.

    Applies batch-norm + ReLU, a 1x1 convolution to ``num_features``
    channels, dropout (keep probability 0.8) and a 2x2 max-pool. The scope is
    named ``TD<TD_I>`` and the global ``TD_I`` counter is incremented
    afterwards.
    """
    global TD_I
    scope_name = 'TD%d' % TD_I
    with tf.variable_scope(scope_name):
        activated = slim.batch_norm(net, activation_fn=tf.nn.relu, param_regularizers=regularizers)
        projected = slim.conv2d(inputs=activated, kernel_size=[1,1], num_outputs=num_features, padding='same', weights_initializer=tf.contrib.keras.initializers.he_uniform(), weights_regularizer=l2, activation_fn=None)
        dropped = slim.dropout(inputs=projected, keep_prob=0.8, is_training=is_training)
        pooled = slim.max_pool2d(inputs=dropped, kernel_size=[2,2])

    TD_I += 1
    return pooled

def transition_up(net, num_features):
    """Build a transition-up unit.

    Upsamples ``net`` with a 3x3 transposed convolution of stride 2 that
    outputs ``num_features`` channels. The scope is named ``TU<TU_I>`` and
    the global ``TU_I`` counter is incremented afterwards.
    """
    global TU_I
    scope_name = 'TU%d' % TU_I
    with tf.variable_scope(scope_name):
        # Learned upsampling; a nearest-neighbour resize of `net` to twice its
        # height/width was the alternative tried here.
        upsampled = slim.conv2d_transpose(inputs=net, kernel_size=[3,3], stride=2, num_outputs=num_features, padding='same', weights_initializer=tf.contrib.keras.initializers.he_uniform(), weights_regularizer=l2, activation_fn=None)

    TU_I += 1
    return upsampled

# Create the network

In [None]:
def create_network(block_layer_sizes, is_training, reuse=None):
    """Build the encoder/decoder network on top of the ``x_pl`` placeholder.

    The graph is: an initial 3x3 convolution, one dense block plus
    transition-down per entry of ``block_layer_sizes``, a 4-layer bottleneck
    dense block, then one transition-up plus dense block per entry in reverse
    order (each joined with the stored skip connection), and a final 1x1
    convolution to ``OUTPUT_SIZE`` channels.

    Args:
        block_layer_sizes: Non-empty sequence with the number of layers of
            each dense block on the downsampling path (mirrored on the way up).
        is_training: Forwarded to the dropout layers.
        reuse: Forwarded to the enclosing ``tf.variable_scope``. Pass ``True``
            on a second call to share the variables created by the first call
            instead of creating a new, untrained set.

    Returns:
        The un-activated output tensor of the final 1x1 convolution.

    Note:
        All variables live under the ``network`` variable scope and the
        global scope counters are restarted on every call, so two calls
        produce identical variable names. To build a second, independent
        network in the same graph, call this function inside a caller-owned
        ``tf.variable_scope``.
    """
    global DENSE_BLOCK_I, TD_I, TU_I, CONCAT_I
    # Restart the scope counters: a second call must generate exactly the same
    # scope names as the first one, otherwise `reuse` could never match them.
    DENSE_BLOCK_I = TD_I = TU_I = CONCAT_I = 1

    block_size = block_layer_sizes[0]
    bottleneck_layer_size = 4
    skip_dict = {}

    with tf.variable_scope('network', reuse=reuse):
        conv_out = slim.conv2d(x_pl, OUTPUT_SIZE*block_size, kernel_size=[3,3], activation_fn=None)

        old_num_features = OUTPUT_SIZE*block_size

        # Downsampling
        new_input = conv_out
        for nb, n in enumerate(block_layer_sizes):
            new_features = n*OUTPUT_SIZE + old_num_features

            net = denseBlock(n, new_input, OUTPUT_SIZE, is_training)
            concat = concatenate(new_input, net)

            skip_dict[nb] = concat  # Store the skip connection after concat

            new_input = transition_down(concat, new_features, is_training)
            print(new_input)
            old_num_features = new_features

        # Bottleneck
        bottleneck_out = denseBlock(bottleneck_layer_size, new_input, OUTPUT_SIZE, is_training)
        old_num_features = block_size*OUTPUT_SIZE + old_num_features

        # Upsampling
        print('upsampling')
        new_input = bottleneck_out
        for nb in reversed(range(len(block_layer_sizes))):
            n = block_layer_sizes[nb]

            feats = n*OUTPUT_SIZE
            old_num_features = old_num_features - feats
            new_features = old_num_features + feats*2

            tu_out = transition_up(new_input, new_features)

            skip_connection = skip_dict[nb]  # Read skip connection

            concat = concatenate(tu_out, skip_connection)
            new_input = denseBlock(n, concat, OUTPUT_SIZE, is_training)
            print(new_input)

        # The last dense block's input is joined with its output before the
        # final per-pixel 1x1 projection.
        final_input = concatenate(concat, new_input)
        conv_finish = slim.conv2d(final_input, OUTPUT_SIZE, kernel_size=[1,1], activation_fn=None, weights_initializer=tf.contrib.keras.initializers.he_uniform(), weights_regularizer=l2)

    return conv_finish

In [None]:
# Number of layers in each dense block on the downsampling path.
block_layer_sizes = [4] * 5
# Smaller configuration (see the RAM warning at the top of the notebook):
#block_layer_sizes = [2] * 2
y = create_network(block_layer_sizes, is_training=True)

# Loss, training and performance

In [None]:
print('loss, training, performance')
with tf.variable_scope('loss'):
    # The last label channel marks pixels that must not contribute to the
    # loss: their weight becomes 0, every other pixel gets weight 1.
    masked_weights = 1 - y_pl[:,:,:,-1]
    cross_entropy = tf.losses.softmax_cross_entropy(logits=y, onehot_labels=y_pl, weights=masked_weights)

    # The layers register L2 penalties through their *_regularizer arguments,
    # but those only take effect when they are added to the optimised loss.
    total_loss = tf.add_n([cross_entropy] + tf.losses.get_regularization_losses())


with tf.variable_scope('training'):
    # defining our optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

    # applying the gradients (data term + regularization)
    train_op = optimizer.minimize(total_loss)


with tf.variable_scope('performance'):
    # Compare only the first 11 channels; the last one is the masked class.
    masked_labels = y_pl[:,:,:,:11]
    masked_preds = y[:,:,:,:11]

    # per-pixel 1 (correct) / 0 (incorrect)
    correct_prediction = tf.equal(tf.argmax(masked_preds, axis=-1), tf.argmax(masked_labels, axis=-1))

    # Average over unmasked pixels only. A masked pixel has all-zero labels in
    # the first 11 channels, so its argmax is 0 and it would otherwise count
    # as "correct" whenever the network predicts class 0.
    correct_unmasked = tf.cast(correct_prediction, tf.float32) * masked_weights
    accuracy = tf.reduce_sum(correct_unmasked) / tf.maximum(tf.reduce_sum(masked_weights), 1.0)

print('Model consits of ', utils.num_params(), 'trainable parameters.')

# Load data

In [None]:
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)

import matplotlib.pyplot as plt
from os import listdir

# For the CamVid dataset
train_path = './CamVid/train/'
train_labels_path = './CamVid/trainannot/'
test_path = './CamVid/test/'
test_labels_path = './CamVid/testannot/'
val_path = './CamVid/val/'
val_labels_path = './CamVid/valannot/'

# Images and annotations are paired purely by list position further down.
# listdir() returns entries in arbitrary order, so both listings are sorted to
# make position i refer to the same sample in both lists.
# NOTE(review): assumes an image and its annotation share the same file name
# in their respective directories - confirm against the dataset layout.
train_imgs = [train_path + f for f in sorted(listdir(train_path))]
train_labels = [train_labels_path + f for f in sorted(listdir(train_labels_path))]

test_imgs = [test_path + f for f in sorted(listdir(test_path))]
test_labels = [test_labels_path + f for f in sorted(listdir(test_labels_path))]

val_imgs = [val_path + f for f in sorted(listdir(val_path))]
val_labels = [val_labels_path + f for f in sorted(listdir(val_labels_path))]

# Auxiliary methods

In [None]:
def one_hot_labels(labels, num_classes=12):
    """One-hot encode integer class maps along a new trailing axis.

    Args:
        labels: Array-like of integer class ids (e.g. a batch of 2-D label
            images). Every id must be smaller than ``num_classes``.
        num_classes: Length of the one-hot vector; defaults to 12.

    Returns:
        Integer array of shape ``labels.shape + (num_classes,)`` holding a 1
        at the class id of each pixel and 0 elsewhere.

    Raises:
        TypeError: If ``labels`` is non-empty and not of integer dtype.
        IndexError: If a class id is ``>= num_classes``.
    """
    labels = np.asarray(labels)
    if labels.size == 0:
        # Nothing to encode; keep the leading shape and add the class axis.
        return np.zeros(labels.shape + (num_classes,), dtype=int)
    if labels.dtype.kind not in 'iu':
        raise TypeError('labels must contain integer class ids, got dtype %s' % labels.dtype)

    # Row k of the identity matrix is the one-hot vector of class k, so a
    # single fancy-indexing lookup replaces the per-pixel Python loop.
    return np.eye(num_classes, dtype=int)[labels]

from scipy import misc
import random

def normalized(rgb):
    """Scale the first three channels of an image from 0..255 to 0..1.

    Returns a new ``float32`` array of shape ``(rows, cols, 3)``; channel
    order is left untouched.
    """
    rows, cols = rgb.shape[0], rgb.shape[1]
    scaled = np.zeros((rows, cols, 3), np.float32)
    for channel in range(3):
        scaled[:, :, channel] = rgb[:, :, channel] / 255
    return scaled


import numpy as np
import random as rand
def next_batch(num, data, labels, idxs, count):
    """Load, resize and augment batch number ``count`` of a shuffled dataset.

    Only the files that belong to the requested batch are read from disk.

    Args:
        num: Batch size.
        data: Sequence of image file paths.
        labels: Sequence of annotation file paths, aligned with ``data``.
        idxs: Shuffled indices into ``data``/``labels`` for the current epoch.
        count: Zero-based batch number within ``idxs``.

    Returns:
        ``(images, one_hot)``: images scaled to 0..1 with shape
        ``(batch, 224, 224, 3)`` and one-hot labels with shape
        ``(batch, 224, 224, 12)``. The last batch may hold fewer than ``num``
        samples; if ``count`` points past the end, both arrays have a leading
        dimension of 0.
    """
    img_size = (224, 224)

    # Slice the indices first so the expensive decode / resize / one-hot work
    # is done for one batch instead of the whole dataset on every call.
    batch_idxs = idxs[count * num:(count + 1) * num]
    if len(batch_idxs) == 0:
        return (np.zeros((0,) + img_size + (3,), dtype=np.float32),
                np.zeros((0,) + img_size + (12,), dtype=int))

    images = [normalized(misc.imresize(misc.imread(data[i]), img_size))
              for i in batch_idxs]

    # Label maps hold class ids, so they must be resized with nearest-neighbour
    # sampling; the default bilinear filter blends neighbouring ids into
    # unrelated classes along region borders.
    annotations = np.array([misc.imresize(misc.imread(labels[i]), img_size, interp='nearest')
                            for i in batch_idxs])
    one_hot = one_hot_labels(annotations)

    # Augmentation: 30% horizontal flip, 30% rotation by 90 degrees, 40% unchanged.
    # Image and label always receive the same transform.
    batch_data = []
    batch_labels = []
    for image, label in zip(images, one_hot):
        chance = int(rand.random() * 10)
        if chance < 3:
            image, label = np.fliplr(image), np.fliplr(label)
        elif chance < 6:
            image, label = np.rot90(image), np.rot90(label)
        batch_data.append(image)
        batch_labels.append(label)

    return np.array(batch_data), np.array(batch_labels)



def deencode_one_hot(image):
    """Turn a one-hot encoded label image into a colour image.

    Args:
        image: Array-like of shape ``(rows, cols, classes)``; an entry equal
            to 1 at position ``k`` selects palette colour ``k``.

    Returns:
        ``float32`` array of shape ``(rows, cols, 3)`` when every pixel has
        exactly one entry equal to 1 within the palette range. Entries at
        positions beyond the 12-colour palette are ignored.
    """
    colors = [(128, 128, 128), (128, 0, 0), (192, 192, 128), (128, 64, 128), (0, 0, 192),
         (128, 128, 0), (192, 128, 128), (64, 64, 128), (64, 0, 128), (64, 64, 0),
         (0, 128, 192), (0, 0, 0)]
    palette_size = len(colors)

    res = []
    for scanline in image:
        # Explicit bounds check instead of a catch-all `except`, so unrelated
        # errors are no longer swallowed silently.
        row = [colors[index]
               for one_hot in scanline
               for index, p in enumerate(one_hot)
               if p == 1 and index < palette_size]
        res.append(np.array(row, dtype=np.float32))
    return np.array(res, dtype=np.float32)

def deencode_pred(pred):
    """Map a 2-D array of predicted class indices to palette colours.

    Returns a ``float32`` array of shape ``(rows, cols, 3)``.
    """
    colors = [(128, 128, 128), (128, 0, 0), (192, 192, 128), (128, 64, 128), (0, 0, 192),
         (128, 128, 0), (192, 128, 128), (64, 64, 128), (64, 0, 128), (64, 64, 0),
         (0, 128, 192), (0, 0, 0)]

    coloured_rows = [
        np.array([colors[class_id] for class_id in scanline], dtype=np.float32)
        for scanline in pred
    ]
    return np.array(coloured_rows, dtype=np.float32)


def get_iou(preds, labels, num_classes, session):
    """Print per-class IoU and return the mean IoU over ``num_classes``.

    For every image and every non-void class present in its label, the
    intersection-over-union of prediction and label is computed. Pixels whose
    label or prediction is the void class (index 11) are left out. Per-class
    values are summed over the batch and divided by the batch size.

    Args:
        preds: Array of predicted class indices, shape ``(batch, rows, cols)``.
        labels: One-hot labels, shape ``(batch, rows, cols, classes)``.
        num_classes: Divisor used for the returned mean.
        session: Unused; kept so existing callers keep working. The
            computation is done in NumPy and needs no TensorFlow session.

    Returns:
        Sum of the batch-averaged per-class IoUs divided by ``num_classes``.

    Raises:
        ValueError: If a label pixel has no entry equal to 1.
    """
    names = ['sky', 'building', 'column_pole', 'road', 'sidewalk', 'tree',
                'sign', 'fence', 'car', 'pedestrian', 'bicyclist']
    void_class = len(names)  # index 11: not scored

    preds = np.asarray(preds)
    batch_size = preds.shape[0]
    iou_per_class = {}
    for i in range(batch_size):
        hits = np.asarray(labels[i]) == 1
        if not hits.any(axis=-1).all():
            raise ValueError('label %d contains a pixel without a one-hot class' % i)

        # Class index per pixel (position of the first 1), flattened like the predictions.
        y = np.argmax(hits, axis=-1).reshape(-1)
        o = preds[i].reshape(-1)

        # Void pixels, in either the label or the prediction, are not counted.
        scored = (y < void_class) & (o < void_class)

        for c in np.unique(y):
            if c >= void_class:
                continue

            pred_is_c = scored & (o == c)
            label_is_c = scored & (y == c)
            intersection = int(np.count_nonzero(pred_is_c & label_is_c))
            union = int(np.count_nonzero(pred_is_c | label_is_c))

            # A class can end up with an empty union when all of its pixels
            # were predicted as void; that scores 0 instead of dividing by 0.
            iou = float(intersection) / max(union, 1)

            class_name = names[c]
            iou_per_class[class_name] = iou_per_class.get(class_name, 0.0) + iou

    all_classes_mean_iou = 0.0
    for k, v in iou_per_class.items():
        print(k, "IOU :", v/batch_size)
        all_classes_mean_iou += v/batch_size

    return all_classes_mean_iou/num_classes

## Train the model 
Train the model and get a test prediction

In [None]:
batch_size = 3
max_epochs = 10


valid_loss, valid_accuracy = [], []
train_loss, train_accuracy = [], []
test_loss, test_accuracy = [], []

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
TRAIN_DATASIZE = len(train_imgs)
TEST_DATASIZE = len(test_imgs)

# Ceiling division: every sample is visited once per epoch and no empty batch
# is requested when the dataset size is a multiple of the batch size.
train_runs = -(-TRAIN_DATASIZE // batch_size)
test_runs = TEST_DATASIZE // batch_size
# Number of validation batches; the validation index below wraps around it.
val_runs = max(1, -(-len(val_imgs) // batch_size))

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    completed = 0

    try:
        while completed < max_epochs:
            print('training started')
            #Shuffle data before each epoch
            idxs = np.random.permutation(TRAIN_DATASIZE)
            idxs_val = np.random.permutation(len(val_imgs))

            # Collected between two log points so the logged value is a real
            # mean over the steps since the previous log line.
            _train_loss, _train_accuracy = [], []

            for i in range(train_runs):
                ## Run train op
                x_batch, y_batch =  next_batch(batch_size, train_imgs, train_labels, idxs, i)
                fetches_train = [train_op, cross_entropy, accuracy]
                feed_dict_train = {x_pl: x_batch, y_pl: y_batch}
                _, _loss, _acc = sess.run(fetches_train, feed_dict_train)

                _train_loss.append(_loss)
                _train_accuracy.append(_acc)

                if i % 4 == 0:
                    train_loss.append(np.mean(_train_loss))
                    train_accuracy.append(np.mean(_train_accuracy))
                    _train_loss, _train_accuracy = [], []

                    fetches_valid = [cross_entropy, accuracy]
                    # Wrap around so the requested validation batch always exists.
                    val_count = (i // 4) % val_runs
                    x_val, y_val = next_batch(batch_size, val_imgs, val_labels, idxs_val, val_count)

                    feed_dict_valid = {x_pl: x_val, y_pl: y_val}
                    _loss, _acc = sess.run(fetches_valid, feed_dict_valid)

                    valid_loss.append(_loss)
                    valid_accuracy.append(_acc)
                    print("Epoch {} : Train Loss {:6.3f}, Train acc {:6.3f}, Valid loss {:6.3f}, Valid acc: {:6.3f}".format(
                            completed, train_loss[-1], train_accuracy[-1], valid_loss[-1], valid_accuracy[-1]))

                print(i+1, 'of', train_runs, 'done')

            completed += 1

        # Inference-mode network (dropout disabled) used for the prediction below.
        y = create_network(block_layer_sizes=block_layer_sizes, is_training=False, reuse=True)

        # Initialise only variables that do not have a value yet. Running the
        # global initializer again at this point would overwrite every weight
        # learned during training.
        uninitialized_names = set(sess.run(tf.report_uninitialized_variables()))
        new_variables = [v for v in tf.global_variables()
                         if v.op.name.encode() in uninitialized_names]
        sess.run(tf.variables_initializer(new_variables))
        sess.run(tf.local_variables_initializer())

        idxs_test = np.random.permutation(TEST_DATASIZE)

        # The indices are shuffled, so the first batch is as random as any
        # other and is guaranteed to exist for a non-empty test set.
        x_batch, y_batch =  next_batch(batch_size, test_imgs, test_labels, idxs_test, 0)
        feed_dict_test = {x_pl: x_batch, y_pl: y_batch}

        # NOTE: cross_entropy and accuracy are the tensors defined on the
        # training network, so these figures are computed with is_training=True.
        fetches_valid = [cross_entropy, accuracy]
        _loss, _acc = sess.run(fetches_valid, feed_dict_test)
        test_loss.append(_loss)
        test_accuracy.append(_acc)
        print('Test Loss {:6.3f}, Test acc {:6.3f}'.format(np.mean(test_loss), np.mean(test_accuracy)))

        # Get image
        feed_dict = {x_pl: x_batch}
        classification = sess.run(tf.argmax(y,axis=-1), feed_dict)

        y_img = deencode_one_hot(y_batch[0])
        c_img = deencode_pred(classification[0])

        print("IOU = ", get_iou(classification, y_batch, 11, session=sess))

        # Save input, ground truth and prediction of the first test sample.
        import scipy
        scipy.misc.toimage(x_batch[0],cmin=0.0, cmax=1.0).save('x_img.jpg')
        scipy.misc.toimage(y_img,cmin=0.0, cmax=255.0).save('y_img.jpg')
        scipy.misc.toimage(c_img,cmin=0.0, cmax=255.0).save('c_img.jpg')

    except IOError as e:
        print(e.errno, e.strerror)