#Imports

In [0]:
import re
import random
import numpy as np
import os.path
from PIL import Image
import shutil
import zipfile
import time
%tensorflow_version 1.x
import tensorflow as tf
from glob import glob
from urllib.request import urlretrieve
from tqdm import tqdm
import sys
import os
from copy import deepcopy
from unittest import mock
import warnings
from distutils.version import LooseVersion
from moviepy.editor import VideoFileClip
from moviepy.editor import ImageSequenceClip


#Running Mode

In [0]:
# Colab form toggle. When True, DATA_DIR, VIDEO_DIR and the runs directory
# are redirected to './drive/My Drive/...' (presumably a mounted Google Drive).
ON_DRIVE = True #@param {type: "boolean"}

#Helper Functions


In [0]:

class DLProgress(tqdm):
    """tqdm progress bar exposing a report hook in the shape urlretrieve expects."""

    # Block index reported by the previous hook call (class-level default of 0).
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the bytes transferred since the previous call.

        :param block_num: Number of blocks transferred so far
        :param block_size: Size of each block
        :param total_size: Total size of the download, stored as the bar's total
        """
        self.total = total_size
        new_blocks = block_num - self.last_block
        self.update(new_blocks * block_size)
        self.last_block = block_num


# Download and extract pretrained vgg model if it doesn't exist
def maybe_download_pretrained_vgg(data_dir):
    """Make sure the pretrained VGG model files exist under ``<data_dir>/vgg``.

    If any of the three expected model files is missing, the ``vgg`` directory
    is wiped, the archive is downloaded into it, extracted into ``data_dir``
    and the archive is deleted afterwards.

    :param data_dir: Directory that contains (or will contain) the ``vgg`` folder
    """
    vgg_filename = 'vgg.zip'
    vgg_path = os.path.join(data_dir, 'vgg')
    vgg_files = [
        os.path.join(vgg_path, 'variables/variables.data-00000-of-00001'),
        os.path.join(vgg_path, 'variables/variables.index'),
        os.path.join(vgg_path, 'saved_model.pb')]

    if all(os.path.exists(vgg_file) for vgg_file in vgg_files):
        print('Model found! No need to download.')
        return

    # Clean vgg dir so a partial earlier download cannot be mistaken for a model
    if os.path.exists(vgg_path):
        shutil.rmtree(vgg_path)
    os.makedirs(vgg_path)

    zip_path = os.path.join(vgg_path, vgg_filename)

    # Download vgg
    print('Downloading pre-trained vgg model...')
    with DLProgress(unit='B', unit_scale=True, miniters=1) as pbar:
        urlretrieve(
            'https://s3-us-west-1.amazonaws.com/udacity-selfdrivingcar/vgg.zip',
            zip_path,
            pbar.hook)

    # Extract vgg. The context manager closes the archive even if extraction
    # fails. Extraction targets data_dir (not vgg_path); presumably the archive
    # carries its own top-level 'vgg/' folder -- TODO confirm.
    print('Extracting model...')
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(data_dir)

    # Remove zip file to save space
    os.remove(zip_path)


# Generate function to create batches of training data
def gen_batch_function(data_folder, image_shape):
    """Build a generator function that yields shuffled training batches.

    :param data_folder: Folder containing the 'image_2' and 'gt_image_2' subfolders
    :param image_shape: Size handed unchanged to PIL's Image.resize, which
        interprets it as (width, height)
    :return: Function taking a batch size and yielding (images, labels) arrays
    """

    # Create batches of training data
    def get_batches_fn(batch_size):
        """Yield (images, gt_images) numpy arrays of up to batch_size samples each."""
        image_paths = glob(os.path.join(data_folder, 'image_2', '*.png'))

        # Map each image file name to its road label by dropping the
        # '_road_'/'_lane_' infix from the label file name.
        label_paths = {}
        for label_path in glob(os.path.join(data_folder, 'gt_image_2', '*_road_*.png')):
            image_name = re.sub(r'_(lane|road)_', '_', os.path.basename(label_path))
            label_paths[image_name] = label_path

        background_color = np.array([255, 0, 0])

        random.shuffle(image_paths)
        for start in range(0, len(image_paths), batch_size):
            images, gt_images = [], []
            for image_file in image_paths[start:start + batch_size]:
                gt_image_file = label_paths[os.path.basename(image_file)]

                image = np.array(Image.open(image_file).resize(image_shape))
                gt_image = np.array(Image.open(gt_image_file).resize(image_shape))

                # Two boolean channels: [is background, is not background]
                is_background = np.all(gt_image == background_color, axis=2)
                is_background = is_background[..., np.newaxis]
                one_hot = np.concatenate((is_background, ~is_background), axis=2)

                images.append(image)
                gt_images.append(one_hot)

            yield np.array(images), np.array(gt_images)

    return get_batches_fn


# Generate test output using the test images
def gen_test_output(sess, logits, keep_prob, image_pl, data_folder, image_shape):
    """Yield every test image with the predicted segmentation painted on top.

    :param sess: TF session
    :param logits: TF tensor with one row of class logits per pixel
    :param keep_prob: TF placeholder for the dropout keep probability
    :param image_pl: TF placeholder for the input image batch
    :param data_folder: Folder containing the 'image_2' subfolder with test PNGs
    :param image_shape: Size handed to PIL's Image.resize, i.e. (width, height)
    :return: Generator of (file name, annotated image as numpy array)
    """
    # Build the softmax op once; creating it inside the loop would add a new
    # node to the graph for every image.
    softmax = tf.nn.softmax(logits)
    overlay_rgba = np.array([[0, 255, 0, 127]])

    for image_file in glob(os.path.join(data_folder, 'image_2', '*.png')):
        image = np.array(Image.open(image_file).resize(image_shape))
        # PIL sizes are (width, height) while arrays are (rows, cols): take the
        # mask dimensions from the array actually fed to the network.
        height, width = image.shape[:2]

        class1_prob = sess.run(softmax, {keep_prob: 1.0, image_pl: [image]})[:, 1]
        segmentation = (class1_prob > 0.5).reshape(height, width, 1)

        # Image.fromarray does not convert dtypes, so the mask must already be
        # uint8 to be read as 4-channel RGBA pixels.
        mask = np.dot(segmentation, overlay_rgba).astype(np.uint8)
        mask = Image.fromarray(mask)
        street_im = Image.fromarray(image)
        street_im.paste(mask, box=None, mask=mask)

        yield os.path.basename(image_file), np.array(street_im)


def save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image):
    """Run the network on the test images and save the annotated results.

    Results are written to a fresh sub-directory of ``runs_dir`` named after
    the current timestamp.

    :param runs_dir: Parent directory for the per-run output folder
    :param data_dir: Directory containing 'data_road/testing'
    :param sess: TF session
    :param image_shape: Size forwarded to gen_test_output
    :param logits: TF tensor for the logits
    :param keep_prob: TF placeholder for the dropout keep probability
    :param input_image: TF placeholder for the input images
    """
    # Make folder for current run
    run_name = str(time.time())
    output_dir = os.path.join(runs_dir, run_name)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Run NN on test images and save them to HD
    print('Training Finished. Saving test images to: {}'.format(output_dir))
    testing_folder = os.path.join(data_dir, 'data_road/testing')
    for name, image in gen_test_output(
            sess, logits, keep_prob, input_image, testing_folder, image_shape):
        Image.fromarray(image).save(os.path.join(output_dir, name))

# Overlay the predicted segmentation on a single image or video frame
def segment_single_image(sess, logits, keep_prob, input_image, image, image_shape):
    """Return ``image`` resized to ``image_shape`` with the segmentation painted on.

    :param sess: TF session
    :param logits: TF tensor with one row of class logits per pixel
    :param keep_prob: TF placeholder for the dropout keep probability
    :param input_image: TF placeholder for the input image batch
    :param image: PIL image or numpy array (e.g. a video frame)
    :param image_shape: Size handed to PIL's Image.resize, i.e. (width, height)
    :return: Annotated image as a numpy array
    """
    # numpy's ndarray.resize reshapes in place and returns None, so array
    # inputs have to become PIL images before they can be rescaled.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(np.asarray(image))
    image = np.array(image.resize(image_shape))
    # PIL sizes are (width, height) while arrays are (rows, cols): take the
    # mask dimensions from the array actually fed to the network.
    height, width = image.shape[:2]

    # NOTE: this adds a softmax op to the graph on every call.
    class1_prob = sess.run(tf.nn.softmax(logits),
                           {keep_prob: 1.0, input_image: [image]})[:, 1]
    segmentation = (class1_prob > 0.5).reshape(height, width, 1)

    # Image.fromarray does not convert dtypes, so the mask must already be
    # uint8 to be read as 4-channel RGBA pixels.
    mask = np.dot(segmentation, np.array([[0, 255, 0, 127]])).astype(np.uint8)
    mask = Image.fromarray(mask)

    street_im = Image.fromarray(image)
    street_im.paste(mask, box=None, mask=mask)

    return np.array(street_im)

#Testing Functions

In [0]:
def test_safe(func):
    """
    Isolate tests

    Decorator that runs ``func`` inside a fresh default ``tf.Graph`` and
    prints 'Tests Passed' once it returns without raising.

    :param func: Test function to wrap
    :return: Wrapper that forwards all arguments and returns func's result
    """
    import functools

    # functools.wraps keeps the wrapped test's name and docstring visible.
    @functools.wraps(func)
    def func_wrapper(*args, **kwargs):
        with tf.Graph().as_default():
            result = func(*args, **kwargs)
        print('Tests Passed')
        return result

    return func_wrapper


def _prevent_print(function, params):
    sys.stdout = open(os.devnull, "w")
    function(**params)
    sys.stdout = sys.__stdout__


def _assert_tensor_shape(tensor, shape, display_name):
    """Assert that ``tensor`` has the rank and the fixed dimensions of ``shape``.

    :param tensor: TF tensor to check
    :param shape: Expected shape; ``None`` entries accept any size
    :param display_name: Name used in the assertion messages
    """
    rank_message = '{} has wrong rank'.format(display_name)
    assert tf.assert_rank(tensor, len(shape), message=rank_message)

    tensor_shape = []
    if len(shape):
        tensor_shape = tensor.get_shape().as_list()

    # Collect the actual sizes that disagree with a fixed expected size.
    wrong_dimension = []
    for actual_dim, expected_dim in zip(tensor_shape, shape):
        if expected_dim is not None and actual_dim != expected_dim:
            wrong_dimension.append(actual_dim)

    assert not wrong_dimension, \
        '{} has wrong shape.  Found {}'.format(display_name, tensor_shape)


# Mock an attribute.  Restore attribute when exiting scope.
class TmpMock:
    """Context manager that swaps ``module.attrib_name`` for a MagicMock.

    The replacement happens at construction time; entering the context only
    hands back the mock, and leaving it puts the saved attribute back.
    """

    def __init__(self, module, attrib_name):
        self.module = module
        self.attrib_name = attrib_name
        # Keep a (deep) copy of the real attribute so it can be restored.
        self.original_attrib = deepcopy(getattr(module, attrib_name))
        setattr(module, attrib_name, mock.MagicMock())

    def __enter__(self):
        installed_mock = getattr(self.module, self.attrib_name)
        return installed_mock

    def __exit__(self, type, value, traceback):
        setattr(self.module, self.attrib_name, self.original_attrib)


@test_safe
def test_load_vgg(load_vgg, tf_module):
    """Check that ``load_vgg`` loads the model and returns the named graph tensors.

    ``tf.saved_model.loader.load`` is mocked out, and placeholders carrying the
    tensor names load_vgg looks up are created in the default graph instead.

    :param load_vgg: Function under test, called as load_vgg(sess, vgg_path)
    :param tf_module: The tensorflow module whose loader gets mocked
    """
    with TmpMock(tf_module.saved_model.loader, 'load') as mock_load_model:
        vgg_path = ''
        # The session is opened as a context manager so it is closed again
        # when the test finishes or an assertion fails.
        with tf.Session() as sess:
            test_input_image = tf.placeholder(tf.float32, name='image_input')
            test_keep_prob = tf.placeholder(tf.float32, name='keep_prob')
            test_vgg_layer3_out = tf.placeholder(tf.float32, name='layer3_out')
            test_vgg_layer4_out = tf.placeholder(tf.float32, name='layer4_out')
            test_vgg_layer7_out = tf.placeholder(tf.float32, name='layer7_out')

            input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(sess, vgg_path)

            assert mock_load_model.called, \
                'tf.saved_model.loader.load() not called'
            assert mock_load_model.call_args == mock.call(sess, ['vgg16'], vgg_path), \
                'tf.saved_model.loader.load() called with wrong arguments.'

            assert input_image == test_input_image, 'input_image is the wrong object'
            assert keep_prob == test_keep_prob, 'keep_prob is the wrong object'
            assert vgg_layer3_out == test_vgg_layer3_out, 'layer3_out is the wrong object'
            assert vgg_layer4_out == test_vgg_layer4_out, 'layer4_out is the wrong object'
            assert vgg_layer7_out == test_vgg_layer7_out, 'layer7_out is the wrong object'


@test_safe
def test_layers(layers):
    """Check that ``layers`` returns a rank-4 tensor with num_classes channels.

    :param layers: Function under test
    """
    num_classes = 2
    # Placeholders with 256, 512 and 4096 channels stand in for the VGG
    # layer 3, 4 and 7 outputs.
    vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = (
        tf.placeholder(tf.float32, [None, None, None, depth])
        for depth in (256, 512, 4096))

    layers_output = layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes)

    expected_shape = [None, None, None, num_classes]
    _assert_tensor_shape(layers_output, expected_shape, 'Layers Output')


@test_safe
def test_optimize(optimize):
    """Check the logits shape from ``optimize`` and that one train step changes weights.

    :param optimize: Function under test; expected to return
        (logits, train_op, cross_entropy_loss, decaying_learning_rate)
    """
    num_classes = 2
    shape = [2, 3, 4, num_classes]
    layers_output = tf.Variable(tf.zeros(shape))
    correct_label = tf.placeholder(tf.float32, [None, None, None, num_classes])
    learning_rate = tf.placeholder(tf.float32)
    logits, train_op, cross_entropy_loss, decaying_learning_rate = optimize(
        layers_output, correct_label, learning_rate, num_classes)

    _assert_tensor_shape(logits, [2*3*4, num_classes], 'Logits')

    # Arbitrary increasing values serve as labels for both session runs.
    fake_labels = np.arange(np.prod(shape)).reshape(shape)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run([train_op, decaying_learning_rate],
                 {correct_label: fake_labels, learning_rate: 10})
        trained_output, _ = sess.run([layers_output, cross_entropy_loss],
                                     {correct_label: fake_labels})

    # The variable started as all zeros, so any non-zero value means it trained.
    weights_changed = trained_output.min() != 0 or trained_output.max() != 0
    assert weights_changed, 'Training operation not changing weights.'


@test_safe
def test_train_nn(train_nn):
    """Smoke-test ``train_nn`` for one epoch with constant ops and fake batches.

    :param train_nn: Function under test, called with keyword arguments only
    """
    epochs = 1
    batch_size = 2

    def get_batches_fn(requested_batch_size):
        # Returns a single array of shape (batch, 2, 3, 3) rather than a
        # generator of (image, label) pairs.
        shape = [requested_batch_size, 2, 3, 3]
        return np.arange(np.prod(shape)).reshape(shape)

    # Constants and unshaped placeholders stand in for the real graph pieces.
    graph_pieces = {
        'train_op': tf.constant(0),
        'cross_entropy_loss': tf.constant(10.11),
        'input_image': tf.placeholder(tf.float32, name='input_image'),
        'correct_label': tf.placeholder(tf.float32, name='correct_label'),
        'keep_prob': tf.placeholder(tf.float32, name='keep_prob'),
        'learning_rate': tf.placeholder(tf.float32, name='learning_rate'),
        'decaying_learning_rate': tf.convert_to_tensor(
            0.0, dtype=tf.float32, name='decaying_learning_rate'),
    }

    with tf.Session() as sess:
        train_nn(sess=sess,
                 epochs=epochs,
                 batch_size=batch_size,
                 get_batches_fn=get_batches_fn,
                 **graph_pieces)

@test_safe
def test_for_kitti_dataset(data_dir):
    """Assert that the Kitti road dataset under ``data_dir`` has the expected file counts.

    :param data_dir: Directory expected to contain the 'data_road' folder
    """
    kitti_dataset_path = os.path.join(data_dir, 'data_road')

    def count_matches(pattern):
        return len(glob(os.path.join(kitti_dataset_path, pattern)))

    training_labels_count = count_matches('training/gt_image_2/*_road_*.png')
    training_images_count = count_matches('training/image_2/*.png')
    testing_images_count = count_matches('testing/image_2/*.png')

    # All three counts being zero means the dataset was never extracted.
    found_any_file = any((training_images_count, training_labels_count, testing_images_count))
    assert found_any_file,\
        'Kitti dataset not found. Extract Kitti dataset in {}'.format(kitti_dataset_path)
    assert training_images_count == 289, 'Expected 289 training images, found {} images.'.format(training_images_count)
    assert training_labels_count == 289, 'Expected 289 training labels, found {} labels.'.format(training_labels_count)
    assert testing_images_count == 290, 'Expected 290 testing images, found {} images.'.format(testing_images_count)

#Main

In [0]:
# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')

# Optimizer settings: base learning rate and its staircase decay schedule
LEARNING_RATE = 0.001
DECAY_RATE = 0.90
DECAY_AFTER_N_STEPS = 50

# Training loop settings
KEEP_PROB = 0.5
EPOCHS = 5
BATCH_SIZE = 5

# Model settings
CLASSES = 2
IMAGE_SHAPE = (576, 160)
L2_REG = 0.001
STD_DEV = 0.01

# Input locations; ON_DRIVE switches both to the './drive/My Drive' tree
DATA_DIR = './drive/My Drive/data' if ON_DRIVE else './data'
VIDEO_DIR = './drive/My Drive/video' if ON_DRIVE else './video'


# Load Pretrained VGG Model into TensorFlow.
def load_vgg(sess, vgg_path):
    """Load the saved VGG model into ``sess`` and look up its key tensors.

    :param sess: TF session to load the model into
    :param vgg_path: Path to the saved model folder
    :return: Tuple of tensors (image_input, keep_prob, layer3_out, layer4_out, layer7_out)
    """
    tf.saved_model.loader.load(sess, ['vgg16'], vgg_path)
    graph = tf.get_default_graph()

    # Tensor names looked up in the default graph, in return order
    tensor_names = (
        'image_input:0',
        'keep_prob:0',
        'layer3_out:0',
        'layer4_out:0',
        'layer7_out:0',
    )
    return tuple(graph.get_tensor_by_name(name) for name in tensor_names)
test_load_vgg(load_vgg, tf)


# Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """Build the decoder on top of the VGG layer 3, 4 and 7 outputs.

    Every convolution uses 'same' padding, a random-normal kernel initializer
    (stddev STD_DEV) and an L2 kernel regularizer (scale L2_REG).

    :param vgg_layer3_out: TF tensor for VGG layer 3 output
    :param vgg_layer4_out: TF tensor for VGG layer 4 output
    :param vgg_layer7_out: TF tensor for VGG layer 7 output
    :param num_classes: Number of output channels of every decoder layer
    :return: Output tensor of the last transposed convolution
    """
    def shared_options():
        # A fresh initializer and regularizer object for each layer
        return {
            'padding': 'same',
            'kernel_initializer': tf.random_normal_initializer(stddev=STD_DEV),
            'kernel_regularizer': tf.contrib.layers.l2_regularizer(L2_REG),
        }

    def project_1x1(vgg_out):
        # 1x1 convolution down to num_classes channels
        return tf.layers.conv2d(vgg_out, num_classes, 1, **shared_options())

    def upsample(features, kernel_size, stride):
        # Transposed convolution with the given kernel size and stride
        return tf.layers.conv2d_transpose(
            features, num_classes, kernel_size, stride, **shared_options())

    # Layer 7: 1x1 convolution, then upsample by a stride of 2
    layer7_upsampled = upsample(project_1x1(vgg_layer7_out), 4, 2)

    # Skip connection: upsampled layer 7 + 1x1-convolved layer 4
    layer4_7_skip_connection = tf.add(layer7_upsampled, project_1x1(vgg_layer4_out))

    # Upsample by a stride of 2 again before combining with layer 3
    layer7_final = upsample(layer4_7_skip_connection, 4, 2)

    # Skip connection: 1x1-convolved layer 3 + upsampled result
    layer3_7_skip_connection = tf.add(project_1x1(vgg_layer3_out), layer7_final)

    # Final upsampling with a stride of 8
    return upsample(layer3_7_skip_connection, 16, 8)
test_layers(layers)

# Build the TensorFlow loss and optimizer operations.
def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """Build the loss, the decaying learning rate and the Adam training op.

    :param nn_last_layer: TF tensor of the last layer in the network
    :param correct_label: TF tensor/placeholder holding the labels
    :param learning_rate: TF tensor/placeholder for the initial learning rate
    :param num_classes: Number of classes (size of the last dimension)
    :return: Tuple of (logits, train_op, cross_entropy_loss, decaying_learning_rate)
    """
    # Flatten to one row per pixel, one column per class
    logits = tf.reshape(nn_last_layer, (-1, num_classes))
    labels = tf.reshape(correct_label, (-1, num_classes))

    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))

    # kernel_regularizer arguments only register their penalties in the
    # REGULARIZATION_LOSSES collection; they take effect only when added to
    # the loss being minimized. The returned cross_entropy_loss stays the
    # plain cross entropy so reported values keep their meaning.
    regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    training_loss = tf.add_n([cross_entropy_loss] + regularization_losses)

    # Staircase decay: the rate is multiplied by DECAY_RATE after every
    # DECAY_AFTER_N_STEPS training steps (steps, not epochs).
    global_step = tf.Variable(0, trainable=False)
    decaying_learning_rate = tf.train.exponential_decay(
        learning_rate, global_step, DECAY_AFTER_N_STEPS, DECAY_RATE, staircase=True)

    # Optimizer for reducing loss; minimize() also increments global_step
    optimizer = tf.train.AdamOptimizer(decaying_learning_rate)
    train_op = optimizer.minimize(training_loss, global_step=global_step)

    return logits, train_op, cross_entropy_loss, decaying_learning_rate
test_optimize(optimize)


# Train neural network and print out the loss during training.
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate, decaying_learning_rate):
    """Train for ``epochs`` epochs, printing the loss and learning rate of every batch.

    The dropout keep probability and the initial learning rate are taken from
    the module constants KEEP_PROB and LEARNING_RATE.

    :param sess: TF session
    :param epochs: Number of epochs
    :param batch_size: Batch size passed to get_batches_fn
    :param get_batches_fn: Function returning an iterable of (image, label) batches
    :param train_op: TF operation that trains the network
    :param cross_entropy_loss: TF tensor for the loss
    :param input_image: TF placeholder for input images
    :param correct_label: TF placeholder for label images
    :param keep_prob: TF placeholder for the dropout keep probability
    :param learning_rate: TF placeholder for the initial learning rate
    :param decaying_learning_rate: TF tensor for the current (decayed) learning rate
    :return: Dict mapping the 0-based epoch index to that epoch's mean batch loss
    """
    epoch_loss = {}
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        start_time = time.time()
        print('Epoch: {} START...'.format(epoch + 1))

        # Start a new list every epoch so the stored mean covers this epoch
        # only instead of all batches seen since training started.
        batch_loss = []
        for batch_counter, (image, label) in enumerate(get_batches_fn(batch_size), start=1):
            _, loss, decaying_rate = sess.run(
                [train_op, cross_entropy_loss, decaying_learning_rate],
                feed_dict={
                    input_image: image,
                    correct_label: label,
                    keep_prob: KEEP_PROB,
                    learning_rate: LEARNING_RATE,
                })
            print("  Batch {} >>> Loss = {:.4f}, Learning Rate = {:.6f}".format(batch_counter, loss, decaying_rate))
            batch_loss.append(loss)

        elapsed = time.time() - start_time
        minutes, seconds = divmod(elapsed, 60)
        hours, minutes = divmod(minutes, 60)
        print("Epoch: {} END. Time taken: {:.0f} hours {:.0f} minutes {:.0f} seconds\n".format(epoch + 1, hours, minutes, seconds))

        # An epoch without batches has no defined mean loss
        epoch_loss[epoch] = np.mean(batch_loss) if batch_loss else float('nan')
    return epoch_loss
test_train_nn(train_nn)

def _format_elapsed(elapsed):
    """Format a duration in seconds as 'H hours M minutes S seconds'."""
    minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(minutes, 60)
    return "{:.0f} hours {:.0f} minutes {:.0f} seconds".format(hours, minutes, seconds)


def run():
    """Train the network, write the loss log, and segment the test images and a video.

    Steps: check the Kitti dataset, fetch the pretrained VGG model if needed,
    build and train the network, write '<EPOCHS>_log.txt' with one loss per
    epoch, save annotated test images under the runs directory, and write an
    annotated copy of 'solidWhiteRight.mp4' into VIDEO_DIR.
    """
    num_classes = CLASSES
    image_shape = IMAGE_SHAPE
    data_dir = DATA_DIR
    runs_dir = './drive/My Drive/runs' if ON_DRIVE else './runs'
    test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = gen_batch_function(os.path.join(data_dir, 'data_road/training'), image_shape)

        correct_label = tf.placeholder(tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(sess, vgg_path)

        nn_last_layer = layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes)

        logits, train_op, cross_entropy_loss, decaying_learning_rate = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        start_time = time.time()
        epoch_loss = train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op,
                              cross_entropy_loss, input_image, correct_label, keep_prob,
                              learning_rate, decaying_learning_rate)
        print("Training time: {}".format(_format_elapsed(time.time() - start_time)))

        # Write the per-epoch loss as CSV; the context manager closes the
        # file even if a write fails.
        log_file_path = './' + str(EPOCHS) + '_log.txt'
        with open(log_file_path, 'w') as log_file:
            log_file.write('Epoch,Loss\n')
            for epoch, loss in epoch_loss.items():
                log_file.write('{},{}\n'.format(epoch, loss))

        start_time = time.time()
        # Save inference data using save_inference_samples
        save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image)
        print("Inference time: {}".format(_format_elapsed(time.time() - start_time)))

        start_time = time.time()
        processed_frames = []
        # Load video
        video_clip = VideoFileClip(VIDEO_DIR + '/solidWhiteRight.mp4')
        for frame_counter, frame in enumerate(video_clip.iter_frames(), start=1):
            # Frames are wrapped in a PIL image because segment_single_image
            # rescales its input with PIL's Image.resize.
            processed_frame = segment_single_image(
                sess, logits, keep_prob, input_image, Image.fromarray(frame), image_shape)
            # Collect processed frame
            processed_frames.append(processed_frame)
            print("Frame {} processed".format(frame_counter))
        # Stitch all frames together to get the video
        processed_video = ImageSequenceClip(processed_frames, fps=video_clip.fps)
        processed_video.write_videofile(VIDEO_DIR + '/solidWhiteRight_processed.mp4', audio=False)
        print("Processed video written to {} directory".format(VIDEO_DIR))
        print("Video processing time: {}".format(_format_elapsed(time.time() - start_time)))
run()