# Traffic light state learning

Jupyter notebook for training a TensorFlow CNN model capable of detecting the color of a traffic light. The training is based on cropped images from Udacity's simulator, the images provided in the rosbag, and the Bosch dataset.

In [1]:
import numpy as np
import tensorflow as tf
from scipy import misc
from random import shuffle, random
import cv2
import os
import six.moves.urllib as urllib
import tarfile
from PIL import Image
from tqdm import tqdm
from time import gmtime, strftime
import yaml

In [2]:
def get_training_examples(directory='small_lights', sub_directory='.', light=0):
    """
    Lists the regular files found in <cwd>/directory/sub_directory
    and pairs each of them with the same predefined label.

    Returns a tuple of Numpy arrays (file paths, labels) in which
    every label equals `light`.
    """
    folder = os.path.join(os.getcwd(), directory, sub_directory)
    filenames = []
    for entry in os.listdir(folder):
        candidate = os.path.join(folder, entry)
        if os.path.isfile(candidate):  # sub-folders are ignored
            filenames.append(candidate)
    labels = [light for _ in filenames]
    return np.array(filenames), np.array(labels)

In [45]:
def read_image_from_disk(filename, flipping=True):
    """
    Reads an image from disk and converts it into
    a 32x64 RGB Numpy ndarray, optionally mirrored.

    Args:
        filename: path of the image file to load.
        flipping: when True, the image is flipped left-right
            whenever random() draws a value above 0.5.

    Returns:
        The (possibly flipped) image as a Numpy ndarray.
    """
    file_contents = misc.imread(filename, mode='RGB') # The model expects RGB images
    # An RGB array has three dimensions, so only height and width are compared;
    # the full shape could never equal (32, 64) and every image was resized.
    if file_contents.shape[:2] != (32, 64):
        file_contents = misc.imresize(file_contents, (32, 64)) # The model expects 32x64x3 images
    if random() > 0.5 and flipping is True:
        return np.fliplr(file_contents)
    else:
        return file_contents

In [4]:
def chunks(l, n):
    """Generate consecutive slices of l, each holding at most n items."""
    starts = range(0, len(l), n)
    for start in starts:
        stop = start + n
        yield l[start:stop]

In [5]:
def shuffle_data(filenames, labels):
    """Reorders both Numpy ndarrays with one shared random permutation"""
    order = [position for position in range(len(filenames))]
    shuffle(order)
    shuffled_filenames = filenames[order]
    shuffled_labels = labels[order]
    return shuffled_filenames, shuffled_labels

In [6]:
def batch_feed(filenames, labels, batch_size=5, shuffling=True, flipping=True):
    """
    Batch procedure for feeding the training process.

    Args:
        filenames: Numpy array of image paths.
        labels: Numpy array of labels aligned with filenames.
        batch_size: maximum number of examples per batch.
        shuffling: when True, the examples are visited in random order.
        flipping: forwarded to read_image_from_disk; pass False to load
            the images without random mirroring (e.g. for validation).

    Yields:
        (images, labels) tuples; the image values are divided by 255.
    """
    index = list(range(len(filenames)))
    if shuffling:
        shuffle(index)
    for batch in chunks(index, batch_size):
        batch_labels = labels[batch]
        batch_train = np.array([read_image_from_disk(path, flipping=flipping)
                                for path in filenames[batch]]) / 255.
        yield (batch_train, batch_labels)

In [7]:
def create_placeholders():
    """
    Creates the I/O and dropout placeholders.

    Reads the module-level `Channels`, which must be defined before the call.

    Returns:
        (input_layer, keep_prob, output_layer)
    """
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    input_layer = tf.placeholder(tf.float32, shape=(None, None, None, Channels), name="input_layer")
    # (None) is just None, i.e. a completely unknown shape; (None,) declares
    # the 1-D vector of integer class labels that is actually fed.
    output_layer = tf.placeholder(tf.int32, shape=(None,))
    return input_layer, keep_prob, output_layer

In [8]:
def create_layers(input_layer, keep_prob, height, width, classes):
    """
    Builds the convolutional classifier and returns its logits.

    Layer stack, in order:

        resize_images             to (height, width)
        conv2d      16 x 3x3      same padding, ReLU
        max_pool    3x3, stride 2
        dropout     keep_prob
        conv2d      32 x 3x3      same padding, ReLU
        max_pool    3x3, stride 2
        dropout     keep_prob
        conv2d      64 x 3x3      same padding, ReLU
        max_pool    2x2, stride 2
        dropout     keep_prob
        flatten
        dense       128 units, tanh
        dropout     keep_prob - 0.1 (only while keep_prob < 1)
        dense       `classes` units, named "output_logits"

    Args:
        input_layer: image batch tensor.
        keep_prob: dropout keep probability (tensor or placeholder).
        height: height the input images are resized to.
        width: width the input images are resized to.
        classes: number of output units.

    Returns:
        The logits tensor of the last dense layer.
    """
    resized_input = tf.image.resize_images(images=input_layer,
                                           size=(height, width))

    conv1 = tf.layers.conv2d(inputs=resized_input,
                             filters=16, kernel_size=[3, 3],
                             padding="same", activation=tf.nn.relu)

    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3],
                                    strides=2)

    dropout1 = tf.nn.dropout(pool1, keep_prob)

    conv2 = tf.layers.conv2d(inputs=dropout1,
                             filters=32, kernel_size=[3, 3],
                             padding="same", activation=tf.nn.relu)

    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3],
                                    strides=2)

    dropout2 = tf.nn.dropout(pool2, keep_prob)

    conv3 = tf.layers.conv2d(inputs=dropout2,
                             filters=64, kernel_size=[3, 3],
                             padding="same", activation=tf.nn.relu)

    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2],
                                    strides=2)

    dropout3 = tf.nn.dropout(pool3, keep_prob)

    flat = tf.contrib.layers.flatten(dropout3)

    connected = tf.layers.dense(inputs=flat, units=128,
                                activation=tf.tanh)

    # The dense layer is regularised 0.1 harder than the convolutional blocks,
    # but only while dropout is active: subtracting unconditionally would keep
    # dropping units even when keep_prob is fed as 1.0 for evaluation.
    dropout_active = tf.cast(tf.less(keep_prob, 1.0), tf.float32)
    dense_keep_prob = keep_prob - 0.1 * dropout_active
    dropout4 = tf.nn.dropout(connected, dense_keep_prob)

    logits = tf.layers.dense(inputs=dropout4, units=classes,
                             name="output_logits")

    return logits

In [9]:
def optimization(logits):
    """
    Attaches the loss, the Adam training step and the accuracy metric.

    Reads the module-level `output_layer` placeholder and `Classes` value.
    Returns (loss_operation, training_operation, accuracy_operation).
    """
    targets = tf.one_hot(output_layer, Classes)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=targets)
    mean_loss = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(learning_rate=0.0003).minimize(mean_loss)
    # The explicit name gives the prediction tensor a fixed name in the graph
    predicted_class = tf.argmax(logits, 1, name="output_class")
    expected_class = tf.argmax(targets, 1)
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    mean_accuracy = tf.reduce_mean(hits)
    return mean_loss, train_step, mean_accuracy

In [10]:
# Gathering the examples of every source, one folder per light state
# (labels: 0 = red, 1 = yellow, 2 = green, 3 = no light)
sim_red_files, sim_red_labels = get_training_examples(sub_directory='red_sim', light=0)
bosch_red_files, bosch_red_labels = get_training_examples(sub_directory='red_bosch', light=0)
rosbag_red_files, rosbag_red_labels = get_training_examples(sub_directory='red_rosbag', light=0)

sim_yellow_files, sim_yellow_labels = get_training_examples(sub_directory='yellow_sim', light=1)
bosch_yellow_files, bosch_yellow_labels = get_training_examples(sub_directory='yellow_bosch', light=1)

sim_green_files, sim_green_labels = get_training_examples(sub_directory='green_sim', light=2)
bosch_green_files, bosch_green_labels = get_training_examples(sub_directory='green_bosch', light=2)
rosbag_green_files, rosbag_green_labels = get_training_examples(sub_directory='green_rosbag', light=2)

sim_none_files, sim_none_labels = get_training_examples(sub_directory='none_sim', light=3)
bosch_none_files, bosch_none_labels = get_training_examples(sub_directory='none_bosch', light=3)

In [11]:
# Merging every source into one array of paths and one array of labels,
# listing the sources in the same order in both
filenames = np.concatenate([
    sim_green_files, sim_red_files, sim_yellow_files, sim_none_files,
    bosch_green_files, bosch_red_files, bosch_yellow_files, bosch_none_files,
    rosbag_green_files, rosbag_red_files,
])
labels = np.concatenate([
    sim_green_labels, sim_red_labels, sim_yellow_labels, sim_none_labels,
    bosch_green_labels, bosch_red_labels, bosch_yellow_labels, bosch_none_labels,
    rosbag_green_labels, rosbag_red_labels,
])

In [12]:
# Reporting label distributions: the distinct label values and
# the number of examples carrying each of them
np.unique(labels, return_counts=True)

(array([0, 1, 2, 3]), array([4212,  312, 5720, 5358], dtype=int64))

In [13]:
# Input geometry expected by the network and number of output classes
Height, Width, Channels = (32, 64, 3)
Classes = 4

epochs = 50
batch_size = 5
validation = 0.10       # fraction of the examples held out for validation
best_result = 0.0       # best validation accuracy seen so far
best_epoch_path = None  # checkpoint of the best epoch; stays None if nothing is saved

tf.reset_default_graph()

input_layer, keep_prob, output_layer = create_placeholders()
logits = create_layers(input_layer, keep_prob, Height, Width, Classes)
loss_operation, training_operation, accuracy_operation = optimization(logits)
saver = tf.train.Saver(max_to_keep=10)

filenames, labels = shuffle_data(filenames, labels)

# Splitting the shuffled examples into a training part and a validation part
number_examples = len(labels)
train_space = int(number_examples * (1.0 - validation))
train_filenames, train_labels = filenames[:train_space], labels[:train_space]
validation_filenames, validation_labels = filenames[train_space:], labels[train_space:]

print("Train examples: %i" % (len(train_labels)))
print("Validation examples %i" % (len(validation_labels)))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        train_loss, train_accuracy, train_seen = 0.0, 0.0, 0
        for n, (X, y) in enumerate(batch_feed(train_filenames, train_labels, batch_size=batch_size, shuffling=True)):
            _, loss, accuracy = sess.run([training_operation, loss_operation, accuracy_operation],
                                      feed_dict={input_layer: X,
                                                 output_layer: y,
                                                 keep_prob: 0.8
                                                })
            # loss and accuracy are per-batch means: weight them by the batch
            # size so that a shorter final batch does not count as a full one
            train_loss += loss * len(y)
            train_accuracy += accuracy * len(y)
            train_seen += len(y)
            if n % 5000 == 0:
                print("Epoch: %i Batch: %i Train accuracy %0.3f" % (e, n, train_accuracy / train_seen))

        train_loss = train_loss / train_seen
        train_accuracy = train_accuracy / train_seen

        # NOTE: batch_feed loads images through read_image_from_disk with its
        # default flipping=True, so validation images are randomly mirrored too
        validation_loss, validation_accuracy, validation_seen = 0.0, 0.0, 0
        for m, (X, y) in enumerate(batch_feed(validation_filenames, validation_labels, batch_size=100, shuffling=False)):
            loss, accuracy = sess.run([loss_operation, accuracy_operation],
                                      feed_dict={input_layer: X,
                                                 output_layer: y,
                                                 keep_prob: 1.0})
            validation_loss += loss * len(y)
            validation_accuracy += accuracy * len(y)
            validation_seen += len(y)

        validation_loss = validation_loss / validation_seen
        validation_accuracy = validation_accuracy / validation_seen

        print("Epoch: %i Batch: %i Train accuracy %0.3f Validation accuracy %0.3f" % (e, n, train_accuracy, validation_accuracy))

        # Checkpointing (training is not stopped early): a model is saved only
        # when the validation accuracy improves on the best one seen so far
        if validation_accuracy > best_result:
            best_result = validation_accuracy
            path = saver.save(sess, './state_detection', global_step=e)
            print("Model saved")
            best_epoch_path = path

print("best epoch at {}".format(best_epoch_path))

Train examples: 14041
Validation examples 1561
Epoch: 0 Batch: 0 Train accuracy 0.400
Epoch: 0 Batch: 2808 Train accuracy 0.945 Validation accuracy 0.961
Model saved
Epoch: 1 Batch: 0 Train accuracy 1.000
Epoch: 1 Batch: 2808 Train accuracy 0.974 Validation accuracy 0.978
Model saved
Epoch: 2 Batch: 0 Train accuracy 1.000
Epoch: 2 Batch: 2808 Train accuracy 0.979 Validation accuracy 0.980
Model saved
Epoch: 3 Batch: 0 Train accuracy 1.000
Epoch: 3 Batch: 2808 Train accuracy 0.980 Validation accuracy 0.983
Model saved
Epoch: 4 Batch: 0 Train accuracy 1.000
Epoch: 4 Batch: 2808 Train accuracy 0.981 Validation accuracy 0.983
Epoch: 5 Batch: 0 Train accuracy 1.000
Epoch: 5 Batch: 2808 Train accuracy 0.981 Validation accuracy 0.982
Epoch: 6 Batch: 0 Train accuracy 1.000
Epoch: 6 Batch: 2808 Train accuracy 0.982 Validation accuracy 0.980
Epoch: 7 Batch: 0 Train accuracy 1.000
Epoch: 7 Batch: 2808 Train accuracy 0.984 Validation accuracy 0.984
Model saved
Epoch: 8 Batch: 0 Train accuracy 1.00

# Traffic light state testing

In [6]:
# Falling back to a known checkpoint when best_epoch_path has not been
# defined in this session; only the missing-name case is handled, so
# any other error (or an interrupt) is no longer swallowed
try:
    print (best_epoch_path)
except NameError:
    best_epoch_path = 'state_detection-33'

state_detection-33


In [5]:
# Recovering the neural network
session = tf.Session()
# Rebuilding the graph from the checkpoint's .meta file, then loading
# the saved variable values into the session
loader = tf.train.import_meta_graph(best_epoch_path+'.meta')
loader.restore(session, best_epoch_path)
graph = tf.get_default_graph()
# Looking up by name the tensors needed to feed images and read predictions
out = graph.get_tensor_by_name("output_class:0")
input_layer = graph.get_tensor_by_name("input_layer:0")
keep_prob = graph.get_tensor_by_name("keep_prob:0")
# NOTE(review): counter is not used in the visible cells - confirm it is still needed
counter = 0

INFO:tensorflow:Restoring parameters from state_detection-33


In [16]:
def get_training_examples(directory='small_lights', sub_directory='.', light=0):
    """
    Lists the regular files found in <cwd>/directory/sub_directory
    and pairs each of them with the same predefined label.

    Returns a tuple of Numpy arrays (file paths, labels) in which
    every label equals `light`.
    """
    folder = os.path.join(os.getcwd(), directory, sub_directory)
    filenames = []
    for entry in os.listdir(folder):
        candidate = os.path.join(folder, entry)
        if os.path.isfile(candidate):  # sub-folders are ignored
            filenames.append(candidate)
    labels = [light for _ in filenames]
    return np.array(filenames), np.array(labels)

In [21]:
def read_image_from_disk(filename, flipping=True):
    """
    Reads an image from disk and converts it into
    a 32x64 RGB Numpy ndarray, optionally mirrored.

    Args:
        filename: path of the image file to load.
        flipping: when True, the image is flipped left-right
            whenever random() draws a value above 0.5.

    Returns:
        The (possibly flipped) image as a Numpy ndarray.
    """
    file_contents = misc.imread(filename, mode='RGB')
    # An RGB array has three dimensions, so only height and width are compared;
    # the full shape could never equal (32, 64) and every image was resized.
    if file_contents.shape[:2] != (32, 64):
        file_contents = misc.imresize(file_contents, (32, 64))
    if random() > 0.5 and flipping is True:
        return np.fliplr(file_contents)
    else:
        return file_contents

In [22]:
# Gathering the examples of every source, one folder per light state
# (labels: 0 = red, 1 = yellow, 2 = green, 3 = no light)
sim_red_files, sim_red_labels = get_training_examples(sub_directory='red_sim', light=0)
bosch_red_files, bosch_red_labels = get_training_examples(sub_directory='red_bosch', light=0)
rosbag_red_files, rosbag_red_labels = get_training_examples(sub_directory='red_rosbag', light=0)

sim_yellow_files, sim_yellow_labels = get_training_examples(sub_directory='yellow_sim', light=1)
bosch_yellow_files, bosch_yellow_labels = get_training_examples(sub_directory='yellow_bosch', light=1)

sim_green_files, sim_green_labels = get_training_examples(sub_directory='green_sim', light=2)
bosch_green_files, bosch_green_labels = get_training_examples(sub_directory='green_bosch', light=2)
rosbag_green_files, rosbag_green_labels = get_training_examples(sub_directory='green_rosbag', light=2)

sim_none_files, sim_none_labels = get_training_examples(sub_directory='none_sim', light=3)
bosch_none_files, bosch_none_labels = get_training_examples(sub_directory='none_bosch', light=3)

In [30]:
# Evaluation set 1: every available image, with the sources
# listed in the same order for paths and labels
files = np.concatenate([
    sim_green_files, sim_red_files, sim_yellow_files, sim_none_files,
    bosch_green_files, bosch_red_files, bosch_yellow_files, bosch_none_files,
    rosbag_green_files, rosbag_red_files,
])
labels = np.concatenate([
    sim_green_labels, sim_red_labels, sim_yellow_labels, sim_none_labels,
    bosch_green_labels, bosch_red_labels, bosch_yellow_labels, bosch_none_labels,
    rosbag_green_labels, rosbag_red_labels,
])

In [31]:
# Classifying the images one at a time, without mirroring,
# and counting hits and misses
correct, wrong = 0, 0
for file, label in zip(files, labels):
    img = read_image_from_disk(file, flipping=False)
    result = session.run([out], feed_dict={input_layer: [img / 255.], keep_prob: 1.0})
    predicted = result[0][0]
    if label != predicted:
        wrong += 1
    else:
        correct += 1

In [32]:
# The evaluated files include the training examples,
# so this figure is an optimistic estimate
print("Accuracy: %0.3f" % (correct / float(correct + wrong)))

Accuracy: 0.991


In [33]:
# Evaluation set 2: simulator and rosbag images only (no Bosch data)
files = np.concatenate([
    sim_green_files, sim_red_files, sim_yellow_files, sim_none_files,
    rosbag_green_files, rosbag_red_files,
])
labels = np.concatenate([
    sim_green_labels, sim_red_labels, sim_yellow_labels, sim_none_labels,
    rosbag_green_labels, rosbag_red_labels,
])

In [34]:
# Classifying the images one at a time, without mirroring,
# and counting hits and misses
correct, wrong = 0, 0
for file, label in zip(files, labels):
    img = read_image_from_disk(file, flipping=False)
    result = session.run([out], feed_dict={input_layer: [img / 255.], keep_prob: 1.0})
    predicted = result[0][0]
    if label != predicted:
        wrong += 1
    else:
        correct += 1

In [35]:
# The evaluated files include the training examples,
# so this figure is an optimistic estimate
print("Accuracy: %0.3f" % (correct / float(correct + wrong)))

Accuracy: 0.995


In [40]:
# Decoding table: the predicted class index is the position of its name
labels = dict(enumerate(['RED', 'YELLOW', 'GREEN', 'NONE']))

In [46]:
# Trying a couple of difficult examples
# (judging by the file name, a red light that the CNN gets wrong)
img = read_image_from_disk("cnn_failing_RED1.jpg", flipping=False)
result = session.run([out], feed_dict={input_layer: [img / 255.], keep_prob: 1.0})
predicted = result[0][0]
print(labels[predicted])

GREEN


In [42]:
# Second difficult example, classified the same way
img = read_image_from_disk("cnn_failing_RED0.jpg", flipping=False)
result = session.run([out], feed_dict={input_layer: [img / 255.], keep_prob: 1.0})
predicted = result[0][0]
print(labels[predicted])

GREEN
