# Create Labels for Traffic Lights in Images from the Udacity Simulator

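Create labels for traffic lights in images from the Udacity simulator. The existing models from the TensorFlow Object Detection model zoo are quite good at detecting traffic lights, but they do not report the state of the light (red, yellow, green or unknown). This notebook uses such a model to locate the lights, stores the boxes together with a state label as Pascal VOC XML files, and converts them to TFRecord files. We will then train a model on that output so that it also predicts the state of the light.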

In [1]:
import numpy as np
import tensorflow as tf
import os
import glob
import fnmatch

from PIL import Image
from matplotlib import pyplot as plt
from tqdm import tqdm
from pascal_voc_io import PascalVocWriter, PascalVocReader

from sklearn.model_selection import StratifiedShuffleSplit
from object_detection.utils import dataset_util

### Configuration Parameters

In [16]:
# Directory that holds the model folder.
MODEL_BASEDIR = '../'
# Folder name of the pre-trained detection model.
MODEL_NAME = 'faster_rcnn_inception_resnet_v2_atrous_coco_11_06_2017'

# Full path of the frozen inference graph inside the model folder.
PATH_TO_CKPT = os.path.join(
    MODEL_BASEDIR,
    MODEL_NAME,
    'frozen_inference_graph.pb',
)

### Load the TensorFlow model into memory

In [17]:
# Import the frozen detection model into its own tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # Read the serialized graph from PATH_TO_CKPT and import it with an empty
    # name prefix, so the tensors can be looked up by the plain names below.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
        
    # Handles to the tensor that is fed (image_tensor) and the four tensors
    # that are fetched on every inference call.
    input_image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    output_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    output_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    output_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    output_num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    
# Session bound to the detection graph; created once at module level.
sess = tf.Session(graph=detection_graph)

### Helper functions

In [18]:
def load_image(path):
    """Load an image file as an RGB ``uint8`` array.

    The image is converted to RGB before it is turned into an array, so
    grayscale, palette and RGBA files also produce the three-channel layout
    (the previous fixed ``reshape`` raised for those files).

    Args:
        path: Path of the image file to read.

    Returns:
        A writable ``numpy.ndarray`` of dtype ``uint8`` and shape
        ``(height, width, 3)``.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the array.
    with Image.open(path) as image:
        return np.array(image.convert('RGB'), dtype=np.uint8)

In [19]:
%matplotlib inline

def display_image(img):
    """Show `img` in a new matplotlib figure with the axes hidden.

    Arrays of shape (H, W, 3) are drawn as they are. Arrays of shape
    (H, W, 1) are drawn from their single channel with the gray colormap,
    and every other shape is passed to imshow unchanged with the gray
    colormap.
    """
    plt.figure()
    has_channel_axis = len(img.shape) == 3
    if has_channel_axis and img.shape[2] == 3:
        plt.imshow(img)
    else:
        # Drop the trailing axis of single-channel 3-D arrays.
        if has_channel_axis and img.shape[2] == 1:
            plane = img[:, :, 0]
        else:
            plane = img
        plt.imshow(plane, cmap='gray')
    plt.axis('off')
    plt.show()

### Detection

In [20]:
def get_classification(img):
    """Run the detection model on a single image.

    Args:
        img: Image array; a batch axis is prepended before it is fed to the
            model.

    Returns:
        Tuple ``(boxes, scores, classes, num)`` taken from the first (only)
        batch entry: the boxes and scores as returned by the model, the
        classes cast to ``uint8`` and the number of detections as ``int``.
    """
    fetches = [output_boxes, output_scores, output_classes, output_num_detections]

    with detection_graph.as_default():
        # The model expects the image to have shape [1, None, None, 3].
        batch = np.expand_dims(img, axis=0)
        raw_boxes, raw_scores, raw_classes, raw_num = sess.run(
            fetches, feed_dict={input_image_tensor: batch})

    # All outputs are float32 numpy arrays with a leading batch axis:
    # strip that axis and convert the types where appropriate.
    return (raw_boxes[0],
            raw_scores[0],
            raw_classes[0].astype(np.uint8),
            int(raw_num[0]))

In [21]:
def get_abs_bb(img, box):
    """Scale a relative box to pixel coordinates of `img`.

    Elements 0 and 2 of `box` are multiplied by the image height
    (``img.shape[0]``), elements 1 and 3 by the image width
    (``img.shape[1]``). Each product is truncated with ``int()``.

    Returns:
        A 4-tuple of ints in the same element order as `box`.
    """
    height, width = img.shape[0], img.shape[1]
    first, second, third, fourth = box[0], box[1], box[2], box[3]
    return (int(first * height),
            int(second * width),
            int(third * height),
            int(fourth * width))

In [8]:
def get_light_color(img, box):
    """Guess the state of a traffic light from the bright pixels in its box.

    The box is shrunk by a tenth of its size on every side. Inside the
    remaining region the pixels whose red or green channel exceeds 200 are
    counted, and a channel counts as "lit" when more than 20 pixels exceed
    that value.

    Args:
        img: Image array indexed as ``img[y, x, channel]`` with red in
            channel 0 and green in channel 1.
        box: Pixel box ``(ymin, xmin, ymax, xmax)``.

    Returns:
        ``'yellow'`` when both red and green are lit, ``'red'`` or
        ``'green'`` when only that channel is lit, otherwise ``'unknown'``.
    """
    # Floor division (plus the int() casts below) keeps the slice bounds
    # integral; true division produced floats on Python 3 and made the
    # slicing raise a TypeError.
    w = (box[3] - box[1]) // 10
    h = (box[2] - box[0]) // 10
    y1 = int(box[0] + h)
    x1 = int(box[1] + w)
    y2 = int(box[2] - h)
    x2 = int(box[3] - w)

    sub_img = img[y1:y2, x1:x2]
    r = (sub_img[:, :, 0] > 200).sum()
    g = (sub_img[:, :, 1] > 200).sum()

    if r > 20 and g > 20:
        return 'yellow'
    elif r > 20:
        return 'red'
    elif g > 20:
        return 'green'
    else:
        return 'unknown'

### Create Pascal VOC XML files

In [24]:
# Image files for which no detection passed the filters of
# process_directory(); appended to on every call.
failed = []


def process_directory(path, label, min_score=0.90, class_id=10):
    """Write a Pascal VOC XML file for every ``*.jpg`` image in `path`.

    Each image is run through the detector. Every detection that passes the
    score and class filters is stored as a bounding box named `label`. The
    XML file is saved next to the image (same name, ``.xml`` extension).
    Images without any accepted detection get no XML file and are appended
    to the module-level ``failed`` list instead.

    Args:
        path: Directory that contains the images.
        label: Name written for every accepted bounding box.
        min_score: A detection is accepted only if its score is strictly
            greater than this value.
        class_id: Detector class id that a detection must have. The default
            10 is the COCO id of "traffic light" (NOTE(review): confirm
            against the label map of the model in use). Pass ``None`` to
            accept every class, which was the previous behaviour and also
            labelled other confidently detected objects as traffic lights.
    """
    files = glob.glob(os.path.join(path, '*.jpg'))

    for img_file in tqdm(files):
        img = load_image(img_file)
        (boxes, scores, classes, num) = get_classification(img)

        voc_writer = PascalVocWriter(path, img_file, img.shape[0:2])
        found = False

        for idx in range(num):
            if not scores[idx] > min_score:
                continue
            if class_id is not None and classes[idx] != class_id:
                continue

            # get_abs_bb() keeps the (ymin, xmin, ymax, xmax) order of the
            # detector, while addBndBox() is called with x before y.
            ymin, xmin, ymax, xmax = get_abs_bb(img, boxes[idx])
            voc_writer.addBndBox(xmin, ymin, xmax, ymax, label, False)
            found = True

        if found:
            voc_writer.save(os.path.splitext(img_file)[0] + '.xml')
        else:
            failed.append(img_file)
            

In [23]:
# Sanity check: run the detector on one sample image and look at the scores
# and class ids it returns.
img = load_image('../data/lot/red/left0141.jpg')
boxes, scores, classes, num = get_classification(img)

print(scores)
print(classes)

[0.9778216  0.79239184 0.31425834 0.29825878 0.19362003 0.14744002
 0.06475128 0.03456942 0.0342094  0.03198571 0.02855564 0.02722973
 0.02671204 0.02535975 0.02386992 0.02300037 0.0228492  0.01972944
 0.01882317 0.01791454 0.0173516  0.01733541 0.01657807 0.01628075
 0.0147054  0.01349893 0.01329653 0.01292871 0.01160978 0.01093205
 0.0109008  0.01083262 0.01017914 0.00951026 0.00791783 0.00741296
 0.00739492 0.00736778 0.00731838 0.00715427 0.00685472 0.00680534
 0.00676219 0.00664894 0.00636791 0.00595118 0.0056281  0.00545484
 0.00543252 0.00517387 0.00504763 0.0050165  0.00500525 0.00477189
 0.00466252 0.00465641 0.00450267 0.00437919 0.00431821 0.00418091
 0.00411439 0.00397753 0.00387212 0.0038645  0.00359505 0.00359486
 0.00357301 0.00356068 0.00326204 0.00319059 0.00312079 0.00289263
 0.002892   0.00282259 0.00269003 0.00262684 0.00255486 0.00255234
 0.00253893 0.00252692 0.002526   0.00249356 0.00230408 0.00217045
 0.00211329 0.00210908 0.00210907 0.00210454 0.00209558 0.0020

In [27]:
# Annotate the images of the 'unknown' folder; every accepted detection is
# stored under the label 'trafficlight_unknown'.
process_directory('../data/lot/unknown', 'trafficlight_unknown')

100%|██████████| 6/6 [00:09<00:00,  1.62s/it]


### Create TensorFlow TFRecord files

In [2]:
def load_data(path):
    """Read every Pascal VOC XML file found below `path`.

    Args:
        path: Root directory; it is walked recursively for ``*.xml`` files.

    Returns:
        Three parallel lists ``(imgs, vocs, labels)``: the annotation path
        with its extension replaced by ``.jpg``, the ``PascalVocReader`` for
        the annotation, and the label of the first shape in the annotation.
    """
    voc_files = [
        os.path.join(root, name)
        for root, _, names in os.walk(path)
        for name in fnmatch.filter(names, '*.xml')
    ]

    imgs, vocs, labels = [], [], []

    for voc_path in tqdm(voc_files):
        reader = PascalVocReader(voc_path)
        base, _ = os.path.splitext(voc_path)

        imgs.append(base + '.jpg')
        vocs.append(reader)
        # Only the first shape decides the label of the whole file.
        labels.append(reader.shapes[0][0])

    return imgs, vocs, labels
    

In [3]:
# Integer class id for every label text that may be written to a TFRecord.
LABEL_DICT = {
    # Traffic light states.
    'trafficlight_red': 1,
    'trafficlight_yellow': 2,
    'trafficlight_green': 3,
    'trafficlight_unknown': 4,
    # Generic traffic light without a state.
    'traffic light': 10,
}

In [4]:
def create_tf_example(voc, img_file, label_override=None):
    """Build the ``tf.train.Example`` for one annotated image.

    Args:
        voc: Parsed annotation. Its ``width`` and ``height`` attributes and
            its ``shapes`` list are read. Every shape is used as
            ``(label, points, ...)`` where ``points[0]`` is the
            ``(xmin, ymin)`` corner and ``points[2]`` the ``(xmax, ymax)``
            corner in pixels.
        img_file: Path of the image file; its bytes are stored in the example.
        label_override: Optional label text that replaces the label of every
            shape. The default ``None`` keeps the labels stored in the
            annotation. Previously every label was unconditionally replaced
            by "traffic light", which discarded the light state; pass
            ``label_override="traffic light"`` to get that behaviour.

    Returns:
        The ``tf.train.Example`` with the image and one entry per shape in
        each of the ``image/object/...`` features.

    Raises:
        KeyError: If a label is not a key of ``LABEL_DICT``.
    """
    filename = img_file.encode()

    with tf.gfile.GFile(img_file, 'rb') as fid:
        encoded_image = fid.read()

    # 'image/format' holds the encoding name ('jpeg' or 'png'), not the file
    # extension, so drop the dot and map the 'jpg' spelling to 'jpeg'.
    extension = os.path.splitext(img_file)[1].lstrip('.').lower()
    if extension in ('jpg', 'jpeg'):
        extension = 'jpeg'
    image_format = extension.encode()

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []       # List of integer class id of bounding box (1 per box)

    for shape in voc.shapes:
        (min_c, max_c) = (shape[1][0], shape[1][2])

        # Pixel coordinates are normalized by the image size.
        xmins.append(float(min_c[0]) / voc.width)
        xmaxs.append(float(max_c[0]) / voc.width)
        ymins.append(float(min_c[1]) / voc.height)
        ymaxs.append(float(max_c[1]) / voc.height)

        label = shape[0] if label_override is None else label_override
        classes_text.append(label.encode())
        classes.append(int(LABEL_DICT[label]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(voc.height),
        'image/width': dataset_util.int64_feature(voc.width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example
    

In [5]:
data_dir = '../data/sim'

# load data
imgs, vocs, labels = load_data(data_dir)

# split data: one stratified 80/20 split on the per-file labels
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
splitter = ss.split(np.zeros(len(imgs)), labels)

train_idx, test_idx = next(splitter)

# Write each subset to its own record file. Previously the test examples
# were appended to 'training.record' and 'testing.record' was never
# created, so the test images leaked into the training data.
for record_name, indices in (('training.record', train_idx),
                             ('testing.record', test_idx)):
    writer = tf.python_io.TFRecordWriter(os.path.join(data_dir, record_name))
    try:
        for idx in tqdm(indices):
            example = create_tf_example(vocs[idx], imgs[idx])
            writer.write(example.SerializeToString())
    finally:
        # Close the file even when building an example fails.
        writer.close()



100%|██████████| 618/618 [00:00<00:00, 3289.30it/s]
100%|██████████| 494/494 [00:00<00:00, 1659.27it/s]
100%|██████████| 124/124 [00:00<00:00, 1660.30it/s]


In [7]:
# Show the distinct labels returned by load_data().
print(set(labels))

set(['trafficlight_red', 'trafficlight_yellow', 'trafficlight_unknown', 'trafficlight_green'])


In [12]:
import shutil

# Copy the images of the test split into their own folder.
test_dir = '../data/test'
if not os.path.isdir(test_dir):
    # shutil.copyfile() does not create missing directories.
    os.makedirs(test_dir)

for idx in tqdm(test_idx):
    shutil.copyfile(imgs[idx], os.path.join(test_dir, os.path.basename(imgs[idx])))

100%|██████████| 123/123 [00:00<00:00, 11575.30it/s]


In [19]:
# Build the example for the first training index to inspect its contents.
create_tf_example(vocs[train_idx[0]], imgs[train_idx[0]])

features {
  feature {
    key: "image/encoded"
    value {
      bytes_list {
        value: "\377\330\377\340\000\020JFIF\000\001\001\000\000\001\000\001\000\000\377\333\000C\000\002\001\001\001\001\001\002\001\001\001\002\002\002\002\002\004\003\002\002\002\002\005\004\004\003\004\006\005\006\006\006\005\006\006\006\007\t\010\006\007\t\007\006\006\010\013\010\t\n\n\n\n\n\006\010\013\014\013\n\014\t\n\n\n\377\333\000C\001\002\002\002\002\002\002\005\003\003\005\n\007\006\007\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\377\300\000\021\010\002X\003 \003\001\"\000\002\021\001\003\021\001\377\304\000\037\000\000\001\005\001\001\001\001\001\001\000\000\000\000\000\000\000\000\001\002\003\004\005\006\007\010\t\n\013\377\304\000\265\020\000\002\001\003\003\002\004\003\005\005\004\004\000\000\001}\001\002\003\000\004\021\005\022!1A\006\023Qa\007\"q\0242\201\221\241\010#B\261\301\025R\321\360$3br\202\t\n\026\027\030\031\032%&\'()*456789: