In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import cv2
import glob
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import keras
from keras.layers import Dense, Dropout, Activation, Flatten, AveragePooling2D, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

Using TensorFlow backend.


In [3]:
dataframe = pd.read_csv("GT-with-BB.csv") 

In [4]:
dataframe

Unnamed: 0.1,Unnamed: 0,name,x,y,w,h,color
0,0,IMAGE0.png,627,272,48,114,RED
1,1,IMAGE0.png,379,268,48,114,RED
2,2,IMAGE1.png,627,272,48,114,RED
3,3,IMAGE1.png,379,268,48,114,RED
4,4,IMAGE2.png,627,272,48,114,RED
...,...,...,...,...,...,...,...
96,96,IMAGE269.png,278,453,27,56,GREEN
97,97,IMAGE269.png,166,449,26,57,GREEN
98,98,IMAGE270.png,142,429,30,64,GREEN
99,99,IMAGE274.png,572,300,121,300,YELLOW


In [5]:
dataframe = dataframe[dataframe['color'] != "YELLOW"]
dataset = dataframe.values
dataframe

Unnamed: 0.1,Unnamed: 0,name,x,y,w,h,color
0,0,IMAGE0.png,627,272,48,114,RED
1,1,IMAGE0.png,379,268,48,114,RED
2,2,IMAGE1.png,627,272,48,114,RED
3,3,IMAGE1.png,379,268,48,114,RED
4,4,IMAGE2.png,627,272,48,114,RED
...,...,...,...,...,...,...,...
94,94,IMAGE268.png,287,469,23,52,GREEN
95,95,IMAGE268.png,185,466,24,52,GREEN
96,96,IMAGE269.png,278,453,27,56,GREEN
97,97,IMAGE269.png,166,449,26,57,GREEN


In [6]:
num_examples = dataframe.shape[0]

In [7]:
label = dataframe['color'].values

In [8]:
label.dtype

dtype('O')

In [9]:
labels, categorical = np.unique(label, return_inverse=True)

In [10]:
print(labels)
print(categorical)

['GREEN' 'RED']
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [11]:
normalized_h = 128
normalized_w = 64

In [12]:
def read_image(filename):
    im = cv2.imread(filename)
    return cv2.cvtColor(im, cv2.COLOR_BGR2RGB) if im is not None else None

xs = np.zeros((num_examples, normalized_h, normalized_w, 3), dtype=np.uint8)
ys = np.zeros(num_examples, dtype=np.int8)

for index, row in dataframe.iterrows():
    name = row['name']
    x = row['x']
    y = row['y']
    h = row['h']
    w = row['w']
    missing_ctr = 0
    img = read_image(name)
    if img is not None:
        cropped = img[y:y+h, x:x+w]
        resized = cv2.resize(cropped, (normalized_w, normalized_h))
        xs[index-missing_ctr] = resized 
        ys[index-missing_ctr] = categorical[index]
    else:
        print("Missing image: %s" % name)
        missing_ctr += 1

xs = xs[:-missing_ctr]
ys = ys[:-missing_ctr]
ys = to_categorical(ys)

Missing image: IMAGE223.png
Missing image: IMAGE223.png
Missing image: IMAGE224.png
Missing image: IMAGE224.png
Missing image: IMAGE224.png
Missing image: IMAGE225.png
Missing image: IMAGE225.png
Missing image: IMAGE226.png
Missing image: IMAGE227.png
Missing image: IMAGE228.png
Missing image: IMAGE229.png
Missing image: IMAGE230.png
Missing image: IMAGE230.png
Missing image: IMAGE230.png
Missing image: IMAGE231.png
Missing image: IMAGE231.png
Missing image: IMAGE231.png
Missing image: IMAGE232.png
Missing image: IMAGE232.png
Missing image: IMAGE232.png
Missing image: IMAGE233.png
Missing image: IMAGE233.png
Missing image: IMAGE233.png
Missing image: IMAGE233.png
Missing image: IMAGE234.png
Missing image: IMAGE234.png
Missing image: IMAGE235.png
Missing image: IMAGE235.png
Missing image: IMAGE236.png
Missing image: IMAGE236.png
Missing image: IMAGE237.png
Missing image: IMAGE237.png
Missing image: IMAGE238.png
Missing image: IMAGE238.png
Missing image: IMAGE238.png
Missing image: IMAGE

## Better data

In [13]:
base = os.getcwd()
train_green = base + "/ITSDC-Udacity-Traffic-Light-Classifier/traffic_light_images/training/green"
train_yellow = base + "/ITSDC-Udacity-Traffic-Light-Classifier/traffic_light_images/training/yellow"
train_red = base + "/ITSDC-Udacity-Traffic-Light-Classifier/traffic_light_images/training/red"

test_green = base + "/ITSDC-Udacity-Traffic-Light-Classifier/traffic_light_images/test/green"
test_yellow = base + "/ITSDC-Udacity-Traffic-Light-Classifier/traffic_light_images/test/yellow"
test_red = base + "/ITSDC-Udacity-Traffic-Light-Classifier/traffic_light_images/test/red"

xs_train = []
ys_train = []

xs_test = []
ys_test = []

for idx, name in enumerate([train_green, train_yellow, train_red]):
    for file_name in glob.glob(name + "/*.jpg"):
        img = read_image(file_name)
        img = cv2.resize(img, (normalized_w, normalized_h))
        xs_train.append(img)
        ys_train.append(idx)
        
for idx, name in enumerate([test_green, test_yellow, test_red]):
    for file_name in glob.glob(name + "/*.jpg"):
        img = read_image(file_name)
        img = cv2.resize(img, (normalized_w, normalized_h))
        xs_test.append(img)
        ys_test.append(idx)

In [14]:
xs_train = np.stack(xs_train, axis=0)
xs_test = np.stack(xs_test, axis=0)

In [15]:
ys_train = to_categorical(ys_train)
ys_test = to_categorical(ys_test)

In [16]:
# Adapted from Keras documentation
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=True,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=5,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.05,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.05,
        shear_range=0.05,  # set range for random shear
        zoom_range=0.05,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.2)

In [17]:
datagen.fit(xs_train)

In [25]:
model = keras.Sequential(name="tl_net")

model.add(Conv2D(filters=8, kernel_size=(3, 3), activation='relu', input_shape=(normalized_h, normalized_w, 3)))
model.add(Dropout(rate=0.5))
model.add(AveragePooling2D())


model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(Dropout(rate=0.5))
model.add(AveragePooling2D())

# model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
# model.add(Dropout(rate=0.5))
# model.add(AveragePooling2D())

model.add(Flatten())

model.add(Dense(units=120, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=84, activation='relu'))
model.add(Dense(units=3, activation = 'softmax'))

opt = keras.optimizers.Adam(decay=1e-4)

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

print(model.summary())

Model: "tl_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 126, 62, 8)        224       
_________________________________________________________________
dropout_7 (Dropout)          (None, 126, 62, 8)        0         
_________________________________________________________________
average_pooling2d_5 (Average (None, 63, 31, 8)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 61, 29, 16)        1168      
_________________________________________________________________
dropout_8 (Dropout)          (None, 61, 29, 16)        0         
_________________________________________________________________
average_pooling2d_6 (Average (None, 30, 14, 16)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 6720)              0    

In [26]:
epochs = 3
batch_size = 16

model.fit_generator(
    datagen.flow(xs_train, ys_train,
                 batch_size=batch_size),
    epochs=epochs,
    validation_data=(xs_test, ys_test),
    workers=4)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.callbacks.History at 0x7face0538cd0>

In [27]:
model_name = 'tl_net'
model_path = os.path.join(os.getcwd(), model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Score trained model.
scores = model.evaluate(xs_test, ys_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Saved trained model at /Users/mkolodziej/src/ground-truth-images/tl_net 
Test loss: 0.17201344416749556
Test accuracy: 0.9696969985961914


In [43]:
from time import time

In [44]:
xs_test_one_example = xs_test[0, ...]
xs_test_one_example = np.expand_dims(xs_test_one_example, 0)

In [45]:
xs_test_one_example.shape

(1, 128, 64, 3)

In [49]:
start = time()
model.predict(xs_test_one_example)
duration = time() - start

In [50]:
print("Predicting one image took %f ms" % (duration * 1000))

Predicting one image took 2.142906 ms
