# Udacity CarND Project 2: Traffic Sign Classifier

# Step 0: Load the Data

In [None]:
# Load pickled data
import pickle

# Locations of the pickled dataset splits; adjust if the files live elsewhere.
training_file = "train.p"
testing_file = "test.p"


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, mode='rb') as handle:
        return pickle.load(handle)


train = _load_pickle(training_file)
test = _load_pickle(testing_file)

# Both splits are indexed by 'features' and 'labels'.
X_train = train['features']
y_train = train['labels']
X_test = test['features']
y_test = test['labels']

In [None]:
### Basic summary of the loaded dataset.
import random

# Number of examples in each split.
n_train, n_test = len(X_train), len(X_test)

# Lengths of the first two axes of one training image, rendered as "<a>x<b>".
first_image = X_train[0]
image_shape = "x".join(
    str(extent) for extent in (len(first_image), len(first_image[0]))
)

# Largest label id plus one (assumes label ids start at 0 — TODO confirm).
n_classes = max(train['labels']) + 1

for caption, value in (
    ("Number of training examples =", n_train),
    ("Number of testing examples =", n_test),
    ("Image data shape =", image_shape),
    ("Number of classes =", n_classes),
):
    print(caption, value)

# Visualize Data

In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

# Draw `col_classes` randomly chosen training images for every class id,
# packing `row_classes` classes into each row of the grid.
num_displayed = range(43)
row_classes = 6
col_classes = 2
# Ceiling division: number of grid rows needed to fit every class.
num_rows = -(-len(num_displayed) // row_classes)
grid_cols = col_classes * row_classes

fg = plt.figure(figsize=(grid_cols, num_rows))
gridspace1 = gridspec.GridSpec(num_rows, grid_cols)
gridspace1.update(wspace=0.05, hspace=0.05)

for position, class_id in enumerate(num_displayed):
    # Indices of all training examples carrying this class id.
    candidates = np.where(y_train == class_id)[0]
    picked = np.random.choice(candidates, col_classes, replace=False)
    for offset, sample in enumerate(X_train[picked]):
        axis = plt.subplot(gridspace1[position * col_classes + offset])
        plt.imshow(sample)
        # Overlay the position in `num_displayed` as a small red tag.
        axis.text(2, 6, str(position),
                  bbox={'facecolor': 'red', 'alpha': 0.4, 'pad': 2})
        plt.axis('off')

fg.suptitle('Training Data Example Showcase', fontsize=12, fontweight='bold')

# Plotting Images 

In [None]:
from collections import Counter

# Count how many training examples carry each label.
ctr = Counter(y_train)
print('Amount of Each Traffic Sign')
# Sort the (label, count) pairs by label so the bars appear in class order.
labels, values = zip(*sorted(ctr.items()))

indexes = np.arange(len(labels))
width = 1.0

# Centre every bar on its integer position and put the tick at that same
# position; the former `indexes + width * 2` offset placed each label two
# bars to the right of the bar it described.
plt.bar(indexes, values, width, align='center')
plt.xticks(indexes, labels)
plt.show()

# Step 2: Design and Test a Model Architecture

In [None]:
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import resample
from tqdm import tqdm_notebook
from zipfile import ZipFile
import time
from datetime import timedelta
import math
import tensorflow as tf

# Setup Tensorflow

In [None]:
# Training hyper-parameters.
# NOTE(review): neither name is referenced elsewhere in the visible notebook;
# the training cell defines its own `training_epochs` and `batch_size` —
# confirm whether these are still needed.
EPOCHS = 50
BATCH_SIZE = 128

# Implement

In [None]:
def normalize_color(image_data, a=0.1, b=0.9, x_min=0.0, x_max=255.0):
    """Linearly rescale pixel values from [x_min, x_max] to [a, b].

    Args:
        image_data: Array-like of pixel values (any shape).
        a: Lower bound of the output range.
        b: Upper bound of the output range.
        x_min: Smallest possible input value.
        x_max: Largest possible input value.

    Returns:
        A float32 NumPy array with the same shape as ``image_data``.
    """
    # Convert once up front: the previous version allocated a float32 buffer
    # and then discarded it, so the result silently came back as float64.
    pixels = np.asarray(image_data, dtype=np.float32)
    scale = (b - a) / (x_max - x_min)
    scaled = a + (pixels - x_min) * scale
    # Guard against promotion to float64 for 0-d inputs.
    return np.asarray(scaled, dtype=np.float32)

# Rescale the raw pixel data of both splits with normalize_color.
# NOTE(review): both names are reassigned in later cells of this notebook
# (train_features is rebuilt from train['features']; test_features is
# recomputed before evaluation) — confirm this cell is still required.
train_features = normalize_color(X_train)
test_features = normalize_color(X_test)

In [None]:
# One-hot encode the class labels of both splits with a binarizer fitted on
# the training labels.
encoder = LabelBinarizer().fit(y_train)

# Cast to float32 so the labels can be multiplied against the float32
# features in TensorFlow.
train_labels, test_labels = (
    encoder.transform(split_labels).astype(np.float32)
    for split_labels in (y_train, y_test)
)
is_labels_encod = True

print('Labels One-Hot Encoded')

## Question 1

Describe the techniques used to preprocess the data

Answer:

All three RGB channels of every image, which originally range from 0 to 255, were normalized to the range 0.1–0.9. The data is processed in batches to stay within memory limits, so the network never works on the entire data set at the same time. Batching is used during both training and testing.

We normalize so that the gradients stay manageable. If each input kept its full RGB range of 0 to 255, the weight matrices would have to cope with very large numbers. Normalizing the data set avoids this issue.

## Question 2

Describe how you set up the training, validation and testing data for your model. If you generated additional data, why?

Answer:



In [None]:
### Data exploration visualization goes here.
### Feel free to use as many code cells as needed.
import matplotlib.pyplot as plt
import numpy as np
import scipy.ndimage

# Start again from the raw (un-normalised) training split as NumPy arrays.
train_features = np.array(train['features'])
train_labels = np.array(train['labels'])

# inputs_per_class[c] is the number of training examples labelled c.
inputs_per_class = np.bincount(train_labels)
max_inputs = np.max(inputs_per_class)

# Bar chart of the class distribution.
mpl_fig = plt.figure()
ax = mpl_fig.add_subplot(111)
ax.set_ylabel('Inputs')
ax.set_xlabel('Class')
ax.set_title('Number of inputs per class')
ax.bar(range(len(inputs_per_class)), inputs_per_class, 1/3, color='blue', label='Inputs per class')
plt.show()

# Show the first training example of every class. A vectorised lookup
# replaces the former Python-level scan over all labels for each class.
for i in range(n_classes):
    matches = np.flatnonzero(train_labels == i)
    if matches.size == 0:
        # No example with this label: nothing to display.
        continue
    print('Class: ', i)
    plt.imshow(train_features[matches[0]])
    plt.show()

In [None]:
### Regenerating images by rotating in between +/-20 degrees.
### Credit to Mehdi Sqalli https://github.com/MehdiSv/TrafficSignsRecognition/

print('Regenerating data...')

import scipy.ndimage

# Generate additional data for underrepresented classes
print('Generating additional data...')
angles = [-5, 5, -10, 10, -15, 15, -20, 20]

# Collect every rotated copy first and append once at the end; calling
# np.append per class re-copied the whole (growing) dataset each time.
new_features = []
new_labels = []

for i, class_count in enumerate(inputs_per_class):
    if class_count == 0:
        # Label id without any example: nothing to rotate, and the ratio
        # below would divide by zero.
        continue

    # How many rotation angles to apply, capped by the angles available.
    input_ratio = min(int(max_inputs / class_count) - 1, len(angles) - 1)

    # Classes that would get at most one rotated copy are left untouched.
    if input_ratio <= 1:
        continue

    class_images = train_features[train_labels == i]

    for angle in angles[:input_ratio]:
        for feature in class_images:
            new_features.append(scipy.ndimage.rotate(feature, angle, reshape=False))
            new_labels.append(i)

if new_features:
    train_features = np.append(train_features, new_features, axis=0)
    train_labels = np.append(train_labels, new_labels, axis=0)

# Normalize features
print('Normalizing features...')
train_features = train_features / 255. * 0.8 + 0.1

# Get randomized datasets for training and validation
# NOTE(review): the augmentation above runs before this split, so rotated
# copies of one source image can land on both sides of it — consider
# splitting first and augmenting only the training part.
print('Randomizing datasets...')
from sklearn.model_selection import train_test_split
train_features, valid_features, train_labels, valid_labels = train_test_split(
   train_features,
   train_labels,
   test_size=0.2,
   random_state=832289
)

print('Data preprocessed')

# Features and Labels

In [None]:
# Generic image / label placeholders and the one-hot view of the labels.
# The one-hot depth follows n_classes instead of a hard-coded 43 so it
# stays in step with the dataset summary.
# NOTE(review): x, y and one_hot_y are not referenced by the later cells
# visible in this notebook, which build their own input_ph / labels_ph.
x = tf.placeholder(tf.float32, (None, 32, 32, 3))
y = tf.placeholder(tf.int32, (None))
one_hot_y = tf.one_hot(y, n_classes)

# Training Pipeline

In [None]:
print('Creating network architecture...')
import tensorflow as tf

# Input dimensions, read from the first training image
# (assumes a height x width x channels layout — TODO confirm).
image_height = len(train_features[0])
image_width = len(train_features[0][0])
color_channels = len(train_features[0][0][0])

# Convolutional layer patch and output size
filter_width = 3
filter_height = 3
conv_k_output = 128

# Dimension parameters for each fully connected layer: flattened conv
# output -> 1024 -> 1024 -> one logit per class.
fc_params = [
    image_width * image_height * conv_k_output,
    1024,
    1024,
    n_classes
]

# Build weights and biases
fc_weights = []
fc_biases = []

with tf.variable_scope('BONHOMME', reuse=False):
    # tf.nn.conv2d expects filters as [height, width, in_channels, out_channels].
    conv2d_weight = tf.get_variable("conv2w", shape=[filter_height, filter_width, color_channels, conv_k_output], initializer=tf.contrib.layers.xavier_initializer())
    conv2d_bias = tf.get_variable("conv2b", shape=[conv_k_output], initializer=tf.contrib.layers.xavier_initializer())

    for i in range(len(fc_params) - 1):
        fc_weights.append(tf.get_variable('fc_weight' + str(i), shape=[fc_params[i], fc_params[i + 1]], initializer=tf.contrib.layers.xavier_initializer()))
        fc_biases.append(tf.get_variable('fc_bias' + str(i), shape=[fc_params[i + 1]], initializer=tf.contrib.layers.xavier_initializer()))

# One-hot encoded training and validation labels. A single short-lived
# session is used and closed again; previously two sessions were created
# here and never released.
with tf.Session() as one_hot_session:
    oh_train_labels, oh_valid_labels = one_hot_session.run([
        tf.one_hot(train_labels, n_classes),
        tf.one_hot(valid_labels, n_classes),
    ])

# Input placeholders (batch, height, width, channels) and one-hot labels.
input_ph = tf.placeholder(tf.float32, shape=[None, image_height, image_width, color_channels])
labels_ph = tf.placeholder(tf.float32)

# Convolutional layer
network = tf.nn.conv2d(input_ph, conv2d_weight, strides=[1, 1, 1, 1], padding='SAME')
network = tf.nn.bias_add(network, conv2d_bias)
network = tf.nn.relu(network)

# Fully connected layers. Flatten once: after the first matmul the tensor is
# already two-dimensional, so flattening again in every iteration was a no-op.
network = tf.contrib.layers.flatten(network)
last_fc_index = len(fc_weights) - 1
for i in range(len(fc_weights)):
    network = tf.matmul(network, fc_weights[i]) + fc_biases[i]
    if i < last_fc_index: # No relu after last FC layer
        network = tf.nn.relu(network)

# Loss computation: mean cross-entropy between softmax output and one-hot
# labels; the small constant keeps log() away from zero.
prediction = tf.nn.softmax(network)
cross_entropy = -tf.reduce_sum(labels_ph * tf.log(prediction + 1e-6), reduction_indices=1)
loss = tf.reduce_mean(cross_entropy)

# Accuracy computation: fraction of samples whose arg-max prediction equals
# the arg-max of the one-hot label.
is_correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels_ph, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct_prediction, tf.float32))

print('Network architecture created')

In [None]:
# Prepare the held-out test split exactly like the training data: scale the
# RAW pixels to [0.1, 0.9] and one-hot encode the RAW integer labels.
# The earlier version re-scaled `test_features` (already normalised by
# normalize_color) and passed the float, already one-hot `test_labels` from
# LabelBinarizer to tf.one_hot, which expects integer class ids.
test_features = np.array(X_test) / 255. * 0.8 + 0.1
with tf.Session() as one_hot_session:
    # Temporary session, closed as soon as the encoding has been computed.
    oh_test_labels = one_hot_session.run(tf.one_hot(y_test, n_classes))
print('Test label one hot encoded')

In [None]:
batch_size = 128

def run_batch(session, network, features, labels):
    """Evaluate a per-batch mean metric over a whole dataset in mini-batches.

    Args:
        session: Object whose ``run(fetch, feed_dict=...)`` evaluates the graph.
        network: Tensor to fetch for every batch; the callers in this
            notebook pass the ``accuracy`` tensor.
        features: Input samples, sliceable along the first axis.
        labels: Labels aligned with ``features``.

    Returns:
        The sample-weighted average of the fetched value over all samples.

    Raises:
        ValueError: If ``features`` is empty.
    """
    sample_count = len(features)
    if sample_count == 0:
        raise ValueError('run_batch needs at least one sample')

    weighted_total = 0.0

    # Step through every sample, including the final (possibly shorter)
    # batch that the previous implementation silently skipped.
    for batch_start in range(0, sample_count, batch_size):
        batch_end = batch_start + batch_size
        batch_features = features[batch_start:batch_end]
        batch_value = session.run(
            network,
            feed_dict={
                input_ph: batch_features,
                labels_ph: labels[batch_start:batch_end]
            }
        )
        # Weight by the batch length so a short last batch is not over-counted.
        weighted_total += batch_value * len(batch_features)

    return weighted_total / sample_count

print('Run batch function created')

In [None]:
from tqdm import tqdm

# Train the network with Adam and periodically record loss / accuracy.
training_epochs = 50
# AdamOptimizer is created with its default arguments.
optimizer = tf.train.AdamOptimizer().minimize(loss)

# Metrics are recorded every `log_batch_step` batches into the lists below.
log_batch_step = 50
batches = []
loss_batch = []
train_acc_batch = []
valid_acc_batch = []
validation_accuracy = 0.0

init = tf.global_variables_initializer()

# The session is kept open after this cell; later cells reuse `session`.
session = tf.Session(config=tf.ConfigProto(log_device_placement=True))
session.run(init)
# Only full batches are used: trailing samples that do not fill a batch are
# skipped in every epoch.
batch_count = int(len(train_features) / batch_size)

for epoch in range(training_epochs):
    batches_pbar = tqdm(range(batch_count), desc='Epoch {:>2}/{}'.format(epoch + 1, training_epochs), unit='batches')

    # The training cycle
    # NOTE: batches are sliced in the same order every epoch; this loop does
    # not reshuffle the training data.
    for batch_i in batches_pbar:
        batch_start = batch_i * batch_size
        batch_features = train_features[batch_start:batch_start + batch_size]
        batch_labels = oh_train_labels[batch_start:batch_start + batch_size]

        # One optimisation step; `l` is the loss fetched in the same run call.
        _, l = session.run(
            [optimizer, loss],
            feed_dict={input_ph: batch_features, labels_ph: batch_labels})

        # True when batch_i is a multiple of log_batch_step (including 0).
        if not batch_i % log_batch_step:
            # Accuracy on the batch that was just used for training.
            training_accuracy = session.run(
                accuracy,
                feed_dict={input_ph: batch_features, labels_ph: batch_labels}
            )

            # Random validation indices (drawn with replacement), 20% of a
            # batch in size, for a quick accuracy estimate.
            idx = np.random.randint(len(valid_features), size=int(batch_size * .2))

            validation_accuracy = session.run(
                accuracy,
                feed_dict={input_ph: valid_features[idx,:], labels_ph: oh_valid_labels[idx,:]}
            )

            # Log batches
            # The x-axis value advances by log_batch_step per logged entry.
            previous_batch = batches[-1] if batches else 0
            batches.append(log_batch_step + previous_batch)
            loss_batch.append(l)
            train_acc_batch.append(training_accuracy)
            valid_acc_batch.append(validation_accuracy)


# Final accuracy on the validation and test splits.
validation_accuracy = run_batch(session, accuracy, valid_features, oh_valid_labels)
test_accuracy = run_batch(session, accuracy, test_features, oh_test_labels)

for caption, value in (
    ('Final validation accuracy: ', validation_accuracy),
    ('Final test accuracy: ', test_accuracy),
):
    print(caption, value)

# Both charts share the x-range spanned by the logged batch numbers.
x_limits = [batches[0], batches[-1]]

# Upper chart: training loss.
loss_plot = plt.subplot(2, 1, 1)
loss_plot.set_title('Loss')
loss_plot.plot(batches, loss_batch, 'g')
loss_plot.set_xlim(x_limits)

# Lower chart: training versus validation accuracy.
acc_plot = plt.subplot(2, 1, 2)
acc_plot.set_title('Accuracy')
for series, line_style, legend_label in (
    (train_acc_batch, 'r', 'Training Accuracy'),
    (valid_acc_batch, 'b', 'Validation Accuracy'),
):
    acc_plot.plot(batches, series, line_style, label=legend_label)
acc_plot.set_ylim([0, 1.0])
acc_plot.set_xlim(x_limits)
acc_plot.legend(loc=4)

plt.tight_layout()
plt.show()

In [None]:
# Number of test samples fed to the network per session.run call.
test_batch_size = 250
# Predicted class id per sample. The axis is passed positionally, as in the
# accuracy computation, instead of the deprecated `dimension=` keyword.
y_pred_cls = tf.argmax(prediction, 1)
# True class id per test sample, recovered from the one-hot labels.
test_cls = np.argmax(oh_test_labels, axis=1)
from pylab import rcParams
from sklearn.metrics import confusion_matrix

# Shape used to reshape a single sample before it is displayed.
img_shape = (32, 32, 3)
def _as_class_id(label):
    """Return the integer class id for a plain id or a one-hot/score vector."""
    if np.ndim(label) == 0:
        return int(label)
    return int(np.argmax(label))


def plot_images(images, cls_true, cls_pred=None):
    """Show up to nine images in a 3x3 grid, captioned with their classes.

    Args:
        images: Sequence of at most nine images, each reshapeable to
            ``img_shape``.
        cls_true: True class per image, given either as integer class ids
            or as one-hot vectors.
        cls_pred: Optional predicted class per image, in either form.

    Raises:
        ValueError: If more than nine images are passed or the label
            sequences do not match ``images`` in length.
    """
    image_count = len(images)
    if image_count > 9:
        raise ValueError('plot_images shows at most 9 images')
    if len(cls_true) != image_count:
        raise ValueError('images and cls_true must have the same length')
    if cls_pred is not None and len(cls_pred) != image_count:
        raise ValueError('images and cls_pred must have the same length')

    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(hspace=0.3, wspace=0.3)

    for i, ax in enumerate(axes.flat):
        if i >= image_count:
            # Fewer than nine images: leave the remaining cells blank.
            ax.axis('off')
            continue

        ax.imshow(images[i].reshape(img_shape), cmap='binary')
        # Taking np.argmax of a scalar class id always yields 0, which made
        # every caption read "True: 0, Pred: 0" for integer labels.
        if cls_pred is None:
            xlabel = "True: {0}".format(_as_class_id(cls_true[i]))
        else:
            xlabel = "True: {0}, Pred: {1}".format(_as_class_id(cls_true[i]), _as_class_id(cls_pred[i]))
        ax.set_xlabel(xlabel)
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()
    
def plot_confusion_matrix(cls_pred):
    """Plot the confusion matrix of the test set for the given predictions.

    Args:
        cls_pred: Predicted class id for every test sample, aligned with
            the module-level ``test_cls``.
    """
    cm = confusion_matrix(y_true=test_cls,
                          y_pred=cls_pred)

    # Draw into one explicitly sized figure. Previously an unused 40x40
    # figure was opened, matshow created a second one, and the global
    # rcParams were changed as a side effect.
    fig = plt.figure(figsize=(13, 13))
    plt.matshow(cm, fignum=fig.number)
    plt.colorbar()
    tick_marks = np.arange(n_classes)
    plt.xticks(tick_marks, range(n_classes))
    plt.yticks(tick_marks, range(n_classes))
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()
    

def plot_example_errors(cls_pred, correct):
    """Display the first nine misclassified test images.

    Args:
        cls_pred: Predicted class per test sample.
        correct: Boolean array, True where the prediction was right.
    """
    wrong = np.logical_not(correct)
    first_nine = slice(0, 9)
    plot_images(images=test_features[wrong][first_nine],
                cls_true=test_cls[wrong][first_nine],
                cls_pred=cls_pred[wrong][first_nine])
    
def print_test_accuracy(show_example_errors=False,
                        show_confusion_matrix=False):
    """Classify the whole test set in batches and print the accuracy.

    Args:
        show_example_errors: If true, also plot misclassified examples.
        show_confusion_matrix: If true, also plot the confusion matrix.
    """
    num_test = len(test_features)
    # The builtin `int` replaces the `np.int` alias, which was removed in
    # NumPy 1.24.
    cls_pred = np.zeros(shape=num_test, dtype=int)

    # Predict in slices of at most test_batch_size samples.
    for i in range(0, num_test, test_batch_size):
        j = min(i + test_batch_size, num_test)

        batch_features = test_features[i:j]
        batch_labels = oh_test_labels[i:j]

        feed_dict = {input_ph: batch_features, labels_ph: batch_labels}

        cls_pred[i:j] = session.run(y_pred_cls, feed_dict=feed_dict)

    # Boolean array: True where the predicted class equals the true class.
    correct = (test_cls == cls_pred)
    correct_sum = correct.sum()

    acc = float(correct_sum) / num_test

    msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
    print(msg.format(acc, correct_sum, num_test))

    if show_example_errors:
        print("Example errors:")
        plot_example_errors(cls_pred=cls_pred, correct=correct)

    if show_confusion_matrix:
        print("Confusion Matrix:")
        plot_confusion_matrix(cls_pred=cls_pred)

print_test_accuracy(show_example_errors=False, show_confusion_matrix=True)

# Test the Model on New Images

In [None]:
### Load the new images, bring them into the network's input format and
### display them.
import matplotlib.image as mpimg
import os, sys
from PIL import Image

# NOTE(review): '50SpeedLimit' is the only entry without a file extension —
# confirm the file name on disk.
imgs = ['50SpeedLimit', 'SnowCaution.png', 'childrenCrossing.png', 'hochwasser.png', 'priority.png']

new_input = []

for imgname in imgs:
    # The network expects fixed-size RGB inputs scaled like the training
    # data (x / 255 * 0.8 + 0.1). Feeding the file contents unchanged gave
    # it a different value range and possibly a different size or an
    # alpha channel.
    with Image.open(os.path.join('images', imgname)) as raw_image:
        image = raw_image.convert('RGB').resize((image_width, image_height))
    new_input.append(np.asarray(image, dtype=np.float32) / 255. * 0.8 + 0.1)
    plt.imshow(image)
    plt.show()

In [None]:
### Run the trained network on the new images.
### Fetches `prediction` (the softmax output) for the images in `new_input`.

new_predictions = session.run(prediction, feed_dict={input_ph: new_input})

In [None]:
### Print the softmax output computed for the new images
### (one row per image, one column per class).
print(new_predictions)

In [None]:
# Print the two largest softmax values and their class indices for every
# new image.
print(session.run(tf.nn.top_k(prediction, 2), feed_dict={input_ph: new_input}))

# Question 1

Question 1

Describe the techniques used to preprocess the data

Answer:
All three RGB channels of every image, which originally range from 0 to 255, were normalized to the range 0.1–0.9. The data is processed in batches to stay within memory limits, so the network never works on the entire data set at the same time. Batching is used during both training and testing.
We normalize so that the gradients stay manageable. If each input kept its full RGB range of 0 to 255, the weight matrices would have to cope with very large numbers. Normalizing the data set avoids this issue.


# Question 2

I took 20% of the training data as validation data. Based on some of the test results I got from tweaking the validation data 20% seemed like a good number to not overfit the data. Until I was satisfied with the results I didn't use the testing data.

# Question 3

The first layer is a convolutional layer with a patch size of 3x3, a stride of 1, SAME padding and a depth of 128.
The second and third layers are fully connected layers with a width of 1024.
The final layer is a fully connected layer with a width of 43 (the amount of classes)
I used the LeNet lab as a model for the architecture while applying the modifications I learned from the lessons.

# Question 4

I used the AdamOptimizer with its default learning rate of 0.001.
I used a batch size of 128 and 50 training epochs.
After numerous tests, the network learned fairly quickly with this learning rate, and it was enough to avoid getting stuck in a local minimum.

# Question 5

I ended up using a moderately sized network because the signs are visually simple. I first tried a larger network, but the results were roughly the same while the computation time was drastically higher. I used a small number of epochs because the results did not change much after about 30-40 epochs. The results are satisfying, but the computation time was quite long even on my Nvidia GTX 1080.

# Question 6

I wanted to choose signs that I felt were very important and very common. The ice warning sign is a very important one because developing safety precautions for driving in heavy snow is still an open problem. I also chose a children's crossing sign because children may not be paying attention and could cross the street at any moment, so being able to identify this sign and have the car drive cautiously is very important.