# Import

Import necessary libraries

In [None]:
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
import tensorflow as tf

# Get Data

In [None]:
# Locations of the pickled training and testing datasets.
training_file = "traffic_data/train.p"
testing_file = "traffic_data/test.p"

# Deserialize both datasets.
# NOTE: pickle.load can execute arbitrary code while loading, so these files
# must come from a trusted source.
with open(training_file, 'rb') as f:
    train = pickle.load(f)

with open(testing_file, 'rb') as f:
    test = pickle.load(f)

# Pull the images ('features') and their class ids ('labels') out of each
# dataset as numpy arrays. The network below expects every image to have
# shape [32, 32, 3] (Width = 32, Height = 32, Channels = 3).
x_train = np.array(train['features'])
y_train = np.array(train['labels'])
x_test = np.array(test['features'])
y_test = np.array(test['labels'])


# Preprocess Data

Obtain the validation data set from the training data set. Further preprocessing of the images can also be done here, including image normalization, color-channel changes (to YUV, HSV, or gray), and inverting the input images.

In [None]:
# Hold out 20% of the training examples as the validation set. The fixed
# random_state makes the split reproducible from run to run.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=0,
)

# Define Tensorflow Variables & Placeholders

In [None]:
# Number of full passes the model makes over the training set.
EPOCHS = 100

# Number of images pushed through the model in a single step.
BATCH_SIZE = 128

# Total number of images in the training set.
NUM_EXAMPLES = len(x_train)

# TensorFlow placeholders.
#
# These are filled in at run time through feed_dict.

# x_image holds a batch of traffic sign images. The leading "None" tells
# TensorFlow to accept any size for that dimension; here it corresponds to
# the number of images in the batch (at most BATCH_SIZE).
x_image = tf.placeholder(dtype=tf.float32, shape=(None, 32, 32, 3))

# y_ holds the integer class label for each image in x_image.
y_ = tf.placeholder(dtype=tf.int32, shape=(None,))

# The labels in y_ are integers from 0 to 42. The cross-entropy loss used
# further down needs them one-hot encoded: each label becomes a vector of
# 43 entries that is all zeros except for a 1 at the index equal to the
# label. For example, a label of 3 becomes [0, 0, 0, 1, 0, 0, ..., 0, 0].
one_hot_y = tf.one_hot(y_, depth=43)

# Probability that a neuron's output is kept by Dropout, a technique that
# helps keep the network from overfitting the training set. Because this is
# a placeholder, a different value can be fed while training than while
# evaluating or testing the network.
keep_prob = tf.placeholder(dtype=tf.float32)

# Convolutional Neural Network

In [None]:
# This cell defines the architecture of the Convolutional Network
# It is comprised of two convolutional layers and two fully connected layers.
# The fully connected layers implement Drop Out to reduce overfitting.

# Layer 1 (convolutional)
# input: [32, 32, 3]
# output: [16, 16, 32]

# Filter weights: 5x5 kernels mapping the 3 input channels to 32 feature
# maps, drawn from a truncated normal distribution with mean 0 and standard
# deviation 0.1. Training adjusts these weights, which is how the network
# "learns" to recognize German Traffic Signs.
W_conv1 = tf.Variable(tf.truncated_normal(shape=[5, 5, 3, 32], stddev=0.1))

# One bias per feature map, initialized to 0.1 and trained along with the
# weights.
b_conv1 = tf.Variable(tf.constant(0.1, shape=[32]))

# Convolve the filters over the input images with stride 1; 'SAME' padding
# keeps the 32x32 spatial size.
conv1 = tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME')

# Add the bias and pass the result through rectified linear (ReLU) units.
conv1 = tf.nn.relu(conv1 + b_conv1)

# 2x2 max pooling with stride 2: only the largest value in each 2x2 window
# is passed on, which halves the width and height.
conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 2 (convolutional)
# input: [16, 16, 32]
# output: [8, 8, 64]

# Filter weights: 5x5 kernels mapping the 32 feature maps of layer 1 to 64
# feature maps, initialized the same way as the layer 1 weights.
W_conv2 = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 64], stddev=0.1))

# One bias per output feature map, initialized to 0.1.
b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))

# Convolve, add the bias, then apply ReLU -- the same pattern as layer 1.
# The bias must be added here; otherwise b_conv2 is never part of the graph
# and the layer runs without a bias term.
conv2 = tf.nn.relu(tf.nn.conv2d(conv1, W_conv2, strides=[1,1,1,1], padding='SAME') + b_conv2)

# 2x2 max pooling with stride 2 halves the width and height again.
conv2 = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

# Layer 3 (fully connected)
# input: [8, 8, 64]
# output: 1024

# conv2 is a 4D tensor (batch, width, height, depth). A fully connected
# layer needs one flat vector per image, so each 8x8x64 volume is flattened
# into a row; the -1 lets TensorFlow infer the batch dimension.
fc0 = tf.reshape(conv2, [-1, 8 * 8 * 64])

# Weights and biases connecting the flattened features to 1024 hidden units.
W_fc1 = tf.Variable(tf.truncated_normal(shape=[8 * 8 * 64, 1024], stddev=0.1))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))

# Affine transform followed by ReLU.
fc1 = tf.matmul(fc0, W_fc1) + b_fc1
fc1 = tf.nn.relu(fc1)

# Dropout: keep_prob is the probability that a neuron's output is kept, and
# which neurons are dropped is chosen at random. As a result the layer
# effectively has a different shape on every training iteration, which
# encourages better generalization and makes overfitting the training
# images less likely.
fc1_drop = tf.nn.dropout(fc1, keep_prob=keep_prob)


# Layer 4 (output layer)
# input: 1024
# output: 43
#
# The output size must equal the number of classes in the dataset. This
# layer produces raw scores (logits); the softmax itself is applied inside
# the loss function.
W_fc2 = tf.Variable(tf.truncated_normal(shape=[1024, 43], stddev=0.1))
b_fc2 = tf.Variable(tf.constant(0.1, shape=[43]))

y_conv = tf.matmul(fc1_drop, W_fc2)
y_conv = y_conv + b_fc2


# Loss & Optimization

In [None]:
# Softmax turns the 43 raw network outputs for an image into probabilities
# of the image belonging to each class: it exponentiates the inputs and then
# normalizes them so they form a valid probability distribution.

# Cross-entropy measures how badly the model is performing -- the loss, i.e.
# how far the predicted distribution is from the desired one. It is a very
# common loss function for classification.

# TensorFlow provides one efficient op that applies softmax to the logits
# and computes the cross-entropy against the one-hot labels, giving one loss
# value per example. tf.reduce_mean then averages over the batch.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=y_conv)
cross_entropy = tf.reduce_mean(cross_entropy)

# Ask TensorFlow to minimize the cross-entropy loss with the ADAM optimizer
# (learning rate 1e-3). Back-propagation supplies the gradients, and the
# weights are moved in the direction that reduces the loss.
train_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cross_entropy)

# Decide, per example, whether the predicted label is correct.
# argmax returns the index of the largest value along dimension 1. For
# one_hot_y that is the index holding the single 1, i.e. the true label.
# For y_conv (43 scores per example) it is the index of the greatest score,
# i.e. the network's predicted label.
# tf.equal compares the two indices element-wise, so correct_prediction is
# a boolean vector such as [True, True, False, False, True, False, ...].
correct_prediction = tf.equal(tf.argmax(one_hot_y, 1), tf.argmax(y_conv, 1))

# Accuracy: cast the booleans to floats (0.0 or 1.0) and take their mean,
# which is the fraction of examples predicted correctly.
accuracy = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(accuracy)

# Model Evaluation

In [None]:
# evaluate() measures accuracy on held-out data (e.g. the validation or testing dataset). The keep
# probability is set to 1, meaning all neurons are present -- the fully connected layer will not
# change between successive batches.

def evaluate(x_data, y_data):
    """Return the model's accuracy on a dataset, computed batch by batch.

    The data is fed through the network in slices of at most BATCH_SIZE
    examples. Each batch accuracy is weighted by the batch length, so a
    shorter final batch does not skew the overall result.

    Args:
        x_data: images to evaluate; must support len() and slicing.
        y_data: labels matching x_data, sliced with the same offsets.

    Returns:
        The fraction of examples in x_data that were classified correctly.

    Raises:
        ValueError: if x_data is empty (accuracy would be undefined).
        RuntimeError: if there is no default TensorFlow session.
    """
    num_examples = len(x_data)

    # Fail early with a clear message instead of a ZeroDivisionError at the
    # final division.
    if num_examples == 0:
        raise ValueError("evaluate() requires at least one example")

    sess = tf.get_default_session()

    # Without an active default session there is nothing to run the graph in.
    if sess is None:
        raise RuntimeError("evaluate() must be called inside an active tf.Session")

    total_accuracy = 0

    for offset in range(0, num_examples, BATCH_SIZE):

        # obtain a small batch from the larger dataset that is x_data and y_data
        batch_x, batch_y = x_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]

        # keep_prob is fed as 1.0 so no neuron output is dropped during
        # evaluation.
        local_accuracy = sess.run(accuracy, feed_dict={x_image: batch_x, y_: batch_y, keep_prob: 1.0})

        # weight by the batch length; the last batch may be shorter
        total_accuracy += (local_accuracy * len(batch_x))

    return total_accuracy / num_examples


# Model Training

In [None]:
# train the model

with tf.Session() as sess:

    # initialize all weights and biases
    sess.run(tf.global_variables_initializer())

    for i in range(EPOCHS):

        # shuffle data for every epoch, so the model does not see the same
        # series of images each time.
        x_train, y_train = shuffle(x_train, y_train)

        # BATCH_SIZE images are fed into the network at a time. A larger
        # batch size means fewer steps per epoch, but each step needs more
        # memory and computation.
        for offset in range(0, NUM_EXAMPLES, BATCH_SIZE):

            # end index of the current batch
            end = offset + BATCH_SIZE

            # obtain our current batches for this iteration
            batch_x, batch_y = x_train[offset:end], y_train[offset:end]

            # one ADAM update of the network weights. keep_prob of 0.8 means
            # each neuron output in the dropout layer is dropped with
            # probability 0.2.
            train_step.run(feed_dict={x_image: batch_x, y_: batch_y, keep_prob: 0.8})

        # after each pass over the training set, measure performance on the
        # validation set.
        validation_accuracy = evaluate(x_valid, y_valid)
        print('epoch %d, validation accuracy %g' % (i+1, validation_accuracy))

    # when training is complete, evaluate network performance on the test data set. The network only
    # sees this dataset at the end of training, so it indicates how well the network generalizes.
    # if the network shows very high accuracy for validation and a low accuracy for testing,
    # there is a good chance the network overfitted; consider decreasing the keep probability.
    # The test set goes through evaluate() so it is processed in batches rather than being pushed
    # through the network in a single pass.
    print('test accuracy %g' % evaluate(x_test, y_test))

