<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-the-Data" data-toc-modified-id="Load-the-Data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load the Data</a></span><ul class="toc-item"><li><span><a href="#Download-data-(if-needed)" data-toc-modified-id="Download-data-(if-needed)-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Download data (if needed)</a></span></li><li><span><a href="#Read-in-log-file" data-toc-modified-id="Read-in-log-file-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Read in log file</a></span></li><li><span><a href="#Read-in-images-by-path-from-log-file" data-toc-modified-id="Read-in-images-by-path-from-log-file-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Read in images by path from log file</a></span></li><li><span><a href="#Split-the-data-into-training-and-validation" data-toc-modified-id="Split-the-data-into-training-and-validation-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Split the data into training and validation</a></span></li></ul></li><li><span><a href="#Data-Augmentation" data-toc-modified-id="Data-Augmentation-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data Augmentation</a></span><ul class="toc-item"><li><span><a href="#Attempt-w/-ImageDataGenerator" data-toc-modified-id="Attempt-w/-ImageDataGenerator-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Attempt w/ <code>ImageDataGenerator</code></a></span></li><li><span><a href="#Define-own-generator" data-toc-modified-id="Define-own-generator-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Define own generator</a></span></li></ul></li><li><span><a href="#Model" data-toc-modified-id="Model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Model</a></span><ul class="toc-item"><li><span><a href="#Using-center-images-only" data-toc-modified-id="Using-center-images-only-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Using center images only</a></span><ul class="toc-item"><li><span><a href="#Evaluation" 
data-toc-modified-id="Evaluation-3.1.1"><span class="toc-item-num">3.1.1&nbsp;&nbsp;</span>Evaluation</a></span></li></ul></li></ul></li></ul></div>

In [1]:
import pandas as pd
import numpy as np
import cv2
import tensorflow.keras as keras

# Load the Data

We'll load the log data and then load the images whose paths are listed in the log file.

## Download data (if needed)

In [2]:
# Download data & unzip if it doesn't already exist
import os.path
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

In [3]:
def load_ext_file(data_zip_url, data_path='data/'):
    '''Download a zip archive from a URL and extract it, unless the data exists.

    The archive is expected to contain a top-level directory named like the
    last component of `data_path` (e.g. a "data" folder for the default).
    It is therefore extracted into the *parent* directory of `data_path`, so
    the archive's folder ends up at `data_path`. For the default 'data/' this
    means extracting into the current working directory.

    Args:
        data_zip_url: URL of the zip archive to download.
        data_path: Directory where the data should live. If it already
            exists, nothing is downloaded or extracted.
    '''
    # Check if path already exists; if so there is nothing to do
    if os.path.exists(data_path):
        return

    # Parent directory of the target (None -> current working directory)
    extract_dir = os.path.dirname(os.path.normpath(data_path)) or None

    with urlopen(data_zip_url) as zip_resp:
        # The response is buffered in memory so ZipFile gets a seekable file
        with ZipFile(BytesIO(zip_resp.read())) as zfile:
            # Extract files so the archive's top-level folder lands at data_path
            zfile.extractall(path=extract_dir)
       

In [4]:
# Zip file contains the "data" and "__MACOSX" directories
DATA_ZIP_URL = 'https://d17h27t6h515a5.cloudfront.net/topher/2016/December/584f6edd_data/data.zip'

# Fetch the archive only when the local data directory is missing
load_ext_file(DATA_ZIP_URL, data_path='data/')

## Read in log file

In [5]:
def create_img_meas_dfs(log_csv, data_dir=None, orig_dir=None, skiprows=None):
    '''Creates DataFrames for the image paths and measurements using CSV path.

    Args:
        log_csv: Path (or file-like object) of the driving log CSV.
        data_dir: Replacement text for `orig_dir` in the image paths. Only
            used when both `orig_dir` and `data_dir` are given (non-empty).
        orig_dir: Literal text to replace in the three image path columns.
        skiprows: Passed through to `pandas.read_csv` (e.g. 1 to skip a
            header line, since column names are supplied here).

    Returns tuple of two DataFrames: (image paths, measurements).
    '''
    image_cols = ['image_center', 'image_left', 'image_right']
    measurement_cols = [
        'steer_angle',  # [-1,1]
        'throttle',     # throttle measurement
        'break',        # brake measurement (column name kept as-is for callers)
        'speed'         # mph
    ]
    data_header = image_cols + measurement_cols

    df = pd.read_csv(
        log_csv,
        names=data_header,
        skiprows=skiprows
    )

    # Replace the original directory from dataset (if specified)
    if orig_dir and data_dir:
        for col in image_cols:
            # regex=False: directory names are literal text, not patterns
            # (characters such as '.', '\\' or '(' must not be interpreted)
            df[col] = df[col].str.replace(orig_dir, data_dir, regex=False)

    # Get specifics for each DF
    df_img_paths = df[image_cols]
    df_measurements = df[measurement_cols]

    return df_img_paths, df_measurements

In [6]:
# skiprows=1 drops the first line of the CSV (presumably its own header row;
# the column names are supplied inside create_img_meas_dfs)
df_imgs, df_meas = create_img_meas_dfs(
    log_csv='data/driving_log.csv',
    skiprows=1,
)

# Preview of the image path table
display(df_imgs.head())

# Summary statistics of the measurement columns
print('Stats for measurements:')
display(df_meas.describe())

Unnamed: 0,image_center,image_left,image_right
0,IMG/center_2016_12_01_13_30_48_287.jpg,IMG/left_2016_12_01_13_30_48_287.jpg,IMG/right_2016_12_01_13_30_48_287.jpg
1,IMG/center_2016_12_01_13_30_48_404.jpg,IMG/left_2016_12_01_13_30_48_404.jpg,IMG/right_2016_12_01_13_30_48_404.jpg
2,IMG/center_2016_12_01_13_31_12_937.jpg,IMG/left_2016_12_01_13_31_12_937.jpg,IMG/right_2016_12_01_13_31_12_937.jpg
3,IMG/center_2016_12_01_13_31_13_037.jpg,IMG/left_2016_12_01_13_31_13_037.jpg,IMG/right_2016_12_01_13_31_13_037.jpg
4,IMG/center_2016_12_01_13_31_13_177.jpg,IMG/left_2016_12_01_13_31_13_177.jpg,IMG/right_2016_12_01_13_31_13_177.jpg


Stats for measurements:


Unnamed: 0,steer_angle,throttle,break,speed
count,8036.0,8036.0,8036.0,8036.0
mean,0.00407,0.86966,0.00197,28.169839
std,0.12884,0.301326,0.036565,6.149327
min,-0.942695,0.0,0.0,0.50249
25%,0.0,0.985533,0.0,30.183093
50%,0.0,0.985533,0.0,30.1864
75%,0.0,0.985533,0.0,30.18664
max,1.0,0.985533,1.0,30.70936


## Read in images by path from log file

In [7]:
# TODO: Still slow and memory hungry (every image is loaded eagerly);
# consider loading lazily in a generator
data_dir = 'data/'


def _load_image(rel_path):
    '''Read one image (path relative to `data_dir`) and fail loudly if unreadable.'''
    # Paths in the log may carry surrounding whitespace
    full_path = data_dir + rel_path.strip()
    img = cv2.imread(full_path)
    # cv2.imread does not raise on a missing/corrupt file, it returns None;
    # catching it here avoids a confusing failure when the tensor is built later
    if img is None:
        raise FileNotFoundError(f'Could not read image: {full_path}')
    return img


# One list per camera, all in the same row order as the log file
center_images = [_load_image(path) for path in df_imgs['image_center']]
left_images = [_load_image(path) for path in df_imgs['image_left']]
right_images = [_load_image(path) for path in df_imgs['image_right']]

In [None]:
# Test to see if image reading works
import matplotlib.pyplot as plt

f = plt.figure(figsize=(25,25))
ax_left = f.add_subplot(1, 3, 1)
ax_center = f.add_subplot(1, 3, 2)
ax_right = f.add_subplot(1, 3, 3)

# Print out an image example for each image type.
# cv2.imread returns BGR channel order while matplotlib expects RGB,
# so convert before displaying to get the true colors.
ax_center.imshow(cv2.cvtColor(center_images[0], cv2.COLOR_BGR2RGB))
ax_left.imshow(cv2.cvtColor(left_images[0], cv2.COLOR_BGR2RGB))
ax_right.imshow(cv2.cvtColor(right_images[0], cv2.COLOR_BGR2RGB))

ax_left.set_title('left')
ax_center.set_title('center')
ax_right.set_title('right');

## Split the data into training and validation

Since we are splitting the data, we want to keep all the images (left, center, right) from the same timestamp together in the same split.

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Creates (5th rank) tensor so multiple images as one data point:
# stack the cameras on a leading axis, then swap it with the sample axis so
# the camera axis (left=0, center=1, right=2) comes second
camera_views = [left_images, center_images, right_images]
X = np.array(camera_views).transpose(1, 0, 2, 3, 4)

In [12]:
# Steering angle is the regression target, shaped as a single column
steer_angles = df_meas['steer_angle'].to_numpy()
y = steer_angles.reshape(-1, 1)

In [13]:
# Hold out 20% of the samples; the fixed seed keeps the split reproducible
split = train_test_split(X, y, test_size=0.2, random_state=27)
X_train, X_test, y_train, y_test = split

# Data Augmentation

We can do some data augmentation to the images to have more variety in the training material. We'll just do a horizontal flip here and only use the center images.

## Attempt w/ `ImageDataGenerator`

In [14]:
# Random horizontal flips, with 20% of the data reserved as a validation subset
augment_options = dict(horizontal_flip=True, validation_split=0.2)
datagen = keras.preprocessing.image.ImageDataGenerator(**augment_options)

In [15]:
# Only looking at center images 
# TODO: Re-enable once `flow(..., subset=...)` works with regression targets.
# Known issue for regression problems: https://github.com/keras-team/keras-preprocessing/issues/214
# train_generator = datagen.flow(
#     X_train[:,1,:], y=y_train, batch_size=64, shuffle=True, seed=27, subset='training'
# )
# valid_generator = datagen.flow(
#     X_valid[:,1,:], y=y_valid, batch_size=64, shuffle=True, seed=27, subset='validation'
# )

ValueError: Training and validation subsets have different number of classes after the split. If your numpy arrays are sorted by the label, you might want to shuffle them.

# Model

## Using center images only

We'll try just using center images for training the model. If we simply put in the left and right images for the camera angle, we'd likely have issues with the model learning incorrect behavior. There are some techniques that could allow us to use these other images but for simplicity's sake we'll only use the center images for now.

In [None]:
# Creating a reusable default convolution
from functools import partial

# Shared settings for every conv layer; any of them can still be overridden
# per layer by passing the keyword again (e.g. kernel_size=5)
conv_defaults = {
    'kernel_size': 3,
    'padding': 'SAME',
    'activation': 'relu',
    'kernel_initializer': 'he_normal',
}
DefaultConv2D = partial(keras.layers.Conv2D, **conv_defaults)

In [None]:
# Each sample in X_train holds three camera views (left, center, right).
# The model takes a single center image, so use the shape of one image
# (index 1 on the camera axis), not of the whole three-view sample.
input_shape = X_train[0, 1].shape

In [None]:
# Based on https://developer.nvidia.com/blog/deep-learning-self-driving-cars/
# Normalize the images (pixel values scaled to [-0.5, 0.5])
normalize = keras.layers.Lambda(
    lambda pixels: (pixels/255.0) - 0.5, input_shape=input_shape)

# Convolutional part: every convolution is followed by 2x2 max pooling
conv_stack = []
for n_filters, k_size in [(24, 5), (36, 5), (48, 3), (64, 3)]:
    conv_stack.append(DefaultConv2D(filters=n_filters, kernel_size=k_size))
    conv_stack.append(keras.layers.MaxPooling2D(pool_size=2))

# Fully connected network
# (Dropout to regularize is currently disabled; candidate spot:
#  keras.layers.Dropout(0.2) after each of the 1024/128/64 Dense layers)
dense_head = [keras.layers.Flatten()]
dense_head += [
    keras.layers.Dense(units=n_units, activation='relu')
    for n_units in (1024, 128, 64, 16)
]
# Single linear output
dense_head.append(keras.layers.Dense(units=1))

model_list = [normalize] + conv_stack + dense_head

In [None]:
# Adding in model to crop images first.
# The guard keeps this cell idempotent: re-running it must not stack a
# second Cropping2D layer onto the already-modified list.
if not any(isinstance(layer, keras.layers.Cropping2D) for layer in model_list):
    model_list = (
        [model_list[0]] +
        # Crop out unnecessary image regions:
        # 60 rows off the top, 20 rows off the bottom, no horizontal cropping
        [keras.layers.Cropping2D(cropping=((60,20), (0,0)))] +
        model_list[1:]
    )

In [None]:
# Chain the prepared layers into a single feed-forward model
model = keras.models.Sequential(layers=model_list)

In [None]:
# Regression on the steering angle: mean squared error with the Nadam optimizer
compile_options = {'loss': 'mse', 'optimizer': 'nadam'}
model.compile(**compile_options)

In [None]:
# Print the layer-by-layer overview of the model
model.summary()

In [None]:
# Allow early stopping after not changing significantly:
# stop once val_loss has not improved by at least 0.001 for 5 epochs in a row,
# and roll back to the weights of the best epoch
stop_after_5_no_change = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True
)

# NOTE(review): `train_generator` and `valid_generator` are only defined in
# commented-out code earlier in this notebook; they must be created before
# this cell can run.
history = model.fit(
    # Keras names this argument lowercase `x`; `X=` raises a TypeError
    x=train_generator,
    y=None, # Since using a generator
    batch_size=None, # Since using a generator
    steps_per_epoch=2000,
    epochs=30,
    validation_data=valid_generator,
    validation_steps=800,
    callbacks=[stop_after_5_no_change]
)

### Evaluation

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline 

def eval_model(model, model_history, X, y, show=True):
    '''Evaluate a trained model and optionally plot its training curves.

    Args:
        model: Object with an `evaluate(X, y)` method (e.g. a Keras model).
        model_history: Object whose `.history` dict holds per-epoch values
            under 'loss' and, optionally, 'val_loss' (as returned by
            `model.fit`). Only read when `show` is True.
        X: Inputs to evaluate on.
        y: Targets to evaluate on.
        show: If True, plot the training (and validation, when available)
            loss per epoch.
    '''
    score = model.evaluate(X, y)
    # evaluate() returns [loss, *metrics] when the model was compiled with
    # metrics, and a bare scalar otherwise
    loss = score[0] if isinstance(score, (list, tuple)) else score
    print(f'Loss: {loss:.2f}')

    if show:
        epoch_logs = model_history.history
        plt.plot(epoch_logs['loss'], label='Loss (training data)')
        # val_loss is only recorded when validation data was given to fit()
        if 'val_loss' in epoch_logs:
            plt.plot(epoch_logs['val_loss'], label='Loss (validation data)')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper right')
        plt.show()

Let's check out how the previous model performed while training.

In [None]:
# Evaluate on the held-out split only, using the center camera images
# (index 1 on the camera axis), which is the single-image input the model takes
eval_model(model, history, X_test[:, 1], y_test)

In [None]:
# Persist the trained model to disk (the .h5 extension presumably selects
# the HDF5 format; confirm for the installed Keras version)
model.save('model.h5')