# Preprocess data for self driving.

Steps:
1. Load dataset from: https://www.kaggle.com/roydatascience/training-car
2. Make sure you have the folder structure `training-car/self driving car training data`.
3. Run this notebook.

In [None]:
import cv2
import os
import pickle
import csv
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import cv2
import imageio

## Read CSV.

In [None]:
csv_path = "training-car/self driving car training data/data/driving_log.csv"
images_path = "training-car/self driving car training data/data/"

# Read the whole driving log. newline="" hands newline handling to the csv
# module, as its documentation recommends, so quoted fields that contain
# line breaks are parsed correctly.
with open(csv_path, "rt", newline="") as file:
    csv_lines = list(csv.reader(file))

# The first row holds the column names; everything after it is data.
csv_headers = csv_lines.pop(0)

# Show the header and the first data row as a quick sanity check.
print(csv_headers)
print(csv_lines[0])

# Method for processing data.

In [None]:
def get_data(csv_lines, preprocess):
    """Load the images and steering targets referenced by driving-log rows.

    Every row yields three samples, one for each image path stored in
    columns 0, 1 and 2 (presumably the center, left and right cameras --
    confirm against the printed CSV header). The value in column 3 is the
    target; it is used as-is for column 0, increased by ``delta`` for
    column 1 and decreased by ``delta`` for column 2.

    Args:
        csv_lines: Rows of the driving log without the header row. Each row
            is a sequence of strings.
        preprocess: If truthy, each image is reduced to channel 1 of its HSV
            representation and resized to 40x40. Otherwise the RGB image is
            kept unchanged.

    Returns:
        A tuple ``(images, targets)`` of float32 numpy arrays, with three
        entries per input row.

    Raises:
        FileNotFoundError: If an image file cannot be read.
    """
    # Steering correction applied to the samples from columns 1 and 2.
    delta = 0.2
    # Offset per image column: column 0 unchanged, column 1 +delta,
    # column 2 -delta.
    steering_offsets = (0.0, delta, -delta)

    images = []
    targets = []
    for line in tqdm(csv_lines):
        steering = float(line[3])
        for image_index, offset in enumerate(steering_offsets):

            # The log stores paths from the recording machine. Keep only the
            # part from "IMG" onwards and resolve it against images_path.
            relative_path = "IMG" + line[image_index].split("IMG")[1].strip()
            image_path = os.path.join(images_path, relative_path)

            # cv2.imread signals failure by returning None instead of
            # raising, so check explicitly to get a useful error message.
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                raise FileNotFoundError(
                    "Could not read image: {}".format(image_path)
                )
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Optional preprocessing: single HSV channel, downscaled.
            if preprocess:
                image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
                image = image[:, :, 1]
                image = cv2.resize(image, (40, 40))
            images.append(image)

            targets.append(steering + offset)

    # Return numpy arrays.
    images = np.array(images).astype("float32")
    targets = np.array(targets).astype("float32")
    return images, targets

## Create an unprocessed dataset.
Note: This is a random sample of 100 log rows with the raw, unprocessed images. It is not intended for training.

In [None]:
# Pick 100 random rows from a shuffled copy, so csv_lines keeps its order.
random_csv_lines = list(csv_lines)
np.random.shuffle(random_csv_lines)
random_csv_lines = random_csv_lines[:100]

# Load the raw images for the sampled rows.
images, targets = get_data(random_csv_lines, preprocess=False)
print(images.shape, targets.shape, sep="\n")

# Store the sample as a pickle file.
with open("training-car-unprocessed.p", "wb") as handle:
    pickle.dump((images, targets), handle, protocol=4)

## Create preprocessed dataset.

In [None]:
# Get images and labels.
images, targets = get_data(csv_lines, preprocess=True)
print(images.shape)
print(targets.shape)

# Save pickle file.
with open("training-car.p", "wb") as file:
    pickle.dump((images, targets), file, protocol=4)

# Create an animated GIF.

In [None]:
gif_path = "demo.gif"
duration = 0.1
image_number = 400

# get_data returns three images per CSV line, so this loads up to
# 3 * image_number images for the first image_number lines.
images, targets = get_data(csv_lines[:image_number], preprocess=False)

font = cv2.FONT_HERSHEY_SIMPLEX

with imageio.get_writer(gif_path, mode='I', duration=duration) as writer:

    # Step by 3 to use only the first image of every CSV line. The bound is
    # len(images) rather than image_number: bounding by image_number would
    # stop after a third of the loaded lines and would raise IndexError when
    # the log has fewer lines than expected.
    for i in tqdm(range(0, len(images), 3)):
        # get_data returns float32; convert back to 8-bit for drawing/GIF.
        image = images[i].astype("uint8")
        target = targets[i]
        image = cv2.resize(image, (400, 300), interpolation=cv2.INTER_NEAREST)

        # Overlay the steering target onto the frame.
        text = "Steering: {:0.2f}".format(target)
        cv2.putText(image, text, (10, 25), font, 1, (255, 255, 255), 2)

        writer.append_data(image)

print("Done.")