### TODO:
- Look into using a `tf.data.Dataset` object for the datasets ([helpful link](https://www.tensorflow.org/guide/data)).
- Maybe use a test set as well, rather than just testing it physically.

In [1]:
import tensorflow as tf

# Let TensorFlow grow its GPU memory use on demand instead of reserving it all up front.
# Looping (rather than indexing [0]) keeps this cell working on CPU-only machines and
# applies the same setting to every GPU, which TensorFlow requires to be consistent.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

from tensorflow.keras import layers

import numpy as np
import matplotlib.pyplot as plt
import random

# Some parameters 

In [2]:
MODELS = ["forward", "left", "right"]  # Also represents the class names.
MODEL_I = 0  # Model index, determines which model to train, hence which data to use (chosen from MODELS).

# Directory holding the images for the selected model.
DATA_PATH = f"data/{MODELS[MODEL_I]}_model_data"


VALIDATION_SPLIT = 0.1  # Fraction of the images held out for validation.
IMGS_SHAPE = (240, 320, 3)  # Model input shape; the first two entries are the image target size.
BATCH_SIZE = 64  # Needs to be even, because half will be original data, half augmented data.
SEED_NUM = 2  # Shared seed for the random number generators and the data generators.


# Data augmentation parameters
ROT_RANGE = 10  # Passed to ImageDataGenerator as rotation_range.
BRIGHT_MIN = 0.2  # Lower bound of brightness_range.
BRIGHT_MAX = 1.5  # Upper bound of brightness_range.
HORI_FLIP = True  # Whether random horizontal flips are enabled.


# Fail here, in the configuration cell, rather than deep inside the data pipeline.
assert BATCH_SIZE >= 2 and BATCH_SIZE % 2 == 0, "BATCH_SIZE must be a positive even number"
assert 0.0 < VALIDATION_SPLIT < 1.0, "VALIDATION_SPLIT must lie strictly between 0 and 1"
assert BRIGHT_MIN <= BRIGHT_MAX, "BRIGHT_MIN must not exceed BRIGHT_MAX"


# General setup: seed Python's, NumPy's and TensorFlow's random number generators
# with the same value so that repeated runs start from the same random state.
random.seed(SEED_NUM)
np.random.seed(SEED_NUM)
tf.random.set_seed(SEED_NUM)

# Data sequence class to set up the generators and manage custom batches

In [3]:
class DataSequence(tf.keras.utils.Sequence):
    """Keras ``Sequence`` whose training batches are half original, half augmented images.

    Typical use: construct it, call ``create_train_val_gens`` and ``create_aug_gen``,
    then pass the instance to ``model.fit``.

    Attributes:
        validation_split: Fraction of the images reserved for validation.
        img_shape: Image shape; the first two entries are used as the target size.
        batch_size: Size of a full training batch (must be even).
        train_data_flow: Directory iterator over the training subset, yielding
            ``batch_size // 2`` un-augmented images per step. ``None`` until created.
        val_data_flow: Directory iterator over the validation subset, yielding
            ``batch_size`` images per step. ``None`` until created.
        datagen: ``ImageDataGenerator`` applying the augmentations. ``None`` until created.
    """

    def __init__(self, validation_split, img_shape, batch_size):
        """Store the batching parameters; the generators are created separately.

        Raises:
            ValueError: If ``batch_size`` is not a positive even number.
        """
        # Newer Keras versions expect Sequence subclasses to initialise the base class;
        # on older versions this call is a harmless no-op.
        super().__init__()

        # Every batch is assembled from two halves of batch_size // 2 images each.
        if batch_size < 2 or batch_size % 2 != 0:
            raise ValueError(f"batch_size must be a positive even number, got {batch_size}")

        self.validation_split = validation_split
        self.img_shape = img_shape
        self.batch_size = batch_size

        # Filled in by create_train_val_gens() / create_aug_gen().
        self.train_data_flow = None
        self.val_data_flow = None
        self.datagen = None

    # Creating the training and validation generators
    def create_train_val_gens(self, steering_frames, data_path, seed_num):
        """Create the directory iterators for the training and validation subsets.

        Args:
            steering_frames: Array of training images passed to the generator's ``fit``.
            data_path: Directory whose sub-directories hold the images of each class.
            seed_num: Seed handed to both directory iterators.
        """
        img_gen = tf.keras.preprocessing.image.ImageDataGenerator(validation_split=self.validation_split,
                                                                  rescale=1./255)

        # NOTE(review): fit() is only documented as necessary for the featurewise_* / zca
        # options, none of which are enabled here, so this call is probably redundant.
        img_gen.fit(steering_frames)

        target_size = (self.img_shape[0], self.img_shape[1])

        self.train_data_flow = img_gen.flow_from_directory(directory=data_path,
                                                           target_size=target_size,
                                                           batch_size=self.batch_size // 2,  # /2 because half will be augmented.
                                                           seed=seed_num,
                                                           subset="training")

        self.val_data_flow = img_gen.flow_from_directory(directory=data_path,
                                                         target_size=target_size,
                                                         batch_size=self.batch_size,
                                                         seed=seed_num,
                                                         subset="validation")

    # Creating the generator for data augmentation.
    def create_aug_gen(self, steering_frames, rot_range, bright_min, bright_max, hori_flip):
        """Create the generator that produces the augmented half of every batch.

        Args:
            steering_frames: Array of training images passed to the generator's ``fit``.
            rot_range: Value for ``rotation_range``.
            bright_min: Lower bound of ``brightness_range``.
            bright_max: Upper bound of ``brightness_range``.
            hori_flip: Whether random horizontal flipping is enabled.
        """
        # Note that I am using the original data and data augmented data, because this works best for
        # behavioral cloning tasks.
        #
        # NOTE(review): __getitem__ feeds this generator images that train_data_flow has
        # already rescaled by 1/255, so `rescale` is applied a second time to the augmented
        # half. Whether the result is still in [0, 1] depends on how the installed Keras
        # version rescales images during brightness shifts - verify on an augmented batch.
        self.datagen = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1./255,
            rotation_range=rot_range,
            brightness_range=(bright_min, bright_max),
            horizontal_flip=hori_flip
        )

        self.datagen.fit(steering_frames)

    def _require_generators(self):
        """Raise a clear error if the generators have not been created yet."""
        if self.train_data_flow is None or self.datagen is None:
            raise RuntimeError("Call create_train_val_gens() and create_aug_gen() "
                               "before using the DataSequence.")

    def __len__(self):
        """Return the number of batches per epoch.

        NOTE(review): every batch takes only ``batch_size // 2`` images from
        ``train_data_flow``, so this many batches draw about half of the training
        images per epoch - confirm that this epoch length is intended.
        """
        self._require_generators()
        return self.train_data_flow.samples // self.batch_size

    def __getitem__(self, idx):
        """Return one ``(x_batch, y_batch)`` pair: original images followed by augmented ones.

        ``idx`` is not used: each call takes the next batch from ``train_data_flow``.
        """
        self._require_generators()

        # Getting batch_size/2 samples from original data.
        orig_x, orig_y = next(self.train_data_flow)

        # Getting the other half by augmenting those same samples.
        aug_flow = self.datagen.flow(orig_x, orig_y, batch_size=self.batch_size // 2)
        aug_x, aug_y = next(aug_flow)

        x_batch = np.vstack((orig_x, aug_x))
        y_batch = np.vstack((orig_y, aug_y))

        return (x_batch, y_batch)


        
        

In [4]:
data_seq = DataSequence(validation_split=VALIDATION_SPLIT,
                        img_shape=IMGS_SHAPE,
                        batch_size=BATCH_SIZE)

# The stored training frames are only needed to fit the generators.
with open(f"data/steering_frames_{MODELS[MODEL_I]}.npy", "rb") as file:
    steering_frames = np.load(file)  # Deleted from memory again further down in this cell.

data_seq.create_train_val_gens(steering_frames=steering_frames,
                               data_path=DATA_PATH,
                               seed_num=SEED_NUM)
data_seq.create_aug_gen(steering_frames=steering_frames,
                        rot_range=ROT_RANGE,
                        bright_min=BRIGHT_MIN,
                        bright_max=BRIGHT_MAX,
                        hori_flip=HORI_FLIP)

# Both generators are fitted, so the frames can be released.
del steering_frames


# Take one small batch of the training data to visualise and to test the data augmentation with.
x_sample_batch, y_sample_batch = next(data_seq.train_data_flow)

print("Temp samples:", x_sample_batch.shape, y_sample_batch.shape)

Found 6477 images belonging to 3 classes.
Found 718 images belonging to 3 classes.
Temp samples: (32, 240, 320, 3) (32, 3)


# Visualising the data 

In [None]:
# To plot images in a grid
def plot_imgs(images, labels, rows=3, cols=3, fig_w=15, fig_h=10):
    """Plot the first ``rows * cols`` images in a grid, each titled with its label.

    Args:
        images: Indexable batch of image arrays; values are multiplied by 255 and
            cast to int for display, so they are expected to be scaled to [0, 1].
        labels: Indexable batch of labels, shown via ``str(label)`` as the titles.
        rows: Number of grid rows.
        cols: Number of grid columns.
        fig_w: Figure width passed to ``figsize``.
        fig_h: Figure height passed to ``figsize``.

    If fewer than ``rows * cols`` samples are given, the surplus cells stay blank.
    """
    # squeeze=False keeps `axes` two-dimensional even when rows == 1 or cols == 1.
    fig, axes = plt.subplots(rows, cols, figsize=(fig_w, fig_h), squeeze=False)

    n_samples = min(len(images), len(labels))

    # axes.flat walks the grid row by row, left to right.
    for sample_index, ax in enumerate(axes.flat):
        if sample_index < n_samples:
            img = (images[sample_index] * 255).astype(int)
            ax.set_title(str(labels[sample_index]))
            ax.imshow(img)
        ax.axis("off")

    # Lay out after drawing so that the titles are taken into account.
    fig.tight_layout()
        

In [None]:
# Show the first nine images of the sample batch (default 3x3 grid), each titled with its label.
plot_imgs(x_sample_batch, y_sample_batch)

# Testing data augmentation

### Rotation 

In [None]:
# Rotation only. No rescale here: the sample batch was already rescaled when it was loaded.
datagen_rot = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=ROT_RANGE,
)

data_flow = datagen_rot.flow(x_sample_batch, y_sample_batch, batch_size=len(x_sample_batch))

# Draw ONE batch and unpack it. Calling next() separately for images and labels
# returns two different batches, so the titles would not match the images.
rot_imgs, rot_labels = next(data_flow)
plot_imgs(rot_imgs, rot_labels)

### Brightness

In [None]:
# Brightness only.
# NOTE(review): x_sample_batch was already rescaled by 1/255 when it was loaded, so
# rescale=1./255 here is a second rescale. Whether the output is still in [0, 1] depends on
# how the installed Keras version rescales images during brightness shifts - check the plot.
datagen_bright = tf.keras.preprocessing.image.ImageDataGenerator(
    brightness_range=(BRIGHT_MIN, BRIGHT_MAX),
    rescale=1./255
)

data_flow = datagen_bright.flow(x_sample_batch, y_sample_batch, batch_size=len(x_sample_batch))

# Draw ONE batch and unpack it. Calling next() separately for images and labels
# returns two different batches, so the titles would not match the images.
bright_imgs, bright_labels = next(data_flow)
plot_imgs(bright_imgs, bright_labels)

###  Horizontal Flipping

In [None]:
# Horizontal flipping only. No rescale here: the sample batch was already rescaled when it was loaded.
datagen_flip = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=HORI_FLIP,
)

data_flow = datagen_flip.flow(x_sample_batch, y_sample_batch, batch_size=len(x_sample_batch))

# Draw ONE batch and unpack it. Calling next() separately for images and labels
# returns two different batches, so the titles would not match the images.
flip_imgs, flip_labels = next(data_flow)
plot_imgs(flip_imgs, flip_labels)

# Testing/visualising all data augmentations together 

In [None]:
# Run the sample batch once through the generator that is used during training,
# i.e. with all augmentations combined.
aug_flow = data_seq.datagen.flow(x_sample_batch, y_sample_batch, batch_size=32)
data_aug_data = next(aug_flow)

# The batch is an (images, labels) pair.
aug_batch_imgs, aug_batch_labels = data_aug_data[0], data_aug_data[1]

print("Augmented batch:", aug_batch_imgs.shape, aug_batch_labels.shape)

# 5 rows x 3 columns, on a taller figure.
plot_imgs(aug_batch_imgs, aug_batch_labels, rows=5, fig_h=20)


# Creating the model

In [5]:
# Please note this is currently just a dummy model, just to test.

inputs = tf.keras.Input(shape=IMGS_SHAPE)

# Three strided convolutions shrink the image before the dense layers.
x = layers.Conv2D(24, (5,5), padding="same", strides=2, activation="elu")(inputs)  # org
x = layers.Conv2D(24, (5,5), strides=6, activation="elu")(x)
x = layers.Conv2D(36, (5,5), strides=6, activation="elu")(x)
# x = layers.Conv2D(48, (5,5), strides=2, activation="elu")(x)
# x = layers.Conv2D(64, (3,3), activation="elu")(x)
# x = layers.Conv2D(64, (3,3), activation="elu")(x)

x = layers.Flatten()(x)

x = layers.Dense(100, activation="elu")(x)

# x = layers.Dense(50, activation="elu")(x)
# x = layers.Dense(10, activation="elu")(x)

# One softmax output per class (the data generators report 3 classes).
outputs = layers.Dense(3, activation="softmax")(x)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras versions no longer accept.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=["accuracy"])
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 240, 320, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 120, 160, 24)      1824      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 26, 24)        14424     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 4, 36)          21636     
_________________________________________________________________
flatten (Flatten)            (None, 432)               0         
_________________________________________________________________
dense (Dense)                (None, 100)               43300     
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 303   

# Model training 

In [None]:
EPOCHS = 2

# len() of the validation iterator is its number of batches rounded up, so the final
# partial batch is evaluated too. `samples // BATCH_SIZE` dropped it every epoch.
history = model.fit(data_seq,
                    validation_data=data_seq.val_data_flow,
                    validation_steps=len(data_seq.val_data_flow),
                    epochs=EPOCHS)

In [None]:
# For custom epoch step:

# for e in range(EPOCHS):
#     print("----------------------------------------Epoch", e, ":")
    
#     batches = 0
    
#     for x_batch, y_batch in data_seq.train_data_flow:
#         model.fit(x=x_batch, y=y_batch,
#                   steps_per_epoch = 1,
#                   batch_size = x_batch.shape[0],
#                   validation_data = data_seq.val_data_flow,
#                   validation_steps = 1,
#                   verbose=2)
        
#         batches += 1
        
#         if batches >= data_seq.train_data_flow.samples // BATCH_SIZE:
#             break