In [19]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [20]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import IPython.display as display
from functools import partial
import matplotlib.pyplot as plt
import glob
import zipfile
from PIL import Image, ImageDraw
from datetime import datetime
from sklearn.model_selection import KFold
import random

In [21]:
# Execution-environment switch read by the data-loading cell: when True the
# notebook mounts Google Drive and unpacks the dataset archive; when False it
# reads the TFRecord files from a local directory.
is_colab = False

In [22]:
# Seed TensorFlow's global RNG and Python's `random` module so repeated runs
# start from the same random state.
# NOTE(review): NumPy's RNG is not seeded here — confirm nothing relies on np.random.
tf.random.set_seed(0)
random.seed(0)


In [23]:
# Report the visible GPU count and the TensorFlow build in use.
print(f"Num GPUs Available:  {len(tf.config.list_physical_devices('GPU'))}")
print(f"TensorFlow version:  {tf.__version__}")

Num GPUs Available:  1
TensorFlow version:  2.15.0


In [24]:
def parse_tfrecord_fn(example_proto):
    """Decode one serialized example into an ``(image, steering)`` pair.

    Args:
        example_proto: a serialized ``tf.train.Example`` holding a JPEG-encoded
            ``'image'`` bytes feature and a scalar float ``'steering'`` feature.

    Returns:
        A tuple ``(image, steering)``: the image decoded to 3 channels, cast to
        float32 and divided by 255, and the steering value as parsed.
    """
    # Schema of a single record: one bytes feature and one float feature.
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'steering': tf.io.FixedLenFeature([], tf.float32),
    }
    record = tf.io.parse_single_example(example_proto, schema)

    # Random-brightness augmentation is currently switched off:
    # if random.random() > 0.5:
    #     image = tf.image.random_brightness(image, 0.5)

    # Decode the JPEG bytes, then rescale the pixel values by 1/255 as float32.
    pixels = tf.image.decode_jpeg(record['image'], channels=3)
    pixels = tf.cast(pixels, tf.float32)
    pixels = pixels / 255.0

    return pixels, record['steering']


In [25]:
def load_dataset(tfrecord_files):
    """Build a dataset of parsed ``(image, steering)`` pairs from TFRecord files.

    Args:
        tfrecord_files: path or list of paths to the TFRecord files to read.

    Returns:
        A ``tf.data.Dataset`` in which every raw record has been passed through
        ``parse_tfrecord_fn``.
    """
    raw_dataset = tf.data.TFRecordDataset(tfrecord_files)
    # JPEG decoding dominates the cost of this pipeline; let tf.data run the
    # parse function on several records at once. Element order is unchanged
    # because `map` is deterministic unless explicitly configured otherwise.
    return raw_dataset.map(parse_tfrecord_fn, num_parallel_calls=tf.data.AUTOTUNE)

In [26]:
# Locate the TFRecord files, either by unpacking the archive from Google Drive
# (Colab) or by reading a local directory.
if is_colab:
    from google.colab import drive
    drive.mount('/content/drive')

    # exist_ok makes the cell safe to re-run without a separate existence check.
    os.makedirs("datasets", exist_ok=True)

    path_to_zip_file = "/content/drive/MyDrive/Colab Notebooks/AD/datasets/dataset_2024-04-0.zip"
    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
        zip_ref.extractall("./datasets")

    dataset_dir = "./datasets"
else:
    # The local default can be overridden with the AD_DATASET_DIR environment
    # variable so the notebook is not tied to a single machine's home directory.
    dataset_dir = os.environ.get(
        "AD_DATASET_DIR",
        "/home/anaya/Develop/autonomous_driving_training/datasets",
    )

# glob returns files in arbitrary (filesystem) order; sort so the record order,
# and therefore the seeded shuffle and the fold membership, is reproducible.
tfrecord_files = sorted(glob.glob(os.path.join(dataset_dir, "*.tfrecord")))

# Fail early with a clear message rather than training on an empty dataset.
if not tfrecord_files:
    raise FileNotFoundError(f"No .tfrecord files found in {dataset_dir!r}")

In [27]:
def get_model():
    """Build the (uncompiled) steering-regression network.

    Layout, in order:
      * input of shape (66, 200, 3);
      * five Conv2D -> BatchNormalization -> ReLU stages;
      * Flatten followed by Dropout(0.5);
      * three Dense -> BatchNormalization -> ReLU stages (1164, 100, 50 units);
      * Dense(10) -> ReLU without normalization;
      * a single-unit linear output.

    Returns:
        A new ``tf.keras.Sequential`` instance.
    """
    # (filters, kernel_size, strides) for each convolutional stage.
    conv_stages = (
        (24, 5, 2),
        (36, 5, 2),
        (48, 5, 2),
        (64, 3, 1),
        (64, 3, 1),
    )
    # Units of the fully connected stages that are followed by batch norm.
    normalized_dense_units = (1164, 100, 50)

    stack = [layers.InputLayer((66, 200, 3))]

    for filters, kernel, stride in conv_stages:
        stack.append(layers.Conv2D(filters, kernel_size=kernel, strides=stride))
        stack.append(layers.BatchNormalization())
        stack.append(layers.ReLU())

    # Flatten the feature maps before the dense head; dropout regularizes it.
    stack.append(layers.Flatten())
    stack.append(layers.Dropout(0.5))

    for units in normalized_dense_units:
        stack.append(layers.Dense(units))
        stack.append(layers.BatchNormalization())
        stack.append(layers.ReLU())

    # Last hidden layer (no batch norm) and the scalar output.
    stack.append(layers.Dense(10))
    stack.append(layers.ReLU())
    stack.append(layers.Dense(1))

    return tf.keras.Sequential(stack)

In [28]:

parsed_dataset = load_dataset(tfrecord_files)

# Shuffle ONCE, with a fixed seed, and keep that order for every later pass.
# With the default (reshuffle on each iteration) every fold window cut out of
# this dataset with skip()/take() would see a different permutation, so the
# folds would overlap and validation samples would leak into the training
# folds. Any per-epoch reshuffling of training data belongs on the training
# split itself, after the folds have been cut.
shuffled_dataset = parsed_dataset.shuffle(2040, seed=0, reshuffle_each_iteration=False)

# Determine split sizes
total_items = sum(1 for _ in shuffled_dataset.as_numpy_iterator())

num_folds = 4
# Integer division: up to num_folds - 1 trailing items belong to no fold.
fold_size = total_items // num_folds

In [29]:
# Summarize the split sizes computed above.
print(f"Total items:  {total_items}")
print(f"Fold size:  {fold_size}")
print(f"Number of folds:  {num_folds}")

Total items:  49701
Fold size:  12425
Number of folds:  4


In [30]:
# NOTE - Restart experiment from here
random.seed(0)
tf.random.set_seed(0)

# One dataset per fold: fold k is the k-th consecutive window of `fold_size`
# elements of `shuffled_dataset`.
datasets_for_fold = [
    shuffled_dataset.skip(fold_index * fold_size).take(fold_size)
    for fold_index in range(num_folds)
]



In [31]:
# Every notebook run logs under its own timestamped directory.
log_dir = f"./logs/cv/{datetime.now():%Y%m%d-%H%M%S}"

# TensorBoard callback configured to write summaries once per epoch, plus
# histograms, the model graph, weight images, embeddings, and a profile of batch 2.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    write_graph=True,
    write_images=True,
    update_freq='epoch',
    profile_batch=2,
    embeddings_freq=1,
)

In [32]:
# Hyper-parameter grid: experiment name -> optimizer / schedule settings.
# Disabled entries, kept for reference:
#   experiment_4: learning_rate=1e-4, weight_decay=1e-5, scheduler="constant"
#   experiment_3: learning_rate=1e-4, weight_decay=1e-6, scheduler="constant"
experiments = {
    "experiment_5": dict(learning_rate=1e-5, weight_decay=1e-5, scheduler="constant"),
    "experiment_6": dict(learning_rate=1e-5, weight_decay=1e-6, scheduler="constant"),
    "experiment_7": dict(learning_rate=1e-5, weight_decay=1e-7, scheduler="constant"),
    "experiment_8": dict(learning_rate=1e-6, weight_decay=1e-6, scheduler="constant"),
    "experiment_9": dict(learning_rate=1e-5, weight_decay=1e-5, scheduler="step"),
    "experiment_10": dict(learning_rate=1e-5, weight_decay=1e-6, scheduler="step"),
}

In [33]:
def meta_scheduler(steps, alpha=0.1):
    """Create a step-decay function for ``tf.keras.callbacks.LearningRateScheduler``.

    Args:
        steps: either a positive int ``n`` (multiply the learning rate by
            ``alpha`` every ``n`` epochs, i.e. at 0-based epochs n, 2n, ...),
            or a list/tuple/set of 0-based epoch indices at which to apply
            the decay.
        alpha: multiplicative decay factor applied at each milestone.

    Returns:
        A function ``scheduler(epoch, lr)`` that returns the learning rate to
        use for ``epoch`` given the current rate ``lr``.

    Raises:
        ValueError: if ``steps`` is not a positive int or a collection of
            epoch indices. The check happens when the scheduler is built, so
            a bad configuration fails before training starts.
    """
    # bool is a subclass of int; treating True/False as a period is never intended.
    if isinstance(steps, bool):
        raise ValueError("Invalid steps parameter")

    if isinstance(steps, int):
        if steps < 1:
            raise ValueError("Invalid steps parameter")

        def is_milestone(epoch):
            # Period is exactly `steps` epochs; epoch 0 keeps the initial rate.
            return epoch > 0 and epoch % steps == 0
    elif isinstance(steps, (list, tuple, set, frozenset)):
        # Snapshot the milestones so later mutation of the argument has no effect.
        milestones = frozenset(steps)

        def is_milestone(epoch):
            return epoch in milestones
    else:
        raise ValueError("Invalid steps parameter")

    def scheduler(epoch, lr):
        return lr * alpha if is_milestone(epoch) else lr

    return scheduler

In [34]:
from tensorflow.keras import backend as K

def steering_accuracy(threshold=0.05):
    """Build a metric measuring how often the prediction is close to the target.

    Args:
        threshold: largest absolute error that still counts as a hit.

    Returns:
        A function ``SA(y_true, y_pred)`` that returns the mean, over all
        elements, of the indicator ``|y_true - y_pred| <= threshold``.
    """
    def SA(y_true, y_pred):
        abs_error = K.abs(y_true - y_pred)
        within_tolerance = K.less_equal(abs_error, threshold)
        # Cast the boolean hits to the backend float type so they can be averaged.
        hits = K.cast(within_tolerance, K.floatx())
        return K.mean(hits)
    return SA

In [35]:
# K-fold cross-validation over the pre-built `datasets_for_fold` splits.

def run_cross_validation(experiment_name: str, experiment_params: dict, batch_size: int = 64,
                         epochs: int = 30, shuffle_buffer: int = 2048):
    """Train and validate a fresh model on every fold for one experiment.

    For fold ``i`` the model is validated on ``datasets_for_fold[i]`` and
    trained on the concatenation of all the other folds.

    Args:
        experiment_name: label used in the console output and as the
            TensorBoard sub-directory for this experiment.
        experiment_params: dict with the keys ``"learning_rate"``,
            ``"weight_decay"`` and ``"scheduler"``; the scheduler value
            ``"step"`` enables a step learning-rate decay, any other value
            keeps the rate constant.
        batch_size: batch size for both training and validation; incomplete
            final batches are dropped.
        epochs: maximum number of epochs per fold (early stopping may end a
            fold sooner).
        shuffle_buffer: buffer size used to reshuffle the training split on
            every epoch; pass 0 or None to leave the training order untouched.

    Returns:
        A list with one entry per fold, in fold order. Each entry is the
        ``History.history`` dict produced by ``model.fit`` (metric name ->
        list of per-epoch values), so results can be aggregated across folds.

    Raises:
        ValueError: if fewer than two folds are available, since no training
            split could be formed.
    """
    if len(datasets_for_fold) < 2:
        raise ValueError("Cross-validation needs at least two folds")

    fold_histories = []

    for i in range(num_folds):
        # Models are created in a loop; drop the Keras global state left by the
        # previous fold so memory use and layer-name counters do not keep growing.
        tf.keras.backend.clear_session()

        # Identical seeds for every fold, so folds differ only in their data.
        tf.random.set_seed(0)
        random.seed(0)

        # Training split: every fold except the held-out one, chained together.
        train_datasets = [ds for j, ds in enumerate(datasets_for_fold) if j != i]
        train_dataset = train_datasets[0]
        for ds in train_datasets[1:]:
            train_dataset = train_dataset.concatenate(ds)

        # Reshuffle the training split itself on each epoch, so the training
        # order does not depend on how the folds were built upstream.
        if shuffle_buffer:
            train_dataset = train_dataset.shuffle(shuffle_buffer, reshuffle_each_iteration=True)

        # Validation dataset
        val_dataset = datasets_for_fold[i]

        model = get_model()

        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                learning_rate=experiment_params["learning_rate"],
                weight_decay=experiment_params["weight_decay"]
            ),
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[steering_accuracy()]
        )

        # Stop a fold when either the training or the validation loss has not
        # improved by at least min_delta for `patience` consecutive epochs.
        train_early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=4, min_delta=0.0005)
        val_early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, min_delta=0.0005)
        callbacks_list = [tensorboard_callback, train_early_stopping, val_early_stopping]

        # Optional step decay: multiply the learning rate by 0.1 on a 15-epoch schedule.
        if experiment_params["scheduler"] == "step":
            callback_lr_scheduler = tf.keras.callbacks.LearningRateScheduler(meta_scheduler(15, 0.1))
            callbacks_list.append(callback_lr_scheduler)

        print(f"Cross-validation: {experiment_name} on fold {i + 1}")
        print("Params: ", experiment_params)

        # The TensorBoard callback is shared between folds; re-point it before
        # each fit so every experiment/fold logs into its own sub-directory.
        tensorboard_callback.log_dir = log_dir + f"/{experiment_name}/fold_{i + 1}"

        history = model.fit(
            train_dataset.batch(batch_size, drop_remainder=True).prefetch(tf.data.AUTOTUNE),
            validation_data=val_dataset.batch(batch_size, drop_remainder=True).prefetch(tf.data.AUTOTUNE),
            callbacks=callbacks_list,
            epochs=epochs
        )

        # Keep only the plain metrics dict so finished models are not kept alive.
        fold_histories.append(history.history)

    return fold_histories
       


In [36]:
# Run the full cross-validation once per configured experiment, in dict order.
for experiment_name in experiments:
    experiment_params = experiments[experiment_name]
    run_cross_validation(experiment_name, experiment_params, batch_size=40)


Cross-validation: experiment_5 on fold 1
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Cross-validation: experiment_5 on fold 2
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Cross-validation: experiment_5 on fold 3
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoc

KeyboardInterrupt: 

In [None]:
# def scheduler(epoch, lr):
#     if epoch in (4, 10, 15):
#         return lr * 0.5
#     else:
#         return lr
# 
# callback_lr_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# %load_ext tensorboard

In [None]:
# %tensorboard --logdir ./logs/fit/