In [1]:
import numpy as np
import pandas as pd
import os
import pickle

from sklearn.model_selection import train_test_split

os.environ["KERAS_BACKEND"] = "tensorflow"

import tensorflow as tf

import keras
import keras.backend as K

from tqdm import tqdm

%matplotlib inline

2025-06-28 23:55:14.293921: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751154914.542341      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751154914.609549      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# 4. Regression metrics

In [2]:
@tf.function
def rmse(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    
    y_true = tf.reshape(y_true, [-1, 1])
    y_pred = tf.reshape(y_pred, [-1, 1])
    
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

@tf.function
def eva(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    
    y_true = tf.reshape(y_true, [-1, 1])
    y_pred = tf.reshape(y_pred, [-1, 1])

    numerator = tf.math.reduce_variance(y_true - y_pred)
    denominator = tf.math.reduce_variance(y_true) + tf.keras.backend.epsilon()
    
    return 1.0 - numerator / denominator

@tf.function
def r2_score(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    
    y_true = tf.reshape(y_true, [-1, 1])
    y_pred = tf.reshape(y_pred, [-1, 1])
    
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
    ss_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))
    
    return 1 - (ss_res / (ss_tot + K.epsilon()))

# 4. ResNet 8/18/34 construction

In [3]:
def resnet_block(x, filters, stride=1, use_projection=False):
    shortcut = x

    x = keras.layers.Conv2D(filters, 3, strides=stride, padding='same', use_bias=False)(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.ReLU()(x)

    x = keras.layers.Conv2D(filters, 3, strides=1, padding='same', use_bias=False)(x)
    x = keras.layers.BatchNormalization()(x)

    if use_projection:
        shortcut = keras.layers.Conv2D(filters, 1, strides=stride, use_bias=False)(shortcut)
        shortcut = keras.layers.BatchNormalization()(shortcut)

    x = keras.layers.Add()([x, shortcut])
    x = keras.layers.ReLU()(x)
    return x

In [4]:
def ResNet8_DroNet(input_shape=(200, 200, 1), dropout_rate=0.3):
    inputs = keras.Input(shape=input_shape)

    x = keras.layers.Conv2D(32, 5, strides=2, padding='same', use_bias=False)(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.ReLU()(x)

    # Residual Blocks
    x = resnet_block(x, 32)
    x = resnet_block(x, 32)

    x = resnet_block(x, 64, stride=2, use_projection=True)
    x = resnet_block(x, 64)

    x = resnet_block(x, 128, stride=2, use_projection=True)
    x = resnet_block(x, 128)

    return keras.models.Model(inputs, x, name="ResNet8_DroNet")

In [5]:
def ResNet18(input_shape=(224, 224, 3)):
    inputs = keras.Input(shape=input_shape)
    
    x = keras.layers.Conv2D(64, 7, strides=2, padding='same', use_bias=False)(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.ReLU()(x)
    x = keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)

    # Conv2_x
    x = resnet_block(x, 64)
    x = resnet_block(x, 64)

    # Conv3_x
    x = resnet_block(x, 128, stride=2, use_projection=True)
    x = resnet_block(x, 128)

    # Conv4_x
    x = resnet_block(x, 256, stride=2, use_projection=True)
    x = resnet_block(x, 256)

    # Conv5_x
    x = resnet_block(x, 512, stride=2, use_projection=True)
    x = resnet_block(x, 512)

    return keras.models.Model(inputs, x, name="ResNet18_backbone")

In [6]:
def ResNet34(input_shape=(224, 224, 3)):
    inputs = keras.Input(shape=input_shape)
    
    x = keras.layers.Conv2D(64, 7, strides=2, padding='same', use_bias=False)(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.ReLU()(x)
    x = keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)

    # Conv2_x (3 blocks)
    for _ in range(3):
        x = resnet_block(x, 64)

    # Conv3_x (4 blocks)
    x = resnet_block(x, 128, stride=2, use_projection=True)
    for _ in range(3):
        x = resnet_block(x, 128)

    # Conv4_x (6 blocks)
    x = resnet_block(x, 256, stride=2, use_projection=True)
    for _ in range(5):
        x = resnet_block(x, 256)

    # Conv5_x (3 blocks)
    x = resnet_block(x, 512, stride=2, use_projection=True)
    for _ in range(2):
        x = resnet_block(x, 512)

    return keras.models.Model(inputs, x, name="ResNet34_backbone")

# Grid search configurations

In [7]:
from itertools import product

configurations = list(product([1e-2, 1e-3, 1e-4], [32, 64, 128]))

# Models descriptions

In [8]:
models_descriptions = [
    {
        "name": "DroNet-500",
        "backbone": {
            "name": "ResNet-8",
            "weights": "dronet"
        },
        "input": {
            "shape": (200, 200, 1),
            "grayscale": True
        },
        "head_description": [
            (32, True, False),
        ]
    },
    {
        "name": "DroNet-600",
        "backbone": {
            "name": "ResNet-8",
            "weights": "dronet"
        },
        "input": {
            "shape": (200, 200, 1),
            "grayscale": True
        },
        "head_description": [
            (64, True, False),
        ]
    },
]

# Format verification
for model_description in models_descriptions:
    if "backbone" not in model_description:
        raise KeyError("backbone not specified at:\n" + str(model_description))

    if "name" not in model_description["backbone"]:
        raise KeyError("backbone.name not specified at:\n" + str(model_description))

    if "weights" not in model_description["backbone"]:
        model_description["backbone"]["weights"] = None
    
    if "name" not in model_description:
        model_description["name"] = model_description["backbone"]

    if "head_description" not in model_description:
        model_description["head_description"] = []

    if "weights" not in model_description:
        model_description["weights"] = None

    if "input" not in model_description:
        raise KeyError("input not specified at:\n" + str(model_description))

    if "shape" not in model_description["input"]:
        raise KeyError("input.shape not specified at:\n" + str(model_description))
    
    if "grayscale" not in model_description["input"]:
        model_description["input"]["grayscale"] = False

# Load dataset and build pipeline

In [9]:
def load_samples_dataframe(path: str):
    images_folder = os.path.join(path, "images")

    samples_df = pd.read_csv(
        os.path.join(path, "market_dataset_xy.txt"),
        sep=" ",
        header=None,
        names=["file path", "_", "datetime", "vel_y", "vel_x"]
    )

    samples_df["file path"] = (
        samples_df["file path"]
        .apply(lambda image_name: os.path.join(path, "images", image_name))
    )
    
    return samples_df

In [10]:
@tf.function
def augment(image, label):
    # Brightness and contrast
    image = tf.image.random_brightness(image, max_delta=0.2)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    
    # Flip horizontally
    do_flip = tf.random.uniform([]) > 0.5
    image = tf.cond(do_flip, lambda: tf.image.flip_left_right(image), lambda: image)
    
    vel_x = tf.cond(do_flip, lambda: -label["vel_x"], lambda: label["vel_x"])
    vel_y = label["vel_y"]
    
    # Optionally add noise
    noise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=0.025)
    image = image + noise
    image = tf.clip_by_value(image, 0.0, 1.0)

    return image, {"vel_x": vel_x, "vel_y": vel_y}

In [11]:
def load_image(path, mode="color"):
    image = tf.io.read_file(path)

    if mode == "color":
        image = tf.image.decode_jpeg(image, channels=3)

    elif mode == "grayscale1":
        image = tf.image.decode_jpeg(image, channels=1)

    elif mode == "grayscale3":
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.rgb_to_grayscale(image)
        image = tf.image.grayscale_to_rgb(image)

    else:
        raise ValueError(f"Invalid mode: {mode}")

    return image

In [12]:
def build_train_val_test_datasets(samples_dataframe, input_description: dict, seed=42):
    # Split
    df_train, df_temp = train_test_split(samples_dataframe, test_size=0.4, random_state=seed)
    df_val, df_test = train_test_split(df_temp, test_size=0.5, random_state=seed)

    resolution = input_description["shape"][:2]
    channels = input_description["shape"][2]

    if not input_description["grayscale"]:
        mode = "color"
    elif channels == 1:
        mode = "grayscale1"
    elif channels == 3:
        mode = "grayscale3"

    def process_sample(path, label):
        image = load_image(path, mode=mode)
        image = tf.image.resize(image, resolution)
        image.set_shape(input_description["shape"])
        image = tf.cast(image, tf.float32) / 255.0

        return image, label

    def df_to_tf_dataset(df, training=False):
        paths = tf.constant(df["file path"].tolist())
        vel_x = tf.constant(df["vel_x"].astype("float32").tolist())
        vel_y = tf.constant(df["vel_y"].astype("float32").tolist())
    
        ds = tf.data.Dataset.from_tensor_slices((paths, {"vel_x": vel_x, "vel_y": vel_y}))
    
        ds = ds.map(process_sample, num_parallel_calls=tf.data.AUTOTUNE)
        ds = ds.cache()
    
        if training:
            ds = ds.shuffle(len(df), reshuffle_each_iteration=True)
            ds = ds.map(augment, num_parallel_calls=tf.data.AUTOTUNE)
            
        return ds

    return {
        "train": df_to_tf_dataset(df_train, training=True),
        "val":   df_to_tf_dataset(df_val, training=False),
        "test":  df_to_tf_dataset(df_test, training=False),
    }

In [13]:
def instantiate_backbone(backbone_name, weights=None):
    if backbone_name == "ResNet-8":
        if weights:
            if weights == "dronet":
                return keras.models.load_model("/kaggle/input/marketplace-navigation-dataset/model.keras")
            else:
                print("[INFO] could not find weights for ResNet-8")
        return ResNet8_DroNet(input_shape=(200, 200, 1))
    
    if backbone_name == "ResNet-18":
        if weights is not None:
            print("[INFO] No pre-trained weights for ResNet-18")
        return ResNet18(input_shape=(224, 224, 3))
        
    if backbone_name == "ResNet-34":
        if weights is not None:
            print("[INFO] No pre-trained weights for ResNet-34")
        return ResNet34(input_shape=(224, 224, 3))

    if backbone_name == "ResNet-50":
        return keras.applications.ResNet50(
            include_top=False,
            input_shape=(224, 224, 3),
            weights="imagenet",
            pooling=None,
        )

    if backbone_name == "ResNet-50V2":
        return keras.applications.ResNet50V2(
            include_top=False,
            input_shape=(224, 224, 3),
            weights="imagenet",
            pooling=None,
        )

    if backbone_name == "MobileNet":
        return keras.applications.MobileNet(
            include_top=False,
            input_shape=(224, 224, 3),
            weights="imagenet",
            pooling=None,
            alpha=1.0,
        )

    if backbone_name == "MobileNetV2":
        return keras.applications.MobileNetV2(
            include_top=False,
            input_shape=(224, 224, 3),
            weights="imagenet",
            pooling=None,
            alpha=1.0,
        )

    if backbone_name == "EfficientNetB0":
        return keras.applications.EfficientNetB0(
            include_top=False,
            input_shape=(224, 224, 3),
            weights="imagenet",
            pooling=None,
        )
    
    raise ValueError(f"Backbone {backbone_name} not implemented.")

In [14]:
def build_model(model_description: dict):
    input_shape = model_description["input"]["shape"]
    input_shape = tuple(input_shape)

    backbone_name = model_description["backbone"]["name"]
    backbone_weights = model_description["backbone"]["weights"]

    PREPROCESS = {
        "MobileNet":       keras.applications.mobilenet.preprocess_input,
        "MobileNetV2":     keras.applications.mobilenet_v2.preprocess_input,
        "ResNet-50":       lambda x: keras.applications.resnet.preprocess_input(x, mode="caffe"),
        "ResNet-50V2":     keras.applications.resnet_v2.preprocess_input,
        "EfficientNetB0":  keras.applications.efficientnet.preprocess_input,
    }
    
    inputs = keras.layers.Input(input_shape)
    x = keras.layers.Rescaling(255.)(inputs) # To parse from [0, 1] to [0, 225]
    x = PREPROCESS.get(backbone_name, lambda x: x / 255.0)(x)
    
    backbone = instantiate_backbone(backbone_name, weights=backbone_weights)
    if backbone_weights:
        backbone.trainable = False

    x = backbone(x)
    x = keras.layers.GlobalAveragePooling2D(name="global_pool")(x)
    x = keras.layers.Dropout(0.3, name="dropout")(x)

    for i, (nodes, use_batch_normalization, use_dropout) in enumerate(model_description["head_description"]):
        use_bias = not use_batch_normalization
        
        x = keras.layers.Dense(nodes, use_bias=use_bias, activation="linear", name=f"dense_{i}")(x)

        if use_batch_normalization:
            x = keras.layers.BatchNormalization(name=f"batchnorm_{i}")(x)

        x = keras.layers.ReLU(name=f"relu_{i}")(x)
        
        if use_dropout:
            x = keras.layers.Dropout(0.3, name=f"dropout_{i}")(x)
    
    output_x = keras.layers.Dense(1, name="vel_x")(x)
    output_y = keras.layers.Dense(1, name="vel_y")(x)

    outputs = [output_x, output_y]

    return keras.models.Model(
        inputs=inputs,
        outputs=outputs,
        name=model_description["name"]
    ), backbone

In [15]:
def unfreeze_weights(backbone: keras.Model, from_layer: str, to_layer: str, unfreeze_bn=False):
    unfreeze = False

    for layer in backbone.layers:
        if layer.name == from_layer:
            unfreeze = True
        
        if unfreeze:
            if isinstance(layer, keras.layers.BatchNormalization) and not unfreeze_bn:
                layer.trainable = False
            else:
                layer.trainable = True
        
        if layer.name == to_layer:
            break


In [16]:
def train_model(model: keras.Model, train_dataset: tf.data.Dataset, validation_dataset: tf.data.Dataset):
    history_1 = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        verbose=0,
        epochs=40,
    )

    history_2 = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=200,
        verbose=0,
        callbacks=[
            keras.callbacks.EarlyStopping(
                monitor="val_loss",
                patience=15,
                min_delta=1e-4,
                mode="min",
                restore_best_weights=True,
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor="val_loss",
                patience=5,
                factor=0.5,
                min_lr=1e-7,
                mode="min"
            ),
        ],
    )

    full_history = pd.concat([
        pd.DataFrame(history_1.history),
        pd.DataFrame(history_2.history),
    ]).reset_index(drop=True)

    return model, full_history

# Results structure

In [17]:
results = {}

for model_description in models_descriptions:
    model_name = model_description["name"]
    
    results[model_name] = {}

# Benchmark loop

In [18]:
samples_dataframe = load_samples_dataframe("/kaggle/input/marketplace-navigation-dataset/dataset")
# samples_dataframe = samples_dataframe.sample(frac=0.05)

round_number = 1
seed = round_number

datasets = {}

for model_description in models_descriptions:
    dataset_key = tuple(model_description["input"]["shape"])
    dataset_key = model_description["input"]["grayscale"], dataset_key
    
    if not dataset_key in datasets:
        datasets[dataset_key] = build_train_val_test_datasets(
            samples_dataframe,
            model_description["input"],
            seed
        )
        
for configuration in configurations:
    for model_description in models_descriptions:
        print("Training model", model_description["name"], " for configuration", configuration)
        
        learning_rate, batch_size = configuration
        dataset_key = tuple(model_description["input"]["shape"])
        dataset_key = model_description["input"]["grayscale"], dataset_key

        train_ds = datasets[dataset_key]["train"]
        val_ds   = datasets[dataset_key]["val"]
        test_ds  = datasets[dataset_key]["test"]
        
        train_ds = train_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
        val_ds   = val_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
        test_ds  = test_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

        model, backbone = build_model(model_description)
        
        model.compile(
            loss={
                "vel_x": "mean_squared_error",
                "vel_y": "mean_squared_error",
            },
            optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
            metrics={
                "vel_x": [rmse, eva, r2_score],
                "vel_y": [rmse, eva, r2_score],
            },
        )
        model, history_1 = train_model(model, train_ds, val_ds)

        unfreeze_weights(backbone, "add_2", "add_3")
        
        model.compile(
            loss={
                "vel_x": "mean_squared_error",
                "vel_y": "mean_squared_error",
            },
            optimizer=keras.optimizers.Adam(learning_rate=learning_rate / 1e-3),
            metrics={
                "vel_x": [rmse, eva, r2_score],
                "vel_y": [rmse, eva, r2_score],
            },
        )
        model, history_2 = train_model(model, train_ds, val_ds)

        evaluation_metrics = model.evaluate(test_ds, return_dict=True)
        
        results[model_description["name"]][configuration] = evaluation_metrics

        file_name = f"r{round_number}_{model.name}_{learning_rate:.0e}_{batch_size}"

        model.save(f"{file_name}.keras")
        
        pd.concat([
            history_1,
            history_2,
        ]).reset_index(drop=True).to_csv(f"{file_name}.csv")
        
        with open("results.pkl", "wb") as results_file:
            pickle.dump(results, results_file)

2025-06-28 23:55:28.555701: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Training model DroNet-500  for configuration (0.01, 32)
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 438ms/step - loss: 1.8601 - vel_x_eva: 2.7592e-07 - vel_x_loss: 0.6339 - vel_x_r2_score: -0.0429 - vel_x_rmse: 0.7494 - vel_y_eva: 1.7057e-07 - vel_y_loss: 0.7614 - vel_y_r2_score: -0.0266 - vel_y_rmse: 0.8701
Training model DroNet-600  for configuration (0.01, 32)
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - loss: 5329.0024 - vel_x_eva: 2.5646e-07 - vel_x_loss: 0.6370 - vel_x_r2_score: -0.0486 - vel_x_rmse: 0.7513 - vel_y_eva: 2.0274e-07 - vel_y_loss: 0.7716 - vel_y_r2_score: -0.0378 - vel_y_rmse: 0.8753
Training model DroNet-500  for configuration (0.01, 64)
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 128ms/step - loss: 41.2411 - vel_x_eva: 2.4055e-07 - vel_x_loss: 0.5945 - vel_x_r2_score: -0.0377 - vel_x_rmse: 0.7575 - vel_y_eva: 1.6505e-07 - vel_y_loss: 0.7539 - vel_y_r2_score: -0.0118 - vel_y_rmse: 0.8666
Training 