This notebook is used to organize the experiments. If you just want the best models found, you can refer to the `3_model_training` notebook.

In [72]:
import os
import sys
from functools import partial

import mlflow
import numpy as np
import pandas as pd
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
from mlflow.models import infer_signature
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC

# Make the project's `src` directory importable. Use the `sys` module directly
# (rather than the incidental `os.sys` attribute) and skip the append when the
# entry is already present, so re-running this cell does not pile up duplicates.
src_path = os.path.abspath('../src')
if src_path not in sys.path:
    sys.path.append(src_path)
from data import loader
from data import preprocessor

# Map whose merged simulator datasets are loaded further down
mapname = 'Town01'

In [73]:
def create_and_train_model(model, args):
    """
    Build an estimator, fit it, and score it on the validation split.

    The data is not passed in: the function reads the notebook-level variables
    ``X_train``, ``y_train``, ``X_val`` and ``y_val`` as defined at call time.

    Args:
        model: Callable (estimator class) invoked as ``model(**args)``.
        args: Dict of keyword arguments forwarded to ``model``.

    Returns:
        dict: ``f1_score``, ``recall`` and ``precision`` (weighted average),
        ``accuracy``, and the fitted estimator under ``model``.
    """
    print(f"Params: {args}")

    # Instantiate and fit the candidate with the hyperparameters under test
    estimator = model(**args)
    estimator.fit(X_train, y_train)

    # Score the candidate on the validation data
    predictions = estimator.predict(X_val)
    return {
        "f1_score": f1_score(y_val, predictions, average='weighted'),
        "accuracy": accuracy_score(y_val, predictions),
        "recall": recall_score(y_val, predictions, average='weighted'),
        "precision": precision_score(y_val, predictions, average='weighted'),
        "model": estimator,
    }

In [74]:
def objective(params, model_name):
    """
    Hyperopt objective: train one candidate and record it as a nested MLflow run.

    Reads the notebook-level ``model`` (estimator class) and ``signature``
    variables as they are defined when the trial runs.

    Args:
        params: Hyperparameters sampled by Hyperopt for this trial.
        model_name: Label used in the run name and as the logged model name.

    Returns:
        dict: Hyperopt result with ``loss`` (negated weighted F1, because
        Hyperopt minimizes), ``status`` and a ``model`` label for the trial.
    """
    trial_label = f"{model_name}_{params}"
    reported_metrics = ("accuracy", "recall", "precision", "f1_score")

    with mlflow.start_run(nested=True, run_name=trial_label):
        # Record what is being tried before training starts
        mlflow.log_params(params)

        result = create_and_train_model(model, args=params)

        # Validation metrics, then the fitted estimator itself
        mlflow.log_metrics({metric: result[metric] for metric in reported_metrics})
        mlflow.sklearn.log_model(
            result["model"], name=model_name, signature=signature)

        return {
            "loss": -result["f1_score"],
            "status": STATUS_OK,
            "model": trial_label,
        }

In [75]:
def run_experiments(run_name, max_evals, search_space, data_name, model_name=None):
    """
    Run a TPE hyperparameter sweep inside a parent MLflow run.

    Each trial is executed by ``objective`` (as a nested run). The experiment
    the runs land in is whichever one is active in MLflow when this is called;
    this function does not create or select it.

    Args:
        run_name: Name of the parent MLflow run.
        max_evals: Number of Hyperopt trials to run.
        search_space: Hyperopt search space passed to ``fmin``.
        data_name: Dataset label logged as the ``dataset`` param.
        model_name: Model label logged as tag/param and forwarded to ``objective``.

    Returns:
        float: Best (highest) weighted F1 score found across the trials.
    """
    print(
        f"This will run {max_evals} trials to find optimal hyperparameters...")

    with mlflow.start_run(run_name=run_name):

        mlflow.set_tag("model_type", model_name)

        # Log experiment metadata. The optimized quantity is the weighted F1
        # returned (negated) by `objective`, not a log loss.
        mlflow.log_params(
            {
                "optimization_method": "Tree-structured Parzen Estimator (TPE)",
                "max_evaluations": max_evals,
                "objective_metric": "weighted_f1_score",
                "dataset": data_name,
                "model_type": model_name
            }
        )

        # Run optimization
        trials = Trials()
        objective_with_name = partial(objective, model_name=model_name)

        best_assignment = fmin(
            fn=objective_with_name,
            space=search_space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=trials,
            verbose=True,
        )
        # For `hp.choice` dimensions `fmin` reports the index of the chosen
        # option, not the option itself; `space_eval` maps the assignment back
        # to the actual hyperparameter values so the logged params are usable.
        best_params = space_eval(search_space, best_assignment)

        # Find and log best results (loss is the negated F1, so min loss = max F1)
        best_trial = min(trials.results, key=lambda x: x["loss"])
        best_f1_score = -best_trial["loss"]

        # Log optimization results
        mlflow.log_params(best_params)
        mlflow.log_params({"best_model": best_trial["model"]})
        mlflow.log_metrics(
            {
                "best_f1_score": best_f1_score,
                "total_trials": len(trials.trials),
                "optimization_completed": 1,
            }
        )

    return best_f1_score


In [76]:
# Single source of truth for the tracking server address. Defaults to the local
# server used so far; set the MLFLOW_TRACKING_URI environment variable to point
# the notebook at a different server without editing this cell.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI", "http://127.0.0.1:8080")
client = mlflow.tracking.MlflowClient(tracking_uri)
mlflow.set_tracking_uri(tracking_uri)

## Preparing the Data

In [77]:
data_path = '../data'

# UAH training / validation splits
uah_training = pd.read_csv(f'{data_path}/base/training_set_uah.csv')
uah_validation = pd.read_csv(f'{data_path}/base/validation_set_uah.csv')


def _read_merged(source, variant):
    """Read merged/<mapname>/<source>/<source>_<variant>.csv and drop its 'origin' column."""
    merged = pd.read_csv(f'{data_path}/merged/{mapname}/{source}/{source}_{variant}.csv')
    return merged.drop(columns=['origin'])


# Simulator-only datasets
carla_fixed = _read_merged('carla', 'fixed')
carla_llm = _read_merged('carla', 'llm')

sumo_fixed = _read_merged('sumo', 'fixed')
sumo_llm = _read_merged('sumo', 'llm')

# Simulator + UAH datasets
carla_uah_fixed = _read_merged('carla_uah', 'fixed')
carla_uah_llm = _read_merged('carla_uah', 'llm')

sumo_uah_fixed = _read_merged('sumo_uah', 'fixed')
sumo_uah_llm = _read_merged('sumo_uah', 'llm')

Applying sliding windows to the full SUMO and CARLA data (the UAH data is windowed later, in each experiment's setup cell)

In [78]:
# Window settings passed to preprocessor.sliding_windows
window_size, step_size = 10, 5

# Label name -> integer code, passed to preprocessor.one_hot_encode
one_hot_keys = dict(normal=0, aggressive=1)

In [None]:
# Bind the shared window settings once so every dataset is windowed identically
make_windows = partial(
    preprocessor.sliding_windows, window_size=window_size, step_size=step_size
)

# SUMO
X_sumo_fixed, y_sumo_fixed = make_windows(sumo_fixed)
X_sumo_llm, y_sumo_llm = make_windows(sumo_llm)

# CARLA
X_carla_fixed, y_carla_fixed = make_windows(carla_fixed)
X_carla_llm, y_carla_llm = make_windows(carla_llm)

Defining the search space for each model

In [54]:
# Number of Hyperopt trials per sweep
max_evals = 5

# Random forest
search_space_rf = dict(
    n_estimators=hp.choice("n_estimators", [20, 50, 100, 200, 500]),
    max_depth=hp.choice("max_depth", [None, 5, 10, 20, 50]),
    random_state=42,
)

# SVC
search_space_svc = dict(
    # Regularization parameter
    C=hp.choice("C", [0.01, 0.05, 0.1, 0.5, 1]),
    # Kernel type
    kernel=hp.choice("kernel", ["rbf"]),
    # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    gamma=hp.choice("gamma", [0.001, 0.01, 0.1, 1, 10]),
)

# Linear SVC
search_space_svc_linear = dict(
    # Regularization parameter
    C=hp.choice("C", [10**-4, 10**-3, 10**-2, 0.1, 1]),
)

# XGBoost
search_space_xgb = dict(
    n_estimators=hp.choice("n_estimators", [200, 500, 700, 1000]),
    max_depth=hp.choice("max_depth", [None, 5, 10]),
    # Learning rate
    learning_rate=hp.choice("learning_rate", [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5]),
    # "subsample": hp.uniform("subsample", 0.5, 1.0),
    # "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1.0),
    random_state=42,
)

## Real Only

In [84]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH driveset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Reuse the Experiment when it already exists, create it otherwise. Looking it
# up first (instead of catching RestException) keeps unrelated server errors
# from being reported as "already exists", and leaves
# `driver_behavior_experiment` holding the experiment ID in both cases.
experiment_name = "Driver_Behavior_Models_UAH"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    driver_behavior_experiment = client.create_experiment(
        name=experiment_name, tags=experiment_tags
    )
else:
    driver_behavior_experiment = existing_experiment.experiment_id
    print(f"Experiment already exists: {experiment_name}")

In [85]:
columns_to_keep = [
    'acc_x', 'acc_y', 'acc_z',
    'gyro_x', 'gyro_y', 'gyro_z',
    'angle', 'speed', 'label',
]

# Window the UAH training and validation splits
X_train, y_train = preprocessor.sliding_windows(
    uah_training[columns_to_keep], window_size=window_size, step_size=step_size
)
X_val, y_val = preprocessor.sliding_windows(
    uah_validation[columns_to_keep], window_size=window_size, step_size=step_size
)

# Fit the scaler on the training windows only and reuse it for validation
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

# Encode the labels with the shared label mapping
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

# Signature attached to every model logged by `objective`
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_UAH')
data_name = 'UAH'

In [None]:
# `objective` builds each trial from the notebook-level `model`, so it is
# rebound to the matching estimator class before every sweep.
model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
)

model = xgb.XGBClassifier
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 10, 'n_estimators': 50, 'random_state': 42}
üèÉ View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/757271451324733873/runs/fe96fa56cd4e4ea3a392eedcdb850751

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/757271451324733873

Params: {'max_depth': 10, 'n_estimators': 500, 'random_state': 42}              
 20%|‚ñà‚ñà        | 1/5 [00:04<00:17,  4.49s/trial, best loss: -0.7314151877823617]

## SUMO Only

In [None]:
# Restrict the UAH validation set to the columns present in the SUMO data
columns_to_keep = list(sumo_fixed.columns)
print("Columns to keep for validation:", columns_to_keep)

# Window the reduced validation set and encode its labels
X_val_base, y_val = preprocessor.sliding_windows(
    uah_validation[columns_to_keep], window_size=window_size, step_size=step_size
)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc', 'angle', 'speed', 'gyro_z', 'label']


### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the SUMO Fixed dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Reuse the Experiment when it already exists, create it otherwise. Looking it
# up first (instead of catching RestException) keeps unrelated server errors
# from being reported as "already exists", and leaves
# `driver_behavior_experiment` holding the experiment ID in both cases.
experiment_name = "Driver_Behavior_Models_SUMO_Fixed"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    driver_behavior_experiment = client.create_experiment(
        name=experiment_name, tags=experiment_tags
    )
else:
    driver_behavior_experiment = existing_experiment.experiment_id
    print(f"Experiment already exists: {experiment_name}")

In [None]:
# Fit the scaler on the SUMO Fixed windows and reuse it for the validation windows
scaler = StandardScaler().fit(X_sumo_fixed)
X_train = scaler.transform(X_sumo_fixed)
X_val = scaler.transform(X_val_base)

# Encode the training labels with the shared label mapping
y_train = preprocessor.one_hot_encode(y_sumo_fixed, one_hot_keys=one_hot_keys)

# Signature attached to every model logged by `objective`
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_SUMO_Fixed')
data_name = 'SUMO_Fixed'

In [None]:
# `objective` builds each trial from the notebook-level `model`, so it is
# rebound to the matching estimator class before every sweep.
model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
)

model = xgb.XGBClassifier
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 20, 'random_state': 42}
üèÉ View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/53272d0428e84baa97789bc64aedb785

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421

Params: {'max_depth': 10, 'n_estimators': 20, 'random_state': 42}               
 20%|‚ñà‚ñà        | 1/5 [00:04<00:19,  4.86s/trial, best loss: -0.3334085595387637]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/fe72e9fb6e9d472ab07e229b0970880e

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421   

Params: {'max_depth': None, 'n_estimators': 50, 'random_state': 42}             
 40%|‚ñà‚ñà‚ñà‚ñà      | 2/5 [00:07<00:11,  3.82s/trial, best loss: -0.3334085595387637]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/03b040ac171744898aa83fd46521bc93

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421   

Params: {'max_depth': 10, 'n_estimators': 50, 'random_state': 42}               
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [00:19<00:14,  7.46s/trial, best loss: -0.3334085595387637]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/46e67431d1e748c5b8a119670a77636a

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421   

Params: {'max_depth': 50, 'n_estimators': 200, 'random_state': 42}              
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4/5 [00:26<00:07,  7.21s/trial, best loss: -0.3334085595387637]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/2ae637bed3af4e0b8e41e802d9757c34

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421   

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [01:08<00:00, 13.72s/trial, best loss: -0.3334085595387637]
üèÉ View run rf-sweep at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/e24c8db41cb1483d91412a6943b9e41a
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}  
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/2cf82d91bdae4a778dcb1e39f0730425

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421

Params: {'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'}                               
 20%|‚ñà‚ñà        | 1/5 [01:17<05:10, 77.67s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/9378c3b6e5d14bd3a2a7ff5ebafbdc5b

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421    

Params: {'C': 1, 'gamma': 1, 'kernel': 'rbf'}                                    
 40%|‚ñà‚ñà‚ñà‚ñà      | 2/5 [02:22<03:30, 70.22s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/47e14c11c5544f818fa4fd0cfb12463b

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421    

Params: {'C': 0.5, 'gamma': 10, 'kernel': 'rbf'}                                 
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [03:53<02:38, 79.45s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.5, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/53b5ef3a5b70444388928136c1237430

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421    

Params: {'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'}                               
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4/5 [05:37<01:29, 89.23s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/b13599a4caf64c9e8623c09885b87323

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421    

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [06:42<00:00, 80.50s/trial, best loss: -0.33310749162724596]
üèÉ View run svc-sweep at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/993a6f0c9ac34c868439f0bf24ddff6c
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.001}                                 
üèÉ View run LinearSVC_{'C': 0.001} at: http://127.0.0.1:8080/#/experiments/134669692609126421/runs/685366b6662a4133a148db678b8b4f2a

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/134669692609126421

Params: {'C': 0.001}                                                            
üèÉ View run LinearSVC_{'C': 0.001} at: htt

0.33671100692668154

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the SUMO LLM dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Reuse the Experiment when it already exists, create it otherwise. Looking it
# up first (instead of catching RestException) keeps unrelated server errors
# from being reported as "already exists", and leaves
# `driver_behavior_experiment` holding the experiment ID in both cases.
experiment_name = "Driver_Behavior_Models_SUMO_LLM"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    driver_behavior_experiment = client.create_experiment(
        name=experiment_name, tags=experiment_tags
    )
else:
    driver_behavior_experiment = existing_experiment.experiment_id
    print(f"Experiment already exists: {experiment_name}")

In [None]:
X_train, y_train = X_sumo_llm, y_sumo_llm

# Fit the scaler on the SUMO LLM windows and reuse it for the validation windows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

# Re-infer the signature from this dataset: `objective` attaches the
# notebook-level `signature` to every logged model, so without this line the
# models would carry whatever signature an earlier section left behind.
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_SUMO_LLM')
data_name = 'SUMO_LLM'

In [None]:
# `objective` builds each trial from the notebook-level `model`, so it is
# rebound to the matching estimator class before every sweep.
model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
)

model = xgb.XGBClassifier
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 100, 'random_state': 42}
üèÉ View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/959396384665733800/runs/4763cf8b76c449f5be4b6f541ee7ddc2

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/959396384665733800

Params: {'max_depth': 5, 'n_estimators': 500, 'random_state': 42}               
üèÉ View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/959396384665733800/runs/5a6e55c187bb4afc8a6d933764684679

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/959396384665733800   

Params: {'max_depth': 50, 'n_estimators': 200, 'random_state': 42}              
üèÉ View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/959396384665733800/runs/3

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.05, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/959396384665733800/runs/f28d8744d49443b5826984462385016e

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/959396384665733800    

Params: {'C': 1, 'gamma': 10, 'kernel': 'rbf'}                                   
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [04:48<03:16, 98.31s/trial, best loss: -0.33641148675357896]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/959396384665733800/runs/7206561a8306433c8f2ff6ab90aa45e5

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/959396384665733800    

Params: {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}                                 
üèÉ View run SVC_{'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/959396384665733800/runs/11426f98a313488185e8e330874dc37c

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/959396384665733800     

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [12:10<00:00, 146.18s/trial, best loss: -0.33641148675357896]
üèÉ View run svc-sweep at: http://127.0.0.1:8080/#/experiments/959396384665733800/runs/14b10d595d864c15929e618775808d23
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/959396384665733800
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 1}                                     
üèÉ View run Lin

0.47694909303546645

## Carla Only

In [None]:
# Restrict the UAH validation set to the columns present in the CARLA data
columns_to_keep = list(carla_fixed.columns)
print("Columns to keep for validation:", columns_to_keep)

# Window the reduced validation set and encode its labels
X_val_base, y_val = preprocessor.sliding_windows(
    uah_validation[columns_to_keep], window_size=window_size, step_size=step_size
)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'label']


### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the CARLA Fixed dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Reuse the Experiment when it already exists, create it otherwise. Looking it
# up first (instead of catching RestException) keeps unrelated server errors
# from being reported as "already exists", and leaves
# `driver_behavior_experiment` holding the experiment ID in both cases.
experiment_name = "Driver_Behavior_Models_CARLA_Fixed"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    driver_behavior_experiment = client.create_experiment(
        name=experiment_name, tags=experiment_tags
    )
else:
    driver_behavior_experiment = existing_experiment.experiment_id
    print(f"Experiment already exists: {experiment_name}")

In [None]:
X_train, y_train = X_carla_fixed, y_carla_fixed

# Fit the scaler on the CARLA Fixed windows and reuse it for the validation windows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

# Re-infer the signature from this dataset: `objective` attaches the
# notebook-level `signature` to every logged model, so without this line the
# models would carry whatever signature an earlier section left behind.
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_CARLA_Fixed')
data_name = 'CARLA_Fixed'

In [None]:
# `objective` builds each trial from the notebook-level `model`, so it is
# rebound to the matching estimator class before every sweep.
model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
)

model = xgb.XGBClassifier
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 10, 'n_estimators': 20, 'random_state': 42}
üèÉ View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/97b78d9280cc46aeb9bb55f1d17c1de3

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802

Params: {'max_depth': 50, 'n_estimators': 50, 'random_state': 42}               
üèÉ View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/d1ebb580ba1d45fcbf76f0632ad7fac9

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802   

Params: {'max_depth': 50, 'n_estimators': 200, 'random_state': 42}              
üèÉ View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/a3b

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/eebe184802d345dcb00bc37ff2d752b6

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802

Params: {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}                                
 20%|‚ñà‚ñà        | 1/5 [00:29<01:56, 29.17s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/30d50859de87417db048bc85af656b3e

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802    

Params: {'C': 0.01, 'gamma': 1, 'kernel': 'rbf'}                                 
 40%|‚ñà‚ñà‚ñà‚ñà      | 2/5 [00:58<01:27, 29.06s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.01, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/85690a1de52349d59a9f016fb6a9db67

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802    

Params: {'C': 1, 'gamma': 10, 'kernel': 'rbf'}                                   
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [01:21<00:52, 26.36s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/0685d85c8a3b4fc4b771efe7f7f08f5a

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802   

Params: {'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'}                              
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4/5 [02:27<00:42, 42.18s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/f0fbb03b39f54c50a8733a25bd4e7836

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802   

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [02:53<00:00, 34.62s/trial, best loss: -0.3335592240143389]
üèÉ View run svc-sweep at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/0841ae5333bc491ab79e0ccd2ab65504
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 1}                                     
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/f94030d3c1db40cea599ba55640c9599

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802

Params: {'C': 0.1}                                                               
 20%|‚ñà‚ñà        | 1/5 [00:02<00:08,  2.06s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 0.1} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/162e5a9cd3f440358ffd613d6d6faff5

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802    

Params: {'C': 0.0001}                                                            
 40%|‚ñà‚ñà‚ñà‚ñà      | 2/5 [00:04<00:06,  2.09s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/33a88c4903e5436dbabc6ba70c64aa81

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802    

Params: {'C': 1}                                                                 
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [00:05<00:03,  1.88s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/e8b22c5597bc4abbaa73bcf875abcd9a

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802    

Params: {'C': 0.001}                                                             
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4/5 [00:07<00:01,  1.95s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 0.001} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/4ea106de4ea34b1dbe3e4d9c81433c01

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802    

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:09<00:00,  1.93s/trial, best loss: -0.33310749162724596]
üèÉ View run svc-linear-sweep at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/caba95c5d3754bb8813fde6f78b0355c
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 200, 'random_state': 42}
üèÉ View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/805744944225876802/runs/90f7efd253444681bc1eba65f007593f

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/805744944225876802

Params: {'learning_rate': 0.5, 

0.5284922889500198

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the CARLA LLM dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Reuse the Experiment when it already exists, create it otherwise. Looking it
# up first (instead of catching RestException) keeps unrelated server errors
# from being reported as "already exists", and leaves
# `driver_behavior_experiment` holding the experiment ID in both cases.
experiment_name = "Driver_Behavior_Models_CARLA_LLM"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    driver_behavior_experiment = client.create_experiment(
        name=experiment_name, tags=experiment_tags
    )
else:
    driver_behavior_experiment = existing_experiment.experiment_id
    print(f"Experiment already exists: {experiment_name}")

In [None]:
X_train, y_train = X_carla_llm, y_carla_llm

# Fit the scaler on the CARLA LLM windows and reuse it for the validation windows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

# Re-infer the signature from this dataset: `objective` attaches the
# notebook-level `signature` to every logged model, so without this line the
# models would carry whatever signature an earlier section left behind.
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_CARLA_LLM')
data_name = 'CARLA_LLM'

In [None]:
# `objective` builds each trial from the notebook-level `model`, so it is
# rebound to the matching estimator class before every sweep.
model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
)

model = xgb.XGBClassifier
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 5, 'n_estimators': 20, 'random_state': 42}
üèÉ View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/4182781930ec477a83f1087fe7eec7eb

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028

Params: {'max_depth': None, 'n_estimators': 20, 'random_state': 42}             
üèÉ View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/dc97ff4da817455bac1f1356ee11b2d7

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028   

Params: {'max_depth': 50, 'n_estimators': 20, 'random_state': 42}               
üèÉ View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/9019

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/9af434b9a26c4c33acd68e8fc7911dbc

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028

Params: {'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'}                             
 20%|‚ñà‚ñà        | 1/5 [00:32<02:11, 32.75s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/4e646464c8fd4f1389c59c7a79ea08c2

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

Params: {'C': 0.1, 'gamma': 10, 'kernel': 'rbf'}                                 
 40%|‚ñà‚ñà‚ñà‚ñà      | 2/5 [01:06<01:39, 33.14s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/018f2d1f7c124837b0f3e7f1baf76aec

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

Params: {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}                                  
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [02:05<01:29, 44.94s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/999cd7a0ad274707b33dcfc541ce9468

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

Params: {'C': 1, 'gamma': 1, 'kernel': 'rbf'}                                    
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4/5 [02:54<00:46, 46.79s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run SVC_{'C': 1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/6ddfcaffd42543cf929e426b2a54213f

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

100%|██████████| 5/5 [03:44<00:00, 44.95s/trial, best loss: -0.33310749162724596]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/6481d0f73d0c456d86749e4c48ca00bb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.1}                                   
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 0.1} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/77501bdd03354987a907017e1aab9a79

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028

Params: {'C': 0.0001}                                                            
 20%|‚ñà‚ñà        | 1/5 [00:02<00:08,  2.16s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/237117aa2f99487b8a43a40da6032965

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

Params: {'C': 0.0001}                                                            
 40%|‚ñà‚ñà‚ñà‚ñà      | 2/5 [00:03<00:05,  1.89s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/3e56062855514fcfa4d0cbdb994e60b1

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

Params: {'C': 0.0001}                                                            
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [00:05<00:03,  1.76s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/bd0a6c68d2344dd6b5d3a097e9411264

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

Params: {'C': 1}                                                                 
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4/5 [00:07<00:01,  1.71s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



üèÉ View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/9d3dd8dd132144f0b00e48402f639c4e

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028    

100%|██████████| 5/5 [00:09<00:00,  1.84s/trial, best loss: -0.33310749162724596]
🏃 View run svc-linear-sweep at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/53641ce6a6784b108898061482bd2334
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 500, 'random_state': 42}
üèÉ View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/169808028941311028/runs/55d8223e4d33421f8c635164d9a21485

üß™ View experiment at: http://127.0.0.1:8080/#/experiments/169808028941311028

Params: {'learning_rate': 0.4, 'max

0.4407236416518528

## Real + SUMO

### Fixed

In [176]:
# Infer the MLflow model signature from the SUMO (fixed) inputs and outputs.
signature = infer_signature(
    model_input=X_sumo_fixed,
    model_output=y_sumo_fixed,
)

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH driveset supplemented by SUMO with fixed parameters."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name.
# Guarded so the cell can be re-run: a duplicate-name error is tolerated and
# the existing experiment id is looked up, while any other tracking-server
# error is re-raised.
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_UAH_SUMO_fixed", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")
    # create_experiment returns the experiment id, so keep the same kind of
    # value bound on the "already exists" path.
    _existing_experiment = client.get_experiment_by_name(
        "Driver_Behavior_Models_UAH_SUMO_fixed"
    )
    driver_behavior_experiment = (
        _existing_experiment.experiment_id
        if _existing_experiment is not None
        else None
    )

In [None]:
# Random-forest sweep for the "Real + SUMO (fixed)" section.
# NOTE(review): this cell does not call mlflow.set_experiment(...) or rebuild
# X_train / y_train / X_val, so it appears to reuse whatever experiment and
# data the previous section left active — confirm this is intended.
# NOTE(review): data_name is still 'UAH' although the section is about
# UAH + SUMO data — confirm the label.
model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=5,
    search_space=search_space_rf,
    data_name='UAH',
    # Passed explicitly, as in every other sweep call in this notebook.
    model_name='RandomForestClassifier',
)

## Real + SUMO (LLM)