This notebook trains the models and tracks every run with `MLflow`. To see the results for the best models, refer to the `4_evaluation` notebook.

In [1]:
import os

import mlflow
from sklearn.model_selection import train_test_split
from mlflow.models import infer_signature
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, log_loss
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from functools import partial

# Add the sibling `src` directory (resolved against the current working directory)
# to the import path so the local `data` package below can be imported.
# NOTE(review): `os.sys` is the standard `sys` module reached through `os`.
os.sys.path.append(os.path.abspath('../src'))
from data import loader
from data import preprocessor

# Sub-directory name used when building the merged dataset paths
# (`{data_path}/merged/{mapname}/...`).
mapname = 'Town01'

In [2]:
def create_and_train_model(model, args):
    """
    Fit one estimator configuration and score it on the validation split.

    The data is not passed in: the function reads the notebook-level globals
    ``X_train``, ``y_train``, ``X_val`` and ``y_val``, so they must be defined
    before it is called.

    Args:
        model: Callable (e.g. an estimator class) invoked as ``model(**args)``
            to build the estimator.
        args: Dict of keyword arguments forwarded to ``model``.

    Returns:
        dict: Weighted ``f1_score``, ``accuracy``, weighted ``recall`` and
        weighted ``precision`` computed on the validation split, plus the
        fitted estimator under the ``model`` key.
    """
    print(f"Params: {args}")

    # Build and fit the estimator for this hyperparameter combination
    estimator = model(**args)
    estimator.fit(X_train, y_train)

    # Score the predictions made on the validation split
    predictions = estimator.predict(X_val)
    return {
        "f1_score": f1_score(y_val, predictions, average='weighted'),
        "accuracy": accuracy_score(y_val, predictions),
        "recall": recall_score(y_val, predictions, average='weighted'),
        "precision": precision_score(y_val, predictions, average='weighted'),
        "model": estimator,
    }

In [3]:
def objective(params, model_name):
    """
    Hyperopt objective: train one configuration inside a nested MLflow run.

    Reads the notebook-level globals ``model`` (the estimator class to
    instantiate) and ``signature`` (the model signature to log), so both must
    be set before the search starts.

    Args:
        params: Hyperparameters sampled by Hyperopt for this trial.
        model_name: Label used in the run name and as the logged model name.

    Returns:
        dict: ``loss`` (the negated weighted F1 score, because Hyperopt
        minimizes), ``status`` and a ``model`` label identifying the trial.
    """
    trial_label = f'{model_name}_{params}'
    reported_metrics = ("accuracy", "recall", "precision", "f1_score")

    with mlflow.start_run(nested=True, run_name=trial_label):
        # Record the hyperparameters under test before training starts
        mlflow.log_params(params)

        outcome = create_and_train_model(model, args=params)

        # Record the validation metrics of this trial
        mlflow.log_metrics({metric: outcome[metric] for metric in reported_metrics})

        # Store the fitted estimator alongside the run
        mlflow.sklearn.log_model(
            outcome["model"], name=model_name, signature=signature)

        # Hyperopt minimizes, so the score is negated
        return {"loss": -outcome["f1_score"], "status": STATUS_OK, "model": trial_label}

In [4]:
def run_experiments(run_name, max_evals, search_space, data_name, features_names, model_name):
    """
    Run a Hyperopt TPE search inside a parent MLflow run and log its outcome.

    Each trial is executed by ``objective`` (as a nested run), which in turn
    depends on the notebook-level globals ``model`` and ``signature``.

    Args:
        run_name: Name of the parent MLflow run.
        max_evals: Number of Hyperopt trials to execute.
        search_space: Hyperopt search space (dict of ``hp`` expressions and constants).
        data_name: Dataset label logged as the ``dataset`` parameter.
        features_names: Feature/column names logged as the ``features`` parameter.
        model_name: Model label; logged as the ``model_type`` tag and passed to ``objective``.

    Returns:
        float: The best (highest) weighted F1 score found across the trials.
    """
    # Local import: `space_eval` is not part of the notebook's top-level hyperopt import
    from hyperopt import space_eval

    print(
        f"This will run {max_evals} trials to find optimal hyperparameters...")

    with mlflow.start_run(run_name=run_name):

        mlflow.set_tag("model_type", model_name)

        # Log experiment metadata
        mlflow.log_params(
            {
                "optimization_method": "Tree-structured Parzen Estimator (TPE)",
                "max_evaluations": max_evals,
                # The loss returned by `objective` is the negated weighted F1 score
                "objective_metric": "weighted_f1_score",
                "dataset": data_name,
                "features": features_names,
            }
        )

        # Run optimization
        trials = Trials()
        objective_with_name = partial(objective, model_name=model_name)

        best_assignment = fmin(
            fn=objective_with_name,
            space=search_space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=trials,
            verbose=True,
        )

        # `fmin` reports `hp.choice` parameters as indices into their option
        # lists; resolve them to the actual hyperparameter values before logging
        best_params = space_eval(search_space, best_assignment)

        # Find the best trial (lowest loss == highest F1)
        best_trial = min(trials.results, key=lambda x: x["loss"])
        best_f1_score = -best_trial["loss"]

        # Log optimization results
        mlflow.log_params(best_params)
        mlflow.log_params({"best_model": best_trial["model"]})
        mlflow.log_metrics(
            {
                "best_f1_score": best_f1_score,
                "total_trials": len(trials.trials),
                "optimization_completed": 1,
            }
        )

    return best_f1_score


In [5]:
# Point both the fluent API and the low-level client at the same tracking server
tracking_uri = "http://127.0.0.1:8080"
mlflow.set_tracking_uri(tracking_uri)
client = mlflow.tracking.MlflowClient(tracking_uri)

## Preparing the Data

In [6]:
data_path = '../data'


def _read_csv_without_origin(csv_path):
    """Read a CSV file and discard its 'origin' column."""
    return pd.read_csv(csv_path).drop(columns=['origin'])


# Base UAH training / validation splits
uah_training = pd.read_csv(f'{data_path}/base/training_set_uah.csv')
uah_validation = pd.read_csv(f'{data_path}/base/validation_set_uah.csv')

# CARLA-only and SUMO-only sets: the 'origin' column is removed on load
carla_fixed = _read_csv_without_origin(f'{data_path}/merged/{mapname}/carla/carla_fixed.csv')
carla_llm = _read_csv_without_origin(f'{data_path}/merged/{mapname}/carla/carla_llm.csv')

sumo_fixed = _read_csv_without_origin(f'{data_path}/merged/{mapname}/sumo/sumo_fixed.csv')
sumo_llm = _read_csv_without_origin(f'{data_path}/merged/{mapname}/sumo/sumo_llm.csv')

# Sets merged with UAH: read as-is, every column is kept
carla_uah_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/carla_uah/carla_uah_fixed.csv')
carla_uah_llm = pd.read_csv(f'{data_path}/merged/{mapname}/carla_uah/carla_uah_llm.csv')

sumo_uah_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/sumo_uah/sumo_uah_fixed.csv')
sumo_uah_llm = pd.read_csv(f'{data_path}/merged/{mapname}/sumo_uah/sumo_uah_llm.csv')

In [7]:
# Column sets of each simulator's data, used to select matching columns from the UAH data
sumo_columns_to_keep = list(sumo_fixed.columns)
carla_columns_to_keep = list(carla_fixed.columns)

In [8]:
# Sliding-window settings passed to `preprocessor.sliding_windows`
window_size, step_size = 10, 5

# Label name -> integer code, passed to `preprocessor.one_hot_encode`
one_hot_keys = dict(normal=0, aggressive=1)

Defining the search space for each model

In [9]:
# Number of Hyperopt trials per sweep
max_evals = 5

# Random forest
search_space_rf = dict(
    n_estimators=hp.choice("n_estimators", [20, 50, 100, 200, 500]),
    max_depth=hp.choice("max_depth", [None, 5, 10, 20, 50]),
    random_state=42,
)

# SVC (only the 'rbf' kernel is offered)
search_space_svc = dict(
    C=hp.choice("C", [0.01, 0.05, 0.1, 0.5, 1]),  # Regularization parameter
    kernel=hp.choice("kernel", ["rbf"]),  # Kernel type
    gamma=hp.choice("gamma", [0.001, 0.01, 0.1, 1, 10]),  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
)

# Linear SVC
search_space_svc_linear = dict(
    C=hp.choice("C", [10**-4, 10**-3, 10**-2, 0.1, 1]),  # Regularization parameter
)

# XGBoost
search_space_xgb = dict(
    n_estimators=hp.choice("n_estimators", [200, 500, 700, 1000]),
    max_depth=hp.choice("max_depth", [None, 5, 10]),
    learning_rate=hp.choice("learning_rate", [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5]),  # Learning rate
    random_state=42,
)

# Train on Real, Test on Synthetic (TRTS)

By training a model on real data and testing it on synthetic data, we verify the synthetic data's fidelity to the real world.

## SUMO

In [18]:
# Training split: UAH data restricted to the SUMO column set.
# The windowing settings come from the shared config (`window_size`, `step_size`)
# instead of repeating the literals, so every section windows the data the same way.
X_train, y_train = preprocessor.sliding_windows(
    uah_training[sumo_columns_to_keep], window_size=window_size, step_size=step_size
)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys)

# Signature reused by `objective` when logging each trained model
signature = infer_signature(X_train, y_train)

### Fixed

In [19]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to TRTS on SUMO Fixed data."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="TRTS_SUMO_Fixed", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    # Only a name clash is expected when the cell is re-run; any other
    # server-side error must surface instead of being reported as "already exists"
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

Experiment already exists: RESOURCE_ALREADY_EXISTS: Experiment 'TRTS_SUMO_Fixed' already exists.


In [20]:
# Validation split: SUMO "fixed" data, windowed with the shared config values
# (`window_size`, `step_size`) rather than repeated literals
X_val, y_val = preprocessor.sliding_windows(sumo_fixed, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

# Runs started below are recorded under this experiment
data_name = 'TRTS_SUMO_Fixed'
mlflow.set_experiment(data_name)

In [21]:
# Arguments shared by every sweep in this cell
launch_sweep = partial(
    run_experiments,
    max_evals=max_evals,
    data_name=data_name,
    features_names=sumo_columns_to_keep,
)

# `objective` reads the estimator class from the global `model`,
# so it is reassigned before each sweep
model = RandomForestClassifier
launch_sweep('trts-rf-sweep', search_space=search_space_rf, model_name='RandomForestClassifier')
model = SVC
launch_sweep('trts-svc-sweep', search_space=search_space_svc, model_name='SVC')
model = xgb.XGBClassifier
launch_sweep('trts-xgb-sweep', search_space=search_space_xgb, model_name='XGBClassifier')

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 100, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/80ae50b7aa474a53b63e3d13652688bd

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312

Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}             
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/bdbe5c323cf84c6480ec3b1c37b73dfc

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312    

Params: {'max_depth': 50, 'n_estimators': 50, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/76f47eb

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/ca93ce0a0c614daa93bb5212d8d781ec

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312    

100%|██████████| 5/5 [03:52<00:00, 46.49s/trial, best loss: -0.34700806825918606]
🏃 View run trts-svc-sweep at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/e01fff3ba9254b74947c54c4a7afa157
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.2, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.2, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/831210d9b5344ac2b2400164325075f8

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312

Params: {'learning_rate': 0.001, 'max_depth': N

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': None, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/28b64d1c3b8c4ed68e2c641168e405d4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312    

Params: {'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/221e36eeb2504ce58a8963f1ad48b852

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312    

100%|██████████| 5/5 [00:44<00:00,  9.00s/trial, best loss: -0.47254594910010395]
🏃 View run trts-xgb-sweep at: http://127.0.0.1:8080/#/experiments/752116745523456312/runs/af53e8ce9ead4b2397be7ee1445d1dd8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/752116745523456312


0.47254594910010395

### LLM

In [22]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to TRTS on SUMO LLM data."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="TRTS_SUMO_LLM", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    # Only a name clash is expected when the cell is re-run; any other
    # server-side error must surface instead of being reported as "already exists"
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

Experiment already exists: RESOURCE_ALREADY_EXISTS: Experiment 'TRTS_SUMO_LLM' already exists.


In [None]:
# Validation split: SUMO "LLM" data, windowed with the shared config values
# (`window_size`, `step_size`) rather than repeated literals
X_val, y_val = preprocessor.sliding_windows(sumo_llm, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

# Runs started below are recorded under this experiment
data_name = 'TRTS_SUMO_LLM'
mlflow.set_experiment(data_name)

In [24]:
# Arguments shared by every sweep in this cell
launch_sweep = partial(
    run_experiments,
    max_evals=max_evals,
    data_name=data_name,
    features_names=sumo_columns_to_keep,
)

# `objective` reads the estimator class from the global `model`,
# so it is reassigned before each sweep
model = RandomForestClassifier
launch_sweep('trts-rf-sweep', search_space=search_space_rf, model_name='RandomForestClassifier')
model = SVC
launch_sweep('trts-svc-sweep', search_space=search_space_svc, model_name='SVC')
model = xgb.XGBClassifier
launch_sweep('trts-xgb-sweep', search_space=search_space_xgb, model_name='XGBClassifier')

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/c8fb3ea589224ea9b7b0d98130c6fd4c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/130091945796323793

Params: {'max_depth': 50, 'n_estimators': 500, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/8f50fbf994ff4a5d98477e36bd8a861c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/130091945796323793    

Params: {'max_depth': None, 'n_estimators': 500, 'random_state': 42}             
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/e7d4382e8f6743

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/1cac46b28cfc43de9a06872fc906b4f2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/130091945796323793   

Params: {'C': 0.5, 'gamma': 1, 'kernel': 'rbf'}                                 
 40%|████      | 2/5 [01:55<02:55, 58.54s/trial, best loss: -0.366294641495687]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/940c1bd5829c481d8084cb9d8bbe1f69

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/130091945796323793  

Params: {'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'}                           
 60%|██████    | 3/5 [03:34<02:34, 77.16s/trial, best loss: -0.366294641495687]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/013129f83842435ba5d1954ab3265cf4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/130091945796323793  

Params: {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}                            
🏃 View run SVC_{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/af7eea6666db4ce49ceb4827d02e2b82

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/130091945796323793  

100%|██████████| 5/5 [05:26<00:00, 65.24s/trial, best loss: -0.366294641495687]
🏃 View run trts-svc-sweep at: http://127.0.0.1:8080/#/experiments/130091945796323793/runs/95a08266b07040faaa0c432c3f4ca77d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/130091945796323793
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.3, 'max_depth': None, 'n_estimators': 500, 'random_state': 42}
🏃 View run XGBCl

0.48108660813876947

## CARLA

In [25]:
# Training split: UAH data restricted to the CARLA column set.
# The windowing settings come from the shared config (`window_size`, `step_size`)
# instead of repeating the literals, so every section windows the data the same way.
X_train, y_train = preprocessor.sliding_windows(
    uah_training[carla_columns_to_keep], window_size=window_size, step_size=step_size
)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys)

# Signature reused by `objective` when logging each trained model
signature = infer_signature(X_train, y_train)

### Fixed

In [26]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to TRTS on CARLA Fixed data."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="TRTS_CARLA_Fixed", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    # Only a name clash is expected when the cell is re-run; any other
    # server-side error must surface instead of being reported as "already exists"
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [27]:
# Validation split: CARLA "fixed" data, windowed with the shared config values
# (`window_size`, `step_size`) rather than repeated literals
X_val, y_val = preprocessor.sliding_windows(carla_fixed, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

# Runs started below are recorded under this experiment
data_name = 'TRTS_CARLA_Fixed'
mlflow.set_experiment(data_name)

In [28]:
# Arguments shared by every sweep in this cell.
# The models here are trained on `uah_training[carla_columns_to_keep]`, so the
# logged feature list must be the CARLA column set (not the SUMO one).
launch_sweep = partial(
    run_experiments,
    max_evals=max_evals,
    data_name=data_name,
    features_names=carla_columns_to_keep,
)

# `objective` reads the estimator class from the global `model`,
# so it is reassigned before each sweep
model = RandomForestClassifier
launch_sweep('trts-rf-sweep', search_space=search_space_rf, model_name='RandomForestClassifier')
model = SVC
launch_sweep('trts-svc-sweep', search_space=search_space_svc, model_name='SVC')
model = xgb.XGBClassifier
launch_sweep('trts-xgb-sweep', search_space=search_space_xgb, model_name='XGBClassifier')

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/8ce28191c74a4122a45be4a8571aa9d1

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515

Params: {'max_depth': 5, 'n_estimators': 500, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/d6bbf3fb41d74cf7ba27f7a0944b21d2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515    

Params: {'max_depth': 50, 'n_estimators': 500, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/3475d58bb

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/8d79cac42e5a4fca84a0e0257fc03182

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515

Params: {'C': 0.05, 'gamma': 0.01, 'kernel': 'rbf'}                             
 20%|██        | 1/5 [00:55<03:39, 54.99s/trial, best loss: -0.2525635814718753]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/cd3300e5589941aba06676c2acd7ea5d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515   

Params: {'C': 0.5, 'gamma': 10, 'kernel': 'rbf'}                                
 40%|████      | 2/5 [01:56<02:56, 58.77s/trial, best loss: -0.2525635814718753]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/4275455e786f4a6facff17d809972ae4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515   

Params: {'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'}                              
 60%|██████    | 3/5 [03:19<02:19, 69.84s/trial, best loss: -0.4209539786453059]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/1323e32aedbe403282cf4288497bf177

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515   

Params: {'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'}                              
 80%|████████  | 4/5 [04:07<01:01, 61.05s/trial, best loss: -0.4209539786453059]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/323a5e2fe60348a880e1103c10ac4698

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515   

100%|██████████| 5/5 [04:55<00:00, 59.08s/trial, best loss: -0.4209539786453059]
🏃 View run trts-svc-sweep at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/76cfb88f9a014955a1db461f79357183
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.4, 'max_depth': 5, 'n_estimators': 1000, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.4, 'max_depth': 5, 'n_estimators': 1000, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/193100077548678515/runs/4f3e0f9404104c99b0c2b076a01b42a8

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/193100077548678515

Params: {'learning_rate': 0.01, 'max_depth': None, 'n

0.5261354373470896

### LLM

In [29]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to TRTS on CARLA LLM data."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="TRTS_CARLA_LLM", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    # Only a name clash is expected when the cell is re-run; any other
    # server-side error must surface instead of being reported as "already exists"
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [30]:
# Validation split: CARLA "LLM" data, windowed with the shared config values
# (`window_size`, `step_size`) rather than repeated literals
X_val, y_val = preprocessor.sliding_windows(carla_llm, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

# Runs started below are recorded under this experiment
data_name = 'TRTS_CARLA_LLM'
mlflow.set_experiment(data_name)

In [31]:
# Arguments shared by every sweep in this cell.
# The models here are trained on `uah_training[carla_columns_to_keep]`, so the
# logged feature list must be the CARLA column set (not the SUMO one).
launch_sweep = partial(
    run_experiments,
    max_evals=max_evals,
    data_name=data_name,
    features_names=carla_columns_to_keep,
)

# `objective` reads the estimator class from the global `model`,
# so it is reassigned before each sweep
model = RandomForestClassifier
launch_sweep('trts-rf-sweep', search_space=search_space_rf, model_name='RandomForestClassifier')
model = SVC
launch_sweep('trts-svc-sweep', search_space=search_space_svc, model_name='SVC')
model = xgb.XGBClassifier
launch_sweep('trts-xgb-sweep', search_space=search_space_xgb, model_name='XGBClassifier')

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 10, 'n_estimators': 20, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/bbcd3d66920a4deba1424ced15f6eb3b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053

Params: {'max_depth': None, 'n_estimators': 500, 'random_state': 42}             
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/74548cc8b773455e8180f8bf25d226a8

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053    

Params: {'max_depth': 5, 'n_estimators': 20, 'random_state': 42}                 
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/44222a46941f4c

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/684d5d7a26bd4748a8720aab32931cff

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053

Params: {'C': 0.01, 'gamma': 10, 'kernel': 'rbf'}                               
 20%|██        | 1/5 [00:39<02:39, 39.95s/trial, best loss: -0.3805192649733608]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/9a4d50991c46476a8acd2bdec9ff815d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053   

Params: {'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'}                             
 40%|████      | 2/5 [01:44<02:43, 54.65s/trial, best loss: -0.3805192649733608]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/ce8afd3176b2417fa762e29c6fe18538

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053   

Params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}                                 
 60%|██████    | 3/5 [02:47<01:56, 58.47s/trial, best loss: -0.3805192649733608]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/37557aed9349400a895b4fdd03b547ac

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053   

Params: {'C': 0.05, 'gamma': 0.01, 'kernel': 'rbf'}                             
 80%|████████  | 4/5 [03:24<00:50, 50.02s/trial, best loss: -0.3805192649733608]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/98538667a2774d328e907ba8136e548f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053   

100%|██████████| 5/5 [04:18<00:00, 51.63s/trial, best loss: -0.3805192649733608]
🏃 View run trts-svc-sweep at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/fa8ccc63e7c14647ad2f3a4f22a89e9c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.4, 'max_depth': None, 'n_estimators': 200, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.4, 'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/d2bd36b46f6e4c84b701319106d9ecd4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053

Params: {'learning_rate': 0.01, 'max_depth': 5, 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/43b001fc5b9d4dd3bd671626e06dbf2e

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053   

100%|██████████| 5/5 [00:19<00:00,  3.90s/trial, best loss: -0.5194390536550584]
🏃 View run trts-xgb-sweep at: http://127.0.0.1:8080/#/experiments/184858235810218053/runs/6fae331e04c8450ea184475c3b0c2dca
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/184858235810218053


0.5194390536550584

# Discriminative Score

We seek to understand how well a classifier can separate the real and the synthetic data, in order to determine how indistinguishable they are.

If the classifiers do not achieve good scores, the real and synthetic data are hard to tell apart.

In [152]:
# Real (UAH) features for the discriminator, one frame per simulator column set
X_uah_disc_sumo, X_uah_disc_carla = (
    uah_training[column_set].drop(columns=['label'])
    for column_set in (sumo_columns_to_keep, carla_columns_to_keep)
)

# 0 for real data and 1 for synthetic data
y_uah_disc = np.zeros(len(X_uah_disc_carla))

## SUMO

In [156]:
# Synthetic SUMO features (without 'label'); every row gets discriminator target 1
X_sumo_disc_fixed = sumo_fixed.drop(columns=['label'])
y_sumo_disc_fixed = np.ones(len(sumo_fixed))

X_sumo_disc_llm = sumo_llm.drop(columns=['label'])
y_sumo_disc_llm = np.ones(len(sumo_llm))

### Fixed

In [206]:
# Stack synthetic (target 1) on top of real (target 0) rows; features and targets
# are concatenated in the same order so they stay aligned.
# `ignore_index=True` already yields a fresh 0..n-1 index, so no reset is needed.
X = pd.concat([X_sumo_disc_fixed, X_uah_disc_sumo], axis=0, ignore_index=True)
# `np.concatenate` is available in every NumPy release (`np.concat` only from 2.0)
y = np.concatenate([y_sumo_disc_fixed, y_uah_disc], axis=0)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [207]:
# Seed the forest so the discriminative score is reproducible across re-runs
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [215]:
# RBF-kernel SVC as a second real-vs-synthetic discriminator
svc = SVC(kernel='rbf')
y_pred = svc.fit(X_train, y_train).predict(X_val)

print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


### LLM

In [209]:
# Stack synthetic (target 1) on top of real (target 0) rows; features and targets
# are concatenated in the same order so they stay aligned.
# `ignore_index=True` already yields a fresh 0..n-1 index, so no reset is needed.
X = pd.concat([X_sumo_disc_llm, X_uah_disc_sumo], axis=0, ignore_index=True)
# `np.concatenate` is available in every NumPy release (`np.concat` only from 2.0)
y = np.concatenate([y_sumo_disc_llm, y_uah_disc], axis=0)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [210]:
# Seed the forest so the discriminative score is reproducible across re-runs
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [216]:
# RBF-kernel SVC as a second real-vs-synthetic discriminator
svc = SVC(kernel='rbf')
y_pred = svc.fit(X_train, y_train).predict(X_val)

print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


## CARLA

In [208]:
# Synthetic CARLA features (without 'label'); every row gets discriminator target 1
X_carla_disc_fixed = carla_fixed.drop(columns=['label'])
y_carla_disc_fixed = np.ones(len(carla_fixed))

X_carla_disc_llm = carla_llm.drop(columns=['label'])
y_carla_disc_llm = np.ones(len(carla_llm))

### Fixed

In [211]:
# Stack synthetic (target 1) on top of real (target 0) rows; features and targets
# are concatenated in the same order so they stay aligned.
# `ignore_index=True` already yields a fresh 0..n-1 index, so no reset is needed.
X = pd.concat([X_carla_disc_fixed, X_uah_disc_carla], axis=0, ignore_index=True)
# `np.concatenate` is available in every NumPy release (`np.concat` only from 2.0)
y = np.concatenate([y_carla_disc_fixed, y_uah_disc], axis=0)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [212]:
# Seed the forest so the discriminative score is reproducible across re-runs
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [217]:
# RBF-kernel SVC as a second real-vs-synthetic discriminator
svc = SVC(kernel='rbf')
y_pred = svc.fit(X_train, y_train).predict(X_val)

print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


### LLM

In [213]:
# Stack synthetic (target 1) on top of real (target 0) rows; features and targets
# are concatenated in the same order so they stay aligned.
# `ignore_index=True` already yields a fresh 0..n-1 index, so no reset is needed.
X = pd.concat([X_carla_disc_llm, X_uah_disc_carla], axis=0, ignore_index=True)
# `np.concatenate` is available in every NumPy release (`np.concat` only from 2.0)
y = np.concatenate([y_carla_disc_llm, y_uah_disc], axis=0)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [214]:
# Seed the forest so the discriminative score is reproducible across re-runs
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [218]:
# RBF-kernel SVC as a second real-vs-synthetic discriminator
svc = SVC(kernel='rbf')
y_pred = svc.fit(X_train, y_train).predict(X_val)

print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


# Predictive Scores

Here we train on different combinations of real and synthetic data and always test on real data only, in order to assess how useful the synthetic data is for real-world applications.

## Real Only

In [None]:
# Free-text description of the experiment; it is attached below through the
# "mlflow.note.content" tag so that it appears in the UI.
experiment_description = "Experiment to train models on the UAH driveset."

# Searchable tags describing the runs that will live in this experiment.
experiment_tags = {"project_name": "driver-behavior-prediction"}
experiment_tags["mlflow.note.content"] = experiment_description

# Register the experiment under its unique name. A RestException from the
# tracking server is reported as "already exists" and the cell carries on.
try:
    driver_behavior_experiment = client.create_experiment(name="UAH", tags=experiment_tags)
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
# Columns selected from the UAH frames before windowing.
columns_to_keep = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'speed', 'label']

# Window the training and validation frames with the same window/step settings.
X_train, y_train = preprocessor.sliding_windows(
    uah_training[columns_to_keep],
    window_size=window_size,
    step_size=step_size,
)
X_val, y_val = preprocessor.sliding_windows(
    uah_validation[columns_to_keep],
    window_size=window_size,
    step_size=step_size,
)

# Fit the scaler on the training windows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

# Encode the targets of both splits with the shared key set.
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

# Select the MLflow experiment and record the input/output signature
# inferred from the prepared training arrays.
data_name = 'UAH'
mlflow.set_experiment('UAH')
signature = infer_signature(X_train, y_train)

In [11]:
# One hyper-parameter sweep per estimator family on the UAH data.
# `model` is rebound at notebook level before every call, exactly as before
# (presumably the training helpers read it as a global - confirm).
model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
    features_names=columns_to_keep,
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
    features_names=columns_to_keep,
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
    features_names=columns_to_keep,
)

model = xgb.XGBClassifier
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
    features_names=columns_to_keep,
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 20, 'n_estimators': 200, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/b4545e6d62fb46ee9f6327196b0e46fd

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824

Params: {'max_depth': 50, 'n_estimators': 20, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/6b7d4a795d7b4e998301d39109dd01bc

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824   

Params: {'max_depth': 10, 'n_estimators': 200, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/cb94b8efb8bb4e7c

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/3547e21a18804c97a66c4ecb8742fb8a

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824  

Params: {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}                                
 60%|██████    | 3/5 [02:52<02:10, 65.14s/trial, best loss: -0.750378634529194]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/3f6d065d3a084c8685ebc7d99dfeca1d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824  

Params: {'C': 0.01, 'gamma': 1, 'kernel': 'rbf'}                               
 80%|████████  | 4/5 [04:39<01:21, 81.39s/trial, best loss: -0.750378634529194]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/1627897df74d4bfdbad38ca7485473af

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824  

100%|██████████| 5/5 [05:56<00:00, 71.30s/trial, best loss: -0.750378634529194]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/71e1a6a806be4bf3877addaf7602f185
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.1}                                   
🏃 View run LinearSVC_{'C': 0.1} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/961149dd405b4f318d2214cc3fcf69cb

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824

Params: {'C': 0.01}                                                             
🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/68041551005967282

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/b78074ff33a24aa6b3cc0cbcee31bd8d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824

Params: {'learning_rate': 0.4, 'max_depth': None, 'n_estimators': 700, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.4, 'max_depth': None, 'n_estimators': 700, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/d4282d0f88fc427e937b8b21c6f41ea2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824   

Params: {'learning_rate': 0.5, 'max_depth': None, 'n_estimators': 1000, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.5, 'max_depth': None, 'n_estimators': 1000, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/9f292825a3fc4dfcbee22a3be6dd4269

🧪 View experiment at: http://127.0.0.1:

0.7218176596094258

## SUMO Only

In [12]:
# SUMO: window both synthetic variants (fixed and LLM) with the shared
# window/step settings.
X_sumo_fixed, y_sumo_fixed = preprocessor.sliding_windows(
    sumo_fixed,
    window_size=window_size,
    step_size=step_size,
)
X_sumo_llm, y_sumo_llm = preprocessor.sliding_windows(
    sumo_llm,
    window_size=window_size,
    step_size=step_size,
)

In [35]:
# Build the UAH validation windows restricted to the SUMO column set.
print("Columns to keep for validation:", sumo_columns_to_keep)

# X_val_base stays unscaled here; each training cell applies its own scaler.
X_val_base, y_val = preprocessor.sliding_windows(
    uah_validation[sumo_columns_to_keep],
    window_size=window_size,
    step_size=step_size,
)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc', 'angle', 'speed', 'gyro_z', 'label']


### Fixed

In [None]:
# Free-text description of the experiment; it is attached below through the
# "mlflow.note.content" tag so that it appears in the UI.
experiment_description = "Experiment to train models on the SUMO Fixed dataset."

# Searchable tags describing the runs that will live in this experiment.
experiment_tags = {"project_name": "driver-behavior-prediction"}
experiment_tags["mlflow.note.content"] = experiment_description

# Register the experiment under its unique name. A RestException from the
# tracking server is reported as "already exists" and the cell carries on.
try:
    driver_behavior_experiment = client.create_experiment(name="SUMO_Fixed", tags=experiment_tags)
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
# Train on the SUMO (fixed) windows; validate on the UAH windows in X_val_base.
X_train = X_sumo_fixed
y_train = X_sumo_fixed is None or y_sumo_fixed

# Scaler statistics come from the synthetic training windows only and are
# then applied unchanged to the validation windows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val_base)

y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

# Select the MLflow experiment and record the signature of the prepared arrays.
data_name = 'SUMO_Fixed'
mlflow.set_experiment('SUMO_Fixed')
signature = infer_signature(X_train, y_train)

In [16]:
# One hyper-parameter sweep per estimator family on the SUMO (fixed) data.
#
# features_names now receives `sumo_columns_to_keep`, the column list the
# SUMO validation windows are built from. The previous value,
# `columns_to_keep`, is the UAH column list from the "Real Only" section and
# does not describe this data set.
# NOTE(review): how run_experiments uses features_names is not visible here -
# confirm it is only descriptive metadata.
#
# `model` is rebound at notebook level before every call
# (presumably the training helpers read it as a global - confirm).
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name='RandomForestClassifier', features_names=sumo_columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name='SVC', features_names=sumo_columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name='LinearSVC', features_names=sumo_columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name='XGBClassifier', features_names=sumo_columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 20, 'n_estimators': 200, 'random_state': 42}
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/8e66cf6b774a42fabba32cdf451ca385

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339

Params: {'max_depth': None, 'n_estimators': 100, 'random_state': 42}             
 20%|██        | 1/5 [00:27<01:48, 27.09s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/a286d0e07667496a83c8b98124ac2e23

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339    

Params: {'max_depth': 20, 'n_estimators': 20, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/52e6b182aa3949d6882a2b43a7186909

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339    

Params: {'max_depth': 5, 'n_estimators': 20, 'random_state': 42}                 
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/66b02c49efce4fbeab97da377abec8ee

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339   

Params: {'max_depth

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/57b3d9b4beff4d1897bd05bf56e4117b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339   

100%|██████████| 5/5 [01:07<00:00, 13.49s/trial, best loss: -0.3334085595387637]
🏃 View run rf-sweep at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/43334ed9d43b466ea9b3a6a8d3b131a5
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'} 
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/36e4a9df20c24cd9906af9b1f22398be

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339

Params: {'C': 0.5, 'gamma': 1, 'kernel': 'rbf'}                                  
 20%|██        | 1/5 [01:14<04:58, 74.53s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/fe0554965cdd4da481005ebd750f8226

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339    

Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}                                 
 40%|████      | 2/5 [02:24<03:35, 71.97s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/92d9fa5887e7414d96c135ddf235917a

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339    

Params: {'C': 1, 'gamma': 10, 'kernel': 'rbf'}                                   
 60%|██████    | 3/5 [03:23<02:11, 65.91s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/c3a723a8dfe2439699f79bd7477637d2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339    

Params: {'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'}                                
 80%|████████  | 4/5 [06:19<01:49, 109.46s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/70e80a21dacc4d89b0e55594d9e8c773

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339     

100%|██████████| 5/5 [07:17<00:00, 87.51s/trial, best loss: -0.33310749162724596] 
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/87b6ac0425544845b29b3a001647d7b8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.01}                                  
🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/319021118004357339/runs/97867ad14d8f45eb86c710a26dc947e5

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/319021118004357339

Params: {'C': 0.0001}                                                            
🏃 View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/31902

0.33730962743803933

### LLM

In [None]:
# Free-text description of the experiment; it is attached below through the
# "mlflow.note.content" tag so that it appears in the UI.
experiment_description = "Experiment to train models on the SUMO LLM dataset."

# Searchable tags describing the runs that will live in this experiment.
experiment_tags = {"project_name": "driver-behavior-prediction"}
experiment_tags["mlflow.note.content"] = experiment_description

# Register the experiment under its unique name. A RestException from the
# tracking server is reported as "already exists" and the cell carries on.
try:
    driver_behavior_experiment = client.create_experiment(name="SUMO_LLM", tags=experiment_tags)
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
# Train on the SUMO (LLM) windows; validate on the UAH windows in X_val_base.
X_train, y_train = X_sumo_llm, y_sumo_llm

# Fit the scaler on the synthetic training windows only, then apply the same
# transform to the validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

# Refresh the signature for this data set, as the UAH and SUMO_Fixed
# preparation cells do. Without this line `signature` keeps whatever value the
# last executed preparation cell left behind.
# NOTE(review): presumably consumed when models are logged - confirm.
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('SUMO_LLM')
data_name = 'SUMO_LLM'

In [19]:
# One hyper-parameter sweep per estimator family on the SUMO (LLM) data.
#
# features_names now receives `sumo_columns_to_keep`, the column list the
# SUMO validation windows are built from. The previous value,
# `columns_to_keep`, is the UAH column list from the "Real Only" section and
# does not describe this data set.
# NOTE(review): how run_experiments uses features_names is not visible here -
# confirm it is only descriptive metadata.
#
# `model` is rebound at notebook level before every call
# (presumably the training helpers read it as a global - confirm).
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name='RandomForestClassifier', features_names=sumo_columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name='SVC', features_names=sumo_columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name='LinearSVC', features_names=sumo_columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name='XGBClassifier', features_names=sumo_columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/443228b956a147779a5df4cf5dd33e9e

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287

Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}            
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/8187a5c1de6742cc83eb1a9ae59b591e

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287   

Params: {'max_depth': 20, 'n_estimators': 20, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/1f0fac16f5

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/255616c0636b44a5883381af59b29860

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287

Params: {'C': 0.05, 'gamma': 1, 'kernel': 'rbf'}                                  
 20%|██        | 1/5 [02:59<11:57, 179.41s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/d491031fad2844df9139a2c869d26abb

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287     

Params: {'C': 0.5, 'gamma': 0.001, 'kernel': 'rbf'}                               
🏃 View run SVC_{'C': 0.5, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/7ad60986d6b84a96ae8821a6e4e00eef

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287     

Params: {'C': 0.01, 'gamma': 10, 'kernel': 'rbf'}                                 
 60%|██████    | 3/5 [06:13<03:47, 113.72s/trial, best loss: -0.33641148675357896]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/1618621b98474504b12406a651b0a05b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287     

Params: {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}                               
 80%|████████  | 4/5 [07:52<01:48, 108.09s/trial, best loss: -0.33641148675357896]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/bb51dc4f915348f381f1fe2aa98ef422

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287     

100%|██████████| 5/5 [09:18<00:00, 111.78s/trial, best loss: -0.33641148675357896]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/f3ae6543bd6140d19a6b95513c05d58b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 1}                                     
🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/945333525365379287/runs/d8bd2b2b37334864b29f832ba5c8ea17

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/945333525365379287

Params: {'C': 1}                                                                
🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/9453335253653

0.5748710014522482

## CARLA Only

In [32]:
# CARLA: window both synthetic variants (fixed and LLM) with the shared
# window/step settings.
X_carla_fixed, y_carla_fixed = preprocessor.sliding_windows(
    carla_fixed,
    window_size=window_size,
    step_size=step_size,
)
X_carla_llm, y_carla_llm = preprocessor.sliding_windows(
    carla_llm,
    window_size=window_size,
    step_size=step_size,
)

In [36]:
# Build the UAH validation windows restricted to the CARLA column set.
# This rebinds X_val_base / y_val, which the SUMO section also assigns, so
# this cell must be (re-)run before the CARLA sweeps below.
print("Columns to keep for validation:", carla_columns_to_keep)
X_val_base, y_val = preprocessor.sliding_windows(uah_validation[carla_columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'label', 'acc']


### Fixed

In [37]:
# Free-text description of the experiment; it is attached below through the
# "mlflow.note.content" tag so that it appears in the UI.
experiment_description = "Experiment to train models on the CARLA Fixed dataset."

# Searchable tags describing the runs that will live in this experiment.
experiment_tags = {"project_name": "driver-behavior-prediction"}
experiment_tags["mlflow.note.content"] = experiment_description

# Register the experiment under its unique name. A RestException from the
# tracking server is reported as "already exists" and the cell carries on.
try:
    driver_behavior_experiment = client.create_experiment(name="CARLA_Fixed", tags=experiment_tags)
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

Experiment already exists: RESOURCE_ALREADY_EXISTS: Experiment 'CARLA_Fixed' already exists.


In [None]:
# Train on the CARLA (fixed) windows; validate on the UAH windows in X_val_base.
X_train, y_train = X_carla_fixed, y_carla_fixed

# Fit the scaler on the synthetic training windows only, then apply the same
# transform to the validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

# Refresh the signature for this data set, as the UAH and SUMO_Fixed
# preparation cells do. Without this line `signature` keeps whatever value the
# last executed preparation cell left behind, which was inferred from a
# different data set.
# NOTE(review): presumably consumed when models are logged - confirm.
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('CARLA_Fixed')
data_name = 'CARLA_Fixed'

In [24]:
# One hyper-parameter sweep per estimator family on the CARLA (fixed) data.
#
# features_names now receives `carla_columns_to_keep`, the column list the
# CARLA validation windows are built from. The previous value,
# `columns_to_keep`, is the UAH column list from the "Real Only" section and
# does not describe this data set.
# NOTE(review): how run_experiments uses features_names is not visible here -
# confirm it is only descriptive metadata.
#
# `model` is rebound at notebook level before every call
# (presumably the training helpers read it as a global - confirm).
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name='RandomForestClassifier', features_names=carla_columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name='SVC', features_names=carla_columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name='LinearSVC', features_names=carla_columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name='XGBClassifier', features_names=carla_columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 10, 'n_estimators': 20, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/4c7a3ef88d564a37bc32a0ec6c7d9ba2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225

Params: {'max_depth': 10, 'n_estimators': 200, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/c2dc35308fa342bf922068f3e51682f8

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225   

Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}            
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/265818ea574e433

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/b5fc43555b77453fbcb123bec110377d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225

Params: {'C': 1, 'gamma': 10, 'kernel': 'rbf'}                                   
 20%|██        | 1/5 [00:19<01:19, 19.97s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/b073b5f22f044eacb182960492692aaa

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

Params: {'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'}                               
 40%|████      | 2/5 [01:23<02:16, 45.57s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/9d7fc4a5f341406aa2548ba5fb170352

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

Params: {'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'}                              
 60%|██████    | 3/5 [01:43<01:08, 34.01s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/78a0610cc5be4d1f89a8c303c16616ed

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

Params: {'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'}                             
 80%|████████  | 4/5 [02:11<00:31, 31.73s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/1a3a4d9451a044168ef302a467e1fef2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

100%|██████████| 5/5 [02:39<00:00, 31.97s/trial, best loss: -0.33310749162724596]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/e75a091c789b4f93bff4f46c66f9e92d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.01}                                  
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/c1c13aac142a4e59bf01ab2b138de318

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225

Params: {'C': 0.0001}                                                            
 20%|██        | 1/5 [00:01<00:07,  1.89s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/94e0c28fae0e448daf5eab8a259be4ce

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

Params: {'C': 1}                                                                 
 40%|████      | 2/5 [00:03<00:05,  1.70s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/99911b152a5a4d509524a7361b11ccbc

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

Params: {'C': 1}                                                                 
 60%|██████    | 3/5 [00:05<00:03,  1.84s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/e72fc50c685e483b9f6474577af0a5b2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

Params: {'C': 0.0001}                                                            
 80%|████████  | 4/5 [00:07<00:01,  1.93s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/f4fec242e5ac44a0ac72099eed1bba26

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225    

100%|██████████| 5/5 [00:09<00:00,  1.83s/trial, best loss: -0.33310749162724596]
🏃 View run svc-linear-sweep at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/7e047b45c06a42838ec9c54faff142ae
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.3, 'max_depth': 10, 'n_estimators': 500, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.3, 'max_depth': 10, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/219343887402467225/runs/4542ba0acd2445729d9e03a293171577

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/219343887402467225

Params: {'learning_rate': 0.4, 'max_depth': 10, 'n_estimators': 700, 'ra

0.3934144677894773

### LLM

In [None]:
# Free-text description of the experiment; it is attached below through the
# "mlflow.note.content" tag so that it appears in the UI.
experiment_description = "Experiment to train models on the CARLA LLM dataset."

# Searchable tags describing the runs that will live in this experiment.
experiment_tags = {"project_name": "driver-behavior-prediction"}
experiment_tags["mlflow.note.content"] = experiment_description

# Register the experiment under its unique name. A RestException from the
# tracking server is reported as "already exists" and the cell carries on.
try:
    driver_behavior_experiment = client.create_experiment(name="CARLA_LLM", tags=experiment_tags)
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
# Train on the CARLA (LLM) windows; validate on the UAH windows in X_val_base.
X_train, y_train = X_carla_llm, y_carla_llm

# Fit the scaler on the synthetic training windows only, then apply the same
# transform to the validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

# Refresh the signature for this data set, as the UAH and SUMO_Fixed
# preparation cells do. Without this line `signature` keeps whatever value the
# last executed preparation cell left behind, which was inferred from a
# different data set.
# NOTE(review): presumably consumed when models are logged - confirm.
signature = infer_signature(X_train, y_train)
mlflow.set_experiment('CARLA_LLM')
data_name = 'CARLA_LLM'

In [27]:
# One hyper-parameter sweep per estimator family on the CARLA (LLM) data.
#
# features_names now receives `carla_columns_to_keep`, the column list the
# CARLA validation windows are built from. The previous value,
# `columns_to_keep`, is the UAH column list from the "Real Only" section and
# does not describe this data set.
# NOTE(review): how run_experiments uses features_names is not visible here -
# confirm it is only descriptive metadata.
#
# `model` is rebound at notebook level before every call
# (presumably the training helpers read it as a global - confirm).
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name='RandomForestClassifier', features_names=carla_columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name='SVC', features_names=carla_columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name='LinearSVC', features_names=carla_columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name='XGBClassifier', features_names=carla_columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/2300379d66334c93881158be9f3ea81f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628

Params: {'max_depth': None, 'n_estimators': 500, 'random_state': 42}             
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/095d9ff58f5f48079cc10a6e737e8a83

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

Params: {'max_depth': 10, 'n_estimators': 500, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/251fc936

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/64a44e1539384e6d820f902973d96d4a

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628

Params: {'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'}                              
 20%|██        | 1/5 [01:25<05:42, 85.73s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/dd8a74273b5f4533acdbbe006d209675

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

Params: {'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'}                             
 40%|████      | 2/5 [01:57<02:41, 53.73s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/fdf8f42881b84f7c87d26e834228f49d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

Params: {'C': 0.5, 'gamma': 10, 'kernel': 'rbf'}                                 
 60%|██████    | 3/5 [02:28<01:26, 43.41s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/6d106fb0de1b452d91e721dc66e1bf77

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

Params: {'C': 0.5, 'gamma': 10, 'kernel': 'rbf'}                                 
 80%|████████  | 4/5 [03:30<00:50, 50.80s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/e3408b83ff284546a15372743676f572

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

100%|██████████| 5/5 [04:32<00:00, 54.48s/trial, best loss: -0.33310749162724596]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/b726a9b870384cc1860bc13bd56b5b13
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 1}                                     
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/da8a2ce078214e10b2e69a7200e86a38

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628

Params: {'C': 1}                                                                 
 20%|██        | 1/5 [00:02<00:08,  2.08s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/488d8bfe3af5437aa8511158c4d4da1d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

Params: {'C': 0.0001}                                                            
 40%|████      | 2/5 [00:04<00:06,  2.10s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/5bc1cc36445747e6a1db5c435b123d67

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

Params: {'C': 0.001}                                                             
 60%|██████    | 3/5 [00:05<00:03,  1.86s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.001} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/182a8deaaf7341e08c8426f459660e48

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

Params: {'C': 0.1}                                                               
 80%|████████  | 4/5 [00:07<00:01,  1.78s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.1} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/aa25e87e2cea499d81663dc59fe736b6

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

100%|██████████| 5/5 [00:09<00:00,  1.91s/trial, best loss: -0.33310749162724596]
🏃 View run svc-linear-sweep at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/1fc740d0a526463192b33156dfb013c4
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.5, 'max_depth': 5, 'n_estimators': 500, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.5, 'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/a7ed20d649c8410d930475d2d9557bb6

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628

Params: {'learning_rate': 0.4, 'max_depth': 5, 'n_estimators': 700, 'random_s

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 700, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/8ec42b08cdb44a51b7b693504e4d6ecd

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628    

100%|██████████| 5/5 [00:37<00:00,  7.55s/trial, best loss: -0.40131969784645705]
🏃 View run xgb-sweep at: http://127.0.0.1:8080/#/experiments/339489166525188628/runs/15ee4321853346d89320159e72d27843
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/339489166525188628


0.40131969784645705

## Real + SUMO

In [None]:
# Build the SUMO-augmented training frames at three fill levels. The second
# argument of fill_synthetic_data is presumably the synthetic-to-real ratio
# (0.2 -> "20%", 1 -> "100%", 2 -> "200%") -- confirm in preprocessor.
# Within each pair the "fixed" variant is processed before the "llm" one.
sumo_uah_fixed_20, sumo_uah_llm_20 = [
    preprocessor.fill_synthetic_data(frame, 0.2)
    for frame in (sumo_uah_fixed, sumo_uah_llm)
]

sumo_uah_fixed_100, sumo_uah_llm_100 = [
    preprocessor.fill_synthetic_data(frame, 1)
    for frame in (sumo_uah_fixed, sumo_uah_llm)
]

sumo_uah_fixed_200, sumo_uah_llm_200 = [
    preprocessor.fill_synthetic_data(frame, 2)
    for frame in (sumo_uah_fixed, sumo_uah_llm)
]

In [None]:
# Cut every SUMO-augmented frame into sliding windows. Each call yields a
# (features, labels) pair; window_size and step_size come from the notebook
# configuration and are shared by all datasets.

# --- 20% ---
X_sumo_uah_fixed_20, y_sumo_uah_fixed_20 = preprocessor.sliding_windows(
    sumo_uah_fixed_20, window_size=window_size, step_size=step_size
)
X_sumo_uah_llm_20, y_sumo_uah_llm_20 = preprocessor.sliding_windows(
    sumo_uah_llm_20, window_size=window_size, step_size=step_size
)

# --- 100% ---
X_sumo_uah_fixed_100, y_sumo_uah_fixed_100 = preprocessor.sliding_windows(
    sumo_uah_fixed_100, window_size=window_size, step_size=step_size
)
X_sumo_uah_llm_100, y_sumo_uah_llm_100 = preprocessor.sliding_windows(
    sumo_uah_llm_100, window_size=window_size, step_size=step_size
)

# --- 200% ---
X_sumo_uah_fixed_200, y_sumo_uah_fixed_200 = preprocessor.sliding_windows(
    sumo_uah_fixed_200, window_size=window_size, step_size=step_size
)
X_sumo_uah_llm_200, y_sumo_uah_llm_200 = preprocessor.sliding_windows(
    sumo_uah_llm_200, window_size=window_size, step_size=step_size
)

In [None]:
# Build the validation windows shared by all "Real + SUMO" experiments.
# The UAH validation frame is restricted to the SUMO column subset before
# windowing; X_val_base stays unscaled so each experiment can apply its own
# scaler to it.
print("Columns to keep for validation:", sumo_columns_to_keep)

X_val_base, y_val = preprocessor.sliding_windows(
    uah_validation[sumo_columns_to_keep],
    window_size=window_size,
    step_size=step_size,
)
# Encode the validation labels with the same keys used for the training labels.
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc', 'angle', 'speed', 'gyro_z', 'label']


## Real + SUMO (20%)

### Fixed

In [28]:
# Experiment description that will appear in the MLflow UI.
experiment_description = (
    "Experiment to train models on the UAH + SUMO Fixed (20%) dataset."
)

# Searchable tags that define characteristics of the runs in this experiment.
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the experiment under its unique name. Re-running the cell is expected
# to fail with "already exists"; in that case driver_behavior_experiment is left
# untouched. Any other server-side error is re-raised instead of being
# misreported as a duplicate.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_SUMO_Fixed_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [29]:
# Select the UAH + SUMO Fixed (20%) windows as the active training set.
data_name = 'UAH_SUMO_Fixed_20'

# Fit the scaler on the training windows only, then reuse it for validation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_sumo_uah_fixed_20)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_fixed_20, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and encoded labels.
signature = infer_signature(X_train, y_train)
# Route the following runs to the experiment that matches this dataset.
mlflow.set_experiment(data_name)

In [30]:
# Run one hyperparameter sweep per model family on the active dataset.
# `model` is rebound before each sweep; run_experiments is not handed the class
# itself, so it presumably reads it from the notebook namespace -- confirm.
# NOTE(review): features_names is `columns_to_keep`, whereas this section builds
# its validation windows from `sumo_columns_to_keep` -- confirm the logged
# feature list is the intended one.

model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
    features_names=columns_to_keep,
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
    features_names=columns_to_keep,
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
    features_names=columns_to_keep,
)

model = xgb.XGBClassifier
# Kept as the last expression so its return value is shown as the cell output.
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
    features_names=columns_to_keep,
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 500, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/285553466500292992/runs/10a2e8f448f641f8980afc84a6f04ff7

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/285553466500292992

Params: {'max_depth': 20, 'n_estimators': 20, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/285553466500292992/runs/41ec85b27916402ca5fd25106ba93ad4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/285553466500292992   

Params: {'max_depth': None, 'n_estimators': 100, 'random_state': 42}            
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/285553466500292992/runs/cc71417b3ac744

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/285553466500292992/runs/e1291df099da4c3e878f4ad93edba17f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/285553466500292992

Params: {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}                              
🏃 View run SVC_{'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/285553466500292992/runs/a6de74e6d2aa420a8574cc016a0f9043

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/285553466500292992   

Params: {'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'}                              
🏃 View run SVC_{'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/285553466500292992/runs/3f5760aab20c4270bf2ead3ee443c553

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/285553466500292992   

Params: {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}                               
🏃 View run SVC_{'C': 0.5, 'gamma': 0.1, 'kernel': '

0.6737632970955671

### LLM

In [31]:
# Experiment description that will appear in the MLflow UI.
experiment_description = (
    "Experiment to train models on the UAH + SUMO LLM (20%) dataset."
)

# Searchable tags that define characteristics of the runs in this experiment.
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the experiment under its unique name. Re-running the cell is expected
# to fail with "already exists"; in that case driver_behavior_experiment is left
# untouched. Any other server-side error is re-raised instead of being
# misreported as a duplicate.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_SUMO_LLM_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [32]:
# Select the UAH + SUMO LLM (20%) windows as the active training set.
data_name = 'UAH_SUMO_LLM_20'

# Fit the scaler on the training windows only, then reuse it for validation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_sumo_uah_llm_20)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_llm_20, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and encoded labels.
signature = infer_signature(X_train, y_train)
# Route the following runs to the experiment that matches this dataset.
mlflow.set_experiment(data_name)

In [33]:
# Run one hyperparameter sweep per model family on the active dataset.
# `model` is rebound before each sweep; run_experiments is not handed the class
# itself, so it presumably reads it from the notebook namespace -- confirm.
# NOTE(review): features_names is `columns_to_keep`, whereas this section builds
# its validation windows from `sumo_columns_to_keep` -- confirm the logged
# feature list is the intended one.

model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
    features_names=columns_to_keep,
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
    features_names=columns_to_keep,
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
    features_names=columns_to_keep,
)

model = xgb.XGBClassifier
# Kept as the last expression so its return value is shown as the cell output.
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
    features_names=columns_to_keep,
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 500, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/824130069139841936/runs/a274a4ca54c64eb7ba4716ac59f62043

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/824130069139841936

Params: {'max_depth': 10, 'n_estimators': 500, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/824130069139841936/runs/6d4ecf9339cf45fcbc54088702660984

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/824130069139841936   

Params: {'max_depth': 20, 'n_estimators': 500, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/824130069139841936/runs/af5461e97a5

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/824130069139841936/runs/e08d9192a48c433fa006fb64e25f501c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/824130069139841936

Params: {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}                               
🏃 View run SVC_{'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/824130069139841936/runs/ac11070a3fb6467e9e2d085e898b3e38

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/824130069139841936   

Params: {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}                               
🏃 View run SVC_{'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/824130069139841936/runs/633af98a66e94be1a6dff56493c96305

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/824130069139841936   

Params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}                                 
🏃 View run SVC_{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'

0.6672160208768976

## Real + SUMO (100%)

### Fixed

In [None]:
# Experiment description that will appear in the MLflow UI.
experiment_description = (
    "Experiment to train models on the UAH + SUMO Fixed (100%) dataset."
)

# Searchable tags that define characteristics of the runs in this experiment.
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the experiment under its unique name. Re-running the cell is expected
# to fail with "already exists"; in that case driver_behavior_experiment is left
# untouched. Any other server-side error is re-raised instead of being
# misreported as a duplicate.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_SUMO_Fixed_100", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Select the UAH + SUMO Fixed (100%) windows as the active training set.
data_name = 'UAH_SUMO_Fixed_100'

# Fit the scaler on the training windows only, then reuse it for validation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_sumo_uah_fixed_100)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_fixed_100, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and encoded labels.
signature = infer_signature(X_train, y_train)
# Route the following runs to the experiment that matches this dataset.
mlflow.set_experiment(data_name)

In [36]:
# Run one hyperparameter sweep per model family on the active dataset.
# `model` is rebound before each sweep; run_experiments is not handed the class
# itself, so it presumably reads it from the notebook namespace -- confirm.
# NOTE(review): features_names is `columns_to_keep`, whereas this section builds
# its validation windows from `sumo_columns_to_keep` -- confirm the logged
# feature list is the intended one.

model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
    features_names=columns_to_keep,
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
    features_names=columns_to_keep,
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
    features_names=columns_to_keep,
)

model = xgb.XGBClassifier
# Kept as the last expression so its return value is shown as the cell output.
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
    features_names=columns_to_keep,
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/292249603410866252/runs/797d0c8540904d77aafe85b3ce40bc17

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/292249603410866252

Params: {'max_depth': 5, 'n_estimators': 100, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/292249603410866252/runs/43944aa76ae44f6eb02012c0f03f6af0

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/292249603410866252   

Params: {'max_depth': None, 'n_estimators': 20, 'random_state': 42}             
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/292249603410866252/runs/f6f8571464764cfd881

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/292249603410866252/runs/1ca2947dee7a4c18bf66a211032a0fa3

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/292249603410866252

Params: {'C': 0.5, 'gamma': 10, 'kernel': 'rbf'}                                
 20%|██        | 1/5 [01:21<05:27, 81.78s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/292249603410866252/runs/659dd5ff87374ae58dbb30a7d239b781

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/292249603410866252   

Params: {'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'}                             
🏃 View run SVC_{'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/292249603410866252/runs/0bef7541a3ab4a7eb3a28bd93e4077bc

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/292249603410866252   

Params: {'C': 0.5, 'gamma': 1, 'kernel': 'rbf'}                                 
🏃 View run SVC_{'C': 0.5, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/292249603410866252/runs/c5456bd9c32b491cb846fc1d13acfce4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/292249603410866252   

Params: {'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'}                              
🏃 View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel':

0.6949253238759916

### LLM

In [None]:
# Experiment description that will appear in the MLflow UI.
experiment_description = (
    "Experiment to train models on the UAH + SUMO LLM (100%) dataset."
)

# Searchable tags that define characteristics of the runs in this experiment.
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the experiment under its unique name. Re-running the cell is expected
# to fail with "already exists"; in that case driver_behavior_experiment is left
# untouched. Any other server-side error is re-raised instead of being
# misreported as a duplicate.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_SUMO_LLM_100", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Select the UAH + SUMO LLM (100%) windows as the active training set.
data_name = 'UAH_SUMO_LLM_100'

# Fit the scaler on the training windows only, then reuse it for validation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_sumo_uah_llm_100)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_llm_100, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and encoded labels.
signature = infer_signature(X_train, y_train)
# Route the following runs to the experiment that matches this dataset.
mlflow.set_experiment(data_name)

In [39]:
# Run one hyperparameter sweep per model family on the active dataset.
# `model` is rebound before each sweep; run_experiments is not handed the class
# itself, so it presumably reads it from the notebook namespace -- confirm.
# NOTE(review): features_names is `columns_to_keep`, whereas this section builds
# its validation windows from `sumo_columns_to_keep` -- confirm the logged
# feature list is the intended one.

model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
    features_names=columns_to_keep,
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
    features_names=columns_to_keep,
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
    features_names=columns_to_keep,
)

model = xgb.XGBClassifier
# Kept as the last expression so its return value is shown as the cell output.
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
    features_names=columns_to_keep,
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/42e0a8de81ca4fecaf42fb279bbc7497

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/366368940860072541

Params: {'max_depth': 10, 'n_estimators': 500, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/b9d4059c60d94cd592fe85eccb87e229

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/366368940860072541   

Params: {'max_depth': 10, 'n_estimators': 20, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/6dec295ecaa94f

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/cfbf2ec8c174432cb99cf0b256cc8ab5

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/366368940860072541   

Params: {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}                             
🏃 View run SVC_{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/37c1df117bf14c159ac66299b6f7d069

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/366368940860072541   

Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}                                
🏃 View run SVC_{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/dad87a95d9164d4fb8c2486cb81dad84

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/366368940860072541   

100%|██████████| 5/5 [02:39<00:00, 31.89s/trial, best loss: -0.711088506260725] 
🏃 View run svc-sweep at: http://127.0.0.1:8080/#

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/3157acc8994f4ade8d976195610e6ed7

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/366368940860072541

Params: {'learning_rate': 0.4, 'max_depth': 5, 'n_estimators': 500, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.4, 'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/dba23f55ff61484e814930bae02c4904

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/366368940860072541   

Params: {'learning_rate': 0.2, 'max_depth': None, 'n_estimators': 1000, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.2, 'max_depth': None, 'n_estimators': 1000, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/366368940860072541/runs/c375ff7477a34aa8997c28e399864ff8

🧪 View experiment at: http://127.0.0.1:8080/#

0.6602741862289948

## Real + SUMO (200%)

### Fixed

In [None]:
# Experiment description that will appear in the MLflow UI.
experiment_description = (
    "Experiment to train models on the UAH + SUMO Fixed (200%) dataset."
)

# Searchable tags that define characteristics of the runs in this experiment.
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the experiment under its unique name. Re-running the cell is expected
# to fail with "already exists"; in that case driver_behavior_experiment is left
# untouched. Any other server-side error is re-raised instead of being
# misreported as a duplicate.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_SUMO_Fixed_200", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Select the UAH + SUMO Fixed (200%) windows as the active training set.
data_name = 'UAH_SUMO_Fixed_200'

# Fit the scaler on the training windows only, then reuse it for validation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_sumo_uah_fixed_200)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_fixed_200, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and encoded labels.
signature = infer_signature(X_train, y_train)
# Route the following runs to the experiment that matches this dataset.
mlflow.set_experiment(data_name)

In [42]:
# Run one hyperparameter sweep per model family on the active dataset.
# `model` is rebound before each sweep; run_experiments is not handed the class
# itself, so it presumably reads it from the notebook namespace -- confirm.
# NOTE(review): features_names is `columns_to_keep`, whereas this section builds
# its validation windows from `sumo_columns_to_keep` -- confirm the logged
# feature list is the intended one.

model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
    features_names=columns_to_keep,
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
    features_names=columns_to_keep,
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
    features_names=columns_to_keep,
)

model = xgb.XGBClassifier
# Kept as the last expression so its return value is shown as the cell output.
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
    features_names=columns_to_keep,
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 20, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/822700870272922603/runs/d6455a68871b4b65aec359d6f442ce94

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/822700870272922603

Params: {'max_depth': 5, 'n_estimators': 500, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/822700870272922603/runs/44dc38fa2c1e43ca9bf39d4701fcc688

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/822700870272922603   

Params: {'max_depth': 10, 'n_estimators': 50, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/822700870272922603/runs/40bf325b2b10401487a

0.695258375859124

### LLM

In [None]:
# Experiment description that will appear in the MLflow UI.
experiment_description = (
    "Experiment to train models on the UAH + SUMO LLM (200%) dataset."
)

# Searchable tags that define characteristics of the runs in this experiment.
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the experiment under its unique name. Re-running the cell is expected
# to fail with "already exists"; in that case driver_behavior_experiment is left
# untouched. Any other server-side error is re-raised instead of being
# misreported as a duplicate.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_SUMO_LLM_200", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Select the UAH + SUMO LLM (200%) windows as the active training set.
data_name = 'UAH_SUMO_LLM_200'

# Fit the scaler on the training windows only, then reuse it for validation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_sumo_uah_llm_200)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_llm_200, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and encoded labels.
signature = infer_signature(X_train, y_train)
# Route the following runs to the experiment that matches this dataset.
mlflow.set_experiment(data_name)

In [45]:
# Run one hyperparameter sweep per model family on the active dataset.
# `model` is rebound before each sweep; run_experiments is not handed the class
# itself, so it presumably reads it from the notebook namespace -- confirm.
# NOTE(review): features_names is `columns_to_keep`, whereas this section builds
# its validation windows from `sumo_columns_to_keep` -- confirm the logged
# feature list is the intended one.

model = RandomForestClassifier
run_experiments(
    'rf-sweep',
    max_evals=max_evals,
    search_space=search_space_rf,
    data_name=data_name,
    model_name='RandomForestClassifier',
    features_names=columns_to_keep,
)

model = SVC
run_experiments(
    'svc-sweep',
    max_evals=max_evals,
    search_space=search_space_svc,
    data_name=data_name,
    model_name='SVC',
    features_names=columns_to_keep,
)

model = LinearSVC
run_experiments(
    'svc-linear-sweep',
    max_evals=max_evals,
    search_space=search_space_svc_linear,
    data_name=data_name,
    model_name='LinearSVC',
    features_names=columns_to_keep,
)

model = xgb.XGBClassifier
# Kept as the last expression so its return value is shown as the cell output.
run_experiments(
    'xgb-sweep',
    max_evals=max_evals,
    search_space=search_space_xgb,
    data_name=data_name,
    model_name='XGBClassifier',
    features_names=columns_to_keep,
)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/886119111104728342/runs/c5ab9a11a3084ccf993218e4ed28799e

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/886119111104728342

Params: {'max_depth': 10, 'n_estimators': 200, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/886119111104728342/runs/b7b6d48c21124d6cbb9543e664590a04

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/886119111104728342   

Params: {'max_depth': 20, 'n_estimators': 200, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/886119111104728342/runs/11cdb887194f4

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/886119111104728342/runs/7c18ff1e7007417d8abf7b562dfb3f65

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/886119111104728342   

Params: {'C': 0.01, 'gamma': 1, 'kernel': 'rbf'}                                
 40%|████      | 2/5 [01:34<02:34, 51.62s/trial, best loss: -0.7118918222609891]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/886119111104728342/runs/5f313fad0c1c4abcad28c5240c8ddc70

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/886119111104728342   

Params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}                                 
🏃 View run SVC_{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/886119111104728342/runs/97ae06cd256e48d0b3b1bcd3840812b3

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/886119111104728342   

Params: {'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'}                              
🏃 View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/886119111104728342/runs/7dfb0c65d7aa4205800f0bafbd2e4611

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/886119111104728342   

100%|██████████| 5/5 [02:38<00:00, 31.72s/trial, best loss: -0.7118918222609891]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/ex

0.6717993841066797

## Real + CARLA

In [None]:
# Build the mixed UAH + CARLA frames, one (fixed, llm) pair per ratio setting.
# Calls are evaluated in the same order as before: fixed first, then llm.
# NOTE(review): the variable suffixes are 20/100/200 while the ratios passed
# are 0.2/0.5/0.7 — confirm the intended mapping against
# `preprocessor.fill_synthetic_data`.
carla_uah_fixed_20, carla_uah_llm_20 = (
    preprocessor.fill_synthetic_data(carla_uah_fixed, 0.2),
    preprocessor.fill_synthetic_data(carla_uah_llm, 0.2),
)

carla_uah_fixed_100, carla_uah_llm_100 = (
    preprocessor.fill_synthetic_data(carla_uah_fixed, 0.5),
    preprocessor.fill_synthetic_data(carla_uah_llm, 0.5),
)

carla_uah_fixed_200, carla_uah_llm_200 = (
    preprocessor.fill_synthetic_data(carla_uah_fixed, 0.7),
    preprocessor.fill_synthetic_data(carla_uah_llm, 0.7),
)

In [None]:
# Every mixed frame is windowed with the same notebook-level settings, so the
# keyword arguments are gathered once and reused for all six calls.
_sliding_kwargs = {"window_size": window_size, "step_size": step_size}

# 20 setting
X_carla_uah_fixed_20, y_carla_uah_fixed_20 = preprocessor.sliding_windows(
    carla_uah_fixed_20, **_sliding_kwargs
)
X_carla_uah_llm_20, y_carla_uah_llm_20 = preprocessor.sliding_windows(
    carla_uah_llm_20, **_sliding_kwargs
)

# 100 setting
X_carla_uah_fixed_100, y_carla_uah_fixed_100 = preprocessor.sliding_windows(
    carla_uah_fixed_100, **_sliding_kwargs
)
X_carla_uah_llm_100, y_carla_uah_llm_100 = preprocessor.sliding_windows(
    carla_uah_llm_100, **_sliding_kwargs
)

# 200 setting
X_carla_uah_fixed_200, y_carla_uah_fixed_200 = preprocessor.sliding_windows(
    carla_uah_fixed_200, **_sliding_kwargs
)
X_carla_uah_llm_200, y_carla_uah_llm_200 = preprocessor.sliding_windows(
    carla_uah_llm_200, **_sliding_kwargs
)

In [None]:
print("Columns to keep for validation:", carla_columns_to_keep)

# Window the UAH validation split, restricted to `carla_columns_to_keep`.
# `X_val_base` is the unscaled feature matrix; each experiment section below
# scales it with its own training-set scaler.
uah_validation_subset = uah_validation[carla_columns_to_keep]
X_val_base, y_val_raw = preprocessor.sliding_windows(
    uah_validation_subset, window_size=window_size, step_size=step_size
)

# Encode the window labels with the shared `one_hot_keys`.
y_val = preprocessor.one_hot_encode(y_val_raw, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'label']


## Real + CARLA (20%)

### Fixed

In [49]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA Fixed (20%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name.
# Re-running this cell is expected to fail with RESOURCE_ALREADY_EXISTS, which
# is reported and ignored. Any other REST error (e.g. invalid parameters or a
# permission problem) is re-raised instead of being mislabelled as
# "already exists" and silently skipped.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_CARLA_Fixed_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [50]:
# Fit the scaler on the training windows only, then apply the same statistics
# to the shared validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_carla_uah_fixed_20)
X_val = scaler.transform(X_val_base)

# One-hot encode the training labels with the same keys used for `y_val`.
y_train = preprocessor.one_hot_encode(y_carla_uah_fixed_20, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and the encoded targets.
signature = infer_signature(X_train, y_train)

# The dataset label doubles as the MLflow experiment name.
data_name = 'UAH_CARLA_Fixed_20'
mlflow.set_experiment(data_name)

In [51]:
# One hyperparameter sweep per model family, in the original order:
# (estimator class, parent run name, search space, model name).
sweep_plan = (
    (RandomForestClassifier, 'rf-sweep', search_space_rf, 'RandomForestClassifier'),
    (SVC, 'svc-sweep', search_space_svc, 'SVC'),
    (LinearSVC, 'svc-linear-sweep', search_space_svc_linear, 'LinearSVC'),
    (xgb.XGBClassifier, 'xgb-sweep', search_space_xgb, 'XGBClassifier'),
)

# `model` is rebound at notebook scope before each sweep, as in the
# straight-line version (presumably read as a global by the objective — confirm).
for model, sweep_run_name, sweep_space, sweep_model_name in sweep_plan:
    last_sweep_output = run_experiments(
        sweep_run_name,
        max_evals=max_evals,
        search_space=sweep_space,
        data_name=data_name,
        model_name=sweep_model_name,
        features_names=columns_to_keep,
    )

# Re-emit the final sweep's return value so the cell still displays it.
last_sweep_output

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 20, 'n_estimators': 20, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/972965940385745321/runs/e55eb96153eb41f2ade00344f8676d6f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/972965940385745321

Params: {'max_depth': 5, 'n_estimators': 100, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/972965940385745321/runs/62871c65e76842f4a574ea0e8b91e03d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/972965940385745321   

Params: {'max_depth': None, 'n_estimators': 100, 'random_state': 42}            
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/972965940385745321/runs/e9efdfd904b8454d

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/972965940385745321/runs/5b8787c7802e4e719e92d08d1a04844b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/972965940385745321

Params: {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}                               
🏃 View run SVC_{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/972965940385745321/runs/6fc7bc46bdab48e6a36be8048d802455

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/972965940385745321   

Params: {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}                             
🏃 View run SVC_{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/972965940385745321/runs/9bb105879fc2410686b4484da99c9629

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/972965940385745321  

Params: {'C': 0.05, 'gamma': 0.01, 'kernel': 'rbf'}                            
🏃 View run SVC_{'C': 0.05, 'gamma': 0.01, 'kernel':

0.615789436457017

### LLM

In [52]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA LLM (20%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name.
# Re-running this cell is expected to fail with RESOURCE_ALREADY_EXISTS, which
# is reported and ignored. Any other REST error (e.g. invalid parameters or a
# permission problem) is re-raised instead of being mislabelled as
# "already exists" and silently skipped.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_CARLA_LLM_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

Experiment already exists: RESOURCE_ALREADY_EXISTS: Experiment 'UAH_CARLA_LLM_20' already exists.


In [53]:
# Fit the scaler on the training windows only, then apply the same statistics
# to the shared validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_carla_uah_llm_20)
X_val = scaler.transform(X_val_base)

# One-hot encode the training labels with the same keys used for `y_val`.
y_train = preprocessor.one_hot_encode(y_carla_uah_llm_20, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and the encoded targets.
signature = infer_signature(X_train, y_train)

# The dataset label doubles as the MLflow experiment name.
data_name = 'UAH_CARLA_LLM_20'
mlflow.set_experiment(data_name)

In [54]:
# One hyperparameter sweep per model family, in the original order:
# (estimator class, parent run name, search space, model name).
sweep_plan = (
    (RandomForestClassifier, 'rf-sweep', search_space_rf, 'RandomForestClassifier'),
    (SVC, 'svc-sweep', search_space_svc, 'SVC'),
    (LinearSVC, 'svc-linear-sweep', search_space_svc_linear, 'LinearSVC'),
    (xgb.XGBClassifier, 'xgb-sweep', search_space_xgb, 'XGBClassifier'),
)

# `model` is rebound at notebook scope before each sweep, as in the
# straight-line version (presumably read as a global by the objective — confirm).
for model, sweep_run_name, sweep_space, sweep_model_name in sweep_plan:
    last_sweep_output = run_experiments(
        sweep_run_name,
        max_evals=max_evals,
        search_space=sweep_space,
        data_name=data_name,
        model_name=sweep_model_name,
        features_names=columns_to_keep,
    )

# Re-emit the final sweep's return value so the cell still displays it.
last_sweep_output

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 20, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/ae4db70369b04079a79c6af343b9f49b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/520029334633823444

Params: {'max_depth': 5, 'n_estimators': 20, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/2ab81e7a8d0148628798407fdc370f6c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/520029334633823444   

Params: {'max_depth': 5, 'n_estimators': 20, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/7656599aaac343cd904e7

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/5ec292750bf3469bb64c12d4686a3b4f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/520029334633823444

Params: {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}                                
🏃 View run SVC_{'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/a230fdcbf4e84f8e9c2c98cce24db964

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/520029334633823444    

Params: {'C': 0.05, 'gamma': 1, 'kernel': 'rbf'}                                 
 40%|████      | 2/5 [02:54<04:09, 83.28s/trial, best loss: -0.61287736879774]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/0d1e30bf199740baaf0121a105bba3ab

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/520029334633823444 

Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}                              
🏃 View run SVC_{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/ab9aa9b02a4f44cb9e7d8b10bfc6822f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/520029334633823444 

Params: {'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'}                          
🏃 View run SVC_{'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/520029334633823444/runs/3c73b3c9fd3e43b4a3b1065870c550ae

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/520029334633823444 

100%|██████████| 5/5 [05:55<00:00, 71.13s/trial, best loss: -0.61287736879774]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments

0.6271407585576513

## Real + CARLA (100%)

### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA Fixed (100%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name.
# Re-running this cell is expected to fail with RESOURCE_ALREADY_EXISTS, which
# is reported and ignored. Any other REST error (e.g. invalid parameters or a
# permission problem) is re-raised instead of being mislabelled as
# "already exists" and silently skipped.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_CARLA_Fixed_100", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Fit the scaler on the training windows only, then apply the same statistics
# to the shared validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_carla_uah_fixed_100)
X_val = scaler.transform(X_val_base)

# One-hot encode the training labels with the same keys used for `y_val`.
y_train = preprocessor.one_hot_encode(y_carla_uah_fixed_100, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and the encoded targets.
signature = infer_signature(X_train, y_train)

# The dataset label doubles as the MLflow experiment name.
data_name = 'UAH_CARLA_Fixed_100'
mlflow.set_experiment(data_name)

In [57]:
# One hyperparameter sweep per model family, in the original order:
# (estimator class, parent run name, search space, model name).
sweep_plan = (
    (RandomForestClassifier, 'rf-sweep', search_space_rf, 'RandomForestClassifier'),
    (SVC, 'svc-sweep', search_space_svc, 'SVC'),
    (LinearSVC, 'svc-linear-sweep', search_space_svc_linear, 'LinearSVC'),
    (xgb.XGBClassifier, 'xgb-sweep', search_space_xgb, 'XGBClassifier'),
)

# `model` is rebound at notebook scope before each sweep, as in the
# straight-line version (presumably read as a global by the objective — confirm).
for model, sweep_run_name, sweep_space, sweep_model_name in sweep_plan:
    last_sweep_output = run_experiments(
        sweep_run_name,
        max_evals=max_evals,
        search_space=sweep_space,
        data_name=data_name,
        model_name=sweep_model_name,
        features_names=columns_to_keep,
    )

# Re-emit the final sweep's return value so the cell still displays it.
last_sweep_output

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 10, 'n_estimators': 200, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/ca688acb27b84678a1f51a8637365780

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118

Params: {'max_depth': 5, 'n_estimators': 500, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/87a2c2684d0f44529c47e377a93fe033

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118   

Params: {'max_depth': 20, 'n_estimators': 100, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/7e7bb5f668e545c6

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/c7de33a02c024abd94450b19ea7fc354

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118

Params: {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}                                 
 20%|██        | 1/5 [01:24<05:37, 84.29s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/70e278dc0ff4416bb0533d30701a2d13

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118   

Params: {'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'}                              
 40%|████      | 2/5 [03:14<04:59, 99.74s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/025c758740bb413aa66ad8d06408dd64

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118   

Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}                                
🏃 View run SVC_{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/eacb795ddf4f4b56a281c6c725b1d03b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118   

Params: {'C': 0.1, 'gamma': 10, 'kernel': 'rbf'}                                
 80%|████████  | 4/5 [04:21<00:54, 54.67s/trial, best loss: -0.5530845862715699]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/fc7dc18d8e5b4d1e8d6888fe3c90b69b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118   

100%|██████████| 5/5 [06:26<00:00, 77.37s/trial, best loss: -0.5530845862715699]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/bdadae2edf7d48638e037ed584e8a1d9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.0001}                                
🏃 View run LinearSVC_{'C': 0.0001} at: http://127.0.0.1:8080/#/experiments/437010834851071118/runs/15536401f6b645cfb316a8969a052fb2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/437010834851071118

Params: {'C': 0.001}                                                             
🏃 View run LinearSVC_{'C': 0.001} at: http://127.0.0.1:8080/#/experiments/4370108348

0.6099322215772953

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA LLM (100%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name.
# Re-running this cell is expected to fail with RESOURCE_ALREADY_EXISTS, which
# is reported and ignored. Any other REST error (e.g. invalid parameters or a
# permission problem) is re-raised instead of being mislabelled as
# "already exists" and silently skipped.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_CARLA_LLM_100", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Fit the scaler on the training windows only, then apply the same statistics
# to the shared validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_carla_uah_llm_100)
X_val = scaler.transform(X_val_base)

# One-hot encode the training labels with the same keys used for `y_val`.
y_train = preprocessor.one_hot_encode(y_carla_uah_llm_100, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and the encoded targets.
signature = infer_signature(X_train, y_train)

# The dataset label doubles as the MLflow experiment name.
data_name = 'UAH_CARLA_LLM_100'
mlflow.set_experiment(data_name)

In [60]:
# One hyperparameter sweep per model family, in the original order:
# (estimator class, parent run name, search space, model name).
sweep_plan = (
    (RandomForestClassifier, 'rf-sweep', search_space_rf, 'RandomForestClassifier'),
    (SVC, 'svc-sweep', search_space_svc, 'SVC'),
    (LinearSVC, 'svc-linear-sweep', search_space_svc_linear, 'LinearSVC'),
    (xgb.XGBClassifier, 'xgb-sweep', search_space_xgb, 'XGBClassifier'),
)

# `model` is rebound at notebook scope before each sweep, as in the
# straight-line version (presumably read as a global by the objective — confirm).
for model, sweep_run_name, sweep_space, sweep_model_name in sweep_plan:
    last_sweep_output = run_experiments(
        sweep_run_name,
        max_evals=max_evals,
        search_space=sweep_space,
        data_name=data_name,
        model_name=sweep_model_name,
        features_names=columns_to_keep,
    )

# Re-emit the final sweep's return value so the cell still displays it.
last_sweep_output

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 100, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/174820846745286408/runs/06108dc6801449ee8e8e8fe4fdc34bea

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/174820846745286408

Params: {'max_depth': 20, 'n_estimators': 200, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/174820846745286408/runs/bc922991a0a44e58b4d8435041e3bb87

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/174820846745286408   

Params: {'max_depth': 5, 'n_estimators': 100, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/174820846745286408/runs/f55d1b8f56b9499f

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/174820846745286408/runs/cc727b600a974b659761b8557ecc7ca8

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/174820846745286408 

Params: {'C': 0.05, 'gamma': 1, 'kernel': 'rbf'}                              
 60%|██████    | 3/5 [03:59<02:57, 88.58s/trial, best loss: -0.61287736879774]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/174820846745286408/runs/0b343c80ebfb4e9e8ef4b72622ddf5b2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/174820846745286408 

Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}                              
🏃 View run SVC_{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/174820846745286408/runs/fc43ef5285b34b28979e038be45791f7

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/174820846745286408 

100%|██████████| 5/5 [06:26<00:00, 77.22s/trial, best loss: -0.61287736879774]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/174820846745286408/runs/ecaf62ed9daa434797031284522e0521
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/174820846745286408
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.01}                                  
🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experime

0.633336868620779

## Real + CARLA (200%)

### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA Fixed (200%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name.
# Re-running this cell is expected to fail with RESOURCE_ALREADY_EXISTS, which
# is reported and ignored. Any other REST error (e.g. invalid parameters or a
# permission problem) is re-raised instead of being mislabelled as
# "already exists" and silently skipped.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_CARLA_Fixed_200", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Fit the scaler on the training windows only, then apply the same statistics
# to the shared validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_carla_uah_fixed_200)
X_val = scaler.transform(X_val_base)

# One-hot encode the training labels with the same keys used for `y_val`.
y_train = preprocessor.one_hot_encode(y_carla_uah_fixed_200, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and the encoded targets.
signature = infer_signature(X_train, y_train)

# The dataset label doubles as the MLflow experiment name.
data_name = 'UAH_CARLA_Fixed_200'
mlflow.set_experiment(data_name)

In [63]:
# One hyperparameter sweep per model family, in the original order:
# (estimator class, parent run name, search space, model name).
sweep_plan = (
    (RandomForestClassifier, 'rf-sweep', search_space_rf, 'RandomForestClassifier'),
    (SVC, 'svc-sweep', search_space_svc, 'SVC'),
    (LinearSVC, 'svc-linear-sweep', search_space_svc_linear, 'LinearSVC'),
    (xgb.XGBClassifier, 'xgb-sweep', search_space_xgb, 'XGBClassifier'),
)

# `model` is rebound at notebook scope before each sweep, as in the
# straight-line version (presumably read as a global by the objective — confirm).
for model, sweep_run_name, sweep_space, sweep_model_name in sweep_plan:
    last_sweep_output = run_experiments(
        sweep_run_name,
        max_evals=max_evals,
        search_space=sweep_space,
        data_name=data_name,
        model_name=sweep_model_name,
        features_names=columns_to_keep,
    )

# Re-emit the final sweep's return value so the cell still displays it.
last_sweep_output

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 500, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/75f84886f2e84d78b7a24dec59ad4cad

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801

Params: {'max_depth': None, 'n_estimators': 20, 'random_state': 42}            
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/d441b563138c45f88b2eb6e6e35cdda6

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801  

Params: {'max_depth': None, 'n_estimators': 50, 'random_state': 42}             
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/6643c7553a294fa

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/01d1221947cb4c48b825f0d9a071396d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801   

Params: {'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'}                              
🏃 View run SVC_{'C': 0.5, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/0dced9bbfc6b4182bfad8b7c9c8cbd6a

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801   

Params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}                                 
🏃 View run SVC_{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/93d45bbb4474491f98be27f7f89be029

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801   

100%|██████████| 5/5 [05:16<00:00, 63.37s/trial, best loss: -0.5980641760820076]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/ex

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/5e255fb83c9c40d48a19745ace363f58

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801   

Params: {'learning_rate': 0.5, 'max_depth': 5, 'n_estimators': 500, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.5, 'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/c9a7e9da82704807aecaafb977937e8b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801   

100%|██████████| 5/5 [00:20<00:00,  4.04s/trial, best loss: -0.6271407585576513]
🏃 View run xgb-sweep at: http://127.0.0.1:8080/#/experiments/813301024797058801/runs/3f68eaf9ded4439d9bd663e616e0dbbe
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/813301024797058801


0.6271407585576513

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA LLM (200%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name.
# Re-running this cell is expected to fail with RESOURCE_ALREADY_EXISTS, which
# is reported and ignored. Any other REST error (e.g. invalid parameters or a
# permission problem) is re-raised instead of being mislabelled as
# "already exists" and silently skipped.
try:
    driver_behavior_experiment = client.create_experiment(
        name="UAH_CARLA_LLM_200", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    if e.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    print(f"Experiment already exists: {e}")

In [None]:
# Fit the scaler on the training windows only, then apply the same statistics
# to the shared validation windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_carla_uah_llm_200)
X_val = scaler.transform(X_val_base)

# One-hot encode the training labels with the same keys used for `y_val`.
y_train = preprocessor.one_hot_encode(y_carla_uah_llm_200, one_hot_keys=one_hot_keys)

# Model signature inferred from the scaled inputs and the encoded targets.
signature = infer_signature(X_train, y_train)

# The dataset label doubles as the MLflow experiment name.
data_name = 'UAH_CARLA_LLM_200'
mlflow.set_experiment(data_name)

In [None]:
# One hyperparameter sweep per model family, in the original order:
# (estimator class, parent run name, search space, model name).
sweep_plan = (
    (RandomForestClassifier, 'rf-sweep', search_space_rf, 'RandomForestClassifier'),
    (SVC, 'svc-sweep', search_space_svc, 'SVC'),
    (LinearSVC, 'svc-linear-sweep', search_space_svc_linear, 'LinearSVC'),
    (xgb.XGBClassifier, 'xgb-sweep', search_space_xgb, 'XGBClassifier'),
)

# `model` is rebound at notebook scope before each sweep, as in the
# straight-line version (presumably read as a global by the objective — confirm).
for model, sweep_run_name, sweep_space, sweep_model_name in sweep_plan:
    last_sweep_output = run_experiments(
        sweep_run_name,
        max_evals=max_evals,
        search_space=sweep_space,
        data_name=data_name,
        model_name=sweep_model_name,
        features_names=columns_to_keep,
    )

# Re-emit the final sweep's return value so the cell still displays it.
last_sweep_output

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/702296139000575806/runs/24c80a0c3c4d48318bc08d541c00beac

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/702296139000575806

Params: {'max_depth': 50, 'n_estimators': 500, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/702296139000575806/runs/0206531025df4eae81ef85e017854088

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/702296139000575806   

Params: {'max_depth': 5, 'n_estimators': 50, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/702296139000575806/runs/d46f071f7f3b4

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/702296139000575806/runs/4bdce57bac2b46bd9a01b9022f082e5c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/702296139000575806

Params: {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}                                 
 20%|██        | 1/5 [00:37<02:29, 37.27s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/702296139000575806/runs/33a1bb8717cd43aab074798030f9d07c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/702296139000575806   

Params: {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}                               
🏃 View run SVC_{'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/702296139000575806/runs/75621c188abe43638db17d9dbe540058

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/702296139000575806   

Params: {'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'}                            
🏃 View run SVC_{'C': 0.01, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/702296139000575806/runs/24ba94cb8dc64bb1b3262536e176c05c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/702296139000575806 

Params: {'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'}                          
🏃 View run SVC_{'C': 0.05, 'gamma': 0.001, 'kernel':

0.6201520237843372