This notebook is used to organize the experiments. If you just want the best models found, you can refer to the `3_model_training` notebook.

In [1]:
import os

import mlflow
from sklearn.model_selection import train_test_split
from mlflow.models import infer_signature
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, log_loss
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from functools import partial

os.sys.path.append(os.path.abspath('../src'))
from data import loader
from data import preprocessor

mapname = 'Town01'

In [2]:
def create_and_train_model(model, args):
    """
    Function to create and train a model with given parameters.

    Args:
        model: The model to be trained.
        args: Arguments for training the model.

    Returns:
        result: The result of the training process.
    """
    result = {}
    # Train model with current hyperparameters
    print(f"Params: {args}")
    md = model(**args)
    md.fit(X_train, y_train)
    # Predict on the validation set
    y_pred = md.predict(X_val)
    # Log training results
    result["f1_score"] = f1_score(y_val, y_pred, average='weighted')
    result["accuracy"] = accuracy_score(y_val, y_pred)
    result["recall"] = recall_score(y_val, y_pred, average='weighted')
    result["precision"] = precision_score(y_val, y_pred, average='weighted')
    result["model"] = md

    return result

In [3]:
def objective(params, model_name):
    """
    Objective function for hyperparameter optimization.
    This function will be called by Hyperopt for each trial.
    """
    with mlflow.start_run(nested=True, run_name=f"{model_name}_{params}"):
        # Log hyperparameters being tested
        mlflow.log_params(params)

        # Train model with current hyperparameters
        result = create_and_train_model(
            model,
            args=params
        )

        # Log training results
        mlflow.log_metrics(
            {
                "accuracy": result["accuracy"],
                "recall": result["recall"],
                "precision": result["precision"],
                "f1_score": result["f1_score"],
            }
        )

        # Log the trained model
        mlflow.sklearn.log_model(
            result["model"], name=model_name, signature=signature)

        # Return loss for Hyperopt (it minimizes)
        return {"loss": -result["f1_score"], "status": STATUS_OK, "model": f'{model_name}_{params}'}

In [4]:
def run_experiments(run_name, max_evals, search_space, data_name, features_names, model_name):
    # Create or set experiment

    print(
        f"This will run {max_evals} trials to find optimal hyperparameters...")

    with mlflow.start_run(run_name=run_name):

        mlflow.set_tag("model_type", model_name)

        # Log experiment metadata
        mlflow.log_params(
            {
                "optimization_method": "Tree-structured Parzen Estimator (TPE)",
                "max_evaluations": max_evals,
                "objective_metric": "log_loss",
                "dataset": data_name,
                "features": features_names,
            }
        )

        # Run optimization
        trials = Trials()
        objective_with_name = partial(objective, model_name=model_name)

        best_params = fmin(
            fn=objective_with_name,
            space=search_space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=trials,
            verbose=True,
        )

        # Find and log best results
        best_trial = min(trials.results, key=lambda x: x["loss"])
        best_f1_score = -best_trial["loss"]

        # Log optimization results
        mlflow.log_params(best_params)
        mlflow.log_params({"best_model": best_trial["model"]})
        mlflow.log_metrics(
            {
                "best_f1_score": best_f1_score,
                "total_trials": len(trials.trials),
                "optimization_completed": 1,
            }
        )

    return best_f1_score 


In [5]:
client = mlflow.tracking.MlflowClient("http://127.0.0.1:8080")
mlflow.set_tracking_uri("http://127.0.0.1:8080")

## Preparing the Data

In [6]:
data_path = '../data'
uah_training = pd.read_csv(f'{data_path}/base/training_set_uah.csv')
uah_validation = pd.read_csv(f'{data_path}/base/validation_set_uah.csv')

carla_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/carla/carla_fixed.csv').drop(columns=['origin'])
carla_llm = pd.read_csv(f'{data_path}/merged/{mapname}/carla/carla_llm.csv').drop(columns=['origin'])

sumo_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/sumo/sumo_fixed.csv').drop(columns=['origin'])
sumo_llm = pd.read_csv(f'{data_path}/merged/{mapname}/sumo/sumo_llm.csv').drop(columns=['origin'])

carla_uah_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/carla_uah/carla_uah_fixed.csv')
carla_uah_llm = pd.read_csv(f'{data_path}/merged/{mapname}/carla_uah/carla_uah_llm.csv')

sumo_uah_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/sumo_uah/sumo_uah_fixed.csv')
sumo_uah_llm = pd.read_csv(f'{data_path}/merged/{mapname}/sumo_uah/sumo_uah_llm.csv')

Applying sliding windows to UAH, SUMO and CARLA full data

In [7]:
window_size = 10
step_size = 5
one_hot_keys = {
    'normal': 0,
    'aggressive': 1
}

Defining the search space for each model

In [8]:
max_evals = 5

search_space_rf = {
    "n_estimators": hp.choice("n_estimators", [20, 50, 100, 200, 500]),
    "max_depth": hp.choice("max_depth", [None, 5, 10, 20, 50]),
    "random_state": 42,
}
search_space_svc = {
    "C": hp.choice("C", [0.01, 0.05, 0.1, 0.5, 1]),  # Regularization parameter
    "kernel": hp.choice("kernel", ["rbf"]),  # Kernel type
    "gamma": hp.choice("gamma", [0.001, 0.01, 0.1, 1, 10]),  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
}
search_space_svc_linear = {
    "C": hp.choice("C", [10**-4, 10**-3, 10**-2, 0.1, 1])  # Regularization parameter
}
search_space_xgb = {
    "n_estimators": hp.choice("n_estimators", [200, 500, 700, 1000]),
    "max_depth": hp.choice("max_depth", [None, 5, 10]),
    "learning_rate": hp.choice("learning_rate", [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5]),  # Learning rate
    # "subsample": hp.uniform("subsample", 0.5, 1.0),
    # "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1.0),
    "random_state": 42,
}

## Real Only

In [9]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH driveset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_UAH", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [10]:
columns_to_keep = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'speed', 'label']

X_train, y_train = preprocessor.sliding_windows(uah_training[columns_to_keep], window_size=window_size, step_size=step_size)
X_val, y_val = preprocessor.sliding_windows(uah_validation[columns_to_keep], window_size=window_size, step_size=step_size)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_UAH')
data_name = 'UAH'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 20, 'n_estimators': 200, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/b4545e6d62fb46ee9f6327196b0e46fd

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824

Params: {'max_depth': 50, 'n_estimators': 20, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/6b7d4a795d7b4e998301d39109dd01bc

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824   

Params: {'max_depth': 10, 'n_estimators': 200, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/cb94b8efb8bb4e7c

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/680415510059672824/runs/3547e21a18804c97a66c4ecb8742fb8a

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/680415510059672824  

Params: {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}                                
 60%|██████    | 3/5 [02:52<02:10, 65.14s/trial, best loss: -0.750378634529194]

## SUMO Only

In [None]:
# SUMO
X_sumo_fixed, y_sumo_fixed = preprocessor.sliding_windows(sumo_fixed, window_size=window_size, step_size=step_size)
X_sumo_llm, y_sumo_llm = preprocessor.sliding_windows(sumo_llm, window_size=window_size, step_size=step_size)

In [None]:
# Getting the validation for SUMO variables
columns_to_keep = sumo_fixed.columns.tolist()
print("Columns to keep for validation:", columns_to_keep)

X_val_base, y_val = preprocessor.sliding_windows(uah_validation[columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc', 'angle', 'speed', 'gyro_z', 'label']


### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the SUMO Fixed dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_SUMO_Fixed", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
X_train, y_train = X_sumo_fixed, y_sumo_fixed

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_SUMO_Fixed')
data_name = 'SUMO_Fixed'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/d0f23778a31f4c1d803867a4a88bd64a

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589

Params: {'max_depth': 5, 'n_estimators': 100, 'random_state': 42}                
 20%|██        | 1/5 [00:03<00:14,  3.74s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/aff187b36d6744a1bb3bc9f0e9b8921f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589    

Params: {'max_depth': 5, 'n_estimators': 500, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/48f3444a62ee445ea3dae162e280fa09

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589    

Params: {'max_depth': 50, 'n_estimators': 50, 'random_state': 42}                
 60%|██████    | 3/5 [00:33<00:26, 13.48s/trial, best loss: -0.3334085595387637]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/fbf155e119bb451b8c29549f6339dcdd

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589   

Params: {'max_depth': 5, 'n_estimators': 500, 'random_state': 42}               
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 500, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/8877be27fde040539a515b1116b4be97

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589   

100%|██████████| 5/5 [01:05<00:00, 13.16s/trial, best loss: -0.3334085595387637]
🏃 View run rf-sweep at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/40f5f5abf89d4b9da16e76e53ef29f24
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.01, 'gamma': 0.1, 'kernel': 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/f968b4ec201a4c64a6da6a240416c9f5

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589

Params: {'C': 0.01, 'gamma': 1, 'kernel': 'rbf'}                                 
 20%|██        | 1/5 [01:06<04:26, 66.51s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/acd04320a4df4d9d9c344233b9d3d9a0

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589    

Params: {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}                                
 40%|████      | 2/5 [02:13<03:20, 66.88s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/3fb95802e08e471cb474ee086781df35

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589    

Params: {'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'}                               
 60%|██████    | 3/5 [03:11<02:05, 62.64s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/c7043023bb734e45b4af112fa9ac5694

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589    

Params: {'C': 0.1, 'gamma': 10, 'kernel': 'rbf'}                                 
 80%|████████  | 4/5 [04:17<01:03, 63.97s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/320940916b9845718564d31105278b0c

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589    

100%|██████████| 5/5 [05:32<00:00, 66.44s/trial, best loss: -0.33310749162724596]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/0d4bf3e613144ed4be37bca695429a55
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.01}                                  
🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/2d38b47281cd47d786d0c45fa449d0f1

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589

Params: {'C': 0.1}                                                               
🏃 View run LinearSVC_{'C': 0.1} at: http://127.0.0.1:8080/#/experiments/713908740940

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': 5, 'n_estimators': 700, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/a8bf9427c97e4e2b88be8e73f3cdd613

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589    

100%|██████████| 5/5 [00:24<00:00,  4.84s/trial, best loss: -0.33701038711508563]
🏃 View run xgb-sweep at: http://127.0.0.1:8080/#/experiments/713908740940849589/runs/865099d4630a45998f63eeadc945dc98
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/713908740940849589


0.33701038711508563

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the SUMO LLM dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_SUMO_LLM", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
X_train, y_train = X_sumo_llm, y_sumo_llm

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

mlflow.set_experiment('Driver_Behavior_Models_SUMO_LLM')
data_name = 'SUMO_LLM'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 20, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/3c17cb7710394ee681d43105dfe12c7e

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582

Params: {'max_depth': 5, 'n_estimators': 100, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/8878a05b07c4452294112e05db4f2435

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582    

Params: {'max_depth': 10, 'n_estimators': 50, 'random_state': 42}                
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/a18beca835214080

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/c97d74fa86dd4b59bf749a5b5fef6f0b

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582

Params: {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}                                
 20%|██        | 1/5 [02:51<11:26, 171.56s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/9a0f4ee25f6241afab3b7aed3a810fc9

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582     

Params: {'C': 0.05, 'gamma': 1, 'kernel': 'rbf'}                                  
 40%|████      | 2/5 [04:18<06:05, 121.82s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/5319b0bbc2494426b10375edf543da18

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582     

Params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}                                   
 60%|██████    | 3/5 [06:02<03:47, 113.79s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/d3be78178e864fa487303b04133235ac

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582     

Params: {'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'}                               
 80%|████████  | 4/5 [07:26<01:41, 101.92s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/749e527a9598415f9a0437c20df497e3

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582     

100%|██████████| 5/5 [08:50<00:00, 106.09s/trial, best loss: -0.33310749162724596]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/26213aeb8e3f48239264fbc53157580d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.001}                                 
🏃 View run LinearSVC_{'C': 0.001} at: http://127.0.0.1:8080/#/experiments/571950295118383582/runs/85907956b0484cfda00777c13a6a0bad

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/571950295118383582

Params: {'C': 0.1}                                                               
🏃 View run LinearSVC_{'C': 0.1} at: http://127.0.0.1:8080/#/experiments/571950

0.5845236878921106

## Carla Only

In [None]:
X_carla_fixed, y_carla_fixed = preprocessor.sliding_windows(carla_fixed, window_size=window_size, step_size=step_size)
X_carla_llm, y_carla_llm = preprocessor.sliding_windows(carla_llm, window_size=window_size, step_size=step_size)

In [None]:
# Getting the validation for SUMO variables
columns_to_keep = carla_fixed.columns.tolist()
print("Columns to keep for validation:", columns_to_keep)
X_val_base, y_val = preprocessor.sliding_windows(uah_validation[columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'label']


### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the CARLA Fixed dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_CARLA_Fixed", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
X_train, y_train = X_carla_fixed, y_carla_fixed

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

mlflow.set_experiment('Driver_Behavior_Models_CARLA_Fixed')
data_name = 'CARLA_Fixed'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 200, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/9303df6411c547049006837146ed4425

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199

Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}            
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/0c4cf2bfb6ad4bfbad8583053697a05d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199   

Params: {'max_depth': 10, 'n_estimators': 100, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 10, 'n_estimators': 100, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/dabad6fcbf454

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/ff1c6c2780a94743afd9b69f2ecbc1f4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199

Params: {'C': 0.01, 'gamma': 10, 'kernel': 'rbf'}                               
 20%|██        | 1/5 [00:22<01:29, 22.49s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 10, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/f62242cec0954b44a9abbe68588c71cf

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199   

Params: {'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'}                              
 40%|████      | 2/5 [00:45<01:08, 22.71s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.01, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/20aa272fd4b1427f9d28b807323b3069

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199   

Params: {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}                             
 60%|██████    | 3/5 [01:07<00:45, 22.59s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/3af05efccb8c4b2e8299e5d5cba40f20

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199   

Params: {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}                               
 80%|████████  | 4/5 [01:34<00:24, 24.04s/trial, best loss: -0.3335592240143389]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/e99ca11bbb1c4e3ab11fcbdbb79d25fc

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199   

100%|██████████| 5/5 [01:52<00:00, 22.46s/trial, best loss: -0.3335592240143389]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/62d1e1dce8b74189bd322a9a49b2c32d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 0.01}                                  
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/22c6881dfa5c476cbdbdf4dbf7d25d78

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199

Params: {'C': 0.01}                                                              
 20%|██        | 1/5 [00:01<00:07,  1.83s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/6f452fb55a6543d8a366716c3b4306ff

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199    

Params: {'C': 0.01}                                                              
 40%|████      | 2/5 [00:03<00:05,  1.83s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/a35b379a087d4bd6ad1f1bbd0d4fed92

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199    

Params: {'C': 1}                                                                 
 60%|██████    | 3/5 [00:05<00:03,  1.82s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/9ee6a8eb8bfa4fc0ba98b69790aba10d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199    

Params: {'C': 1}                                                                 
 80%|████████  | 4/5 [00:07<00:01,  1.85s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/49fcd25a4abe4b7e9662614c7ae9ba77

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199    

100%|██████████| 5/5 [00:09<00:00,  1.87s/trial, best loss: -0.33310749162724596]
🏃 View run svc-linear-sweep at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/df1ac57325e44e899d86f4867fe38d1d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.2, 'max_depth': 10, 'n_estimators': 200, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.2, 'max_depth': 10, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/758379252446667199/runs/688ba7c2cf574f1f88fdc30566d5e958

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/758379252446667199

Params: {'learning_rate': 0.5, 'max_depth': None, 'n_estimators': 200, 'rando

0.5120679087827963

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the CARLA LLM dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_CARLA_LLM", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
X_train, y_train = X_carla_llm, y_carla_llm

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

mlflow.set_experiment('Driver_Behavior_Models_CARLA_LLM')
data_name = 'CARLA_LLM'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 50, 'n_estimators': 20, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 50, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/ea0109e9346d453d9631eb1dd6c7741f

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458

Params: {'max_depth': 20, 'n_estimators': 200, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': 20, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/d7f626dabf7246f2946ace7e3b67eb77

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458   

Params: {'max_depth': None, 'n_estimators': 20, 'random_state': 42}              
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/833719341cf24a0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.01, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/73e8535d26fe4ca7bcc3836552cf343d

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458

Params: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}                                  
 20%|██        | 1/5 [00:30<02:00, 30.15s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/df4473769ce24ea3a1c900d8f9010e18

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

Params: {'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'}                             
 40%|████      | 2/5 [01:04<01:37, 32.45s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.05, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/525512b303d14574ab1b82aed73ac8f4

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

Params: {'C': 1, 'gamma': 1, 'kernel': 'rbf'}                                    
 60%|██████    | 3/5 [01:34<01:02, 31.38s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 1, 'gamma': 1, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/69c3440cb6a2404aa95508003b9d5119

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

Params: {'C': 0.5, 'gamma': 0.001, 'kernel': 'rbf'}                              
 80%|████████  | 4/5 [02:19<00:36, 36.94s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run SVC_{'C': 0.5, 'gamma': 0.001, 'kernel': 'rbf'} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/575b2118bb7347b193ff29a97641e109

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

100%|██████████| 5/5 [02:49<00:00, 33.99s/trial, best loss: -0.33310749162724596]
🏃 View run svc-sweep at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/eabe81d2355b4fe6808414b32882fc89
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458
This will run 5 trials to find optimal hyperparameters...
Params: {'C': 1}                                     
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/7d3fc06306394ea8a778cf9f2e69c370

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458

Params: {'C': 1}                                                                 
 20%|██        | 1/5 [00:01<00:07,  1.97s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/a3f08f5232bc4cf8b4497d9f7c708b63

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

Params: {'C': 1}                                                                 
 40%|████      | 2/5 [00:03<00:05,  1.97s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/019d64cb3f59472e81e1d25633bcddc2

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

Params: {'C': 1}                                                                 
 60%|██████    | 3/5 [00:05<00:03,  1.98s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 1} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/30c0813a073a4ba9b47bcc5d08b77a32

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

Params: {'C': 0.01}                                                              
 80%|████████  | 4/5 [00:07<00:01,  1.98s/trial, best loss: -0.33310749162724596]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run LinearSVC_{'C': 0.01} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/97635a5891a540e987e84082146dd35a

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

100%|██████████| 5/5 [00:09<00:00,  1.95s/trial, best loss: -0.33310749162724596]
🏃 View run svc-linear-sweep at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/dfc7d674534f4711b32964a4dee22400
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458
This will run 5 trials to find optimal hyperparameters...
Params: {'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42}
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



🏃 View run XGBClassifier_{'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/0136cee6959945f4b98c7877d3ed6dc8

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458

Params: {'learning_rate': 0.4, 'max_depth': 5, 'n_estimators': 700, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.4, 'max_depth': 5, 'n_estimators': 700, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/3a5b83f72fbd49659e3476879bc7aefe

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/338383607405652458    

Params: {'learning_rate': 0.5, 'max_depth': None, 'n_estimators': 1000, 'random_state': 42}
🏃 View run XGBClassifier_{'learning_rate': 0.5, 'max_depth': None, 'n_estimators': 1000, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/338383607405652458/runs/2e08245c262f4048a2a38f3b814af3a6

🧪 View experiment at: http://127.0.0.1:8080

0.40131969784645705

## Real + SUMO (20 % synthetic)

In [None]:
sumo_uah_fixed_20 = preprocessor.fill_synthetic_data(sumo_uah_fixed, 0.2)
sumo_uah_llm_20 = preprocessor.fill_synthetic_data(sumo_uah_llm, 0.2)

In [None]:
# SUMO_UAH
X_sumo_uah_fixed_20, y_sumo_uah_fixed_20 = preprocessor.sliding_windows(sumo_uah_fixed_20, window_size=window_size, step_size=step_size)
X_sumo_uah_llm_20, y_sumo_uah_llm_20 = preprocessor.sliding_windows(sumo_uah_llm_20, window_size=window_size, step_size=step_size)

In [None]:
# Getting the validation for SUMO variables
columns_to_keep = sumo_uah_fixed_20.columns.tolist()
print("Columns to keep for validation:", columns_to_keep)

X_val_base, y_val = preprocessor.sliding_windows(uah_validation[columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc', 'angle', 'speed', 'gyro_z', 'label']


### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + SUMO Fixed (20%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_UAH_SUMO_Fixed_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

Experiment already exists: RESOURCE_ALREADY_EXISTS: Experiment 'Driver_Behavior_Models_UAH_SUMO_Fixed_20' already exists.


In [None]:
X_train, y_train = X_sumo_uah_fixed_20, y_sumo_uah_fixed_20

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_UAH_SUMO_Fixed_20')
data_name = 'UAH_SUMO_Fixed_20'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

Params: {'max_depth': None, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/266617953664885142/runs/b3e2f86150034bbb981f1c407686bbc9

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/266617953664885142

Params: {'max_depth': None, 'n_estimators': 200, 'random_state': 42}            
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 200, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/266617953664885142/runs/bca2a17241d6429fbdffea089ee4fea6

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/266617953664885142   

 20%|██        | 1/5 [00:09<00:39,  9.76s/trial, best loss: -0.6038356386029884]
🏃 View run rf-sweep at: http://127.0.0.1:8080/#/experiments/266617953664885142/runs/89834a4c79b74a9c930a09c96dfc6bab
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/266617953664885142


KeyboardInterrupt: 

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + SUMO LLM (20%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_UAH_SUMO_LLM_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

Experiment already exists: RESOURCE_ALREADY_EXISTS: Experiment 'Driver_Behavior_Models_UAH_SUMO_LLM_20' already exists.


In [None]:
X_train, y_train = X_sumo_uah_llm_20, y_sumo_uah_llm_20

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_UAH_SUMO_LLM_20')
data_name = 'UAH_SUMO_LLM_20'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

## Real + CARLA (20%)

In [None]:
carla_uah_fixed_20 = preprocessor.fill_synthetic_data(carla_uah_fixed, 0.2)
carla_uah_llm_20 = preprocessor.fill_synthetic_data(carla_uah_llm, 0.2)

In [None]:
# CARLA_UAH
X_carla_uah_fixed_20, y_carla_uah_fixed_20 = preprocessor.sliding_windows(carla_uah_fixed_20, window_size=window_size, step_size=step_size)
X_carla_uah_llm_20, y_carla_uah_llm_20 = preprocessor.sliding_windows(carla_uah_llm_20, window_size=window_size, step_size=step_size)

In [None]:
# Getting the validation for SUMO variables
columns_to_keep = carla_uah_fixed_20.columns.tolist()
print("Columns to keep for validation:", columns_to_keep)

X_val_base, y_val = preprocessor.sliding_windows(uah_validation[columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'label']


### Fixed

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA Fixed (20%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_UAH_CARLA_Fixed_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

Experiment already exists: RESOURCE_ALREADY_EXISTS: Experiment 'Driver_Behavior_Models_UAH_CARLA_Fixed_20' already exists.


In [None]:
X_train, y_train = X_carla_uah_fixed_20, y_carla_uah_fixed_20

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_UAH_CARLA_Fixed_20')
data_name = 'UAH_CARLA_Fixed_20'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)

This will run 5 trials to find optimal hyperparameters...
Params: {'max_depth': 5, 'n_estimators': 50, 'random_state': 42}
🏃 View run RandomForestClassifier_{'max_depth': 5, 'n_estimators': 50, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/657265456461126531/runs/63db75c32e064319bd8e43868ce217ad

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/657265456461126531

Params: {'max_depth': None, 'n_estimators': 20, 'random_state': 42}             
🏃 View run RandomForestClassifier_{'max_depth': None, 'n_estimators': 20, 'random_state': 42} at: http://127.0.0.1:8080/#/experiments/657265456461126531/runs/8728cf9246f24feb9f9f5bae852a42e1

🧪 View experiment at: http://127.0.0.1:8080/#/experiments/657265456461126531   

 20%|██        | 1/5 [00:05<00:20,  5.05s/trial, best loss: -0.4344485543775649]
🏃 View run rf-sweep at: http://127.0.0.1:8080/#/experiments/657265456461126531/runs/df969f1cf6584967842f831332d78cbc
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/

KeyboardInterrupt: 

### LLM

In [None]:
# Provide an Experiment description that will appear in the UI
experiment_description = (
    "Experiment to train models on the UAH + CARLA LLM (20%) dataset."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "driver-behavior-prediction",
    "mlflow.note.content": experiment_description,
}

# Create the Experiment, providing a unique name
try:
    driver_behavior_experiment = client.create_experiment(
        name="Driver_Behavior_Models_UAH_CARLA_LLM_20", tags=experiment_tags
    )
except mlflow.exceptions.RestException as e:
    print(f"Experiment already exists: {e}")

In [None]:
X_train, y_train = X_carla_uah_llm_20, y_carla_uah_llm_20

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)

signature = infer_signature(X_train, y_train)
mlflow.set_experiment('Driver_Behavior_Models_UAH_CARLA_LLM_20')
data_name = 'UAH_CARLA_LLM_20'

In [None]:
model = RandomForestClassifier
run_experiments('rf-sweep', max_evals=max_evals, search_space=search_space_rf, data_name=data_name, model_name = 'RandomForestClassifier', features_names=columns_to_keep)
model = SVC
run_experiments('svc-sweep', max_evals=max_evals, search_space=search_space_svc, data_name=data_name, model_name = 'SVC', features_names=columns_to_keep)
model = LinearSVC
run_experiments('svc-linear-sweep', max_evals=max_evals, search_space=search_space_svc_linear, data_name=data_name, model_name = 'LinearSVC', features_names=columns_to_keep)
model = xgb.XGBClassifier
run_experiments('xgb-sweep', max_evals=max_evals, search_space=search_space_xgb, data_name=data_name, model_name = 'XGBClassifier', features_names=columns_to_keep)