This notebook is used to train the models using `MLflow`. If you want to see the results for the best models, refer to the `4_evaluation` notebook.

In [10]:
import os

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler

# Add the sibling `src` directory to the import path so that `from data import preprocessor` resolves.
os.sys.path.append(os.path.abspath('../src'))
from data import preprocessor

# Name of the sub-directory under `merged/` from which the synthetic datasets are read.
mapname = 'Town01'

In [3]:
def create_and_train_model(model, args, X_train=None, y_train=None, X_val=None, y_val=None):
    """
    Instantiate an estimator, fit it, and score it on a validation split.

    Args:
        model: Estimator class (or any callable) invoked as ``model(**args)``;
            the returned object must provide ``fit`` and ``predict``.
        args: Dict of keyword arguments forwarded to ``model``.
        X_train, y_train: Training features and labels. When omitted, the
            notebook-level variables of the same name are used, so existing
            two-argument calls keep working.
        X_val, y_val: Validation features and labels, with the same fallback.

    Returns:
        dict with the keys ``"f1_score"``, ``"accuracy"``, ``"recall"`` and
        ``"precision"`` (f1/recall/precision use weighted averaging), plus
        ``"model"`` holding the fitted estimator.

    Raises:
        NameError: If a split is neither passed in nor defined at notebook level.
    """
    # Resolve every split explicitly so the dependency on notebook state is
    # visible and can be bypassed by passing the data in.
    requested = {"X_train": X_train, "y_train": y_train, "X_val": X_val, "y_val": y_val}
    splits = {}
    for name, value in requested.items():
        if value is None:
            if name not in globals():
                raise NameError(
                    f"name '{name}' is not defined; pass it explicitly or define it before calling"
                )
            value = globals()[name]
        splits[name] = value

    result = {}
    # Train model with current hyperparameters
    print(f"Params: {args}")
    md = model(**args)
    md.fit(splits["X_train"], splits["y_train"])
    # Predict on the validation set
    y_true = splits["y_val"]
    y_pred = md.predict(splits["X_val"])
    # Log training results
    result["f1_score"] = f1_score(y_true, y_pred, average='weighted')
    result["accuracy"] = accuracy_score(y_true, y_pred)
    result["recall"] = recall_score(y_true, y_pred, average='weighted')
    result["precision"] = precision_score(y_true, y_pred, average='weighted')
    result["model"] = md

    return result

## Preparing the Data

In [4]:
# Root directory of all CSV inputs, relative to this notebook.
data_path = '../data'
# Real (UAH) training and validation sets.
uah_training = pd.read_csv(f'{data_path}/base/training_set_uah.csv')
uah_validation = pd.read_csv(f'{data_path}/base/validation_set_uah.csv')

# Synthetic-only CARLA sets; the `origin` column is dropped right after loading.
carla_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/carla/carla_fixed.csv').drop(columns=['origin'])
carla_llm = pd.read_csv(f'{data_path}/merged/{mapname}/carla/carla_llm.csv').drop(columns=['origin'])

# Synthetic-only SUMO sets; the `origin` column is dropped right after loading.
sumo_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/sumo/sumo_fixed.csv').drop(columns=['origin'])
sumo_llm = pd.read_csv(f'{data_path}/merged/{mapname}/sumo/sumo_llm.csv').drop(columns=['origin'])

# Merged CARLA + UAH sets, loaded with all columns kept.
# NOTE(review): presumably `origin` is kept here for `preprocessor.fill_synthetic_data` — confirm.
carla_uah_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/carla_uah/carla_uah_fixed.csv')
carla_uah_llm = pd.read_csv(f'{data_path}/merged/{mapname}/carla_uah/carla_uah_llm.csv')

# Merged SUMO + UAH sets, loaded with all columns kept.
sumo_uah_fixed = pd.read_csv(f'{data_path}/merged/{mapname}/sumo_uah/sumo_uah_fixed.csv')
sumo_uah_llm = pd.read_csv(f'{data_path}/merged/{mapname}/sumo_uah/sumo_uah_llm.csv')

In [5]:
# Column names of each simulator's dataset; used later to select the matching columns from the real UAH frames.
sumo_columns_to_keep = sumo_fixed.columns.tolist()
carla_columns_to_keep = carla_fixed.columns.tolist()

In [6]:
# Settings passed to `preprocessor.sliding_windows` as `window_size` / `step_size`.
window_size = 10
step_size = 5
# Label name -> integer mapping passed to `preprocessor.one_hot_encode`.
one_hot_keys = {
    'normal': 0,
    'aggressive': 1
}

# Train on Real, Test on Synthetic

By training a model on real data and testing it on synthetic data, we verify the synthetic data’s fidelity to the real world.

In [66]:
# Collects the metrics of each Train-on-Real / Test-on-Synthetic run, keyed by synthetic dataset name.
TRTS = {}

## SUMO

In [67]:
# Real (UAH) training windows restricted to the SUMO column set.
# The shared `window_size` / `step_size` settings replace the repeated literals 10 and 5.
X_train, y_train = preprocessor.sliding_windows(
    uah_training[sumo_columns_to_keep], window_size=window_size, step_size=step_size
)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys)

### Fixed

In [68]:
# Synthetic SUMO (fixed) windows used as the evaluation set.
# The shared `window_size` / `step_size` settings replace the repeated literals 10 and 5.
X_val, y_val = preprocessor.sliding_windows(sumo_fixed, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

In [69]:
# Best model: XGBClassifier_{'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42}
results = create_and_train_model(
    xgb.XGBClassifier,
    dict(learning_rate=0.001, max_depth=10, n_estimators=1000, random_state=42),
)
# Keep only the model name and the scalar metrics; the fitted estimator is not stored.
TRTS['sumo_fixed'] = {
    'model': 'XGBClassifier',
    **{metric: results[metric] for metric in ('f1_score', 'accuracy', 'recall', 'precision')},
}

Params: {'learning_rate': 0.001, 'max_depth': 10, 'n_estimators': 1000, 'random_state': 42}


### LLM

In [70]:
# Synthetic SUMO (LLM) windows used as the evaluation set.
# The shared `window_size` / `step_size` settings replace the repeated literals 10 and 5.
X_val, y_val = preprocessor.sliding_windows(sumo_llm, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

In [71]:
# Best model: RandomForestClassifier_{'max_depth': 10, 'n_estimators': 50, 'random_state': 42}
results = create_and_train_model(
    RandomForestClassifier,
    dict(max_depth=10, n_estimators=50, random_state=42),
)
# Keep only the model name and the scalar metrics; the fitted estimator is not stored.
TRTS['sumo_llm'] = {
    'model': 'RFClassifier',
    **{metric: results[metric] for metric in ('f1_score', 'accuracy', 'recall', 'precision')},
}

Params: {'max_depth': 10, 'n_estimators': 50, 'random_state': 42}


## CARLA

In [72]:
# Real (UAH) training windows restricted to the CARLA column set.
# The shared `window_size` / `step_size` settings replace the repeated literals 10 and 5.
X_train, y_train = preprocessor.sliding_windows(
    uah_training[carla_columns_to_keep], window_size=window_size, step_size=step_size
)
y_train = preprocessor.one_hot_encode(y_train, one_hot_keys)

### Fixed

In [73]:
# Synthetic CARLA (fixed) windows used as the evaluation set.
# The shared `window_size` / `step_size` settings replace the repeated literals 10 and 5.
X_val, y_val = preprocessor.sliding_windows(carla_fixed, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

In [74]:
# Best model: XGBClassifier_{'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 700, 'random_state': 42}
results = create_and_train_model(
    xgb.XGBClassifier,
    dict(learning_rate=0.2, max_depth=5, n_estimators=700, random_state=42),
)
# Keep only the model name and the scalar metrics; the fitted estimator is not stored.
TRTS['carla_fixed'] = {
    'model': 'XGBClassifier',
    **{metric: results[metric] for metric in ('f1_score', 'accuracy', 'recall', 'precision')},
}

Params: {'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 700, 'random_state': 42}


### LLM

In [75]:
# Synthetic CARLA (LLM) windows used as the evaluation set.
# The shared `window_size` / `step_size` settings replace the repeated literals 10 and 5.
X_val, y_val = preprocessor.sliding_windows(carla_llm, window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys)

In [76]:
# Best model: XGBClassifier_{'learning_rate': 0.3, 'max_depth': 5, 'n_estimators': 500, 'random_state': 42}
# NOTE(review): the arguments passed below use learning_rate=0.2 and max_depth=4,
# which differ from the best-model parameters quoted above — confirm which set is intended.
results = create_and_train_model(xgb.XGBClassifier, {
    'learning_rate': 0.2,
    'max_depth': 4,
    'n_estimators': 500,
    'random_state': 42,
})
# Store the model name and scalar metrics for the results table.
TRTS['carla_llm'] = {
    'model': 'XGBClassifier',
    'f1_score': results['f1_score'],
    'accuracy': results['accuracy'],
    'recall': results['recall'],
    'precision': results['precision'],
}

Params: {'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 500, 'random_state': 42}


## Results table

In [77]:
# One column per experiment, one row per stored field (model name and metrics).
TRTS_df = pd.DataFrame.from_dict(TRTS, orient='index').transpose()
TRTS_df

Unnamed: 0,sumo_fixed,sumo_llm,carla_fixed,carla_llm
model,XGBClassifier,RFClassifier,XGBClassifier,XGBClassifier
f1_score,0.472546,0.487043,0.526135,0.50708
accuracy,0.496421,0.52394,0.526416,0.515162
recall,0.496421,0.52394,0.526416,0.515162
precision,0.49959,0.512985,0.525869,0.532568


# Discriminative Score

We seek to understand how well a classifier can separate the real and the synthetic data to determine how indistinguishable they are.

If the classifiers do not get good scores, that means the data is hardly distinguishable.

In [152]:
# Real (UAH) feature frames for the discriminator, one per simulator column set, without the target column.
X_uah_disc_sumo = uah_training.loc[:, sumo_columns_to_keep].drop(columns='label')
X_uah_disc_carla = uah_training.loc[:, carla_columns_to_keep].drop(columns='label')
# 0 for real data and 1 for synthetic data
y_uah_disc = np.zeros(X_uah_disc_carla.shape[0])

## SUMO

In [156]:
# Synthetic SUMO rows without the target column, each tagged with discriminator class 1.
X_sumo_disc_fixed = sumo_fixed.drop(columns='label')
y_sumo_disc_fixed = np.ones(sumo_fixed.shape[0])

X_sumo_disc_llm = sumo_llm.drop(columns='label')
y_sumo_disc_llm = np.ones(sumo_llm.shape[0])

### Fixed

In [206]:
# Stack the synthetic SUMO (fixed) rows on top of the real rows. `ignore_index=True`
# already produces a fresh 0..n-1 index, so no separate reset_index is required.
X = pd.concat([X_sumo_disc_fixed, X_uah_disc_sumo], axis=0, ignore_index=True)
# `np.concatenate` works on every NumPy release; `np.concat` only exists from NumPy 2.0.
y = np.concatenate([y_sumo_disc_fixed, y_uah_disc], axis=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [207]:
# Seed the forest so the printed discriminative scores are reproducible across runs.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [215]:
# RBF-kernel SVM as a second real-vs-synthetic discriminator on the current split.
svc = SVC(kernel='rbf').fit(X_train, y_train)
y_pred = svc.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


### LLM

In [209]:
# Stack the synthetic SUMO (LLM) rows on top of the real rows. `ignore_index=True`
# already produces a fresh 0..n-1 index, so no separate reset_index is required.
X = pd.concat([X_sumo_disc_llm, X_uah_disc_sumo], axis=0, ignore_index=True)
# `np.concatenate` works on every NumPy release; `np.concat` only exists from NumPy 2.0.
y = np.concatenate([y_sumo_disc_llm, y_uah_disc], axis=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [210]:
# Seed the forest so the printed discriminative scores are reproducible across runs.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [216]:
# RBF-kernel SVM as a second real-vs-synthetic discriminator on the current split.
svc = SVC(kernel='rbf').fit(X_train, y_train)
y_pred = svc.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


## CARLA

In [208]:
# Synthetic CARLA rows without the target column, each tagged with discriminator class 1.
X_carla_disc_fixed = carla_fixed.drop(columns='label')
y_carla_disc_fixed = np.ones(carla_fixed.shape[0])

X_carla_disc_llm = carla_llm.drop(columns='label')
y_carla_disc_llm = np.ones(carla_llm.shape[0])

### Fixed

In [211]:
# Stack the synthetic CARLA (fixed) rows on top of the real rows. `ignore_index=True`
# already produces a fresh 0..n-1 index, so no separate reset_index is required.
X = pd.concat([X_carla_disc_fixed, X_uah_disc_carla], axis=0, ignore_index=True)
# `np.concatenate` works on every NumPy release; `np.concat` only exists from NumPy 2.0.
y = np.concatenate([y_carla_disc_fixed, y_uah_disc], axis=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [212]:
# Seed the forest so the printed discriminative scores are reproducible across runs.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [217]:
# RBF-kernel SVM as a second real-vs-synthetic discriminator on the current split.
svc = SVC(kernel='rbf').fit(X_train, y_train)
y_pred = svc.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


### LLM

In [213]:
# Stack the synthetic CARLA (LLM) rows on top of the real rows. `ignore_index=True`
# already produces a fresh 0..n-1 index, so no separate reset_index is required.
X = pd.concat([X_carla_disc_llm, X_uah_disc_carla], axis=0, ignore_index=True)
# `np.concatenate` works on every NumPy release; `np.concat` only exists from NumPy 2.0.
y = np.concatenate([y_carla_disc_llm, y_uah_disc], axis=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [214]:
# Seed the forest so the printed discriminative scores are reproducible across runs.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


In [218]:
# RBF-kernel SVM as a second real-vs-synthetic discriminator on the current split.
svc = SVC(kernel='rbf').fit(X_train, y_train)
y_pred = svc.predict(X_val)
print(f"F1 Score: {f1_score(y_val, y_pred, average='weighted')}")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")

F1 Score: 1.0
Accuracy: 1.0


# Predictive Scores

Here we train on different combinations of real and synthetic data and test on real data only in order to verify the quality of the data for real-world applications.

## Real Only

In [None]:
# Feature columns plus the target used for the real-data-only baseline.
columns_to_keep = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'speed', 'label']

# Window the real training and validation sets with the shared settings.
X_train, y_train = preprocessor.sliding_windows(
    uah_training[columns_to_keep], window_size=window_size, step_size=step_size
)
X_val, y_val = preprocessor.sliding_windows(
    uah_validation[columns_to_keep], window_size=window_size, step_size=step_size
)

# Fit the scaler on the training features only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

y_train = preprocessor.one_hot_encode(y_train, one_hot_keys=one_hot_keys)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## SUMO Only

In [12]:
# Window both synthetic SUMO datasets (fixed and LLM) with the shared window settings.
X_sumo_fixed, y_sumo_fixed = preprocessor.sliding_windows(sumo_fixed, window_size=window_size, step_size=step_size)
X_sumo_llm, y_sumo_llm = preprocessor.sliding_windows(sumo_llm, window_size=window_size, step_size=step_size)

In [35]:
# Build the validation windows from the real UAH validation set, restricted to the SUMO columns.
print("Columns to keep for validation:", sumo_columns_to_keep)

# `X_val_base` stays unscaled; each experiment cell scales it with its own fitted scaler.
X_val_base, y_val = preprocessor.sliding_windows(uah_validation[sumo_columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc', 'angle', 'speed', 'gyro_z', 'label']


### Fixed

In [None]:
# SUMO-only (fixed) training set: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_fixed)
X_train = scaler.transform(X_sumo_fixed)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_fixed, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# SUMO-only (LLM) training set: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_llm)
X_train = scaler.transform(X_sumo_llm)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_llm, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## Carla Only

In [32]:
# Window both synthetic CARLA datasets (fixed and LLM) with the shared window settings.
X_carla_fixed, y_carla_fixed = preprocessor.sliding_windows(carla_fixed, window_size=window_size, step_size=step_size)
X_carla_llm, y_carla_llm = preprocessor.sliding_windows(carla_llm, window_size=window_size, step_size=step_size)

In [36]:
# Build the validation windows from the real UAH validation set, restricted to the CARLA columns.
print("Columns to keep for validation:", carla_columns_to_keep)
X_val_base, y_val = preprocessor.sliding_windows(uah_validation[carla_columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'label', 'acc']


### Fixed

In [None]:
# CARLA-only (fixed) training set: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_fixed)
X_train = scaler.transform(X_carla_fixed)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_fixed, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# CARLA-only (LLM) training set: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_llm)
X_train = scaler.transform(X_carla_llm)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_llm, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## Real + SUMO

In [None]:
# Build the Real + SUMO training frames with second arguments 0.2, 1 and 2,
# matching the `_20` / `_100` / `_200` suffixes.
# NOTE(review): the exact meaning of the second argument is defined by
# `preprocessor.fill_synthetic_data` — presumably a synthetic-to-real ratio; confirm.
sumo_uah_fixed_20 = preprocessor.fill_synthetic_data(sumo_uah_fixed, 0.2)
sumo_uah_llm_20 = preprocessor.fill_synthetic_data(sumo_uah_llm, 0.2)

sumo_uah_fixed_100 = preprocessor.fill_synthetic_data(sumo_uah_fixed, 1)
sumo_uah_llm_100 = preprocessor.fill_synthetic_data(sumo_uah_llm, 1)

sumo_uah_fixed_200 = preprocessor.fill_synthetic_data(sumo_uah_fixed, 2)
sumo_uah_llm_200 = preprocessor.fill_synthetic_data(sumo_uah_llm, 2)

In [None]:
# Window every Real + SUMO training frame with the shared window settings.
X_sumo_uah_fixed_20, y_sumo_uah_fixed_20 = preprocessor.sliding_windows(sumo_uah_fixed_20, window_size=window_size, step_size=step_size)
X_sumo_uah_llm_20, y_sumo_uah_llm_20 = preprocessor.sliding_windows(sumo_uah_llm_20, window_size=window_size, step_size=step_size)

X_sumo_uah_fixed_100, y_sumo_uah_fixed_100 = preprocessor.sliding_windows(sumo_uah_fixed_100, window_size=window_size, step_size=step_size)
X_sumo_uah_llm_100, y_sumo_uah_llm_100 = preprocessor.sliding_windows(sumo_uah_llm_100, window_size=window_size, step_size=step_size)

X_sumo_uah_fixed_200, y_sumo_uah_fixed_200 = preprocessor.sliding_windows(sumo_uah_fixed_200, window_size=window_size, step_size=step_size)
X_sumo_uah_llm_200, y_sumo_uah_llm_200 = preprocessor.sliding_windows(sumo_uah_llm_200, window_size=window_size, step_size=step_size)

In [None]:
# Rebuild the validation windows from the real UAH validation set using the SUMO columns,
# overwriting whatever `X_val_base` / `y_val` an earlier section left behind.
print("Columns to keep for validation:", sumo_columns_to_keep)

X_val_base, y_val = preprocessor.sliding_windows(uah_validation[sumo_columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc', 'angle', 'speed', 'gyro_z', 'label']


## Real + SUMO (20%)

### Fixed

In [None]:
# Real + SUMO (fixed), `_20` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_uah_fixed_20)
X_train = scaler.transform(X_sumo_uah_fixed_20)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_fixed_20, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# Real + SUMO (LLM), `_20` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_uah_llm_20)
X_train = scaler.transform(X_sumo_uah_llm_20)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_llm_20, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## Real + SUMO (100%)

### Fixed

In [None]:
# Real + SUMO (fixed), `_100` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_uah_fixed_100)
X_train = scaler.transform(X_sumo_uah_fixed_100)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_fixed_100, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# Real + SUMO (LLM), `_100` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_uah_llm_100)
X_train = scaler.transform(X_sumo_uah_llm_100)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_llm_100, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## Real + SUMO (200%)

### Fixed

In [None]:
# Real + SUMO (fixed), `_200` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_uah_fixed_200)
X_train = scaler.transform(X_sumo_uah_fixed_200)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_fixed_200, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# Real + SUMO (LLM), `_200` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_sumo_uah_llm_200)
X_train = scaler.transform(X_sumo_uah_llm_200)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_sumo_uah_llm_200, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## Real + CARLA

In [None]:
# Build the Real + CARLA training frames. The second argument follows the
# `_20` / `_100` / `_200` suffixes and the "(20%)" / "(100%)" / "(200%)" section
# headings, using the same values (0.2, 1, 2) as the Real + SUMO experiments.
# NOTE(review): confirm the CARLA data is large enough for the values 1 and 2;
# the meaning of the argument is defined by `preprocessor.fill_synthetic_data`.
carla_uah_fixed_20 = preprocessor.fill_synthetic_data(carla_uah_fixed, 0.2)
carla_uah_llm_20 = preprocessor.fill_synthetic_data(carla_uah_llm, 0.2)

carla_uah_fixed_100 = preprocessor.fill_synthetic_data(carla_uah_fixed, 1)
carla_uah_llm_100 = preprocessor.fill_synthetic_data(carla_uah_llm, 1)

carla_uah_fixed_200 = preprocessor.fill_synthetic_data(carla_uah_fixed, 2)
carla_uah_llm_200 = preprocessor.fill_synthetic_data(carla_uah_llm, 2)

In [None]:
# Window every Real + CARLA training frame with the shared window settings.
X_carla_uah_fixed_20, y_carla_uah_fixed_20 = preprocessor.sliding_windows(carla_uah_fixed_20, window_size=window_size, step_size=step_size)
X_carla_uah_llm_20, y_carla_uah_llm_20 = preprocessor.sliding_windows(carla_uah_llm_20, window_size=window_size, step_size=step_size)

X_carla_uah_fixed_100, y_carla_uah_fixed_100 = preprocessor.sliding_windows(carla_uah_fixed_100, window_size=window_size, step_size=step_size)
X_carla_uah_llm_100, y_carla_uah_llm_100 = preprocessor.sliding_windows(carla_uah_llm_100, window_size=window_size, step_size=step_size)

X_carla_uah_fixed_200, y_carla_uah_fixed_200 = preprocessor.sliding_windows(carla_uah_fixed_200, window_size=window_size, step_size=step_size)
X_carla_uah_llm_200, y_carla_uah_llm_200 = preprocessor.sliding_windows(carla_uah_llm_200, window_size=window_size, step_size=step_size)

In [None]:
# Rebuild the validation windows from the real UAH validation set using the CARLA columns,
# overwriting whatever `X_val_base` / `y_val` an earlier section left behind.
print("Columns to keep for validation:", carla_columns_to_keep)

X_val_base, y_val = preprocessor.sliding_windows(uah_validation[carla_columns_to_keep], window_size=window_size, step_size=step_size)
y_val = preprocessor.one_hot_encode(y_val, one_hot_keys=one_hot_keys)

Columns to keep for validation: ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'angle', 'label']


## Real + CARLA (20%)

### Fixed

In [None]:
# Real + CARLA (fixed), `_20` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_uah_fixed_20)
X_train = scaler.transform(X_carla_uah_fixed_20)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_uah_fixed_20, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# Real + CARLA (LLM), `_20` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_uah_llm_20)
X_train = scaler.transform(X_carla_uah_llm_20)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_uah_llm_20, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## Real + CARLA (100%)

### Fixed

In [None]:
# Real + CARLA (fixed), `_100` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_uah_fixed_100)
X_train = scaler.transform(X_carla_uah_fixed_100)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_uah_fixed_100, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# Real + CARLA (LLM), `_100` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_uah_llm_100)
X_train = scaler.transform(X_carla_uah_llm_100)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_uah_llm_100, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

## Real + CARLA (200%)

### Fixed

In [None]:
# Real + CARLA (fixed), `_200` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_uah_fixed_200)
X_train = scaler.transform(X_carla_uah_fixed_200)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_uah_fixed_200, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL

### LLM

In [None]:
# Real + CARLA (LLM), `_200` variant: fit the scaler on the training windows
# and reuse it for the unscaled validation windows.
scaler = StandardScaler().fit(X_carla_uah_llm_200)
X_train = scaler.transform(X_carla_uah_llm_200)
X_val = scaler.transform(X_val_base)
y_train = preprocessor.one_hot_encode(y_carla_uah_llm_200, one_hot_keys=one_hot_keys)

# TODO: TRAIN MODEL