In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split, cross_val_score, permutation_test_score

from sklearn import gaussian_process
from sklearn import svm
from sklearn import neighbors
from sklearn import ensemble


# Seed for the random steps of this notebook (passed as `random_state` to the
# row shuffle further down) so that reruns produce the same ordering.
SEED = 2137

## LOAD DATA


In [2]:
from util.load_scenarios import load_scenario_df
from util.load_evaluations import load_benchmark_df

# Read the benchmark evaluation table and the scenario (route) table from disk.
eval_df = load_benchmark_df("../data/evaluation/benchmarking/default")
scenario_df = load_scenario_df("../data/definition/routes_devtest_sliced.xml")

# Attach the scenario columns to the evaluation rows through 'route_index'.
# The inner join keeps only routes that appear in both tables; the result is
# then ordered by its index.
df = (
    eval_df
    .join(scenario_df, on='route_index', how='inner')
    .sort_index()
)

## PROCESS DATA


In [3]:
# use only numerical columns (the per-route mean below is only defined for them)
df = df.select_dtypes(include=np.number)

# use 10fps only
# The row key is written as an explicit tuple with `:` for the columns, so
# pandas cannot interpret "True" as a column label (the bare `df.loc[10, "True"]`
# form is ambiguous on a frame with a row MultiIndex).
# NOTE(review): assumes the first two row-index levels are matched by
# (10, "True"); the meaning of the second level is not visible here — confirm.
df = df.loc[(10, "True"), :]

# aggregate repetitions: one row per route, each column averaged over its runs
df = df.groupby('route_index').mean()
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 128 entries, 0 to 131
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   duration_game        128 non-null    float64
 1   duration_system      128 non-null    float64
 2   route_length         128 non-null    float64
 3   score_composed       128 non-null    float64
 4   score_penalty        128 non-null    float64
 5   score_route          128 non-null    float64
 6   driving_score        128 non-null    float64
 7   driving_score_error  128 non-null    float64
 8   n_points             128 non-null    float64
 9   length               128 non-null    float64
 10  dist                 128 non-null    float64
 11  dist_len_ratio       128 non-null    float64
 12  max_angles           128 non-null    float64
 13  avg_angles           128 non-null    float64
 14  n_turns              128 non-null    float64
dtypes: float64(15)
memory usage: 16.0 KB


In [4]:

# Thresholds on `driving_score_error` that separate the three risk classes.
safe_threshold = 0.01
risky_threshold = 0.25

# Read the error column once; all three conditions are evaluated on it.
score_error = df['driving_score_error']

# Conditions are tested in order; `between` is inclusive on both ends, so the
# boundary values themselves fall into 'moderate'.
# An explicit string `default` is required: the implicit default is the integer
# 0, which cannot be combined with string choices on NumPy 2.x (TypeError) and
# on older versions silently yields the label '0' for rows that match no
# condition (e.g. NaN errors).
df.loc[:, 'label'] = np.select(
    [score_error < safe_threshold,
     score_error.between(safe_threshold, risky_threshold),
     score_error > risky_threshold,
     ],
    ['safe', 'moderate', 'risky',],
    default='unknown',
)
# shuffle data rows (seeded, so the order is reproducible)
df = df.sample(frac=1, random_state=SEED)

## TRAIN MODEL


### Get features


In [5]:
# Columns used as classifier inputs.
features = ['max_angles', 'dist_len_ratio', 'n_turns']

# Feature matrix as a plain NumPy array: one row per route, one column per feature.
X = df.loc[:, features].to_numpy()
X.shape

(128, 3)

### Get labels


In [6]:
# Target vector: the class label of every row, in the same row order as `df`.
y = df.loc[:, 'label'].to_numpy()
y.shape

(128,)

In [7]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

### Fit


In [8]:
# Candidate classifiers, all with default hyper-parameters.
# The tree-based estimators are randomised, so they are seeded with SEED;
# without it the accuracies printed below change on every run.
methods = [
    svm.SVC(kernel='linear'),
    svm.SVC(kernel='rbf'),
    tree.DecisionTreeClassifier(random_state=SEED),
    gaussian_process.GaussianProcessClassifier(),
    ensemble.RandomForestClassifier(random_state=SEED),
    ensemble.ExtraTreesClassifier(random_state=SEED),
    neighbors.KNeighborsClassifier(),
]

for clf in methods:
    # No `cv` or `scoring` is passed, so scikit-learn's defaults apply; the
    # mean and spread of the per-fold scores are reported.
    scores = cross_val_score(clf, X, y)
    print(f"{clf}: \n - accuracy: {scores.mean():.2f} +/- {scores.std():.2f}")

    # Permutation test, currently disabled: compares the score on the real
    # labels with scores obtained on shuffled labels.
    # score, permutation_scores, pvalue = permutation_test_score(
    #     clf, X, y, random_state=0, n_jobs=16)

    # print(
    #     f"- permutation scores: {permutation_scores.mean():.2f} +/- "
    #     f"{permutation_scores.std():.2f} \n"
    # )

SVC(kernel='linear'): 
 - accuracy: 0.70 +/- 0.07
SVC(): 
 - accuracy: 0.68 +/- 0.06
DecisionTreeClassifier(): 
 - accuracy: 0.49 +/- 0.05
GaussianProcessClassifier(): 
 - accuracy: 0.59 +/- 0.07
RandomForestClassifier(): 
 - accuracy: 0.59 +/- 0.05
ExtraTreesClassifier(): 
 - accuracy: 0.59 +/- 0.04
KNeighborsClassifier(): 
 - accuracy: 0.63 +/- 0.06


- Simple models achieve 'high' accuracy
- P (permutation) scores are slightly worrying (not sure whether they should be)
