## Load data


In [46]:
import glob
import json
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns

# Shorthand alias for pandas' IndexSlice helper.
idx = pd.IndexSlice

# Raise pandas' display limits so wide/long result tables are shown more fully.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

In [47]:
def process_df(df):
    """Flatten the per-route records of one run and index the result.

    Args:
        df: Frame with a ``records`` column holding one (possibly nested)
            dict per row, plus ``rep``, ``fps`` and ``highquality`` columns.
            After flattening, the records must provide ``index`` and
            ``score_composed`` (the latter possibly under a ``scores.``
            prefix).

    Returns:
        A new frame in which ``records`` has been replaced by its flattened
        fields (``meta.`` / ``scores.`` prefixes stripped), a
        ``driving_score`` column equal to ``score_composed / 100`` has been
        added, and the index is ``(rep, fps, highquality, route_index)``.
    """
    # Expand every record dict into flat, dot-separated columns.
    flattened = pd.json_normalize(df['records'])

    # Put the flattened fields next to the run settings; the raw dicts are
    # no longer needed afterwards.
    merged = pd.concat([df, flattened], axis=1).drop('records', axis=1)

    # Strip the nesting prefixes, in the same order as they were applied before.
    for prefix in ('meta.', 'scores.'):
        merged.columns = merged.columns.str.removeprefix(prefix)

    merged['driving_score'] = merged['score_composed'] / 100

    # The record's own "index" field identifies the route.
    return (
        merged
        .rename(columns={"index": "route_index"})
        .set_index(['rep', 'fps', 'highquality', 'route_index'])
    )


def make_df(path: str):
    """Load one result JSON file into an indexed DataFrame.

    The run settings are recovered from the path itself:

    * the file stem is split on ``_``; token 1 is the FPS (parsed as int) and
      token 3 is the high-quality flag, kept as the raw string found in the
      file name;
    * the first directory component that contains ``rep`` and ends in digits
      gives the repetition number. All trailing digits are used, so
      ``rep10`` yields 10, and components such as ``reports`` are skipped.

    Args:
        path: Path to a JSON file whose ``_checkpoint`` -> ``records`` entry
            holds the list of per-route records.

    Returns:
        The result of ``process_df`` applied to a frame with one row per
        record and constant ``fps``, ``highquality`` and ``rep`` columns.

    Raises:
        ValueError: If no directory component provides a repetition number,
            or the FPS token is not an integer.
        IndexError: If the file stem has fewer than four ``_``-separated tokens.
    """
    p = Path(path)
    tokens = p.stem.split("_")

    # Use pathlib components instead of splitting on '/', so the lookup does
    # not depend on the platform's path separator.
    rep = None
    for part in p.parent.parts:
        match = re.search(r'rep.*?(\d+)$', part)
        if match:
            rep = int(match.group(1))
            break
    if rep is None:
        raise ValueError(
            f"Cannot find a 'rep<N>' directory component in path: {path!r}")

    # Keep the file open only for as long as it takes to parse it.
    with open(p, "r", encoding="utf-8") as f:
        content = json.load(f)

    data = {
        'fps': int(tokens[1]),
        'highquality': tokens[3],
        'rep': rep,
        'records': content['_checkpoint']['records'],
    }

    # Scalar settings are broadcast by pandas to one row per record.
    return process_df(pd.DataFrame(data))

In [48]:
# Glob pattern for the result files; it is relative to the notebook's
# working directory.
path = "../phase1/data/ronin/rep*/*/*.json"

# Sort the matches so the load order does not depend on the file system.
file_paths = sorted(glob.glob(path))
if not file_paths:
    # Without this check an empty match only surfaces as pd.concat's
    # "No objects to concatenate", which hides the real cause.
    raise FileNotFoundError(
        f"No result files match {path!r}; check the working directory.")

dfs = [make_df(file_path) for file_path in file_paths]

# Combine all runs, sort the MultiIndex and keep the two columns used below.
df = pd.concat(dfs).sort_index()[['driving_score', 'duration_system']]

## FILTER DATA


### Filter duplicated scenarios

Filter scenarios that are exactly the same


In [49]:
# The routes of these scenarios are duplicates, so remove them from every run.
excluded_routes = [20, 21, 22, 23]
df = df.drop(index=excluded_routes, level='route_index')

### Filter 16 FPS

CARLA crashes at 16 FPS, so we need to filter these results out.


In [50]:
# Discard every run recorded at 16 fps.
df = df.drop(index=16, level='fps')

In [51]:
# Summarise the filtered frame: index range, columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 17920 entries, (0, 10, 'False', 0) to (9, 20, 'True', 131)
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   driving_score    17920 non-null  float64
 1   duration_system  17920 non-null  float64
dtypes: float64(2)
memory usage: 373.6+ KB


## PROCESS DATA


### Add upsteering variable


In [52]:
# Tag every run loaded so far with upsteering=False and expose the flag as an
# extra index level. The guard keeps the cell re-runnable: without it, a
# second execution would add the column again and append another
# 'upsteering' level to the index.
if 'upsteering' not in df.index.names:
    df = df.assign(upsteering=False).set_index('upsteering', append=True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,driving_score,duration_system
rep,fps,highquality,route_index,upsteering,Unnamed: 5_level_1,Unnamed: 6_level_1
0,10,False,0,False,0.026306,642.428658
0,10,False,1,False,0.232490,536.358014
0,10,False,2,False,0.113361,489.848481
0,10,False,3,False,0.432466,277.129590
0,10,False,4,False,0.168521,404.950006
...,...,...,...,...,...,...
9,20,True,127,False,1.000000,287.689000
9,20,True,128,False,1.000000,397.439014
9,20,True,129,False,1.000000,234.884112
9,20,True,130,False,0.600000,373.658720


## PROCESS DATA


### Square error

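RMSE: deviation of each run's driving score from the oracle score, i.e. the mean driving score of the 20 FPS, high-quality runs on the same route. Note: the cell below stores the per-run absolute error in the `rmse` column; no squaring or averaging is applied at this step.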


In [58]:
# Reference ("oracle") runs: 20 fps with the high-quality flag equal to the
# string "True". Both levels are kept in the index of the selection.
oracle_df = (
    df
    .xs((20, "True"), level=['fps', 'highquality'], drop_level=False)
    .sort_index()
)

# Mean oracle driving score per route, indexed by route_index.
oracle_vec = oracle_df['driving_score'].groupby(level='route_index').mean()
# oracle_vec

In [54]:
# Absolute deviation of each run's driving score from the oracle mean of the
# same route; oracle_vec is broadcast across the 'route_index' level.
# NOTE(review): the column is named 'rmse', but nothing is squared or averaged
# here -- confirm whether later cells do that or the name should change.
df['rmse'] = df['driving_score'].sub(oracle_vec, level='route_index').abs()

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,driving_score,duration_system,rmse
rep,fps,highquality,route_index,upsteering,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,10,False,0,False,0.026306,642.428658,6.586943e-01
0,10,False,1,False,0.232490,536.358014,6.830993e-01
0,10,False,2,False,0.113361,489.848481,8.823495e-01
0,10,False,3,False,0.432466,277.129590,5.675335e-01
0,10,False,4,False,0.168521,404.950006,8.267248e-01
...,...,...,...,...,...,...,...
9,20,True,127,False,1.000000,287.689000,0.000000e+00
9,20,True,128,False,1.000000,397.439014,0.000000e+00
9,20,True,129,False,1.000000,234.884112,0.000000e+00
9,20,True,130,False,0.600000,373.658720,1.600000e-02
