In [2]:
import glob
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import scipy

In [3]:
pd.set_option('display.max_rows', 30)

# Glob pattern matching every result file to load.
path = "data/*.json"


def parse_run_name(file_path):
    """Extract ``(fps, quality)`` from a result file's name.

    Only the file stem (no directory, no extension) is inspected. The stem is
    split on ``_`` and must contain exactly two all-digit tokens: the first is
    the fps, the second is the substepping flag (validated but not returned).
    Quality is ``"epic"`` when the stem contains "epic", otherwise ``"low"``.

    Raises:
        ValueError: if the stem does not contain exactly two integer tokens.
    """
    stem = Path(file_path).stem
    quality = "epic" if "epic" in stem else "low"
    numbers = [int(token) for token in stem.split("_") if token.isdigit()]
    if len(numbers) != 2:
        raise ValueError(
            f"expected exactly two integer fields (fps, substepping) in "
            f"{file_path!r}, found {numbers}"
        )
    fps, _substepping = numbers
    return fps, quality


# One dict per result file: the run settings plus its raw list of records.
data = []

for file_path in glob.glob(path):
    fps, quality = parse_run_name(file_path)

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file_path, "r", encoding="utf-8") as f:
        content = json.load(f)

    data.append({
        'fps': fps,
        'quality': quality,
        'records': content['_checkpoint']['records'],
    })


In [4]:
# One row per (run, record): every element of a run's 'records' list becomes
# its own row, and the index is renumbered from 0.
exploded = pd.DataFrame(data).explode('records', ignore_index=True)

# Flatten each record dict into columns; nested keys become dotted names.
record_df = pd.json_normalize(exploded['records'])

# Place the flattened record columns next to the run-level columns, then
# discard the raw 'records' column they were derived from.
df = pd.concat([exploded, record_df], axis=1).drop('records', axis=1)

# Strip the 'meta.' and 'scores.' prefixes from the column names
# (the 'infractions.*' columns are left as they are).
df.columns = (
    df.columns
    .str.removeprefix('meta.')
    .str.removeprefix('scores.')
)
df

Unnamed: 0,fps,quality,index,route_id,status,infractions.collisions_layout,infractions.collisions_pedestrian,infractions.collisions_vehicle,infractions.outside_route_lanes,infractions.red_light,infractions.route_dev,infractions.route_timeout,infractions.stop_infraction,infractions.vehicle_blocked,duration_game,duration_system,route_length,score_composed,score_penalty,score_route
0,10,epic,0,RouteScenario_16,Failed - Agent timed out,[],[],[],[],[],[],[Route timeout.],[],[],917.100014,2350.653948,1071.028134,80.003722,1.00,80.003722
1,10,epic,1,RouteScenario_16,Failed - Agent timed out,[],[Agent collided against object with type=walke...,[],[],[],[],[Route timeout.],[],[],917.100014,2354.508524,1071.028134,23.458103,0.50,46.916205
2,10,epic,2,RouteScenario_16,Failed - Agent timed out,[],[],[Agent collided against object with type=vehic...,[],[],[],[Route timeout.],[],[],917.100014,2413.147233,1071.028134,24.242325,0.60,40.403875
3,10,epic,3,RouteScenario_17,Completed,[],[],[],[],[],[],[],[],[],415.400006,802.025016,862.430089,100.000000,1.00,100.000000
4,10,epic,4,RouteScenario_17,Failed - Agent got blocked,[],[],[Agent collided against object with type=vehic...,[],[],[],[],[],"[Agent got blocked at (x=62.376, y=188.075, z=...",667.700010,1617.573606,862.430089,34.367447,0.36,95.465132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,10,low,25,RouteScenario_24,Failed - Agent got blocked,[],[Agent collided against object with type=walke...,[Agent collided against object with type=vehic...,[],[],[],[],[],"[Agent got blocked at (x=71.845, y=-88.115, z=...",1284.800019,1808.531137,2101.202735,7.507748,0.18,41.709712
164,10,low,26,RouteScenario_24,Failed - Agent got blocked,[],[],[Agent collided against object with type=vehic...,[],[],[],[],[],"[Agent got blocked at (x=77.617, y=-85.957, z=...",1056.400016,1503.301611,2101.202735,14.915011,0.36,41.430586
165,10,low,27,RouteScenario_25,Failed - Agent got blocked,[],[],[Agent collided against object with type=vehic...,[],[],[],[],[],"[Agent got blocked at (x=-127.208, y=124.152, ...",1286.600019,1834.791415,1554.578340,22.117736,0.36,61.438155
166,10,low,28,RouteScenario_25,Failed - Agent timed out,[],[],[Agent collided against object with type=vehic...,[],"[Agent ran a red light 9290 at (x=40.25, y=78....",[],[Route timeout.],[],[],1321.100020,2631.076403,1554.578340,30.376486,0.42,72.324966


Question: For the same scenario, is the route more likely to be completed at a lower quality/fps setting?

In [13]:
# Highest and lowest `score_route` for every (route, quality, fps) combination.
group_keys = ['route_id', 'quality', 'fps']
df1 = df.groupby(group_keys)[['score_route']].agg(['max', 'min'])
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,score_route,score_route
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,max,min
route_id,quality,fps,Unnamed: 3_level_2,Unnamed: 4_level_2
RouteScenario_16,epic,10,80.003722,40.403875
RouteScenario_16,epic,20,84.636040,55.264535
RouteScenario_16,epic,30,5.545614,5.469284
RouteScenario_16,low,10,100.000000,100.000000
RouteScenario_16,low,20,100.000000,93.608639
...,...,...,...,...
RouteScenario_24,low,20,41.663192,40.639728
RouteScenario_25,epic,10,99.331706,88.457481
RouteScenario_25,epic,20,100.000000,72.750244
RouteScenario_25,low,10,100.000000,61.438155


The table above shows that higher-fidelity data generally has a lower score. Does this mean infractions are more likely to happen in higher-fidelity scenarios? Moreover, does it mean that using low-fidelity data will miss many infractions that are supposed to happen?