In [1]:
import glob
import pandas as pd
import numpy as np
from pathlib import Path
from itertools import product
import json

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
N_REPETITIONS = 10
N_ITERATIONS = 25
FPS_VALUES = [10, 12, 14, 15, 17, 20]

In [3]:
base_path = "data/ronin/rs_v2"
paths = {"20 EPIC": f"{base_path}/*/checkpoints/it*/*20*True.json",
         "18 EPIC": f"{base_path}/*/checkpoints/it*/*18*True.json",
         "17 EPIC": f"{base_path}/*/checkpoints/it*/*17*True.json",
         "15 EPIC": f"{base_path}/*/checkpoints/it*/*15*True.json",
         "14 EPIC": f"{base_path}/*/checkpoints/it*/*14*True.json",
         "12 EPIC": f"{base_path}/*/checkpoints/it*/*12*True.json",
         "10 EPIC": f"{base_path}/*/checkpoints/it*/*10*True.json",

         }

for name, pattern in paths.items():
    n = len(glob.glob(pattern))
    print(
        f"For {name} there are {n}/{N_REPETITIONS*N_ITERATIONS} ready {n//N_ITERATIONS}")

For 20 EPIC there are 6/250 ready 0
For 18 EPIC there are 4/250 ready 0
For 17 EPIC there are 4/250 ready 0
For 15 EPIC there are 2/250 ready 0
For 14 EPIC there are 2/250 ready 0
For 12 EPIC there are 0/250 ready 0
For 10 EPIC there are 0/250 ready 0
For 20 LOW there are 0/250 ready 0
For 10 LOW there are 0/250 ready 0


In [4]:
def process_df(df):
    record_df = pd.json_normalize(df['records'])
    # concatenate records data
    df = pd.concat([df, record_df],  axis=1)

    # drop original column
    df = df.drop(['records', 'index'], axis=1)

    # drop infractions columns

    # remove prefixes from column name
    df.columns = df.columns.str.removeprefix('meta.')
    df.columns = df.columns.str.removeprefix('scores.')
    df['driving_score'] = df['score_composed'] / 100
    # df = df.rename(columns={"index": "route_index"})
    df = df.set_index(['fps', 'highquality', 'rep', 'it', 'route_id'])
    return df

In [5]:

def make_df(path: str):

    # DATA FROM PATH
    data = {}
    data['path'] = path
    stem = Path(path).stem.split("_")
    data['fps'] = int(stem[1])
    data['highquality'] = stem[3]
    match path.split("/"):
        case ["data", "ronin", "rs", rep, "checkpoints", it, *objects]:
            data['rep'] = int(rep)
            data['it'] = int(it[2:])
    # print(data)

    # READ CHECKPOINT
    with open(path, "r") as f:
        content = json.load(f)

        data['records'] = content['_checkpoint']['records']
        if not data['records']:
            return

        df = pd.DataFrame(data)
        return process_df(df)


dfs = []
pattern = f"{base_path}/*/checkpoints/it*/*.json"
for file in glob.glob(pattern):
    dfs.append(make_df(file))

KeyError: "None of ['rep', 'it'] are in the columns"

In [None]:
df = pd.concat(dfs).sort_index()
# df.to_csv("df.csv")
df[['driving_score', 'duration_system']].reset_index()['route_id'].value_counts()

route_id
RouteScenario_650    29
RouteScenario_421    21
RouteScenario_296    19
RouteScenario_173    19
RouteScenario_266    17
RouteScenario_187    17
RouteScenario_245    17
RouteScenario_14     16
RouteScenario_667    15
RouteScenario_335    15
RouteScenario_428    14
RouteScenario_204    14
RouteScenario_237    14
RouteScenario_110    14
RouteScenario_457    14
RouteScenario_700    14
RouteScenario_682    14
RouteScenario_524    14
RouteScenario_123    14
RouteScenario_615    13
RouteScenario_677    13
RouteScenario_154    13
RouteScenario_43     12
RouteScenario_27     12
RouteScenario_133    12
RouteScenario_534    12
RouteScenario_248    12
RouteScenario_725    11
RouteScenario_213    11
RouteScenario_586    11
RouteScenario_718    11
RouteScenario_178    11
RouteScenario_722    11
RouteScenario_741    11
RouteScenario_185    11
RouteScenario_701    10
RouteScenario_150    10
RouteScenario_17     10
RouteScenario_491    10
RouteScenario_310    10
RouteScenario_416    10
RouteSc

In [None]:
# Get an index of minimal driving score for each fidelity value for each repetition
solutions_idx = df.groupby(
    ['fps', 'highquality', 'rep'],)['driving_score'].idxmin()
df = df.loc[solutions_idx][['driving_score', 'duration_system']]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,driving_score,duration_system
fps,highquality,rep,it,route_id,Unnamed: 5_level_1,Unnamed: 6_level_1
10,False,0,21,RouteScenario_142,0.00071,1621.990574
10,False,1,2,RouteScenario_476,0.04217,858.50662
10,False,2,16,RouteScenario_233,0.019364,623.855442
10,False,3,8,RouteScenario_150,0.002658,926.74597
10,False,4,19,RouteScenario_228,0.009314,926.00132
10,False,5,2,RouteScenario_533,0.035103,583.82271
10,False,6,2,RouteScenario_492,0.00163,3473.157909
10,False,7,23,RouteScenario_140,0.000938,2913.136081
10,False,8,11,RouteScenario_491,0.001538,2950.083571
10,False,9,9,RouteScenario_296,0.028618,1163.251741


In [None]:
# Get an index of minimal driving score for each fidelity value for each repetition
solutions_idx = df.groupby(
    ['fps', 'highquality', 'rep'],)['driving_score'].idxmin()

# extract route id
solutions_idx = solutions_idx.apply(lambda x: x[-1].split("_")[-1])
solutions_idx = solutions_idx.rename("scenario_id")
solutions_idx

fps  highquality  rep
10   False        0      142
                  1      476
                  2      233
                  3      150
                  4      228
                  5      533
                  6      492
                  7      140
                  8      491
                  9      296
     True         0      476
                  1       11
                  2       72
                  3      277
                  4      533
                  5      419
                  6      140
                  7      691
                  8      532
                  9      335
12   True         0      297
                  1      109
                  2      161
                  3      419
                  4      310
                  5      534
                  6      532
                  7      324
                  8      490
                  9      248
14   True         0      161
                  1      492
                  2      310
                  3  

In [None]:
solutions_dict = solutions_idx.groupby(
    ['fps', 'highquality']).apply(set).apply(lambda x: ",".join(x))

solutions_dict

fps  highquality
10   False          142,296,476,140,150,492,228,233,491,533
     True             277,11,419,476,72,532,691,140,335,533
12   True           490,419,532,297,310,161,534,248,324,109
14   True           296,161,310,151,248,342,324,492,241,392
15   True           273,310,248,392,261,324,309,342,241,274
17   True                   490,296,273,589,248,245,392,594
18   True                   599,296,273,248,245,261,392,594
20   False                  490,273,266,248,245,261,509,392
     True                   490,273,231,248,261,509,260,392
Name: scenario_id, dtype: object

^ should be 90, 10 for each fidelity value

In [None]:
solutions_dict.to_csv("rs_solutions.csv")

In [None]:
df = pd.read_csv("rs_solutions.csv").set_index(['fps', 'highquality'])
df = df.xs(True, level='highquality')

for fps, row in df.iterrows():
    print(fps, row['scenario_id'])

10 277,11,419,476,72,532,691,140,335,533
12 490,419,532,297,310,161,534,248,324,109
14 296,161,310,151,248,342,324,492,241,392
15 273,310,248,392,261,324,309,342,241,274
17 490,296,273,589,248,245,392,594
18 599,296,273,248,245,261,392,594
20 490,273,231,248,261,509,260,392
