In [1]:
import os
import os.path as osp
from pprint import pprint

import numpy as np
import pandas as pd

import baposgmcp.plot as plot_utils

# Directory holding the experiment results.
# Set the BAPOSGMCP_RESULTS_DIR environment variable to point the notebook at
# a different location; when it is unset the original hard-coded path is used.
results_dir = os.environ.get(
    "BAPOSGMCP_RESULTS_DIR",
    "/home/jonathon/baposgmcp_results/LBF10x10-n2-f7-static-v2/",
)
# input file
results_file_name = "compiled_episode_results.csv"
results_file_path = osp.join(results_dir, results_file_name)
# output file
compiled_results_file_name = "compiled_results.csv"
compiled_results_file_path = osp.join(results_dir, compiled_results_file_name)

In [2]:
# Outcomes parse functions
def parse_win(row):
    """Return 1 if ``row["episode_outcome"]`` equals 'WIN', otherwise 0."""
    outcome = row["episode_outcome"]
    return 1 if outcome == 'WIN' else 0

def parse_loss(row):
    """Return 1 if ``row["episode_outcome"]`` equals 'LOSS', otherwise 0."""
    outcome = row["episode_outcome"]
    if outcome == 'LOSS':
        return 1
    return 0

def parse_draw(row):
    """Return 1 if ``row["episode_outcome"]`` equals 'DRAW', otherwise 0."""
    is_draw = row["episode_outcome"] == 'DRAW'
    return 1 if is_draw else 0

def parse_na(row):
    """Return 0 if ``row["episode_outcome"]`` is 'WIN', 'LOSS' or 'DRAW', otherwise 1."""
    recognised = row["episode_outcome"] in ('WIN', 'LOSS', 'DRAW')
    return 0 if recognised else 1

In [3]:
# Import data
ep_df = pd.read_csv(results_file_path)
ep_df = plot_utils.add_df_coplayer_policy_id(ep_df)

# Assign groups and aggs
group_keys = [
    "policy_id",
    "coplayer_policy_id"
]

# replace num_episodes with actual number of episodes completed
# (i.e. the number of rows in each group)
ep_df["num_episodes"] = ep_df.groupby(group_keys)["num_episodes"].transform(len)

# parse episode outcomes into separate 0/1 indicator columns.
# Whole-column comparisons are used instead of a row-wise DataFrame.apply:
# they produce the same values as the parse_* helpers but avoid building a
# Python object per row, and they also work when the frame has no rows.
outcome_col_names = ["WIN", "LOSS", "DRAW", "NA"]
known_outcomes = outcome_col_names[:-1]
episode_outcomes = ep_df["episode_outcome"]
for k in known_outcomes:
    ep_df[k] = (episode_outcomes == k).astype(int)
# "NA" flags every row whose outcome is missing or is not one of the known labels
ep_df["NA"] = (~episode_outcomes.isin(known_outcomes)).astype(int)

print("Columns")
print("-------")
for c in ep_df.columns:
    print(f'    "{c}",')

Columns
-------
    "exp_id",
    "agent_id",
    "env_id",
    "exp_seed",
    "num_episodes",
    "time_limit",
    "episode_step_limit",
    "policy_id",
    "discount",
    "c_init",
    "c_base",
    "truncated",
    "action_selection",
    "dirichlet_alpha",
    "root_exploration_fraction",
    "reinvigorator",
    "known_bounds",
    "extra_particles_prop",
    "step_limit",
    "epsilon",
    "policy_prior_map",
    "fixed_policy_id",
    "num_sims",
    "action_dist_distance_0_mean",
    "action_dist_distance_0_std",
    "action_dist_distance_1_mean",
    "action_dist_distance_1_std",
    "action_dist_distance_0_0",
    "action_dist_distance_0_1",
    "action_dist_distance_0_2",
    "action_dist_distance_0_3",
    "action_dist_distance_0_4",
    "action_dist_distance_0_5",
    "action_dist_distance_0_6",
    "action_dist_distance_0_7",
    "action_dist_distance_0_8",
    "action_dist_distance_0_9",
    "action_dist_distance_0_10",
    "action_dist_distance_0_11",
    "action_d

In [12]:
# Columns treated as per-group constants
constants = [
    "agent_id", "env_id", "time_limit", "episode_step_limit",
    "discount", "c_init", "c_base", "truncated",
    "action_selection", "dirichlet_alpha", "root_exploration_fraction",
    'reinvigorator', "known_bounds", "extra_particles_prop",
    "step_limit", "epsilon", "meta_policy_dict", "num_sims",
    "num_episodes", "policy_prior_map", "fixed_policy_id",
]

# Columns that are superseded by other columns
replaced = [
    "num_episodes",     # replaced by number of episodes completed
    "episode_number",   # removed/superseded by above
    "episode_outcome",  # parsed into num_outcome_...
    "episode_done",     # removed/superseded by 'episode_outcome'
]

# Experiment identifier columns (one value is kept per group)
first_keys = ["exp_id", "exp_seed"]

# Outcome indicator columns are the ones that get summed
sum_keys = outcome_col_names

# Columns summarised with descriptive statistics
mean_keys = [
    'search_time', 'update_time', 'reinvigoration_time', 'evaluation_time',
    'policy_calls', 'inference_time', 'search_depth',
    'min_value', 'max_value',
    'episode_return', 'episode_discounted_return',
    'episode_steps', 'episode_time',
]

# Every column not claimed by one of the lists above is treated as a belief statistic
assigned_keys = set().union(
    group_keys, constants, replaced, first_keys, sum_keys, mean_keys
)
belief_stat_keys = [col for col in ep_df.columns if col not in assigned_keys]
belief_stat_keys

['action_dist_distance_0_mean',
 'action_dist_distance_0_std',
 'action_dist_distance_1_mean',
 'action_dist_distance_1_std',
 'action_dist_distance_0_0',
 'action_dist_distance_0_1',
 'action_dist_distance_0_2',
 'action_dist_distance_0_3',
 'action_dist_distance_0_4',
 'action_dist_distance_0_5',
 'action_dist_distance_0_6',
 'action_dist_distance_0_7',
 'action_dist_distance_0_8',
 'action_dist_distance_0_9',
 'action_dist_distance_0_10',
 'action_dist_distance_0_11',
 'action_dist_distance_0_12',
 'action_dist_distance_0_13',
 'action_dist_distance_0_14',
 'action_dist_distance_0_15',
 'action_dist_distance_0_16',
 'action_dist_distance_0_17',
 'action_dist_distance_0_18',
 'action_dist_distance_0_19',
 'action_dist_distance_0_20',
 'action_dist_distance_0_21',
 'action_dist_distance_0_22',
 'action_dist_distance_0_23',
 'action_dist_distance_0_24',
 'action_dist_distance_0_25',
 'action_dist_distance_0_26',
 'action_dist_distance_0_27',
 'action_dist_distance_0_28',
 'action_dist_

In [14]:
# Build one named aggregation per output column, then aggregate every group
# in a single call. Insertion order of agg_dict fixes the output column order.
gb = ep_df.groupby(group_keys)

agg_dict = {}

# experiment identifiers: smallest value within the group
# NOTE(review): the list is called first_keys but "min" is used - confirm intent
agg_dict.update(
    {col: pd.NamedAgg(column=col, aggfunc="min") for col in first_keys}
)

# per-group constants: pandas "first" aggregate
agg_dict.update(
    {col: pd.NamedAgg(column=col, aggfunc="first") for col in constants}
)

# outcome indicators: summed into num_<outcome> counts
agg_dict.update(
    {f"num_{col}": pd.NamedAgg(column=col, aggfunc="sum") for col in sum_keys}
)

# timing / return style metrics: mean, std, min and max per group
for col in mean_keys:
    for stat in ("mean", "std", "min", "max"):
        agg_dict[f"{col}_{stat}"] = pd.NamedAgg(column=col, aggfunc=stat)

for col in belief_stat_keys:
    if col.endswith("_mean"):
        # name already ends in _mean: aggregate with mean, keep the name
        outputs = [(col, "mean")]
    elif col.endswith("_std"):
        # name already ends in _std: aggregate with std, keep the name
        # NOTE(review): this is the std of the per-row std values - confirm intent
        outputs = [(col, "std")]
    else:
        outputs = [
            (f"{col}_mean", "mean"),
            (f"{col}_std", "std"),
            # number of non-NaN values, since this varies for belief stats
            # based on step number
            (f"{col}_n", "count"),
        ]
    for out_name, aggfunc in outputs:
        agg_dict[out_name] = pd.NamedAgg(column=col, aggfunc=aggfunc)

gb_agg = gb.agg(**agg_dict)
gb_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,exp_id,exp_seed,agent_id,env_id,time_limit,episode_step_limit,discount,c_init,c_base,truncated,...,bayes_accuracy_1_46_n,bayes_accuracy_1_47_mean,bayes_accuracy_1_47_std,bayes_accuracy_1_47_n,bayes_accuracy_1_48_mean,bayes_accuracy_1_48_std,bayes_accuracy_1_48_n,bayes_accuracy_1_49_mean,bayes_accuracy_1_49_std,bayes_accuracy_1_49_n
policy_id,coplayer_policy_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
LBF10x10-n2-f7-static-v2/heuristic1-v0,baposgmcp-fixed_piheuristic1-v0_numsims1000_truncatedFalse,96,0,1,LBF10x10-n2-f7-static-v2,43200,50,,,,,...,0,,,0,,,0,,,0
LBF10x10-n2-f7-static-v2/heuristic1-v0,baposgmcp-fixed_piheuristic1-v0_numsims100_truncatedFalse,88,0,1,LBF10x10-n2-f7-static-v2,43200,50,,,,,...,0,,,0,,,0,,,0
LBF10x10-n2-f7-static-v2/heuristic1-v0,baposgmcp-fixed_piheuristic1-v0_numsims10_truncatedFalse,80,0,1,LBF10x10-n2-f7-static-v2,43200,50,,,,,...,0,,,0,,,0,,,0
LBF10x10-n2-f7-static-v2/heuristic1-v0,baposgmcp-fixed_piheuristic1-v0_numsims500_truncatedFalse,92,0,1,LBF10x10-n2-f7-static-v2,43200,50,,,,,...,0,,,0,,,0,,,0
LBF10x10-n2-f7-static-v2/heuristic1-v0,baposgmcp-fixed_piheuristic1-v0_numsims50_truncatedFalse,84,0,1,LBF10x10-n2-f7-static-v2,43200,50,,,,,...,0,,,0,,,0,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ucbmcp_metauniform_numsims500_truncatedFalse,LBF10x10-n2-f7-static-v2/heuristic4-v0,175,0,0,LBF10x10-n2-f7-static-v2,43200,50,0.99,2.0,20000,False,...,110,0.272435,0.401627,108,0.263516,0.400918,105,0.275865,0.406518,100
ucbmcp_metauniform_numsims50_truncatedFalse,LBF10x10-n2-f7-static-v2/heuristic1-v0,164,0,0,LBF10x10-n2-f7-static-v2,43200,50,0.99,2.0,20000,False,...,146,0.271862,0.416827,144,0.261258,0.411937,139,0.269532,0.417519,137
ucbmcp_metauniform_numsims50_truncatedFalse,LBF10x10-n2-f7-static-v2/heuristic2-v0,165,0,0,LBF10x10-n2-f7-static-v2,43200,50,0.99,2.0,20000,False,...,199,0.296669,0.425344,199,0.294621,0.425333,199,0.298716,0.428142,199
ucbmcp_metauniform_numsims50_truncatedFalse,LBF10x10-n2-f7-static-v2/heuristic3-v0,166,0,0,LBF10x10-n2-f7-static-v2,43200,50,0.99,2.0,20000,False,...,145,0.248770,0.417869,143,0.247261,0.416670,142,0.236877,0.409625,139


In [15]:
# Turn the group-key index of the aggregated frame back into ordinary columns,
# then write the compiled results to disk.
compiled_df = gb_agg.reset_index()
# NOTE(review): index=True (the pandas default) also writes the new 0..n-1 row
# index as an unnamed first column - confirm downstream readers expect it.
compiled_df.to_csv(compiled_results_file_path, index=True)