In [122]:
import os
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, mannwhitneyu

# Short aliases for the two os helpers used below to enumerate run
# directories and to build paths to each run's data file.
osl = os.listdir
ospj = os.path.join

### get relevant runs data given filters of interest

In [123]:
import re

def natural_sort(l):
    """Return the strings of ``l`` sorted in natural (human) order.

    Runs of ASCII digits are compared by numeric value and everything else is
    compared case-insensitively, so ``"run2"`` sorts before ``"run10"``.

    Args:
        l: iterable of strings.

    Returns:
        A new sorted list; ``l`` itself is not modified.
    """
    def alphanum_key(key):
        # re.split with a capturing group always yields
        # [text, digits, text, digits, ..., text]: odd positions are exactly
        # the "[0-9]+" matches. Classifying by position (instead of
        # str.isdigit) keeps non-ASCII digit characters such as "²" as text,
        # where int() would raise or produce int-vs-str comparisons.
        parts = re.split('([0-9]+)', key)
        return [
            int(part) if idx % 2 else part.lower()
            for idx, part in enumerate(parts)
        ]

    return sorted(l, key=alphanum_key)

In [124]:
# Earlier run directories / filter sets, disabled by wrapping them in a bare
# string literal (the string is evaluated and discarded, nothing is assigned).
"""
runs_dir = "/home/derposoft/ray_results"
runs_filters = [
    "fcscout_baseline200h_1r2b_SEED",
    "gnnscout_200h2.2l_1r2b_SEED",
    "gatscout_200h2.2l_1r2b_SEED"
]
runs_filters = [
    "fcskirmish_5hp_baseline100_50_PAPER_SEED",
    "gcnskirmish_5hp_baseline100_50_3.4_PAPER_SEED",
    "gatLNskirmish_5hp_GLOBALbaseline100_50_3.4_PAPER_SEED",
    "gcnLNskirmish_5hp_baseline100_50_3.4_PAPER_SEED",
]
runs_filters = [
    "fcskirmish_5hp_baseline100_50_100ksteps_SEED",
    "gcnskirmish_5hp_baseline100_50_2.4_PAPER_SEED",
]
runs_dirs = [
    "/home/derposoft/Documents/tb_logs/40k_runs/fcskirmish_100_50",
    "/home/derposoft/Documents/tb_logs/40k_runs/gcnskirmish_100_50_3.4",
]
runs_filters = [
    "fcskirmish_5hp_baseline100_50_PAPER_SEED",
    "gcnskirmish_5hp_baseline100_50_3.4_PAPER_SEED",
]
"""
# Directories whose entries are listed to discover runs (active configuration).
runs_dirs = [
    "/home/derposoft/Documents/tb_logs/seeded_runs/"
]
# One entry per experiment. Each entry is matched as a substring of the run
# paths, and the first entry is the group get_stats uses as the baseline.
runs_filters = [
    "baseline_SEED",
    "hybridGAT_5hp_globalemb_NOOPT_SEED",
    "hybridGAT_5hp_hybridemb_NOOPT_SEED",
    "hybridGAT_5hp_localemb_NOOPT_SEED",
    "hybridGCN_5hp_globalemb_NOOPT_SEED",
    "hybridGCN_5hp_hybridemb_NOOPT_SEED",
    "hybridGCN_5hp_localemb_NOOPT_SEED",
    "hybridGT_5hp_globalemb_NOOPT_SEED",
    "hybridGT_5hp_hybridemb_NOOPT_SEED",
    "hybridGT_5hp_localemb_NOOPT_SEED",
]
# Full path of every entry found in every runs directory, in natural order so
# that numeric parts of the names sort by value. A nested comprehension is used
# instead of sum(list_of_lists, []), which re-copies the accumulated list on
# every addition.
runs = natural_sort(
    [ospj(runs_dir, x) for runs_dir in runs_dirs for x in osl(runs_dir)]
)
# Minimum number of logged rows a run must have to be kept by
# max_avg_reward / last_ep_reward.
tot_steps_len = 200

In [125]:
# Group the discovered run paths by experiment: group i holds every path that
# contains runs_filters[i] as a substring (order of `runs` is preserved).
all_filters_runs = [
    [path for path in runs if pattern in path]
    for pattern in runs_filters
]

In [127]:
# CSV file read from inside every run directory.
run_data_file = "progress.csv"
# Column holding the reward curve. The second assignment overrides the first,
# so the evaluation reward is the one actually analysed.
run_data_col = "episode_reward_mean"
run_data_col = "evaluation/episode_reward_mean"
# Column holding the training-step counter.
run_step_col = "timesteps_total"
# all_runs_data[i] is the list of reward columns (one per run) for
# runs_filters[i].
all_runs_data = []
for filter_runs in all_filters_runs:
    curr_run_data = []
    #for run in filter_runs:
    #    datafile = pd.read_csv(ospj(runs_dir, run, run_data_file))
    #    curr_run_data.append(datafile[run_data_col])
    #for run in runs:
    for run in filter_runs:
        datafile = pd.read_csv(ospj(run, run_data_file))
        curr_run_data.append(datafile[run_data_col])
    all_runs_data.append(curr_run_data)
#runs_steps = pd.read_csv(ospj(runs_dir, all_filters_runs[0][0], run_data_file))[run_step_col]
# Step axis shared by all runs, taken from the first discovered path.
# NOTE(review): runs[0] is the first entry of the whole sorted listing, which
# need not match any filter; this assumes every run logs the same step values
# -- TODO confirm.
runs_steps = pd.read_csv(ospj(runs[0], run_data_file))[run_step_col]

### given data, run "map" on data to get values suitable for analysis

In [128]:
def first_ep_to_val(runs, steps, ge=30):
    """Find, for each run, the first step at which the value reaches ``ge``.

    Args:
        runs: iterable of value sequences, one per run.
        steps: step labels, paired positionally with each run's values.
        ge: threshold; a value counts once it is ``>= ge``.

    Returns:
        A list with one entry per run: the matching step, or ``-1`` when the
        run never reaches the threshold.
    """
    return [
        next((step for step, val in zip(steps, run) if val >= ge), -1)
        for run in runs
    ]

def max_avg_reward(runs, steps):
    """Return the maximum value of every sufficiently long run.

    Runs with fewer than ``tot_steps_len`` (module-level) entries are skipped,
    so the result may be shorter than ``runs``.

    NaN entries are ignored via ``np.nanmax``. The builtin ``max`` is
    order-dependent when NaNs are present (a leading NaN is returned as the
    "maximum"), which would silently corrupt the statistic.

    Args:
        runs: iterable of value sequences, one per run.
        steps: unused; kept so all metric functions share one signature.

    Returns:
        A list with the maximum non-NaN value of each kept run.
    """
    return [np.nanmax(run) for run in runs if len(run) >= tot_steps_len]

def last_ep_reward(runs, steps):
    """Return the final value of every sufficiently long run.

    Runs with fewer than ``tot_steps_len`` (module-level) entries are skipped.
    Each run must provide ``tolist()``.

    Args:
        runs: iterable of value sequences, one per run.
        steps: unused; kept so all metric functions share one signature.

    Returns:
        A list with the last element of each kept run.
    """
    return [
        run.tolist()[-1]
        for run in runs
        if not len(run) < tot_steps_len
    ]

In [129]:
# Apply each metric to every experiment group: entry i of each result holds
# the per-run values for all_runs_data[i].
first_ep_to_30 = [first_ep_to_val(group, runs_steps) for group in all_runs_data]
max_reward = [max_avg_reward(group, runs_steps) for group in all_runs_data]
last_reward = [last_ep_reward(group, runs_steps) for group in all_runs_data]

### given "map" values, run "reduce" to aggregate values and show significance

In [133]:
def pretty_print(d, tabs=0, tabsize=4):
    """Print a (possibly nested) dictionary, one key per line.

    Nested dictionaries are printed under their key with one extra level of
    indentation. ``isinstance`` is used so that dict subclasses such as
    ``OrderedDict`` or ``defaultdict`` are expanded as well instead of being
    dumped with their repr.

    Args:
        d: dictionary to print.
        tabs: current indentation level.
        tabsize: number of spaces per indentation level.
    """
    # The indent depends only on the arguments, so compute it once.
    indent = " " * tabsize * tabs
    for k, v in d.items():
        if isinstance(v, dict):
            print(f"{indent}{k}:")
            pretty_print(v, tabs + 1, tabsize)
        else:
            print(f"{indent}{k}: {v}")

def get_stats(all_runs_vals, firstn=-1):
    """Summarise each experiment's values and test them against the baseline.

    The first group in ``all_runs_vals`` is the baseline. Every group
    (including the baseline itself) is compared to it with a one-sided
    unequal-variance t-test (``alternative="less"``: baseline mean lower than
    the group's mean).

    Args:
        all_runs_vals: list of value lists, paired positionally with the
            module-level ``runs_filters`` names.
        firstn: when positive, only the first ``firstn`` values of every
            group are used. This now includes the baseline, which previously
            stayed untruncated and was therefore compared on a different
            subset.

    Returns:
        Dict mapping each filter name to ``{"str": "<mean>+/-<1.96*std> -- p=<pvalue>"}``.
    """
    stats = {}
    # NOTE(review): the "+/-" term is 1.96 * np.std (population std, ddof=0),
    # i.e. the spread of the runs, not a confidence interval of the mean
    # (that would divide by sqrt(n)) -- confirm this is what should be reported.
    ci95z = 1.96
    #baselines = all_runs_vals[0]
    baselines = all_runs_vals[0]
    if firstn > 0:
        baselines = baselines[:firstn]
    # `filter_name` rather than `filter`, to avoid shadowing the builtin.
    for filter_name, vals in zip(runs_filters, all_runs_vals):
        if firstn > 0:
            vals = vals[:firstn]
        mu = np.mean(vals)
        sigma = np.std(vals)
        ttest_results = ttest_ind(baselines, vals, alternative="less", equal_var=False)
        stats[filter_name] = {
            "str": f"{mu}+/-{ci95z*sigma} -- p={ttest_results.pvalue}",
            #"mean": f"{mu}+/-{ci95z*sigma}",
            #"median": np.median(vals),
            #"std": sigma,
            #"ci": [mu-ci95z*sigma, mu+ci95z*sigma],
            #"n": len(vals),
            #"mannwu": mannwhitneyu(baselines, vals, alternative="less"),
            #"ttest_less": ttest_ind(baselines, vals, alternative="less", equal_var=False),
            #"ttest_more": ttest_ind(baselines, vals, alternative="greater", equal_var=False),
        }
    return stats

In [134]:
# Use at most this many runs per experiment when computing statistics.
firstn = 35
# One stats dictionary per metric, computed in the same order as before.
first_ep_to_30_stats, max_reward_stats, last_reward_stats = (
    get_stats(metric_vals, firstn=firstn)
    for metric_vals in (first_ep_to_30, max_reward, last_reward)
)

In [135]:
def print_stat(stats, name=""):
    """Print one stats dictionary under a "<name> stats:" heading.

    Args:
        stats: dictionary handed to ``pretty_print``.
        name: heading label; must not be the empty string (asserted).
    """
    assert name != ""
    header = name + " stats:"
    print(header)
    pretty_print(stats)
    print()  # blank line separating consecutive reports


# Report the three metrics, each under its own heading.
print_stat(max_reward_stats, name="max reward")
print_stat(first_ep_to_30_stats, name="first ep to 30")
print_stat(last_reward_stats, name="last reward")


max reward stats:
baseline_SEED:
    str: 45.95+/-12.854746329663607 -- p=0.5
hybridGAT_5hp_globalemb_NOOPT_SEED:
    str: 53.54+/-13.362600968374386 -- p=0.013528438388306441
hybridGAT_5hp_hybridemb_NOOPT_SEED:
    str: 46.81+/-17.581200000000003 -- p=0.4096271589408053
hybridGAT_5hp_localemb_NOOPT_SEED:
    str: 51.720000000000006+/-15.526317868702805 -- p=0.05510325640708142
hybridGCN_5hp_globalemb_NOOPT_SEED:
    str: 49.11+/-18.884770367679877 -- p=0.21402645459676656
hybridGCN_5hp_hybridemb_NOOPT_SEED:
    str: 49.550000000000004+/-11.390365929152583 -- p=0.1555326478299324
hybridGCN_5hp_localemb_NOOPT_SEED:
    str: 51.18+/-12.289309409401326 -- p=0.05045861579400422
hybridGT_5hp_globalemb_NOOPT_SEED:
    str: 45.720000000000006+/-9.104954220642737 -- p=0.533682540281827
hybridGT_5hp_hybridemb_NOOPT_SEED:
    str: 50.730000000000004+/-13.550689316783847 -- p=0.07488542649303868
hybridGT_5hp_localemb_NOOPT_SEED:
    str: 51.58+/-15.735712025834738 -- p=0.060661192344162604

first