In [108]:
import os
import numpy as np
import pandas as pd
from itertools import product
from collections import defaultdict
from scipy.stats import trim_mean

In [109]:
def get_mean(arr, trim =0.0):
    return trim_mean(arr, trim)
    #return arr.mean()

def get_standard_error(arr):
    num_samples = len(arr)
    return arr.std()/num_samples

# Pong

In [175]:
data = {}
methods = set()

for file_name in os.listdir('pong/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('pong', file_name))
    data[seed_num][method] = {
        'rewards': {
            'mean': get_mean(np_data[0].sum(axis=0)[0]),
            'std': get_standard_error(np_data[0].sum(axis=0)[0])
        },
        'steps': {
            'mean': get_mean(np_data[0].mean(axis=0)[1]),
            'std': get_standard_error(np_data[0].mean(axis=0)[1])
        }
    }

In [176]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,k_2,k_2,k_2,k_2,nc_a2c,nc_a2c,nc_a2c,nc_a2c,moa,moa,...,consensus_update,consensus_update,k_1_unclamped,k_1_unclamped,k_1_unclamped,k_1_unclamped,k_1,k_1,k_1,k_1
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,...,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
1,-0.031066,0.004102,172.69,1.498575,-0.00274,0.001158,93.02,0.348927,-0.128537,0.002927,...,172.56,1.303699,0.273555,0.002356,259.37,1.497213,0.242548,0.003949,292.2,1.741161
5,0.22147,0.002697,135.4,0.693005,0.028735,0.001452,70.44,0.163513,-0.228033,0.003974,...,129.32,0.676548,0.190023,0.004354,169.7,1.119343,-0.184517,0.005078,100.52,1.131276
10,0.326812,0.00211,196.44,0.965898,0.256762,0.002536,180.21,0.643109,-0.641514,0.0,...,169.48,0.732413,0.318764,0.00193,191.27,0.947303,0.167078,0.003643,166.14,1.072529
15,0.228548,0.003257,232.81,1.555887,-0.089723,0.000846,68.03,0.089011,-0.185343,0.003531,...,136.78,1.126713,0.221099,0.003373,231.09,1.393459,0.28181,0.003266,252.92,1.460165


In [177]:
df.mean(axis=0)

k_2               rewards  mean      0.186441
                           std       0.003041
                  steps    mean    184.335000
                           std       1.178341
nc_a2c            rewards  mean      0.048259
                           std       0.001498
                  steps    mean    102.925000
                           std       0.311140
moa               rewards  mean     -0.295857
                           std       0.002608
                  steps    mean     58.202500
                           std       0.324786
consensus_update  rewards  mean      0.130821
                           std       0.003007
                  steps    mean    152.035000
                           std       0.959843
k_1_unclamped     rewards  mean      0.250860
                           std       0.003003
                  steps    mean    212.857500
                           std       1.239330
k_1               rewards  mean      0.126730
                           std    

# Pistonball

In [163]:
data = {}
methods = set()

for file_name in os.listdir('piston/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('piston', file_name))
    data[seed_num][method] = {
        'rewards': {
            'mean': get_mean(np_data[0].sum(axis=0)[0]),
            'std': get_standard_error(np_data[0].sum(axis=0)[0])
        },
        'steps': {
            'mean': get_mean(np_data[0].mean(axis=0)[1]),
            'std': get_standard_error(np_data[0].mean(axis=0)[1])
        }
    }

In [164]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,infopg(k=2),infopg(k=2),infopg(k=2),infopg(k=2),a2c,a2c,a2c,a2c,infopg_nocritic(k=1),infopg_nocritic(k=1),...,infopg(k=1),infopg(k=1),infopg_nocritic(k=3),infopg_nocritic(k=3),infopg_nocritic(k=3),infopg_nocritic(k=3),consensusupdate,consensusupdate,consensusupdate,consensusupdate
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,...,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
1,7.240686,0.032243,35.85,0.567774,1.441443,0.044103,128.1,0.853648,5.034653,0.040665,...,27.32,0.416862,9.430926,0.042659,36.96,0.603695,1.397745,0.045788,128.3,0.885122
5,6.145953,0.040363,54.3,0.716304,0.516712,0.044839,146.29,0.82011,5.570649,0.039006,...,33.7,0.483741,8.944309,0.045662,43.79,0.626922,0.870913,0.039638,138.11,0.821693
10,7.1979,0.034457,38.12,0.559304,0.486072,0.040176,144.11,0.81332,4.747915,0.046476,...,28.71,0.418522,9.255591,0.043632,40.0,0.612232,0.393426,0.039344,152.88,0.761874
15,6.668096,0.035937,39.54,0.635118,1.134782,0.0418,134.83,0.843845,4.551848,0.044036,...,23.53,0.403419,8.895678,0.045249,40.8,0.640991,1.597192,0.042613,125.92,0.86828


In [165]:
df.mean(axis=0)

infopg(k=2)           rewards  mean      6.813159
                               std       0.035750
                      steps    mean     41.952500
                               std       0.619625
a2c                   rewards  mean      0.894752
                               std       0.042730
                      steps    mean    138.332500
                               std       0.832731
infopg_nocritic(k=1)  rewards  mean      4.976266
                               std       0.042546
                      steps    mean     61.750000
                               std       0.752187
infopg_nocritic(k=2)  rewards  mean      7.476647
                               std       0.024036
                      steps    mean     17.440000
                               std       0.297428
moa                   rewards  mean      4.097826
                               std       0.038188
                      steps    mean     91.667500
                               std       0.763559


# Walker

In [113]:
data = {}
methods = set()

for file_name in os.listdir('walker/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('walker', file_name))
    data[seed_num][method] = {
        'rewards': {
            'mean': get_mean(np_data[:, :2].mean(axis=-1), trim=0.1),
            'std': get_standard_error(np_data[:, :2].mean(axis=-1))
        },
        'steps': {
            'mean': get_mean(np_data[:, -1], trim=0.1),
            'std': get_standard_error(np_data[:, -1])
        }
    }

In [114]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,consensus,consensus,consensus,consensus,nc_a2c,nc_a2c,nc_a2c,nc_a2c,infopg_k1,infopg_k1,infopg_k1,infopg_k1,moa,moa,moa,moa,infopg_k1_adv,infopg_k1_adv,infopg_k1_adv,infopg_k1_adv
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
1,-2.58893,0.298675,477.4625,1.448616,-66.409987,0.172626,82.1125,0.204462,5.063044,0.190146,481.425,1.369619,0.708681,0.14224,492.325,0.949702,11.125301,0.118226,500.0,0.954029
5,-1.431958,0.274432,495.7375,1.290323,-66.393228,0.212527,82.525,0.229916,6.009702,0.148114,500.0,1.007614,1.358743,0.124844,494.95,0.929157,11.43458,0.089764,500.0,0.611617
10,-1.247761,0.27579,494.6125,1.246428,-69.240675,0.168048,79.85,0.19472,0.087268,0.268405,445.8875,1.658891,-0.369647,0.176823,488.875,1.204431,11.918026,0.121619,500.0,0.858297
15,-1.464844,0.184139,492.8875,1.231723,-63.770376,0.159924,78.5125,0.188881,3.089747,0.206309,469.5,1.531669,0.953661,0.155597,480.8875,1.264153,12.78692,0.114438,500.0,0.788631


In [115]:
df.mean(axis=0)

consensus      rewards  mean     -1.683373
                        std       0.258259
               steps    mean    490.175000
                        std       1.304272
nc_a2c         rewards  mean    -66.453566
                        std       0.178281
               steps    mean     80.750000
                        std       0.204495
infopg_k1      rewards  mean      3.562441
                        std       0.203244
               steps    mean    474.203125
                        std       1.391948
moa            rewards  mean      0.662860
                        std       0.149876
               steps    mean    489.259375
                        std       1.086861
infopg_k1_adv  rewards  mean     11.816207
                        std       0.111012
               steps    mean    500.000000
                        std       0.803143
dtype: float64

# Walker

In [127]:
data = {}
methods = set()

for file_name in os.listdir('starcraft/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('starcraft', file_name))
    data[seed_num][method] = {
        'rewards': {
            'mean': get_mean(np_data[0].sum(axis=0)[0]),
            'std': get_standard_error(np_data[0].sum(axis=0)[0])
        },
        'steps': {
            'mean': get_mean(np_data[0].mean(axis=0)[1]),
            'std': get_standard_error(np_data[0].mean(axis=0)[1])
        }
    }

In [128]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,consensus,consensus,consensus,consensus,infopg,infopg,infopg,infopg,nc_a2c,nc_a2c,nc_a2c,nc_a2c,moa,moa,moa,moa,adv,adv,adv,adv
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
1,0.281004,0.00276,57.87,0.077739,4.32265,0.006893,30.87,0.057474,0.0,0.0,60.0,0.0,3.16202,0.043295,26.73,0.061235,3.877196,0.017195,42.12,0.127901
5,0.245534,0.002482,58.82,0.057834,4.327968,0.007007,31.1,0.058932,0.0,0.0,60.0,0.0,2.584956,0.012746,28.3,0.082201,3.56862,0.017026,46.67,0.133956
10,0.23457,0.00192,58.79,0.059301,4.508448,0.005701,30.36,0.040952,0.0,0.0,60.0,0.0,2.716901,0.015054,28.91,0.092186,3.620021,0.015171,44.92,0.135873
15,0.227566,0.001503,59.38,0.0434,4.42319,0.007256,30.83,0.057255,0.0,0.0,60.0,0.0,2.684227,0.014174,26.84,0.065662,3.857461,0.018344,44.11,0.129429


In [129]:
df.mean(axis=0)

consensus  rewards  mean     0.247168
                    std      0.002166
           steps    mean    58.715000
                    std      0.059568
infopg     rewards  mean     4.395564
                    std      0.006714
           steps    mean    30.790000
                    std      0.053653
nc_a2c     rewards  mean     0.000000
                    std      0.000000
           steps    mean    60.000000
                    std      0.000000
moa        rewards  mean     2.787026
                    std      0.021317
           steps    mean    27.695000
                    std      0.075321
adv        rewards  mean     3.730825
                    std      0.016934
           steps    mean    44.455000
                    std      0.131789
dtype: float64