In [160]:
import os
import numpy as np
import pandas as pd
from itertools import product
from collections import defaultdict
from scipy.stats import trim_mean

In [159]:
def get_mean(arr, trim =0.0):
    return trim_mean(arr, trim)
    #return arr.mean()

def get_standard_error(arr):
    num_samples = len(arr)
    return arr.std()/num_samples

# Pong

In [161]:
data = {}
methods = set()

for file_name in os.listdir('pong/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('pong', file_name))
    
    if method == 'pr2':
        data[seed_num][method] = {
            'rewards': {
                'mean': get_mean(np_data[:, :2].sum(axis=-1)),
                'std': get_standard_error(np_data[:, :2].sum(axis=-1))
            },
            'steps': {
                'mean': get_mean(np_data[:, -1]),
                'std': get_standard_error(np_data[:, -1])
            }
        }
    else:
        data[seed_num][method] = {
            'rewards': {
                'mean': get_mean(np_data[0].sum(axis=0)[0]),
                'std': get_standard_error(np_data[0].sum(axis=0)[0])
            },
            'steps': {
                'mean': get_mean(np_data[0].mean(axis=0)[1]),
                'std': get_standard_error(np_data[0].mean(axis=0)[1])
            }
        }

In [162]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,k_1_unclamped,k_1_unclamped,k_1_unclamped,k_1_unclamped,k_2,k_2,k_2,k_2,pr2,pr2,...,nc_a2c,nc_a2c,moa,moa,moa,moa,consensus_update,consensus_update,consensus_update,consensus_update
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,...,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
1,0.273555,0.002356,259.37,1.497213,-0.031066,0.004102,172.69,1.498575,-0.825435,0.004271,...,93.02,0.348927,-0.128537,0.002927,86.28,0.546053,0.065541,0.003432,172.56,1.303699
5,0.190023,0.004354,169.7,1.119343,0.22147,0.002697,135.4,0.693005,-0.878699,0.00249,...,70.44,0.163513,-0.228033,0.003974,53.16,0.315667,0.203049,0.002892,129.32,0.676548
10,0.318764,0.00193,191.27,0.947303,0.326812,0.00211,196.44,0.965898,-0.818909,0.003465,...,180.21,0.643109,-0.641514,0.0,20.0,0.0,0.276399,0.001701,169.48,0.732413
15,0.221099,0.003373,231.09,1.393459,0.228548,0.003257,232.81,1.555887,-0.827007,0.00319,...,68.03,0.089011,-0.185343,0.003531,73.37,0.437426,-0.021706,0.004003,136.78,1.126713


In [163]:
df.mean(axis=0)

k_1_unclamped     rewards  mean      0.250860
                           std       0.003003
                  steps    mean    212.857500
                           std       1.239330
k_2               rewards  mean      0.186441
                           std       0.003041
                  steps    mean    184.335000
                           std       1.178341
pr2               rewards  mean     -0.837512
                           std       0.003354
                  steps    mean     36.757500
                           std       0.342769
k_1               rewards  mean      0.126730
                           std       0.003984
                  steps    mean    202.945000
                           std       1.351283
nc_a2c            rewards  mean      0.048259
                           std       0.001498
                  steps    mean    102.925000
                           std       0.311140
moa               rewards  mean     -0.295857
                           std    

# Pistonball

In [164]:
data = {}
methods = set()

for file_name in os.listdir('pistonball/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('pistonball', file_name))
    if method == 'pr2':
        data[seed_num][method] = {
            'rewards': {
                'mean': get_mean(np_data[:, :2].sum(axis=-1)),
                'std': get_standard_error(np_data[:, :2].sum(axis=-1))
            },
            'steps': {
                'mean': get_mean(np_data[:, -1]),
                'std': get_standard_error(np_data[:, -1])
            }
        }
    else:
        data[seed_num][method] = {
            'rewards': {
                'mean': get_mean(np_data[0].sum(axis=0)[0]),
                'std': get_standard_error(np_data[0].sum(axis=0)[0])
            },
            'steps': {
                'mean': get_mean(np_data[0].mean(axis=0)[1]),
                'std': get_standard_error(np_data[0].mean(axis=0)[1])
            }
        }

In [165]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,infopg(k=1),infopg(k=1),infopg(k=1),infopg(k=1),infopg(k=2),infopg(k=2),infopg(k=2),infopg(k=2),consensusupdate,consensusupdate,...,infopg_nocritic(k=1),infopg_nocritic(k=1),moa,moa,moa,moa,infopg_nocritic(k=2),infopg_nocritic(k=2),infopg_nocritic(k=2),infopg_nocritic(k=2)
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,...,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
1,7.563258,0.021991,27.32,0.416862,7.240686,0.032243,35.85,0.567774,1.397745,0.045788,...,57.09,0.694435,4.418068,0.03721,88.8,0.782716,7.663879,0.025332,17.94,0.327942
5,7.26657,0.027056,33.7,0.483741,6.145953,0.040363,54.3,0.716304,0.870913,0.039638,...,48.74,0.672172,4.174463,0.037964,90.85,0.730007,7.480719,0.021576,15.44,0.18064
10,7.408111,0.026795,28.71,0.418522,7.1979,0.034457,38.12,0.559304,0.393426,0.039344,...,71.99,0.80957,3.81594,0.039501,98.72,0.761978,7.59543,0.024163,17.85,0.327745
15,7.106542,0.025254,23.53,0.403419,6.668096,0.035937,39.54,0.635118,1.597192,0.042613,...,69.18,0.832571,3.982832,0.038076,88.3,0.779534,7.166561,0.025073,18.53,0.353388


In [166]:
df.mean(axis=0)

infopg(k=1)           rewards  mean      7.336120
                               std       0.025274
                      steps    mean     28.315000
                               std       0.430636
infopg(k=2)           rewards  mean      6.813159
                               std       0.035750
                      steps    mean     41.952500
                               std       0.619625
consensusupdate       rewards  mean      1.064819
                               std       0.041846
                      steps    mean    136.302500
                               std       0.834242
a2c                   rewards  mean      0.894752
                               std       0.042730
                      steps    mean    138.332500
                               std       0.832731
infopg_nocritic(k=3)  rewards  mean      9.131626
                               std       0.044301
                      steps    mean     40.387500
                               std       0.620960


# Walker

In [167]:
data = {}
methods = set()

for file_name in os.listdir('walker/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('walker', file_name))
    
    data[seed_num][method] = {
        'rewards': {
            'mean': get_mean(np_data[:, :2].mean(axis=-1), trim=0.1),
            'std': get_standard_error(np_data[:, :2].mean(axis=-1))
        },
        'steps': {
            'mean': get_mean(np_data[:, -1], trim=0.1),
            'std': get_standard_error(np_data[:, -1])
        }
    }

In [168]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,consensus,consensus,consensus,consensus,pr2,pr2,pr2,pr2,nc_a2c,nc_a2c,...,infopg_k1,infopg_k1,moa,moa,moa,moa,infopg_k1_adv,infopg_k1_adv,infopg_k1_adv,infopg_k1_adv
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,...,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
1,-2.58893,0.298675,477.4625,1.448616,-81.177136,0.387411,109.55,1.51663,-66.409987,0.172626,...,481.425,1.369619,0.708681,0.14224,492.325,0.949702,11.125301,0.118226,500.0,0.954029
5,-1.431958,0.274432,495.7375,1.290323,-85.248501,0.386838,81.55,1.278384,-66.393228,0.212527,...,500.0,1.007614,1.358743,0.124844,494.95,0.929157,11.43458,0.089764,500.0,0.611617
10,-1.247761,0.27579,494.6125,1.246428,-84.623186,0.369084,96.3,1.396773,-69.240675,0.168048,...,445.8875,1.658891,-0.369647,0.176823,488.875,1.204431,11.918026,0.121619,500.0,0.858297
15,-1.464844,0.184139,492.8875,1.231723,-85.831291,0.350215,89.275,1.390222,-63.770376,0.159924,...,469.5,1.531669,0.953661,0.155597,480.8875,1.264153,12.78692,0.114438,500.0,0.788631


In [169]:
df.mean(axis=0)

consensus      rewards  mean     -1.683373
                        std       0.258259
               steps    mean    490.175000
                        std       1.304272
pr2            rewards  mean    -84.220029
                        std       0.373387
               steps    mean     94.168750
                        std       1.395502
nc_a2c         rewards  mean    -66.453566
                        std       0.178281
               steps    mean     80.750000
                        std       0.204495
infopg_k1      rewards  mean      3.562441
                        std       0.203244
               steps    mean    474.203125
                        std       1.391948
moa            rewards  mean      0.662860
                        std       0.149876
               steps    mean    489.259375
                        std       1.086861
infopg_k1_adv  rewards  mean     11.816207
                        std       0.111012
               steps    mean    500.000000
           

# Starcraft

In [170]:
data = {}
methods = set()

for file_name in os.listdir('starcraft/'):
    if file_name == '.DS_Store':
        continue

    if 'seed_' not in file_name:
        continue

    seed_num = file_name.replace('seed_', '')[:file_name.replace('seed_', '').find('_')]
    method = file_name.replace(f'seed_{seed_num}_', '')
    method = method[:method.find('.')]
    
    if seed_num not in data:
        data[seed_num] = {}
        
    if method not in methods:
        methods.add(method)
    
    np_data = np.load(os.path.join('starcraft', file_name))
    
    if method == 'pr2':
        data[seed_num][method] = {
            'rewards': {
                'mean': get_mean(np_data[:, :2].sum(axis=-1)),
                'std': get_standard_error(np_data[:, :2].sum(axis=-1))
            },
            'steps': {
                'mean': get_mean(np_data[:, -1]),
                'std': get_standard_error(np_data[:, -1])
            }
        }
    else:
        data[seed_num][method] = {
            'rewards': {
                'mean': get_mean(np_data[0].sum(axis=0)[0]),
                'std': get_standard_error(np_data[0].sum(axis=0)[0])
            },
            'steps': {
                'mean': get_mean(np_data[0].mean(axis=0)[1]),
                'std': get_standard_error(np_data[0].mean(axis=0)[1])
            }
        }

In [171]:
tuple_keys = list(product(methods, ['rewards', 'steps'], ['mean', 'std']))
d = defaultdict(list)

for tuple_key in tuple_keys:
    for seed in data.keys():
        d[tuple_key].append(data[seed][tuple_key[0]][tuple_key[1]][tuple_key[2]])
df = pd.DataFrame(d, index=[1,5,10,15])
df

Unnamed: 0_level_0,consensus,consensus,consensus,consensus,infopg,infopg,infopg,infopg,pr2,pr2,...,nc_a2c,nc_a2c,moa,moa,moa,moa,adv,adv,adv,adv
Unnamed: 0_level_1,rewards,rewards,steps,steps,rewards,rewards,steps,steps,rewards,rewards,...,steps,steps,rewards,rewards,steps,steps,rewards,rewards,steps,steps
Unnamed: 0_level_2,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
1,0.281004,0.00276,57.87,0.077739,4.32265,0.006893,30.87,0.057474,0.65827,0.003966,...,60.0,0.0,3.16202,0.043295,26.73,0.061235,3.877196,0.017195,42.12,0.127901
5,0.245534,0.002482,58.82,0.057834,4.327968,0.007007,31.1,0.058932,0.66198,0.003663,...,60.0,0.0,2.584956,0.012746,28.3,0.082201,3.56862,0.017026,46.67,0.133956
10,0.23457,0.00192,58.79,0.059301,4.508448,0.005701,30.36,0.040952,0.622454,0.003604,...,60.0,0.0,2.716901,0.015054,28.91,0.092186,3.620021,0.015171,44.92,0.135873
15,0.227566,0.001503,59.38,0.0434,4.42319,0.007256,30.83,0.057255,0.607451,0.003499,...,60.0,0.0,2.684227,0.014174,26.84,0.065662,3.857461,0.018344,44.11,0.129429


In [172]:
df.mean(axis=0)

consensus  rewards  mean     0.247168
                    std      0.002166
           steps    mean    58.715000
                    std      0.059568
infopg     rewards  mean     4.395564
                    std      0.006714
           steps    mean    30.790000
                    std      0.053653
pr2        rewards  mean     0.637539
                    std      0.003683
           steps    mean    27.492500
                    std      0.081809
nc_a2c     rewards  mean     0.000000
                    std      0.000000
           steps    mean    60.000000
                    std      0.000000
moa        rewards  mean     2.787026
                    std      0.021317
           steps    mean    27.695000
                    std      0.075321
adv        rewards  mean     3.730825
                    std      0.016934
           steps    mean    44.455000
                    std      0.131789
dtype: float64