In [1]:
import os
import pickle
import numpy as np
import bootstrapped.bootstrap as bs
import bootstrapped.stats_functions as bs_stats
from stable_baselines.results_plotter import ts2xy
from stable_baselines.bench.monitor import load_results
from tqdm import tqdm
from collections import defaultdict

  from ._conv import register_converters as _register_converters


In [2]:
# --- Experiment selection -------------------------------------------------
# Values that were used for other runs are kept in the comments so they can
# be swapped in by hand.
base = 'logs'

# alternative: "sac"
algo = 'ppo2'

# alternatives: 'CartPole-v1', 'RoboschoolInvertedPendulumSwingup-v1',
# "RoboschoolAnt-v1", "RoboschoolHopper-v1", "LunarLanderContinuous-v2"
env = "Acrobot-v1"

# alternative: int(2e6)
total_timesteps = int(2e5)

# alternatives: "1sources-3sets-SDW", "4sources-3sets-SDW",
# "4sources-3sets-SIW-no-bias", "1sources-3sets-SIW", "4sources-3sets-SIW",
# "1sources-3sets-1subopt-SIW", "4sources-3sets-2subopt-SIW",
# "4sources-3sets-4subopt-SIW", "8sources-3sets-SIW"
prefixes = ['4sources-3sets-SIW', '1sources-3sets-SIW']

# Directory for the analysis artefacts of the selected environment; created
# up front so that later cells can write into it.
save_path = "{}/{}_analysis/".format(base, env)
os.makedirs(save_path, exist_ok=True)

### Compute mean episodic rewards

In [3]:
def _get_mean_episodic_reward(result, steps_percentage, total_timesteps=None):
    
    timesteps = result[0]
    if total_timesteps is None:
        total_timesteps = timesteps[-1]
    cut_off = int(total_timesteps * steps_percentage)
    
    
    if timesteps[-1] / cut_off < .95:
        print(timesteps[-1] / cut_off,  timesteps[-1])
        raise Warning("total_timesteps {} is too high comparing to trained timesteps {}".format(total_timesteps, timesteps[-1]))
    
    # find cut_off episode
    for cut_ind in reversed(range(len(timesteps))):
        if timesteps[cut_ind] <= cut_off:
            break
    
    return result[1][:cut_ind].mean()

In [4]:
# Collect, per model and per training fraction, the mean episodic reward of
# every run found on disk. Keys of `mean_episodic_reward` look like
# "<algo>_<pct>%" for the baseline and "multipolar-<algo>_<prefix>_<pct>%"
# for the MULTIPOLAR runs; `failed_exp` lists the run directories that could
# not be evaluated.
mean_episodic_reward = defaultdict(list)
failed_exp = defaultdict(list)
steps_percentages = [0.25, 0.5, 0.75, 1.0]

for env_exp_id in tqdm(os.listdir('{}/{}'.format(base, algo))):
    # Visit every experiment once, through its first run directory
    # ("<experiment>_1"); matching on the "_1" suffix keeps ids such as
    # "<experiment>_11" from being mistaken for a first run.
    if env not in env_exp_id or not env_exp_id.endswith('_1'):
        continue

    # remove the experiment number
    env_exp = env_exp_id[:-2]
    for i in range(1, 4):
        # load results of training from scratch
        log_dir = "{}/{}/{}_{}".format(base, algo, env_exp, i)
        try:
            result = ts2xy(load_results(log_dir), 'timesteps')
        except Exception:
            # Report the unreadable run and skip it; falling through would
            # re-append the rewards of the previously loaded run.
            print(log_dir)
            continue
        for steps_percentage in steps_percentages:
            mean_episodic_reward[algo+'_{}%'.format(int(steps_percentage*100))].append(
                _get_mean_episodic_reward(result, steps_percentage, total_timesteps))

    # get the experiment name
    exp = ''.join(env_exp.split('_')[1:])
    for p in prefixes:
        for i in range(1, 10):
            log_dir = "{}/multipolar-{}/{}_{}-{}_{}".format(base, algo, env, p, exp, i)
            try:
                result = ts2xy(load_results(log_dir), 'timesteps')
                # Evaluate every fraction before storing anything so that a
                # failure part-way through cannot leave the per-fraction
                # lists with different lengths.
                rewards = [_get_mean_episodic_reward(result, steps_percentage, total_timesteps)
                           for steps_percentage in steps_percentages]
            except Exception:
                failed_exp['multipolar-{}_{}'.format(algo, p)].append(log_dir)
                continue
            for steps_percentage, reward in zip(steps_percentages, rewards):
                mean_episodic_reward['multipolar-{}_{}_{}%'.format(algo, p, int(steps_percentage*100))].append(reward)

100%|██████████| 30/30 [00:17<00:00,  1.67it/s]


In [5]:
# One line per model: its name, the runs that failed and how many there are.
for model, failed_runs in failed_exp.items():
    print(model, failed_runs, len(failed_runs))

### Save the results

In [6]:
# Persist the collected rewards, one pickle per model/fraction. Existing
# files are left untouched so that earlier results are never overwritten.
for model in mean_episodic_reward.keys():
    file = os.path.join(save_path, model + '-{}.pkl'.format(total_timesteps))
    if os.path.isfile(file):
        print("{} exists".format(model))
        continue
    with open(file, 'wb') as f:
        pickle.dump(mean_episodic_reward[model], f, pickle.HIGHEST_PROTOCOL)
    print("saved {}".format(model))

# Persist the failed runs in their own loop: `failed_exp` is keyed by model
# only (no "_<pct>%" suffix), so it cannot be indexed with the reward keys,
# and it must not be skipped just because a reward pickle already exists.
# The file name keeps the word "failed" so the loading cell can tell these
# pickles apart from the reward ones.
for model, failed_runs in failed_exp.items():
    file = os.path.join(save_path, model + '-{}-failed_exp.pkl'.format(total_timesteps))
    if os.path.isfile(file):
        continue
    with open(file, 'wb') as f:
        pickle.dump(failed_runs, f, pickle.HIGHEST_PROTOCOL)

ppo2_25% exists
ppo2_50% exists
ppo2_75% exists
ppo2_100% exists
saved multipolar-ppo2_4sources-3sets-SIW_25%
saved multipolar-ppo2_4sources-3sets-SIW_50%
saved multipolar-ppo2_4sources-3sets-SIW_75%
saved multipolar-ppo2_4sources-3sets-SIW_100%
saved multipolar-ppo2_1sources-3sets-SIW_25%
saved multipolar-ppo2_1sources-3sets-SIW_50%
saved multipolar-ppo2_1sources-3sets-SIW_75%
saved multipolar-ppo2_1sources-3sets-SIW_100%


### Loading pickles

In [7]:
# Rebuild both dictionaries from the pickles stored in `save_path`; the key
# of each entry is the file name without its ".pkl" extension.
# NOTE: pickle.load executes arbitrary code from the file -- only point this
# at pickles produced by this notebook.
mean_episodic_reward = {}
failed_exp = {}
for model in os.listdir(save_path):
    file = os.path.join(save_path, model)
    if not file.endswith('.pkl'):
        continue
    with open(file, 'rb') as f:
        content = pickle.load(f)
    # paths containing "failed" hold lists of failed runs, the rest rewards
    target = failed_exp if 'failed' in file else mean_episodic_reward
    target[model[:-4]] = content

### Print results

In [8]:
# Fixed seed so that the bootstrap resampling gives the same bounds each run.
np.random.seed(1993)
for key, rewards in mean_episodic_reward.items():
    rewards = np.array(rewards)
    # NaN entries are dropped before bootstrapping
    valid_rewards = rewards[~np.isnan(rewards)]
    bounds = bs.bootstrap(valid_rewards, stat_func=bs_stats.mean)
    # printed as: <key> <mean> (<lower bound>,<upper bound>), all rounded
    interval = "({},{})".format(int(round(bounds.lower_bound)), int(round(bounds.upper_bound)))
    print(key, int(round(bounds.value)), interval)


ppo2_25%-200000 -307 (-341,-276)
ppo2_50%-200000 -154 (-172,-136)
ppo2_75%-200000 -120 (-134,-106)
ppo2_100%-200000 -106 (-118,-93)
multipolar-ppo2_4sources-3sets-SIW_25%-200000 -147 (-158,-136)
multipolar-ppo2_4sources-3sets-SIW_50%-200000 -111 (-119,-103)
multipolar-ppo2_4sources-3sets-SIW_75%-200000 -98 (-105,-91)
multipolar-ppo2_4sources-3sets-SIW_100%-200000 -92 (-99,-86)
multipolar-ppo2_1sources-3sets-SIW_25%-200000 -156 (-169,-143)
multipolar-ppo2_1sources-3sets-SIW_50%-200000 -115 (-124,-106)
multipolar-ppo2_1sources-3sets-SIW_75%-200000 -100 (-107,-92)
multipolar-ppo2_1sources-3sets-SIW_100%-200000 -93 (-100,-86)


In [9]:
# Number of reward samples available for each model/fraction.
for key, rewards in mean_episodic_reward.items():
    print(key, len(rewards))

ppo2_25%-200000 30
ppo2_50%-200000 30
ppo2_75%-200000 30
ppo2_100%-200000 30
multipolar-ppo2_4sources-3sets-SIW_25%-200000 90
multipolar-ppo2_4sources-3sets-SIW_50%-200000 90
multipolar-ppo2_4sources-3sets-SIW_75%-200000 90
multipolar-ppo2_4sources-3sets-SIW_100%-200000 90
multipolar-ppo2_1sources-3sets-SIW_25%-200000 90
multipolar-ppo2_1sources-3sets-SIW_50%-200000 90
multipolar-ppo2_1sources-3sets-SIW_75%-200000 90
multipolar-ppo2_1sources-3sets-SIW_100%-200000 90


In [10]:
# Display the names of all loaded result sets.
mean_episodic_reward.keys()

dict_keys(['ppo2_25%-200000', 'ppo2_50%-200000', 'ppo2_75%-200000', 'ppo2_100%-200000', 'multipolar-ppo2_4sources-3sets-SIW_25%-200000', 'multipolar-ppo2_4sources-3sets-SIW_50%-200000', 'multipolar-ppo2_4sources-3sets-SIW_75%-200000', 'multipolar-ppo2_4sources-3sets-SIW_100%-200000', 'multipolar-ppo2_1sources-3sets-SIW_25%-200000', 'multipolar-ppo2_1sources-3sets-SIW_50%-200000', 'multipolar-ppo2_1sources-3sets-SIW_75%-200000', 'multipolar-ppo2_1sources-3sets-SIW_100%-200000'])