In [1]:
import os
import pickle
import numpy as np
import bootstrapped.bootstrap as bs
import bootstrapped.stats_functions as bs_stats
from stable_baselines.results_plotter import ts2xy
from stable_baselines.bench.monitor import load_results
from tqdm import tqdm
from collections import defaultdict

  from ._conv import register_converters as _register_converters


In [2]:
# Experiment selection.
# Alternatives tried for `algo`: 'ppo2', "sac".
# Alternatives tried for `env`: "RoboschoolHopper-v1", "LunarLanderContinuous-v2", "Acrobot-v1".
algo = "sac"
env = "LunarLanderContinuous-v2"
total_timesteps = 200000  # same value as int(2e5)

# Experiment-name prefixes of the mlap runs to analyse.
# Other prefixes seen: "1sources-3sets-1subopt-SIW", "4sources-3sets-2subopt-SIW",
# "4sources-3sets-4subopt-SIW".
prefixes = [
    "1sources-3sets-SIW",
    "4sources-3sets-SIW",
    "8sources-3sets-SIW",
]

# Directory that receives the pickled analysis results; created if missing.
save_path = "logs/" + env + "_analysis/"
os.makedirs(save_path, exist_ok=True)

### Compute mean episodic rewards

In [3]:
def _get_mean_episodic_reward(result, steps_percentage, total_timesteps=None):
    
    timesteps = result[0]
    if total_timesteps is None:
        total_timesteps = timesteps[-1]
    cut_off = int(total_timesteps * steps_percentage)
    
    
    if timesteps[-1] / cut_off < .98:
        print(timesteps[-1] / cut_off )
        raise Warning("total_timesteps {} is too high comparing to trained timesteps {}".format(total_timesteps, timesteps[-1]))
    
    # find cut_off episode
    for cut_ind in reversed(range(len(timesteps))):
        if timesteps[cut_ind] <= cut_off:
            break
    
    return result[1][:cut_ind].mean()

In [4]:
# Fractions of the training budget at which the mean episodic reward is sampled.
step_fractions = [0.25, 0.5, 0.75, 1.0]

# key: "<model>_<percent>%"  -> list with one mean reward per processed run
mean_episodic_reward = defaultdict(list)
# key: "mlap-<algo>_<prefix>" -> list of log paths that could not be processed
failed_exp = defaultdict(list)

for env_exp_id in tqdm(os.listdir('logs/{}_scratch'.format(algo))):
    # Use run "_1" of each experiment to enumerate experiments exactly once.
    # Matching the full "_1" suffix keeps the two-character strip below valid.
    if env not in env_exp_id or not env_exp_id.endswith('_1'):
        continue

    # remove the experiment number
    env_exp = env_exp_id[:-2]
    for i in range(1, 4):
        # load results of training from scratch
        result = ts2xy(load_results("logs/{}_scratch/{}_{}".format(algo, env_exp, i)), 'timesteps')
        for steps_percentage in step_fractions:
            mean_episodic_reward[algo + '_{}%'.format(int(steps_percentage * 100))].append(
                _get_mean_episodic_reward(result, steps_percentage, total_timesteps))

    # get the experiment name
    exp = ''.join(env_exp.split('_')[1:])
    for p in prefixes:
        for i in range(1, 10):
            file = "logs/mlap-{}/{}_{}-{}_{}".format(algo, env, p, exp, i)
            try:
                result = ts2xy(load_results(file), 'timesteps')
                # Values are appended per fraction, so a run that fails at a
                # later fraction still contributes to the earlier ones and is
                # also listed in failed_exp.
                for steps_percentage in step_fractions:
                    mean_episodic_reward['mlap-{}_{}_{}%'.format(algo, p, int(steps_percentage * 100))].append(
                        _get_mean_episodic_reward(result, steps_percentage, total_timesteps))
            except Exception:
                # Best-effort: record the run and move on. `Exception` (not a
                # bare except) lets KeyboardInterrupt/SystemExit stop the loop.
                failed_exp['mlap-{}_{}'.format(algo, p)].append(file)

100%|██████████| 300/300 [02:35<00:00,  1.93it/s]


In [5]:
# For every model with recorded failures, show its entries and how many there are.
for model, failed_files in failed_exp.items():
    print(model, failed_files, len(failed_files))

mlap-ppo2_1sources-3sets-SIW ['logs/mlap-ppo2/RoboschoolHopper-v1_1sources-3sets-SIW-leg051-foot043-thigh047-torso04-size086-damping276-friction115-armature183_1', 'logs/mlap-ppo2/RoboschoolHopper-v1_1sources-3sets-SIW-leg051-foot043-thigh047-torso04-size086-damping276-friction115-armature183_2', 'logs/mlap-ppo2/RoboschoolHopper-v1_1sources-3sets-SIW-leg051-foot043-thigh047-torso04-size086-damping276-friction115-armature183_3', 'logs/mlap-ppo2/RoboschoolHopper-v1_1sources-3sets-SIW-leg051-foot043-thigh047-torso04-size086-damping276-friction115-armature183_4', 'logs/mlap-ppo2/RoboschoolHopper-v1_1sources-3sets-SIW-leg051-foot043-thigh047-torso04-size086-damping276-friction115-armature183_5', 'logs/mlap-ppo2/RoboschoolHopper-v1_1sources-3sets-SIW-leg051-foot043-thigh047-torso04-size086-damping276-friction115-armature183_6', 'logs/mlap-ppo2/RoboschoolHopper-v1_1sources-3sets-SIW-leg051-foot043-thigh047-torso04-size086-damping276-friction115-armature183_7', 'logs/mlap-ppo2/RoboschoolHopper

### Save the results

In [5]:
def _dump_if_absent(path, obj, label):
    """Pickle ``obj`` to ``path`` unless a file already exists there.

    Prints "<label> exists" when the file is left untouched and
    "saved <label>" after a successful write.
    """
    if os.path.isfile(path):
        print("{} exists".format(label))
        return
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
    print("saved {}".format(label))


# One pickle per reward key, tagged with the training budget.
for model, rewards in mean_episodic_reward.items():
    _dump_if_absent(
        os.path.join(save_path, model + '-{}.pkl'.format(total_timesteps)),
        rewards,
        model,
    )

# One pickle per failed_exp key. failed_exp has its own keys (no "_<percent>%"
# suffix), so it is iterated directly rather than derived from the reward keys.
for model, failed_files in failed_exp.items():
    _dump_if_absent(
        os.path.join(save_path, model + '-failed_exp.pkl'),
        failed_files,
        model + '-failed_exp',
    )

saved sac_25%
saved sac_50%
saved sac_75%
saved sac_100%
saved mlap-sac_1sources-3sets-SIW_25%
saved mlap-sac_1sources-3sets-SIW_50%
saved mlap-sac_1sources-3sets-SIW_75%
saved mlap-sac_1sources-3sets-SIW_100%
saved mlap-sac_4sources-3sets-SIW_25%
saved mlap-sac_4sources-3sets-SIW_50%
saved mlap-sac_4sources-3sets-SIW_75%
saved mlap-sac_4sources-3sets-SIW_100%


### Loading pickles

In [4]:
# Rebuild both result dictionaries from the pickles found in save_path.
# Keys are the file names without the ".pkl" extension.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# that this notebook produced.
mean_episodic_reward, failed_exp = {}, {}
for fname in os.listdir(save_path):
    pkl_path = os.path.join(save_path, fname)
    if not pkl_path.endswith('.pkl'):
        continue
    # Paths containing "failed" go to failed_exp, everything else to the rewards.
    target = failed_exp if 'failed' in pkl_path else mean_episodic_reward
    with open(pkl_path, 'rb') as fh:
        target[fname[:-4]] = pickle.load(fh)

### Print results

In [6]:
# Fixed seed so the bootstrap resampling is repeatable.
np.random.seed(1993)
for key, rewards in mean_episodic_reward.items():
    # Bootstrap the mean; print the estimate and its bounds rounded to integers.
    bounds = bs.bootstrap(np.array(rewards), stat_func=bs_stats.mean)
    estimate, lower, upper = (
        int(round(v)) for v in (bounds.value, bounds.lower_bound, bounds.upper_bound)
    )
    print(key, estimate, "({},{})".format(lower, upper))


sac_25% -83 (-89,-77)
sac_50% -15 (-22,-7)
sac_75% 35 (26,43)
sac_100% 76 (67,84)
mlap-sac_1sources-3sets-SIW_25% -14 (-19,-9)
mlap-sac_1sources-3sets-SIW_50% 65 (60,70)
mlap-sac_1sources-3sets-SIW_75% 114 (109,118)
mlap-sac_1sources-3sets-SIW_100% 150 (145,155)
mlap-sac_4sources-3sets-SIW_25% -10 (-14,-5)
mlap-sac_4sources-3sets-SIW_50% 69 (64,73)
mlap-sac_4sources-3sets-SIW_75% 117 (113,122)
mlap-sac_4sources-3sets-SIW_100% 154 (149,158)


In [8]:
# Display the keys currently held in mean_episodic_reward.
# NOTE(review): the recorded output lists ppo2 keys although the config cell
# sets algo = "sac" -- presumably left over from an earlier run; re-run to confirm.
mean_episodic_reward.keys()

dict_keys(['ppo2_25%', 'ppo2_50%', 'ppo2_75%', 'ppo2_100%', 'mlap-ppo2_1sources-3sets-SIW_25%', 'mlap-ppo2_4sources-3sets-SIW_25%', 'mlap-ppo2_4sources-3sets-SIW_50%', 'mlap-ppo2_4sources-3sets-SIW_75%', 'mlap-ppo2_4sources-3sets-SIW_100%'])

In [12]:
# Number of values collected under each key.
for key, values in mean_episodic_reward.items():
    print(key, len(values))

ppo2_25% 300
ppo2_50% 300
ppo2_75% 300
ppo2_100% 300
mlap-ppo2_4sources-3sets-SIW_25% 888
mlap-ppo2_4sources-3sets-SIW_50% 888
mlap-ppo2_4sources-3sets-SIW_75% 888
mlap-ppo2_4sources-3sets-SIW_100% 0


In [9]:
# Display the values stored under one specific key.
# NOTE(review): the key is hard-coded for a ppo2 run while the config cell sets
# algo = "sac"; when mean_episodic_reward is the plain dict built by the
# pickle-loading cell, a missing key raises KeyError -- confirm this cell is still needed.
mean_episodic_reward['mlap-ppo2_1sources-3sets-SIW_25%']

[]

In [None]:
# for env_exp_id in tqdm(os.listdir('logs/{}_scratch'.format(algo))):
#     if env in env_exp_id and env_exp_id[-1]=='1':
#         # remove the experiment number
#         env_exp = env_exp_id[:-2]
#         results = []
#         for i in range(1,4):
#             # load results of training from scratch
#             results.append(ts2xy(load_results("logs/{}_scratch/{}_{}".format(algo, env_exp, i)), 'timesteps'))
            
#         for steps_percentage in [0.25, 0.5, 0.75, 1.0]:
#             mean_episodic_reward[algo+'_{}%'.format(int(steps_percentage*100))].append(_get_mean_episodic_reward(results, steps_percentage))
            
#         # get the experiment name
#         exp = ''.join(env_exp.split('_')[1:])
#         for p in prefixes:
#             results = []
#             for i in range(1,10):
#                 file = "logs/mlap-{}/{}_{}-{}_{}".format(algo, env, p, exp, i)
#                 try:
#                     results.append(ts2xy(load_results(file), 'timesteps'))
#                 except:
#                     failed_exp['mlap-{}_{}'.format(algo, p)].append(file)
#             for steps_percentage in [0.25, 0.5, 0.75, 1.0]:
#                 mean_episodic_reward['mlap-{}_{}_{}%'.format(algo, p, int(steps_percentage*100))].append(_get_mean_episodic_reward(results, steps_percentage))

# def _get_mean_episodic_reward(results, steps_percentage):

#     means = [xy[1][:int(length*steps_percentage)].mean() for xy in results]
    
#     return np.mean(means)

# ppo2_25% -164.72 ± 14.08
# ppo2_50% -111.8 ± 9.61
# ppo2_75% -97.88 ± 8.34
# ppo2_100% -91.39 ± 7.77
# mlap-ppo2_4sources-3sets-SIW_25% -119.88 ± 11.36
# mlap-ppo2_4sources-3sets-SIW_50% -97.53 ± 8.59
# mlap-ppo2_4sources-3sets-SIW_75% -89.82 ± 7.75
# mlap-ppo2_4sources-3sets-SIW_100% -85.82 ± 7.35


# def _mean_episodic_reward(cum_episodic_reward, steps_percentage, total_timesteps=None):
#     timesteps = cum_episodic_reward[0]
#     if total_timesteps is None:
#         total_timesteps = timesteps[0][-1]
#     cut_off = int(total_timesteps * steps_percentage)
    
#     for i in reversed(range(len(timesteps))):
#         if step <= cut_off:
#             return cum_episodic_reward[1][i] / timesteps[i]
#     return cum_episodic_reward[1][0] / timesteps[0]