Analyse comparisons between reward functions, produced by running `runners/comparison/hardcoded.sh` and `runners/comparison/learnt.sh`.

In [None]:
import os

from absl import logging
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from evaluating_rewards.experiments import results
from evaluating_rewards.experiments import visualize

# Directory layout, all rooted at $HOME/output (raises KeyError if HOME is unset):
#   DATA_ROOT    -- passed to results.load_multiple_stats in the cells below.
#   FIGURES_ROOT -- destination passed to visualize.save_figs.
OUTPUT_ROOT = os.path.join(os.environ["HOME"], "output")
DATA_ROOT = os.path.join(OUTPUT_ROOT, "comparison")
FIGURES_ROOT = os.path.join(OUTPUT_ROOT, "figures")

# Show debug-level absl log messages in the notebook output.
logging.set_verbosity(logging.DEBUG)

# IPython magics: render matplotlib figures inline, and have the autoreload
# extension re-import modified modules before each cell executes (mode 2).
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
def _env_name_filter(env_name):
    """Return a config predicate that accepts runs on `evaluating_rewards/<env_name>`.

    A factory is used (rather than a lambda written inline in the loop) so the
    environment name is bound when the predicate is created, not looked up from
    the loop variable whenever the predicate happens to be called.
    """
    env_id = f'evaluating_rewards/{env_name}'
    return lambda cfg: cfg['env_name'] == env_id


def _point_mass_filter(dataset_factory):
    """Return a config predicate for PointMassLine-v0 runs using `dataset_factory`.

    `dataset_factory` is compared against the `py/function` entry of the run's
    `dataset_factory` config. It is bound eagerly for the same reason as in
    `_env_name_filter`.
    """
    return lambda cfg: (
        cfg['env_name'] == 'evaluating_rewards/PointMassLine-v0'
        and cfg['dataset_factory']['py/function'] == dataset_factory
    )


# Load statistics for comparisons between hardcoded rewards, one entry per environment.
hardcoded_root_dir = os.path.join(DATA_ROOT, 'hardcoded')
hardcoded_keys = ['source_reward_type', 'target_reward_type', 'seed']
hardcoded_stats = {}
for env_name in ['Hopper-v3', 'HalfCheetah-v3']:
    hardcoded_stats[env_name] = results.load_multiple_stats(
        hardcoded_root_dir, hardcoded_keys,
        cfg_filter=_env_name_filter(env_name),
    )

# PointMassLine-v0 runs are split by the dataset generator they used, and stored
# under a synthetic environment name that encodes the generator kind.
for kind in ['policy', 'transition']:
    dataset_factory = f'evaluating_rewards.experiments.datasets.random_{kind}_generator'
    mock_env_name = f'PointMassLine_{kind}-v0'
    hardcoded_stats[mock_env_name] = results.load_multiple_stats(
        hardcoded_root_dir, hardcoded_keys,
        cfg_filter=_point_mass_filter(dataset_factory),
    )

# Run the results pipeline on each environment's statistics.
hardcoded_res = {env_name: results.pipeline(stats) for env_name, stats in hardcoded_stats.items()}

In [None]:
def _regress_cfg_filter(env_name):
    """Return a config predicate for regression runs on `evaluating_rewards/<env_name>`.

    The predicate accepts a run only if its `env_name` matches and its
    `model_wrapper_kwargs` entry is falsy. The environment id is computed once
    here, so the predicate does not depend on the later value of any loop variable.
    """
    env_id = f'evaluating_rewards/{env_name}'

    def keep(cfg):
        return cfg['env_name'] == env_id and not cfg['model_wrapper_kwargs']

    return keep


# Load statistics from the `train_regress` runs and process them per environment.
regress_root_dir = os.path.join(DATA_ROOT, 'train_regress')
regress_stats = {}
regress_res = {}
for env_name in ['PointMassLine-v0', 'Hopper-v3', 'HalfCheetah-v3']:
    regress_stats[env_name] = results.load_multiple_stats(
        regress_root_dir,
        ['source_reward_path', 'target_reward_type', 'seed'],
        cfg_filter=_regress_cfg_filter(env_name),
    )
    regress_res[env_name] = results.pipeline(regress_stats[env_name], figsize=(12, 32))

In [None]:
def _preferences_cfg_filter(env_name):
    """Return a config predicate for preference runs on `evaluating_rewards/<env_name>`.

    Accepts a run only when its `env_name` matches and `model_wrapper_kwargs`
    is falsy. Building the predicate in a factory binds the environment at
    creation time instead of closing over the loop variable by reference.
    """
    wanted_env = f'evaluating_rewards/{env_name}'
    return lambda cfg: cfg['env_name'] == wanted_env and not cfg['model_wrapper_kwargs']


# Load statistics from the `train_preferences` runs and process them per environment.
preferences_root_dir = os.path.join(DATA_ROOT, 'train_preferences')
preferences_keys = ['source_reward_path', 'target_reward_type', 'seed']
preferences_stats = {}
preferences_res = {}
for env_name in ['PointMassLine-v0', 'Hopper-v3', 'HalfCheetah-v3']:
    env_stats = results.load_multiple_stats(
        preferences_root_dir,
        preferences_keys,
        cfg_filter=_preferences_cfg_filter(env_name),
    )
    preferences_stats[env_name] = env_stats
    preferences_res[env_name] = results.pipeline(env_stats, figsize=(12, 32))

In [None]:
def presentation_heatmaps():
    """Generate and save heatmap figures for each result set.

    For every result collection (hardcoded, regress, preferences) and every
    environment key that has a heatmap function registered below, the function
    is applied to that environment's `["loss"]["loss"]` entry and the returned
    figures are saved under `FIGURES_ROOT/<collection>/<environment key>`.
    Environment keys absent from a collection are skipped.
    """
    heatmap_fns = {
        "PointMassLine_policy-v0": results.point_mass_heatmaps,
        "PointMassLine_transition-v0": results.point_mass_heatmaps,
        "PointMassLine-v0": results.point_mass_heatmaps,
        "Hopper-v3": results.hopper_heatmaps,
    }

    results_by_dir = {
        "hardcoded": hardcoded_res,
        "regress": regress_res,
        "preferences": preferences_res,
    }

    for dirname, env_results in results_by_dir.items():
        for env_key, make_heatmaps in heatmap_fns.items():
            if env_key not in env_results:
                continue
            loss = env_results[env_key]["loss"]["loss"]
            figs = make_heatmaps(loss)
            out_dir = os.path.join(FIGURES_ROOT, dirname, env_key)
            visualize.save_figs(out_dir, figs.items())


presentation_heatmaps()

In [None]:
# Plot, for each environment, the hardcoded-reward losses divided by the losses
# of the zero-reward baseline.
#
# NOTE(review): this cell previously read an undefined name `hardcoded` and had
# a stray quote in the `.loc` key, so it could not run. It is assumed here that
# `hardcoded` meant each per-environment entry of `hardcoded_res` -- confirm.
zero_reward_type = 'evaluating_rewards/Zero-v0'
for env_name, env_res in hardcoded_res.items():
    hardcoded_loss = env_res["loss"]["loss"]

    # Baseline rows: first index level equal to the zero reward, any value in
    # the second level, and 0 in the third (presumably source_reward_type and
    # seed, matching the order of `hardcoded_keys` -- TODO confirm).
    zero_baseline = hardcoded_loss.loc[(zero_reward_type, slice(None), 0)]
    hardcoded_loss_norm = hardcoded_loss / zero_baseline

    # Drop rows whose target is the zero reward itself before plotting.
    target_types = hardcoded_loss_norm.index.get_level_values('target_reward_type')
    hardcoded_loss_norm = hardcoded_loss_norm.loc[target_types != zero_reward_type]

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    visualize.comparison_heatmap(hardcoded_loss_norm, ax=ax)