# Reward surfaces

Visualizing the objective surface

$$J(\theta) = \mathbb{E}_{\tau \sim \pi_\theta} \sum_{i=1}^{t_\tau} R_i$$

In particular, this notebook visualizes the behavior of $J(\theta)$ in 2D plots along two chosen directions in parameter space.

Installation:

```
pip install git+https://github.com/weepingwillowben/reward-surfaces.git
```

In [11]:
from reward_surfaces.experiments import generate_plane_data
from reward_surfaces.plotting import plot_plane
from reward_surfaces.runners.run_jobs_multiproc import run_job_list
from reward_surfaces.utils.job_results_to_csv import job_results_to_csv
from reward_surfaces.utils.surface_utils import readz
from reward_surfaces.agents.make_agent import make_agent

import json
import os
import shutil

First, define the two direction vectors along which the surface will be plotted:

In [12]:
# --- Configuration: checkpoint, output directory and plot directions ---
p_str = "0040000"
trained_checkpoint = f"train_hopper_small/{p_str}/"
generated_dirs_dir = "_temp_dir/"

# Files holding the two direction vectors of the plotted plane.
dir2_fname = f"hopper_eig_vecs/results/{p_str}/mineigvec.npz"
# dir1 is read from the grad.npz file. An earlier assignment pointing at
# hopper_eig_vecs/results/{p_str}/maxeigvec.npz was immediately overwritten
# (dead code) and has been dropped; swap the path below to use that file instead.
dir1_fname = f"hopper_grad/results/{p_str}/grad.npz"

dir1 = readz(dir1_fname)
dir2 = readz(dir2_fname)

# info.json lives one level above the checkpoint directory. Use a context
# manager so the file handle is closed deterministically.
with open(trained_checkpoint + "../info.json") as info_file:
    train_info = json.load(info_file)

base_magnitude = 1.0
dir1_mag = 0.2 * base_magnitude
dir2_mag = 1.00 * base_magnitude

# Extra divisor applied to dir1 only.
# NOTE(review): presumably compensates for the scale of the raw gradient
# vector -- TODO confirm. It is NOT reflected in the dir1_mag value recorded
# in train_info below (nor in the scale later passed to the plot).
DIR1_EXTRA_DIVISOR = 1000000

# Scale every array of each direction by its magnitude.
dir1 = [d*dir1_mag/DIR1_EXTRA_DIVISOR for d in dir1]
dir2 = [d*dir2_mag for d in dir2]

# Record the nominal magnitudes alongside the training metadata.
train_info['dir1_mag'] = dir1_mag
train_info['dir2_mag'] = dir2_mag


Compute the objective for all points in a specified grid. This might take a while.

In [None]:
# Evaluation settings for the plane: grid resolution and episodes per point.
grid_size = 9
num_episodes = 25

# The data is not recalculated while the temporary directory exists,
# so remove any leftover output from a previous run first.
if os.path.exists(generated_dirs_dir):
    shutil.rmtree(generated_dirs_dir)
    print("removed old directory")

# 1) write the evaluation jobs, 2) execute them, 3) collect results into a CSV.
generate_plane_data(
    trained_checkpoint,
    generated_dirs_dir,
    dir1,
    dir2,
    train_info,
    grid_size=grid_size,
    num_episodes=num_episodes,
)
run_job_list(f"{generated_dirs_dir}jobs.sh")
job_results_to_csv(generated_dirs_dir)

 11%|█         | 9/81 [00:00<00:00, 89.43it/s]

removed old directory


 30%|██▉       | 24/81 [00:04<00:14,  4.06it/s]

In [None]:

# Plot the collected results as a heat map. Axes are named after the
# direction files and scaled by the nominal direction magnitudes.
plot_plane(
    f"{generated_dirs_dir}results.csv",
    outname="script_out",
    type="heat",
    dir1_name=os.path.basename(dir1_fname),
    dir1_scale=dir1_mag,
    dir2_name=os.path.basename(dir2_fname),
    dir2_scale=dir2_mag,
)