In this Colab, we perform a simple test of our reward model comparison technique on randomly generated reward models.

This notebook follows the same idea as `random_deep.ipynb`, but in a tabular setting. The tabular setting produces cleaner results more quickly, which makes it a useful playground for rapidly testing out new ideas.

In [0]:
import numpy as np
import matplotlib.pyplot as plt

from evaluating_rewards import tabular
from evaluating_rewards.experiments import util
from evaluating_rewards.experiments import visualize

In [0]:
# Global config
# Table sizes shared by the cells below: they are passed to
# run_shaping_comparison and to tabular.make_shaped_reward.
n_states = 100
n_actions = 10

In [0]:
def run_shaping_comparison(reps=3, **kwargs):
  """Repeat the tabular shaping-comparison experiment several times.

  Args:
    reps: number of independent repetitions to run.
    **kwargs: forwarded unchanged to `tabular.experiment_shaping_comparison`.

  Returns:
    A list with one experiment result per repetition, in run order.
  """
  # TODO(): seed to produce deterministically different results

  def _single_repetition():
    # Each repetition runs inside its own `util.fresh_sess()` context.
    with util.fresh_sess():
      return tabular.experiment_shaping_comparison(**kwargs)

  return [_single_repetition() for _ in range(reps)]

# Run the default number of repetitions (reps=3) with the notebook-wide table
# sizes. All keyword arguments, including state_only, are forwarded to
# tabular.experiment_shaping_comparison.
comparisons = run_shaping_comparison(n_states=n_states, n_actions=n_actions, state_only=True)

In [0]:
def plot_shaping_comparison(dfs, **kwargs):
  """Plot each experiment result side by side on a single row of axes.

  Args:
    dfs: sized sequence of results, e.g. as returned by
      `run_shaping_comparison`; one subplot is drawn per element.
    **kwargs: forwarded to `visualize.plot_shaping_comparison` for every
      subplot. NOTE(review): assumes that helper accepts extra keyword
      arguments -- confirm against its signature.

  Returns:
    A list holding the value returned by `visualize.plot_shaping_comparison`
    for each element of `dfs`, in order. Empty if `dfs` is empty.
  """
  n_plots = len(dfs)
  if n_plots == 0:
    # A zero-column grid is not a valid subplot layout, and there is nothing
    # to draw, so skip figure creation entirely.
    return []

  # squeeze=False keeps `axs` two-dimensional even for a single plot, so
  # `axs[0]` is always the row of axes.
  _, axs = plt.subplots(1, n_plots, figsize=(16, 4), squeeze=False)
  return [
      visualize.plot_shaping_comparison(df, ax=ax, **kwargs)
      for df, ax in zip(dfs, axs[0])
  ]

# Draw one subplot per repetition and keep what the plotting helper returns
# for each of them.
longform_comparisons = plot_shaping_comparison(comparisons)

In [0]:
# Build two shaped-reward triples from different seeds.
# NOTE(review): presumably each triple is (reward, potential, shaped reward);
# confirm against tabular.make_shaped_reward. p1 and p2 are unused below.
r1, p1, s1 = tabular.make_shaped_reward(n_states, n_actions, seed=0)
r2, p2, s2 = tabular.make_shaped_reward(n_states, n_actions, seed=1)

# First line compares values from the same seed; second line compares across
# seeds. NOTE(review): the argument order differs between the two calls --
# verify whether tabular.summary_comparison is order-sensitive.
print('r1 vs s1', tabular.summary_comparison(r1, s1))
print('s2 vs r1', tabular.summary_comparison(s2, r1))