In [1]:
%load_ext autoreload
%autoreload 2

import itertools

from matplotlib import pyplot as plt
import numpy as np
import gym
from gym.envs import registration

from wrappers import color_grid

%matplotlib inline

In [10]:
def make(
    domain_name,
    task_name,
    action_dims_to_split,
    num_cells_per_dim,
    num_colors_per_cell,
    evil_level,
    seed=1,
    visualize_reward=True,
    from_pixels=False,
    height=84,
    width=84,
    camera_id=0,
    frame_skip=1,
    episode_length=1000,
    environment_kwargs=None
):
    """Register (at most once per id) and build a ``DmcColorGridWrapper`` gym env.

    The environment id is derived from ``domain_name``, ``task_name``, ``seed``,
    ``action_dims_to_split``, ``num_cells_per_dim``, ``num_colors_per_cell`` and
    ``evil_level``.  Every field is separated by ``_`` so that, for example,
    ``seed=1`` with dims ``[12, 3]`` and ``seed=11`` with dims ``[2, 3]`` get
    different ids.

    Args:
        domain_name: Forwarded to the wrapper as ``domain_name``.
        task_name: Forwarded to the wrapper as ``task_name``.
        action_dims_to_split: Iterable of action dimension indices; forwarded
            to the wrapper and joined with ``-`` inside the env id.
        num_cells_per_dim: Forwarded to the wrapper; part of the env id.
        num_colors_per_cell: Forwarded to the wrapper; part of the env id.
        evil_level: Forwarded to the wrapper; its ``str()`` is part of the id.
        seed: Passed to the wrapper as ``task_kwargs={'random': seed}``.
        visualize_reward: Forwarded to the wrapper.  Must be false when
            ``from_pixels`` is true.
        from_pixels: Forwarded to the wrapper.
        height: Forwarded to the wrapper.
        width: Forwarded to the wrapper.
        camera_id: Forwarded to the wrapper.
        frame_skip: Forwarded to the wrapper.
        episode_length: Currently has no effect.  It was only used to derive
            ``max_episode_steps``, and that registration argument is disabled.
        environment_kwargs: Forwarded to the wrapper.

    Returns:
        The environment returned by ``gym.make`` for the derived id.

    Raises:
        AssertionError: If ``from_pixels`` and ``visualize_reward`` are both true.
    """
    split_dims = '-'.join(map(str, action_dims_to_split))
    env_id = (
        f'dmc_{domain_name}_{task_name}_{seed}_{split_dims}'
        f'_{num_cells_per_dim}_{num_colors_per_cell}_{evil_level}-v1'
    )
    if from_pixels:
        assert not visualize_reward, 'cannot use visualize reward when learning from pixels'

    env_kwargs = {
        'domain_name': domain_name,
        'task_name': task_name,
        'task_kwargs': {
            'random': seed
        },
        'num_cells_per_dim': num_cells_per_dim,
        'num_colors_per_cell': num_colors_per_cell,
        'evil_level': evil_level,
        'action_dims_to_split': action_dims_to_split,
        'environment_kwargs': environment_kwargs,
        'visualize_reward': visualize_reward,
        'from_pixels': from_pixels,
        'height': height,
        'width': width,
        'camera_id': camera_id,
        'frame_skip': frame_skip,
    }

    if env_id not in gym.envs.registry:
        # No max_episode_steps is registered (it was deliberately disabled).
        # To re-enable a step limit, pass
        # max_episode_steps=(episode_length + frame_skip - 1) // frame_skip.
        registration.register(
            id=env_id,
            entry_point='wrappers.color_grid:DmcColorGridWrapper',
            kwargs=dict(env_kwargs),
        )

    # The id does not encode from_pixels, height, width, camera_id, etc.
    # Passing the current kwargs to gym.make makes a repeated call with the
    # same id honour this call's arguments instead of the first registration's.
    return gym.make(env_id, **env_kwargs)

In [None]:
# Probe the color-grid wrapper on cheetah-run with num_cells_per_dim=2.
grid_cells_per_dim = 2
cg_env = make(
    'cheetah', 'run',
    action_dims_to_split=[0, 2, 4],
    num_cells_per_dim=grid_cells_per_dim,
    num_colors_per_cell=16,
    evil_level=color_grid.EvilEnum.MAXIMUM_EVIL,
    from_pixels=True,
    visualize_reward=False)
obs_0 = cg_env.reset()

# Seeded warm-up action in [-1, 1) so the action is the same on every run.
warmup_action = np.random.default_rng(0).random(6).astype(np.float32) * 2 - 1
obs, reward, done, extra = cg_env.step(warmup_action)

# Sweep action dims 0, 2 and 4 over {-.75, .25}, then repeat each combination
# shifted by +.5 (i.e. {-.25, .75}); dims 1, 3 and 5 stay at 0.
# One titled frame is drawn after every step.
for base in itertools.product((-.75, .25), repeat=3):
    for shift in (0., .5):
        i, j, k = (value + shift for value in base)
        obs, reward, done, extra = cg_env.step(
            np.array([i, 0, j, 0, k, 0], dtype=np.float32))
        fig, ax = plt.subplots()
        ax.imshow(obs)  # assumes obs is an image array imshow accepts — TODO confirm layout
        ax.set_title(
            f'num_cells_per_dim={grid_cells_per_dim}, '
            f'action[0,2,4]=({i:+.2f}, {j:+.2f}, {k:+.2f})')
        plt.show()
        plt.close(fig)

In [None]:
# Probe the color-grid wrapper on cheetah-run with num_cells_per_dim=3.
grid_cells_per_dim = 3
cg_env = make(
    'cheetah', 'run',
    action_dims_to_split=[0, 2, 4],
    num_cells_per_dim=grid_cells_per_dim,
    num_colors_per_cell=16,
    evil_level=color_grid.EvilEnum.MAXIMUM_EVIL,
    from_pixels=True,
    visualize_reward=False)
obs_0 = cg_env.reset()

# Seeded warm-up action in [-1, 1) so the action is the same on every run.
warmup_action = np.random.default_rng(0).random(6).astype(np.float32) * 2 - 1
obs, reward, done, extra = cg_env.step(warmup_action)

# Sweep action dims 0, 2 and 4 over {-.75, .25}, then repeat each combination
# shifted by +.5 (i.e. {-.25, .75}); dims 1, 3 and 5 stay at 0.
# One titled frame is drawn after every step.
for base in itertools.product((-.75, .25), repeat=3):
    for shift in (0., .5):
        i, j, k = (value + shift for value in base)
        obs, reward, done, extra = cg_env.step(
            np.array([i, 0, j, 0, k, 0], dtype=np.float32))
        fig, ax = plt.subplots()
        ax.imshow(obs)  # assumes obs is an image array imshow accepts — TODO confirm layout
        ax.set_title(
            f'num_cells_per_dim={grid_cells_per_dim}, '
            f'action[0,2,4]=({i:+.2f}, {j:+.2f}, {k:+.2f})')
        plt.show()
        plt.close(fig)

In [None]:
# Probe the color-grid wrapper on cheetah-run with num_cells_per_dim=4.
grid_cells_per_dim = 4
cg_env = make(
    'cheetah', 'run',
    action_dims_to_split=[0, 2, 4],
    num_cells_per_dim=grid_cells_per_dim,
    num_colors_per_cell=16,
    evil_level=color_grid.EvilEnum.MAXIMUM_EVIL,
    from_pixels=True,
    visualize_reward=False)
obs_0 = cg_env.reset()

# Seeded warm-up action in [-1, 1) so the action is the same on every run.
warmup_action = np.random.default_rng(0).random(6).astype(np.float32) * 2 - 1
obs, reward, done, extra = cg_env.step(warmup_action)

# Sweep action dims 0, 2 and 4 over {-.75, .25}, then repeat each combination
# shifted by +.5 (i.e. {-.25, .75}); dims 1, 3 and 5 stay at 0.
# One titled frame is drawn after every step.
for base in itertools.product((-.75, .25), repeat=3):
    for shift in (0., .5):
        i, j, k = (value + shift for value in base)
        obs, reward, done, extra = cg_env.step(
            np.array([i, 0, j, 0, k, 0], dtype=np.float32))
        fig, ax = plt.subplots()
        ax.imshow(obs)  # assumes obs is an image array imshow accepts — TODO confirm layout
        ax.set_title(
            f'num_cells_per_dim={grid_cells_per_dim}, '
            f'action[0,2,4]=({i:+.2f}, {j:+.2f}, {k:+.2f})')
        plt.show()
        plt.close(fig)

In [None]:
# Probe the color-grid wrapper on cheetah-run with num_cells_per_dim=8.
grid_cells_per_dim = 8
cg_env = make(
    'cheetah', 'run',
    action_dims_to_split=[0, 2, 4],
    num_cells_per_dim=grid_cells_per_dim,
    num_colors_per_cell=16,
    evil_level=color_grid.EvilEnum.MAXIMUM_EVIL,
    from_pixels=True,
    visualize_reward=False)
obs_0 = cg_env.reset()

# Seeded warm-up action in [-1, 1) so the action is the same on every run.
warmup_action = np.random.default_rng(0).random(6).astype(np.float32) * 2 - 1
obs, reward, done, extra = cg_env.step(warmup_action)

# Sweep action dims 0, 2 and 4 over {-.75, .25}, then repeat each combination
# shifted by +.5 (i.e. {-.25, .75}); dims 1, 3 and 5 stay at 0.
# One titled frame is drawn after every step.
for base in itertools.product((-.75, .25), repeat=3):
    for shift in (0., .5):
        i, j, k = (value + shift for value in base)
        obs, reward, done, extra = cg_env.step(
            np.array([i, 0, j, 0, k, 0], dtype=np.float32))
        fig, ax = plt.subplots()
        ax.imshow(obs)  # assumes obs is an image array imshow accepts — TODO confirm layout
        ax.set_title(
            f'num_cells_per_dim={grid_cells_per_dim}, '
            f'action[0,2,4]=({i:+.2f}, {j:+.2f}, {k:+.2f})')
        plt.show()
        plt.close(fig)

In [None]:
# Probe the color-grid wrapper on cheetah-run with num_cells_per_dim=16.
grid_cells_per_dim = 16
cg_env = make(
    'cheetah', 'run',
    action_dims_to_split=[0, 2, 4],
    num_cells_per_dim=grid_cells_per_dim,
    num_colors_per_cell=16,
    evil_level=color_grid.EvilEnum.MAXIMUM_EVIL,
    from_pixels=True,
    visualize_reward=False)
obs_0 = cg_env.reset()

# Seeded warm-up action in [-1, 1) so the action is the same on every run.
warmup_action = np.random.default_rng(0).random(6).astype(np.float32) * 2 - 1
obs, reward, done, extra = cg_env.step(warmup_action)

# Sweep action dims 0, 2 and 4 over {-.75, .25}, then repeat each combination
# shifted by +.5 (i.e. {-.25, .75}); dims 1, 3 and 5 stay at 0.
# One titled frame is drawn after every step.
for base in itertools.product((-.75, .25), repeat=3):
    for shift in (0., .5):
        i, j, k = (value + shift for value in base)
        obs, reward, done, extra = cg_env.step(
            np.array([i, 0, j, 0, k, 0], dtype=np.float32))
        fig, ax = plt.subplots()
        ax.imshow(obs)  # assumes obs is an image array imshow accepts — TODO confirm layout
        ax.set_title(
            f'num_cells_per_dim={grid_cells_per_dim}, '
            f'action[0,2,4]=({i:+.2f}, {j:+.2f}, {k:+.2f})')
        plt.show()
        plt.close(fig)