In [4]:
from dm_control import suite
import matplotlib 
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from IPython.display import HTML
import PIL.Image
import copy
import os
from IPython.display import clear_output
import numpy as np

# using svg backend for figure rendering
%config InlineBackend.figure_format = 'svg'

In [5]:
# Font sizes used for every figure in this notebook.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 8, 10, 12

# Each entry is (rc group, option name, size); entries are applied in order.
_FONT_RC_SETTINGS = (
    ('font', 'size', SMALL_SIZE),          # default text size
    ('axes', 'titlesize', SMALL_SIZE),     # axes title
    ('axes', 'labelsize', MEDIUM_SIZE),    # x and y axis labels
    ('xtick', 'labelsize', SMALL_SIZE),    # x tick labels
    ('ytick', 'labelsize', SMALL_SIZE),    # y tick labels
    ('legend', 'fontsize', SMALL_SIZE),    # legend entries
    ('figure', 'titlesize', BIGGER_SIZE),  # figure title
)

for _group, _option, _size in _FONT_RC_SETTINGS:
    plt.rc(_group, **{_option: _size})

In [6]:
if os.environ.get('COLAB_NOTEBOOK_TEST', False):
    # skip video generation during tests, as it is quite expensive
    def display_video(*args, **kwargs):
        """No-op replacement used when COLAB_NOTEBOOK_TEST is set."""
        return None
else:
    def display_video(frames, framerate=30):
        """Turn a sequence of image frames into an inline HTML5 video.

        Args:
            frames: Non-empty, indexable sequence of image arrays. The first
                frame's leading two dimensions (height, width) size the
                figure, so both 2-D and 3-D image arrays are accepted.
            framerate: Playback rate in frames per second.

        Returns:
            An `IPython.display.HTML` object wrapping the encoded video.

        Raises:
            ValueError: If `frames` is empty.
        """
        if len(frames) == 0:
            raise ValueError('display_video() requires at least one frame')

        # Only the first two dimensions matter for sizing the figure.
        height, width = frames[0].shape[:2]
        dpi = 70
        orig_backend = matplotlib.get_backend()
        matplotlib.use('Agg')  # switch to headless 'Agg' to prevent figure rendering
        try:
            fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
        finally:
            # Restore the caller's backend even if figure creation fails.
            matplotlib.use(orig_backend)
        ax.set_axis_off()
        ax.set_aspect('equal')
        ax.set_position([0, 0, 1, 1])
        im = ax.imshow(frames[0])

        def update(frame):
            """Show `frame` in the existing image artist."""
            im.set_data(frame)
            return [im]

        interval = 1000 / framerate  # milliseconds between frames
        anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                       interval=interval, blit=True, repeat=False)
        try:
            return HTML(anim.to_html5_video())
        finally:
            # The video is fully encoded by now; release the figure so repeated
            # calls do not pile up open figures.
            plt.close(fig)

# Called without a seed, so NumPy reseeds its global generator from fresh
# entropy: later `np.random.*` draws differ from run to run.
np.random.seed()

In [12]:
# One seeded generator drives both the task and the random action sampler.
random_state = np.random.RandomState(42)
env = suite.load('pendulum', 'swingup', task_kwargs={'random': random_state})

# Roll out uniformly random actions and record the results.
duration = 5  # seconds of simulated time to record
frames, ticks, rewards, observations = [], [], [], []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Draw an action inside the spec bounds and advance the environment.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 at 200x200 and stack them horizontally.
    camera0, camera1 = [
        env.physics.render(camera_id=cam, height=200, width=200)
        for cam in (0, 1)
    ]
    frames.append(np.hstack((camera0, camera1)))

    # Log the reward, a private copy of the observation, and the physics time.
    rewards.append(time_step.reward)
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1.0 / env.control_timestep())
html_video

In [13]:
# Seeded generator shared by the task and the random action sampler.
random_state = np.random.RandomState(42)
env = suite.load(
    'acrobot', 'swingup_sparse',
    task_kwargs={'random': random_state},
)

# Drive the environment with random actions, recording as we go.
duration = 5  # seconds of simulated time to record
frames, ticks, rewards, observations = [], [], [], []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Uniform sample between the action-spec minimum and maximum.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Two 200x200 camera views, joined horizontally into a single frame.
    camera0, camera1 = [
        env.physics.render(camera_id=cam, height=200, width=200)
        for cam in (0, 1)
    ]
    frames.append(np.hstack((camera0, camera1)))

    # Keep the reward, a deep copy of the observation, and the physics time.
    rewards.append(time_step.reward)
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1.0 / env.control_timestep())
html_video

In [14]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell; re-running the cell restarts the same random stream.
random_state = np.random.RandomState(42)
env = suite.load('cartpole', 'swingup_sparse', task_kwargs={'random': random_state})

# Simulate episode with random actions
duration = 5  # seconds of simulated time to record
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [16]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell; re-running the cell restarts the same random stream.
random_state = np.random.RandomState(42)
env = suite.load('cartpole', 'two_poles', task_kwargs={'random': random_state})

# Simulate episode with random actions
duration = 5  # seconds of simulated time to record
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [18]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell; re-running the cell restarts the same random stream.
random_state = np.random.RandomState(42)
env = suite.load('ball_in_cup', 'catch', task_kwargs={'random': random_state})

# Simulate episode with random actions
duration = 5  # seconds of simulated time to record
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [20]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell; re-running the cell restarts the same random stream.
random_state = np.random.RandomState(42)
env = suite.load('point_mass', 'hard', task_kwargs={'random': random_state})

# Simulate episode with random actions
duration = 5  # seconds of simulated time to record
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [26]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell; re-running the cell restarts the same random stream.
random_state = np.random.RandomState(42)
env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})

# Simulate episode with random actions
duration = 5  # seconds of simulated time to record
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [32]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell; re-running the cell restarts the same random stream.
random_state = np.random.RandomState(42)
env = suite.load('finger', 'turn_hard', task_kwargs={'random': random_state})

# Simulate episode with random actions
duration = 5  # seconds of simulated time to record
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [71]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell, and hand it to the task as well, matching the
# other rollout cells in this notebook.
random_state = np.random.RandomState(42)
env = suite.load(domain_name='hopper', task_name='stand',
                 task_kwargs={'random': random_state})

# Simulate episode with random actions
# NOTE(review): `duration` is not consulted by the loop below, which runs
# until the environment reports the last step of the episode.
duration = 10  # seconds
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while not time_step.last():
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [56]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell; re-running the cell restarts the same random stream.
random_state = np.random.RandomState(42)
env = suite.load('fish', 'swim', task_kwargs={'random': random_state})

# Simulate episode with random actions
duration = 10  # seconds of simulated time to record
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while env.physics.data.time < duration:
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [57]:
# Display the current value of `spec`, which the rollout cells assign from
# `env.action_spec()`; it reflects whichever of those cells ran most recently.
spec

BoundedArray(shape=(5,), dtype=dtype('float64'), name=None, minimum=[-1. -1. -1. -1. -1.], maximum=[1. 1. 1. 1. 1.])

In [76]:
# Create the seeded generator in this cell so it does not depend on state left
# behind by an earlier cell, and hand it to the task as well, matching the
# other rollout cells in this notebook.
random_state = np.random.RandomState(42)
env = suite.load(domain_name='humanoid', task_name='walk',
                 task_kwargs={'random': random_state})

# Simulate episode with random actions
# NOTE(review): `duration` is not consulted by the loop below, which runs
# until the environment reports the last step of the episode.
duration = 10  # seconds
frames = []
ticks = []
rewards = []
observations = []

spec = env.action_spec()
time_step = env.reset()

while not time_step.last():
    # Sample a uniformly random action inside the action-spec bounds.
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render cameras 0 and 1 and stack them horizontally into one frame.
    camera0 = env.physics.render(camera_id=0, height=200, width=200)
    camera1 = env.physics.render(camera_id=1, height=200, width=200)
    frames.append(np.hstack((camera0, camera1)))
    rewards.append(time_step.reward)
    # Deep copy so the stored observation cannot change after later steps.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# One video frame per control step.
html_video = display_video(frames, framerate=1./env.control_timestep())
html_video

In [74]:
# Display the current value of `spec`, which the rollout cells assign from
# `env.action_spec()`; it reflects whichever of those cells ran most recently.
spec

BoundedArray(shape=(21,), dtype=dtype('float64'), name=None, minimum=[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1.], maximum=[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.])

In [78]:
from dm_control import suite
from dm_control import viewer
import numpy as np
import PIL

# Load the hopper "hop" task; the policy below reads `action_spec` at call time.
env = suite.load(domain_name="hopper", task_name="hop")
action_spec = env.action_spec()


def random_policy(time_step):
    """Return a uniformly random action within the action-spec bounds.

    `time_step` is accepted to match the policy call signature but is not used.
    """
    lower, upper = action_spec.minimum, action_spec.maximum
    return np.random.uniform(lower, upper, action_spec.shape)


# Open the viewer application, driving the environment with the random policy.
viewer.launch(env, policy=random_policy)

In [79]:
# Load the humanoid "run" task; the policy below reads `action_spec` at call time.
env = suite.load(domain_name='humanoid', task_name='run')
action_spec = env.action_spec()


def random_policy(time_step):
    """Return a uniformly random action within the action-spec bounds.

    `time_step` is accepted to match the policy call signature but is not used.
    """
    lower, upper = action_spec.minimum, action_spec.maximum
    return np.random.uniform(lower, upper, action_spec.shape)


# Open the viewer application, driving the environment with the random policy.
viewer.launch(env, policy=random_policy)
