In [None]:
import gym
import cv2
import numpy as np

In [None]:
# DM Control Suite environments exposed through the dm2gym Gym wrapper.
# https://github.com/zuoxingdong/dm2gym

# Alternative environment to try instead: gym.make('Pendulum-v0')
env = gym.make(
    'dm2gym:FishSwim-v0',
    environment_kwargs={'flat_observation': True},
)

# Keep both spaces in named variables and show them, one per line.
observation_space, action_space = env.observation_space, env.action_space

print(observation_space, action_space, sep='\n')

In [None]:
# Rebind `env` to the dm2gym 'HumanoidRun-v0' environment, passing
# flat_observation=True through environment_kwargs.
env = gym.make(
    'dm2gym:HumanoidRun-v0',
    environment_kwargs={'flat_observation': True},
)

In [None]:
# Refresh the space handles from the current `env` and display them,
# observation space first, action space second.
observation_space, action_space = env.observation_space, env.action_space

print(observation_space, action_space, sep='\n')

In [None]:
# Rendering using dm2gym wrapper
# Extra keyword arguments forwarded to every env.render() call.
kwargs = {'use_opencv_renderer': True}

# Drive the environment with uniform random actions in [-1, 1] and render
# after every step. The loop is intentionally endless: interrupt the kernel
# to stop it.
obs = env.reset()
while True:
    # Read the shape from the live env rather than a variable left over from
    # another cell, so the action always matches the current environment.
    action = np.random.uniform(-1.0, 1.0, size=env.action_space.shape)
    obs, rew, done, info = env.step(action)

    env.render(mode='human', **kwargs)

    # The Gym API requires reset() before stepping again once an episode
    # has finished; without it the loop keeps stepping a finished episode.
    if done:
        obs = env.reset()

In [None]:
from dm_control import suite

# Print each (domain, task) pair found in suite.ALL_TASKS, one per line.
for domain_name, task_name in suite.ALL_TASKS:
    print(f'{domain_name} {task_name}')

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from dm_control import suite
from dm_control.suite.wrappers import action_noise
from six.moves import input

from dm_control import viewer

In [None]:
# Task options: an infinite time limit, so the episode is not cut off by time.
task_kwargs = {'time_limit': float('inf')}

# Load the humanoid "run" task straight from the suite (no Gym wrapper).
env = suite.load(
    domain_name='humanoid',
    task_name='run',
    task_kwargs=task_kwargs,
)
env.task.visualize_reward = True

# Wrap the environment with the action-noise wrapper (scale=1.0).
env = action_noise.Wrapper(env, scale=1.0)

# Open the interactive viewer on the wrapped environment.
viewer.launch(env)

In [None]:
# Another way to perform random actions: pass an explicit policy to the viewer.
# NOTE(review): if the previous cell ran, `env` is still the
# action_noise.Wrapper-wrapped environment created there.
action_spec = env.action_spec()

# Uniform random policy driven by the module-level `action_spec`.
def random_policy(time_step):
  """Return an action sampled uniformly within the action bounds.

  Args:
    time_step: Current time step; accepted for the policy interface but
      not used.

  Returns:
    Array of shape `action_spec.shape` drawn uniformly between
    `action_spec.minimum` and `action_spec.maximum`.
  """
  del time_step  # The sample does not depend on the observation.
  lower, upper = action_spec.minimum, action_spec.maximum
  return np.random.uniform(low=lower, high=upper, size=action_spec.shape)

# Open the interactive viewer, passing `random_policy` as its policy callback.
viewer.launch(env, policy=random_policy)

In [None]:
# Saving video example

def grabFrame(env, height=480, width=640, camera_id=0):
    """Render the environment's current state as a BGR image for OpenCV.

    Args:
        env: Environment exposing `physics.render(height, width, camera_id=...)`.
        height: Height of the rendered image in pixels (default 480).
        width: Width of the rendered image in pixels (default 640).
        camera_id: Camera passed to the renderer (default 0).

    Returns:
        The rendered frame with its colour channels reordered from RGB to
        BGR, the channel order OpenCV's writer and display functions expect.
    """
    # Get RGB rendering of env
    rgbArr = env.physics.render(height, width, camera_id=camera_id)
    # Convert to BGR for use with OpenCV. COLOR_RGB2BGR performs the same
    # channel swap as COLOR_BGR2RGB but names the direction correctly.
    return cv2.cvtColor(rgbArr, cv2.COLOR_RGB2BGR)

# Load task:
env = suite.load(domain_name="humanoid", task_name="run")

# Setup video writer - mp4 at 60 fps
# A first frame is rendered only to learn the output dimensions.
video_name = 'video.mp4'
frame = grabFrame(env)
height, width, layers = frame.shape
video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), 60.0, (width, height))
if not video.isOpened():
    # Without this check a writer that failed to initialise would be
    # written to for the whole episode without reporting an error.
    raise RuntimeError('Could not open video writer for {}'.format(video_name))

# First pass - Step through an episode and capture each frame
action_spec = env.action_spec()
try:
    time_step = env.reset()
    while not time_step.last():
        action = np.random.uniform(action_spec.minimum,
                                   action_spec.maximum,
                                   size=action_spec.shape)
        time_step = env.step(action)
        # Render once per step and write that same frame to the video
        # (previously the scene was rendered twice and one result discarded).
        frame = grabFrame(env)
        video.write(frame)
finally:
    # End render to video file - always release so the file is closed
    # even if stepping or rendering raises.
    video.release()

In [None]:
# Second pass - Playback
# Show the recorded video frame by frame; press 'q' in the window to stop early.
cap = cv2.VideoCapture(video_name)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # read() reports ret=False (and frame=None) once no frame could be
        # grabbed, e.g. at the end of the file; stop instead of handing
        # None to imshow, which would raise.
        if not ret:
            break
        cv2.imshow('Playback', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture even if display fails part-way through.
    cap.release()

In [None]:

# Exit: close every OpenCV window (e.g. the 'Playback' window shown above).
cv2.destroyAllWindows()

In [None]:
from dm_control import composer
from dm_control.locomotion.examples import basic_cmu_2019, basic_rodent_2020
import numpy as np

# Build an example environment from the dm_control locomotion examples.
# Alternative: env = basic_cmu_2019.cmu_humanoid_run_walls()
env = basic_rodent_2020.rodent_maze_forage()

# Get the `action_spec` describing the control inputs; `random_policy` below
# reads its minimum, maximum and shape.
action_spec = env.action_spec()

# Uniform random policy; it reads the module-level `action_spec` at call time.
def random_policy(time_step):
  """Sample one action uniformly between the action spec's bounds.

  Args:
    time_step: Current time step; part of the policy signature, ignored here.

  Returns:
    Array of shape `action_spec.shape` with entries drawn uniformly between
    `action_spec.minimum` and `action_spec.maximum`.
  """
  del time_step  # Intentionally unused.
  bounds = dict(low=action_spec.minimum, high=action_spec.maximum)
  return np.random.uniform(size=action_spec.shape, **bounds)

# Alternative to the viewer: step through the environment for one episode
# with random actions and print each transition. Disabled; kept as real
# comments rather than a bare string literal, which Python would still
# evaluate as an expression statement.
# time_step = env.reset()
# while not time_step.last():
#   action = np.random.uniform(action_spec.minimum, action_spec.maximum,
#                              size=action_spec.shape)
#   time_step = env.step(action)
#   print("reward = {}, discount = {}, observations = {}.".format(
#       time_step.reward, time_step.discount, time_step.observation))

# Open the interactive viewer, passing `random_policy` as its policy callback.
# `viewer` comes from the earlier `from dm_control import viewer` import cell.
viewer.launch(env, policy=random_policy)