# Test Pendulum Offline Training with d3rlpy and Decision Transformer

This notebook trains d3rlpy's Decision Transformer offline on the Pendulum dataset, then rolls the trained policy out in the OpenAI Gym `Pendulum-v1` environment and records a video of the episode.

In [None]:
# Test if we are running on CoLab or not
if 'google.colab' in str(get_ipython()):
  print('Running on CoLab')
  %apt-get install -y xvfb ffmpeg > /dev/null 2>&1
  %pip install pyvirtualdisplay pygame moviepy > /dev/null 2>&1
  %pip install d3rlpy
else:
  print('Not running on CoLab')

In [None]:
# Shell out to nvidia-smi to show the GPU status of this machine
# (the command is expected to fail when no NVIDIA driver is installed).
!nvidia-smi

In [None]:
# Directory creation
import os

# Output locations used later in the notebook: saved model weights and the
# recorded episode videos.
# exist_ok=True makes this cell idempotent and avoids the race between
# checking for the directory and creating it.
for path in ("./models", "./videos/video-pendulum"):
  os.makedirs(path, exist_ok=True)

In [None]:
import d3rlpy
import torch

# set seed for reproducibility
seed = 1
d3rlpy.seed(seed)

# get Pendulum dataset together with its matching environment
dataset, env = d3rlpy.datasets.get_pendulum()

# Train on the first GPU when one is available, otherwise fall back to the CPU
# so the notebook also runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu:0"

# Setup Decision Transformer with its default configuration
dt = d3rlpy.algos.DecisionTransformerConfig().create(device=device)


In [None]:
# Offline training settings (forwarded verbatim to dt.fit below)
N_STEPS = 10000
N_STEPS_PER_EPOCH = 1000
EVAL_TARGET_RETURN = 0  # target environment return used for evaluation
MODEL_PATH = "./models/pendulum-dt.d3"

# Train the Decision Transformer on the offline dataset, evaluating against
# the environment that was returned alongside the dataset.
fit_options = dict(
   n_steps=N_STEPS,
   n_steps_per_epoch=N_STEPS_PER_EPOCH,
   eval_env=env,
   eval_target_return=EVAL_TARGET_RETURN,
)
dt.fit(dataset, **fit_options)

# Persist the trained model.
# NOTE(review): save_model() is used with a ".d3" file name; confirm this
# matches how the file is loaded later (load_model vs. d3rlpy.load_learnable).
dt.save_model(MODEL_PATH)

In [None]:
import gym
import numpy as np
from gym.wrappers import RecordVideo

# start virtual display
d3rlpy.notebook_utils.start_virtual_display()

# Wrap a fresh Pendulum environment with RecordVideo. A dedicated name is used
# so the `env` returned with the dataset (and used as eval_env during training)
# is not overwritten, which keeps the training cell re-runnable.
video_env = RecordVideo(gym.make("Pendulum-v1", render_mode="rgb_array"), './videos/video-pendulum')

# wrap as stateful actor for interaction
actor = dt.as_stateful_wrapper(target_return=0)

# interaction: roll out a single episode with the trained policy
try:
    # reset() returns (observation, info); only the observation is needed
    observation, _ = video_env.reset()
    reward = 0.0
    while True:
        action = actor.predict(observation, reward)
        observation, reward, terminated, truncated, _ = video_env.step(action)
        if terminated or truncated:
            break
finally:
    # Always release the environment (and its recorder), even if the rollout
    # raises, so no rendering resources are leaked.
    video_env.close()

d3rlpy.notebook_utils.render_video("./videos/video-pendulum/rl-video-episode-0.mp4")