Set up rendering dependencies for Google Colaboratory.

In [None]:
!apt-get install -y xvfb ffmpeg > /dev/null 2>&1
!pip install pyvirtualdisplay pygame moviepy > /dev/null 2>&1

Install d3rlpy!

In [None]:
!pip install d3rlpy

Set up the CartPole dataset.

In [None]:
import d3rlpy

# fetch the CartPole dataset together with the environment returned alongside it
cartpole_bundle = d3rlpy.datasets.get_cartpole()
dataset, env = cartpole_bundle

Set up a data-driven deep reinforcement learning algorithm.

In [None]:
# fix the library-level random seeds so that training is repeatable across runs
# NOTE(review): rollouts in the evaluation environment may still vary between
# runs unless the environment itself is seeded as well.
d3rlpy.seed(1)

# setup CQL algorithm
cql = d3rlpy.algos.DiscreteCQLConfig().create()

# start training
cql.fit(
    dataset,
    n_steps=10000,
    n_steps_per_epoch=1000,
    evaluators={
        # evaluate with CartPole-v1 environment
        'environment': d3rlpy.metrics.EnvironmentEvaluator(env),
    },
)

Set up rendering utilities for Google Colaboratory.

In [None]:
import glob
import io
import base64

from gym.wrappers import RecordVideo
from IPython.display import HTML
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

# start virtual display
# Bound to `virtual_display` rather than `display` so that IPython's built-in
# `display()` helper is not shadowed in the notebook namespace.
virtual_display = Display()
virtual_display.start()

# play recorded video
def show_video(video_dir='video'):
    """Embed a recorded MP4 from ``video_dir`` in the notebook output.

    The first ``*.mp4`` file (in sorted filename order) is read,
    base64-encoded and displayed as an inline HTML5 ``<video>`` element.
    When no MP4 file is found, "Could not find video" is printed instead.

    Args:
        video_dir: directory searched for ``*.mp4`` files. Defaults to
            ``'video'``, resolved against the current working directory.
    """
    # glob returns matches in arbitrary order; sort so the choice is deterministic
    mp4list = sorted(glob.glob('{0}/*.mp4'.format(video_dir)))
    if not mp4list:
        print("Could not find video")
        return

    # read-only binary mode; the context manager closes the handle promptly
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())

    # inline the clip as a data URI so no separate file has to be served
    ipythondisplay.display(HTML(data='''
            <video alt="test" autoplay loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
            </video>'''.format(encoded.decode('ascii'))))

Record video!

In [None]:
import gym

# Wrap a fresh CartPole environment with RecordVideo so rollouts are saved under ./video.
# A separate name is used so that `env` (returned with the dataset and passed to
# the training evaluator) is not replaced by the recording wrapper.
video_env = RecordVideo(gym.make("CartPole-v1", render_mode="rgb_array"), './video')

# evaluate the trained policy; always close the wrapper afterwards so the
# environment's resources are released even if evaluation fails
try:
    evaluation_score = d3rlpy.metrics.evaluate_qlearning_with_environment(cql, video_env)
finally:
    video_env.close()

# keep the evaluation result as the cell output
evaluation_score

Let's see how it works!

In [None]:
# embed an MP4 found under video/ inline in this cell's output
show_video()