Set up rendering dependencies for Google Colaboratory.

In [None]:
# Python packages: the gym environments and the virtual display helper.
# %pip installs into the environment of the running kernel, and -q keeps the
# log short while still surfacing errors (redirecting both streams to
# /dev/null hid every install failure).
%pip install -q gym pyvirtualdisplay
# System packages for the rendering setup. Only stdout is discarded so that
# any apt-get error written to stderr stays visible.
!apt-get install -y -qq xvfb python-opengl ffmpeg > /dev/null

Install d3rlpy!

In [None]:
# %pip installs into the environment of the running kernel.
# NOTE(review): the version is unpinned; later cells import d3rlpy.online and
# d3rlpy.metrics.scorer, so consider pinning a release that provides them.
%pip install d3rlpy

Set up the CartPole environment.

In [None]:
import gym

# Both environments are built from the same registered environment id.
ENV_ID = 'CartPole-v0'

# Two independent instances: `env` is the one trained on, `eval_env` is the
# one handed to fit_online() as its evaluation environment.
env = gym.make(ENV_ID)
eval_env = gym.make(ENV_ID)

Set up the data-driven deep reinforcement learning algorithm.

In [None]:
from d3rlpy.algos import DQN
from d3rlpy.online.explorers import ConstantEpsilonGreedy
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.metrics.scorer import evaluate_on_environment

# Training budget handed to fit_online(): total number of steps and the
# number of steps that make up one epoch.
TOTAL_STEPS = 50000
STEPS_PER_EPOCH = 10000

# DQN learner; use_gpu=False requests that no GPU is used.
dqn = DQN(
    learning_rate=1e-3,
    target_update_interval=100,
    use_gpu=False,
)

# Epsilon-greedy explorer with a constant epsilon of 0.3.
explorer = ConstantEpsilonGreedy(epsilon=0.3)

# Replay buffer created for `env`, with maxlen=50000.
buffer = ReplayBuffer(maxlen=50000, env=env)

# Train online against `env`, passing `eval_env` as the evaluation
# environment.
dqn.fit_online(
    env,
    buffer,
    explorer,
    eval_env=eval_env,
    n_steps=TOTAL_STEPS,
    n_steps_per_epoch=STEPS_PER_EPOCH,
)

Set up rendering utilities for Google Colaboratory.

In [None]:
import glob
import io
import base64

from gym.wrappers import Monitor
from IPython.display import HTML
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

# Size passed to the virtual display, as (width, height).
SCREEN_WIDTH, SCREEN_HEIGHT = 1400, 900

# Create the virtual display with visible=0 and start it.
# NOTE(review): the name `display` hides IPython's own display() helper in
# this namespace, which is presumably why IPython.display is imported as
# `ipythondisplay` -- confirm before renaming.
display = Display(visible=0, size=(SCREEN_WIDTH, SCREEN_HEIGHT))
display.start()

# play recorded video
def show_video():
    """Display a recorded episode inline in the notebook.

    Looks for ``*.mp4`` files in the ``video`` directory, relative to the
    current working directory. If any exist, the first one in sorted filename
    order is base64-encoded, embedded as a data URI in an HTML ``<video>``
    element and displayed. Otherwise "Could not find video" is printed.

    Returns:
        None.
    """
    # glob.glob() returns matches in arbitrary order; sorting makes the
    # selected recording deterministic across runs.
    mp4list = sorted(glob.glob('video/*.mp4'))
    if not mp4list:
        print("Could not find video")
        return

    # Read-only binary mode is sufficient, and the context manager closes the
    # file handle instead of leaking it.
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())

    ipythondisplay.display(HTML(data='''
            <video alt="test" autoplay loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
            </video>'''.format(encoded.decode('ascii'))))

Record video!

In [None]:
# Directory handed to Monitor; show_video() looks for *.mp4 files under it.
VIDEO_DIR = './video'

# Wrap the environment with gym's Monitor wrapper.
env = Monitor(env, VIDEO_DIR, force=True)

# Build the scorer for the wrapped environment, then apply it to the trained
# DQN. The call stays the cell's last expression so its result is displayed.
scorer = evaluate_on_environment(env)
scorer(dqn)

Let's see how it works!

In [None]:
# Embed an .mp4 found under ./video (the Monitor output directory) in the cell
# output, or print a message if no recording exists.
show_video()