In [None]:
import gym
import tensorflow as tf
import numpy as np
import os

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from rl_utils import load_expert_policy, SupervisedPolicy, run_rollouts, get_valdata, render_demo

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Imports specifically so we can render outputs in Jupyter.
from JSAnimation.IPython_display import display_animation
from matplotlib import animation
from IPython.display import display
    
def get_session():
    """
    Build a tf.Session from a ConfigProto with gpu_options.allow_growth enabled.

    Returns:
        The newly created tf.Session.
    """
    session_config = tf.ConfigProto()
    # The flag has to be set on the config before the session is built from it.
    session_config.gpu_options.allow_growth = True
    return tf.Session(config=session_config)

# Enable the autoreload extension in mode 2 so that edits to external modules
# (e.g. rl_utils) are picked up live instead of requiring a kernel restart.
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def display_frames_as_gif(frames, interval=25):
    """
    Displays a list of frames as a gif, with controls

    Args:
        frames: sequence of images that imshow can draw. The first frame
            initialises the plot; each animation step swaps in the next one.
        interval: delay between frames in milliseconds, forwarded to
            FuncAnimation. Defaults to 25, the value that used to be hard-coded.

    Returns:
        None. The animation is shown through IPython's display().
    """
    # With no frames, frames[0] below would fail with an opaque IndexError;
    # warn and skip rendering so the cause is obvious to the notebook user.
    if len(frames) == 0:
        import warnings
        warnings.warn("display_frames_as_gif: no frames to display", stacklevel=2)
        return

    # Draw onto pyplot's current figure so a figure prepared by the caller
    # (e.g. with a custom figsize) is still honoured.
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        # Swap the image data in place instead of re-drawing the axes.
        patch.set_data(frames[i])

    anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=interval)
    display(display_animation(anim, default_mode='loop'))

In [None]:
# Load the environment together with its expert policy.
env, expert = load_expert_policy('roboschool-cheetah')

# Both data-collection calls share the environment's step limit, so read it once.
step_limit = env.spec.timestep_limit

# Gather expert data for training, plus a separate set for validation.
# NOTE(review): the trailing 20 is presumably the number of rollouts — confirm in rl_utils.
observations, actions, rewards = run_rollouts(env, expert, step_limit, 20)
val_data = get_valdata(env, expert, step_limit)

In [None]:
# Pair the expert's observations with its actions as the training set.
train_data = (observations, actions)

# Build the cloner policy from the environment's observation/action spaces
# and fit it on the expert data, checking against the validation set.
noob = SupervisedPolicy(env.observation_space, env.action_space)
# NOTE(review): 300 is presumably the number of training epochs/iterations — confirm in rl_utils.
noob.train(train_data, val_data, 300, verbose=False)

In [None]:
# Persist the trained policy through its save_json method.
# NOTE(review): the output location and format are decided inside rl_utils — confirm there.
noob.save_json()

In [None]:
# Run the cloned policy in the environment, collect rendered frames,
# and show them as an inline animation.
# NOTE(review): 300 is presumably the number of steps to render — confirm in rl_utils.
frames = render_demo(env, noob, 300)
display_frames_as_gif(frames)

In [None]:
# NOTE(review): presumably restores the policy state written by the earlier
# noob.save_json() call — confirm the path and semantics in rl_utils.
noob.load_json()