**Installing the relevant libraries.**

In [0]:
# Python packages: gym provides the environments; pyvirtualdisplay provides the
# Display class that is imported and started further down in this notebook.
!pip install gym pyvirtualdisplay
# System packages. Xvfb is the command the Display object launches (it appears
# in that cell's output). python-opengl and ffmpeg are presumably required for
# rendering and for encoding the recorded episode video -- TODO confirm.
!apt-get install -y xvfb python-opengl ffmpeg

In [0]:
!apt-get update 
!apt-get install cmake
!pip install --upgrade setuptools 
!pip install ez_setup 
!pip install gym[box2d]

**Importing the relevant libraries.**

In [0]:
import gym
import numpy as np
import matplotlib.pyplot as plt
import random
import cv2

In [4]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.optimizers import Adamax
from keras.layers import Convolution2D

Using TensorFlow backend.


In [0]:
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40)
import glob
import io
import base64
from IPython import display as ipythondisplay
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython.display import clear_output

**Defining the virtual display used to render the OpenAI Gym environment.**

In [6]:
# Create a 1400x900 pyvirtualdisplay Display with visible=0 and start it, so
# that environment rendering has a display to draw to.
# NOTE(review): presumably required because the notebook host has no physical
# screen -- confirm for your runtime.
display = Display(visible=0, size=(1400, 900))
display.start()

<Display cmd_param=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '1400x900x24', ':1001'] cmd=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '1400x900x24', ':1001'] oserror=None return_code=None stdout="None" stderr="None" timeout_happened=False>

**Wrapping the environment.**

In [0]:
def wrap_env(env):
  """Return ``env`` wrapped in a gym ``Monitor`` writing to ``./video``.

  ``force=True`` is forwarded to ``Monitor``; presumably this lets it reuse a
  directory that already holds earlier recordings -- TODO confirm against the
  gym version in use.
  """
  return Monitor(env, './video', force=True)

In [8]:
# Build a monitored CarRacing-v0 environment and reset it to obtain the first
# observation. This `env` is the one later handed to Model(env); the training
# loop cell creates a fresh environment for every episode.
env = wrap_env(gym.make("CarRacing-v0"))
observation = env.reset()

Track generation: 907..1144 -> 237-tiles track


**Defining a function to transform the observed image into a usable image for our convolutional neural network.**

In [0]:
def transform(obs):
  """Convert a raw RGB observation into a binary float image for the network.

  Steps:
    1. Crop to rows 0..83 and columns 6..89.
    2. Convert the crop from RGB to grayscale.
    3. Binarise with a threshold of 120 (pixels above it become 255, the
       rest 0).
    4. Cast to float and divide by 255 so every pixel is 0.0 or 1.0.

  Args:
    obs: image array as returned by the environment (assumed to be at least
      84 rows by 90 columns with RGB channels -- TODO confirm).

  Returns:
    2-D float array holding only 0.0 and 1.0.
  """
  cropped = obs[:84, 6:90]
  gray = cv2.cvtColor(cropped, cv2.COLOR_RGB2GRAY)
  # cv2.threshold returns (threshold_used, image); only the image is needed.
  _, binary = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY)
  return binary.astype('float') / 255

**Defining a function to convert the output from our convolutional neural network into an action to feed the environment.**

In [0]:
def output_to_action(output_value):
  """Map a discrete network output index to a [steering, gas, brake] action.

  Index to action mapping:
    0..4 -> steering of -1.0, -0.5, 0.0, 0.5, 1.0 (no gas, no brake)
    5    -> gas of 1/3
    6    -> brake of 0.5

  Any other value prints "error" and yields the no-op action [0.0, 0.0, 0.0].
  This includes negative indices, which previously fell into the steering
  branch and produced steering values below -1.

  Args:
    output_value: index of the chosen network output (int or numpy integer).

  Returns:
    list of three floats: [steering, gas, brake].
  """
  gas = 0.0
  brake = 0.0
  steering = 0.0

  if 0 <= output_value <= 4:
    # Centre the five steering indices on 2 so they span [-1, 1] in 0.5 steps.
    steering = float(output_value - 2)/2
  elif output_value == 5:
    gas = 1.0/3
  elif output_value == 6:
    brake = 0.5
  else:
    print("error")

  return [steering, gas, brake]

**Defining our convolutional neural network.**

In [0]:
def neuralnet():
    """Build, compile and summarise the convolutional Q-network.

    Architecture, in order:
      * Conv 24 filters, 5x5 kernel, stride 2, ELU, input shape (84, 84, 1)
      * Conv 36 filters, 5x5 kernel, stride 2, ELU
      * Conv 48 filters, 3x3 kernel, ELU
      * Flatten
      * Dense 50 (ELU), Dense 25 (ELU)
      * Dense 7 with linear activation, one output per discrete action

    The model is compiled with mean-squared-error loss and a default Adamax
    optimizer, and its summary is printed as a side effect.

    The convolution layers use the Keras 2 call form (kernel size as a tuple
    and ``strides=``) instead of the deprecated Keras 1 form
    (``nb_row, nb_col`` positionals and ``subsample=``). The layers built are
    the same.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(Convolution2D(24, (5, 5), strides=(2, 2), input_shape=(84, 84, 1), activation='elu'))
    model.add(Convolution2D(36, (5, 5), strides=(2, 2), activation='elu'))
    model.add(Convolution2D(48, (3, 3), activation='elu'))
    model.add(Flatten())
    model.add(Dense(50, activation='elu'))
    model.add(Dense(25, activation='elu'))
    model.add(Dense(7, activation = 'linear'))

    adamax = Adamax()
    model.compile(loss='mse', optimizer = adamax)
    model.summary()

    return model

In [0]:
class Model:
    """Thin wrapper around the network built by ``neuralnet()``.

    Exposes prediction, a single-sample fit step, and epsilon-greedy action
    selection over the 7 network outputs.
    """

    def __init__(self, env):
        """Store ``env`` and build a fresh network."""
        self.env = env
        self.model = neuralnet()

    def predict(self, state):
        """Return the network outputs for one 84x84 state.

        The state is reshaped into a batch of one single-channel image, and
        the first (only) row of the prediction is returned.
        """
        batch = state.reshape(1, 84, 84, 1)
        outputs = self.model.predict(batch, verbose=0)
        return outputs[0]

    def update(self, state, G):
        """Fit the network for one epoch on ``state`` with target vector ``G``.

        ``G`` is converted to an array and reshaped to rows of 7 values.
        """
        inputs = state.reshape(1, 84, 84, 1)
        targets = np.array(G).reshape(-1, 7)
        self.model.fit(inputs, targets, epochs=1, verbose=0)

    def sample_action(self, state, eps):
        """Pick an action index epsilon-greedily.

        Returns:
            (index, qval): with probability ``eps`` a uniformly random index
            in 0..6, otherwise the argmax of the predictions; ``qval`` is the
            prediction vector for ``state`` in both cases.
        """
        qval = self.predict(state)
        explore = np.random.random() < eps
        chosen = random.randint(0, 6) if explore else np.argmax(qval)
        return chosen, qval

**Defining a function to play a video of the rendering of each episode to get a visual representation of our agent learning.**

In [0]:
def show_video():
  """Embed the first recorded episode video in the notebook output.

  Looks for ``video/*.mp4`` relative to the working directory. If at least one
  file exists, the first match returned by ``glob`` is read, base64-encoded
  and displayed as an inline HTML5 ``<video>`` element. Otherwise the message
  "Could not find video" is printed.
  """
  mp4list = glob.glob('video/*.mp4')
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    # Open read-only inside a context manager: the file is only read, and the
    # handle is closed deterministically instead of being leaked.
    with io.open(mp4, 'rb') as video_file:
      video = video_file.read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
  else: 
    print("Could not find video")

**Defining a function to play an episode of the car racing environment, and to update the parameters of our network to allow it to make better control decisions in the future.**

In [0]:
def play_one(env, model, eps, gamma):
    """Play one episode, updating the network after every step.

    On each step an action index is chosen epsilon-greedily, converted to an
    environment action and executed. The target for the chosen index is set to
    ``reward + gamma * max(Q(next_state))`` while the other outputs keep their
    current predictions; the network is then fitted on the previous state.

    Args:
        env: environment exposing ``reset``, ``render`` and ``step``.
        model: object exposing ``sample_action``, ``predict`` and ``update``.
        eps: probability of choosing a random action.
        gamma: discount factor applied to the next state's best value.

    Returns:
        (totalreward, iters): summed reward and number of steps taken.
    """
    done = False
    totalreward = 0
    iters = 0
    observation = env.reset()
    clear_output(wait=True)

    # Transform each frame once and carry it forward; the state used to pick
    # an action is the same frame that ended the previous step.
    state = transform(observation)

    while not done:
        env.render()

        action_index, qval = model.sample_action(state, eps)
        action = output_to_action(action_index)
        observation, reward, done, info = env.step(action)

        next_state = transform(observation)

        # NOTE(review): the target bootstraps from next_state even when `done`
        # is True; confirm whether terminal steps should use `reward` alone.
        qval_next = model.predict(next_state)
        G = reward + gamma*np.max(qval_next)

        # Take a real copy: slicing an ndarray with [:] returns a view, so
        # writing the target through it would also modify `qval`.
        y = np.array(qval)
        y[action_index] = G
        model.update(state, y)

        totalreward += reward
        iters += 1
        state = next_state

    return totalreward, iters

**When executed, this block of code plays N episodes and renders each one for visualization purposes. The neural network also learns from each episode that it plays.**

In [15]:
# Number of episodes to play, and storage for the total reward of each one.
N = 1
totalrewards = np.empty(N)

# Build the network wrapper once so learning carries over between episodes.
model = Model(env)
# eps: probability of a random action; gamma: discount factor for the target.
eps = 0.15
gamma = 0.95

for n in range(N):
    # A fresh monitored environment is created for every episode.
    env = wrap_env(gym.make("CarRacing-v0"))
    
    totalreward, iters = play_one(env, model, eps, gamma)
    totalrewards[n] = totalreward
    
    print("Episode:", n, ", Iters", iters, ", Total Reward:", totalreward, "\n")
    # Close the environment before reading the recording, then delete the
    # video directory so the next episode's show_video() does not pick up an
    # older file.
    env.close()
    show_video()
    !rm -r video

Instructions for updating:
Use tf.cast instead.
Episode: 0 , Iters 1000 , Total Reward: 24.9999999999989 

