In [1]:
import numpy as np
from PIL import Image
import cv2 
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- locations of the game, the driver binary and the CSV logs ---
game_url = "chrome://dino"
chrome_driver_path = "/usr/bin/chromedriver"
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# JavaScript: give the first 'runner-canvas' element an id so later lookups
# can use getElementById.
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# JavaScript: return the canvas content as a data URL with its first 22
# characters removed (presumably the "data:image/png;base64," prefix).
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

In [3]:
class Game:
    """Selenium wrapper around the page's JavaScript ``Runner`` game object.

    All game interaction goes through ``self._driver`` (a Chrome WebDriver),
    either by executing small JavaScript snippets or by sending key presses.
    """
    def __init__(self,custom_config=True):
        """Start Chrome, open the game page and prepare the canvas.

        custom_config: when True (default) ``Runner.config.ACCELERATION`` is
            set to 0 before playing; when False the game's own configuration
            is left untouched.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.get(game_url)
        if custom_config:
            self._driver.execute_script("Runner.config.ACCELERATION=0")
        # tag the canvas with an id (see init_script) for later lookups
        self._driver.execute_script(init_script)
    def get_crashed(self):
        """Return the value of ``Runner.instance_.crashed``."""
        return self._driver.execute_script("return Runner.instance_.crashed")
    def get_playing(self):
        """Return the value of ``Runner.instance_.playing``."""
        return self._driver.execute_script("return Runner.instance_.playing")
    def restart(self):
        """Call ``Runner.instance_.restart()`` in the page."""
        self._driver.execute_script("Runner.instance_.restart()")
    def press_up(self):
        """Send an ARROW_UP key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)
    def get_score(self):
        """Return the current score as an int.

        The page exposes the score as an array of digits (e.g. [1,0,0] for
        100). The digits are converted with ``str`` before joining so both
        string and numeric digits work; an empty or missing array yields 0.
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        if not score_array:
            return 0
        return int(''.join(str(digit) for digit in score_array))
    def pause(self):
        """Call ``Runner.instance_.stop()`` and return its result."""
        return self._driver.execute_script("return Runner.instance_.stop()")
    def resume(self):
        """Call ``Runner.instance_.play()`` and return its result."""
        return self._driver.execute_script("return Runner.instance_.play()")
    def end(self):
        """Shut the browser session down.

        ``quit()`` is used instead of ``close()`` so that the driver session
        is terminated as well, not only the current window.
        """
        self._driver.quit()

In [4]:
class DinoAgent:
    """Controller that forwards the agent's action and status queries to a game object."""
    def __init__(self,game):
        """Keep a reference to *game* and jump once, which starts the game."""
        self._game = game
        self.jump()
    def is_running(self):
        """Return what the game reports through ``get_playing()``."""
        playing = self._game.get_playing()
        return playing
    def is_crashed(self):
        """Return what the game reports through ``get_crashed()``."""
        crashed = self._game.get_crashed()
        return crashed
    def jump(self):
        """Trigger a jump by pressing the up key in the game."""
        self._game.press_up()

In [5]:
class Game_sate:
    """Environment step: applies an action, grabs the next frame and computes the reward."""
    def __init__(self,agent,game):
        """Store the agent/game pair and prime the frame-display coroutine."""
        self._agent = agent
        self._game = game
        self._display = show_img() # coroutine that shows each processed frame with OpenCV
        next(self._display) # advance the coroutine to its first ``yield``
    def get_state(self,actions):
        """Perform one game step.

        actions: indexable of length 2; ``actions[1] == 1`` means jump,
            anything else means do nothing.

        Returns ``(image, reward, is_over)``: the processed frame, 0.1 for a
        surviving step or -1 when the agent crashed, and the game-over flag.

        Side effects: appends the action to ``actions_df``; on a crash the
        score is appended to ``scores_df`` and the game is restarted.
        """
        actions_df.loc[len(actions_df)] = actions[1] # storing actions in a dataframe
        reward = 0.1
        is_over = False #game over
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image) #display the image on screen
        if self._agent.is_crashed():
            # The score is only needed here, so it is read on a crash instead
            # of on every frame. Index by len(scores_df) so every finished
            # game gets its own row.
            scores_df.loc[len(scores_df)] = self._game.get_score()
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over #return the Experience tuple

In [6]:
def save_obj(obj, name ):
    """Pickle *obj* to ``objects/<name>.pkl`` with the highest pickle protocol.

    The ``objects`` directory is created when it does not exist yet, so the
    first checkpoint on a fresh checkout does not fail.
    """
    os.makedirs('objects', exist_ok=True)
    with open('objects/'+ name + '.pkl', 'wb') as f: #dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name ):
    """Unpickle and return the object stored in ``objects/<name>.pkl``."""
    pickle_path = 'objects/' + name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

def grab_screen(_driver):
    """Fetch the game canvas through *_driver* and return it as a processed frame.

    The canvas is requested as a base64 string via ``getbase64Script``,
    decoded into an image array and passed through ``process_img``.
    """
    encoded_canvas = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded_canvas)
    frame = np.array(Image.open(BytesIO(raw_bytes)))
    return process_img(frame) # processing image as required

def process_img(image):
    """Convert a screenshot array to the 80x80 grayscale frame fed to the network.

    image: colour image array in RGB(A) channel order (``grab_screen`` builds
        it from a PIL image, which is RGB-ordered, hence ``COLOR_RGB2GRAY``
        rather than the BGR variant).

    Returns the grayscale image cropped to at most its first 300 rows and
    500 columns and then resized to 80x80.
    """
    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) #RGB to Grey Scale
    image = image[:300, :500] #Crop Region of Interest(ROI)
    image = cv2.resize(image, (80,80))
    return  image

def show_img(graphs = False):
    """
    Coroutine that shows every frame sent to it in an OpenCV window.

    graphs: selects the window title ("logs" when True, "game_play" otherwise).

    Usage: create the generator, advance it once, then ``send(frame)`` for
    each frame. Each frame is resized to 800x400 before being displayed.
    When 'q' is pressed in the window, all OpenCV windows are destroyed and
    the coroutine finishes, so the pending ``send`` raises StopIteration.
    """
    window_title = "logs" if graphs else "game_play"
    while True:
        screen = (yield)
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        enlarged = cv2.resize(screen, (800, 400))
        # show the enlarged frame (it used to be computed and then discarded)
        cv2.imshow(window_title, enlarged)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [7]:
#Initialize log structures from file if exists else create new
def _load_log(path, column):
    """Return the CSV log stored at *path*, or an empty frame with the single column *column* when the file is missing."""
    if os.path.isfile(path):
        return pd.read_csv(path)
    return pd.DataFrame(columns=[column])

# Each log checks for and reads its own file.
loss_df = _load_log(loss_file_path, 'loss')
scores_df = _load_log(scores_file_path, 'scores')
actions_df = _load_log(actions_file_path, 'actions')
q_values_df = _load_log(q_value_file_path, 'qvalues')

In [8]:
# ---- game / training hyper-parameters ----
ACTIONS = 2                  # number of possible actions: do nothing, jump
GAMMA = 0.99                 # discount applied to the predicted next-state Q value
OBSERVATION = 100.0          # timesteps to observe before training
EXPLORE = 100000             # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001       # final value of epsilon
INITIAL_EPSILON = 0.1        # starting value of epsilon
REPLAY_MEMORY = 50000        # number of previous transitions to remember
BATCH = 16                   # size of minibatch
FRAME_PER_ACTION = 1         # an action is chosen every FRAME_PER_ACTION timesteps
LEARNING_RATE = 0.0001       # learning rate handed to the Adam optimizer

# ---- network input geometry ----
img_rows = img_cols = 80
img_channels = 4             # 4 frames are stacked per state

In [9]:
# Training variables are checkpointed to the filesystem so that training can
# resume from the same step.
def init_cache():
    """Write the initial checkpoint values: epsilon, the step counter and an empty replay memory."""
    initial_values = (
        ("epsilon", INITIAL_EPSILON),
        ("time", 0),
        ("D", deque()),
    )
    for cache_name, value in initial_values:
        save_obj(value, cache_name)

In [10]:
# Initialise the checkpoint files only when at least one of them is missing.
# Calling init_cache() unconditionally would reset epsilon, the step counter
# and the replay memory every time the notebook is re-run.
# To deliberately start training from scratch, call init_cache() by hand.
if not all(os.path.isfile('objects/' + name + '.pkl') for name in ('epsilon', 'time', 'D')):
    init_cache()

In [11]:
def buildmodel():
    """Build and compile the convolutional Q-network.

    The network takes ``(img_cols, img_rows, img_channels)`` inputs, applies
    three Conv2D + MaxPooling2D + ReLU stages followed by a 512-unit dense
    layer, and outputs one value per action (``ACTIONS``). It is compiled
    with MSE loss and Adam at ``LEARNING_RATE``.

    Side effect: when ``model.h5`` does not exist yet, the freshly
    initialised weights are written to it so that later ``load_weights``
    calls find a file. Existing weights are never overwritten here.

    Returns the compiled model.
    """
    print("Now we build the model")
    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_cols,img_rows,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)

    #create model file if not present (test the weights file itself, not the loss log)
    if not os.path.isfile('model.h5'):
        model.save_weights('model.h5')
    print("We finish building the model")
    return model

In [12]:
def trainNetwork(model,game_state,observe=False):
    """Run the epsilon-greedy deep Q-learning loop against the game.

    model: compiled Keras model; its weights are (re)loaded from ``model.h5``.
    game_state: object whose ``get_state(action)`` returns
        ``(frame, reward, terminal)`` and which exposes ``_game.pause()`` /
        ``_game.resume()``.
    observe: when True the agent only plays (epsilon fixed at FINAL_EPSILON,
        training threshold set so high it is never reached); when False the
        network is trained once more than OBSERVATION timesteps have passed.

    The loop has no exit condition of its own; it ends only when an exception
    propagates (for example StopIteration from ``game_state.get_state``).

    Every 1000 timesteps the weights, replay memory, step counter, epsilon,
    CSV logs and the model JSON are written to disk.
    """
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D") #load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] =1 #0 => do nothing,
                     #1=> jump

    x_t, r_0, terminal = game_state.get_state(do_nothing) # get next step after performing the action

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2) # stack 4 images to create placeholder input
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1 x rows x cols x 4

    initial_state = s_t

    if observe :
        OBSERVE = 999999999    #We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
    model.load_weights("model.h5")
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    if observe :
        print ("Weight load successfully")

    t = load_obj("time") # resume from the previous time step stored in file system
    while (True): #endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0 #reward at t
        a_t = np.zeros([ACTIONS]) # action at t

        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: #parameter to skip frames for actions
            if  random.random() <= epsilon: #randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else: # predict the output
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                action_index = np.argmax(q)  # choosing index with maximum q value
                a_t[action_index] = 1        # 0=> do nothing, 1=> jump

        #We reduced the epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        #run the selected action and observed next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        now = time.time()
        # guard against a zero elapsed time on coarse clocks
        print('fps: {0}'.format(1 / max(now - last_time, 1e-9))) # helpful for measuring frame rate
        last_time = now
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # 1 x rows x cols x 1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) # newest frame goes first, oldest frame is dropped

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing (and once a full minibatch can be sampled)
        if t > OBSERVE and len(D) >= BATCH:

            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            # BATCH x rows x cols x 4
            inputs = np.concatenate([sample[0] for sample in minibatch], axis=0).astype(np.float64)
            next_inputs = np.concatenate([sample[3] for sample in minibatch], axis=0)

            # Two batched forward passes replace the former per-sample predict calls.
            targets = np.array(model.predict(inputs), dtype=np.float64)   # BATCH x ACTIONS
            next_q = model.predict(next_inputs)                           # BATCH x ACTIONS

            #Now we do the experience replay
            # The sampled flag gets its own name so that it cannot clobber the
            # current step's `terminal`, which decides the state reset below.
            for i, (_, action_i, reward_i, _, terminal_i) in enumerate(minibatch):
                if terminal_i:
                    targets[i, action_i] = reward_i # if terminated, only equals reward
                else:
                    targets[i, action_i] = reward_i + GAMMA * np.max(next_q[i])

            Q_sa = next_q[-1:] # Q values of the last sampled next state, used for logging

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 #reset game to initial frame if terminate
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            game_state._game.pause() #pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D,"D") #saving episodes
            save_obj(t,"time") #caching time steps
            save_obj(epsilon,"epsilon") #cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv(loss_file_path,index=False)
            scores_df.to_csv(scores_file_path,index=False)
            actions_df.to_csv(actions_file_path,index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            with open("model.json", "w") as outfile:
                # NOTE: to_json() already returns a JSON string, so the file holds a
                # JSON-encoded string; kept as is for compatibility with existing files.
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state,             "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t,             "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)


In [13]:
def playGame(observe=False):
    """Set up the browser game, agent and model, then run ``trainNetwork``.

    observe: forwarded to ``trainNetwork``.

    A StopIteration coming out of ``trainNetwork`` is treated as a normal end
    of the session. The browser is shut down in every case, including
    KeyboardInterrupt and setup errors; those exceptions still propagate
    after cleanup.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        try:
            trainNetwork(model,game_state,observe=observe)
        except StopIteration:
            pass
    finally:
        game.end()

In [14]:
# Start a training session (observe=False means the network is trained).
playGame(observe=False)

Now we build the model
We finish building the model
fps: 0.37936920195785184
TIMESTEP 1 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 31.07950857329164
TIMESTEP 2 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 32.28871439568899
TIMESTEP 3 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 31.239462845311067
TIMESTEP 4 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 32.88154408190784
TIMESTEP 5 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 31.988773471224395
TIMESTEP 6 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 32.425004251897896
TIMESTEP 7 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 27.0976128177795
TIMESTEP 8 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 25.36560349311175
TIMESTEP 9 / STATE observe / EPSILON 0.1 / ACTION

fps: 22.7451899091126
TIMESTEP 74 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 24.963569162644255
TIMESTEP 75 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 15.818071421298164
TIMESTEP 76 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.811915794199226
TIMESTEP 77 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 15.638367523470764
TIMESTEP 78 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 14.0611619564853
TIMESTEP 79 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 17.065139025640608
TIMESTEP 80 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 30.82188680354492
TIMESTEP 81 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 14.759944821373272
TIMESTEP 82 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
------

fps: 7.057636553013412
TIMESTEP 137 / STATE explore / EPSILON 0.09996403599999995 / ACTION 1 / REWARD 0.1 / Q_MAX  4.8186393 / Loss  0.04314285144209862
fps: 7.025211210044286
TIMESTEP 138 / STATE explore / EPSILON 0.09996303699999995 / ACTION 1 / REWARD 0.1 / Q_MAX  6.399618 / Loss  0.8607652187347412
fps: 8.947275907675243
TIMESTEP 139 / STATE explore / EPSILON 0.09996203799999995 / ACTION 0 / REWARD 0.1 / Q_MAX  8.169534 / Loss  0.8525620102882385
fps: 8.835061318548913
TIMESTEP 140 / STATE explore / EPSILON 0.09996103899999995 / ACTION 0 / REWARD 0.1 / Q_MAX  4.4245343 / Loss  0.043837837874889374
fps: 7.899294124737274
TIMESTEP 141 / STATE explore / EPSILON 0.09996003999999994 / ACTION 0 / REWARD 0.1 / Q_MAX  11.022433 / Loss  0.777346670627594
fps: 9.134355011879899
TIMESTEP 142 / STATE explore / EPSILON 0.09995904099999994 / ACTION 0 / REWARD 0.1 / Q_MAX  5.7934656 / Loss  0.1268974244594574
fps: 6.66828406675432
TIMESTEP 143 / STATE explore / EPSILON 0.09995804199999994 / ACTIO

TIMESTEP 190 / STATE explore / EPSILON 0.09991108899999987 / ACTION 0 / REWARD 0.1 / Q_MAX  10.327347 / Loss  0.09079746901988983
fps: 6.105734811375001
TIMESTEP 191 / STATE explore / EPSILON 0.09991008999999987 / ACTION 0 / REWARD 0.1 / Q_MAX  5.26234 / Loss  0.028471512719988823
fps: 6.7509600991485454
TIMESTEP 192 / STATE explore / EPSILON 0.09990909099999987 / ACTION 0 / REWARD 0.1 / Q_MAX  9.922419 / Loss  0.9388775825500488
fps: 6.3051760631133345
TIMESTEP 193 / STATE explore / EPSILON 0.09990809199999987 / ACTION 0 / REWARD 0.1 / Q_MAX  7.9722 / Loss  0.014611373655498028
fps: 4.957009275104712
TIMESTEP 194 / STATE explore / EPSILON 0.09990709299999986 / ACTION 1 / REWARD 0.1 / Q_MAX  10.1039095 / Loss  0.030314577743411064
----------Random Action----------
fps: 5.607148395514606
TIMESTEP 195 / STATE explore / EPSILON 0.09990609399999986 / ACTION 1 / REWARD 0.1 / Q_MAX  10.067735 / Loss  0.022336360067129135
fps: 6.778826510822063
TIMESTEP 196 / STATE explore / EPSILON 0.0999050

fps: 7.179212260945622
TIMESTEP 244 / STATE explore / EPSILON 0.09985714299999979 / ACTION 1 / REWARD 0.1 / Q_MAX  5.3881197 / Loss  0.09840026497840881
----------Random Action----------
fps: 7.989256999617138
TIMESTEP 245 / STATE explore / EPSILON 0.09985614399999979 / ACTION 0 / REWARD 0.1 / Q_MAX  5.3509636 / Loss  0.39882755279541016
fps: 7.050020590484674
TIMESTEP 246 / STATE explore / EPSILON 0.09985514499999978 / ACTION 1 / REWARD 0.1 / Q_MAX  6.2148285 / Loss  0.05140404775738716
fps: 6.772521766088821
TIMESTEP 247 / STATE explore / EPSILON 0.09985414599999978 / ACTION 1 / REWARD 0.1 / Q_MAX  9.23596 / Loss  0.07452288269996643
fps: 6.812728516040588
TIMESTEP 248 / STATE explore / EPSILON 0.09985314699999978 / ACTION 1 / REWARD 0.1 / Q_MAX  10.003293 / Loss  0.0872301533818245
fps: 9.314817570283576
TIMESTEP 249 / STATE explore / EPSILON 0.09985214799999978 / ACTION 0 / REWARD 0.1 / Q_MAX  8.5724325 / Loss  0.06086870655417442
fps: 6.951070924289531
TIMESTEP 250 / STATE explore

TIMESTEP 298 / STATE explore / EPSILON 0.0998031969999997 / ACTION 1 / REWARD 0.1 / Q_MAX  7.5408955 / Loss  0.1015625
fps: 6.34794637478925
TIMESTEP 299 / STATE explore / EPSILON 0.0998021979999997 / ACTION 1 / REWARD 0.1 / Q_MAX  6.4140463 / Loss  0.07485312223434448
fps: 7.215721356402241
TIMESTEP 300 / STATE explore / EPSILON 0.0998011989999997 / ACTION 1 / REWARD 0.1 / Q_MAX  6.896754 / Loss  0.09592065215110779
fps: 6.489154583542197
TIMESTEP 301 / STATE explore / EPSILON 0.0998001999999997 / ACTION 1 / REWARD 0.1 / Q_MAX  7.3138676 / Loss  0.12950068712234497
fps: 6.611033352247651
TIMESTEP 302 / STATE explore / EPSILON 0.0997992009999997 / ACTION 1 / REWARD 0.1 / Q_MAX  3.6535587 / Loss  1.181394338607788
fps: 7.482372921267737
TIMESTEP 303 / STATE explore / EPSILON 0.0997982019999997 / ACTION 1 / REWARD -1 / Q_MAX  8.722599 / Loss  0.7250130772590637
----------Random Action----------
fps: 6.732214054123463
TIMESTEP 304 / STATE explore / EPSILON 0.0997972029999997 / ACTION 1 / 

KeyboardInterrupt: 