In [2]:
import numpy as np
from PIL import Image
import cv2
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json

Using TensorFlow backend.


In [3]:
#path variables
game_url = "chrome://dino"
# Raw string: "\c" is not a recognised escape sequence, and recent Python versions
# warn about such literals. The resulting value is unchanged.
chrome_driver_path = r"C:\chromedriver"
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

#scripts
#create id for canvas for faster selection from DOM
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

#get image from canvas
# toDataURL() returns a data URL; substring(22) drops its first 22 characters so that
# only the payload that grab_screen() base64-decodes is returned.
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

In [4]:
'''
* Game class: Selenium interface between Python and the browser
* __init__(): launches the browser window and loads the game page
* get_crashed(): returns true if the agent has crashed into an obstacle. Reads the JavaScript variable from the game describing the state
* get_playing(): true if the game is in progress, false if it is crashed or paused
* restart(): sends a signal to the browser-side JavaScript to restart the game
* press_up(): sends a single UP-arrow key press to the browser
* press_down(): sends a single DOWN-arrow key press to the browser
* get_score(): gets the current game score from the JavaScript variables
* pause(): pauses the game
* resume(): resumes a paused game if not crashed
* end(): closes the browser and ends the game
'''
class Game:
    """Selenium bridge between Python and the dino game running in Chrome.

    All game state is read (and the game is controlled) by executing small
    JavaScript snippets against the page's ``Runner`` object, or by sending
    key presses to the page body.
    """
    def __init__(self,custom_config=True):
        """Launch Chrome, open the game page and prepare the canvas.

        custom_config: currently unused; kept so existing callers keep working.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        # The options object must be handed to the driver, otherwise the
        # arguments configured above never reach the browser.
        self._driver = webdriver.Chrome(executable_path = r'C:\chromedriver.exe', options = chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.get(game_url)
        # no acceleration: the game speed stays constant
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        # give the canvas an id so later screenshots can select it by id
        self._driver.execute_script(init_script)
    def get_crashed(self):
        """Return the game's ``crashed`` flag."""
        return self._driver.execute_script("return Runner.instance_.crashed")
    def get_playing(self):
        """Return the game's ``playing`` flag."""
        return self._driver.execute_script("return Runner.instance_.playing")
    def restart(self):
        """Ask the game to restart."""
        self._driver.execute_script("Runner.instance_.restart()")
    def press_up(self):
        """Send one UP-arrow key press to the page."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)
    def press_down(self):
        """Send one DOWN-arrow key press to the page."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_DOWN)
    def get_score(self):
        """Return the current score as an int.

        The game exposes the score as an array of digits, e.g. [1,0,0] for 100.
        Each digit is converted with str() so both string and integer digits work.
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        score = ''.join(str(digit) for digit in score_array)
        return int(score)
    def pause(self):
        """Pause the game."""
        return self._driver.execute_script("return Runner.instance_.stop()")
    def resume(self):
        """Resume a paused game."""
        return self._driver.execute_script("return Runner.instance_.play()")
    def end(self):
        """Close the browser and shut down the driver session."""
        # quit() also terminates the driver session; close() would only close the window
        self._driver.quit()

In [5]:
class DinoAgent:
    """The dino itself: forwards moves and status queries to the game object."""

    def __init__(self, game):
        # keep the game handle; every move is delegated to it
        self._game = game
        # the game does not start until the first jump is sent
        self.jump()

    def jump(self):
        """Make the dino jump."""
        self._game.press_up()

    def duck(self):
        """Make the dino duck."""
        self._game.press_down()

    def is_crashed(self):
        """Report the game's crashed flag."""
        return self._game.get_crashed()

    def is_running(self):
        """Report the game's playing flag."""
        return self._game.get_playing()

In [6]:
class Game_sate:
    """Environment wrapper: applies an action and returns the resulting experience.

    Relies on the module-level log DataFrames ``actions_df`` and ``scores_df`` and
    on the helpers ``show_img`` and ``grab_screen``.
    """
    def __init__(self,agent,game):
        self._agent = agent
        self._game = game
        self._display = show_img() #display the processed image on screen using openCV, implemented using python coroutine
        next(self._display) # advance the coroutine to its first yield so it can receive frames
    def get_state(self,actions):
        """Apply ``actions`` and return ``(image, reward, is_over)``.

        actions: indexable of length 2; ``actions[1] == 1`` triggers a jump and
                 ``actions[0] == 1`` triggers a duck.
        Returns the processed frame grabbed after the action, a reward of 0.1
        (or -1 when the dino crashed) and whether the game ended. On a crash the
        score is logged and the game is restarted.
        """
        actions_df.loc[len(actions_df)] = actions[0] # storing actions in a dataframe
        score = self._game.get_score()
        reward = 0.1
        is_over = False #game over
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image) #display the image on screen
        # NOTE(review): index 0 is described elsewhere in this notebook as "do nothing",
        # yet it triggers a duck here - confirm this is intended.
        if actions[0] == 1:
            self._agent.duck()
        image = grab_screen(self._game._driver)
        self._display.send(image) #display the image on screen
        if self._agent.is_crashed():
            # append after the last logged score; the row index must come from
            # scores_df itself, not from the (unrelated) loss log
            scores_df.loc[len(scores_df)] = score # log the score when game is over
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over #return the Experience tuple

In [7]:
def save_obj(obj, name ):
    """Pickle ``obj`` to ``objects/<name>.pkl``.

    The ``objects`` folder is created when missing, so the first save on a
    fresh checkout does not fail with FileNotFoundError.
    """
    os.makedirs('objects', exist_ok=True)
    with open('objects/'+ name + '.pkl', 'wb') as f: #dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name ):
    """Return the object previously pickled under ``objects/<name>.pkl``."""
    pickle_path = 'objects/' + name + '.pkl'
    handle = open(pickle_path, 'rb')
    try:
        # NOTE: unpickling executes code from the file - only load files written by this notebook
        return pickle.load(handle)
    finally:
        handle.close()

def grab_screen(_driver):
    """Fetch the game canvas through ``_driver`` and return it after ``process_img``.

    The browser returns the canvas as base64 text; it is decoded, opened as an
    image, converted to a numpy array and then preprocessed.
    """
    encoded_canvas = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded_canvas)
    frame = np.array(Image.open(BytesIO(raw_bytes)))
    return process_img(frame) #processing image as required

def process_img(image):
    """Turn a raw screenshot array into the 80x80 grey-scale frame fed to the network.

    image: colour image array as produced by grab_screen (decoded with PIL, so the
           channels are in R, G, B order - hence the RGB conversion code).
    Returns the grey-scale image cropped to its top-left 300x500 region and
    resized to 80x80.
    """
    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) #RGB to Grey Scale
    image = image[:300, :500] #Crop Region of Interest(ROI)
    image = cv2.resize(image, (80,80))
    return  image

def show_img(graphs = False):
    """
    Show images in new window

    Generator-based coroutine: prime it with next(), then send() each image to
    display. The window is titled "logs" when ``graphs`` is true, otherwise
    "game_play". Pressing 'q' in the window closes all OpenCV windows and ends
    the coroutine, so the send() that follows raises StopIteration.
    """
    # the title never changes, so compute it once instead of on every frame
    window_title = "logs" if graphs else "game_play"
    while True:
        screen = (yield)
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        # the frame is shown as-is; the previous per-frame resize result was never used
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [8]:
#Initialize log structures from file if exists else create new
def _load_log_df(path, column):
    """Read the log CSV at ``path`` if it exists, else return an empty frame with ``column``."""
    if os.path.isfile(path):
        return pd.read_csv(path)
    return pd.DataFrame(columns=[column])

# Each log checks and reads its OWN file (previously the scores log tested the
# loss file and the q-values log read the actions file).
loss_df = _load_log_df(loss_file_path, 'loss')
scores_df = _load_log_df(scores_file_path, 'scores')
actions_df = _load_log_df(actions_file_path, 'actions')
q_values_df = _load_log_df(q_value_file_path, 'qvalues')


In [9]:
# ---- game / learning hyper-parameters ----
ACTIONS = 2              # length of the action vector (jump, do nothing)
GAMMA = 0.99             # discount factor applied to the next state's best Q value
OBSERVATION = 100.0      # number of timesteps to only observe before training starts
EXPLORE = 100000         # number of frames over which epsilon is annealed
FINAL_EPSILON = 0.0001   # epsilon stops decreasing once it reaches this value
INITIAL_EPSILON = 0.1    # epsilon at the very beginning
REPLAY_MEMORY = 50000    # maximum number of transitions kept in the replay memory
BATCH = 16               # minibatch size
FRAME_PER_ACTION = 1     # an action is chosen every FRAME_PER_ACTION timesteps
LEARNING_RATE = 1e-4     # learning rate handed to the Adam optimizer
img_rows = 80            # input image size
img_cols = 80
img_channels = 4         # We stack 4 frames

In [10]:
# training variables saved as checkpoints to filesystem to resume training from the same step
def init_cache():
    """Write the starting checkpoint (epsilon, time step, empty replay memory) to disk."""
    starting_checkpoint = (
        (INITIAL_EPSILON, "epsilon"),  # exploration rate
        (0, "time"),                   # timestep counter
        (deque(), "D"),                # replay memory
    )
    for value, key in starting_checkpoint:
        save_obj(value, key)

In [11]:
'''Call only once to init file structure
'''
# NOTE: every execution of this cell overwrites the cached epsilon, time step and
# replay memory with their initial values, so any saved training progress in
# those files is reset.
init_cache()

In [12]:
def buildmodel():
    """Build and compile the convolutional Q-network.

    Three conv / max-pool / relu stages followed by a 512-unit dense layer and
    a linear output with one unit per action. Compiled with MSE loss and Adam.
    If no 'model.h5' weight file exists yet, the freshly initialised weights
    are written to it so that later load_weights() calls succeed.

    Returns the compiled Keras model.
    """
    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_cols,img_rows,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)

    #create model file if not present
    # Test for the weight file itself: checking an unrelated log file could
    # overwrite existing weights or skip creating missing ones.
    if not os.path.isfile('model.h5'):
        model.save_weights('model.h5')
    return model

In [13]:
''' 
main training module
Parameters:
* model => Keras model to be trained
* game_state => Game state module with access to the game environment and the dino
* observe => if True, only play using the saved weights (no weight updates); if False, train the model
'''
def trainNetwork(model,game_state,observe=False):
    """Run the endless play / train loop of the deep Q-learning agent.

    Parameters:
    * model => compiled Keras model; its weights are loaded from "model.h5"
    * game_state => object whose get_state(action_vector) returns (image, reward, is_over)
    * observe => if True the agent only plays with the stored weights and is never
                 trained; if False the weights are updated once the observation
                 phase is over

    Replay memory, timestep and epsilon are restored through load_obj() and are
    checkpointed (together with the weights and the log CSVs) every 1000 steps.
    The loop has no exit condition of its own: it only ends when something it
    calls raises.
    """
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D") #load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] =1 #0 => do nothing,
                     #1=> jump

    x_t, r_0, terminal = game_state.get_state(do_nothing) # get next step after performing the action

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2) # stack 4 images to create placeholder input
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # add batch axis: 1 x rows x cols x 4

    initial_state = s_t

    if observe :
        OBSERVE = 999999999    #We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)

    t = load_obj("time") # resume from the previous time step stored in file system
    while (True): #endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0 #reward at t
        a_t = np.zeros([ACTIONS]) # action at t

        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: #parameter to skip frames for actions
            if  random.random() <= epsilon: #randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else: # predict the output
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)         # chosing index with maximum q value
                action_index = max_Q
                a_t[action_index] = 1        # 0=> do nothing, 1=> jump

        #We reduced the epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        #run the selected action and observed next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time()-last_time))) # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # 1 x rows x cols x 1
        # the new frame goes to the front of the stack; the last (oldest) frame is dropped
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing
        if t > OBSERVE:

            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   # BATCH x rows x cols x 4
            targets = np.zeros((inputs.shape[0], ACTIONS))                         # BATCH x ACTIONS

            #Now we do the experience replay
            # The sampled terminal flag gets its own name (terminal_t): reusing
            # `terminal` would overwrite the flag of the CURRENT step, which is
            # still needed below to decide whether the frame stack is reset.
            for i, (state_t, action_t, reward_t, state_t1, terminal_t) in enumerate(minibatch):
                # state_t:    4D stack of images
                # action_t:   action index
                # reward_t:   reward at state_t due to action_t
                # state_t1:   next state
                # terminal_t: whether the agent died due to the action

                inputs[i:i + 1] = state_t

                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(state_t1)      #predict q values for next step

                if terminal_t:
                    targets[i, action_t] = reward_t # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 #reset game to initial frame if terminate
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            game_state._game.pause() #pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D,"D") #saving episodes
            save_obj(t,"time") #caching time steps
            save_obj(epsilon,"epsilon") #cache epsilon to avoid repeated randomness in actions
            # write each log back to the file it is loaded from
            loss_df.to_csv(loss_file_path,index=False)
            scores_df.to_csv(scores_file_path,index=False)
            actions_df.to_csv(actions_file_path,index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "\t STATE", state, "\n EPSILON", epsilon, "\t ACTION", action_index, "\n REWARD", r_t,             "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)


In [14]:
#main function
def playGame(observe=False):
    """Set up the browser, agent, environment and model, then run trainNetwork.

    observe: passed through to trainNetwork.

    A StopIteration coming out of the run (raised when the display coroutine
    has finished) ends the session quietly; any other exception propagates.
    In every case the browser is closed, so an interrupted or failed run does
    not leave it open.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        trainNetwork(model,game_state,observe=observe)
    except StopIteration:
        pass  # normal way out; the browser is closed below
    finally:
        game.end()

In [15]:
# Start a session in training mode (observe=False). The training loop is endless,
# so this cell keeps running until it is interrupted or an exception is raised.
playGame(observe=False);

Instructions for updating:
Colocations handled automatically by placer.
fps: 2.223589760989378
TIMESTEP 1 	 STATE observe 
 EPSILON 0.1 	 ACTION 1 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.268594656654358
TIMESTEP 2 	 STATE observe 
 EPSILON 0.1 	 ACTION 1 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.13907689939781
TIMESTEP 3 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
fps: 12.665988216713423
TIMESTEP 4 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 10.644793210565854
TIMESTEP 5 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 15.394200983630626
TIMESTEP 6 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
fps: 15.882521782924307
TIMESTEP 7 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
----------Random Action----------
fps: 13.52198694968148
TIMESTEP 8 	 STATE observe 
 EPSILO

TIMESTEP 70 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 15.882702211451075
TIMESTEP 71 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 13.165911737655106
TIMESTEP 72 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 15.634520188465437
TIMESTEP 73 	 STATE observe 
 EPSILON 0.1 	 ACTION 1 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.403479131468618
TIMESTEP 74 	 STATE observe 
 EPSILON 0.1 	 ACTION 1 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.13907689939781
TIMESTEP 75 	 STATE observe 
 EPSILON 0.1 	 ACTION 1 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.139014798796396
TIMESTEP 76 	 STATE observe 
 EPSILON 0.1 	 ACTION 0 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 15.634287077487365
TIMESTEP 77 	 STATE observe 
 EPSILON 0.1 	 ACTION 1 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.403799899096956
TIMESTEP 78 	 STATE observe 
 EPSILON 0.1 	 ACTION 1 
 REWARD 0.1 / Q_MAX  0 / Loss  0
fps: 16.403607437014557
TI

TIMESTEP 132 	 STATE explore 
 EPSILON 0.09996903099999996 	 ACTION 1 
 REWARD 0.1 / Q_MAX  12.524648 / Loss  0.5036306977272034
fps: 7.239572078058817
TIMESTEP 133 	 STATE explore 
 EPSILON 0.09996803199999996 	 ACTION 1 
 REWARD 0.1 / Q_MAX  11.838965 / Loss  0.19858069717884064
fps: 7.096528807043903
TIMESTEP 134 	 STATE explore 
 EPSILON 0.09996703299999996 	 ACTION 0 
 REWARD 0.1 / Q_MAX  13.589549 / Loss  0.25393497943878174
fps: 7.096600849364669
TIMESTEP 135 	 STATE explore 
 EPSILON 0.09996603399999995 	 ACTION 0 
 REWARD 0.1 / Q_MAX  11.258528 / Loss  0.7029576301574707
fps: 7.096564828021447
TIMESTEP 136 	 STATE explore 
 EPSILON 0.09996503499999995 	 ACTION 1 
 REWARD 0.1 / Q_MAX  12.821321 / Loss  0.36910587549209595
fps: 7.198668154123402
TIMESTEP 137 	 STATE explore 
 EPSILON 0.09996403599999995 	 ACTION 0 
 REWARD 0.1 / Q_MAX  15.869523 / Loss  0.32822003960609436
fps: 7.19868050919164
TIMESTEP 138 	 STATE explore 
 EPSILON 0.09996303699999995 	 ACTION 1 
 REWARD 0.1 / 

TIMESTEP 185 	 STATE explore 
 EPSILON 0.09991608399999988 	 ACTION 1 
 REWARD 0.1 / Q_MAX  10.771335 / Loss  0.31287989020347595
fps: 7.143046423030483
TIMESTEP 186 	 STATE explore 
 EPSILON 0.09991508499999988 	 ACTION 0 
 REWARD 0.1 / Q_MAX  11.631937 / Loss  0.36968743801116943
fps: 7.094788228985708
TIMESTEP 187 	 STATE explore 
 EPSILON 0.09991408599999987 	 ACTION 1 
 REWARD 0.1 / Q_MAX  11.205051 / Loss  0.2861787676811218
fps: 7.19629676653364
TIMESTEP 188 	 STATE explore 
 EPSILON 0.09991308699999987 	 ACTION 0 
 REWARD 0.1 / Q_MAX  12.27859 / Loss  0.25170016288757324
fps: 7.197556371623708
TIMESTEP 189 	 STATE explore 
 EPSILON 0.09991208799999987 	 ACTION 1 
 REWARD 0.1 / Q_MAX  9.095083 / Loss  0.20290139317512512
fps: 7.046573906381778
TIMESTEP 190 	 STATE explore 
 EPSILON 0.09991108899999987 	 ACTION 0 
 REWARD 0.1 / Q_MAX  13.286111 / Loss  0.13774585723876953
fps: 7.1472457663221105
TIMESTEP 191 	 STATE explore 
 EPSILON 0.09991008999999987 	 ACTION 1 
 REWARD 0.1 / 

fps: 7.096528807043903
TIMESTEP 239 	 STATE explore 
 EPSILON 0.0998621379999998 	 ACTION 0 
 REWARD 0.1 / Q_MAX  12.986844 / Loss  0.35247042775154114
fps: 7.250823308134198
TIMESTEP 240 	 STATE explore 
 EPSILON 0.0998611389999998 	 ACTION 1 
 REWARD 0.1 / Q_MAX  7.54773 / Loss  0.6968091726303101
fps: 7.096528807043903
TIMESTEP 241 	 STATE explore 
 EPSILON 0.09986013999999979 	 ACTION 0 
 REWARD 0.1 / Q_MAX  7.581063 / Loss  0.8496133685112
fps: 7.046656776749398
TIMESTEP 242 	 STATE explore 
 EPSILON 0.09985914099999979 	 ACTION 0 
 REWARD 0.1 / Q_MAX  7.7228413 / Loss  0.5732768177986145
fps: 7.193199026570375
TIMESTEP 243 	 STATE explore 
 EPSILON 0.09985814199999979 	 ACTION 0 
 REWARD 0.1 / Q_MAX  12.203072 / Loss  0.1537107229232788
fps: 7.194149045652264
TIMESTEP 244 	 STATE explore 
 EPSILON 0.09985714299999979 	 ACTION 1 
 REWARD 0.1 / Q_MAX  12.378609 / Loss  0.32946616411209106
fps: 7.143058587909112
TIMESTEP 245 	 STATE explore 
 EPSILON 0.09985614399999979 	 ACTION 1 


TIMESTEP 293 	 STATE explore 
 EPSILON 0.09980819199999971 	 ACTION 1 
 REWARD 0.1 / Q_MAX  20.657885 / Loss  0.413770467042923
----------Random Action----------
fps: 7.04359187073242
TIMESTEP 294 	 STATE explore 
 EPSILON 0.09980719299999971 	 ACTION 1 
 REWARD 0.1 / Q_MAX  14.7382345 / Loss  0.20387002825737
fps: 7.090698232694642
TIMESTEP 295 	 STATE explore 
 EPSILON 0.09980619399999971 	 ACTION 0 
 REWARD 0.1 / Q_MAX  17.98555 / Loss  0.3135032057762146
fps: 7.197877517959068
TIMESTEP 296 	 STATE explore 
 EPSILON 0.09980519499999971 	 ACTION 0 
 REWARD 0.1 / Q_MAX  19.956825 / Loss  0.34968101978302
fps: 7.145175907817898
TIMESTEP 297 	 STATE explore 
 EPSILON 0.0998041959999997 	 ACTION 0 
 REWARD 0.1 / Q_MAX  11.179217 / Loss  0.6655891537666321
fps: 7.1450176738639755
TIMESTEP 298 	 STATE explore 
 EPSILON 0.0998031969999997 	 ACTION 0 
 REWARD 0.1 / Q_MAX  15.838426 / Loss  0.8208165168762207
fps: 7.094812231151648
TIMESTEP 299 	 STATE explore 
 EPSILON 0.0998021979999997 	 A

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=74.0.3729.169)
  (Driver info: chromedriver=74.0.3729.6 (255758eccf3d244491b8a1317aa76e1ce10d57e9-refs/branch-heads/3729@{#29}),platform=Windows NT 10.0.17134 x86_64)
