In [3]:
import base64
import json
import os
import pickle
import random
import time
from collections import deque
from io import BytesIO

import cv2  #opencv
import numpy as np
import pandas as pd
from IPython.display import clear_output
from PIL import Image
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Dense, Activation, Flatten
#keras imports
from keras.models import Sequential
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from tensorflow.keras.optimizers import Adam

In [18]:
# Path variables
game_url = "chrome://dino"
# Raw string: the Windows path contains backslashes that must not be read as
# escape sequences (the non-raw form only worked because \P, \d and \c happen
# not to be recognised escapes, and it triggers an invalid-escape warning).
chrome_driver_path = r"E:\Programming\dino-game\chromedriver"
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# Scripts (JavaScript executed in the browser through Selenium)
# Give the game canvas an id so it can be selected faster from the DOM
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# Get the current canvas image as a data URL; substring(22) drops the first
# 22 characters of the data URL (presumably the "data:image/png;base64," prefix)
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

In [20]:
'''
* Game class: Selenium interface between Python and the browser
* __init__(): launches the browser window using the attributes in chrome_options
* get_crashed(): returns true if the agent has crashed into an obstacle. Reads the JavaScript variable describing the game state
* get_playing(): true if the game is in progress, false if crashed or paused
* restart(): sends a signal to the browser's JavaScript to restart the game
* press_up(): sends a single up-arrow key press to the browser
* get_score(): gets the current game score from JavaScript variables
* pause(): pauses the game
* resume(): resumes a paused game if not crashed
* end(): closes the browser and ends the game
'''


class Game:
    """Selenium bridge between Python and the dino game running in Chrome.

    All game state is read and changed by executing JavaScript against the
    page's ``Runner`` object; key presses are sent to the page ``<body>``.
    """

    def __init__(self, custom_config=True):
        """Launch Chrome, open the game page and prepare the canvas.

        Parameters:
        * custom_config => when True (default) the game's acceleration is set
          to 0 via ``Runner.config.ACCELERATION``; when False the game's own
          configuration is left untouched.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path=chrome_driver_path, chrome_options=chrome_options)
        self._driver.set_window_position(x=-10, y=0)
        try:
            self._driver.get(game_url)
        except Exception:
            # Best-effort navigation, as before: errors raised while opening
            # the game page are ignored (presumably the driver reports the
            # offline page as a failed load - TODO confirm). Unlike a bare
            # ``except`` this no longer swallows KeyboardInterrupt/SystemExit.
            pass
        if custom_config:
            self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)

    def get_crashed(self):
        """Return the game's ``Runner.instance_.crashed`` flag."""
        return self._driver.execute_script("return Runner.instance_.crashed")

    def get_playing(self):
        """Return the game's ``Runner.instance_.playing`` flag."""
        return self._driver.execute_script("return Runner.instance_.playing")

    def restart(self):
        """Ask the game to restart."""
        self._driver.execute_script("Runner.instance_.restart()")

    def press_up(self):
        """Send a single up-arrow key press to the page (jump)."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)

    def press_down(self):
        """Send a single down-arrow key press to the page (duck)."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_DOWN)

    def get_score(self):
        """Return the current score as an int (0 when no digits are available).

        The JavaScript side exposes the score as an array of digits, e.g.
        [1, 0, 0] for 100; digits may arrive as strings or numbers.
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        score = ''.join(str(digit) for digit in (score_array or []))
        return int(score) if score else 0

    def pause(self):
        """Pause the game; returns whatever ``Runner.instance_.stop()`` returns."""
        return self._driver.execute_script("return Runner.instance_.stop()")

    def resume(self):
        """Resume the game; returns whatever ``Runner.instance_.play()`` returns."""
        return self._driver.execute_script("return Runner.instance_.play()")

    def end(self):
        """Close the browser and shut down the driver session.

        ``quit()`` is used instead of ``close()`` so the driver process is
        released as well, not just the current window.
        """
        self._driver.quit()

In [6]:
class DinoAgent:
    """Thin wrapper that exposes the dino's actions and status on top of a game object."""

    def __init__(self, game):
        """Store the game used to perform actions and trigger the first jump."""
        self._game = game
        # The game only starts after an initial jump
        self.jump()

    # --- actions ---------------------------------------------------------
    def jump(self):
        """Make the dino jump by pressing the up key in the game."""
        self._game.press_up()

    def duck(self):
        """Make the dino duck by pressing the down key in the game."""
        self._game.press_down()

    # --- status ----------------------------------------------------------
    def is_running(self):
        """Return the game's "playing" status."""
        playing = self._game.get_playing()
        return playing

    def is_crashed(self):
        """Return the game's "crashed" status."""
        crashed = self._game.get_crashed()
        return crashed

In [7]:
class Game_sate:
    """Environment wrapper: applies an action and returns the resulting experience.

    Reads and appends to the module-level ``actions_df`` and ``scores_df`` logs.
    """

    def __init__(self, agent, game):
        """Store the agent and game and start the image-display coroutine."""
        self._agent = agent
        self._game = game
        self._display = show_img()  # displays the processed image on screen using OpenCV (python coroutine)
        next(self._display)  # advance the coroutine to its first ``yield`` so it can receive images

    def get_state(self, actions):
        """Perform ``actions`` and return ``(image, reward, is_over)``.

        Parameters:
        * actions => indexable action vector; ``actions[1] == 1`` means jump,
          anything else means do nothing.

        Returns the processed screenshot taken after the action, a reward of
        0.1 for surviving or -1 for crashing, and whether the game ended.
        On a crash the score is logged and the game is restarted.
        """
        actions_df.loc[len(actions_df)] = actions[1]  # log the chosen action
        score = self._game.get_score()
        reward = 0.1
        is_over = False  # game over
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image)  # display the image on screen
        if self._agent.is_crashed():
            # Append at the end of scores_df itself; indexing by len(loss_df)
            # wrote the score to an unrelated row and could overwrite earlier scores.
            scores_df.loc[len(scores_df)] = score
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over  # the experience tuple

In [8]:
def save_obj(obj, name):
    """Pickle ``obj`` to ``objects/<name>.pkl``.

    The ``objects`` folder is created when missing so the very first run
    (before any checkpoint exists) does not fail with FileNotFoundError.
    """
    os.makedirs('objects', exist_ok=True)
    with open('objects/' + name + '.pkl', 'wb') as f:  # dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


def load_obj(name):
    """Return the object unpickled from ``objects/<name>.pkl``."""
    pickle_path = 'objects/' + name + '.pkl'
    # NOTE: unpickling can execute arbitrary code; only load files written by this notebook.
    with open(pickle_path, 'rb') as stream:
        restored = pickle.load(stream)
    return restored


def grab_screen(_driver):
    """Capture the game canvas through ``_driver`` and return the processed frame.

    Runs ``getbase64Script`` in the browser, base64-decodes the returned
    string, opens it as an image and passes the pixel array to ``process_img``.
    """
    encoded_frame = _driver.execute_script(getbase64Script)
    frame_stream = BytesIO(base64.b64decode(encoded_frame))
    raw_pixels = np.array(Image.open(frame_stream))
    return process_img(raw_pixels)  # processing image as required


def process_img(image):
    """Convert a screenshot to an 80x80 grayscale frame.

    Steps: grayscale conversion, crop to the top-left 300x500 (rows x cols)
    region of interest, then resize to 80x80.
    """
    # NOTE(review): the conversion code is BGR2GRAY while the caller decodes
    # frames with PIL (presumably RGB/RGBA channel order) - confirm whether
    # an RGB variant was intended.
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    region_of_interest = grey[:300, :500]
    return cv2.resize(region_of_interest, (80, 80))


def show_img(graphs=False):
    """
    Coroutine that shows every image sent to it in an OpenCV window.

    Prime it with ``next()`` and then ``send(image)`` each frame. The window
    is titled "logs" when ``graphs`` is true, otherwise "game_play". Pressing
    'q' in the window destroys all OpenCV windows and ends the coroutine, so
    that ``send()`` call raises StopIteration.
    """
    window_title = "logs" if graphs else "game_play"  # does not change between frames
    while True:
        screen = (yield)
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        # The frame is shown as received. The previous per-frame
        # cv2.resize(screen, (800, 400)) result was never displayed, so that
        # wasted work has been dropped.
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [9]:
# Initialize log structures from file if it exists, else create new
def _load_log(path, column):
    """Return the CSV at ``path`` as a DataFrame, or an empty frame with ``column`` if the file is missing."""
    if os.path.isfile(path):
        return pd.read_csv(path)
    return pd.DataFrame(columns=[column])


# Each log is checked against and read from its own file: previously the
# scores log tested for the loss file and the q-values log read the actions CSV.
loss_df = _load_log(loss_file_path, 'loss')
scores_df = _load_log(scores_file_path, 'scores')
actions_df = _load_log(actions_file_path, 'actions')
q_values_df = _load_log(q_value_file_path, 'qvalues')

In [10]:
# Game / training hyper-parameters
ACTIONS = 2  # number of possible actions: do nothing, jump
GAMMA = 0.99  # discount factor applied to the best next-state Q value
OBSERVATION = 100.0  # timesteps to observe before training starts
EXPLORE = 100000  # number of frames over which epsilon is annealed
FINAL_EPSILON = 0.0001  # epsilon value at the end of annealing
INITIAL_EPSILON = 0.1  # epsilon value at the start
REPLAY_MEMORY = 50000  # maximum number of past transitions kept
BATCH = 16  # minibatch size
FRAME_PER_ACTION = 1  # an action is chosen every FRAME_PER_ACTION timesteps
LEARNING_RATE = 1e-4  # optimizer learning rate
# Shape of the network input: four stacked frames
img_rows = 80
img_cols = 80
img_channels = 4

In [11]:
# Training variables are checkpointed to the filesystem so training can resume from the same step
def init_cache():
    """Write the starting epsilon, a zero time step and an empty replay memory to the cache."""
    starting_values = (
        ("epsilon", INITIAL_EPSILON),
        ("time", 0),
        ("D", deque()),
    )
    for cache_name, value in starting_values:
        save_obj(value, cache_name)

In [12]:
'''Call only once to init file structure
'''
#init_cache()

'Call only once to init file structure\n'

In [24]:
def buildmodel():
    """Build and compile the Q-network and make sure a weights file exists.

    The network is three Conv2D/MaxPooling/ReLU stages followed by a 512-unit
    dense layer and a linear output with one value per action. It is compiled
    with MSE loss and Adam at LEARNING_RATE. If ``model.h5`` does not exist
    yet, the freshly initialised weights are saved there so that a later
    ``load_weights("model.h5")`` has a file to read.

    Returns the compiled Keras model.
    """
    print("Now we build the model")
    model = Sequential()
    # Frames are arrays of shape (rows, cols); four of them are stacked as channels.
    model.add(
        Conv2D(32, (8, 8), padding='same', strides=(4, 4), input_shape=(img_rows, img_cols, img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(learning_rate=LEARNING_RATE)  # ``lr`` is a deprecated alias of ``learning_rate``
    model.compile(loss='mse', optimizer=adam)

    # Create the weights file if not present. The check is on model.h5 itself
    # (it used to test for the loss CSV, which could overwrite trained weights
    # or leave the file missing). No explicit build() is needed: the first
    # layer declares input_shape, and the old build((1, 80, 80, 1)) call
    # contradicted the 4-channel input.
    if not os.path.isfile('model.h5'):
        model.save_weights('model.h5')
    print("We finish building the model")
    return model

In [14]:
'''
Main training module
Parameters:
* model => Keras model to be trained
* game_state => Game state module with access to the game environment and the dino
* observe => flag: when True the model only plays with the saved weights (no weight updates); when False it is trained
'''


def _replay_update(model, D):
    """Run one experience-replay training step on a random minibatch from ``D``.

    Every entry of ``D`` is a ``(state, action_index, reward, next_state,
    terminal)`` tuple as stored by ``trainNetwork``; each state has a leading
    batch axis of size 1.

    Returns ``(batch_loss, last_q_next)`` where ``last_q_next`` holds the
    predicted Q values for the next state of the last sampled transition
    (used for logging only).
    """
    minibatch = random.sample(D, BATCH)

    # Stack the sampled states so the network is queried once per batch
    # instead of twice per sample.
    inputs = np.concatenate([sample[0] for sample in minibatch], axis=0).astype(np.float64)
    next_states = np.concatenate([sample[3] for sample in minibatch], axis=0).astype(np.float64)

    targets = np.array(model.predict(inputs), dtype=np.float64)  # predicted q values
    q_next = model.predict(next_states)  # predicted q values for the next step

    for i, (_, action_t, reward_t, _, sample_terminal) in enumerate(minibatch):
        if sample_terminal:
            targets[i, action_t] = reward_t  # if terminated, only equals reward
        else:
            targets[i, action_t] = reward_t + GAMMA * np.max(q_next[i])

    batch_loss = model.train_on_batch(inputs, targets)
    return batch_loss, q_next[-1:]


def _save_checkpoint(model, game_state, D, t, epsilon):
    """Pause the game and persist weights, replay memory, counters and logs."""
    print("Now we save model")
    game_state._game.pause()  # pause game while saving to filesystem
    model.save_weights("model.h5", overwrite=True)
    save_obj(D, "D")  # saving episodes
    save_obj(t, "time")  # caching time steps
    save_obj(epsilon, "epsilon")  # cache epsilon to avoid repeated randomness in actions
    loss_df.to_csv(loss_file_path, index=False)
    scores_df.to_csv(scores_file_path, index=False)
    actions_df.to_csv(actions_file_path, index=False)
    q_values_df.to_csv(q_value_file_path, index=False)
    # NOTE(review): model.to_json() presumably already returns a JSON string,
    # so json.dump stores it as one quoted string. Kept unchanged so existing
    # readers of model.json keep working - confirm before changing.
    with open("model.json", "w") as outfile:
        json.dump(model.to_json(), outfile)
    clear_output()
    game_state._game.resume()


def trainNetwork(model, game_state, observe=False):
    """Play the game with an epsilon-greedy policy and (optionally) train the model.

    Parameters:
    * model => compiled Keras model mapping a stack of 4 frames to one Q value per action
    * game_state => object whose ``get_state(action)`` returns ``(image, reward, is_over)``
    * observe => when True only play with the saved weights (training never
      starts and epsilon is FINAL_EPSILON); when False resume training from
      the cached epsilon

    The replay memory and time step are loaded from the cache and everything
    is checkpointed every 1000 steps. The loop is endless: the function only
    stops when an exception propagates out of it.
    """
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D")  # load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1  # 0 => do nothing, 1 => jump

    x_t, _, terminal = game_state.get_state(do_nothing)  # get next step after performing the action

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)  # stack 4 images to create placeholder input
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1 x rows x cols x 4
    initial_state = s_t

    if observe:
        OBSERVE = 999999999  # We keep observing, never train
        epsilon = FINAL_EPSILON
        print("Now we load weight")
    else:  # We go to training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
    model.load_weights("model.h5")
    model.compile(loss='mse', optimizer=Adam(learning_rate=LEARNING_RATE))
    if observe:
        print("Weight load successfully")

    t = load_obj("time")  # resume from the previous time step stored in file system
    while True:  # endless running
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0  # reward at t
        a_t = np.zeros([ACTIONS])  # action at t

        # choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0:  # parameter to skip frames for actions
            if random.random() <= epsilon:  # randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
            else:  # predict the output
                q = model.predict(s_t)  # input a stack of 4 images, get the prediction
                action_index = np.argmax(q)  # choosing index with maximum q value
            a_t[action_index] = 1  # 0 => do nothing, 1 => jump

        # We reduce the epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time() - last_time)))  # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)  # 1 x rows x cols x 1
        # put the new frame first and keep the first three frames of the old stack (drops the last one)
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing (and there is enough experience to sample a batch)
        if t > OBSERVE and len(D) >= BATCH:
            batch_loss, Q_sa = _replay_update(model, D)
            loss += batch_loss
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)

        # ``terminal`` is the flag of the step just played. The replay loop
        # used to overwrite it with a sampled transition's flag, so the state
        # was reset (or not) at random.
        s_t = initial_state if terminal else s_t1  # reset game to initial frame if terminated
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            _save_checkpoint(model, game_state, D, t, epsilon)

        # print info
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t,
              "/ Q_MAX ", np.max(Q_sa), "/ Loss ", loss)


In [15]:
#main function
def playGame(observe=False):
    """Set up the game, agent and model, then run ``trainNetwork``.

    Parameters:
    * observe => passed through to ``trainNetwork``

    StopIteration from the training loop is treated as a normal stop. The
    browser is always closed on the way out, including when set-up or
    training fails with any other exception (which is then re-raised).
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino, game)
        model = buildmodel()
        trainNetwork(model, game_state, observe=observe)
    except StopIteration:
        pass
    finally:
        game.end()

In [None]:
# Initialise the cache only when a checkpoint file is missing. Calling
# init_cache() unconditionally resets the saved epsilon, time step and replay
# memory on every run, so training could never resume from a checkpoint.
_cache_files = [os.path.join("objects", _name + ".pkl") for _name in ("epsilon", "time", "D")]
if not all(os.path.isfile(_path) for _path in _cache_files):
    init_cache()
playGame(observe=False)

TIMESTEP 7000 / STATE explore / EPSILON 0.09310789899998947 / ACTION 0 / REWARD -1 / Q_MAX  -0.58873814 / Loss  0.006693670060485601
fps: 0.36949952300663774
TIMESTEP 7001 / STATE explore / EPSILON 0.09310689999998947 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.55603456 / Loss  0.00845547765493393
fps: 0.5627452356281272
TIMESTEP 7002 / STATE explore / EPSILON 0.09310590099998947 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.82244855 / Loss  0.013020455837249756
fps: 0.571428727130543
TIMESTEP 7003 / STATE explore / EPSILON 0.09310490199998947 / ACTION 0 / REWARD -1 / Q_MAX  -0.7104302 / Loss  0.008381884545087814
fps: 0.556163023780014
TIMESTEP 7004 / STATE explore / EPSILON 0.09310390299998947 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.8571637 / Loss  0.005142907612025738
fps: 0.5485574142610928
TIMESTEP 7005 / STATE explore / EPSILON 0.09310290399998947 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.9129097 / Loss  0.008280093781650066
fps: 0.5665715154865165
TIMESTEP 7006 / STATE explore / EPSILON 0.09310190499998947

TIMESTEP 7052 / STATE explore / EPSILON 0.0930559509999894 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.6408235 / Loss  0.004547004587948322
----------Random Action----------
fps: 0.5790377570415497
TIMESTEP 7053 / STATE explore / EPSILON 0.0930549519999894 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.88320005 / Loss  0.0073694102466106415
fps: 0.5659311983324995
TIMESTEP 7054 / STATE explore / EPSILON 0.09305395299998939 / ACTION 1 / REWARD -1 / Q_MAX  -0.88146085 / Loss  0.007647787220776081
fps: 0.5605376173799506
TIMESTEP 7055 / STATE explore / EPSILON 0.09305295399998939 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.9174421 / Loss  0.005425114184617996
fps: 0.5630625266894981
TIMESTEP 7056 / STATE explore / EPSILON 0.09305195499998939 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.7864844 / Loss  0.006087565794587135
fps: 0.5724116490565718
TIMESTEP 7057 / STATE explore / EPSILON 0.09305095599998939 / ACTION 1 / REWARD -1 / Q_MAX  0.73827916 / Loss  0.00546561274677515
fps: 0.5714286492795466
TIMESTEP 7058 / STATE expl

TIMESTEP 7103 / STATE explore / EPSILON 0.09300500199998932 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.73873746 / Loss  0.004151638597249985
fps: 0.4313603165959355
TIMESTEP 7104 / STATE explore / EPSILON 0.09300400299998932 / ACTION 1 / REWARD -1 / Q_MAX  -0.9794222 / Loss  0.005042682867497206
fps: 0.46168211604596715
TIMESTEP 7105 / STATE explore / EPSILON 0.09300300399998931 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.6505768 / Loss  0.0028785024769604206
fps: 0.4446336114618534
TIMESTEP 7106 / STATE explore / EPSILON 0.09300200499998931 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.8828629 / Loss  0.004501651972532272
----------Random Action----------
fps: 0.3930821800853172
TIMESTEP 7107 / STATE explore / EPSILON 0.09300100599998931 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.610067 / Loss  0.008133905939757824
----------Random Action----------
fps: 0.36999822953594524
TIMESTEP 7108 / STATE explore / EPSILON 0.09300000699998931 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.551256 / Loss  0.004623687360435724
fps: 0.457392

TIMESTEP 7155 / STATE explore / EPSILON 0.09295305399998924 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.595956 / Loss  0.010195868089795113
fps: 0.4854422441407699
TIMESTEP 7156 / STATE explore / EPSILON 0.09295205499998924 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.79959255 / Loss  0.006404045037925243
fps: 0.5257640167565419
TIMESTEP 7157 / STATE explore / EPSILON 0.09295105599998923 / ACTION 0 / REWARD -1 / Q_MAX  -0.64077103 / Loss  0.00441958662122488
fps: 0.4836489719398995
TIMESTEP 7158 / STATE explore / EPSILON 0.09295005699998923 / ACTION 0 / REWARD 0.1 / Q_MAX  0.8600019 / Loss  0.010211970657110214
fps: 0.4709114788759751
TIMESTEP 7159 / STATE explore / EPSILON 0.09294905799998923 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.8544824 / Loss  0.004758643917739391
fps: 0.5305035117697726
TIMESTEP 7160 / STATE explore / EPSILON 0.09294805899998923 / ACTION 0 / REWARD -1 / Q_MAX  -0.79342335 / Loss  0.004360673017799854
fps: 0.4615673952351518
TIMESTEP 7161 / STATE explore / EPSILON 0.09294705999998923 

TIMESTEP 7206 / STATE explore / EPSILON 0.09290210499998916 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.68802613 / Loss  0.002778255380690098
----------Random Action----------
fps: 0.49236814259187567
TIMESTEP 7207 / STATE explore / EPSILON 0.09290110599998916 / ACTION 1 / REWARD 0.1 / Q_MAX  -1.0571176 / Loss  0.01169564388692379
fps: 0.5249345881789895
TIMESTEP 7208 / STATE explore / EPSILON 0.09290010699998916 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.86598176 / Loss  0.008664844557642937
fps: 0.48232783012711306
TIMESTEP 7209 / STATE explore / EPSILON 0.09289910799998916 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.6874941 / Loss  0.005446516443043947
fps: 0.4725666973238146
TIMESTEP 7210 / STATE explore / EPSILON 0.09289810899998915 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.7047212 / Loss  0.0037665702402591705
fps: 0.5302220463090233
TIMESTEP 7211 / STATE explore / EPSILON 0.09289710999998915 / ACTION 0 / REWARD -1 / Q_MAX  -0.8881607 / Loss  0.0052315853536129
fps: 0.4913706501941734
TIMESTEP 7212 / STATE e

TIMESTEP 7258 / STATE explore / EPSILON 0.09285015699998908 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.66768473 / Loss  0.0074127307161688805
fps: 0.4692645074903463
TIMESTEP 7259 / STATE explore / EPSILON 0.09284915799998908 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.8203984 / Loss  0.006002385634928942
----------Random Action----------
fps: 0.4882809425909843
TIMESTEP 7260 / STATE explore / EPSILON 0.09284815899998908 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.8850875 / Loss  0.005656424909830093
fps: 0.4868560144704385
TIMESTEP 7261 / STATE explore / EPSILON 0.09284715999998908 / ACTION 0 / REWARD -1 / Q_MAX  -0.57129335 / Loss  0.013570940122008324
----------Random Action----------
fps: 0.5324812220028045
TIMESTEP 7262 / STATE explore / EPSILON 0.09284616099998907 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.9715739 / Loss  0.004989129025489092
fps: 0.5009957075639374
TIMESTEP 7263 / STATE explore / EPSILON 0.09284516199998907 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.8536959 / Loss  0.003784062573686242
fps: 0.50890

TIMESTEP 7309 / STATE explore / EPSILON 0.092799207999989 / ACTION 0 / REWARD -1 / Q_MAX  -0.7842278 / Loss  0.008798952214419842
fps: 0.37921924290577336
TIMESTEP 7310 / STATE explore / EPSILON 0.092798208999989 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.8391976 / Loss  0.009235993027687073
fps: 0.46167479822454105
TIMESTEP 7311 / STATE explore / EPSILON 0.092797209999989 / ACTION 0 / REWARD -1 / Q_MAX  -0.73463833 / Loss  0.005782362073659897
fps: 0.43552921478964135
TIMESTEP 7312 / STATE explore / EPSILON 0.092796210999989 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.54327255 / Loss  0.0067012254148721695
fps: 0.41472919572889555
TIMESTEP 7313 / STATE explore / EPSILON 0.092795211999989 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.9264092 / Loss  0.006566997617483139
----------Random Action----------
fps: 0.5387376854891361
TIMESTEP 7314 / STATE explore / EPSILON 0.092794212999989 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.88326216 / Loss  0.005204196088016033
fps: 0.4572473632508127
TIMESTEP 7315 / STATE explore 