In [1]:
import numpy as np
from PIL import Image
import cv2 #opencv
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

#keras imports
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json

Using TensorFlow backend.


In [2]:
# ---- locations of the game, the driver binary and the CSV logs ----
game_url = "chrome://dino"
chrome_driver_path = "chromedriver.exe"

loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# ---- JavaScript snippets executed inside the page ----
# Tag the game canvas with an id so later look-ups can use getElementById.
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# Return the canvas content as a base64 string; substring(22) drops the
# leading "data:image/png;base64," prefix of the data URL.
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

In [3]:
class Game:
    """Thin Selenium wrapper around the Chrome dino game.

    Every method talks to the page either through JavaScript executed on the
    global ``Runner`` object or through key presses sent to the page body.
    """
    def __init__(self,custom_config=True):
        """Launch Chrome, open the game page and prepare the canvas.

        custom_config -- when True (default) the game's acceleration is set to 0
                         via ``Runner.config.ACCELERATION``; when False the
                         game's own configuration is left untouched.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,chrome_options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.get(game_url)
        if custom_config:
            self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)
    def get_crashed(self):
        """Return the game's ``crashed`` flag."""
        return self._driver.execute_script("return Runner.instance_.crashed")
    def get_playing(self):
        """Return the game's ``playing`` flag."""
        return self._driver.execute_script("return Runner.instance_.playing")
    def restart(self):
        """Restart the game."""
        self._driver.execute_script("Runner.instance_.restart()")
    def press_up(self):
        """Send an ARROW_UP key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)
    def press_down(self):
        """Send an ARROW_DOWN key press to the page body (used by DinoAgent.duck)."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_DOWN)
    def get_score(self):
        """Return the current score as an int (0 when no digits are available)."""
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        # the digits arrive as a list of one-character strings, e.g. ['0','0','4','2']
        score = ''.join(score_array or [])
        return int(score) if score else 0
    def pause(self):
        """Stop the game loop; returns whatever the script returns."""
        return self._driver.execute_script("return Runner.instance_.stop()")
    def resume(self):
        """Resume the game loop; returns whatever the script returns."""
        return self._driver.execute_script("return Runner.instance_.play()")
    def end(self):
        """Shut the browser down.

        quit() ends the whole WebDriver session (browser and driver process);
        close() would only close the current window.
        """
        self._driver.quit()

In [4]:
class DinoAgent:
    """The player: forwards high-level actions and status queries to a game object."""
    def __init__(self,game):
        """Keep a reference to the game and perform one jump immediately."""
        self._game = game
        # presumably this first key press is what starts the run -- TODO confirm
        self.jump()

    def is_running(self):
        """Return the game's 'playing' status."""
        playing = self._game.get_playing()
        return playing

    def is_crashed(self):
        """Return the game's 'crashed' status."""
        crashed = self._game.get_crashed()
        return crashed

    def jump(self):
        """Press the up key in the game."""
        self._game.press_up()

    def duck(self):
        """Press the down key in the game."""
        self._game.press_down()

In [5]:
class Game_sate:
    """Environment wrapper: applies an action and returns (frame, reward, game-over)."""
    def __init__(self,agent,game):
        """Store the agent/game pair and prime the display coroutine."""
        self._agent = agent
        self._game = game
        self._display = show_img() #display the processed image on screen using openCV
        next(self._display) # advance the coroutine to its first yield so send() works
    def get_state(self,actions):
        """Perform one environment step.

        actions -- indexable action vector; actions[1] == 1 means "jump",
                   anything else means "do nothing".

        Returns (image, reward, is_over):
          image   -- processed frame from grab_screen
          reward  -- 0.1 while alive, -1 when the agent crashed
          is_over -- True when the agent crashed (the game is restarted first)

        Side effects: appends to the module-level actions_df / scores_df logs
        and sends the frame to the display coroutine (which raises
        StopIteration once that coroutine has finished).
        """
        actions_df.loc[len(actions_df)] = actions[1] # storing actions in a dataframe
        score = self._game.get_score()
        reward = 0.1
        is_over = False #game over
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image) #display the image on screen
        if self._agent.is_crashed():
            # index by the scores log's own length; using another frame's length
            # would overwrite rows or leave gaps
            scores_df.loc[len(scores_df)] = score # log the score when game is over
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over #return the experience

In [6]:
def save_obj(obj, name ):
    """Pickle ``obj`` to ``objects/<name>.pkl`` (relative to the working directory).

    The ``objects`` folder is created on demand so the very first save does not
    fail with FileNotFoundError.
    """
    os.makedirs('objects', exist_ok=True)
    with open('objects/'+ name + '.pkl', 'wb') as f: #dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name ):
    """Read back the object pickled under ``objects/<name>.pkl``.

    NOTE: pickle.load can execute arbitrary code, so only load files this
    notebook wrote itself.
    """
    pickle_path = 'objects/' + name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

def grab_screen(_driver):
    """Capture the game canvas through the driver and return the processed frame.

    The page script returns the canvas as base64 text; it is decoded, opened
    with PIL, converted to a numpy array and handed to process_img.
    """
    encoded_frame = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded_frame)
    picture = Image.open(BytesIO(raw_bytes))
    # preprocessing (grey-scale, crop, resize) lives in process_img
    return process_img(np.array(picture))

def process_img(image):
    """Turn a captured frame into the 80x80 grey-scale network input.

    image -- numpy array as produced by grab_screen from a PIL image, i.e. in
             RGB or RGBA channel order (or already 2-D grey-scale).

    Returns the grey-scale frame cropped to its top-left 300x500 region and
    resized to 80x80.
    """
    if image.ndim == 3:
        # PIL decodes to RGB(A), not OpenCV's default BGR, so pick the matching code
        conversion = cv2.COLOR_RGBA2GRAY if image.shape[2] == 4 else cv2.COLOR_RGB2GRAY
        image = cv2.cvtColor(image, conversion) #RGB to Grey Scale
    image = image[:300, :500] #Crop Region of Interest
    image = cv2.resize(image, (80,80))
    return  image

def show_img(graphs = False):
    """Coroutine that displays every frame sent to it in an OpenCV window.

    Usage: create it, advance it once to the first ``yield``, then ``send()``
    frames. The window is titled "logs" when ``graphs`` is true, otherwise
    "game_play".

    Pressing 'q' in the window destroys all OpenCV windows and ends the
    coroutine, so the pending ``send()`` raises StopIteration in the caller.
    """
    while True:
        screen = (yield)
        window_title = "logs" if graphs else "game_play"
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        imS = cv2.resize(screen, (800, 400))
        # show the enlarged copy; the original code resized but then displayed
        # the small raw frame
        cv2.imshow(window_title, imS)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [7]:
#Initialize log structures from file if exists else create new
def _load_log(file_path, column):
    """Return the CSV at ``file_path`` as a DataFrame, or an empty frame with one column."""
    if os.path.isfile(file_path):
        return pd.read_csv(file_path)
    return pd.DataFrame(columns = [column])

# each log checks for, and reads, its OWN file
loss_df = _load_log(loss_file_path, 'loss')
scores_df = _load_log(scores_file_path, 'scores')
actions_df = _load_log(actions_file_path, 'actions')
q_values_df = _load_log(q_value_file_path, 'qvalues')

In [8]:
# ---- game / training hyper-parameters ----
ACTIONS = 2              # number of possible actions: do nothing, jump
GAMMA = 0.99             # discount factor applied to the next state's best Q value
OBSERVATION = 100.0      # timesteps to observe before training starts
EXPLORE = 100000         # frames over which epsilon is annealed
FINAL_EPSILON = 0.0001   # epsilon value at which annealing stops
INITIAL_EPSILON = 0.1    # epsilon value at the start of training
REPLAY_MEMORY = 50000    # maximum number of transitions kept in replay memory
BATCH = 16               # minibatch size
FRAME_PER_ACTION = 1     # choose an action every FRAME_PER_ACTION timesteps
LEARNING_RATE = 0.0001   # learning rate passed to the Adam optimizer

# ---- network input geometry ----
img_rows = 80
img_cols = 80
img_channels = 4         # four frames are stacked to form one input

In [9]:
# training variables saved as checkpoints to filesystem to resume training from the same step
def init_cache():
    """Write a fresh training checkpoint via save_obj.

    Stores the starting epsilon, a timestep counter of 0 and an empty replay
    deque under the names "epsilon", "time" and "D".
    """
    fresh_checkpoint = (
        ("epsilon", INITIAL_EPSILON),
        ("time", 0),
        ("D", deque()),
    )
    for key, value in fresh_checkpoint:
        save_obj(value, key)

In [10]:
# One-off bootstrap cell: uncomment the call below on the very first run so the
# "epsilon", "time" and "D" checkpoint files loaded by trainNetwork exist.
# Running it again resets those checkpoints to their initial values.
'''Call only once to init file structure
'''
#init_cache()

'Call only once to init file structure\n'

In [11]:
def buildmodel():
    """Build and compile the convolutional Q-network.

    Three conv / max-pool / ReLU stages are followed by a 512-unit dense layer
    and a linear output with one Q value per action. The model is compiled
    with MSE loss and the Adam optimizer.

    If 'model.h5' does not exist yet, the freshly initialised weights are
    written to it so that a later load_weights("model.h5") succeeds.

    Returns the compiled keras model.
    """
    model = Sequential()
    # input is (rows, cols, channels)
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_rows,img_cols,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)

    #create model file if not present
    # test for the weight file itself: keying this on the loss log could
    # overwrite trained weights or leave model.h5 missing
    if not os.path.isfile('model.h5'):
        model.save_weights('model.h5')
    return model

In [12]:
def trainNetwork(model,game_state,observe=False):
    """Run the deep-Q-learning loop against the game.

    model      -- compiled keras model taking a (1, rows, cols, 4) frame stack
                  and returning one Q value per action
    game_state -- environment wrapper exposing get_state(action_vector) and a
                  _game attribute with pause()/resume()
    observe    -- True: play with the saved weights and never train;
                  False: resume training from the checkpoints on disk

    The loop never finishes on its own; it ends only when an exception
    propagates (e.g. StopIteration from the display coroutine, or
    KeyboardInterrupt).

    Every 1000 timesteps the weights, replay memory, timestep, epsilon and the
    CSV logs are written to disk.
    NOTE(review): this checkpoint also runs in observe mode, where it stores
    FINAL_EPSILON as the saved epsilon -- confirm that is intended.
    """
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D") #load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] =1 #0 => do nothing,1=> jump

    x_t, r_0, terminal = game_state.get_state(do_nothing) # get next step after performing the action

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2) # stack 4 images to create placeholder input
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1 x rows x cols x 4

    initial_state = s_t

    if observe :
        OBSERVE = 999999999    #We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)

    t = load_obj("time") # resume from the previous time step stored in file system
    while (True): #endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0 #reward at t
        a_t = np.zeros([ACTIONS]) # action at t

        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: #parameter to skip frames for actions
            if  random.random() <= epsilon: #randomly explore an action
                print("Random Action")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else: # predict the output
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)         # choosing index with maximum q value
                action_index = max_Q
                a_t[action_index] = 1        # 0 => do nothing, 1 => jump

        #We reduced the epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        #run the selected action and observed next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        elapsed = time.time() - last_time
        # guard against a zero interval on coarse clocks
        print('fps: {0}'.format(1 / elapsed if elapsed > 0 else float('inf'))) # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # 1 x rows x cols x 1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) # put the new frame first and drop the last frame of the stack

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing (and once a full minibatch can be drawn)
        if t > OBSERVE and len(D) >= BATCH:

            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   # BATCH x rows x cols x 4
            targets = np.zeros((inputs.shape[0], ACTIONS))                         # BATCH x ACTIONS

            #Now we do the experience replay
            for i in range(0, len(minibatch)):
                state_t = minibatch[i][0]    # 4D stack of images
                action_t = minibatch[i][1]   #This is action index
                reward_t = minibatch[i][2]   #reward at state_t due to action_t
                state_t1 = minibatch[i][3]   #next state
                # keep the replayed flag in its own name: reusing `terminal`
                # here would corrupt the episode reset at the end of the step
                sample_terminal = minibatch[i][4]   #whether the agent died or survived due to the action

                inputs[i:i + 1] = state_t

                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(state_t1)      #predict q values for next step

                if sample_terminal:
                    targets[i, action_t] = reward_t # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 #reset game to initial frame if terminate
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            # saving model
            game_state._game.pause() #pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D,"D") #saving episodes
            save_obj(t,"time") #caching time steps
            save_obj(epsilon,"epsilon") #cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv(loss_file_path,index=False)
            scores_df.to_csv(scores_file_path,index=False)
            actions_df.to_csv(actions_file_path,index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state,             "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t,             "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)


In [13]:
#main function
def playGame(observe=False):
    """Open the game, build the agent and model, and run trainNetwork.

    observe -- passed through to trainNetwork (True: play only, False: train).

    The browser is shut down however the run ends. StopIteration, which the
    display coroutine raises after 'q' is pressed in its window, is treated as
    a normal stop; any other exception (including KeyboardInterrupt) still
    propagates after the cleanup.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        trainNetwork(model,game_state,observe=observe)
    except StopIteration:
        pass
    finally:
        # always release the browser, not only on a clean stop
        game.end()

In [14]:
# Run in training mode (observe=False); the trailing ';' suppresses the echo of the cell's value.
playGame(observe=False);

  


fps: 0.009127702993502237
TIMESTEP 540001 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.9494197 / Loss  0.06696029752492905
fps: 0.4901740891767028
TIMESTEP 540002 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.0591538 / Loss  0.019659310579299927
fps: 2.776798399451301
TIMESTEP 540003 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.117376 / Loss  0.03633051738142967
fps: 5.260007900726741
TIMESTEP 540004 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.0619764 / Loss  0.024808568879961967
fps: 5.467605417151707
TIMESTEP 540005 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.2164695 / Loss  0.11979012936353683
fps: 5.621223472334837
TIMESTEP 540006 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.215382 / Loss  0.1582152396440506
fps: 5.458774532771097
TIMESTEP 540007 / STATE train / EPSILO

TIMESTEP 540054 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9052885 / Loss  0.031113548204302788
fps: 5.157567388436033
TIMESTEP 540055 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  3.2870212 / Loss  0.03750041872262955
fps: 4.486889582805141
TIMESTEP 540056 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.7476544 / Loss  0.026312388479709625
fps: 2.6753277900102823
TIMESTEP 540057 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.9986846 / Loss  0.04212336242198944
fps: 4.4470050626872
TIMESTEP 540058 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8737504 / Loss  0.011077630333602428
fps: 4.632266244337095
TIMESTEP 540059 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.039738 / Loss  0.028987077996134758
fps: 4.610924041796525
TIMESTEP 540060 / STATE train / EPSILON 9.999999987391849e-05 /

TIMESTEP 540107 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.0525205 / Loss  0.3091975748538971
fps: 2.1278580556723803
TIMESTEP 540108 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.043433 / Loss  0.014902101829648018
fps: 1.8522641559428021
TIMESTEP 540109 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.7697225 / Loss  0.0285586379468441
fps: 2.039795996278639
TIMESTEP 540110 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9420505 / Loss  0.02237154357135296
fps: 2.8907255374402463
TIMESTEP 540111 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.0043035 / Loss  0.010081776417791843
fps: 1.14611935282014
TIMESTEP 540112 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.7723136 / Loss  0.06493235379457474
fps: 5.665168760898311
TIMESTEP 540113 / STATE train / EPSILON 9.999999987391849e-05 /

TIMESTEP 540160 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.8015494 / Loss  0.02331572398543358
fps: 5.184720398925058
TIMESTEP 540161 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  0.56612957 / Loss  0.019410552456974983
fps: 5.717262728302488
TIMESTEP 540162 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9778569 / Loss  0.20417076349258423
fps: 5.886040261863497
TIMESTEP 540163 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9914598 / Loss  0.014369850978255272
fps: 5.75038765003873
TIMESTEP 540164 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.4984361 / Loss  0.533437192440033
fps: 5.920545655758069
TIMESTEP 540165 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.9693115 / Loss  0.07214604318141937
fps: 5.844555022483324
TIMESTEP 540166 / STATE train / EPSILON 9.999999987391849e-05 / 

TIMESTEP 540213 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.2523878 / Loss  0.025072459131479263
fps: 5.379016175635105
TIMESTEP 540214 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.8920176 / Loss  0.0072725736536085606
fps: 5.497662286594357
TIMESTEP 540215 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.4099164 / Loss  0.45153170824050903
fps: 1.6255083220846545
TIMESTEP 540216 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.303792 / Loss  0.013713188469409943
fps: 5.497611844830208
TIMESTEP 540217 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.3188 / Loss  0.826041579246521
fps: 4.977971048205912
TIMESTEP 540218 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  4.5452576 / Loss  0.07423319667577744
fps: 5.4378965655832285
TIMESTEP 540219 / STATE train / EPSILON 9.999999987391849e-05 / 

TIMESTEP 540266 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.267825 / Loss  0.5013949275016785
fps: 5.621215938758115
TIMESTEP 540267 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.2627535 / Loss  0.018512781709432602
fps: 5.621170737721835
TIMESTEP 540268 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.1154516 / Loss  0.12502068281173706
fps: 5.783611530532746
TIMESTEP 540269 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.2903886 / Loss  0.45850908756256104
fps: 5.699440832161323
TIMESTEP 540270 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4373305 / Loss  0.06345532089471817
fps: 5.779746172608139
TIMESTEP 540271 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.3006952 / Loss  0.15728352963924408
fps: 5.849209005511302
TIMESTEP 540272 / STATE train / EPSILON 9.999999987391849e-05 / A

TIMESTEP 540319 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.044547 / Loss  0.028500137850642204
fps: 5.4672348146632235
TIMESTEP 540320 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.8769295 / Loss  0.049291908740997314
fps: 5.465524992474697
TIMESTEP 540321 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.9935684 / Loss  0.029341407120227814
fps: 5.466443888948262
TIMESTEP 540322 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.8343008 / Loss  0.02206239476799965
fps: 5.408460002991587
TIMESTEP 540323 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9289963 / Loss  0.030039479956030846
fps: 4.589496351866522
TIMESTEP 540324 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.9295173 / Loss  0.013610364869236946
fps: 5.498137925881454
TIMESTEP 540325 / STATE train / EPSILON 9.999999987391849e-

TIMESTEP 540372 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.0987444 / Loss  0.04526107385754585
fps: 5.443154049301486
TIMESTEP 540373 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.943813 / Loss  0.03695957735180855
fps: 4.953485021340722
TIMESTEP 540374 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.9388766 / Loss  0.008031819015741348
fps: 4.675887702102659
TIMESTEP 540375 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.7686443 / Loss  0.04585452750325203
fps: 4.99747880336096
TIMESTEP 540376 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.8412042 / Loss  0.00962156057357788
fps: 5.467562652761936
TIMESTEP 540377 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.4845808 / Loss  0.00809435360133648
fps: 5.46788340199197
TIMESTEP 540378 / STATE train / EPSILON 9.999999987391849e-05 / AC

TIMESTEP 540425 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.162613 / Loss  0.040765367448329926
fps: 4.294248335766633
TIMESTEP 540426 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.3290515 / Loss  0.044856004416942596
fps: 4.977947416081854
TIMESTEP 540427 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.269944 / Loss  0.07752963900566101
fps: 4.953362172707937
TIMESTEP 540428 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.2270374 / Loss  0.003696570871397853
fps: 5.027574137559034
TIMESTEP 540429 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.4103665 / Loss  0.21401256322860718
fps: 5.053451375200606
TIMESTEP 540430 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.2017713 / Loss  0.028158601373434067
fps: 5.027983965322132
TIMESTEP 540431 / STATE train / EPSILON 9.999999987391849e-05 

fps: 5.246821674783181
TIMESTEP 540479 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.5683126 / Loss  0.4810903072357178
fps: 5.322470899209048
TIMESTEP 540480 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.957293 / Loss  0.11142850667238235
fps: 5.2649929704759995
TIMESTEP 540481 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.6057084 / Loss  0.015186501666903496
fps: 5.653234132600111
TIMESTEP 540482 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.88711 / Loss  0.05682455748319626
fps: 4.9047926431104
TIMESTEP 540483 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.4001353 / Loss  0.8172173500061035
fps: 5.350648754220953
TIMESTEP 540484 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  0.30723193 / Loss  0.059440888464450836
fps: 5.05082855763175
TIMESTEP 540485 / STATE train / EPSILON 9.999

TIMESTEP 540532 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.665729 / Loss  0.008926604874432087
fps: 5.71753550367646
TIMESTEP 540533 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  5.4400334 / Loss  0.6361309289932251
fps: 5.850800067515069
TIMESTEP 540534 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  5.2472816 / Loss  0.04085705429315567
fps: 5.3794508073721605
TIMESTEP 540535 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  5.103748 / Loss  0.11243882775306702
fps: 5.230764242323717
TIMESTEP 540536 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  5.117579 / Loss  0.12636835873126984
fps: 5.65292174491995
TIMESTEP 540537 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  5.1588674 / Loss  0.3045041263103485
fps: 5.273837425688793
TIMESTEP 540538 / STATE train / EPSILON 9.999999987391849e-05 / ACTIO

fps: 4.719547118348076
TIMESTEP 540585 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.5541718 / Loss  0.03399067744612694
fps: 5.432782927221011
TIMESTEP 540586 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4073088 / Loss  0.1195671334862709
fps: 5.410204086897523
TIMESTEP 540587 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4176044 / Loss  0.029800157994031906
fps: 1.4692523259724246
TIMESTEP 540588 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.4998095 / Loss  0.1262417584657669
fps: 4.325964736763394
TIMESTEP 540589 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.4889836 / Loss  0.04307237267494202
fps: 4.941719273193411
TIMESTEP 540590 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.6040926 / Loss  0.01606878638267517
fps: 4.972146616719617
TIMESTEP 540591 / STATE train / EPSILON 9

fps: 3.775709919747224
TIMESTEP 540639 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.585726 / Loss  0.04839378595352173
fps: 4.584830710236527
TIMESTEP 540640 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.495187 / Loss  0.051813140511512756
fps: 4.989453198419288
TIMESTEP 540641 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.4655495 / Loss  0.010048913769423962
fps: 5.131136671144158
TIMESTEP 540642 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2532625 / Loss  0.8688691854476929
fps: 5.102994172253111
TIMESTEP 540643 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.1031816 / Loss  0.02137346938252449
fps: 5.589647773764943
TIMESTEP 540644 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  4.8045483 / Loss  0.015240015462040901
fps: 5.435331500089416
TIMESTEP 540645 / STATE train / EPSILON 9

fps: 5.182599885086587
TIMESTEP 540693 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.8636522 / Loss  0.009280318394303322
fps: 5.103180435576104
TIMESTEP 540694 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.999884 / Loss  0.03042737767100334
fps: 5.079079680310002
TIMESTEP 540695 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.0028515 / Loss  0.023593345656991005
fps: 5.639036405045456
TIMESTEP 540696 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.259596 / Loss  0.6900128722190857
fps: 5.5587710625574855
TIMESTEP 540697 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.809297 / Loss  0.012653034180402756
fps: 5.026935421625522
TIMESTEP 540698 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  4.249548 / Loss  0.020936978980898857
fps: 5.677530439726296
TIMESTEP 540699 / STATE train / EPSILON 9

KeyboardInterrupt: 