In [1]:
import numpy as np
from PIL import Image
import cv2 #opencv
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

#keras imports
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json

Using TensorFlow backend.


In [2]:
# --- Locations of the game page, the browser driver and the CSV training logs ---
game_url = "chrome://dino"
chrome_driver_path = "chromedriver.exe"
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# --- JavaScript snippets executed inside the game page ---
# Give the game canvas an id so later lookups can use getElementById.
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# Return the canvas content as a base64 string; substring(22) drops the
# leading "data:image/png;base64," prefix of the data URL.
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

In [3]:
class Game:
    """Selenium wrapper around the Chrome dinosaur game.

    All interaction with the page goes through ``self._driver``: game state is
    read by evaluating ``Runner.instance_`` expressions and key presses are sent
    to the ``<body>`` element.
    """
    def __init__(self,custom_config=True):
        """Launch Chrome, open the game page and prepare it for the agent.

        custom_config -- accepted for interface compatibility; it is currently
            not consulted, the acceleration override below is always applied.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,chrome_options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        # use the shared path variable instead of repeating the URL literal
        self._driver.get(game_url)
        # set the game's ACCELERATION config value to 0
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)
    def _send_key(self, key):
        """Send a single key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(key)
    def get_crashed(self):
        """Return the game's ``crashed`` flag."""
        return self._driver.execute_script("return Runner.instance_.crashed")
    def get_playing(self):
        """Return the game's ``playing`` flag."""
        return self._driver.execute_script("return Runner.instance_.playing")
    def restart(self):
        """Restart the game."""
        self._driver.execute_script("Runner.instance_.restart()")
    def press_up(self):
        """Press the up arrow (jump)."""
        self._send_key(Keys.ARROW_UP)
    def press_down(self):
        """Press the down arrow (duck); DinoAgent.duck() relies on this method."""
        self._send_key(Keys.ARROW_DOWN)
    def get_score(self):
        """Return the current score as an int, built from the distance-meter digits."""
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        # digits may arrive as strings or numbers depending on the page; normalise to str
        score = ''.join(map(str, score_array))
        return int(score)
    def pause(self):
        """Stop the game loop; returns whatever the page's stop() returns."""
        return self._driver.execute_script("return Runner.instance_.stop()")
    def resume(self):
        """Resume the game loop; returns whatever the page's play() returns."""
        return self._driver.execute_script("return Runner.instance_.play()")
    def end(self):
        """Shut the browser down.

        quit() ends the whole WebDriver session (all windows plus the driver
        process); close() only closed the current window.
        """
        self._driver.quit()

In [4]:
class DinoAgent:
    """The player: translates agent-level actions into calls on a Game object."""

    def __init__(self, game):
        # keep a handle on the game and jump once right away
        # (presumably this first key press is what starts the run -- confirm)
        self._game = game
        self.jump()

    def jump(self):
        """Jump by pressing the up key."""
        self._game.press_up()

    def duck(self):
        """Duck by pressing the down key."""
        self._game.press_down()

    def is_running(self):
        """Report the game's playing flag."""
        playing = self._game.get_playing()
        return playing

    def is_crashed(self):
        """Report the game's crashed flag."""
        crashed = self._game.get_crashed()
        return crashed

In [5]:
class Game_sate:
    """Environment step function: applies an action and returns (frame, reward, done).

    Also logs every action to ``actions_df`` and every final score to
    ``scores_df`` (both module-level DataFrames).
    """
    def __init__(self,agent,game):
        self._agent = agent
        self._game = game
        self._display = show_img() # coroutine that shows each processed frame in an OpenCV window
        next(self._display) # prime the coroutine so it is ready to receive frames
    def get_state(self,actions):
        """Perform ``actions`` and return the resulting experience.

        actions -- indexable of length 2; ``actions[1] == 1`` means "jump",
            anything else means "do nothing".

        Returns ``(image, reward, is_over)``: the processed frame from
        grab_screen(), a reward of 0.1 while alive or -1 on a crash, and
        whether the game ended on this step. On a crash the game is restarted.
        May raise StopIteration from the display coroutine once the preview
        window has been closed with 'q'.
        """
        actions_df.loc[len(actions_df)] = actions[1] # log the jump flag of this action
        score = self._game.get_score() # read before acting
        reward = 0.1 # reward for staying alive this step
        is_over = False
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver)
        self._display.send(image) # show the frame on screen
        if self._agent.is_crashed():
            # append at the end of scores_df; indexing by len(loss_df) put the
            # score at an unrelated row and could overwrite earlier scores
            scores_df.loc[len(scores_df)] = score
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over

In [6]:
def save_obj(obj, name ):
    """Pickle ``obj`` to ``objects/<name>.pkl``.

    The ``objects`` directory is created on demand so the very first save
    (e.g. from init_cache on a fresh checkout) does not fail.
    """
    os.makedirs('objects', exist_ok=True)
    with open('objects/'+ name + '.pkl', 'wb') as f: #dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name ):
    """Read back the object pickled at ``objects/<name>.pkl``.

    NOTE: unpickling executes code from the file; only load files this
    notebook wrote itself.
    """
    pickle_path = ''.join(('objects/', name, '.pkl'))
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

def grab_screen(_driver):
    """Capture the game canvas through the browser and return the processed frame.

    The page script returns the canvas as base64 image data; it is decoded,
    opened with PIL, converted to a numpy array and handed to process_img().
    """
    encoded = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded)
    picture = Image.open(BytesIO(raw_bytes))
    screen = np.array(picture)
    return process_img(screen)

def process_img(image):
    """Turn a captured frame into the 80x80 single-channel network input.

    Steps: grayscale conversion, crop to the top-left 300 rows x 500 columns,
    then resize to 80x80.
    """
    # NOTE(review): the conversion code is BGR2GRAY although the frame comes
    # from PIL (presumably RGB/RGBA order) -- confirm whether RGB2GRAY is meant.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    region_of_interest = gray[:300, :500]
    return cv2.resize(region_of_interest, (80,80))

def show_img(graphs = False):
    """Coroutine that displays every frame sent to it in an OpenCV window.

    Usage: ``disp = show_img(); next(disp); disp.send(frame)``.

    graphs -- selects the window title: "logs" when true, "game_play" otherwise.

    Each frame is enlarged to 800x400 before being shown. Pressing 'q' in the
    window closes all OpenCV windows and ends the coroutine, so the pending
    ``send()`` raises StopIteration in the caller.
    """
    window_title = "logs" if graphs else "game_play"  # constant for the coroutine's lifetime
    while True:
        screen = (yield)
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        imS = cv2.resize(screen, (800, 400))
        # show the enlarged frame; previously it was computed and then discarded
        cv2.imshow(window_title, imS)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [7]:
# Initialize the log structures from file if present, else create them empty
def _load_log(csv_path, column):
    """Return the CSV at ``csv_path`` as a DataFrame, or an empty frame with one ``column``."""
    if os.path.isfile(csv_path):
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=[column])

loss_df = _load_log(loss_file_path, 'loss')
# each log is checked against and read from its own file (scores used to test
# the loss file, q-values used to read the actions file)
scores_df = _load_log(scores_file_path, 'scores')
actions_df = _load_log(actions_file_path, 'actions')
q_values_df = _load_log(q_value_file_path, 'qvalues')

In [8]:
# --- Hyper-parameters of the game / DQN training loop ---
ACTIONS = 2               # number of actions: index 0 = do nothing, index 1 = jump
GAMMA = 0.99              # discount applied to the estimated future reward
OBSERVATION = 100.0       # timesteps to observe before training starts
EXPLORE = 100000          # frames over which epsilon is annealed
FINAL_EPSILON = 0.0001    # epsilon at the end of annealing
INITIAL_EPSILON = 0.1     # epsilon at the start of training
REPLAY_MEMORY = 50000     # maximum number of transitions kept for replay
BATCH = 16                # minibatch size
FRAME_PER_ACTION = 1      # choose an action every this many timesteps
LEARNING_RATE = 1e-4      # Adam learning rate
img_rows = img_cols = 80  # size of one processed frame
img_channels = 4          # number of stacked frames fed to the network

In [9]:
# training variables saved as checkpoints to filesystem to resume training from the same step
def init_cache():
    """Write the initial training checkpoint: epsilon, step counter and empty replay memory."""
    initial_checkpoint = (
        ("epsilon", INITIAL_EPSILON),  # exploration rate to start from
        ("time", 0),                   # timestep counter
        ("D", deque()),                # empty replay memory
    )
    for cache_name, value in initial_checkpoint:
        save_obj(value, cache_name)

In [10]:
# First-run setup: uncomment the call below once so that objects/epsilon.pkl,
# objects/time.pkl and objects/D.pkl exist. trainNetwork() reads all three with
# load_obj() and raises FileNotFoundError if they are missing.
'''Call only once to init file structure
'''
#init_cache()

'Call only once to init file structure\n'

In [11]:
def buildmodel():
    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_cols,img_rows,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    
    #create model file if not present
    if not os.path.isfile(loss_file_path):
        model.save_weights('model.h5')
    return model

In [12]:
def trainNetwork(model,game_state,observe=False):
    """Run the endless play/train loop of the DQN agent.

    model      -- compiled Keras model from buildmodel(); weights are loaded
                  from ``model.h5`` before the loop starts.
    game_state -- object whose ``get_state(action)`` returns
                  ``(frame, reward, terminal)`` and which exposes ``_game``
                  with ``pause()`` / ``resume()``.
    observe    -- when true, play with FINAL_EPSILON and never train; when
                  false, resume training from the cached epsilon.

    The replay memory, step counter and (in training mode) epsilon are restored
    with load_obj(). Every 1000 steps the weights, replay memory, counters and
    CSV logs are written back to disk.

    The function never returns normally; it ends when an exception propagates
    (e.g. StopIteration from the display coroutine, or KeyboardInterrupt).
    """
    last_time = time.time()
    # replay memory of previous transitions, restored from the file system
    D = load_obj("D")
    # get the first frame by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1 # index 0 => do nothing, index 1 => jump

    x_t, _, _ = game_state.get_state(do_nothing)

    # the network takes 4 stacked frames; repeat the first one as a placeholder
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1 x rows x cols x 4

    initial_state = s_t

    if observe :
        OBSERVE = 999999999    # keep observing, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else:                       # training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)

    t = load_obj("time") # resume from the previously stored time step
    while (True): # endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0 # reward at t
        a_t = np.zeros([ACTIONS]) # one-hot action at t

        # choose an action, epsilon-greedy
        if t % FRAME_PER_ACTION == 0: # parameter to skip frames for actions
            if  random.random() <= epsilon: # explore: random action
                print("Random Action")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else: # exploit: action with the highest predicted Q-value
                q = model.predict(s_t)
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[action_index] = 1        # 0 => do nothing, 1 => jump

        # anneal epsilon (exploration parameter) gradually once training has started
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe the next frame and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        elapsed = time.time() - last_time
        # guard against a zero interval on coarse clocks
        print('fps: {0}'.format(1 / elapsed if elapsed > 0 else float('inf'))) # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # 1 x rows x cols x 1
        # put the new frame in front and drop the oldest of the 4 stacked frames
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # store the transition in D, discarding the oldest when full
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing (and if a full minibatch can be drawn)
        if t > OBSERVE and len(D) >= BATCH:

            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   # BATCH x rows x cols x 4
            targets = np.zeros((inputs.shape[0], ACTIONS))                         # BATCH x ACTIONS

            # experience replay
            # NOTE: the sampled terminal flag gets its own name; reusing
            # `terminal` here clobbered the current step's flag that is needed
            # below to decide whether to reset s_t.
            for i, (state_t, action_t, reward_t, state_t1, sample_terminal) in enumerate(minibatch):
                inputs[i:i + 1] = state_t

                targets[i] = model.predict(state_t)  # current Q estimates
                Q_sa = model.predict(state_t1)       # Q estimates for the next state

                if sample_terminal:
                    targets[i, action_t] = reward_t # terminal: target is the reward alone
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 # back to the initial stack after a crash
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            game_state._game.pause() # pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D,"D") # replay memory
            save_obj(t,"time") # time step counter
            save_obj(epsilon,"epsilon") # cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv(loss_file_path,index=False)
            scores_df.to_csv(scores_file_path,index=False)
            actions_df.to_csv(actions_file_path,index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            # note: to_json() already returns a JSON string, so the file holds a JSON-encoded string
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)


In [13]:
#main function
def playGame(observe=False):
    """Open the game, build the agent and model, and run trainNetwork().

    observe -- forwarded to trainNetwork(): true plays without training.

    StopIteration (raised once the preview window is closed with 'q') ends the
    session quietly. Any other exception, including KeyboardInterrupt from
    interrupting the kernel, still propagates, but the browser is now closed
    in every case instead of being left running.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        trainNetwork(model,game_state,observe=observe)
    except StopIteration:
        pass
    finally:
        game.end()

In [14]:
# Start a training session. trainNetwork() loops forever, so stop it by
# interrupting the kernel or by pressing 'q' in the OpenCV preview window.
playGame(observe=False);

TIMESTEP 812000 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.9594793 / Loss  0.25773486495018005
fps: 0.025804728485118195
TIMESTEP 812001 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.4953659 / Loss  0.05503523349761963
fps: 2.776235977516369
TIMESTEP 812002 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8288331 / Loss  0.3497477173805237
fps: 3.0505049623478313
TIMESTEP 812003 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.8593819 / Loss  0.19813764095306396
fps: 3.004765438961977
TIMESTEP 812004 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.527616 / Loss  0.15354600548744202
fps: 2.9428673265770633
TIMESTEP 812005 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  2.0451388 / Loss  0.039291027933359146
fps: 2.9768054535324824
TIMESTEP 812006 / STATE train / EPSILON 9.999999987391849e-0

TIMESTEP 812053 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.80956054 / Loss  0.043584149330854416
fps: 2.9342164726754283
TIMESTEP 812054 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.564986 / Loss  0.013957003131508827
fps: 2.794917004844438
TIMESTEP 812055 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.9242119 / Loss  0.09152651578187943
fps: 2.9779002426024705
TIMESTEP 812056 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.1043863 / Loss  0.3109229803085327
fps: 3.0559168944109336
TIMESTEP 812057 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.722365 / Loss  0.027500947937369347
fps: 3.2383546582911005
TIMESTEP 812058 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.5497608 / Loss  0.007662787567824125
fps: 2.9740340407910897
TIMESTEP 812059 / STATE train / EPSILON 9.99999998739184

TIMESTEP 812106 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7982416 / Loss  0.021975591778755188
fps: 2.8481529769665364
TIMESTEP 812107 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.6980917 / Loss  0.06687001883983612
fps: 2.7412318310153716
TIMESTEP 812108 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.6660724 / Loss  0.21480512619018555
fps: 2.7260115050925404
TIMESTEP 812109 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.2279315 / Loss  0.2072352021932602
fps: 2.861925839597749
TIMESTEP 812110 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.8395418 / Loss  0.010055672377347946
fps: 3.158311502604251
TIMESTEP 812111 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.725108 / Loss  0.11898380517959595
fps: 3.168638045226499
TIMESTEP 812112 / STATE train / EPSILON 9.999999987391849e-05

TIMESTEP 812159 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.697614 / Loss  0.027252018451690674
fps: 2.8199409294104436
TIMESTEP 812160 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  1.7323956 / Loss  0.03943590074777603
fps: 2.572165349912397
TIMESTEP 812161 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.0240498 / Loss  0.019757887348532677
fps: 2.5267058998583725
TIMESTEP 812162 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  -1.3347332 / Loss  0.33226874470710754
fps: 2.654201136401749
TIMESTEP 812163 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.721089 / Loss  0.34503552317619324
fps: 2.771684346933691
TIMESTEP 812164 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.5847409 / Loss  0.07539083063602448
fps: 2.8752056507337618
TIMESTEP 812165 / STATE train / EPSILON 9.999999987391849e-05

TIMESTEP 812212 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.6244072 / Loss  0.01933760941028595
fps: 2.99377804679063
TIMESTEP 812213 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.5399306 / Loss  0.030839435756206512
fps: 3.0096036279096468
TIMESTEP 812214 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.6554835 / Loss  0.021074756979942322
fps: 3.001406852214683
TIMESTEP 812215 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.1320028 / Loss  0.011809101328253746
fps: 2.9958436931489865
TIMESTEP 812216 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.326016 / Loss  0.004744294099509716
fps: 3.0039627375189526
TIMESTEP 812217 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.8823352 / Loss  0.18964526057243347
fps: 2.8605634369558826
TIMESTEP 812218 / STATE train / EPSILON 9.999999987391849e

TIMESTEP 812265 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.503481 / Loss  0.22163262963294983
fps: 2.9515734954681077
TIMESTEP 812266 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.6914731 / Loss  0.01751779019832611
fps: 3.146469780250694
TIMESTEP 812267 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.6301539 / Loss  0.13693875074386597
fps: 3.065928772394314
TIMESTEP 812268 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.718986 / Loss  0.006659995298832655
fps: 2.925383710825088
TIMESTEP 812269 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.417696 / Loss  0.10566301643848419
fps: 3.1351804691068073
TIMESTEP 812270 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.61327 / Loss  0.06128326803445816
fps: 2.995732426345964
TIMESTEP 812271 / STATE train / EPSILON 9.999999987391849e-05 / AC

TIMESTEP 812318 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.6082574 / Loss  0.01085464283823967
fps: 2.866438042543769
TIMESTEP 812319 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.4165845 / Loss  0.1268101930618286
fps: 2.991028278643224
TIMESTEP 812320 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.5471288 / Loss  0.01868753507733345
fps: 3.0085609375930242
TIMESTEP 812321 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.6748779 / Loss  0.1679777354001999
fps: 2.8055976780913685
TIMESTEP 812322 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  2.5036745 / Loss  0.015810921788215637
fps: 3.05045171631837
TIMESTEP 812323 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.6333685 / Loss  0.2062595933675766
fps: 3.0137513095287707
TIMESTEP 812324 / STATE train / EPSILON 9.999999987391849e-05 / A

TIMESTEP 812371 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.3038313 / Loss  0.06876852363348007
fps: 2.552685024298656
TIMESTEP 812372 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8685124 / Loss  0.00828380137681961
fps: 2.4898956979098026
TIMESTEP 812373 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.6350402 / Loss  0.3263920247554779
fps: 2.9588047690162087
TIMESTEP 812374 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.39762 / Loss  0.0359598807990551
fps: 0.8486644014324676
TIMESTEP 812375 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.8326418 / Loss  0.44073206186294556
fps: 2.934241105220668
TIMESTEP 812376 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.6975521 / Loss  0.045988041907548904
fps: 2.7871166663344167
TIMESTEP 812377 / STATE train / EPSILON 9.999999987391849e-05 /

TIMESTEP 812424 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.6903743 / Loss  0.17694637179374695
fps: 2.725283522001463
TIMESTEP 812425 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7900556 / Loss  0.1100478395819664
fps: 2.856507345404619
TIMESTEP 812426 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.1097684 / Loss  0.23903268575668335
fps: 2.8656390103125466
TIMESTEP 812427 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.6753262 / Loss  0.3223392069339752
fps: 2.8391303170606235
TIMESTEP 812428 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7952762 / Loss  0.0453077107667923
fps: 2.814519559962905
TIMESTEP 812429 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.6811433 / Loss  0.024877460673451424
fps: 2.8410380349082316
TIMESTEP 812430 / STATE train / EPSILON 9.999999987391849e-05 /

TIMESTEP 812477 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.7830527 / Loss  0.018147051334381104
fps: 0.8596657425690624
TIMESTEP 812478 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD -1 / Q_MAX  1.7550135 / Loss  0.02252661995589733
fps: 2.7640202574688706
TIMESTEP 812479 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.627181 / Loss  0.09868819266557693
fps: 2.7564056814380637
TIMESTEP 812480 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.7486234 / Loss  0.029351837933063507
fps: 2.6123945837537526
TIMESTEP 812481 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7913619 / Loss  0.020502228289842606
fps: 2.578855044296649
TIMESTEP 812482 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.842186 / Loss  0.07571816444396973
fps: 2.7263818451990165
TIMESTEP 812483 / STATE train / EPSILON 9.999999987391849e-

TIMESTEP 812530 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.79673964 / Loss  0.3283354640007019
fps: 2.3594045326085
TIMESTEP 812531 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.1007817 / Loss  0.22294846177101135
fps: 2.2434471913477894
TIMESTEP 812532 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.587657 / Loss  0.014531975612044334
fps: 2.312477153951629
TIMESTEP 812533 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.696433 / Loss  0.014552381820976734
fps: 2.3314434112870592
TIMESTEP 812534 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  1.9258447 / Loss  0.07672982662916183
fps: 2.161659151571625
TIMESTEP 812535 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.6404041 / Loss  0.06282331049442291
fps: 2.2653704832926183
TIMESTEP 812536 / STATE train / EPSILON 9.999999987391849e-05 /

TIMESTEP 812583 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.10026 / Loss  0.01864449679851532
fps: 2.0660669891152663
TIMESTEP 812584 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.043607026 / Loss  0.14178533852100372
fps: 1.6959215517244866
TIMESTEP 812585 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.1897163 / Loss  0.3234485387802124
fps: 1.883449255408911
TIMESTEP 812586 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  3.2803485 / Loss  0.03744889050722122
fps: 1.6412959898383201
TIMESTEP 812587 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  3.0635412 / Loss  0.3056607246398926
fps: 2.1607459957746413
TIMESTEP 812588 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD -1 / Q_MAX  1.7318032 / Loss  0.10527114570140839
fps: 2.13913356579552
TIMESTEP 812589 / STATE train / EPSILON 9.999999987391849e-05 /

TIMESTEP 812636 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.9069843 / Loss  0.30777642130851746
fps: 2.0382228995288227
TIMESTEP 812637 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8874847 / Loss  0.06641778349876404
fps: 2.5057585254733654
TIMESTEP 812638 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.0630739 / Loss  0.011992303654551506
fps: 2.0710382783483063
TIMESTEP 812639 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.7309527 / Loss  0.03961927443742752
fps: 1.8931226044301455
TIMESTEP 812640 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.7244941 / Loss  0.06333602964878082
fps: 1.243762069014599
TIMESTEP 812641 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.1753004 / Loss  0.21788345277309418
fps: 1.692336619042047
TIMESTEP 812642 / STATE train / EPSILON 9.999999987391849e

TIMESTEP 812689 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  2.8437371 / Loss  0.017503662034869194
fps: 0.692963444779905
TIMESTEP 812690 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8985772 / Loss  0.12539122998714447
fps: 2.740563741585008
TIMESTEP 812691 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.007123 / Loss  0.01362968422472477
fps: 0.4103193410948569
TIMESTEP 812692 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7886577 / Loss  0.34308668971061707
fps: 0.1899074279709201
TIMESTEP 812693 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.7329683 / Loss  0.021182803437113762
fps: 0.9112790527903889
TIMESTEP 812694 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.707042 / Loss  0.399850070476532
fps: 1.792125899469622
TIMESTEP 812695 / STATE train / EPSILON 9.999999987391849e-05 /

TIMESTEP 812742 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.579166 / Loss  0.020401274785399437
fps: 2.8680864739931824
TIMESTEP 812743 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD -1 / Q_MAX  1.7203279 / Loss  0.031596653163433075
fps: 2.932899234173002
TIMESTEP 812744 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  1.8159783 / Loss  0.01977231539785862
fps: 3.027012359043753
TIMESTEP 812745 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.7822528 / Loss  0.024258557707071304
fps: 2.820020560319122
TIMESTEP 812746 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 1 / REWARD 0.1 / Q_MAX  1.8553292 / Loss  0.271117240190506
fps: 2.859826172808529
TIMESTEP 812747 / STATE train / EPSILON 9.999999987391849e-05 / ACTION 0 / REWARD 0.1 / Q_MAX  2.3493838 / Loss  0.059952229261398315
fps: 2.147882844914024
TIMESTEP 812748 / STATE train / EPSILON 9.999999987391849e-05 /

KeyboardInterrupt: 