Import Libraries

In [9]:
import numpy as np
import cv2 as cv2
from mss import mss
from PIL import Image, ImageEnhance, ImageOps
import keyboard
import time
import tqdm as tqdm
import matplotlib.pyplot as plt
import tensorflow as tf                                                               
import random
from tqdm import tqdm
from tensorflow import keras
from keras.models import model_from_json, Sequential
from keras.optimizers import Adam
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

The following code block is the Agent class. It contains the convolutional neural network that acts as the brain of the AI. It is also where the pretrained weights are loaded (the `load_weights` call is commented out by default).

In [10]:
class Agent:
    """Deep Q-learning agent: a small convolutional network plus a replay buffer.

    The network maps a state that reshapes to ``stateShape`` (76, 384, 4) onto
    three Q-values, one per action index (0, 1, 2).
    """

    # Maximum number of experiences kept in the replay buffer. If you don't
    # bound the memory you may run out of memory during training; 35000 was
    # found to work well.
    maxMemory = 35000
    # Number of experiences the AI learns from every round. Feel free to tweak.
    batchSize = 256
    # Shape of a single network input.
    stateShape = (76, 384, 4)

    def __init__(self):
        # This is the actual neural net.
        model = Sequential([
            Conv2D(32, (8, 8), input_shape=(76, 384, 4),
                   strides=(2, 2), activation='relu'),
            MaxPooling2D(pool_size=(5, 5), strides=(2, 2)),
            Conv2D(64, (4, 4), activation='relu', strides=(1, 1)),
            MaxPooling2D(pool_size=(7, 7), strides=(3, 3)),
            Conv2D(128, (1, 1), strides=(1, 1), activation='relu'),
            MaxPooling2D(pool_size=(3, 3), strides=(3, 3)),
            Flatten(),
            Dense(384, activation='relu'),
            Dense(64, activation="relu", name="layer1"),
            Dense(8, activation="relu", name="layer2"),
            Dense(3, activation="linear", name="layer3"),
        ])
        # Pick your learning rate here. `learning_rate` replaces the
        # deprecated `lr` keyword, which newer Keras releases reject.
        model.compile(loss='mean_squared_error',
                      optimizer=Adam(learning_rate=0.0001))
        # This is where you import your pretrained weights:
        # model.load_weights("DinoGameSpeed4.h5")
        self.model = model
        self.memory = []
        # summary() prints the table itself; wrapping it in print() would
        # additionally print "None".
        self.model.summary()
        self.xTrain = []
        self.yTrain = []
        self.loss = []
        self.location = 0

    def predict(self, state):
        """Return the network's Q-values for one state as a (1, 3) array."""
        singleBatch = np.reshape(state, (1,) + self.stateShape)
        return self.model.predict(singleBatch, verbose=0)

    def act(self, state, epsilon=0.0):
        """Choose an action index (0, 1 or 2) for ``state``.

        Epsilon-greedy selection: with probability ``epsilon`` a uniformly
        random action is returned, otherwise the action with the highest
        predicted Q-value.

        ``epsilon`` defaults to 0 (always greedy), which is what the previous
        implementation effectively did because it overwrote its schedule with
        ``epsilon = 0``. Pass ``epsilon=None`` to use that schedule instead:
        0.004, rising to 0.05 once more than 1000 steps have been reached in
        the episode (``self.location``).

        A softmax policy is an alternative, e.g.
        ``np.random.choice(3, p=softmax(qval.flatten()))``.
        """
        qval = self.predict(state)
        if epsilon is None:
            epsilon = 0.05 if self.location > 1000 else 0.004
        if np.random.random() < epsilon:
            return np.random.choice(range(3))
        return np.argmax(qval.flatten())

    # This function stores experiences in the experience replay
    def remember(self, state, nextState, action, reward, done, location):
        """Append one transition to the replay buffer.

        ``location`` (the caller's step counter) is recorded on the agent and
        is read by the optional epsilon schedule in ``act``.
        """
        self.location = location
        self.memory.append(
            np.array([state, nextState, action, reward, done], dtype=object))

    # This is where the AI learns
    def learn(self):
        """Trim the replay buffer if needed, then train on one random batch.

        Only the oldest experiences beyond ``maxMemory`` are dropped, so the
        agent keeps learning from recent play instead of starting again from
        an empty buffer. Returns without training while fewer than
        ``batchSize`` experiences are stored.
        """
        overflow = len(self.memory) - self.maxMemory
        if overflow > 0:
            del self.memory[:overflow]
            print("trimming memory")
        if len(self.memory) < self.batchSize:
            print("too little info")
            return
        batch = random.sample(self.memory, self.batchSize)

        self.learnBatch(batch)

    def learnBatch(self, batch, alpha=0.9, gamma=0.95):
        """Run one Q-learning update on ``batch`` and fit the network for an epoch.

        Args:
            batch: sequence of experiences as stored by ``remember``, each
                unpacking to (state, nextState, action, reward, done).
            alpha: step size of the Q-value update; 1 replaces the old
                estimate with the new target, smaller values blend the two.
            gamma: discount factor applied to the best next-state Q-value.
                This is what determines how future oriented the AI is
                (bigger number, up to 1 -> more future oriented).
        """
        count = len(batch)
        if count == 0:
            return

        states, nextStates, actions, rewards, dones = zip(*batch)
        # float32 is what the network computes in and halves the size of the
        # training arrays compared to float64.
        stateBatch = np.reshape(
            np.asarray(states, dtype=np.float32), (count,) + self.stateShape)
        nextStateBatch = np.reshape(
            np.asarray(nextStates, dtype=np.float32),
            (count,) + self.stateShape)
        actions = np.asarray(actions, dtype=int)
        rewards = np.asarray(rewards, dtype=np.float64)
        # A transition is terminal when it was flagged as done; the reward
        # threshold is kept so batches built for the old rule behave the same.
        terminal = np.asarray(dones, dtype=bool) | (rewards < -5)

        # np.array() copies, so the targets can be edited in place.
        targets = np.array(self.model.predict(stateBatch, verbose=0))
        nextQ = np.array(self.model.predict(nextStateBatch, verbose=0))

        rows = np.arange(count)
        qval = targets[rows, actions]
        # This is the q learning update rule.
        updated = qval + alpha * (rewards + gamma * nextQ.max(axis=1) - qval)
        # Terminal transitions have no future, so their target is the reward.
        targets[rows, actions] = np.where(terminal, rewards, updated)

        history = self.model.fit(
            stateBatch, targets, batch_size=5, epochs=1, verbose=0)
        loss = history.history.get("loss")[0]
        print("LOSS: ", loss)
        self.loss.append(loss)

Next we have the environment class. It interacts with the actual Chrome dino game and gathers screenshots for us. It then analyzes the screenshots and determines when the game is done. Make sure to tweak your screenshot viewport in the `__init__` function.

In [11]:

class Enviornment:
    """Wrapper around the Chrome dino game.

    Sends key presses with ``keyboard``, grabs screenshots with ``mss`` and
    turns them into stacked-frame states, rewards and a game-over flag.
    """

    # Size of one processed frame (rows, columns).
    frameHeight = 76
    frameWidth = 384
    # Action index -> key to hold down. Index 2 means "no key".
    actionKeys = {0: 'space', 1: 'down'}
    noAction = 2

    def __init__(self, showDebugWindow=True):
        """Set up the screenshot region and the frame history.

        Args:
            showDebugWindow: when true, ``_done`` shows the reset-button crop
                in an OpenCV window on every step, which is useful while
                tuning the viewport. Each display waits 25 ms for a key
                press, so turning it off speeds up the step loop.
        """
        ########
        # These are some screenshot parameters that worked well for different
        # resolutions. Use them as a starting point, and use the OpenCV
        # snippet in _processImg to tweak your screenshot window.
        # Note that the lower the resolution you use, the faster the code
        # runs: dropping the screen resolution from 4k to 720p gave a 200%
        # screenshot framerate increase.

        # self.mon = {'top': 243, 'left': 0, 'width': 1366, 'height': 270}  # 720p resolution
        self.mon = {'top': 380, 'left': 0, 'width': 1920, 'height': 380}  # 1080p resolution
        # self.mon = {'top': 1000, 'left': 0, 'width': 3840, 'height': 760}  # 4k resolution
        ########

        self.sct = mss()
        self.counter = 0
        self.startTime = -1
        self.imageBank = []  # most recent processed frames, oldest first
        self.imageBankLength = 4  # number of frames for the conv net
        self.actionMemory = 2  # init as 2 to show no action taken
        self.showDebugWindow = showDebugWindow

    def startGame(self):
        """Count down so the user can click on the Chrome tab after starting the code."""
        for i in reversed(range(5)):
            print("game starting in ", i)
            time.sleep(1)

    def step(self, action):
        """Apply ``action``, capture the screen and return (state, reward, done).

        Keys are held between steps: a key is only released or pressed when
        the action differs from the previous one.
        """
        if action != self.actionMemory:
            if self.actionMemory != self.noAction:
                keyboard.release(self.actionKeys.get(self.actionMemory))
            if action != self.noAction:
                keyboard.press(self.actionKeys.get(action))
        self.actionMemory = action

        # This is where the screenshot happens; only channel 0 is used.
        screenshot = self.sct.grab(self.mon)
        img = np.array(screenshot)[:, :, 0]
        processedImg = self._processImg(img)
        state = self._imageBankHandler(processedImg)
        done = self._done(processedImg)
        reward = self._getReward(done)
        return state, reward, done

    def reset(self):
        """Tap space to (re)start the game and return the first step's result."""
        self.startTime = time.time()
        keyboard.press("space")
        time.sleep(0.5)
        keyboard.release("space")
        return self.step(0)

    def _processImg(self, img):
        """Downscale, normalise polarity and contrast-stretch one screenshot.

        Returns a (76, 384) float array with values in [0, 1].
        """
        img = Image.fromarray(img)
        # Image.LANCZOS is the same filter as the deprecated Image.ANTIALIAS
        # alias, which newer Pillow releases no longer provide.
        img = img.resize((self.frameWidth, self.frameHeight), Image.LANCZOS)
        # A bright frame is inverted so both colour modes look alike.
        if np.sum(img) > 2000000:
            img = ImageOps.invert(img)
        img = self._contrast(img)

        # You can use the following OpenCV code segment to test your in game
        # screenshots:
        # cv2.imshow("image", img)
        # if cv2.waitKey(25) & 0xFF == ord('q'):
        #     cv2.destroyAllWindows()

        return np.reshape(img, (self.frameHeight, self.frameWidth))

    def _contrast(self, pixvals, minval=32, maxval=171):
        """Clip pixel values to [minval, maxval] and rescale them to [0, 1].

        The defaults are fixed bounds; percentiles of the image
        (e.g. the 2nd and 98th) are an alternative.
        """
        clipped = np.clip(np.asarray(pixvals, dtype=np.float64),
                          minval, maxval)
        return (clipped - minval) / (maxval - minval)

    def _imageBankHandler(self, img):
        """Stack the new frame with stored frames into a (76, 384, 4) state.

        Channel 0 is the new frame; channels 1-3 are the three oldest frames
        in the bank, oldest first. The newest stored frame is not part of the
        state (this matches the original channel layout). Afterwards the
        oldest frame is dropped and the new one stored.
        """
        frame = np.reshape(np.array(img), (self.frameHeight, self.frameWidth))
        # On the very first call the bank is filled with copies of the frame.
        while len(self.imageBank) < self.imageBankLength:
            self.imageBank.append(frame)

        state = np.stack(
            [frame, self.imageBank[0], self.imageBank[1], self.imageBank[2]],
            axis=-1)

        self.imageBank.pop(0)
        self.imageBank.append(frame)

        return state

    def _getReward(self, done):
        """Return -15 when the game is over, otherwise 1 per surviving step."""
        return -15 if done else 1

    def _done(self, img):
        """Return True when the reset button is visible, i.e. the game ended.

        This method checks if the game is done by summing the pixel values of
        the area of the screen at the reset button and comparing the sum to
        predetermined values. You might need to fine tune these values since
        each person's viewport will be different; print ``val`` and its
        distance to both expected sums to find the right values for your
        setup.
        """
        region = np.array(img)[20:40, 180:203]

        if self.showDebugWindow:
            cv2.imshow("image", region)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()

        val = np.sum(region)
        # Sum of the reset pixels when the game ends in the night mode
        expectedVal = 331.9352517985612
        # Sum of the reset pixels when the game ends in the day mode
        expectedVal2 = 243.53

        # These tolerances seem to work well.
        matchesNight = np.absolute(val - expectedVal) <= 15
        matchesDay = np.absolute(val - expectedVal2) <= 100
        return bool(matchesNight or matchesDay)

Lastly we have the learning loop. Here the Agent/AI is created, the environment wrapper is made, and then the AI plays the game. To actually start the game, make sure to run this cell and then click on the Chrome dinosaur game. The code gives you a 5-second buffer (adjustable in `startGame`) between the code starting and you switching to the Chrome game. This is needed because the AI is not in direct control of the Chrome game; it controls it by emulating keyboard strokes.

In [12]:
# Learning loop: build the agent and the environment wrapper, let the agent
# play episodes, store the experiences and train after every episode.
plotX = []  # total reward of every finished episode, for plotting
while True:
    # NOTE: every pass of this outer loop creates a fresh Agent. Unless the
    # load_weights line in Agent.__init__ is enabled, training restarts from
    # scratch after each block of episodes.
    agent = Agent()  # the network has 3 outputs: 0 = space, 1 = down, 2 = no key
    env = Enviornment()
    env.startGame()
    # 200 refers to the number of episodes/iterations of the game to play
    for i in tqdm(range(200)):
        # The done flag of the reset step is ignored on purpose; each episode
        # starts with done = False.
        state, reward, _ = env.reset()
        epReward = 0
        done = False
        stepCounter = 0
        episodeTime = time.time()
        while not done:
            action = agent.act(state)
            nextState, reward, done = env.step(action)
            ########
            # This next section stores more memory of later parts of the game.
            # If you don't do this, most of the experience replay fills up
            # with the starting parts of the game since they are played more
            # often. A more elegant approach to this is "Prioritized
            # experience replay", but this is an effective alternative too.
            if stepCounter > 700:
                for _ in range(5):
                    agent.remember(state, nextState, action, reward, done, stepCounter)
            elif stepCounter > 40:
                agent.remember(state, nextState, action, reward, done, stepCounter)
            if done:  # game ended
                # The losing transition is stored several times so it is
                # sampled more often.
                for _ in range(10):
                    agent.remember(state, nextState, action, reward, done, stepCounter)
                print("breaking")
                break
            ########
            state = nextState
            stepCounter += 1
            epReward += reward

        # post episode
        if stepCounter != 0:
            print("Avg Frame-Rate: ", 1 / ((time.time() - episodeTime) / stepCounter))
        plotX.append(epReward)
        print('Reward' + str(epReward))
        agent.learn()

        # Checkpoint the weights every 20 episodes.
        if i % 20 == 0:
            agent.model.save_weights("DinoGameSpeed4.h5")
            print("Saved model to disk")

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 35, 189, 32)       8224      
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 16, 93, 32)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 13, 90, 64)        32832     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 3, 28, 64)        0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 3, 28, 128)        8320      
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 1, 9, 128)       

  img = img.resize((384, 76), Image.ANTIALIAS)




  0%|          | 1/200 [00:05<19:27,  5.87s/it]

breaking
Avg Frame-Rate:  4.403974949625614
Reward23
too little info
Saved model to disk


  1%|          | 2/200 [00:06<09:33,  2.90s/it]

breaking
Reward0
too little info


  2%|▏         | 3/200 [00:12<13:11,  4.02s/it]

breaking
Avg Frame-Rate:  4.645668492937024
Reward22
too little info


  2%|▏         | 4/200 [00:12<09:02,  2.77s/it]

breaking
Reward0
too little info


  2%|▎         | 5/200 [00:18<12:01,  3.70s/it]

breaking
Avg Frame-Rate:  4.409163812336233
Reward21
too little info


  3%|▎         | 6/200 [00:19<08:47,  2.72s/it]

breaking
Reward0
too little info


  4%|▎         | 7/200 [00:25<12:13,  3.80s/it]

breaking
Avg Frame-Rate:  4.4150086255636944
Reward24
too little info


  4%|▍         | 8/200 [00:25<09:05,  2.84s/it]

breaking
Reward0
too little info


  4%|▍         | 9/200 [00:31<12:17,  3.86s/it]

breaking
Avg Frame-Rate:  3.8308086045814234
Reward21
too little info


  5%|▌         | 10/200 [00:32<09:14,  2.92s/it]

breaking
Reward0
too little info


  6%|▌         | 11/200 [00:38<11:32,  3.67s/it]

breaking
Avg Frame-Rate:  4.594784818192053
Reward22
too little info


  6%|▌         | 12/200 [00:38<08:47,  2.81s/it]

breaking
Reward0
too little info


  6%|▋         | 13/200 [00:44<11:08,  3.58s/it]

breaking
Avg Frame-Rate:  4.424574000978947
Reward21
too little info


  7%|▋         | 14/200 [00:45<09:12,  2.97s/it]

breaking
Avg Frame-Rate:  4.073343023898079
Reward4
too little info


  8%|▊         | 15/200 [00:51<11:29,  3.72s/it]

breaking
Avg Frame-Rate:  4.513637271680842
Reward22
too little info


  8%|▊         | 16/200 [00:52<08:44,  2.85s/it]

breaking
Reward0
too little info


  8%|▊         | 17/200 [00:57<11:01,  3.62s/it]

breaking
Avg Frame-Rate:  4.976339727830912
Reward24
too little info


  9%|▉         | 18/200 [00:58<08:26,  2.78s/it]

breaking
Reward0
too little info


 10%|▉         | 19/200 [01:03<10:39,  3.53s/it]

breaking
Avg Frame-Rate:  4.4716561291121115
Reward21
too little info


 10%|█         | 20/200 [01:04<08:09,  2.72s/it]

breaking
Reward0
too little info


 10%|█         | 21/200 [01:10<10:37,  3.56s/it]

breaking
Avg Frame-Rate:  4.720692862259343
Reward23
too little info
Saved model to disk


 11%|█         | 22/200 [01:10<08:08,  2.74s/it]

breaking
Reward0
too little info


 12%|█▏        | 23/200 [01:16<10:23,  3.52s/it]

breaking
Avg Frame-Rate:  4.416579731928619
Reward21
too little info


 12%|█▏        | 24/200 [01:17<07:59,  2.72s/it]

breaking
Reward0
too little info


 12%|█▎        | 25/200 [01:22<10:10,  3.49s/it]

breaking
Avg Frame-Rate:  4.6888447699272255
Reward22
too little info
breaking
Reward0


 13%|█▎        | 26/200 [01:28<12:38,  4.36s/it]

LOSS:  46.37673568725586
breaking
Avg Frame-Rate:  4.348907023409786
Reward21


 14%|█▎        | 27/200 [01:38<16:50,  5.84s/it]

LOSS:  5.304029941558838
breaking
Avg Frame-Rate:  4.735277854625273
Reward22


 14%|█▍        | 28/200 [01:47<19:31,  6.81s/it]

LOSS:  6.589424133300781
breaking
Avg Frame-Rate:  4.732786914038602
Reward23


 14%|█▍        | 29/200 [01:56<21:36,  7.58s/it]

LOSS:  7.850617408752441
breaking
Avg Frame-Rate:  4.584063134549005
Reward22


 15%|█▌        | 30/200 [02:05<23:01,  8.13s/it]

LOSS:  7.219976902008057
breaking
Avg Frame-Rate:  4.5159798233792
Reward21


 16%|█▌        | 31/200 [02:15<23:52,  8.48s/it]

LOSS:  3.51430344581604
breaking
Avg Frame-Rate:  4.468609827022735
Reward21


 16%|█▌        | 32/200 [02:24<24:24,  8.71s/it]

LOSS:  1.4557464122772217
breaking
Avg Frame-Rate:  4.888705755471042
Reward23


 16%|█▋        | 33/200 [02:33<24:38,  8.85s/it]

LOSS:  1.270763635635376
breaking
Avg Frame-Rate:  4.655623841072433
Reward22


 17%|█▋        | 34/200 [02:42<24:48,  8.97s/it]

LOSS:  0.9996446967124939
breaking
Avg Frame-Rate:  4.668639231384817
Reward22


 18%|█▊        | 35/200 [02:52<24:58,  9.08s/it]

LOSS:  0.816386342048645
breaking
Avg Frame-Rate:  4.897709653305346
Reward23


 18%|█▊        | 36/200 [03:01<24:57,  9.13s/it]

LOSS:  0.4850957989692688
breaking
Avg Frame-Rate:  4.711685873202296
Reward22


 18%|█▊        | 37/200 [03:10<24:50,  9.14s/it]

LOSS:  0.16127200424671173
breaking
Avg Frame-Rate:  4.914471509725936
Reward23


 19%|█▉        | 38/200 [03:19<24:49,  9.20s/it]

LOSS:  0.16719627380371094
breaking
Avg Frame-Rate:  4.380435373989975
Reward21


 20%|█▉        | 39/200 [03:29<24:51,  9.26s/it]

LOSS:  0.11897505074739456
breaking
Avg Frame-Rate:  4.435413276944879
Reward21


 20%|██        | 40/200 [03:38<24:49,  9.31s/it]

LOSS:  0.11602494865655899
breaking
Avg Frame-Rate:  4.641741507822826
Reward22


 20%|██        | 41/200 [03:48<24:47,  9.36s/it]

LOSS:  0.07066487520933151
Saved model to disk
breaking
Avg Frame-Rate:  5.143598897039212
Reward24


 21%|██        | 42/200 [03:57<24:24,  9.27s/it]

LOSS:  0.09368808567523956
breaking
Avg Frame-Rate:  4.304739509266527
Reward20


 22%|██▏       | 43/200 [04:06<24:16,  9.27s/it]

LOSS:  1.0197142362594604
breaking
Avg Frame-Rate:  4.703327377719064
Reward22


 22%|██▏       | 44/200 [04:15<24:06,  9.27s/it]

LOSS:  0.5591207146644592
breaking
Avg Frame-Rate:  4.842190572281429
Reward23


 22%|██▎       | 45/200 [04:25<24:08,  9.34s/it]

LOSS:  0.3548394739627838
breaking
Avg Frame-Rate:  4.643258559550905
Reward22


 23%|██▎       | 46/200 [04:34<23:56,  9.33s/it]

LOSS:  0.234844371676445
breaking
Avg Frame-Rate:  4.519821547878187
Reward21


 24%|██▎       | 47/200 [04:44<23:48,  9.33s/it]

LOSS:  0.16452452540397644
breaking
Avg Frame-Rate:  4.858306573142099
Reward24


 24%|██▍       | 48/200 [04:54<24:08,  9.53s/it]

LOSS:  0.14101360738277435
breaking
Avg Frame-Rate:  4.182488767979655
Reward28


 24%|██▍       | 49/200 [05:05<25:14, 10.03s/it]

LOSS:  0.2211381494998932
breaking
Avg Frame-Rate:  4.491413574473462
Reward21


 25%|██▌       | 50/200 [05:14<24:32,  9.82s/it]

LOSS:  0.23523910343647003
breaking
Avg Frame-Rate:  4.44718917713979
Reward21


 26%|██▌       | 51/200 [05:23<24:01,  9.68s/it]

LOSS:  0.10784419625997543
breaking
Avg Frame-Rate:  4.506089563664787
Reward21


 26%|██▌       | 52/200 [05:33<23:31,  9.54s/it]

LOSS:  0.057116590440273285
breaking
Avg Frame-Rate:  4.400659076761938
Reward21


 26%|██▋       | 53/200 [05:42<23:20,  9.53s/it]

LOSS:  0.06038232892751694
breaking
Avg Frame-Rate:  4.424652682855962
Reward21


 27%|██▋       | 54/200 [05:52<23:06,  9.50s/it]

LOSS:  0.05182483419775963
breaking
Avg Frame-Rate:  4.621705918368002
Reward22


 28%|██▊       | 55/200 [06:01<22:52,  9.47s/it]

LOSS:  0.05863158032298088
breaking
Avg Frame-Rate:  4.604993201904146
Reward22


 28%|██▊       | 56/200 [06:10<22:43,  9.47s/it]

LOSS:  0.03612374886870384
breaking
Avg Frame-Rate:  4.416368248897568
Reward21


 28%|██▊       | 57/200 [06:20<22:25,  9.41s/it]

LOSS:  0.037701234221458435
breaking
Avg Frame-Rate:  4.620092802719743
Reward22


 29%|██▉       | 58/200 [06:29<22:17,  9.42s/it]

LOSS:  0.022486811503767967
breaking
Avg Frame-Rate:  4.33378514125731
Reward20


 30%|██▉       | 59/200 [06:38<21:57,  9.35s/it]

LOSS:  0.05470792576670647
breaking
Avg Frame-Rate:  4.715433811412569
Reward22


 30%|███       | 60/200 [06:47<21:40,  9.29s/it]

LOSS:  0.04039537534117699
breaking
Avg Frame-Rate:  4.508333470677893
Reward21


 30%|███       | 61/200 [06:57<21:35,  9.32s/it]

LOSS:  0.05374212935566902
Saved model to disk
breaking
Avg Frame-Rate:  4.713114183040761
Reward22


 31%|███       | 62/200 [07:06<21:24,  9.31s/it]

LOSS:  0.03606800362467766
breaking
Avg Frame-Rate:  4.6493266225600145
Reward22


 32%|███▏      | 63/200 [07:16<21:18,  9.33s/it]

LOSS:  0.45438405871391296
breaking
Avg Frame-Rate:  4.701344183934834
Reward22


 32%|███▏      | 64/200 [07:25<21:07,  9.32s/it]

LOSS:  0.17761851847171783
breaking
Avg Frame-Rate:  4.5794410268980235
Reward22


 32%|███▎      | 65/200 [07:34<20:58,  9.32s/it]

LOSS:  0.16610023379325867
breaking
Avg Frame-Rate:  4.473518894846557
Reward21


 33%|███▎      | 66/200 [07:44<20:51,  9.34s/it]

LOSS:  0.11292440444231033
breaking
Avg Frame-Rate:  4.630199156691236
Reward22


 34%|███▎      | 67/200 [07:53<20:36,  9.30s/it]

LOSS:  0.08167849481105804
breaking
Avg Frame-Rate:  4.472777191007625
Reward21


 34%|███▍      | 68/200 [08:02<20:29,  9.32s/it]

LOSS:  0.07520215958356857
breaking
Avg Frame-Rate:  4.762006373343285
Reward22


 34%|███▍      | 69/200 [08:11<20:17,  9.29s/it]

LOSS:  0.05908813700079918
breaking
Avg Frame-Rate:  4.582036330866697
Reward22


 35%|███▌      | 70/200 [08:21<20:13,  9.33s/it]

LOSS:  0.05015921592712402
breaking
Avg Frame-Rate:  5.236657541676103
Reward25


 36%|███▌      | 71/200 [08:30<20:07,  9.36s/it]

LOSS:  0.7664244771003723
breaking
Avg Frame-Rate:  4.628450143978302
Reward28


 36%|███▌      | 72/200 [08:43<21:56, 10.29s/it]

LOSS:  0.5198304653167725
breaking
Avg Frame-Rate:  4.328819244283604
Reward20


 36%|███▋      | 73/200 [08:54<22:23, 10.58s/it]

LOSS:  0.47273120284080505
breaking
Avg Frame-Rate:  4.533661517936143
Reward21


 37%|███▋      | 74/200 [09:05<22:31, 10.72s/it]

LOSS:  0.37396061420440674
breaking
Avg Frame-Rate:  4.238949035132923
Reward20


 38%|███▊      | 75/200 [09:16<22:37, 10.86s/it]

LOSS:  0.42374032735824585
breaking
Avg Frame-Rate:  4.18347027619924
Reward20


 38%|███▊      | 76/200 [09:27<22:45, 11.01s/it]

LOSS:  0.24218101799488068
breaking
Avg Frame-Rate:  4.228203622543744
Reward20


 38%|███▊      | 77/200 [09:38<22:32, 10.99s/it]

LOSS:  0.38477644324302673
breaking
Avg Frame-Rate:  8.00013331585236
Reward38


 39%|███▉      | 78/200 [09:47<20:40, 10.16s/it]

LOSS:  0.888077437877655
breaking
Avg Frame-Rate:  8.764370330979641
Reward41


 40%|███▉      | 79/200 [09:55<19:18,  9.57s/it]

LOSS:  0.7272496223449707
breaking
Avg Frame-Rate:  8.807855636995532
Reward42


 40%|████      | 80/200 [10:03<18:28,  9.24s/it]

LOSS:  1.8081163167953491
breaking
Avg Frame-Rate:  8.879675725357966
Reward41


 40%|████      | 81/200 [10:12<17:41,  8.92s/it]

LOSS:  0.6890466213226318
Saved model to disk
breaking
Avg Frame-Rate:  8.792747554653747
Reward41


 41%|████      | 82/200 [10:20<17:17,  8.79s/it]

LOSS:  0.8350394368171692
breaking
Avg Frame-Rate:  8.801859997245362
Reward49


 42%|████▏     | 83/200 [10:29<17:16,  8.86s/it]

LOSS:  0.6144848465919495
breaking
Avg Frame-Rate:  8.781257876588237
Reward41


 42%|████▏     | 84/200 [10:37<16:45,  8.67s/it]

LOSS:  0.3737541437149048
breaking
Avg Frame-Rate:  8.758833716506356
Reward43


 42%|████▎     | 85/200 [10:46<16:33,  8.64s/it]

LOSS:  0.6468428373336792
breaking
Avg Frame-Rate:  8.779189762959172
Reward52


 43%|████▎     | 86/200 [10:55<16:53,  8.89s/it]

LOSS:  0.9600623846054077
breaking
Avg Frame-Rate:  8.725053732965957
Reward41


 44%|████▎     | 87/200 [11:04<16:25,  8.72s/it]

LOSS:  0.8006821870803833
breaking
Avg Frame-Rate:  8.808954722790185
Reward69


 44%|████▍     | 88/200 [11:15<17:48,  9.54s/it]

LOSS:  0.4914136230945587
breaking
Avg Frame-Rate:  7.71914420554796
Reward36


 44%|████▍     | 89/200 [11:23<16:53,  9.13s/it]

LOSS:  1.0676127672195435
breaking
Avg Frame-Rate:  8.792729571554105
Reward41


 45%|████▌     | 90/200 [11:31<16:10,  8.82s/it]

LOSS:  0.592290997505188
breaking
Avg Frame-Rate:  8.750447128723536
Reward43


 46%|████▌     | 91/200 [11:40<15:46,  8.68s/it]

LOSS:  0.7724168300628662
breaking
Avg Frame-Rate:  8.83967632481275
Reward66


 46%|████▌     | 92/200 [11:51<16:51,  9.36s/it]

LOSS:  0.9901403188705444
breaking
Avg Frame-Rate:  8.79667817670479
Reward41


 46%|████▋     | 93/200 [11:59<16:02,  9.00s/it]

LOSS:  0.6056365966796875
breaking
Avg Frame-Rate:  8.853543410541334
Reward41


 47%|████▋     | 94/200 [12:13<18:29, 10.47s/it]

LOSS:  0.6698411703109741
breaking
Avg Frame-Rate:  4.791772628683561
Reward23


 48%|████▊     | 95/200 [12:22<17:51, 10.20s/it]

LOSS:  0.5085455775260925
breaking
Avg Frame-Rate:  4.68750563450835
Reward27


 48%|████▊     | 96/200 [12:33<17:44, 10.24s/it]

LOSS:  0.40263599157333374
breaking
Avg Frame-Rate:  4.529114436789829
Reward25


 48%|████▊     | 97/200 [12:43<17:28, 10.18s/it]

LOSS:  0.4197690188884735
breaking
Avg Frame-Rate:  4.901041541905016
Reward28


 49%|████▉     | 98/200 [12:53<17:21, 10.22s/it]

LOSS:  0.42597562074661255
breaking
Avg Frame-Rate:  4.703139248107935
Reward27


 50%|████▉     | 99/200 [13:03<17:16, 10.26s/it]

LOSS:  0.5662499070167542
breaking
Avg Frame-Rate:  4.959341822986086
Reward30


 50%|█████     | 100/200 [13:16<18:09, 10.90s/it]

LOSS:  0.6863201260566711
breaking
Avg Frame-Rate:  4.507984470481671
Reward26


 50%|█████     | 101/200 [13:28<18:40, 11.32s/it]

LOSS:  1.028897762298584
Saved model to disk
breaking
Avg Frame-Rate:  6.670194940316871
Reward31


 51%|█████     | 102/200 [13:36<16:54, 10.35s/it]

LOSS:  0.6887466311454773
breaking
Avg Frame-Rate:  8.821003704877217
Reward41


 52%|█████▏    | 103/200 [13:44<15:39,  9.68s/it]

LOSS:  0.49701470136642456
breaking
Avg Frame-Rate:  8.732711331313144
Reward41


 52%|█████▏    | 104/200 [13:52<14:46,  9.23s/it]

LOSS:  0.4642258584499359
breaking
Avg Frame-Rate:  8.70666193579954
Reward41


 52%|█████▎    | 105/200 [14:01<14:07,  8.92s/it]

LOSS:  0.3102351427078247
breaking
Avg Frame-Rate:  8.854070268623058
Reward42


 53%|█████▎    | 106/200 [14:09<13:37,  8.69s/it]

LOSS:  0.3023432493209839
breaking
Avg Frame-Rate:  8.75141921503494
Reward41


 54%|█████▎    | 107/200 [14:17<13:13,  8.53s/it]

LOSS:  0.41765040159225464
breaking
Avg Frame-Rate:  8.845539173896976
Reward56


 54%|█████▍    | 108/200 [14:27<13:39,  8.91s/it]

LOSS:  0.31653058528900146
breaking
Avg Frame-Rate:  8.782788995552488
Reward41


 55%|█████▍    | 109/200 [14:35<13:10,  8.69s/it]

LOSS:  0.3511277139186859
breaking
Avg Frame-Rate:  8.78411020528025
Reward41


 55%|█████▌    | 110/200 [14:43<12:49,  8.55s/it]

LOSS:  0.712178647518158
breaking
Avg Frame-Rate:  8.844448340483268
Reward42


 56%|█████▌    | 111/200 [14:51<12:32,  8.45s/it]

LOSS:  0.6256694197654724
breaking
Avg Frame-Rate:  8.86843153188901
Reward42


 56%|█████▌    | 112/200 [14:59<12:16,  8.37s/it]

LOSS:  0.5736272931098938
breaking
Avg Frame-Rate:  8.765683323710439
Reward41


 56%|█████▋    | 113/200 [15:08<12:01,  8.29s/it]

LOSS:  0.47981715202331543
breaking
Avg Frame-Rate:  8.879350237628442
Reward42


 57%|█████▋    | 114/200 [15:16<11:52,  8.29s/it]

LOSS:  0.44673457741737366
breaking
Avg Frame-Rate:  8.772341900753629
Reward41


 57%|█████▊    | 115/200 [15:24<11:40,  8.24s/it]

LOSS:  0.5638807415962219
breaking
Avg Frame-Rate:  8.728426950010112
Reward41


 58%|█████▊    | 116/200 [15:32<11:30,  8.22s/it]

LOSS:  0.3610300123691559
breaking
Avg Frame-Rate:  8.817372750368774
Reward41


 58%|█████▊    | 117/200 [15:41<11:26,  8.27s/it]

LOSS:  0.3061177134513855
breaking
Avg Frame-Rate:  8.803397463574214
Reward41


 59%|█████▉    | 118/200 [15:49<11:16,  8.24s/it]

LOSS:  0.39685961604118347
breaking
Avg Frame-Rate:  8.734773019993172
Reward41


 60%|█████▉    | 119/200 [15:57<11:06,  8.22s/it]

LOSS:  0.34478920698165894
breaking
Avg Frame-Rate:  7.835092985571121
Reward37


 60%|██████    | 120/200 [16:05<10:56,  8.21s/it]

LOSS:  0.39046576619148254
breaking
Avg Frame-Rate:  8.785056576781283
Reward61


 60%|██████    | 121/200 [16:16<11:40,  8.87s/it]

LOSS:  0.219899520277977
Saved model to disk
breaking
Avg Frame-Rate:  8.650069514216453
Reward46


 61%|██████    | 122/200 [16:24<11:30,  8.85s/it]

LOSS:  0.34771084785461426
breaking
Avg Frame-Rate:  8.718290141708716
Reward42


 62%|██████▏   | 123/200 [16:33<11:07,  8.67s/it]

LOSS:  0.3968653678894043
breaking
Avg Frame-Rate:  8.708234618651035
Reward41


 62%|██████▏   | 124/200 [16:41<10:47,  8.52s/it]

LOSS:  0.35859596729278564
breaking
Avg Frame-Rate:  8.723863521817538
Reward41


 62%|██████▎   | 125/200 [16:49<10:30,  8.41s/it]

LOSS:  0.5329290628433228
breaking
Avg Frame-Rate:  8.702562493547715
Reward41


 63%|██████▎   | 126/200 [16:57<10:16,  8.34s/it]

LOSS:  0.43165597319602966
breaking
Avg Frame-Rate:  8.750509053280116
Reward49


 64%|██████▎   | 127/200 [17:06<10:24,  8.56s/it]

LOSS:  0.37056785821914673
breaking
Avg Frame-Rate:  8.884691495431714
Reward108


 64%|██████▍   | 128/200 [17:22<12:49, 10.68s/it]

LOSS:  0.808688223361969
breaking
Avg Frame-Rate:  8.805394827183724
Reward41


 64%|██████▍   | 129/200 [17:30<11:43,  9.91s/it]

LOSS:  0.6283281445503235
breaking
Avg Frame-Rate:  8.834914799815358
Reward63


 65%|██████▌   | 130/200 [17:40<11:46, 10.10s/it]

LOSS:  0.6659042835235596
breaking
Avg Frame-Rate:  8.7722194447228
Reward46


 66%|██████▌   | 131/200 [17:49<11:08,  9.68s/it]

LOSS:  0.91728675365448
breaking
Avg Frame-Rate:  8.935488851687154
Reward63


 66%|██████▌   | 132/200 [18:00<11:15,  9.93s/it]

LOSS:  1.164047122001648
breaking
Avg Frame-Rate:  8.867964334084904
Reward60


 66%|██████▋   | 133/200 [18:10<11:10, 10.01s/it]

LOSS:  1.1494853496551514
breaking
Avg Frame-Rate:  8.70092406148685
Reward41


 67%|██████▋   | 134/200 [18:18<10:24,  9.46s/it]

LOSS:  1.3786871433258057
breaking
Avg Frame-Rate:  8.90352069997426
Reward83


 68%|██████▊   | 135/200 [18:31<11:19, 10.46s/it]

LOSS:  0.9104315042495728
breaking
Avg Frame-Rate:  8.72162915693935
Reward41


 68%|██████▊   | 136/200 [18:39<10:25,  9.77s/it]

LOSS:  1.3207786083221436
breaking
Avg Frame-Rate:  8.671806789967164
Reward41


 68%|██████▊   | 137/200 [18:47<09:45,  9.29s/it]

LOSS:  1.729158878326416
breaking
Avg Frame-Rate:  8.953331612323092
Reward122


 69%|██████▉   | 138/200 [19:04<12:00, 11.63s/it]

LOSS:  1.6509685516357422
breaking
Avg Frame-Rate:  8.852768784839157
Reward42


 70%|██████▉   | 139/200 [19:12<10:46, 10.60s/it]

LOSS:  1.1432387828826904
breaking
Avg Frame-Rate:  8.84254688133174
Reward42


 70%|███████   | 140/200 [19:21<09:53,  9.89s/it]

LOSS:  1.0611165761947632
breaking
Avg Frame-Rate:  8.76926455467153
Reward55


 70%|███████   | 141/200 [19:30<09:41,  9.86s/it]

LOSS:  1.054007649421692
Saved model to disk
breaking
Avg Frame-Rate:  8.588539327869315
Reward94


 71%|███████   | 142/200 [19:45<10:51, 11.23s/it]

LOSS:  1.4450784921646118
breaking
Avg Frame-Rate:  8.864537692543538
Reward58


 72%|███████▏  | 143/200 [19:55<10:19, 10.87s/it]

LOSS:  1.4103113412857056
breaking
Avg Frame-Rate:  8.85922634538164
Reward60


 72%|███████▏  | 144/200 [20:05<09:58, 10.69s/it]

LOSS:  1.1631431579589844
breaking
Avg Frame-Rate:  8.836202223707673
Reward54


 72%|███████▎  | 145/200 [20:15<09:29, 10.36s/it]

LOSS:  1.0109448432922363
breaking
Avg Frame-Rate:  8.85911954114904
Reward67


 73%|███████▎  | 146/200 [20:26<09:30, 10.56s/it]

LOSS:  1.1627612113952637
breaking
Avg Frame-Rate:  8.779322135643552
Reward43


 74%|███████▎  | 147/200 [20:34<08:44,  9.89s/it]

LOSS:  1.206173062324524
breaking
Avg Frame-Rate:  8.76375936241443
Reward55


 74%|███████▍  | 148/200 [20:44<08:31,  9.84s/it]

LOSS:  0.8371768593788147
breaking
Avg Frame-Rate:  8.809304021697168
Reward64


 74%|███████▍  | 149/200 [20:55<08:36, 10.12s/it]

LOSS:  1.1480984687805176
breaking
Avg Frame-Rate:  8.882781319826295
Reward105


 75%|███████▌  | 150/200 [21:10<09:43, 11.67s/it]

LOSS:  1.7791328430175781
breaking
Avg Frame-Rate:  8.955999094064762
Reward133


 76%|███████▌  | 151/200 [21:28<11:10, 13.68s/it]

LOSS:  1.1366019248962402
breaking
Avg Frame-Rate:  8.815757044396973
Reward53


 76%|███████▌  | 152/200 [21:38<09:56, 12.43s/it]

LOSS:  1.0563572645187378
breaking
Avg Frame-Rate:  8.895278931251699
Reward107


 76%|███████▋  | 153/200 [21:53<10:27, 13.36s/it]

LOSS:  1.3364931344985962
breaking
Avg Frame-Rate:  8.938093798431547
Reward109


 77%|███████▋  | 154/200 [22:09<10:46, 14.06s/it]

LOSS:  1.191281795501709
breaking
Avg Frame-Rate:  8.989478609005445
Reward144


 78%|███████▊  | 155/200 [22:29<11:47, 15.71s/it]

LOSS:  1.242557168006897
breaking
Avg Frame-Rate:  8.799193395803579
Reward48


 78%|███████▊  | 156/200 [22:38<10:02, 13.69s/it]

LOSS:  0.956518292427063
breaking
Avg Frame-Rate:  8.894180723423442
Reward66


 78%|███████▊  | 157/200 [22:48<09:12, 12.86s/it]

LOSS:  1.6641590595245361
breaking
Avg Frame-Rate:  8.36553855117363
Reward68


 79%|███████▉  | 158/200 [23:00<08:43, 12.47s/it]

LOSS:  2.1868155002593994
breaking
Avg Frame-Rate:  8.865896067437818
Reward75


 80%|███████▉  | 159/200 [23:12<08:24, 12.30s/it]

LOSS:  2.7560155391693115
breaking
Avg Frame-Rate:  8.980056165207843
Reward126


 80%|████████  | 160/200 [23:29<09:14, 13.86s/it]

LOSS:  1.944821834564209
breaking
Avg Frame-Rate:  8.766060285565764
Reward43


 80%|████████  | 161/200 [23:38<07:57, 12.23s/it]

LOSS:  1.3632293939590454
Saved model to disk
breaking
Avg Frame-Rate:  8.948566254237006
Reward104


 81%|████████  | 162/200 [23:53<08:18, 13.11s/it]

LOSS:  1.4755353927612305
breaking
Avg Frame-Rate:  8.755719962112925
Reward41


 82%|████████▏ | 163/200 [24:01<07:10, 11.64s/it]

LOSS:  1.3860788345336914
breaking
Avg Frame-Rate:  8.635609788068939
Reward54


 82%|████████▏ | 164/200 [24:11<06:38, 11.07s/it]

LOSS:  1.4292794466018677
breaking
Avg Frame-Rate:  8.756136359019054
Reward41


 82%|████████▎ | 165/200 [24:19<05:57, 10.21s/it]

LOSS:  1.9656506776809692
breaking
Avg Frame-Rate:  8.717172254804783
Reward41


 83%|████████▎ | 166/200 [24:27<05:26,  9.61s/it]

LOSS:  1.6678766012191772
breaking
Avg Frame-Rate:  8.872433067350144
Reward66


 84%|████████▎ | 167/200 [24:38<05:30, 10.02s/it]

LOSS:  1.9535542726516724
breaking
Avg Frame-Rate:  8.670691954855466
Reward52


 84%|████████▍ | 168/200 [24:48<05:15,  9.86s/it]

LOSS:  2.641047716140747
breaking
Avg Frame-Rate:  8.914188557340815
Reward97


 84%|████████▍ | 169/200 [25:02<05:47, 11.21s/it]

LOSS:  2.402700185775757
breaking
Avg Frame-Rate:  8.950755883502977
Reward132


 85%|████████▌ | 170/200 [25:20<06:39, 13.33s/it]

LOSS:  2.5573313236236572
breaking
Avg Frame-Rate:  8.93793550217134
Reward99


 86%|████████▌ | 171/200 [25:35<06:37, 13.71s/it]

LOSS:  1.9924386739730835
breaking
Avg Frame-Rate:  8.873956017336814
Reward73


 86%|████████▌ | 172/200 [25:47<06:07, 13.13s/it]

LOSS:  2.597766637802124
breaking
Avg Frame-Rate:  8.909448064222868
Reward54


 86%|████████▋ | 173/200 [25:56<05:25, 12.07s/it]

LOSS:  2.731468677520752
breaking
Avg Frame-Rate:  8.693370426888297
Reward41


 87%|████████▋ | 174/200 [26:05<04:44, 10.94s/it]

LOSS:  2.401573896408081
breaking
Avg Frame-Rate:  8.883338910609673
Reward98


 88%|████████▊ | 175/200 [26:19<05:00, 12.03s/it]

LOSS:  2.3744277954101562
breaking
Avg Frame-Rate:  8.013536531469951
Reward38


 88%|████████▊ | 176/200 [26:28<04:21, 10.91s/it]

LOSS:  1.7890814542770386
breaking
Avg Frame-Rate:  8.918426220544793
Reward84


 88%|████████▊ | 177/200 [26:41<04:24, 11.52s/it]

LOSS:  2.547022581100464
breaking
Avg Frame-Rate:  8.809474661510913
Reward75


 89%|████████▉ | 178/200 [26:53<04:16, 11.67s/it]

LOSS:  2.1227264404296875
breaking
Avg Frame-Rate:  8.891705679025673
Reward93


 90%|████████▉ | 179/200 [27:07<04:20, 12.39s/it]

LOSS:  2.4722957611083984
breaking
Avg Frame-Rate:  8.929769749709253
Reward114


 90%|█████████ | 180/200 [27:23<04:31, 13.57s/it]

LOSS:  3.0963144302368164
breaking
Avg Frame-Rate:  8.789710973648845
Reward42


 90%|█████████ | 181/200 [27:31<03:48, 12.03s/it]

LOSS:  2.63875412940979
Saved model to disk
breaking
Avg Frame-Rate:  8.827585554999317
Reward57


 91%|█████████ | 182/200 [27:41<03:25, 11.43s/it]

LOSS:  2.1615679264068604
breaking
Avg Frame-Rate:  8.78333278362953
Reward68


 92%|█████████▏| 183/200 [27:53<03:12, 11.35s/it]

LOSS:  2.338730573654175
breaking
Avg Frame-Rate:  8.920118244803781
Reward91


 92%|█████████▏| 184/200 [28:06<03:13, 12.08s/it]

LOSS:  2.0507495403289795
breaking
Avg Frame-Rate:  8.893701522416011
Reward154


 92%|█████████▎| 185/200 [28:27<03:40, 14.71s/it]

LOSS:  2.011807918548584
breaking
Avg Frame-Rate:  8.963802815234303
Reward156


 93%|█████████▎| 186/200 [28:48<03:51, 16.57s/it]

LOSS:  2.365285634994507
breaking
Avg Frame-Rate:  8.962291516611185
Reward161


 94%|█████████▎| 187/200 [29:10<03:55, 18.10s/it]

LOSS:  1.9112818241119385
breaking
Avg Frame-Rate:  8.760211117031686
Reward153


 94%|█████████▍| 188/200 [29:31<03:47, 18.96s/it]

LOSS:  2.91579532623291
breaking
Avg Frame-Rate:  8.894946974373878
Reward124


 94%|█████████▍| 189/200 [29:48<03:23, 18.51s/it]

LOSS:  2.7084221839904785
breaking
Avg Frame-Rate:  8.830619895622965
Reward48


 95%|█████████▌| 190/200 [29:57<02:36, 15.65s/it]

LOSS:  2.1811397075653076
breaking
Avg Frame-Rate:  8.786925033486574
Reward86


 96%|█████████▌| 191/200 [30:10<02:14, 14.92s/it]

LOSS:  2.659592390060425
breaking
Avg Frame-Rate:  8.750709365497173
Reward41


 96%|█████████▌| 192/200 [30:19<01:42, 12.87s/it]

LOSS:  2.765204429626465
breaking
Avg Frame-Rate:  8.92395826572918
Reward151


 96%|█████████▋| 193/200 [30:39<01:45, 15.11s/it]

LOSS:  3.0115163326263428
breaking
Avg Frame-Rate:  8.855334177870878
Reward52


 97%|█████████▋| 194/200 [30:48<01:20, 13.39s/it]

LOSS:  2.2763381004333496
breaking
Avg Frame-Rate:  8.913535546175439
Reward68


 98%|█████████▊| 195/200 [30:59<01:03, 12.73s/it]

LOSS:  3.4172165393829346
breaking
Avg Frame-Rate:  8.949136063437052
Reward150


 98%|█████████▊| 196/200 [31:20<00:59, 14.97s/it]

LOSS:  2.2930757999420166
breaking
Avg Frame-Rate:  8.983831569491294
Reward141


 98%|█████████▊| 197/200 [31:39<00:49, 16.34s/it]

LOSS:  3.290555477142334
breaking
Avg Frame-Rate:  8.703620470818004
Reward41


 99%|█████████▉| 198/200 [31:48<00:27, 13.97s/it]

LOSS:  3.149196147918701
breaking
Avg Frame-Rate:  8.915456442043
Reward81


100%|█████████▉| 199/200 [32:00<00:13, 13.55s/it]

LOSS:  3.852734088897705
breaking
Avg Frame-Rate:  9.028464695386836
Reward169


100%|██████████| 200/200 [32:22<00:00,  9.71s/it]

LOSS:  4.31973123550415
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 35, 189, 32)       8224      
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 16, 93, 32)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 13, 90, 64)        32832     
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 3, 28, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 3, 28, 128)        8320      
                                                                 
 max_pooling2d_11 (MaxPoolin  




game starting in  4
game starting in  3
game starting in  2
game starting in  1
game starting in  0


  0%|          | 0/200 [00:00<?, ?it/s]



  0%|          | 1/200 [00:06<21:49,  6.58s/it]

breaking
Avg Frame-Rate:  8.691918514029092
Reward52
too little info
Saved model to disk


  1%|          | 2/200 [00:07<10:15,  3.11s/it]

breaking
Reward0
too little info


  2%|▏         | 3/200 [00:14<15:40,  4.77s/it]

breaking
Avg Frame-Rate:  8.895385211825891
Reward55
too little info


  2%|▏         | 4/200 [00:14<10:18,  3.16s/it]

breaking
Reward0
too little info


  2%|▎         | 5/200 [00:22<15:15,  4.69s/it]

breaking
Avg Frame-Rate:  8.33082104061961
Reward57
too little info


  3%|▎         | 6/200 [00:22<10:44,  3.32s/it]

breaking
Reward0
too little info


  4%|▎         | 7/200 [00:28<12:42,  3.95s/it]

breaking
Avg Frame-Rate:  8.767049003239395
Reward41
too little info


  4%|▍         | 8/200 [00:28<09:19,  2.91s/it]

breaking
Reward0
too little info


  4%|▍         | 9/200 [00:34<11:38,  3.66s/it]

breaking
Avg Frame-Rate:  8.671612197649656
Reward41
too little info


  5%|▌         | 10/200 [00:34<08:40,  2.74s/it]

breaking
Reward0
too little info


  6%|▌         | 11/200 [00:39<11:03,  3.51s/it]

breaking
Avg Frame-Rate:  8.531555058154892
Reward40
too little info


  6%|▌         | 12/200 [00:40<08:17,  2.65s/it]

breaking
Reward0
too little info


  6%|▋         | 13/200 [00:46<11:40,  3.75s/it]

breaking
Avg Frame-Rate:  8.754133701205712
Reward50
too little info


  7%|▋         | 14/200 [00:47<08:44,  2.82s/it]

breaking
Reward0
too little info


  8%|▊         | 15/200 [00:53<11:57,  3.88s/it]

breaking
Avg Frame-Rate:  8.87428840827978
Reward51
too little info


  8%|▊         | 16/200 [00:54<08:56,  2.91s/it]

breaking
Reward0
too little info


  8%|▊         | 17/200 [00:59<11:00,  3.61s/it]

breaking
Avg Frame-Rate:  8.835813975187895
Reward41
too little info


  9%|▉         | 18/200 [01:00<08:15,  2.72s/it]

breaking
Reward0
too little info
breaking
Avg Frame-Rate:  8.78162154747946
Reward41


 10%|▉         | 19/200 [01:09<13:34,  4.50s/it]

LOSS:  51.72956466674805
breaking
Avg Frame-Rate:  8.726524572494517
Reward41


 10%|█         | 20/200 [01:17<16:48,  5.60s/it]

LOSS:  38.16500473022461
breaking
Avg Frame-Rate:  8.698133851033921
Reward41


 10%|█         | 21/200 [01:25<19:04,  6.39s/it]

LOSS:  38.807193756103516
Saved model to disk
breaking
Avg Frame-Rate:  8.759145044634764
Reward41


 11%|█         | 22/200 [01:33<20:33,  6.93s/it]

LOSS:  40.990962982177734
breaking
Avg Frame-Rate:  8.808090969264882
Reward41


 12%|█▏        | 23/200 [01:41<21:31,  7.29s/it]

LOSS:  39.68647003173828
breaking
Avg Frame-Rate:  8.73203555058071
Reward41


 12%|█▏        | 24/200 [01:50<22:10,  7.56s/it]

LOSS:  33.54635238647461
breaking
Avg Frame-Rate:  8.749842917322843
Reward41


 12%|█▎        | 25/200 [01:58<22:35,  7.75s/it]

LOSS:  24.07648277282715
breaking
Avg Frame-Rate:  8.800353351917641
Reward41


 13%|█▎        | 26/200 [02:06<22:49,  7.87s/it]

LOSS:  10.099151611328125
breaking
Avg Frame-Rate:  8.688870791404304
Reward41


 14%|█▎        | 27/200 [02:14<23:01,  7.98s/it]

LOSS:  8.941184997558594
breaking
Avg Frame-Rate:  8.644578611232985
Reward41


 14%|█▍        | 28/200 [02:22<23:05,  8.06s/it]

LOSS:  9.61618423461914
breaking
Avg Frame-Rate:  8.661450449309704
Reward41


 14%|█▍        | 29/200 [02:31<23:06,  8.11s/it]

LOSS:  6.514249324798584
breaking
Avg Frame-Rate:  8.92521351185101
Reward42


 15%|█▌        | 30/200 [02:39<23:01,  8.13s/it]

LOSS:  7.9692792892456055
breaking
Avg Frame-Rate:  8.750133198419316
Reward41


 16%|█▌        | 31/200 [02:47<22:55,  8.14s/it]

LOSS:  10.629438400268555
breaking
Avg Frame-Rate:  8.807029995065564
Reward41


 16%|█▌        | 32/200 [02:55<22:47,  8.14s/it]

LOSS:  9.288687705993652
breaking
Avg Frame-Rate:  8.75126761429592
Reward42


 16%|█▋        | 33/200 [03:03<22:46,  8.18s/it]

LOSS:  6.906513214111328
breaking
Avg Frame-Rate:  8.815027426745395
Reward41


 17%|█▋        | 34/200 [03:11<22:36,  8.17s/it]

LOSS:  5.460285663604736
breaking
Avg Frame-Rate:  8.716898737683776
Reward41


 18%|█▊        | 35/200 [03:20<22:29,  8.18s/it]

LOSS:  4.2379326820373535
breaking
Avg Frame-Rate:  8.775351968985921
Reward42


 18%|█▊        | 36/200 [03:28<22:25,  8.20s/it]

LOSS:  3.566936492919922
breaking
Avg Frame-Rate:  8.73753573561483
Reward41


 18%|█▊        | 37/200 [03:36<22:16,  8.20s/it]

LOSS:  2.514003276824951
breaking
Avg Frame-Rate:  8.78015449243797
Reward41


 19%|█▉        | 38/200 [03:44<22:04,  8.18s/it]

LOSS:  2.242849111557007
breaking
Avg Frame-Rate:  8.792529514531646
Reward41


 20%|█▉        | 39/200 [03:52<21:54,  8.17s/it]

LOSS:  1.7994707822799683
breaking
Avg Frame-Rate:  8.596784730150674
Reward40


 20%|██        | 40/200 [04:00<21:44,  8.15s/it]

LOSS:  1.8809466361999512
breaking
Avg Frame-Rate:  7.864984139454614
Reward37


 20%|██        | 41/200 [04:09<21:40,  8.18s/it]

LOSS:  1.7278658151626587
Saved model to disk
breaking
Avg Frame-Rate:  8.770711982552672
Reward41


 21%|██        | 42/200 [04:17<21:32,  8.18s/it]

LOSS:  1.3649406433105469
breaking
Avg Frame-Rate:  8.626324486507125
Reward40


 22%|██▏       | 43/200 [04:25<21:22,  8.17s/it]

LOSS:  1.2315962314605713
breaking
Avg Frame-Rate:  8.70375394750461
Reward41


 22%|██▏       | 44/200 [04:33<21:18,  8.20s/it]

LOSS:  1.2795606851577759
breaking
Avg Frame-Rate:  8.732941936599781
Reward41


 22%|██▎       | 45/200 [04:42<21:11,  8.20s/it]

LOSS:  1.280123233795166
breaking
Avg Frame-Rate:  8.79413787090245
Reward41


 23%|██▎       | 46/200 [04:50<21:01,  8.19s/it]

LOSS:  1.099753975868225
breaking
Avg Frame-Rate:  8.701188652246863
Reward41


 24%|██▎       | 47/200 [04:58<20:54,  8.20s/it]

LOSS:  0.6614117622375488
breaking
Avg Frame-Rate:  8.817201859828474
Reward41


 24%|██▍       | 48/200 [05:06<20:43,  8.18s/it]

LOSS:  0.6043780446052551
breaking
Avg Frame-Rate:  8.762283486717635
Reward41


 24%|██▍       | 49/200 [05:14<20:34,  8.17s/it]

LOSS:  0.8310044407844543
breaking
Avg Frame-Rate:  8.844730321502237
Reward42


 25%|██▌       | 50/200 [05:22<20:28,  8.19s/it]

LOSS:  0.6777085065841675
breaking
Avg Frame-Rate:  8.752351902575063
Reward41


 26%|██▌       | 51/200 [05:31<20:17,  8.17s/it]

LOSS:  0.7011749744415283
breaking
Avg Frame-Rate:  8.819728369141917
Reward41


 26%|██▌       | 52/200 [05:39<20:04,  8.14s/it]

LOSS:  0.6528192758560181
breaking
Avg Frame-Rate:  8.86939360717796
Reward52


 26%|██▋       | 53/200 [05:48<20:47,  8.49s/it]

LOSS:  1.0426324605941772
breaking
Avg Frame-Rate:  8.60028553790154
Reward40


 27%|██▋       | 54/200 [05:56<20:22,  8.37s/it]

LOSS:  0.7829285860061646
breaking
Avg Frame-Rate:  8.774610835175723
Reward41


 28%|██▊       | 55/200 [06:04<20:04,  8.31s/it]

LOSS:  0.7148967385292053
breaking
Avg Frame-Rate:  8.678524482223798
Reward41


 28%|██▊       | 56/200 [06:12<19:50,  8.27s/it]

LOSS:  0.5398073196411133
breaking
Avg Frame-Rate:  8.742083662388275
Reward50


 28%|██▊       | 57/200 [06:22<20:22,  8.55s/it]

LOSS:  0.5222906470298767
breaking
Avg Frame-Rate:  8.557088079339715
Reward40


 29%|██▉       | 58/200 [06:30<19:54,  8.41s/it]

LOSS:  0.63303142786026
breaking
Avg Frame-Rate:  8.712437398175696
Reward41


 30%|██▉       | 59/200 [06:38<19:37,  8.35s/it]

LOSS:  0.9717940092086792
breaking
Avg Frame-Rate:  8.511153719035097
Reward40


 30%|███       | 60/200 [06:46<19:23,  8.31s/it]

LOSS:  0.8744223713874817
breaking
Avg Frame-Rate:  8.887090402191049
Reward42


 30%|███       | 61/200 [06:54<19:11,  8.29s/it]

LOSS:  0.7720317244529724
Saved model to disk
breaking
Avg Frame-Rate:  8.796218920636342
Reward42


 31%|███       | 62/200 [07:03<19:01,  8.27s/it]

LOSS:  0.8254469037055969
breaking
Avg Frame-Rate:  8.767626953229557
Reward41


 32%|███▏      | 63/200 [07:11<18:47,  8.23s/it]

LOSS:  0.9697191715240479
breaking
Avg Frame-Rate:  8.794175059493712
Reward52


 32%|███▏      | 64/200 [07:20<19:26,  8.58s/it]

LOSS:  0.8143443465232849
breaking
Avg Frame-Rate:  8.734609752701894
Reward41


 32%|███▎      | 65/200 [07:28<19:01,  8.46s/it]

LOSS:  0.7981624603271484
breaking
Avg Frame-Rate:  8.790101233665498
Reward41


 33%|███▎      | 66/200 [07:36<18:38,  8.35s/it]

LOSS:  0.6747240424156189
breaking
Avg Frame-Rate:  8.80334418177777
Reward48


 34%|███▎      | 67/200 [07:45<18:52,  8.52s/it]

LOSS:  0.48769310116767883
breaking
Avg Frame-Rate:  8.807466944674392
Reward59


 34%|███▍      | 68/200 [07:55<19:49,  9.01s/it]

LOSS:  0.6950656175613403
breaking
Avg Frame-Rate:  8.927822947976393
Reward51


 34%|███▍      | 69/200 [08:05<19:46,  9.06s/it]

LOSS:  0.8795720338821411
breaking
Avg Frame-Rate:  8.042853166542493
Reward39


 35%|███▌      | 70/200 [08:13<19:10,  8.85s/it]

LOSS:  0.8324873447418213
breaking
Avg Frame-Rate:  8.727285866890751
Reward41


 36%|███▌      | 71/200 [08:21<18:34,  8.64s/it]

LOSS:  0.6352762579917908
breaking
Avg Frame-Rate:  8.894356085338282
Reward51


 36%|███▌      | 72/200 [08:30<18:48,  8.82s/it]

LOSS:  0.6543718576431274
breaking
Avg Frame-Rate:  8.709758462699314
Reward41


 36%|███▋      | 73/200 [08:39<18:14,  8.62s/it]

LOSS:  0.6493698358535767
breaking
Avg Frame-Rate:  8.653430408197002
Reward41


 37%|███▋      | 74/200 [08:47<17:50,  8.50s/it]

LOSS:  0.6751426458358765
breaking
Avg Frame-Rate:  8.790042374733186
Reward41


 38%|███▊      | 75/200 [08:55<17:28,  8.38s/it]

LOSS:  0.49098077416419983
breaking
Avg Frame-Rate:  8.696518195905426
Reward41


 38%|███▊      | 76/200 [09:03<17:11,  8.32s/it]

LOSS:  0.7191115021705627
breaking
Avg Frame-Rate:  8.682748603635156
Reward41


 38%|███▊      | 77/200 [09:11<17:00,  8.29s/it]

LOSS:  0.5725916624069214
breaking
Avg Frame-Rate:  8.46000591036814
Reward41


 39%|███▉      | 78/200 [09:20<16:53,  8.31s/it]

LOSS:  0.37846264243125916
breaking
Avg Frame-Rate:  7.775725826604024
Reward29


 40%|███▉      | 79/200 [09:27<16:22,  8.12s/it]

LOSS:  0.8287690877914429


 40%|███▉      | 79/200 [09:30<14:33,  7.22s/it]


KeyboardInterrupt: 

This next section plots the AI's total reward and the neural network's training loss as a function of the episode number.

In [None]:
# Visualise the training run: one figure for reward, one for loss.
# Titles and axis labels are set so the two figures can be told apart.

# plotX: total reward per episode (presumably appended once per episode by
# the training cell -- TODO confirm against the training loop).
plt.plot(range(len(plotX)), plotX)
plt.title("Total reward per episode")
plt.xlabel("Episode")
plt.ylabel("Total reward")
plt.show()

# agent.loss: training loss values recorded on the Agent (the model is
# compiled with mean squared error).
# NOTE(review): the run log prints no LOSS for the early "too little info"
# episodes, so this index likely lags the episode number -- confirm.
plt.plot(range(len(agent.loss)), agent.loss)
plt.title("Training loss")
plt.xlabel("Training update")
plt.ylabel("Loss (MSE)")
plt.show()