# Deep Q-learning 
Also called Deep Q networks (DNQs). Deep learning versions of Q-learning. 

* With DQNs, instead of a Q Table to look up values, you have a model that you inference (make predictions from), and rather than updating the Q table, you fit (train) your model.

It is a regression model, which typically will output values for each of our possible actions. These values will be continuous float values, and they are directly our Q values.

The complexity is higher, but the memory required is not. 

In [19]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras.callbacks import TensorBoard
from collections import deque
import time
import random
import os
from PIL import Image
import cv2

In [32]:
DISCOUNT = 0.99
REPLAY_MEMORY_SIZE = 50000
MIN_REPLAY_MEMORY_SIZE = 1000
MINIBATCH_SIZE = 64
UPDATE_TARGET_EVERY = 5
MODEL_NAME = '2X256'
MIN_REWARD = -200
MEMORY_FRACTION = 0.2

EPISODES = 1000
epsilon = 1
EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

AGGREGATE_STATS_EVERY = 50
SHOW_PREVIEW = False

In [28]:
class DQNAgent:
    def __init__(self):
        # Main model
        self.model = self.create_model()
        
        #Target network
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())
        
        # An array with last n steps for training
        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        
        # Custom tensorboard object
        self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")
        
        # Used to count when to update target network with main network's weights
        self.target_update_counter = 0        
        
    def create_model(self):
        model = tf.keras.models.Sequential([
                    tf.keras.layers.Conv2D(50, (3, 3), input_shape=env.OBSERVATION_SPACE_VALUES, activation='relu'),
                    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
                    tf.keras.layers.Dropout(0.2),
                
                    tf.keras.layers.Conv2D(20, (3, 3), input_shape=env.OBSERVATION_SPACE_VALUES, activation='relu'),
                    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
                    tf.keras.layers.Dropout(0.2),
                    
                    tf.keras.layers.Flatten(),
                    tf.keras.layers.Dense(15),
            
                    tf.keras.layers.Dense(env.ACTION_SPACE_SIZE, activation='linear'),
                ])
        
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
            loss='mean_absolute_error',
            metrics=['accuracy',],
        )
        
        return model
    
    # Adds step's data to a memory replay array
    def update_replay_memory(self, transition):
        self.replay_memory.append(transition)
        
    # Queries the main network for Q values given current state
    def get_qs(self, state):
        return self.model.predict(np.array(state).reshape(-1, *state.shape) / 255)[0]
    
    def train(self, terminal_state, step):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return
        
        # Get a mini-batch of random samples from the memory replay table
        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)
        
        # Get current states from minibatch, then query NN model for Q values
        current_states = np.array([transition[0] for transition in minibatch]) / 255
        current_qs_list = self.model.predict(current_states)
        
        # Get future states from minibatch, then query NN model for Q values
        # When using target network, query it, otherwise main network should be queried
        new_current_states = np.array([transition[3] for transition in minibatch]) / 255
        future_qs_list = self.target_model.predict(new_current_states)
        
        X = []
        y = []
        
        for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
                
            else:
                new_q = reward
                
            current_qs = current_qs_list[index]
            current_qs[action] = new_q
            
            X.append(current_state)
            y.append(current_qs)
            
        self.model.fit(
            np.array(X) / 255, 
            np.array(y), 
            batch_size=MINIBATCH_SIZE, 
            verbose=0, 
            shuffle=False, 
            callbacks=[self.tensorboard] if terminal_state else None)      
        
        # Update target network counter every episode
        if terminal_state:
            self.target_update_counter += 1
        
        # If counter reaches set value, update target network with weights of main network
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0


In [34]:
# Own Tensorboard class
class ModifiedTensorBoard(TensorBoard):
    # Overriding init to set initial step and writer
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.create_file_writer(self.log_dir)
        self._log_write_dir = self.log_dir
        
    # Stop creating default log writer
    def set_model(self, model):
        self.model = model

        self._train_dir = os.path.join(self._log_write_dir, 'train')
        self._train_step = self.model._train_counter

        self._val_dir = os.path.join(self._log_write_dir, 'validation')
        self._val_step = self.model._test_counter

        self._should_write_train_graph = False
    
    # Saves logs with our step no.
    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)
        
    # We train for one batch only, no need to save anything at epoch end
    def on_batch_end(self, batch, logs=None):
        pass
    
    # Won't close writer
    def on_train_end(self, _):
        pass
    
    # Custom method for saving own metrics
    # Creates writer, writes custom metrics and closes writer
    def update_stats(self, **stats):
        self._write_logs(stats, self.step)
        
    def _write_logs(self, logs, index):
        with self.writer.as_default():
            for name, value in logs.items():
                tf.summary.scalar(name, value, step=index)
                self.step += 1
                self.writer.flush()

In [6]:
class Blob:
    def __init__(self, size):
        self.size = size
        self.x = np.random.randint(0, size)
        self.y = np.random.randint(0, size)

    def __str__(self):
        return f"Blob ({self.x}, {self.y})"

    def __sub__(self, other):
        return (self.x-other.x, self.y-other.y)

    def __eq__(self, other):
        return self.x == other.x and self.y == other.y

    def action(self, choice):
        '''
        Gives us 9 total movement options. (0,1,2,3,4,5,6,7,8)
        '''
        if choice == 0:
            self.move(x=1, y=1)
        elif choice == 1:
            self.move(x=-1, y=-1)
        elif choice == 2:
            self.move(x=-1, y=1)
        elif choice == 3:
            self.move(x=1, y=-1)

        elif choice == 4:
            self.move(x=1, y=0)
        elif choice == 5:
            self.move(x=-1, y=0)

        elif choice == 6:
            self.move(x=0, y=1)
        elif choice == 7:
            self.move(x=0, y=-1)

        elif choice == 8:
            self.move(x=0, y=0)

    def move(self, x=False, y=False):

        # If no value for x, move randomly
        if not x:
            self.x += np.random.randint(-1, 2)
        else:
            self.x += x

        # If no value for y, move randomly
        if not y:
            self.y += np.random.randint(-1, 2)
        else:
            self.y += y

        # If we are out of bounds, fix!
        if self.x < 0:
            self.x = 0
        elif self.x > self.size-1:
            self.x = self.size-1
        if self.y < 0:
            self.y = 0
        elif self.y > self.size-1:
            self.y = self.size-1

class BlobEnv:
    SIZE = 10
    RETURN_IMAGES = True
    MOVE_PENALTY = 1
    ENEMY_PENALTY = 300
    FOOD_REWARD = 25
    OBSERVATION_SPACE_VALUES = (SIZE, SIZE, 3)  # 4
    ACTION_SPACE_SIZE = 9
    PLAYER_N = 1  # player key in dict
    FOOD_N = 2  # food key in dict
    ENEMY_N = 3  # enemy key in dict
    # the dict! (colors)
    d = {1: (255, 175, 0),
         2: (0, 255, 0),
         3: (0, 0, 255)}

    def reset(self):
        self.player = Blob(self.SIZE)
        self.food = Blob(self.SIZE)
        while self.food == self.player:
            self.food = Blob(self.SIZE)
        self.enemy = Blob(self.SIZE)
        while self.enemy == self.player or self.enemy == self.food:
            self.enemy = Blob(self.SIZE)

        self.episode_step = 0

        if self.RETURN_IMAGES:
            observation = np.array(self.get_image())
        else:
            observation = (self.player-self.food) + (self.player-self.enemy)
        return observation

    def step(self, action):
        self.episode_step += 1
        self.player.action(action)

        #### MAYBE ###
        #enemy.move()
        #food.move()
        ##############

        if self.RETURN_IMAGES:
            new_observation = np.array(self.get_image())
        else:
            new_observation = (self.player-self.food) + (self.player-self.enemy)

        if self.player == self.enemy:
            reward = -self.ENEMY_PENALTY
        elif self.player == self.food:
            reward = self.FOOD_REWARD
        else:
            reward = -self.MOVE_PENALTY

        done = False
        if reward == self.FOOD_REWARD or reward == -self.ENEMY_PENALTY or self.episode_step >= 200:
            done = True

        return new_observation, reward, done

    def render(self):
        img = self.get_image()
        img = img.resize((300, 300))  # resizing so we can see our agent in all its glory.
        cv2.imshow("image", np.array(img))  # show it!
        cv2.waitKey(1)

    # FOR CNN #
    def get_image(self):
        env = np.zeros((self.SIZE, self.SIZE, 3), dtype=np.uint8)  # starts an rbg of our size
        env[self.food.x][self.food.y] = self.d[self.FOOD_N]  # sets the food location tile to green color
        env[self.enemy.x][self.enemy.y] = self.d[self.ENEMY_N]  # sets the enemy location to red
        env[self.player.x][self.player.y] = self.d[self.PLAYER_N]  # sets the player tile to blue
        img = Image.fromarray(env, 'RGB')  # reading to rgb. Apparently. Even tho color definitions are bgr. ???
        return img

In [11]:
env = BlobEnv()

ep_rewards = [-200]
np.random.seed(1)
tf.random.set_seed(1)

if not os.path.isdir('models'):
    os.makedirs('models')

In [35]:
agent = DQNAgent()

for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):
    agent.tensorboard.step = episode
    episode_reward = 0
    step = 1
    current_state = env.reset()
    
    done = False
    while not done:
        if np.random.random() > epsilon:
            action = np.argmax(agent.get_qs(current_state))
        else:
            action = np.random.randint(0, env.ACTION_SPACE_SIZE)
        
        new_state, reward, done = env.step(action)
        episode_reward += reward
        
        if SHOW_PREVIEW and not episode % AGGREGATE_STATS_EVERY:
            env.render()
            
        agent.update_replay_memory((current_state, action, reward, new_state, done))
        agent.train(done, step)
        current_state = new_state
        step += 1
        
    ep_rewards.append(episode_reward)
    if not episode % AGGREGATE_STATS_EVERY or episode == 1:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
        max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
        agent.tensorboard.update_stats(reward_avg=average_reward, 
                                       reward_min=min_reward,
                                       reward_max=max_reward,
                                       epsilon=epsilon,
                                      )
        
        if average_reward >= MIN_REWARD:
            agent.model.save(f'models/{MODEL_NAME}_{max_reward:_>7.2f}max_{average_reward:_>7.2f}min_{int(time.time())}.model')
            
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)



INFO:tensorflow:Assets written to: models/2X256___25.00max_-193.34min_1657171983.model\assets


INFO:tensorflow:Assets written to: models/2X256___25.00max_-193.34min_1657171983.model\assets
  0%|                                                                           | 1/1000 [00:01<17:44,  1.07s/episodes]



  1%|4                                                                          | 6/1000 [00:01<02:27,  6.73episodes/s]



  1%|7                                                                         | 10/1000 [00:01<01:32, 10.74episodes/s]



  1%|#                                                                         | 14/1000 [00:01<01:09, 14.26episodes/s]



  1%|#                                                                         | 14/1000 [00:17<01:09, 14.26episodes/s]



  2%|#2                                                                        | 17/1000 [00:29<48:54,  2.98s/episodes]



  2%|#2                                                                      | 18/1000 [00:41<1:05:57,  4.03s/episodes]



  2%|#3                                                                      | 19/1000 [00:45<1:07:39,  4.14s/episodes]



  2%|#3                                                                      | 19/1000 [00:57<1:07:39,  4.14s/episodes]



  2%|#5                                                                      | 21/1000 [01:18<2:07:58,  7.84s/episodes]



  2%|#5                                                                      | 22/1000 [01:29<2:17:03,  8.41s/episodes]



  2%|#6                                                                      | 23/1000 [01:31<1:56:44,  7.17s/episodes]



  2%|#8                                                                      | 25/1000 [01:41<1:42:38,  6.32s/episodes]



  3%|#8                                                                      | 26/1000 [01:51<1:56:26,  7.17s/episodes]



  3%|#9                                                                      | 27/1000 [02:01<2:03:59,  7.65s/episodes]



  3%|##                                                                      | 28/1000 [02:06<1:53:32,  7.01s/episodes]



  3%|##                                                                      | 29/1000 [02:15<2:01:52,  7.53s/episodes]



  3%|##1                                                                     | 30/1000 [02:28<2:29:26,  9.24s/episodes]



  3%|##2                                                                     | 31/1000 [02:36<2:22:27,  8.82s/episodes]



  3%|##3                                                                     | 32/1000 [03:03<3:43:47, 13.87s/episodes]



  3%|##3                                                                     | 33/1000 [03:07<2:59:13, 11.12s/episodes]



  3%|##4                                                                     | 34/1000 [03:18<2:57:21, 11.02s/episodes]



  4%|##5                                                                     | 35/1000 [03:36<3:29:48, 13.05s/episodes]



  4%|##5                                                                     | 36/1000 [03:43<3:02:00, 11.33s/episodes]



  4%|##6                                                                     | 37/1000 [03:52<2:48:32, 10.50s/episodes]



  4%|##7                                                                     | 38/1000 [03:53<2:06:08,  7.87s/episodes]



  4%|##8                                                                     | 39/1000 [04:01<2:05:47,  7.85s/episodes]



  4%|##8                                                                     | 40/1000 [04:19<2:53:01, 10.81s/episodes]



  4%|##9                                                                     | 41/1000 [04:20<2:04:59,  7.82s/episodes]



  4%|###                                                                     | 42/1000 [04:21<1:33:41,  5.87s/episodes]



  4%|###                                                                     | 43/1000 [04:30<1:47:20,  6.73s/episodes]



  4%|###1                                                                    | 44/1000 [04:50<2:51:21, 10.75s/episodes]



  4%|###2                                                                    | 45/1000 [05:00<2:47:49, 10.54s/episodes]



  5%|###3                                                                    | 46/1000 [05:12<2:55:21, 11.03s/episodes]



  5%|###3                                                                    | 47/1000 [05:18<2:31:00,  9.51s/episodes]



  5%|###4                                                                    | 48/1000 [05:34<3:03:41, 11.58s/episodes]



  5%|###5                                                                    | 49/1000 [05:58<4:02:42, 15.31s/episodes]





INFO:tensorflow:Assets written to: models/2X256___23.00max_-183.02min_1657172348.model\assets


INFO:tensorflow:Assets written to: models/2X256___23.00max_-183.02min_1657172348.model\assets
  5%|###6                                                                    | 50/1000 [06:05<3:23:14, 12.84s/episodes]



  5%|###6                                                                    | 51/1000 [06:20<3:30:43, 13.32s/episodes]



  5%|###7                                                                    | 52/1000 [06:29<3:09:09, 11.97s/episodes]



  5%|###8                                                                    | 53/1000 [06:29<2:13:15,  8.44s/episodes]



  5%|###8                                                                    | 54/1000 [06:49<3:09:29, 12.02s/episodes]



  6%|###9                                                                    | 55/1000 [06:51<2:19:16,  8.84s/episodes]



  6%|####                                                                    | 56/1000 [07:00<2:18:52,  8.83s/episodes]



  6%|####1                                                                   | 57/1000 [07:11<2:31:06,  9.61s/episodes]



  6%|####1                                                                   | 58/1000 [07:19<2:21:56,  9.04s/episodes]



  6%|####2                                                                   | 59/1000 [07:23<2:01:40,  7.76s/episodes]



  6%|####3                                                                   | 60/1000 [07:31<1:59:20,  7.62s/episodes]



  6%|####3                                                                   | 61/1000 [07:31<1:26:47,  5.55s/episodes]



  6%|####4                                                                   | 62/1000 [07:41<1:47:16,  6.86s/episodes]



  6%|####5                                                                   | 63/1000 [08:00<2:40:55, 10.30s/episodes]



  6%|####6                                                                   | 64/1000 [08:12<2:48:34, 10.81s/episodes]



  6%|####6                                                                   | 65/1000 [08:12<2:01:41,  7.81s/episodes]



  7%|####7                                                                   | 66/1000 [08:13<1:28:54,  5.71s/episodes]



  7%|####8                                                                   | 67/1000 [08:14<1:04:19,  4.14s/episodes]



  7%|#####                                                                     | 68/1000 [08:14<46:05,  2.97s/episodes]



  7%|#####1                                                                    | 69/1000 [08:17<45:30,  2.93s/episodes]



  7%|#####                                                                   | 70/1000 [08:36<2:01:27,  7.84s/episodes]



  7%|#####1                                                                  | 71/1000 [08:39<1:38:12,  6.34s/episodes]



  7%|#####1                                                                  | 72/1000 [08:40<1:12:04,  4.66s/episodes]



  7%|#####2                                                                  | 73/1000 [08:46<1:20:46,  5.23s/episodes]



  7%|#####3                                                                  | 74/1000 [08:57<1:44:34,  6.78s/episodes]



  8%|#####3                                                                  | 75/1000 [09:05<1:53:31,  7.36s/episodes]



  8%|#####4                                                                  | 76/1000 [09:31<3:19:30, 12.95s/episodes]



  8%|#####5                                                                  | 77/1000 [09:38<2:50:49, 11.11s/episodes]



  8%|#####6                                                                  | 78/1000 [09:39<2:04:51,  8.13s/episodes]



  8%|#####6                                                                  | 79/1000 [10:05<3:23:47, 13.28s/episodes]



  8%|#####7                                                                  | 80/1000 [10:15<3:09:09, 12.34s/episodes]



  8%|#####8                                                                  | 81/1000 [10:21<2:40:51, 10.50s/episodes]



  8%|#####9                                                                  | 82/1000 [10:22<1:59:05,  7.78s/episodes]



  8%|#####9                                                                  | 83/1000 [10:24<1:29:21,  5.85s/episodes]



  8%|######                                                                  | 84/1000 [10:49<2:59:43, 11.77s/episodes]



  8%|######1                                                                 | 85/1000 [10:54<2:25:51,  9.56s/episodes]



  9%|######1                                                                 | 86/1000 [11:10<2:54:50, 11.48s/episodes]



  9%|######2                                                                 | 87/1000 [11:15<2:27:27,  9.69s/episodes]



  9%|######3                                                                 | 88/1000 [11:41<3:40:35, 14.51s/episodes]



  9%|######4                                                                 | 89/1000 [11:43<2:42:04, 10.67s/episodes]



  9%|######4                                                                 | 90/1000 [11:51<2:32:51, 10.08s/episodes]



  9%|######5                                                                 | 91/1000 [12:17<3:43:13, 14.73s/episodes]



  9%|######6                                                                 | 92/1000 [12:18<2:40:01, 10.57s/episodes]



  9%|######6                                                                 | 93/1000 [12:26<2:30:20,  9.95s/episodes]



  9%|######7                                                                 | 94/1000 [12:34<2:19:17,  9.22s/episodes]



 10%|######8                                                                 | 95/1000 [12:35<1:41:50,  6.75s/episodes]



 10%|######9                                                                 | 96/1000 [12:39<1:28:41,  5.89s/episodes]



 10%|######9                                                                 | 97/1000 [12:41<1:10:40,  4.70s/episodes]



 10%|#######                                                                 | 98/1000 [12:46<1:12:20,  4.81s/episodes]



 10%|#######3                                                                  | 99/1000 [12:46<53:46,  3.58s/episodes]





INFO:tensorflow:Assets written to: models/2X256___25.00max_-157.52min_1657172755.model\assets


INFO:tensorflow:Assets written to: models/2X256___25.00max_-157.52min_1657172755.model\assets
 10%|#######1                                                               | 100/1000 [12:53<1:06:57,  4.46s/episodes]



 10%|#######3                                                                 | 101/1000 [12:54<53:05,  3.54s/episodes]



 10%|#######4                                                                 | 102/1000 [12:56<43:28,  2.90s/episodes]



 10%|#######5                                                                 | 103/1000 [12:57<35:49,  2.40s/episodes]



 10%|#######3                                                               | 104/1000 [13:09<1:19:54,  5.35s/episodes]



 10%|#######4                                                               | 105/1000 [13:13<1:13:42,  4.94s/episodes]



 11%|#######5                                                               | 106/1000 [13:27<1:50:42,  7.43s/episodes]



 11%|#######5                                                               | 107/1000 [13:30<1:32:53,  6.24s/episodes]



 11%|#######6                                                               | 108/1000 [13:38<1:42:54,  6.92s/episodes]



 11%|#######7                                                               | 109/1000 [13:40<1:18:02,  5.25s/episodes]



 11%|#######8                                                               | 110/1000 [14:00<2:24:12,  9.72s/episodes]



 11%|#######8                                                               | 111/1000 [14:02<1:49:56,  7.42s/episodes]



 11%|#######9                                                               | 112/1000 [14:23<2:49:42, 11.47s/episodes]



 11%|########                                                               | 113/1000 [14:50<3:57:06, 16.04s/episodes]



 11%|########                                                               | 114/1000 [14:59<3:27:44, 14.07s/episodes]



 12%|########1                                                              | 115/1000 [15:13<3:27:18, 14.05s/episodes]



 12%|########2                                                              | 116/1000 [15:16<2:35:42, 10.57s/episodes]



 12%|########3                                                              | 117/1000 [15:24<2:24:40,  9.83s/episodes]



 12%|########3                                                              | 118/1000 [15:47<3:24:42, 13.93s/episodes]



 12%|########4                                                              | 119/1000 [15:48<2:25:58,  9.94s/episodes]



 12%|########5                                                              | 120/1000 [15:49<1:47:47,  7.35s/episodes]



 12%|########5                                                              | 121/1000 [15:54<1:36:35,  6.59s/episodes]



 12%|########6                                                              | 122/1000 [15:58<1:26:28,  5.91s/episodes]



 12%|########7                                                              | 123/1000 [16:00<1:08:09,  4.66s/episodes]



 12%|########8                                                              | 124/1000 [16:26<2:40:37, 11.00s/episodes]



 12%|########8                                                              | 125/1000 [16:28<2:01:02,  8.30s/episodes]



 13%|########9                                                              | 126/1000 [16:29<1:30:30,  6.21s/episodes]



 13%|#########                                                              | 127/1000 [16:30<1:08:17,  4.69s/episodes]



 13%|#########                                                              | 128/1000 [16:36<1:13:15,  5.04s/episodes]



 13%|#########1                                                             | 129/1000 [16:41<1:13:35,  5.07s/episodes]



 13%|#########2                                                             | 130/1000 [16:48<1:22:02,  5.66s/episodes]



 13%|#########3                                                             | 131/1000 [16:55<1:27:52,  6.07s/episodes]



 13%|#########3                                                             | 132/1000 [17:02<1:32:09,  6.37s/episodes]



 13%|#########4                                                             | 133/1000 [17:25<2:41:22, 11.17s/episodes]



 13%|#########5                                                             | 134/1000 [17:27<2:04:27,  8.62s/episodes]



 14%|#########5                                                             | 135/1000 [17:28<1:28:53,  6.17s/episodes]



 14%|#########6                                                             | 136/1000 [17:37<1:43:23,  7.18s/episodes]



 14%|#########7                                                             | 137/1000 [17:38<1:15:47,  5.27s/episodes]



 14%|#########7                                                             | 138/1000 [18:04<2:45:03, 11.49s/episodes]



 14%|#########8                                                             | 139/1000 [18:17<2:51:41, 11.96s/episodes]



 14%|#########9                                                             | 140/1000 [18:33<3:08:27, 13.15s/episodes]



 14%|##########                                                             | 141/1000 [18:36<2:21:57,  9.92s/episodes]



 14%|##########                                                             | 142/1000 [18:45<2:18:53,  9.71s/episodes]



 14%|##########1                                                            | 143/1000 [19:00<2:40:57, 11.27s/episodes]



 14%|##########2                                                            | 144/1000 [19:05<2:14:02,  9.40s/episodes]



 14%|##########2                                                            | 145/1000 [19:30<3:20:33, 14.07s/episodes]



 15%|##########3                                                            | 146/1000 [19:31<2:26:17, 10.28s/episodes]



 15%|##########4                                                            | 147/1000 [19:46<2:45:24, 11.64s/episodes]



 15%|##########5                                                            | 148/1000 [20:12<3:44:27, 15.81s/episodes]



 15%|##########5                                                            | 149/1000 [20:37<4:23:36, 18.59s/episodes]





INFO:tensorflow:Assets written to: models/2X256___23.00max_-179.96min_1657173238.model\assets


INFO:tensorflow:Assets written to: models/2X256___23.00max_-179.96min_1657173238.model\assets
 15%|##########6                                                            | 150/1000 [20:56<4:26:27, 18.81s/episodes]



 15%|##########7                                                            | 151/1000 [21:22<4:58:31, 21.10s/episodes]



 15%|##########7                                                            | 152/1000 [21:28<3:54:04, 16.56s/episodes]



 15%|##########8                                                            | 153/1000 [21:35<3:12:41, 13.65s/episodes]



 15%|##########9                                                            | 154/1000 [21:37<2:22:56, 10.14s/episodes]



 16%|###########                                                            | 155/1000 [21:38<1:41:59,  7.24s/episodes]



 16%|###########                                                            | 156/1000 [21:40<1:21:10,  5.77s/episodes]



 16%|###########4                                                             | 157/1000 [21:41<59:36,  4.24s/episodes]



 16%|###########2                                                           | 158/1000 [22:06<2:29:52, 10.68s/episodes]



 16%|###########2                                                           | 159/1000 [22:33<3:36:24, 15.44s/episodes]



 16%|###########3                                                           | 160/1000 [22:33<2:32:41, 10.91s/episodes]



 16%|###########4                                                           | 161/1000 [22:59<3:33:43, 15.28s/episodes]



 16%|###########5                                                           | 162/1000 [23:02<2:44:48, 11.80s/episodes]



 16%|###########5                                                           | 163/1000 [23:09<2:23:59, 10.32s/episodes]



 16%|###########6                                                           | 164/1000 [23:17<2:11:59,  9.47s/episodes]



 16%|###########7                                                           | 165/1000 [23:18<1:38:07,  7.05s/episodes]



 17%|###########7                                                           | 166/1000 [23:43<2:51:02, 12.31s/episodes]



 17%|###########8                                                           | 167/1000 [23:44<2:05:53,  9.07s/episodes]



 17%|###########9                                                           | 168/1000 [24:10<3:16:10, 14.15s/episodes]



 17%|###########9                                                           | 169/1000 [24:20<2:55:43, 12.69s/episodes]



 17%|############                                                           | 170/1000 [24:29<2:42:29, 11.75s/episodes]



 17%|############1                                                          | 171/1000 [24:30<1:58:41,  8.59s/episodes]



 17%|############2                                                          | 172/1000 [24:57<3:14:22, 14.09s/episodes]



 17%|############2                                                          | 173/1000 [25:01<2:29:52, 10.87s/episodes]



 17%|############3                                                          | 174/1000 [25:14<2:40:41, 11.67s/episodes]



 18%|############4                                                          | 175/1000 [25:18<2:07:25,  9.27s/episodes]



 18%|############4                                                          | 176/1000 [25:31<2:24:25, 10.52s/episodes]



 18%|############5                                                          | 177/1000 [25:37<2:05:49,  9.17s/episodes]



 18%|############6                                                          | 178/1000 [25:44<1:53:48,  8.31s/episodes]



 18%|############7                                                          | 179/1000 [25:46<1:30:56,  6.65s/episodes]



 18%|############7                                                          | 180/1000 [25:56<1:45:17,  7.70s/episodes]



 18%|############8                                                          | 181/1000 [25:58<1:19:13,  5.80s/episodes]



 18%|############9                                                          | 182/1000 [26:05<1:23:46,  6.14s/episodes]



 18%|############9                                                          | 183/1000 [26:06<1:02:42,  4.61s/episodes]



 18%|#############                                                          | 184/1000 [26:17<1:28:40,  6.52s/episodes]



 18%|#############1                                                         | 185/1000 [26:25<1:34:05,  6.93s/episodes]



 19%|#############2                                                         | 186/1000 [26:50<2:50:40, 12.58s/episodes]



 19%|#############2                                                         | 187/1000 [26:53<2:08:15,  9.47s/episodes]



 19%|#############3                                                         | 188/1000 [27:15<2:59:42, 13.28s/episodes]



 19%|#############4                                                         | 189/1000 [27:25<2:45:15, 12.23s/episodes]



 19%|#############4                                                         | 190/1000 [27:29<2:13:31,  9.89s/episodes]



 19%|#############5                                                         | 191/1000 [27:40<2:18:18, 10.26s/episodes]



 19%|#############6                                                         | 192/1000 [28:06<3:19:13, 14.79s/episodes]



 19%|#############7                                                         | 193/1000 [28:08<2:27:36, 10.97s/episodes]



 19%|#############7                                                         | 194/1000 [28:15<2:14:49, 10.04s/episodes]



 20%|#############8                                                         | 195/1000 [28:16<1:37:58,  7.30s/episodes]



 20%|#############9                                                         | 196/1000 [28:22<1:30:52,  6.78s/episodes]



 20%|#############9                                                         | 197/1000 [28:24<1:13:46,  5.51s/episodes]



 20%|##############                                                         | 198/1000 [28:33<1:25:13,  6.38s/episodes]



 20%|##############1                                                        | 199/1000 [28:48<2:00:05,  9.00s/episodes]





INFO:tensorflow:Assets written to: models/2X256___24.00max_-199.32min_1657173712.model\assets


INFO:tensorflow:Assets written to: models/2X256___24.00max_-199.32min_1657173712.model\assets
 20%|##############2                                                        | 200/1000 [28:50<1:31:12,  6.84s/episodes]



 20%|##############2                                                        | 201/1000 [29:09<2:20:13, 10.53s/episodes]



 20%|##############3                                                        | 202/1000 [29:29<2:56:20, 13.26s/episodes]



 20%|##############4                                                        | 203/1000 [29:37<2:37:02, 11.82s/episodes]



 20%|##############4                                                        | 204/1000 [29:53<2:54:03, 13.12s/episodes]



 20%|##############5                                                        | 205/1000 [30:01<2:31:07, 11.41s/episodes]



 21%|##############6                                                        | 206/1000 [30:11<2:27:07, 11.12s/episodes]



 21%|##############6                                                        | 207/1000 [30:20<2:19:42, 10.57s/episodes]



 21%|##############7                                                        | 208/1000 [30:23<1:49:06,  8.27s/episodes]



 21%|##############8                                                        | 209/1000 [30:29<1:39:56,  7.58s/episodes]



 21%|##############9                                                        | 210/1000 [30:37<1:39:43,  7.57s/episodes]



 21%|##############9                                                        | 211/1000 [30:45<1:41:39,  7.73s/episodes]



 21%|###############                                                        | 212/1000 [30:47<1:18:33,  5.98s/episodes]



 21%|###############1                                                       | 213/1000 [30:50<1:07:11,  5.12s/episodes]



 21%|###############1                                                       | 214/1000 [31:01<1:32:43,  7.08s/episodes]



 22%|##############9                                                      | 216/1000 [49:24<51:15:49, 235.40s/episodes]



 22%|##############9                                                      | 217/1000 [49:46<37:14:50, 171.25s/episodes]



 22%|###############                                                      | 218/1000 [49:48<26:10:37, 120.51s/episodes]



 22%|###############3                                                      | 219/1000 [50:15<20:02:52, 92.41s/episodes]



 22%|###############4                                                      | 220/1000 [50:43<15:51:39, 73.20s/episodes]



 22%|###############4                                                      | 221/1000 [51:10<12:51:18, 59.41s/episodes]



 22%|###############5                                                      | 222/1000 [51:28<10:05:47, 46.72s/episodes]



 22%|###############8                                                       | 223/1000 [51:59<9:07:01, 42.24s/episodes]



 22%|###############9                                                       | 224/1000 [52:00<6:24:12, 29.71s/episodes]



 22%|###############9                                                       | 225/1000 [52:04<4:44:17, 22.01s/episodes]



 23%|################                                                       | 226/1000 [52:18<4:14:07, 19.70s/episodes]



 23%|################1                                                      | 227/1000 [52:20<3:04:32, 14.32s/episodes]



 23%|################1                                                      | 228/1000 [52:40<3:26:38, 16.06s/episodes]



 23%|################2                                                      | 229/1000 [52:44<2:41:01, 12.53s/episodes]



 23%|################2                                                      | 229/1000 [53:03<2:58:38, 13.90s/episodes]


KeyboardInterrupt: 

In [41]:
%load_ext tensorboard
%tensorboard --logdir "logs/"

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 12460), started 0:00:22 ago. (Use '!kill 12460' to kill it.)