<a href="https://colab.research.google.com/github/Sumyak/RL-Mario-Agent/blob/main/RL_mario_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install nes-py==0.2.6
!pip install gym-super-mario-bros
!apt-get update
!apt-get install ffmpeg libsm6 libxext6  -y
!apt install -y libgl1-mesa-glx
!pip install opencv-python

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nes-py==0.2.6
  Using cached nes_py-0.2.6-cp37-cp37m-linux_x86_64.whl
Installing collected packages: nes-py
  Attempting uninstall: nes-py
    Found existing installation: nes-py 8.2.1
    Uninstalling nes-py-8.2.1:
      Successfully uninstalled nes-py-8.2.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gym-super-mario-bros 7.4.0 requires nes-py>=8.1.4, but you have nes-py 0.2.6 which is incompatible.[0m
Successfully installed nes-py-0.2.6
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nes-py>=8.1.4
  Using cached nes_py-8.2.1-cp37-cp37m-linux_x86_64.whl
Installing collected packages: nes-py
  Attempting uninstall: nes-py
    Found existing installation: nes-py 0.2.6
    Uninstallin

In [None]:
import torch
import torch.nn as nn
import random
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from tqdm import tqdm
import pickle 
from gym_super_mario_bros.actions import RIGHT_ONLY
import gym
import numpy as np
import collections 
import cv2
import matplotlib.pyplot as plt
import pylab as pl
from IPython import display
import time

In [None]:
class MaxAndSkipEnv(gym.Wrapper):
    """Repeat every action for `skip` emulator frames.

    The rewards of the repeated frames are summed, and the returned
    observation is the element-wise maximum of the most recent raw frames
    (at most two are kept).
    """

    def __init__(self, env=None, skip=4):
        """Return only every `skip`-th frame"""
        super().__init__(env)
        self._skip = skip
        # Holds the latest raw observations used for max pooling over time.
        self._obs_buffer = collections.deque(maxlen=2)

    def step(self, action):
        """Apply `action` up to `skip` times, stopping early when the episode ends.

        Returns:
            (max-pooled observation, summed reward, done flag, info of the
            last inner step).
        """
        done = None
        reward_sum = 0.0
        for _frame in range(self._skip):
            frame, frame_reward, done, info = self.env.step(action)
            self._obs_buffer.append(frame)
            reward_sum += frame_reward
            if done:
                break
        pooled = np.stack(self._obs_buffer).max(axis=0)
        return pooled, reward_sum, done, info

    def reset(self):
        """Clear past frame buffer and init to first obs"""
        self._obs_buffer.clear()
        first_frame = self.env.reset()
        self._obs_buffer.append(first_frame)
        return first_frame


class ProcessFrame84(gym.ObservationWrapper):
    """
    Converts a 240x256 RGB frame into an 84x84 single-channel greyscale image.

    Returns a uint8 numpy array of shape (84, 84, 1).
    """

    def __init__(self, env=None):
        super().__init__(env)
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)

    def observation(self, obs):
        """Hook called by the wrapper for every observation; delegates to `process`."""
        return ProcessFrame84.process(obs)

    @staticmethod
    def process(frame):
        """Greyscale, downsample and crop one raw frame.

        Only frames holding exactly 240 * 256 * 3 values are accepted; anything
        else trips the "Unknown resolution." assertion.
        """
        if frame.size != 240 * 256 * 3:
            assert False, "Unknown resolution."
        rgb = np.reshape(frame, [240, 256, 3]).astype(np.float32)

        # Weighted sum of the three colour channels -> one luminance channel.
        red, green, blue = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
        grey = red * 0.299 + green * 0.587 + blue * 0.114

        # cv2 takes (width, height): the result is 110 rows by 84 columns.
        shrunk = cv2.resize(grey, (84, 110), interpolation=cv2.INTER_AREA)
        # Keep rows 18..101 so the final image is square (84x84).
        cropped = shrunk[18:102, :]
        return np.reshape(cropped, [84, 84, 1]).astype(np.uint8)


class ImageToPyTorch(gym.ObservationWrapper):
    """Move the channel axis from last to first (H, W, C) -> (C, H, W)."""

    def __init__(self, env):
        super().__init__(env)
        hwc = self.observation_space.shape
        chw = (hwc[-1], hwc[0], hwc[1])
        self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=chw, dtype=np.float32)

    def observation(self, observation):
        """Return `observation` with axis 2 moved to the front."""
        return np.moveaxis(observation, source=2, destination=0)


class ScaledFloatFrame(gym.ObservationWrapper):
    """Rescale observations by 1/255 and return them as float32."""

    def observation(self, obs):
        """Convert `obs` to a float32 array and divide every value by 255."""
        as_float = np.array(obs).astype(np.float32)
        return as_float / 255.0


class BufferWrapper(gym.ObservationWrapper):
    """Stack the last `n_steps` observations along axis 0.

    The observation space is the wrapped space repeated `n_steps` times on
    axis 0. Each new observation is pushed at the end of a rolling buffer and
    the oldest entry is dropped.
    """

    def __init__(self, env, n_steps, dtype=np.float32):
        """
        Args:
            env: environment to wrap.
            n_steps: number of consecutive observations to stack.
            dtype: dtype of the stacked observation.
        """
        super(BufferWrapper, self).__init__(env)
        self.dtype = dtype
        old_space = env.observation_space
        self.observation_space = gym.spaces.Box(old_space.low.repeat(n_steps, axis=0),
                                                old_space.high.repeat(n_steps, axis=0), dtype=dtype)
        # Create the buffer up front so `observation` cannot fail with an
        # AttributeError when it is reached before the first `reset`.
        self.buffer = np.zeros_like(self.observation_space.low, dtype=self.dtype)

    def reset(self):
        """Zero the rolling buffer and return the stack holding the first observation."""
        self.buffer = np.zeros_like(self.observation_space.low, dtype=self.dtype)
        return self.observation(self.env.reset())

    def observation(self, observation):
        """Push `observation` into the rolling buffer and return the stacked frames."""
        # Shift everything one slot towards the front, dropping the oldest entry.
        self.buffer[:-1] = self.buffer[1:]
        self.buffer[-1] = observation
        # Return a copy: the internal buffer is mutated in place on every step,
        # so handing out a reference would silently change observations the
        # caller is still holding (e.g. the previous state of a transition).
        return self.buffer.copy()


def make_env(env):
    """Wrap a raw Mario environment with the preprocessing stack.

    Applied innermost to outermost: frame skipping with max pooling, 84x84
    greyscale conversion, channel-first layout, 4-observation stacking, scaling
    by 1/255, and finally the RIGHT_ONLY joypad action set.
    """
    frames = ProcessFrame84(MaxAndSkipEnv(env))
    stacked = BufferWrapper(ImageToPyTorch(frames), 4)
    scaled = ScaledFloatFrame(stacked)
    return JoypadSpace(scaled, RIGHT_ONLY)

In [None]:
class DQN(nn.Module):
    """Convolutional Q-network: three conv layers followed by two linear layers.

    Args:
        input_shape: shape of one observation, channels first (C, H, W).
        n_actions: number of outputs (one Q-value per action).
    """

    def __init__(self, input_shape, n_actions):
        super().__init__()
        in_channels = input_shape[0]
        feature_layers = [
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # The flattened feature size depends on the input resolution, so it is
        # measured with a dummy forward pass rather than hard-coded.
        flat_features = self._get_conv_out(input_shape)
        head_layers = [
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions),
        ]
        self.fc = nn.Sequential(*head_layers)

    def _get_conv_out(self, shape):
        """Return the number of values the conv stack emits for one input of `shape`."""
        probe = torch.zeros(1, *shape)
        return int(self.conv(probe).numel())

    def forward(self, x):
        """Map a batch of observations to a (batch, n_actions) tensor of Q-values."""
        batch = x.size(0)
        features = self.conv(x)
        flattened = features.view(batch, -1)
        return self.fc(flattened)
    

class Agent:
    """DQN agent with a separate target network and a tensor-backed replay memory.

    Hyperparameters (`lr`, `max_memory_size`, `batch_size`, `gamma`,
    `exploration_max`, `exploration_min`, `exploration_decay`) are read from
    module-level globals when the agent is constructed, so they must be
    defined before an Agent is created.
    """

    def __init__(self, state_space, action_space, trained):
        """
        Args:
            state_space: shape of one observation, passed to DQN as its input shape.
            action_space: number of discrete actions.
            trained: when True, network weights, replay memory and memory
                bookkeeping are loaded from files in the working directory
                (the same files play_step writes after training).
        """
        self.trained = trained
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Define DQN Layers
        self.state_space = state_space
        self.action_space = action_space
        self.local_net = DQN(state_space, action_space).to(self.device)
        self.target_net = DQN(state_space, action_space).to(self.device)

        if self.trained:
            self.local_net.load_state_dict(torch.load("dq1.pt", map_location=torch.device(self.device)))
            self.target_net.load_state_dict(torch.load("dq2.pt", map_location=torch.device(self.device)))

        # Only the local network is optimized; the target network is refreshed by copying.
        self.optimizer = torch.optim.Adam(self.local_net.parameters(), lr=lr)
        self.copy_weights = 5000  # Copy the local model weights into the target network every 5000 steps
        self.step = 0

        # Create memory
        self.max_memory_size = max_memory_size

        # Reloading memory
        if self.trained:
            self.STATE_MEM = torch.load("STATE_MEM.pt")
            self.ACTION_MEM = torch.load("ACTION_MEM.pt")
            self.REWARD_MEM = torch.load("REWARD_MEM.pt")
            self.STATE2_MEM = torch.load("STATE2_MEM.pt")
            self.DONE_MEM = torch.load("DONE_MEM.pt")
            # NOTE: pickle/torch.load execute arbitrary code from the file;
            # only load checkpoints produced by this notebook.
            with open("ending_position.pkl", 'rb') as f:
                self.ending_position = pickle.load(f)
            with open("num_in_queue.pkl", 'rb') as f:
                self.num_in_queue = pickle.load(f)

        else:
            # Pre-allocated ring buffers, one row per stored transition.
            self.STATE_MEM = torch.zeros(max_memory_size, *self.state_space)
            self.ACTION_MEM = torch.zeros(max_memory_size, 1)
            self.REWARD_MEM = torch.zeros(max_memory_size, 1)
            self.STATE2_MEM = torch.zeros(max_memory_size, *self.state_space)
            self.DONE_MEM = torch.zeros(max_memory_size, 1)
            self.ending_position = 0  # next row to overwrite
            self.num_in_queue = 0     # number of valid rows

        self.memory_sample_size = batch_size

        # Learning parameters
        self.gamma = gamma
        self.l1 = nn.SmoothL1Loss().to(self.device) # Also known as Huber loss
        self.exploration_max = exploration_max
        # Exploration always starts at exploration_max, also when `trained` is True.
        self.exploration_rate = exploration_max
        self.exploration_min = exploration_min
        self.exploration_decay = exploration_decay

    def remember(self, state, action, reward, state2, done):
        """Store one transition, overwriting the oldest one once memory is full.

        All five arguments are tensors; each is converted to float before
        being written into its memory row.
        """
        slot = self.ending_position
        self.STATE_MEM[slot] = state.float()
        self.ACTION_MEM[slot] = action.float()
        self.REWARD_MEM[slot] = reward.float()
        self.STATE2_MEM[slot] = state2.float()
        self.DONE_MEM[slot] = done.float()
        self.ending_position = (self.ending_position + 1) % self.max_memory_size  # FIFO tensor
        self.num_in_queue = min(self.num_in_queue + 1, self.max_memory_size)

    def recall(self):
        """Sample `memory_sample_size` stored transitions uniformly, with replacement.

        Returns:
            (STATE, ACTION, REWARD, STATE2, DONE) batch tensors.
        """
        idx = random.choices(range(self.num_in_queue), k=self.memory_sample_size)

        STATE = self.STATE_MEM[idx]
        ACTION = self.ACTION_MEM[idx]
        REWARD = self.REWARD_MEM[idx]
        STATE2 = self.STATE2_MEM[idx]
        DONE = self.DONE_MEM[idx]

        return STATE, ACTION, REWARD, STATE2, DONE

    def epsilon_greedy(self, state):
        """Pick an action: random with probability `exploration_rate`, else greedy.

        Also increments the step counter that drives target-network syncing.

        Returns:
            A (1, 1) CPU tensor holding the chosen action index.
        """
        self.step += 1
        if random.random() < self.exploration_rate:
            return torch.tensor([[random.randrange(self.action_space)]])

        # Local net is used for the policy. Action selection needs no
        # gradients, so skip building an autograd graph for it.
        with torch.no_grad():
            q_values = self.local_net(state.to(self.device))
        return torch.argmax(q_values).unsqueeze(0).unsqueeze(0).cpu()

    def copy_model(self):
        """Copy local net weights into target net."""
        self.target_net.load_state_dict(self.local_net.state_dict())

    def experience_replay(self):
        """Run one optimization step on a sampled batch and decay exploration.

        Syncs the target network every `copy_weights` steps. Does nothing
        else until the memory holds at least one batch worth of transitions.
        """
        if self.step % self.copy_weights == 0:
            self.copy_model()

        if self.memory_sample_size > self.num_in_queue:
            return

        STATE, ACTION, REWARD, STATE2, DONE = self.recall()
        STATE = STATE.to(self.device)
        ACTION = ACTION.to(self.device)
        REWARD = REWARD.to(self.device)
        STATE2 = STATE2.to(self.device)
        DONE = DONE.to(self.device)

        self.optimizer.zero_grad()

        # DQN target with a separate target network:
        #   Q*(S, A) <- r + γ max_a Q_target(S', a)
        # The target is a constant for this update, so it is computed without
        # autograd; otherwise every backward pass would also push (never
        # zeroed, never used) gradients into the target network.
        with torch.no_grad():
            next_q = self.target_net(STATE2).max(1).values.unsqueeze(1)
            # (1 - DONE) removes the bootstrap term for terminal transitions.
            target = REWARD + torch.mul(self.gamma * next_q, 1 - DONE)

        current = self.local_net(STATE).gather(1, ACTION.long()) # Local net approximation of Q-value

        loss = self.l1(current, target)
        loss.backward() # Backpropagate the error to get gradients
        self.optimizer.step() # Apply the gradient update to the local net

        self.exploration_rate *= self.exploration_decay

        # Makes sure that exploration rate is always at least 'exploration min'
        self.exploration_rate = max(self.exploration_rate, self.exploration_min)

In [None]:
def show_state(env, ep=1, info=""):
    """Draw the environment's current frame inline, replacing the previous output.

    Args:
        env: environment whose `render(mode='rgb_array')` frame is shown.
        ep: zero-based episode index; displayed as `ep + 1` in the title.
        info: extra text appended to the title.
    """
    frame = env.render(mode='rgb_array')
    caption = "Episode: %d %s" % (ep+1, info)

    # Reuse figure number 3 and wipe it so frames do not pile up.
    plt.figure(3)
    plt.clf()
    plt.imshow(frame)
    plt.title(caption)
    plt.axis('off')

    current_figure = plt.gcf()
    display.clear_output(wait=True)
    display.display(current_figure)

In [None]:
# Module-level configuration. Agent and play_step read these names as globals,
# so this cell must run before either of them is used.

# Per-episode total reward; play_step appends one entry per finished episode.
total_rewards = []
# Replay memory capacity, in transitions.
max_memory_size = 30000
# Number of transitions sampled per optimization step.
batch_size = 32
# Discount factor applied to the bootstrapped next-state value.
gamma = 0.90
# Learning rate of the Adam optimizer.
lr = 0.00025
# NOTE(review): not referenced by any code visible in this notebook - confirm before removing.
dropout = 0.
# Exploration rate schedule: starts at exploration_max, is multiplied by
# exploration_decay after every optimization step, and never drops below
# exploration_min.
exploration_max = 1.0
exploration_min = 0.02
exploration_decay = 0.99
#double_dq=True

env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
# Apply the preprocessing wrappers defined in make_env (frame skip, 84x84
# greyscale, channel-first, 4-observation stack, 1/255 scaling, RIGHT_ONLY actions).
env = make_env(env)
env.reset()
# Shape of one observation and number of discrete actions, used to build the Agent.
observation_space = env.observation_space.shape
action_space = env.action_space.n

In [None]:
def _to_batch_tensor(observation):
    """Convert one observation array into a float32 tensor with a leading batch axis."""
    return torch.tensor(np.asarray(observation), dtype=torch.float32).unsqueeze(0)


def _save_training_state(agent):
    """Write the files that `Agent(trained=True)` loads, plus the reward history."""
    with open("ending_position.pkl", "wb") as f:
        pickle.dump(agent.ending_position, f)
    with open("num_in_queue.pkl", "wb") as f:
        pickle.dump(agent.num_in_queue, f)
    with open("total_rewards.pkl", "wb") as f:
        pickle.dump(total_rewards, f)

    torch.save(agent.local_net.state_dict(), "dq1.pt")
    torch.save(agent.target_net.state_dict(), "dq2.pt")

    torch.save(agent.STATE_MEM,  "STATE_MEM.pt")
    torch.save(agent.ACTION_MEM, "ACTION_MEM.pt")
    torch.save(agent.REWARD_MEM, "REWARD_MEM.pt")
    torch.save(agent.STATE2_MEM, "STATE2_MEM.pt")
    torch.save(agent.DONE_MEM,   "DONE_MEM.pt")


def play_step(training, trained):
    """Run `episode_count` episodes on the module-level `env`.

    Reads the globals `env`, `episode_count`, `observation_space`,
    `action_space` and appends each episode's total reward to `total_rewards`.

    Args:
        training: when True, transitions are stored and the agent is optimized
            after every step, and all state is saved to disk at the end. When
            False, every frame is rendered inline and nothing is learned or saved.
        trained: forwarded to Agent; when True, previously saved weights and
            replay memory are loaded.
    """
    agent = Agent(state_space=observation_space, action_space=action_space, trained=trained)
    if not training:
        # The exploration rate starts at its maximum and only decays inside
        # experience_replay, which is skipped when not training. Without this
        # the agent being watched would pick (almost) every action at random.
        agent.exploration_rate = agent.exploration_min

    for episode_n in tqdm(range(episode_count)):
        state = _to_batch_tensor(env.reset())
        total_reward = 0
        steps = 0
        while True:
            if not training:
                show_state(env, episode_n)
            action = agent.epsilon_greedy(state)
            steps += 1

            state_next, reward, terminal, info = env.step(int(action[0]))
            total_reward += reward
            state_next = _to_batch_tensor(state_next)
            reward = torch.tensor([reward]).unsqueeze(0)

            terminal = torch.tensor([int(terminal)]).unsqueeze(0)

            if training:
                agent.remember(state, action, reward, state_next, terminal)
                agent.experience_replay()

            state = state_next
            if terminal:
                break

        total_rewards.append(total_reward)

        print("Total reward after episode {} is {}".format(episode_n + 1, total_rewards[-1]))

    if training:
        _save_training_state(agent)

    # NOTE(review): this closes the module-level `env`; a second play_step call
    # presumably needs the environment to be re-created first - confirm.
    env.close()

    # The moving average needs more rewards than the window is wide; guard on
    # the actual amount of data rather than on a loop counter.
    window = 500
    if len(total_rewards) > window:
        moving_average = np.convolve(total_rewards, np.ones((window,)) / window, mode="valid").tolist()
        plt.title("Episodes trained vs. Average Rewards (per 500 eps)")
        # Left-pad so each average sits at the last episode of its window.
        plt.plot([0] * (window - 1) + moving_average)
        plt.show()

In [None]:
# Number of episodes to run; play_step reads this module-level name.
episode_count = 10000
play_step(training=True, trained=False)
# To watch the agent play instead, call play_step(training=False, trained=True).
# With trained=True the agent loads the weight and replay-memory files that a
# previous training run saved in the working directory.

  
  0%|          | 1/10000 [00:00<2:08:33,  1.30it/s]

Total reward after episode 1 is 250.0


  0%|          | 2/10000 [00:42<68:33:20, 24.68s/it]

Total reward after episode 2 is 1295.0


  0%|          | 3/10000 [00:47<43:20:39, 15.61s/it]

Total reward after episode 3 is 230.0


  0%|          | 4/10000 [01:15<57:22:00, 20.66s/it]

Total reward after episode 4 is 620.0


  0%|          | 5/10000 [01:39<60:32:41, 21.81s/it]

Total reward after episode 5 is 573.0


  0%|          | 6/10000 [02:03<62:29:21, 22.51s/it]

Total reward after episode 6 is 808.0


  0%|          | 7/10000 [02:06<45:29:42, 16.39s/it]

Total reward after episode 7 is 249.0


  0%|          | 8/10000 [02:11<34:43:02, 12.51s/it]

Total reward after episode 8 is 252.0


  0%|          | 9/10000 [02:14<26:50:35,  9.67s/it]

Total reward after episode 9 is 252.0


  0%|          | 10/10000 [02:18<21:35:33,  7.78s/it]

Total reward after episode 10 is 250.0


  0%|          | 11/10000 [02:28<24:03:58,  8.67s/it]

Total reward after episode 11 is 629.0


  0%|          | 12/10000 [02:39<25:34:55,  9.22s/it]

Total reward after episode 12 is 628.0


  0%|          | 13/10000 [02:42<20:44:20,  7.48s/it]

Total reward after episode 13 is 248.0


  0%|          | 14/10000 [02:46<17:22:24,  6.26s/it]

Total reward after episode 14 is 248.0


  0%|          | 15/10000 [02:49<15:09:09,  5.46s/it]

Total reward after episode 15 is 251.0


  0%|          | 16/10000 [02:53<13:35:49,  4.90s/it]

Total reward after episode 16 is 249.0


  0%|          | 17/10000 [02:56<12:23:51,  4.47s/it]

Total reward after episode 17 is 248.0


  0%|          | 18/10000 [03:00<11:40:03,  4.21s/it]

Total reward after episode 18 is 252.0


  0%|          | 19/10000 [03:04<11:09:15,  4.02s/it]

Total reward after episode 19 is 251.0


  0%|          | 20/10000 [03:07<10:42:56,  3.87s/it]

Total reward after episode 20 is 252.0


  0%|          | 21/10000 [03:11<10:32:31,  3.80s/it]

Total reward after episode 21 is 252.0


  0%|          | 22/10000 [03:14<10:18:09,  3.72s/it]

Total reward after episode 22 is 252.0


  0%|          | 23/10000 [03:18<10:13:05,  3.69s/it]

Total reward after episode 23 is 251.0


  0%|          | 24/10000 [03:22<10:12:08,  3.68s/it]

Total reward after episode 24 is 251.0


  0%|          | 25/10000 [03:26<10:39:31,  3.85s/it]

Total reward after episode 25 is 252.0


  0%|          | 26/10000 [03:30<10:40:21,  3.85s/it]

Total reward after episode 26 is 247.0


  0%|          | 27/10000 [03:33<10:30:58,  3.80s/it]

Total reward after episode 27 is 251.0


  0%|          | 28/10000 [03:37<10:25:14,  3.76s/it]

Total reward after episode 28 is 251.0


  0%|          | 29/10000 [03:41<10:22:01,  3.74s/it]

Total reward after episode 29 is 248.0


  0%|          | 30/10000 [03:44<10:11:00,  3.68s/it]

Total reward after episode 30 is 248.0


  0%|          | 31/10000 [03:48<10:09:52,  3.67s/it]

Total reward after episode 31 is 248.0


  0%|          | 32/10000 [03:51<10:01:43,  3.62s/it]

Total reward after episode 32 is 248.0


  0%|          | 33/10000 [03:55<10:02:12,  3.63s/it]

Total reward after episode 33 is 251.0


  0%|          | 34/10000 [03:59<10:04:09,  3.64s/it]

Total reward after episode 34 is 252.0


  0%|          | 35/10000 [04:02<10:12:24,  3.69s/it]

Total reward after episode 35 is 251.0


  0%|          | 36/10000 [04:06<10:11:13,  3.68s/it]

Total reward after episode 36 is 251.0


  0%|          | 37/10000 [04:10<10:09:39,  3.67s/it]

Total reward after episode 37 is 248.0


  0%|          | 38/10000 [04:13<10:08:09,  3.66s/it]

Total reward after episode 38 is 251.0


  0%|          | 39/10000 [04:17<10:12:20,  3.69s/it]

Total reward after episode 39 is 248.0


  0%|          | 40/10000 [04:21<10:03:13,  3.63s/it]

Total reward after episode 40 is 252.0


  0%|          | 41/10000 [04:24<9:56:35,  3.59s/it] 

Total reward after episode 41 is 252.0


  0%|          | 42/10000 [04:35<15:50:48,  5.73s/it]

Total reward after episode 42 is 635.0


  0%|          | 43/10000 [04:39<14:08:17,  5.11s/it]

Total reward after episode 43 is 251.0


  0%|          | 44/10000 [04:42<12:49:08,  4.64s/it]

Total reward after episode 44 is 247.0


  0%|          | 45/10000 [04:46<11:51:05,  4.29s/it]

Total reward after episode 45 is 248.0


  0%|          | 46/10000 [04:49<11:20:07,  4.10s/it]

Total reward after episode 46 is 251.0


  0%|          | 47/10000 [04:53<11:05:37,  4.01s/it]

Total reward after episode 47 is 251.0


  0%|          | 48/10000 [04:57<10:47:19,  3.90s/it]

Total reward after episode 48 is 251.0


  0%|          | 49/10000 [05:00<10:27:42,  3.78s/it]

Total reward after episode 49 is 248.0


  0%|          | 50/10000 [05:04<10:19:36,  3.74s/it]

Total reward after episode 50 is 251.0


  1%|          | 51/10000 [05:14<15:39:01,  5.66s/it]

Total reward after episode 51 is 632.0


  1%|          | 52/10000 [05:17<13:51:27,  5.01s/it]

Total reward after episode 52 is 252.0


  1%|          | 53/10000 [05:21<12:42:56,  4.60s/it]

Total reward after episode 53 is 251.0


  1%|          | 54/10000 [05:25<11:56:57,  4.33s/it]

Total reward after episode 54 is 251.0


  1%|          | 55/10000 [05:28<11:24:26,  4.13s/it]

Total reward after episode 55 is 251.0


  1%|          | 56/10000 [05:32<10:56:01,  3.96s/it]

Total reward after episode 56 is 248.0


  1%|          | 57/10000 [05:36<10:37:50,  3.85s/it]

Total reward after episode 57 is 248.0


  1%|          | 58/10000 [05:39<10:29:30,  3.80s/it]

Total reward after episode 58 is 250.0


  1%|          | 59/10000 [05:43<10:49:17,  3.92s/it]

Total reward after episode 59 is 251.0


  1%|          | 60/10000 [05:47<10:42:17,  3.88s/it]

Total reward after episode 60 is 252.0


  1%|          | 61/10000 [05:51<10:33:48,  3.83s/it]

Total reward after episode 61 is 252.0


  1%|          | 62/10000 [05:55<10:28:58,  3.80s/it]

Total reward after episode 62 is 251.0


  1%|          | 63/10000 [05:58<10:17:55,  3.73s/it]

Total reward after episode 63 is 248.0


  1%|          | 64/10000 [06:08<15:32:12,  5.63s/it]

Total reward after episode 64 is 637.0


  1%|          | 65/10000 [06:12<13:49:56,  5.01s/it]

Total reward after episode 65 is 250.0


  1%|          | 66/10000 [06:16<12:43:31,  4.61s/it]

Total reward after episode 66 is 252.0


  1%|          | 67/10000 [06:19<11:56:40,  4.33s/it]

Total reward after episode 67 is 252.0


  1%|          | 68/10000 [06:29<16:27:57,  5.97s/it]

Total reward after episode 68 is 633.0


  1%|          | 69/10000 [06:39<19:44:34,  7.16s/it]

Total reward after episode 69 is 637.0


  1%|          | 70/10000 [07:29<54:58:37, 19.93s/it]

Total reward after episode 70 is 737.0


  1%|          | 71/10000 [08:05<68:25:42, 24.81s/it]

Total reward after episode 71 is 759.0


  1%|          | 72/10000 [08:15<56:31:45, 20.50s/it]

Total reward after episode 72 is 632.0


  1%|          | 73/10000 [08:26<48:18:44, 17.52s/it]

Total reward after episode 73 is 628.0


  1%|          | 74/10000 [08:49<52:46:58, 19.14s/it]

Total reward after episode 74 is 808.0


  1%|          | 75/10000 [08:59<45:13:26, 16.40s/it]

Total reward after episode 75 is 631.0


  1%|          | 76/10000 [09:09<40:10:59, 14.58s/it]

Total reward after episode 76 is 637.0


  1%|          | 77/10000 [09:13<31:03:14, 11.27s/it]

Total reward after episode 77 is 248.0


  1%|          | 78/10000 [09:23<30:27:54, 11.05s/it]

Total reward after episode 78 is 619.0


  1%|          | 79/10000 [09:27<24:34:50,  8.92s/it]

Total reward after episode 79 is 250.0


  1%|          | 80/10000 [09:31<20:27:05,  7.42s/it]

Total reward after episode 80 is 252.0


  1%|          | 81/10000 [09:35<17:32:46,  6.37s/it]

Total reward after episode 81 is 248.0


  1%|          | 82/10000 [09:39<15:31:53,  5.64s/it]

Total reward after episode 82 is 250.0


  1%|          | 83/10000 [10:51<70:10:39, 25.48s/it]

Total reward after episode 83 is 734.0


  1%|          | 84/10000 [10:54<52:03:24, 18.90s/it]

Total reward after episode 84 is 250.0


  1%|          | 85/10000 [10:58<39:34:26, 14.37s/it]

Total reward after episode 85 is 250.0


  1%|          | 86/10000 [11:02<30:37:28, 11.12s/it]

Total reward after episode 86 is 248.0


  1%|          | 87/10000 [11:12<30:09:05, 10.95s/it]

Total reward after episode 87 is 632.0


  1%|          | 88/10000 [11:30<36:04:35, 13.10s/it]

Total reward after episode 88 is 640.0


  1%|          | 89/10000 [11:34<28:22:46, 10.31s/it]

Total reward after episode 89 is 251.0


  1%|          | 90/10000 [11:42<26:40:47,  9.69s/it]

Total reward after episode 90 is 611.0


  1%|          | 91/10000 [12:04<36:22:27, 13.21s/it]

Total reward after episode 91 is 1434.0


  1%|          | 92/10000 [12:53<66:10:54, 24.05s/it]

Total reward after episode 92 is 1303.0


  1%|          | 93/10000 [13:29<75:56:51, 27.60s/it]

Total reward after episode 93 is 758.0


  1%|          | 94/10000 [13:59<77:48:29, 28.28s/it]

Total reward after episode 94 is 1330.0


  1%|          | 95/10000 [14:08<62:21:22, 22.66s/it]

Total reward after episode 95 is 611.0


  1%|          | 96/10000 [14:32<63:29:21, 23.08s/it]

Total reward after episode 96 is 1336.0


  1%|          | 97/10000 [14:36<47:40:14, 17.33s/it]

Total reward after episode 97 is 240.0


  1%|          | 98/10000 [15:04<55:51:21, 20.31s/it]

Total reward after episode 98 is 1334.0


  1%|          | 99/10000 [15:23<55:11:15, 20.07s/it]

Total reward after episode 99 is 1348.0


  1%|          | 100/10000 [15:42<53:48:53, 19.57s/it]

Total reward after episode 100 is 1346.0


  1%|          | 101/10000 [15:51<45:45:24, 16.64s/it]

Total reward after episode 101 is 612.0


  1%|          | 102/10000 [16:00<39:28:22, 14.36s/it]

Total reward after episode 102 is 615.0


  1%|          | 103/10000 [16:09<34:50:52, 12.68s/it]

Total reward after episode 103 is 627.0


  1%|          | 104/10000 [16:13<27:50:16, 10.13s/it]

Total reward after episode 104 is 242.0


  1%|          | 105/10000 [16:17<22:56:11,  8.34s/it]

Total reward after episode 105 is 241.0


  1%|          | 106/10000 [16:35<30:52:55, 11.24s/it]

Total reward after episode 106 is 816.0


  1%|          | 107/10000 [16:40<25:04:53,  9.13s/it]

Total reward after episode 107 is 241.0


  1%|          | 108/10000 [17:06<39:36:33, 14.41s/it]

Total reward after episode 108 is 628.0


  1%|          | 109/10000 [17:11<31:11:55, 11.36s/it]

Total reward after episode 109 is 241.0


  1%|          | 110/10000 [17:30<37:39:11, 13.71s/it]

Total reward after episode 110 is 721.0


  1%|          | 111/10000 [18:11<60:20:11, 21.97s/it]

Total reward after episode 111 is 1314.0


  1%|          | 112/10000 [19:08<89:08:47, 32.46s/it]

Total reward after episode 112 is 710.0


  1%|          | 113/10000 [19:16<68:39:27, 25.00s/it]

Total reward after episode 113 is 613.0


  1%|          | 114/10000 [19:20<51:31:20, 18.76s/it]

Total reward after episode 114 is 241.0


  1%|          | 115/10000 [19:28<43:10:33, 15.72s/it]

Total reward after episode 115 is 614.0


  1%|          | 116/10000 [19:32<33:27:14, 12.18s/it]

Total reward after episode 116 is 240.0


  1%|          | 117/10000 [19:58<44:08:41, 16.08s/it]

Total reward after episode 117 is 1339.0


  1%|          | 118/10000 [20:02<34:14:28, 12.47s/it]

Total reward after episode 118 is 240.0


  1%|          | 119/10000 [20:59<71:11:39, 25.94s/it]

Total reward after episode 119 is 1649.0


  1%|          | 120/10000 [21:03<53:20:26, 19.44s/it]

Total reward after episode 120 is 241.0


  1%|          | 121/10000 [21:28<57:57:07, 21.12s/it]

Total reward after episode 121 is 1339.0


  1%|          | 122/10000 [21:37<47:49:25, 17.43s/it]

Total reward after episode 122 is 609.0


  1%|          | 123/10000 [22:00<52:09:31, 19.01s/it]

Total reward after episode 123 is 1341.0


  1%|          | 124/10000 [22:15<48:59:56, 17.86s/it]

Total reward after episode 124 is 757.0


  1%|▏         | 125/10000 [22:35<50:33:20, 18.43s/it]

Total reward after episode 125 is 813.0


  1%|▏         | 126/10000 [22:45<43:40:40, 15.92s/it]

Total reward after episode 126 is 620.0


  1%|▏         | 127/10000 [22:55<39:18:25, 14.33s/it]

Total reward after episode 127 is 735.0


  1%|▏         | 128/10000 [22:59<30:50:40, 11.25s/it]

Total reward after episode 128 is 240.0


  1%|▏         | 129/10000 [23:19<37:30:33, 13.68s/it]

Total reward after episode 129 is 1348.0


  1%|▏         | 130/10000 [23:50<52:08:16, 19.02s/it]

Total reward after episode 130 is 1329.0


  1%|▏         | 131/10000 [23:54<39:48:55, 14.52s/it]

Total reward after episode 131 is 240.0


  1%|▏         | 132/10000 [23:59<31:18:08, 11.42s/it]

Total reward after episode 132 is 241.0


  1%|▏         | 133/10000 [24:16<36:31:33, 13.33s/it]

Total reward after episode 133 is 816.0


  1%|▏         | 134/10000 [24:20<28:53:43, 10.54s/it]

Total reward after episode 134 is 241.0


  1%|▏         | 135/10000 [24:38<34:21:42, 12.54s/it]

Total reward after episode 135 is 1349.0


  1%|▏         | 136/10000 [25:14<53:59:45, 19.71s/it]

Total reward after episode 136 is 1322.0


  1%|▏         | 137/10000 [26:35<104:06:06, 38.00s/it]

Total reward after episode 137 is 953.0


  1%|▏         | 138/10000 [26:45<80:59:47, 29.57s/it] 

Total reward after episode 138 is 737.0


  1%|▏         | 139/10000 [27:01<70:33:18, 25.76s/it]

Total reward after episode 139 is 1342.0


  1%|▏         | 140/10000 [27:23<67:24:25, 24.61s/it]

Total reward after episode 140 is 595.0


  1%|▏         | 141/10000 [27:42<62:24:27, 22.79s/it]

Total reward after episode 141 is 815.0


  1%|▏         | 142/10000 [28:20<75:20:50, 27.52s/it]

Total reward after episode 142 is 1318.0


  1%|▏         | 143/10000 [28:32<62:18:56, 22.76s/it]

Total reward after episode 143 is 614.0


  1%|▏         | 144/10000 [29:14<78:18:55, 28.61s/it]

Total reward after episode 144 is 1311.0


  1%|▏         | 145/10000 [29:39<75:25:47, 27.55s/it]

Total reward after episode 145 is 1032.0


  1%|▏         | 146/10000 [29:44<56:14:14, 20.55s/it]

Total reward after episode 146 is 240.0


  1%|▏         | 147/10000 [30:07<58:22:06, 21.33s/it]

Total reward after episode 147 is 1037.0


  1%|▏         | 148/10000 [30:26<56:55:52, 20.80s/it]

Total reward after episode 148 is 1043.0


  1%|▏         | 149/10000 [30:37<48:32:23, 17.74s/it]

Total reward after episode 149 is 636.0


  2%|▏         | 150/10000 [30:46<41:47:59, 15.28s/it]

Total reward after episode 150 is 653.0


  2%|▏         | 151/10000 [31:02<41:44:55, 15.26s/it]

Total reward after episode 151 is 599.0


  2%|▏         | 152/10000 [31:12<37:44:26, 13.80s/it]

Total reward after episode 152 is 735.0


  2%|▏         | 153/10000 [31:30<41:08:12, 15.04s/it]

Total reward after episode 153 is 1349.0


  2%|▏         | 154/10000 [31:34<32:07:21, 11.75s/it]

Total reward after episode 154 is 241.0


  2%|▏         | 155/10000 [31:46<31:52:37, 11.66s/it]

Total reward after episode 155 is 613.0


  2%|▏         | 156/10000 [31:54<29:08:28, 10.66s/it]

Total reward after episode 156 is 610.0


  2%|▏         | 157/10000 [31:58<23:44:06,  8.68s/it]

Total reward after episode 157 is 241.0


  2%|▏         | 158/10000 [32:14<29:47:22, 10.90s/it]

Total reward after episode 158 is 1049.0


  2%|▏         | 159/10000 [32:21<26:58:54,  9.87s/it]

Total reward after episode 159 is 610.0


  2%|▏         | 160/10000 [32:46<38:52:19, 14.22s/it]

Total reward after episode 160 is 1339.0


  2%|▏         | 161/10000 [33:07<44:30:18, 16.28s/it]

Total reward after episode 161 is 1345.0


  2%|▏         | 162/10000 [33:14<37:17:54, 13.65s/it]

Total reward after episode 162 is 611.0


  2%|▏         | 163/10000 [33:19<29:26:22, 10.77s/it]

Total reward after episode 163 is 239.0


  2%|▏         | 164/10000 [33:22<23:28:39,  8.59s/it]

Total reward after episode 164 is 248.0


  2%|▏         | 165/10000 [33:31<24:11:31,  8.86s/it]

Total reward after episode 165 is 624.0


  2%|▏         | 166/10000 [34:01<41:16:16, 15.11s/it]

Total reward after episode 166 is 1330.0


  2%|▏         | 167/10000 [34:11<37:03:43, 13.57s/it]

Total reward after episode 167 is 633.0


  2%|▏         | 168/10000 [34:22<34:30:58, 12.64s/it]

Total reward after episode 168 is 631.0


  2%|▏         | 169/10000 [34:47<44:58:44, 16.47s/it]

Total reward after episode 169 is 1035.0


  2%|▏         | 170/10000 [35:50<83:10:01, 30.46s/it]

Total reward after episode 170 is 1282.0


  2%|▏         | 171/10000 [35:54<61:38:22, 22.58s/it]

Total reward after episode 171 is 239.0


  2%|▏         | 172/10000 [35:58<46:12:33, 16.93s/it]

Total reward after episode 172 is 249.0


  2%|▏         | 173/10000 [36:31<59:08:47, 21.67s/it]

Total reward after episode 173 is 603.0


  2%|▏         | 174/10000 [36:54<60:44:30, 22.25s/it]

Total reward after episode 174 is 808.0


  2%|▏         | 175/10000 [37:05<50:51:51, 18.64s/it]

Total reward after episode 175 is 640.0


  2%|▏         | 176/10000 [37:14<43:14:48, 15.85s/it]

Total reward after episode 176 is 611.0


  2%|▏         | 177/10000 [37:25<39:37:22, 14.52s/it]

Total reward after episode 177 is 607.0


  2%|▏         | 178/10000 [37:29<31:04:43, 11.39s/it]

Total reward after episode 178 is 238.0


  2%|▏         | 179/10000 [37:46<35:17:00, 12.93s/it]

Total reward after episode 179 is 1047.0


  2%|▏         | 180/10000 [37:55<31:52:35, 11.69s/it]

Total reward after episode 180 is 608.0


  2%|▏         | 181/10000 [38:16<40:03:20, 14.69s/it]

Total reward after episode 181 is 1345.0


  2%|▏         | 182/10000 [38:21<31:22:08, 11.50s/it]

Total reward after episode 182 is 239.0


  2%|▏         | 183/10000 [38:25<25:19:34,  9.29s/it]

Total reward after episode 183 is 239.0


  2%|▏         | 184/10000 [38:29<21:05:14,  7.73s/it]

Total reward after episode 184 is 239.0


  2%|▏         | 185/10000 [38:33<17:58:50,  6.60s/it]

Total reward after episode 185 is 237.0


  2%|▏         | 186/10000 [38:43<20:57:43,  7.69s/it]

Total reward after episode 186 is 736.0


  2%|▏         | 187/10000 [38:47<17:59:07,  6.60s/it]

Total reward after episode 187 is 239.0


  2%|▏         | 188/10000 [38:59<22:06:08,  8.11s/it]

Total reward after episode 188 is 611.0


  2%|▏         | 189/10000 [39:26<37:38:44, 13.81s/it]

Total reward after episode 189 is 1031.0


  2%|▏         | 190/10000 [39:30<29:39:27, 10.88s/it]

Total reward after episode 190 is 241.0


  2%|▏         | 191/10000 [39:50<37:14:59, 13.67s/it]

Total reward after episode 191 is 813.0


  2%|▏         | 192/10000 [40:09<41:35:10, 15.26s/it]

Total reward after episode 192 is 618.0


  2%|▏         | 193/10000 [40:33<48:34:57, 17.83s/it]

Total reward after episode 193 is 1339.0


  2%|▏         | 194/10000 [40:53<50:23:37, 18.50s/it]

Total reward after episode 194 is 1331.0


  2%|▏         | 195/10000 [41:02<42:26:04, 15.58s/it]

Total reward after episode 195 is 611.0


  2%|▏         | 196/10000 [41:16<41:47:09, 15.34s/it]

Total reward after episode 196 is 1050.0


  2%|▏         | 197/10000 [41:34<43:24:28, 15.94s/it]

Total reward after episode 197 is 1046.0


  2%|▏         | 198/10000 [41:38<33:40:58, 12.37s/it]

Total reward after episode 198 is 239.0


  2%|▏         | 199/10000 [41:42<26:53:05,  9.88s/it]

Total reward after episode 199 is 242.0


  2%|▏         | 200/10000 [41:46<21:54:31,  8.05s/it]

Total reward after episode 200 is 250.0


  2%|▏         | 201/10000 [42:10<35:36:36, 13.08s/it]

Total reward after episode 201 is 1340.0


  2%|▏         | 202/10000 [42:14<28:13:46, 10.37s/it]

Total reward after episode 202 is 239.0


  2%|▏         | 203/10000 [42:18<22:38:40,  8.32s/it]

Total reward after episode 203 is 248.0


  2%|▏         | 204/10000 [42:35<29:57:27, 11.01s/it]

Total reward after episode 204 is 1351.0


  2%|▏         | 205/10000 [42:39<24:16:21,  8.92s/it]

Total reward after episode 205 is 241.0


  2%|▏         | 206/10000 [42:43<20:17:24,  7.46s/it]

Total reward after episode 206 is 241.0


  2%|▏         | 207/10000 [42:53<21:51:00,  8.03s/it]

Total reward after episode 207 is 635.0


  2%|▏         | 208/10000 [43:49<60:54:54, 22.40s/it]

Total reward after episode 208 is 1663.0


  2%|▏         | 209/10000 [44:12<62:01:03, 22.80s/it]

Total reward after episode 209 is 1344.0


  2%|▏         | 210/10000 [44:32<59:45:17, 21.97s/it]

Total reward after episode 210 is 1040.0


  2%|▏         | 211/10000 [44:54<59:12:52, 21.78s/it]

Total reward after episode 211 is 1037.0


  2%|▏         | 212/10000 [44:58<44:44:47, 16.46s/it]

Total reward after episode 212 is 239.0


  2%|▏         | 213/10000 [45:17<47:10:32, 17.35s/it]

Total reward after episode 213 is 1346.0


  2%|▏         | 214/10000 [45:31<44:13:06, 16.27s/it]

Total reward after episode 214 is 730.0


  2%|▏         | 215/10000 [45:48<44:42:26, 16.45s/it]

Total reward after episode 215 is 1351.0


  2%|▏         | 216/10000 [46:15<53:07:03, 19.54s/it]

Total reward after episode 216 is 1577.0


  2%|▏         | 217/10000 [46:32<51:11:11, 18.84s/it]

Total reward after episode 217 is 1047.0


  2%|▏         | 218/10000 [46:41<43:12:23, 15.90s/it]

Total reward after episode 218 is 623.0


  2%|▏         | 219/10000 [46:45<33:32:07, 12.34s/it]

Total reward after episode 219 is 239.0


  2%|▏         | 220/10000 [46:49<26:46:03,  9.85s/it]

Total reward after episode 220 is 239.0


  2%|▏         | 221/10000 [47:11<36:51:36, 13.57s/it]

Total reward after episode 221 is 1345.0


  2%|▏         | 222/10000 [48:14<77:05:08, 28.38s/it]

Total reward after episode 222 is 1284.0


  2%|▏         | 223/10000 [48:32<68:37:07, 25.27s/it]

Total reward after episode 223 is 816.0


  2%|▏         | 224/10000 [48:42<56:21:12, 20.75s/it]

Total reward after episode 224 is 734.0


  2%|▏         | 225/10000 [49:18<68:09:28, 25.10s/it]

Total reward after episode 225 is 1016.0


  2%|▏         | 226/10000 [49:29<56:49:15, 20.93s/it]

Total reward after episode 226 is 604.0


  2%|▏         | 227/10000 [49:40<48:30:03, 17.87s/it]

Total reward after episode 227 is 632.0


  2%|▏         | 228/10000 [49:58<48:47:14, 17.97s/it]

Total reward after episode 228 is 816.0


  2%|▏         | 229/10000 [50:07<41:32:58, 15.31s/it]

Total reward after episode 229 is 625.0


  2%|▏         | 230/10000 [51:27<94:12:11, 34.71s/it]

Total reward after episode 230 is 973.0


  2%|▏         | 231/10000 [51:45<80:58:25, 29.84s/it]

Total reward after episode 231 is 621.0


  2%|▏         | 232/10000 [51:57<66:00:43, 24.33s/it]

Total reward after episode 232 is 733.0


  2%|▏         | 233/10000 [52:05<53:13:54, 19.62s/it]

Total reward after episode 233 is 607.0


  2%|▏         | 234/10000 [52:09<40:34:57, 14.96s/it]

Total reward after episode 234 is 239.0


  2%|▏         | 235/10000 [52:27<42:29:05, 15.66s/it]

Total reward after episode 235 is 641.0


  2%|▏         | 236/10000 [52:45<44:43:12, 16.49s/it]

Total reward after episode 236 is 816.0


  2%|▏         | 237/10000 [52:54<38:27:01, 14.18s/it]

Total reward after episode 237 is 610.0


  2%|▏         | 238/10000 [53:04<35:06:26, 12.95s/it]

Total reward after episode 238 is 622.0


  2%|▏         | 239/10000 [53:22<38:52:44, 14.34s/it]

Total reward after episode 239 is 1046.0


  2%|▏         | 240/10000 [53:38<40:35:44, 14.97s/it]

Total reward after episode 240 is 767.0


  2%|▏         | 241/10000 [53:48<36:32:25, 13.48s/it]

Total reward after episode 241 is 625.0


  2%|▏         | 242/10000 [53:52<28:54:36, 10.67s/it]

Total reward after episode 242 is 239.0


  2%|▏         | 243/10000 [54:11<35:41:26, 13.17s/it]

Total reward after episode 243 is 1331.0


  2%|▏         | 244/10000 [54:20<32:29:01, 11.99s/it]

Total reward after episode 244 is 637.0


  2%|▏         | 245/10000 [54:39<37:37:10, 13.88s/it]

Total reward after episode 245 is 1044.0


  2%|▏         | 246/10000 [54:49<34:51:07, 12.86s/it]

Total reward after episode 246 is 633.0


  2%|▏         | 247/10000 [55:18<47:39:09, 17.59s/it]

Total reward after episode 247 is 1314.0


  2%|▏         | 248/10000 [55:39<50:07:47, 18.51s/it]

Total reward after episode 248 is 745.0


  2%|▏         | 249/10000 [56:04<55:59:42, 20.67s/it]

Total reward after episode 249 is 628.0


  2%|▎         | 250/10000 [56:28<58:42:08, 21.67s/it]

Total reward after episode 250 is 1330.0


  3%|▎         | 251/10000 [56:47<56:08:44, 20.73s/it]

Total reward after episode 251 is 1045.0


  3%|▎         | 252/10000 [57:02<52:00:36, 19.21s/it]

Total reward after episode 252 is 1048.0


  3%|▎         | 253/10000 [57:19<49:32:08, 18.30s/it]

Total reward after episode 253 is 743.0


  3%|▎         | 254/10000 [57:32<45:28:34, 16.80s/it]

Total reward after episode 254 is 732.0


  3%|▎         | 255/10000 [57:48<45:05:17, 16.66s/it]

Total reward after episode 255 is 619.0


  3%|▎         | 256/10000 [57:57<38:45:07, 14.32s/it]

Total reward after episode 256 is 617.0


  3%|▎         | 257/10000 [1:00:09<134:28:34, 49.69s/it]

Total reward after episode 257 is 581.0


  3%|▎         | 258/10000 [1:00:26<107:45:48, 39.82s/it]

Total reward after episode 258 is 608.0


  3%|▎         | 259/10000 [1:00:34<81:29:43, 30.12s/it] 

Total reward after episode 259 is 610.0


  3%|▎         | 260/10000 [1:00:56<75:05:19, 27.75s/it]

Total reward after episode 260 is 1330.0


  3%|▎         | 261/10000 [1:01:13<66:11:53, 24.47s/it]

Total reward after episode 261 is 611.0


  3%|▎         | 262/10000 [1:01:28<58:50:48, 21.75s/it]

Total reward after episode 262 is 762.0


  3%|▎         | 263/10000 [1:01:43<53:42:41, 19.86s/it]

Total reward after episode 263 is 1048.0


  3%|▎         | 264/10000 [1:02:02<52:13:46, 19.31s/it]

Total reward after episode 264 is 816.0


  3%|▎         | 265/10000 [1:02:19<50:56:12, 18.84s/it]

Total reward after episode 265 is 1348.0


  3%|▎         | 266/10000 [1:02:34<47:44:45, 17.66s/it]

Total reward after episode 266 is 1049.0


  3%|▎         | 267/10000 [1:02:52<48:07:08, 17.80s/it]

Total reward after episode 267 is 1045.0


  3%|▎         | 268/10000 [1:03:04<42:53:17, 15.86s/it]

Total reward after episode 268 is 610.0


  3%|▎         | 269/10000 [1:03:13<37:32:46, 13.89s/it]

Total reward after episode 269 is 640.0


  3%|▎         | 270/10000 [1:03:28<38:06:53, 14.10s/it]

Total reward after episode 270 is 1051.0


  3%|▎         | 271/10000 [1:03:44<40:14:14, 14.89s/it]

Total reward after episode 271 is 1045.0


  3%|▎         | 272/10000 [1:04:02<42:15:44, 15.64s/it]

Total reward after episode 272 is 641.0


  3%|▎         | 273/10000 [1:04:26<49:35:13, 18.35s/it]

Total reward after episode 273 is 1338.0


  3%|▎         | 274/10000 [1:04:41<46:57:52, 17.38s/it]

Total reward after episode 274 is 1050.0


  3%|▎         | 275/10000 [1:05:00<47:54:40, 17.74s/it]

Total reward after episode 275 is 1337.0


  3%|▎         | 276/10000 [1:05:21<50:13:27, 18.59s/it]

Total reward after episode 276 is 812.0


  3%|▎         | 277/10000 [1:05:25<38:27:33, 14.24s/it]

Total reward after episode 277 is 240.0


  3%|▎         | 278/10000 [1:05:29<30:15:03, 11.20s/it]

Total reward after episode 278 is 234.0


  3%|▎         | 279/10000 [1:05:48<36:43:39, 13.60s/it]

Total reward after episode 279 is 1346.0


  3%|▎         | 280/10000 [1:06:04<38:51:43, 14.39s/it]

Total reward after episode 280 is 1047.0


  3%|▎         | 281/10000 [1:06:14<35:15:08, 13.06s/it]

Total reward after episode 281 is 611.0


  3%|▎         | 282/10000 [1:06:35<41:49:04, 15.49s/it]

Total reward after episode 282 is 1041.0


  3%|▎         | 283/10000 [1:06:59<48:46:06, 18.07s/it]

Total reward after episode 283 is 1033.0


  3%|▎         | 284/10000 [1:07:07<40:24:46, 14.97s/it]

Total reward after episode 284 is 612.0


  3%|▎         | 285/10000 [1:07:17<35:57:59, 13.33s/it]

Total reward after episode 285 is 610.0


  3%|▎         | 286/10000 [1:07:35<40:25:39, 14.98s/it]

Total reward after episode 286 is 1341.0


  3%|▎         | 287/10000 [1:07:39<31:28:37, 11.67s/it]

Total reward after episode 287 is 238.0


  3%|▎         | 288/10000 [1:07:57<36:25:44, 13.50s/it]

Total reward after episode 288 is 1041.0


  3%|▎         | 289/10000 [1:08:15<39:33:34, 14.67s/it]

Total reward after episode 289 is 767.0


  3%|▎         | 290/10000 [1:08:32<41:32:05, 15.40s/it]

Total reward after episode 290 is 608.0


  3%|▎         | 291/10000 [1:08:48<41:57:58, 15.56s/it]

Total reward after episode 291 is 1048.0


  3%|▎         | 292/10000 [1:09:15<51:10:05, 18.97s/it]

Total reward after episode 292 is 1863.0


  3%|▎         | 293/10000 [1:09:36<53:13:16, 19.74s/it]

Total reward after episode 293 is 1041.0


  3%|▎         | 294/10000 [1:09:52<50:07:43, 18.59s/it]

Total reward after episode 294 is 1046.0


  3%|▎         | 295/10000 [1:10:11<50:13:36, 18.63s/it]

Total reward after episode 295 is 1045.0


  3%|▎         | 296/10000 [1:10:27<48:01:18, 17.82s/it]

Total reward after episode 296 is 1043.0


  3%|▎         | 297/10000 [1:10:44<48:01:09, 17.82s/it]

Total reward after episode 297 is 816.0


  3%|▎         | 298/10000 [1:10:48<36:46:19, 13.64s/it]

Total reward after episode 298 is 238.0


  3%|▎         | 299/10000 [1:10:52<28:35:08, 10.61s/it]

Total reward after episode 299 is 249.0


  3%|▎         | 300/10000 [1:11:03<28:42:47, 10.66s/it]

Total reward after episode 300 is 613.0


  3%|▎         | 301/10000 [1:11:23<36:40:51, 13.62s/it]

Total reward after episode 301 is 1438.0


  3%|▎         | 302/10000 [1:11:34<34:07:20, 12.67s/it]

Total reward after episode 302 is 733.0


  3%|▎         | 303/10000 [1:11:51<38:08:34, 14.16s/it]

Total reward after episode 303 is 1350.0


  3%|▎         | 304/10000 [1:11:55<29:58:21, 11.13s/it]

Total reward after episode 304 is 235.0


  3%|▎         | 305/10000 [1:11:59<24:09:06,  8.97s/it]

Total reward after episode 305 is 239.0


  3%|▎         | 306/10000 [1:12:10<25:22:43,  9.42s/it]

Total reward after episode 306 is 617.0


  3%|▎         | 307/10000 [1:13:11<66:50:03, 24.82s/it]

Total reward after episode 307 is 750.0


  3%|▎         | 308/10000 [1:13:40<70:14:06, 26.09s/it]

Total reward after episode 308 is 799.0


  3%|▎         | 309/10000 [1:13:47<55:11:26, 20.50s/it]

Total reward after episode 309 is 610.0


  3%|▎         | 310/10000 [1:14:03<51:46:18, 19.23s/it]

Total reward after episode 310 is 1048.0


  3%|▎         | 311/10000 [1:14:21<50:45:13, 18.86s/it]

Total reward after episode 311 is 1046.0


  3%|▎         | 312/10000 [1:14:40<50:15:11, 18.67s/it]

Total reward after episode 312 is 1046.0


  3%|▎         | 313/10000 [1:14:54<46:37:00, 17.32s/it]

Total reward after episode 313 is 722.0


  3%|▎         | 314/10000 [1:15:08<44:15:10, 16.45s/it]

Total reward after episode 314 is 1052.0


  3%|▎         | 315/10000 [1:15:12<34:15:52, 12.74s/it]

Total reward after episode 315 is 235.0


  3%|▎         | 316/10000 [1:16:17<76:09:41, 28.31s/it]

Total reward after episode 316 is 1279.0


  3%|▎         | 317/10000 [1:16:45<76:11:37, 28.33s/it]

Total reward after episode 317 is 1029.0


  3%|▎         | 318/10000 [1:17:31<90:33:48, 33.67s/it]

Total reward after episode 318 is 1024.0


  3%|▎         | 319/10000 [1:17:36<66:47:22, 24.84s/it]

Total reward after episode 319 is 238.0


  3%|▎         | 320/10000 [1:19:34<142:37:11, 53.04s/it]

Total reward after episode 320 is 891.0


  3%|▎         | 321/10000 [1:19:50<112:39:27, 41.90s/it]

Total reward after episode 321 is 1046.0


  3%|▎         | 322/10000 [1:20:07<92:26:09, 34.38s/it] 

Total reward after episode 322 is 1047.0


  3%|▎         | 323/10000 [1:21:05<111:01:39, 41.30s/it]

Total reward after episode 323 is 1289.0


  3%|▎         | 324/10000 [1:21:26<95:17:40, 35.45s/it] 

Total reward after episode 324 is 1060.0


  3%|▎         | 325/10000 [1:23:50<182:37:57, 67.96s/it]

Total reward after episode 325 is 556.0


  3%|▎         | 326/10000 [1:24:25<156:15:39, 58.15s/it]

Total reward after episode 326 is 1322.0


  3%|▎         | 327/10000 [1:24:50<129:02:01, 48.02s/it]

Total reward after episode 327 is 1035.0


  3%|▎         | 328/10000 [1:25:05<102:54:21, 38.30s/it]

Total reward after episode 328 is 1050.0


  3%|▎         | 329/10000 [1:25:15<80:01:44, 29.79s/it] 

Total reward after episode 329 is 614.0


  3%|▎         | 330/10000 [1:25:20<59:25:17, 22.12s/it]

Total reward after episode 330 is 239.0


  3%|▎         | 331/10000 [1:25:24<45:01:33, 16.76s/it]

Total reward after episode 331 is 235.0


  3%|▎         | 332/10000 [1:25:52<54:32:25, 20.31s/it]

Total reward after episode 332 is 729.0


  3%|▎         | 333/10000 [1:26:01<45:13:40, 16.84s/it]

Total reward after episode 333 is 617.0


  3%|▎         | 334/10000 [1:26:16<43:39:16, 16.26s/it]

Total reward after episode 334 is 1050.0


  3%|▎         | 335/10000 [1:26:34<44:32:18, 16.59s/it]

Total reward after episode 335 is 1146.0


  3%|▎         | 336/10000 [1:26:54<47:42:27, 17.77s/it]

Total reward after episode 336 is 605.0


  3%|▎         | 337/10000 [1:27:12<48:13:25, 17.97s/it]

Total reward after episode 337 is 770.0


  3%|▎         | 338/10000 [1:27:27<45:32:57, 16.97s/it]

Total reward after episode 338 is 1048.0


  3%|▎         | 339/10000 [1:27:42<44:10:49, 16.46s/it]

Total reward after episode 339 is 1050.0


  3%|▎         | 340/10000 [1:27:58<43:41:14, 16.28s/it]

Total reward after episode 340 is 1049.0


  3%|▎         | 341/10000 [1:28:09<38:52:06, 14.49s/it]

Total reward after episode 341 is 615.0


  3%|▎         | 342/10000 [1:29:03<71:07:55, 26.51s/it]

Total reward after episode 342 is 1294.0


  3%|▎         | 343/10000 [1:29:34<74:28:33, 27.76s/it]

Total reward after episode 343 is 1143.0


  3%|▎         | 344/10000 [1:29:48<63:52:21, 23.81s/it]

Total reward after episode 344 is 748.0


  3%|▎         | 345/10000 [1:29:52<47:56:07, 17.87s/it]

Total reward after episode 345 is 243.0


  3%|▎         | 346/10000 [1:30:03<41:45:03, 15.57s/it]

Total reward after episode 346 is 619.0


  3%|▎         | 347/10000 [1:30:19<42:13:47, 15.75s/it]

Total reward after episode 347 is 1044.0


  3%|▎         | 348/10000 [1:30:34<41:57:43, 15.65s/it]

Total reward after episode 348 is 1049.0


  3%|▎         | 349/10000 [1:30:53<44:42:36, 16.68s/it]

Total reward after episode 349 is 1043.0


  4%|▎         | 350/10000 [1:31:11<45:30:37, 16.98s/it]

Total reward after episode 350 is 641.0


  4%|▎         | 351/10000 [1:31:16<35:44:18, 13.33s/it]

Total reward after episode 351 is 238.0


  4%|▎         | 352/10000 [1:31:21<28:56:29, 10.80s/it]

Total reward after episode 352 is 234.0


  4%|▎         | 353/10000 [1:31:26<24:11:49,  9.03s/it]

Total reward after episode 353 is 238.0


  4%|▎         | 354/10000 [1:31:45<32:42:09, 12.20s/it]

Total reward after episode 354 is 639.0


  4%|▎         | 355/10000 [1:32:00<34:48:20, 12.99s/it]

Total reward after episode 355 is 1052.0


  4%|▎         | 356/10000 [1:32:09<31:28:46, 11.75s/it]

Total reward after episode 356 is 612.0


  4%|▎         | 357/10000 [1:32:18<29:28:02, 11.00s/it]

Total reward after episode 357 is 613.0


  4%|▎         | 358/10000 [1:33:52<96:14:22, 35.93s/it]

Total reward after episode 358 is 2271.0


  4%|▎         | 359/10000 [1:34:03<76:00:12, 28.38s/it]

Total reward after episode 359 is 639.0


  4%|▎         | 360/10000 [1:34:07<56:25:53, 21.07s/it]

Total reward after episode 360 is 239.0


  4%|▎         | 361/10000 [1:34:21<50:55:30, 19.02s/it]

Total reward after episode 361 is 723.0


  4%|▎         | 362/10000 [1:34:39<49:33:31, 18.51s/it]

Total reward after episode 362 is 1044.0


  4%|▎         | 363/10000 [1:34:47<41:37:38, 15.55s/it]

Total reward after episode 363 is 611.0


  4%|▎         | 364/10000 [1:35:04<42:53:06, 16.02s/it]

Total reward after episode 364 is 818.0


  4%|▎         | 365/10000 [1:35:20<42:24:19, 15.84s/it]

Total reward after episode 365 is 1050.0


  4%|▎         | 366/10000 [1:35:40<45:43:26, 17.09s/it]

Total reward after episode 366 is 1045.0


  4%|▎         | 367/10000 [1:36:03<50:26:19, 18.85s/it]

Total reward after episode 367 is 1040.0


  4%|▎         | 368/10000 [1:36:23<51:40:18, 19.31s/it]

Total reward after episode 368 is 637.0


  4%|▎         | 369/10000 [1:36:40<49:42:36, 18.58s/it]

Total reward after episode 369 is 1048.0


  4%|▎         | 370/10000 [1:37:10<58:35:44, 21.90s/it]

Total reward after episode 370 is 800.0


  4%|▎         | 371/10000 [1:37:30<57:04:08, 21.34s/it]

Total reward after episode 371 is 1065.0


  4%|▎         | 372/10000 [1:37:48<54:53:13, 20.52s/it]

Total reward after episode 372 is 816.0


  4%|▎         | 373/10000 [1:37:56<44:36:20, 16.68s/it]

Total reward after episode 373 is 610.0


  4%|▎         | 374/10000 [1:38:04<37:47:17, 14.13s/it]

Total reward after episode 374 is 613.0


  4%|▍         | 375/10000 [1:38:23<41:43:24, 15.61s/it]

Total reward after episode 375 is 815.0


  4%|▍         | 376/10000 [1:39:02<59:55:10, 22.41s/it]

Total reward after episode 376 is 1302.0


  4%|▍         | 377/10000 [1:39:23<59:16:22, 22.17s/it]

Total reward after episode 377 is 636.0


  4%|▍         | 378/10000 [1:39:32<48:57:56, 18.32s/it]

Total reward after episode 378 is 611.0


  4%|▍         | 379/10000 [1:39:48<46:52:52, 17.54s/it]

Total reward after episode 379 is 1070.0


  4%|▍         | 380/10000 [1:40:09<49:41:25, 18.60s/it]

Total reward after episode 380 is 1347.0


  4%|▍         | 381/10000 [1:40:33<53:46:03, 20.12s/it]

Total reward after episode 381 is 809.0


  4%|▍         | 382/10000 [1:40:37<40:55:17, 15.32s/it]

Total reward after episode 382 is 242.0


  4%|▍         | 383/10000 [1:41:00<46:54:44, 17.56s/it]

Total reward after episode 383 is 1582.0


  4%|▍         | 384/10000 [1:41:09<39:56:20, 14.95s/it]

Total reward after episode 384 is 611.0


  4%|▍         | 385/10000 [1:41:20<36:40:55, 13.73s/it]

Total reward after episode 385 is 609.0


  4%|▍         | 386/10000 [1:41:29<33:13:04, 12.44s/it]

Total reward after episode 386 is 611.0


  4%|▍         | 387/10000 [1:41:39<30:54:24, 11.57s/it]

Total reward after episode 387 is 627.0


  4%|▍         | 388/10000 [1:41:42<24:42:51,  9.26s/it]

Total reward after episode 388 is 247.0


  4%|▍         | 389/10000 [1:41:51<23:58:48,  8.98s/it]

Total reward after episode 389 is 616.0


  4%|▍         | 390/10000 [1:42:12<33:43:46, 12.64s/it]

Total reward after episode 390 is 1043.0


  4%|▍         | 391/10000 [1:42:20<30:23:20, 11.39s/it]

Total reward after episode 391 is 616.0


  4%|▍         | 392/10000 [1:42:31<29:56:08, 11.22s/it]

Total reward after episode 392 is 623.0


  4%|▍         | 393/10000 [1:42:51<36:57:03, 13.85s/it]

Total reward after episode 393 is 1355.0


  4%|▍         | 394/10000 [1:43:02<34:33:37, 12.95s/it]

Total reward after episode 394 is 735.0


  4%|▍         | 395/10000 [1:43:13<33:11:05, 12.44s/it]

Total reward after episode 395 is 638.0


  4%|▍         | 396/10000 [1:43:23<30:53:28, 11.58s/it]

Total reward after episode 396 is 639.0


  4%|▍         | 397/10000 [1:43:32<28:41:45, 10.76s/it]

Total reward after episode 397 is 621.0


  4%|▍         | 398/10000 [1:43:52<36:14:03, 13.59s/it]

Total reward after episode 398 is 1043.0


  4%|▍         | 399/10000 [1:43:56<28:53:26, 10.83s/it]

Total reward after episode 399 is 236.0


  4%|▍         | 400/10000 [1:44:14<34:30:35, 12.94s/it]

Total reward after episode 400 is 745.0


  4%|▍         | 401/10000 [1:44:31<37:47:54, 14.18s/it]

Total reward after episode 401 is 1048.0


  4%|▍         | 402/10000 [1:44:36<30:05:43, 11.29s/it]

Total reward after episode 402 is 235.0


  4%|▍         | 403/10000 [1:44:52<33:57:16, 12.74s/it]

Total reward after episode 403 is 1047.0


  4%|▍         | 404/10000 [1:45:09<37:25:31, 14.04s/it]

Total reward after episode 404 is 819.0


  4%|▍         | 405/10000 [1:45:18<33:42:54, 12.65s/it]

Total reward after episode 405 is 603.0


  4%|▍         | 406/10000 [1:45:28<31:31:02, 11.83s/it]

Total reward after episode 406 is 610.0


  4%|▍         | 407/10000 [1:45:45<35:25:56, 13.30s/it]

Total reward after episode 407 is 819.0


  4%|▍         | 408/10000 [1:45:55<32:25:49, 12.17s/it]

Total reward after episode 408 is 654.0


  4%|▍         | 409/10000 [1:46:19<42:03:09, 15.78s/it]

Total reward after episode 409 is 1038.0


  4%|▍         | 410/10000 [1:46:28<36:37:17, 13.75s/it]

Total reward after episode 410 is 615.0


  4%|▍         | 411/10000 [1:46:57<48:52:16, 18.35s/it]

Total reward after episode 411 is 730.0


  4%|▍         | 412/10000 [1:47:12<46:08:38, 17.33s/it]

Total reward after episode 412 is 1051.0


  4%|▍         | 413/10000 [1:47:22<40:12:17, 15.10s/it]

Total reward after episode 413 is 607.0


  4%|▍         | 414/10000 [1:47:31<35:52:02, 13.47s/it]

Total reward after episode 414 is 630.0


  4%|▍         | 415/10000 [1:48:07<53:20:51, 20.04s/it]

Total reward after episode 415 is 1020.0


  4%|▍         | 416/10000 [1:48:26<53:09:00, 19.96s/it]

Total reward after episode 416 is 1044.0


  4%|▍         | 417/10000 [1:48:36<44:33:33, 16.74s/it]

Total reward after episode 417 is 627.0


  4%|▍         | 418/10000 [1:48:44<37:35:11, 14.12s/it]

Total reward after episode 418 is 607.0


  4%|▍         | 419/10000 [1:48:52<32:42:02, 12.29s/it]

Total reward after episode 419 is 610.0


  4%|▍         | 420/10000 [1:49:01<30:01:37, 11.28s/it]

Total reward after episode 420 is 610.0


  4%|▍         | 421/10000 [1:49:10<28:51:06, 10.84s/it]

Total reward after episode 421 is 637.0


  4%|▍         | 422/10000 [1:49:49<50:47:11, 19.09s/it]

Total reward after episode 422 is 1018.0


  4%|▍         | 423/10000 [1:50:00<44:28:47, 16.72s/it]

Total reward after episode 423 is 628.0


  4%|▍         | 424/10000 [1:50:25<51:07:58, 19.22s/it]

Total reward after episode 424 is 1575.0


  4%|▍         | 425/10000 [1:50:56<60:26:39, 22.73s/it]

Total reward after episode 425 is 1030.0


  4%|▍         | 426/10000 [1:51:11<54:37:56, 20.54s/it]

Total reward after episode 426 is 1050.0


  4%|▍         | 427/10000 [1:51:20<44:43:30, 16.82s/it]

Total reward after episode 427 is 606.0


  4%|▍         | 428/10000 [1:51:30<39:50:49, 14.99s/it]

Total reward after episode 428 is 736.0


  4%|▍         | 429/10000 [1:51:40<35:38:38, 13.41s/it]

Total reward after episode 429 is 636.0


  4%|▍         | 430/10000 [1:51:58<39:18:31, 14.79s/it]

Total reward after episode 430 is 1048.0


  4%|▍         | 431/10000 [1:52:16<42:01:57, 15.81s/it]

Total reward after episode 431 is 1159.0


  4%|▍         | 432/10000 [1:52:53<59:06:07, 22.24s/it]

Total reward after episode 432 is 791.0


  4%|▍         | 433/10000 [1:53:23<64:33:39, 24.29s/it]

Total reward after episode 433 is 802.0


  4%|▍         | 434/10000 [1:53:31<51:35:30, 19.42s/it]

Total reward after episode 434 is 606.0


  4%|▍         | 435/10000 [1:53:46<48:33:16, 18.27s/it]

Total reward after episode 435 is 1050.0


  4%|▍         | 436/10000 [1:54:03<47:23:10, 17.84s/it]

Total reward after episode 436 is 1049.0


  4%|▍         | 437/10000 [1:54:07<36:41:59, 13.82s/it]

Total reward after episode 437 is 239.0


  4%|▍         | 438/10000 [1:54:29<42:40:41, 16.07s/it]

Total reward after episode 438 is 1330.0


  4%|▍         | 439/10000 [1:54:33<33:18:06, 12.54s/it]

Total reward after episode 439 is 238.0


  4%|▍         | 440/10000 [1:54:43<31:08:52, 11.73s/it]

Total reward after episode 440 is 621.0


  4%|▍         | 441/10000 [1:54:58<33:44:44, 12.71s/it]

Total reward after episode 441 is 729.0


  4%|▍         | 442/10000 [1:55:42<58:29:39, 22.03s/it]

Total reward after episode 442 is 1010.0


  4%|▍         | 443/10000 [1:55:50<47:30:54, 17.90s/it]

Total reward after episode 443 is 610.0


  4%|▍         | 444/10000 [1:56:10<49:10:18, 18.52s/it]

Total reward after episode 444 is 815.0


  4%|▍         | 445/10000 [1:56:34<53:51:01, 20.29s/it]

Total reward after episode 445 is 1576.0


  4%|▍         | 446/10000 [1:57:07<63:55:49, 24.09s/it]

Total reward after episode 446 is 1315.0


  4%|▍         | 447/10000 [1:57:29<62:22:57, 23.51s/it]

Total reward after episode 447 is 1350.0


  4%|▍         | 448/10000 [1:57:51<60:46:01, 22.90s/it]

Total reward after episode 448 is 1041.0


  4%|▍         | 449/10000 [1:58:13<59:59:20, 22.61s/it]

Total reward after episode 449 is 1330.0


  4%|▍         | 450/10000 [1:58:24<50:34:27, 19.06s/it]

Total reward after episode 450 is 607.0


  5%|▍         | 451/10000 [1:58:33<42:51:11, 16.16s/it]

Total reward after episode 451 is 616.0


  5%|▍         | 452/10000 [1:59:04<54:34:05, 20.57s/it]

Total reward after episode 452 is 1573.0


  5%|▍         | 453/10000 [1:59:08<41:43:49, 15.74s/it]

Total reward after episode 453 is 241.0


  5%|▍         | 454/10000 [1:59:24<41:26:53, 15.63s/it]

Total reward after episode 454 is 1050.0


  5%|▍         | 455/10000 [1:59:41<42:44:44, 16.12s/it]

Total reward after episode 455 is 819.0


  5%|▍         | 456/10000 [1:59:51<37:50:22, 14.27s/it]

Total reward after episode 456 is 614.0


  5%|▍         | 457/10000 [2:00:14<44:46:20, 16.89s/it]

Total reward after episode 457 is 1346.0


  5%|▍         | 458/10000 [2:00:32<45:34:55, 17.20s/it]

Total reward after episode 458 is 818.0


  5%|▍         | 459/10000 [2:01:24<73:09:39, 27.61s/it]

Total reward after episode 459 is 2003.0


  5%|▍         | 460/10000 [2:01:41<65:00:32, 24.53s/it]

Total reward after episode 460 is 1046.0


  5%|▍         | 461/10000 [2:02:00<60:55:26, 22.99s/it]

Total reward after episode 461 is 817.0


  5%|▍         | 462/10000 [2:02:18<56:42:04, 21.40s/it]

Total reward after episode 462 is 1049.0


  5%|▍         | 463/10000 [2:02:35<53:04:58, 20.04s/it]

Total reward after episode 463 is 1049.0


  5%|▍         | 464/10000 [2:02:44<44:21:14, 16.74s/it]

Total reward after episode 464 is 609.0


  5%|▍         | 465/10000 [2:02:54<39:07:33, 14.77s/it]

Total reward after episode 465 is 609.0


  5%|▍         | 466/10000 [2:03:11<40:41:44, 15.37s/it]

Total reward after episode 466 is 1051.0


  5%|▍         | 467/10000 [2:03:20<35:55:23, 13.57s/it]

Total reward after episode 467 is 613.0


  5%|▍         | 468/10000 [2:03:38<39:16:15, 14.83s/it]

Total reward after episode 468 is 1047.0


  5%|▍         | 469/10000 [2:04:01<45:19:36, 17.12s/it]

Total reward after episode 469 is 1345.0


  5%|▍         | 470/10000 [2:04:23<49:52:38, 18.84s/it]

Total reward after episode 470 is 1334.0


  5%|▍         | 471/10000 [2:04:42<49:21:19, 18.65s/it]

Total reward after episode 471 is 750.0


  5%|▍         | 472/10000 [2:05:01<49:53:50, 18.85s/it]

Total reward after episode 472 is 1047.0


  5%|▍         | 473/10000 [2:05:27<55:56:27, 21.14s/it]

Total reward after episode 473 is 1181.0


  5%|▍         | 474/10000 [2:05:36<45:52:03, 17.33s/it]

Total reward after episode 474 is 611.0


  5%|▍         | 475/10000 [2:05:53<45:54:48, 17.35s/it]

Total reward after episode 475 is 1049.0


  5%|▍         | 476/10000 [2:06:02<38:59:41, 14.74s/it]

Total reward after episode 476 is 614.0


  5%|▍         | 477/10000 [2:06:06<30:25:59, 11.50s/it]

Total reward after episode 477 is 252.0


  5%|▍         | 478/10000 [2:06:30<40:09:22, 15.18s/it]

Total reward after episode 478 is 811.0


  5%|▍         | 479/10000 [2:06:40<35:54:13, 13.58s/it]

Total reward after episode 479 is 630.0


  5%|▍         | 480/10000 [2:06:48<31:51:45, 12.05s/it]

Total reward after episode 480 is 611.0


  5%|▍         | 481/10000 [2:07:07<37:22:56, 14.14s/it]

Total reward after episode 481 is 1047.0


  5%|▍         | 482/10000 [2:07:23<38:46:20, 14.66s/it]

Total reward after episode 482 is 1051.0


  5%|▍         | 483/10000 [2:07:27<30:16:03, 11.45s/it]

Total reward after episode 483 is 249.0


  5%|▍         | 484/10000 [2:07:31<24:46:46,  9.37s/it]

Total reward after episode 484 is 238.0


  5%|▍         | 485/10000 [2:07:49<31:00:25, 11.73s/it]

Total reward after episode 485 is 1050.0


  5%|▍         | 486/10000 [2:07:53<24:49:36,  9.39s/it]

Total reward after episode 486 is 252.0


  5%|▍         | 487/10000 [2:08:01<23:57:58,  9.07s/it]

Total reward after episode 487 is 611.0


  5%|▍         | 488/10000 [2:08:14<27:17:06, 10.33s/it]

Total reward after episode 488 is 627.0


  5%|▍         | 489/10000 [2:08:35<35:44:02, 13.53s/it]

Total reward after episode 489 is 1337.0


  5%|▍         | 490/10000 [2:08:46<33:26:32, 12.66s/it]

Total reward after episode 490 is 629.0


  5%|▍         | 491/10000 [2:08:57<32:40:21, 12.37s/it]

Total reward after episode 491 is 639.0


  5%|▍         | 492/10000 [2:09:08<31:13:55, 11.83s/it]

Total reward after episode 492 is 611.0


  5%|▍         | 493/10000 [2:09:16<28:28:06, 10.78s/it]

Total reward after episode 493 is 606.0


  5%|▍         | 494/10000 [2:09:21<23:52:09,  9.04s/it]

Total reward after episode 494 is 236.0


  5%|▍         | 495/10000 [2:09:42<32:40:40, 12.38s/it]

Total reward after episode 495 is 816.0


  5%|▍         | 496/10000 [2:10:02<38:47:26, 14.69s/it]

Total reward after episode 496 is 816.0


  5%|▍         | 497/10000 [2:10:14<37:01:31, 14.03s/it]

Total reward after episode 497 is 733.0


  5%|▍         | 498/10000 [2:10:30<38:29:12, 14.58s/it]

Total reward after episode 498 is 1050.0


  5%|▍         | 499/10000 [2:10:48<41:00:26, 15.54s/it]

Total reward after episode 499 is 1048.0


  5%|▌         | 500/10000 [2:10:52<32:13:29, 12.21s/it]

Total reward after episode 500 is 238.0


  5%|▌         | 501/10000 [2:11:08<35:14:08, 13.35s/it]

Total reward after episode 501 is 1051.0


  5%|▌         | 502/10000 [2:11:29<40:49:44, 15.48s/it]

Total reward after episode 502 is 1045.0


  5%|▌         | 503/10000 [2:11:37<35:20:01, 13.39s/it]

Total reward after episode 503 is 606.0


  5%|▌         | 504/10000 [2:12:14<53:38:43, 20.34s/it]

Total reward after episode 504 is 1692.0


  5%|▌         | 505/10000 [2:12:23<44:53:42, 17.02s/it]

Total reward after episode 505 is 614.0


  5%|▌         | 506/10000 [2:12:41<45:44:54, 17.35s/it]

Total reward after episode 506 is 1049.0


  5%|▌         | 507/10000 [2:13:01<47:31:16, 18.02s/it]

Total reward after episode 507 is 1046.0


  5%|▌         | 508/10000 [2:13:09<40:12:46, 15.25s/it]

Total reward after episode 508 is 610.0


  5%|▌         | 509/10000 [2:13:18<34:50:40, 13.22s/it]

Total reward after episode 509 is 607.0


  5%|▌         | 510/10000 [2:13:27<31:18:12, 11.87s/it]

Total reward after episode 510 is 606.0


  5%|▌         | 511/10000 [2:13:36<29:24:11, 11.16s/it]

Total reward after episode 511 is 608.0


  5%|▌         | 512/10000 [2:13:59<38:33:03, 14.63s/it]

Total reward after episode 512 is 1040.0


  5%|▌         | 513/10000 [2:14:19<42:57:53, 16.30s/it]

Total reward after episode 513 is 1348.0


  5%|▌         | 514/10000 [2:14:44<49:27:19, 18.77s/it]

Total reward after episode 514 is 810.0


  5%|▌         | 515/10000 [2:14:55<43:27:13, 16.49s/it]

Total reward after episode 515 is 633.0


  5%|▌         | 516/10000 [2:15:12<44:19:16, 16.82s/it]

Total reward after episode 516 is 1050.0


  5%|▌         | 517/10000 [2:15:16<34:14:01, 13.00s/it]

Total reward after episode 517 is 252.0


  5%|▌         | 518/10000 [2:15:27<32:13:48, 12.24s/it]

Total reward after episode 518 is 637.0


  5%|▌         | 519/10000 [2:15:36<29:29:16, 11.20s/it]

Total reward after episode 519 is 610.0


  5%|▌         | 520/10000 [2:15:53<34:19:39, 13.04s/it]

Total reward after episode 520 is 820.0


  5%|▌         | 521/10000 [2:16:23<47:35:16, 18.07s/it]

Total reward after episode 521 is 628.0


  5%|▌         | 522/10000 [2:16:34<42:12:23, 16.03s/it]

Total reward after episode 522 is 615.0


  5%|▌         | 523/10000 [2:16:55<45:42:17, 17.36s/it]

Total reward after episode 523 is 816.0


  5%|▌         | 524/10000 [2:17:03<38:48:25, 14.74s/it]

Total reward after episode 524 is 610.0


  5%|▌         | 525/10000 [2:17:25<44:32:57, 16.93s/it]

Total reward after episode 525 is 1065.0


  5%|▌         | 526/10000 [2:17:34<38:03:58, 14.46s/it]

Total reward after episode 526 is 606.0


  5%|▌         | 527/10000 [2:17:45<35:15:06, 13.40s/it]

Total reward after episode 527 is 607.0


  5%|▌         | 528/10000 [2:17:49<27:53:40, 10.60s/it]

Total reward after episode 528 is 245.0


  5%|▌         | 529/10000 [2:17:58<26:33:46, 10.10s/it]

Total reward after episode 529 is 610.0


  5%|▌         | 530/10000 [2:18:02<21:49:16,  8.30s/it]

Total reward after episode 530 is 252.0


  5%|▌         | 531/10000 [2:18:06<18:29:18,  7.03s/it]

Total reward after episode 531 is 252.0


  5%|▌         | 532/10000 [2:18:15<19:50:27,  7.54s/it]

Total reward after episode 532 is 611.0


  5%|▌         | 533/10000 [2:18:32<27:07:37, 10.32s/it]

Total reward after episode 533 is 1048.0


  5%|▌         | 534/10000 [2:18:36<22:13:46,  8.45s/it]

Total reward after episode 534 is 252.0


  5%|▌         | 535/10000 [2:18:45<22:40:51,  8.63s/it]

Total reward after episode 535 is 611.0


  5%|▌         | 536/10000 [2:19:12<37:28:45, 14.26s/it]

Total reward after episode 536 is 1686.0


  5%|▌         | 537/10000 [2:19:51<56:41:32, 21.57s/it]

Total reward after episode 537 is 1328.0


  5%|▌         | 538/10000 [2:19:59<46:31:22, 17.70s/it]

Total reward after episode 538 is 608.0


  5%|▌         | 539/10000 [2:20:09<39:55:33, 15.19s/it]

Total reward after episode 539 is 609.0


  5%|▌         | 540/10000 [2:20:31<45:17:36, 17.24s/it]

Total reward after episode 540 is 1348.0


  5%|▌         | 541/10000 [2:20:40<38:44:24, 14.74s/it]

Total reward after episode 541 is 610.0


  5%|▌         | 542/10000 [2:21:02<44:52:47, 17.08s/it]

Total reward after episode 542 is 814.0


  5%|▌         | 543/10000 [2:21:11<38:28:41, 14.65s/it]

Total reward after episode 543 is 610.0


  5%|▌         | 544/10000 [2:21:16<30:37:30, 11.66s/it]

Total reward after episode 544 is 238.0


  5%|▌         | 545/10000 [2:21:25<28:31:45, 10.86s/it]

Total reward after episode 545 is 608.0


  5%|▌         | 546/10000 [2:21:29<23:23:56,  8.91s/it]

Total reward after episode 546 is 247.0


  5%|▌         | 547/10000 [2:21:39<24:15:52,  9.24s/it]

Total reward after episode 547 is 612.0


  5%|▌         | 548/10000 [2:21:48<24:08:07,  9.19s/it]

Total reward after episode 548 is 611.0


  5%|▌         | 549/10000 [2:21:54<21:15:47,  8.10s/it]

Total reward after episode 549 is 236.0


  6%|▌         | 550/10000 [2:27:06<260:33:29, 99.26s/it]

Total reward after episode 550 is 443.0


  6%|▌         | 551/10000 [2:27:18<191:48:54, 73.08s/it]

Total reward after episode 551 is 637.0


  6%|▌         | 552/10000 [2:27:28<142:35:48, 54.33s/it]

Total reward after episode 552 is 608.0


  6%|▌         | 553/10000 [2:27:37<106:55:15, 40.74s/it]

Total reward after episode 553 is 608.0


  6%|▌         | 554/10000 [2:27:48<83:26:35, 31.80s/it] 

Total reward after episode 554 is 654.0


  6%|▌         | 555/10000 [2:27:58<66:01:06, 25.16s/it]

Total reward after episode 555 is 618.0


  6%|▌         | 556/10000 [2:28:12<56:59:37, 21.73s/it]

Total reward after episode 556 is 606.0


  6%|▌         | 557/10000 [2:28:23<48:26:33, 18.47s/it]

Total reward after episode 557 is 654.0


  6%|▌         | 558/10000 [2:28:27<37:18:58, 14.23s/it]

Total reward after episode 558 is 248.0


  6%|▌         | 559/10000 [2:28:32<29:54:25, 11.40s/it]

Total reward after episode 559 is 249.0


  6%|▌         | 560/10000 [2:28:43<29:23:16, 11.21s/it]

Total reward after episode 560 is 627.0


  6%|▌         | 561/10000 [2:28:52<27:44:05, 10.58s/it]

Total reward after episode 561 is 608.0


  6%|▌         | 562/10000 [2:28:57<23:19:40,  8.90s/it]

Total reward after episode 562 is 243.0


  6%|▌         | 563/10000 [2:29:28<41:06:53, 15.68s/it]

Total reward after episode 563 is 1031.0


  6%|▌         | 564/10000 [2:29:37<35:57:11, 13.72s/it]

Total reward after episode 564 is 607.0


  6%|▌         | 565/10000 [2:29:42<28:57:41, 11.05s/it]

Total reward after episode 565 is 245.0


  6%|▌         | 566/10000 [2:29:53<28:36:42, 10.92s/it]

Total reward after episode 566 is 625.0


  6%|▌         | 567/10000 [2:30:35<53:19:51, 20.35s/it]

Total reward after episode 567 is 791.0


  6%|▌         | 568/10000 [2:31:02<58:29:45, 22.33s/it]

Total reward after episode 568 is 1326.0


  6%|▌         | 569/10000 [2:31:12<48:28:02, 18.50s/it]

Total reward after episode 569 is 610.0


  6%|▌         | 570/10000 [2:31:22<41:47:50, 15.96s/it]

Total reward after episode 570 is 609.0


  6%|▌         | 571/10000 [2:31:33<38:10:02, 14.57s/it]

Total reward after episode 571 is 639.0


  6%|▌         | 572/10000 [2:37:03<286:00:34, 109.21s/it]

Total reward after episode 572 is 2395.0


  6%|▌         | 573/10000 [2:37:13<207:46:37, 79.35s/it] 

Total reward after episode 573 is 611.0


  6%|▌         | 574/10000 [2:42:46<407:38:46, 155.69s/it]

Total reward after episode 574 is 1771.0


  6%|▌         | 575/10000 [2:42:56<292:49:25, 111.85s/it]

Total reward after episode 575 is 606.0


  6%|▌         | 576/10000 [2:43:49<246:20:58, 94.11s/it] 

Total reward after episode 576 is 1299.0


  6%|▌         | 577/10000 [2:43:58<179:58:35, 68.76s/it]

Total reward after episode 577 is 608.0


  6%|▌         | 578/10000 [2:44:09<134:42:27, 51.47s/it]

Total reward after episode 578 is 611.0


  6%|▌         | 579/10000 [2:44:20<102:44:10, 39.26s/it]

Total reward after episode 579 is 607.0


  6%|▌         | 580/10000 [2:44:25<75:28:46, 28.85s/it] 

Total reward after episode 580 is 252.0


  6%|▌         | 581/10000 [2:44:34<60:21:38, 23.07s/it]

Total reward after episode 581 is 606.0


  6%|▌         | 582/10000 [2:44:44<50:04:32, 19.14s/it]

Total reward after episode 582 is 607.0


  6%|▌         | 583/10000 [2:45:20<63:12:12, 24.16s/it]

Total reward after episode 583 is 1919.0


  6%|▌         | 584/10000 [2:45:31<52:25:46, 20.05s/it]

Total reward after episode 584 is 607.0


  6%|▌         | 585/10000 [2:45:41<45:04:17, 17.23s/it]

Total reward after episode 585 is 606.0


  6%|▌         | 586/10000 [2:45:51<39:04:37, 14.94s/it]

Total reward after episode 586 is 606.0


  6%|▌         | 587/10000 [2:46:01<34:55:38, 13.36s/it]

Total reward after episode 587 is 606.0


  6%|▌         | 588/10000 [2:46:06<28:50:01, 11.03s/it]

Total reward after episode 588 is 235.0


  6%|▌         | 589/10000 [2:46:16<28:10:28, 10.78s/it]

Total reward after episode 589 is 609.0


  6%|▌         | 590/10000 [2:46:43<40:58:34, 15.68s/it]

Total reward after episode 590 is 1040.0


  6%|▌         | 591/10000 [2:47:02<43:28:29, 16.63s/it]

Total reward after episode 591 is 1050.0


  6%|▌         | 592/10000 [2:47:22<46:10:11, 17.67s/it]

Total reward after episode 592 is 1049.0


  6%|▌         | 593/10000 [2:47:32<40:03:59, 15.33s/it]

Total reward after episode 593 is 609.0


  6%|▌         | 594/10000 [2:47:42<35:45:38, 13.69s/it]

Total reward after episode 594 is 610.0


  6%|▌         | 595/10000 [2:48:06<43:32:50, 16.67s/it]

Total reward after episode 595 is 1190.0


  6%|▌         | 596/10000 [2:48:15<38:04:40, 14.58s/it]

Total reward after episode 596 is 610.0


  6%|▌         | 597/10000 [2:48:28<36:49:42, 14.10s/it]

Total reward after episode 597 is 625.0


  6%|▌         | 598/10000 [2:48:38<33:23:55, 12.79s/it]

Total reward after episode 598 is 610.0


  6%|▌         | 599/10000 [2:49:20<55:59:37, 21.44s/it]

Total reward after episode 599 is 1686.0


  6%|▌         | 600/10000 [2:49:30<46:54:45, 17.97s/it]

Total reward after episode 600 is 607.0


  6%|▌         | 601/10000 [2:49:40<40:34:08, 15.54s/it]

Total reward after episode 601 is 611.0


  6%|▌         | 602/10000 [2:49:49<35:59:34, 13.79s/it]

Total reward after episode 602 is 607.0


  6%|▌         | 603/10000 [2:50:35<61:03:51, 23.39s/it]

Total reward after episode 603 is 1913.0


  6%|▌         | 604/10000 [2:50:40<46:55:23, 17.98s/it]

Total reward after episode 604 is 238.0


  6%|▌         | 605/10000 [2:50:50<40:29:15, 15.51s/it]

Total reward after episode 605 is 606.0


  6%|▌         | 606/10000 [2:51:00<36:17:35, 13.91s/it]

Total reward after episode 606 is 612.0


  6%|▌         | 607/10000 [2:51:33<50:38:58, 19.41s/it]

Total reward after episode 607 is 1033.0


  6%|▌         | 608/10000 [2:51:46<45:47:33, 17.55s/it]

Total reward after episode 608 is 624.0


  6%|▌         | 609/10000 [2:52:00<43:06:49, 16.53s/it]

Total reward after episode 609 is 608.0


  6%|▌         | 610/10000 [2:52:23<48:14:58, 18.50s/it]

Total reward after episode 610 is 641.0


  6%|▌         | 611/10000 [2:52:33<41:26:20, 15.89s/it]

Total reward after episode 611 is 610.0


  6%|▌         | 612/10000 [2:52:45<38:23:42, 14.72s/it]

Total reward after episode 612 is 611.0


  6%|▌         | 613/10000 [2:52:50<30:37:39, 11.75s/it]

Total reward after episode 613 is 244.0


  6%|▌         | 614/10000 [2:54:07<81:42:08, 31.34s/it]

Total reward after episode 614 is 1097.0


  6%|▌         | 615/10000 [2:54:40<83:07:11, 31.88s/it]

Total reward after episode 615 is 1035.0


  6%|▌         | 616/10000 [2:54:53<68:05:36, 26.12s/it]

Total reward after episode 616 is 623.0


  6%|▌         | 617/10000 [2:55:02<55:20:46, 21.23s/it]

Total reward after episode 617 is 606.0


  6%|▌         | 618/10000 [2:55:23<55:10:08, 21.17s/it]

Total reward after episode 618 is 819.0


  6%|▌         | 619/10000 [2:55:33<46:15:09, 17.75s/it]

Total reward after episode 619 is 608.0


  6%|▌         | 620/10000 [2:55:43<40:25:37, 15.52s/it]

Total reward after episode 620 is 611.0


  6%|▌         | 621/10000 [2:55:56<37:51:31, 14.53s/it]

Total reward after episode 621 is 616.0


  6%|▌         | 622/10000 [2:56:00<30:05:49, 11.55s/it]

Total reward after episode 622 is 252.0


  6%|▌         | 623/10000 [2:56:10<28:42:07, 11.02s/it]

Total reward after episode 623 is 610.0


  6%|▌         | 624/10000 [2:56:15<24:17:07,  9.32s/it]

Total reward after episode 624 is 235.0


  6%|▋         | 625/10000 [2:57:03<54:09:18, 20.80s/it]

Total reward after episode 625 is 1914.0


  6%|▋         | 626/10000 [2:57:32<60:46:51, 23.34s/it]

Total reward after episode 626 is 1153.0


  6%|▋         | 627/10000 [2:57:43<50:59:15, 19.58s/it]

Total reward after episode 627 is 622.0


  6%|▋         | 628/10000 [2:58:41<80:54:26, 31.08s/it]

Total reward after episode 628 is 1005.0


  6%|▋         | 629/10000 [2:58:51<64:37:32, 24.83s/it]

Total reward after episode 629 is 608.0


  6%|▋         | 630/10000 [2:59:01<52:53:44, 20.32s/it]

Total reward after episode 630 is 606.0


  6%|▋         | 631/10000 [2:59:32<61:04:01, 23.46s/it]

Total reward after episode 631 is 1342.0


  6%|▋         | 632/10000 [2:59:42<50:28:32, 19.40s/it]

Total reward after episode 632 is 610.0


  6%|▋         | 633/10000 [2:59:52<42:58:37, 16.52s/it]

Total reward after episode 633 is 606.0


  6%|▋         | 634/10000 [3:00:01<37:41:08, 14.49s/it]

Total reward after episode 634 is 606.0


  6%|▋         | 635/10000 [3:00:11<34:08:54, 13.13s/it]

Total reward after episode 635 is 611.0


  6%|▋         | 636/10000 [3:00:21<31:33:19, 12.13s/it]

Total reward after episode 636 is 606.0


  6%|▋         | 637/10000 [3:00:34<32:24:12, 12.46s/it]

Total reward after episode 637 is 626.0


  6%|▋         | 638/10000 [3:00:44<30:20:29, 11.67s/it]

Total reward after episode 638 is 606.0


  6%|▋         | 639/10000 [3:00:54<28:55:25, 11.12s/it]

Total reward after episode 639 is 606.0


  6%|▋         | 640/10000 [3:00:59<23:51:26,  9.18s/it]

Total reward after episode 640 is 252.0


  6%|▋         | 641/10000 [3:01:47<54:30:06, 20.96s/it]

Total reward after episode 641 is 1123.0


  6%|▋         | 642/10000 [3:01:58<46:51:49, 18.03s/it]

Total reward after episode 642 is 628.0


  6%|▋         | 643/10000 [3:02:03<36:41:43, 14.12s/it]

Total reward after episode 643 is 251.0


  6%|▋         | 644/10000 [3:02:18<37:03:51, 14.26s/it]

Total reward after episode 644 is 609.0


  6%|▋         | 645/10000 [3:02:28<33:34:04, 12.92s/it]

Total reward after episode 645 is 606.0


  6%|▋         | 646/10000 [3:02:37<31:09:27, 11.99s/it]

Total reward after episode 646 is 606.0


  6%|▋         | 647/10000 [3:02:52<33:03:32, 12.72s/it]

Total reward after episode 647 is 610.0


  6%|▋         | 648/10000 [3:03:20<44:48:00, 17.25s/it]

Total reward after episode 648 is 1587.0


  6%|▋         | 649/10000 [3:03:29<38:58:29, 15.00s/it]

Total reward after episode 649 is 607.0


  6%|▋         | 650/10000 [3:03:39<34:56:15, 13.45s/it]

Total reward after episode 650 is 606.0


  7%|▋         | 651/10000 [3:03:49<32:07:03, 12.37s/it]

Total reward after episode 651 is 607.0


  7%|▋         | 652/10000 [3:03:59<30:25:34, 11.72s/it]

Total reward after episode 652 is 608.0


  7%|▋         | 653/10000 [3:04:09<28:53:04, 11.12s/it]

Total reward after episode 653 is 606.0


  7%|▋         | 654/10000 [3:04:40<44:40:07, 17.21s/it]

Total reward after episode 654 is 1578.0


  7%|▋         | 655/10000 [3:04:50<38:53:43, 14.98s/it]

Total reward after episode 655 is 606.0


  7%|▋         | 656/10000 [3:05:26<55:06:50, 21.23s/it]

Total reward after episode 656 is 1030.0


  7%|▋         | 657/10000 [3:05:52<58:41:12, 22.61s/it]

Total reward after episode 657 is 1347.0


  7%|▋         | 658/10000 [3:06:26<67:29:25, 26.01s/it]

Total reward after episode 658 is 804.0


  7%|▋         | 659/10000 [3:06:36<55:07:39, 21.25s/it]

Total reward after episode 659 is 607.0


  7%|▋         | 660/10000 [3:06:46<46:22:37, 17.88s/it]

Total reward after episode 660 is 611.0


  7%|▋         | 661/10000 [3:07:09<50:25:00, 19.43s/it]

Total reward after episode 661 is 1350.0


  7%|▋         | 662/10000 [3:07:19<43:04:57, 16.61s/it]

Total reward after episode 662 is 609.0


  7%|▋         | 663/10000 [3:07:29<37:48:10, 14.58s/it]

Total reward after episode 663 is 608.0


  7%|▋         | 664/10000 [3:07:39<34:17:34, 13.22s/it]

Total reward after episode 664 is 611.0


  7%|▋         | 665/10000 [3:07:49<31:41:27, 12.22s/it]

Total reward after episode 665 is 610.0


  7%|▋         | 666/10000 [3:07:59<29:52:49, 11.52s/it]

Total reward after episode 666 is 611.0


  7%|▋         | 667/10000 [3:08:09<28:33:57, 11.02s/it]

Total reward after episode 667 is 606.0


  7%|▋         | 668/10000 [3:08:18<27:39:03, 10.67s/it]

Total reward after episode 668 is 609.0


  7%|▋         | 669/10000 [3:08:29<27:34:36, 10.64s/it]

Total reward after episode 669 is 614.0


  7%|▋         | 670/10000 [3:08:39<26:56:31, 10.40s/it]

Total reward after episode 670 is 611.0


  7%|▋         | 671/10000 [3:08:49<26:50:06, 10.36s/it]

Total reward after episode 671 is 606.0


  7%|▋         | 672/10000 [3:09:00<27:15:14, 10.52s/it]

Total reward after episode 672 is 620.0


  7%|▋         | 673/10000 [3:09:10<26:42:45, 10.31s/it]

Total reward after episode 673 is 606.0


  7%|▋         | 674/10000 [3:09:20<26:19:41, 10.16s/it]

Total reward after episode 674 is 610.0


  7%|▋         | 675/10000 [3:09:41<35:17:52, 13.63s/it]

Total reward after episode 675 is 1046.0


  7%|▋         | 676/10000 [3:09:46<28:18:46, 10.93s/it]

Total reward after episode 676 is 252.0


  7%|▋         | 677/10000 [3:09:51<23:27:33,  9.06s/it]

Total reward after episode 677 is 252.0


  7%|▋         | 678/10000 [3:09:55<20:01:56,  7.74s/it]

Total reward after episode 678 is 249.0


  7%|▋         | 679/10000 [3:10:09<24:20:13,  9.40s/it]

Total reward after episode 679 is 624.0


  7%|▋         | 680/10000 [3:10:19<24:55:43,  9.63s/it]

Total reward after episode 680 is 611.0


  7%|▋         | 681/10000 [3:10:46<38:21:07, 14.82s/it]

Total reward after episode 681 is 1041.0


  7%|▋         | 682/10000 [3:11:08<44:01:44, 17.01s/it]

Total reward after episode 682 is 1048.0


  7%|▋         | 683/10000 [3:11:13<34:41:28, 13.40s/it]

Total reward after episode 683 is 251.0


  7%|▋         | 684/10000 [3:11:23<31:57:13, 12.35s/it]

Total reward after episode 684 is 606.0


  7%|▋         | 685/10000 [3:11:46<40:26:33, 15.63s/it]

Total reward after episode 685 is 1192.0


  7%|▋         | 686/10000 [3:11:56<35:57:55, 13.90s/it]

Total reward after episode 686 is 611.0


  7%|▋         | 687/10000 [3:12:06<32:49:17, 12.69s/it]

Total reward after episode 687 is 606.0


  7%|▋         | 688/10000 [3:12:10<26:36:24, 10.29s/it]

Total reward after episode 688 is 250.0


  7%|▋         | 689/10000 [3:12:20<26:15:06, 10.15s/it]

Total reward after episode 689 is 606.0


  7%|▋         | 690/10000 [3:12:39<33:14:14, 12.85s/it]

Total reward after episode 690 is 1052.0


  7%|▋         | 691/10000 [3:12:49<30:53:18, 11.95s/it]

Total reward after episode 691 is 608.0


  7%|▋         | 692/10000 [3:12:59<29:17:29, 11.33s/it]

Total reward after episode 692 is 608.0


  7%|▋         | 693/10000 [3:13:09<28:18:47, 10.95s/it]

Total reward after episode 693 is 610.0


  7%|▋         | 694/10000 [3:13:29<35:31:30, 13.74s/it]

Total reward after episode 694 is 1050.0


  7%|▋         | 695/10000 [3:13:41<33:59:14, 13.15s/it]

Total reward after episode 695 is 654.0


  7%|▋         | 696/10000 [3:13:47<28:17:05, 10.94s/it]

Total reward after episode 696 is 234.0


  7%|▋         | 697/10000 [3:13:57<27:26:49, 10.62s/it]

Total reward after episode 697 is 610.0


  7%|▋         | 698/10000 [3:14:10<29:32:45, 11.43s/it]

Total reward after episode 698 is 624.0


  7%|▋         | 699/10000 [3:14:20<28:26:40, 11.01s/it]

Total reward after episode 699 is 611.0


  7%|▋         | 700/10000 [3:15:10<58:20:08, 22.58s/it]

Total reward after episode 700 is 563.0


  7%|▋         | 701/10000 [3:15:20<48:44:22, 18.87s/it]

Total reward after episode 701 is 607.0


  7%|▋         | 702/10000 [3:15:25<38:17:18, 14.82s/it]

Total reward after episode 702 is 250.0


  7%|▋         | 703/10000 [3:15:30<30:26:31, 11.79s/it]

Total reward after episode 703 is 249.0


  7%|▋         | 704/10000 [3:15:44<32:04:00, 12.42s/it]

Total reward after episode 704 is 619.0


  7%|▋         | 705/10000 [3:15:54<30:14:49, 11.71s/it]

Total reward after episode 705 is 609.0


  7%|▋         | 706/10000 [3:16:23<43:13:21, 16.74s/it]

Total reward after episode 706 is 1041.0


  7%|▋         | 707/10000 [3:16:35<40:05:15, 15.53s/it]

Total reward after episode 707 is 608.0


  7%|▋         | 708/10000 [3:16:46<36:16:56, 14.06s/it]

Total reward after episode 708 is 606.0


  7%|▋         | 709/10000 [3:16:59<35:25:48, 13.73s/it]

Total reward after episode 709 is 641.0


  7%|▋         | 710/10000 [3:17:25<45:15:14, 17.54s/it]

Total reward after episode 710 is 1345.0


  7%|▋         | 711/10000 [3:17:30<35:20:45, 13.70s/it]

Total reward after episode 711 is 252.0


  7%|▋         | 712/10000 [3:17:40<32:25:38, 12.57s/it]

Total reward after episode 712 is 608.0


  7%|▋         | 713/10000 [3:17:45<26:20:46, 10.21s/it]

Total reward after episode 713 is 248.0


  7%|▋         | 714/10000 [3:17:49<22:04:13,  8.56s/it]

Total reward after episode 714 is 248.0


  7%|▋         | 715/10000 [3:18:02<25:02:15,  9.71s/it]

Total reward after episode 715 is 636.0


  7%|▋         | 716/10000 [3:18:07<21:17:25,  8.26s/it]

Total reward after episode 716 is 250.0


  7%|▋         | 717/10000 [3:18:37<38:30:25, 14.93s/it]

Total reward after episode 717 is 1331.0


  7%|▋         | 718/10000 [3:18:42<30:56:07, 12.00s/it]

Total reward after episode 718 is 252.0


  7%|▋         | 719/10000 [3:18:52<29:21:04, 11.39s/it]

Total reward after episode 719 is 607.0


  7%|▋         | 720/10000 [3:18:57<24:11:05,  9.38s/it]

Total reward after episode 720 is 252.0


  7%|▋         | 721/10000 [3:19:07<24:36:08,  9.55s/it]

Total reward after episode 721 is 608.0


  7%|▋         | 722/10000 [3:19:17<24:53:23,  9.66s/it]

Total reward after episode 722 is 608.0


  7%|▋         | 723/10000 [3:19:45<39:27:55, 15.31s/it]

Total reward after episode 723 is 1335.0


  7%|▋         | 724/10000 [3:20:09<46:18:24, 17.97s/it]

Total reward after episode 724 is 816.0


  7%|▋         | 725/10000 [3:20:20<40:45:35, 15.82s/it]

Total reward after episode 725 is 606.0


  7%|▋         | 726/10000 [3:20:35<39:46:50, 15.44s/it]

Total reward after episode 726 is 608.0


  7%|▋         | 727/10000 [3:20:45<35:51:51, 13.92s/it]

Total reward after episode 727 is 607.0


  7%|▋         | 728/10000 [3:20:50<28:46:37, 11.17s/it]

Total reward after episode 728 is 252.0


  7%|▋         | 729/10000 [3:21:00<28:12:11, 10.95s/it]

Total reward after episode 729 is 609.0


  7%|▋         | 730/10000 [3:21:11<28:09:01, 10.93s/it]

Total reward after episode 730 is 607.0


  7%|▋         | 731/10000 [3:21:37<39:54:36, 15.50s/it]

Total reward after episode 731 is 1042.0


  7%|▋         | 732/10000 [3:21:44<33:06:29, 12.86s/it]

Total reward after episode 732 is 236.0


  7%|▋         | 733/10000 [3:21:49<26:50:42, 10.43s/it]

Total reward after episode 733 is 252.0


  7%|▋         | 734/10000 [3:22:19<42:00:33, 16.32s/it]

Total reward after episode 734 is 1696.0


  7%|▋         | 735/10000 [3:22:45<49:19:39, 19.17s/it]

Total reward after episode 735 is 1326.0


  7%|▋         | 736/10000 [3:22:56<42:54:18, 16.67s/it]

Total reward after episode 736 is 604.0


  7%|▋         | 737/10000 [3:23:06<37:51:18, 14.71s/it]

Total reward after episode 737 is 611.0


  7%|▋         | 738/10000 [3:23:10<30:10:13, 11.73s/it]

Total reward after episode 738 is 252.0


  7%|▋         | 739/10000 [3:23:20<28:49:20, 11.20s/it]

Total reward after episode 739 is 606.0


  7%|▋         | 740/10000 [3:23:46<39:38:16, 15.41s/it]

Total reward after episode 740 is 1043.0


  7%|▋         | 741/10000 [3:23:56<35:54:51, 13.96s/it]

Total reward after episode 741 is 610.0


  7%|▋         | 742/10000 [3:24:12<36:53:24, 14.34s/it]

Total reward after episode 742 is 635.0


  7%|▋         | 743/10000 [3:24:22<34:11:41, 13.30s/it]

Total reward after episode 743 is 604.0


  7%|▋         | 744/10000 [3:24:45<41:14:03, 16.04s/it]

Total reward after episode 744 is 1048.0


  7%|▋         | 745/10000 [3:25:04<43:36:23, 16.96s/it]

Total reward after episode 745 is 1052.0


  7%|▋         | 746/10000 [3:25:09<34:34:58, 13.45s/it]

Total reward after episode 746 is 250.0


  7%|▋         | 747/10000 [3:25:19<31:54:19, 12.41s/it]

Total reward after episode 747 is 608.0


  7%|▋         | 748/10000 [3:25:30<30:41:40, 11.94s/it]

Total reward after episode 748 is 604.0


  7%|▋         | 749/10000 [3:25:45<32:48:16, 12.77s/it]

Total reward after episode 749 is 629.0


  8%|▊         | 750/10000 [3:25:55<30:49:03, 11.99s/it]

Total reward after episode 750 is 608.0


  8%|▊         | 751/10000 [3:26:18<39:34:46, 15.41s/it]

Total reward after episode 751 is 1351.0


  8%|▊         | 752/10000 [3:26:37<42:08:05, 16.40s/it]

Total reward after episode 752 is 1052.0


  8%|▊         | 753/10000 [3:26:47<37:16:20, 14.51s/it]

Total reward after episode 753 is 608.0


  8%|▊         | 754/10000 [3:26:59<34:57:12, 13.61s/it]

Total reward after episode 754 is 623.0


  8%|▊         | 755/10000 [3:27:27<46:10:04, 17.98s/it]

Total reward after episode 755 is 1438.0


  8%|▊         | 756/10000 [3:27:40<42:23:39, 16.51s/it]

Total reward after episode 756 is 632.0


  8%|▊         | 757/10000 [3:30:05<141:24:18, 55.08s/it]

Total reward after episode 757 is 908.0


  8%|▊         | 758/10000 [3:30:29<117:09:32, 45.64s/it]

Total reward after episode 758 is 1351.0


  8%|▊         | 759/10000 [3:30:43<93:08:57, 36.29s/it] 

Total reward after episode 759 is 629.0


  8%|▊         | 760/10000 [3:30:54<73:38:29, 28.69s/it]

Total reward after episode 760 is 607.0


  8%|▊         | 761/10000 [3:31:05<59:58:05, 23.37s/it]

Total reward after episode 761 is 604.0


  8%|▊         | 762/10000 [3:31:15<49:44:55, 19.39s/it]

Total reward after episode 762 is 609.0


  8%|▊         | 763/10000 [3:31:43<56:15:21, 21.93s/it]

Total reward after episode 763 is 1586.0


  8%|▊         | 764/10000 [3:31:53<47:31:55, 18.53s/it]

Total reward after episode 764 is 613.0


  8%|▊         | 765/10000 [3:32:05<42:05:36, 16.41s/it]

Total reward after episode 765 is 626.0


  8%|▊         | 766/10000 [3:32:15<37:14:11, 14.52s/it]

Total reward after episode 766 is 607.0


  8%|▊         | 767/10000 [3:32:25<33:50:36, 13.20s/it]

Total reward after episode 767 is 608.0


  8%|▊         | 768/10000 [3:32:47<40:44:24, 15.89s/it]

Total reward after episode 768 is 1158.0


  8%|▊         | 769/10000 [3:32:59<37:46:15, 14.73s/it]

Total reward after episode 769 is 654.0


  8%|▊         | 770/10000 [3:33:10<34:21:11, 13.40s/it]

Total reward after episode 770 is 610.0


  8%|▊         | 771/10000 [3:33:21<32:53:29, 12.83s/it]

Total reward after episode 771 is 608.0


  8%|▊         | 772/10000 [3:33:26<26:42:32, 10.42s/it]

Total reward after episode 772 is 251.0


  8%|▊         | 773/10000 [3:33:36<26:44:58, 10.44s/it]

Total reward after episode 773 is 609.0


  8%|▊         | 774/10000 [3:33:47<26:32:19, 10.36s/it]

Total reward after episode 774 is 610.0


  8%|▊         | 775/10000 [3:33:59<28:22:14, 11.07s/it]

Total reward after episode 775 is 641.0


  8%|▊         | 776/10000 [3:34:09<27:35:35, 10.77s/it]

Total reward after episode 776 is 610.0


  8%|▊         | 777/10000 [3:34:20<27:09:35, 10.60s/it]

Total reward after episode 777 is 610.0


  8%|▊         | 778/10000 [3:34:25<22:57:20,  8.96s/it]

Total reward after episode 778 is 243.0


  8%|▊         | 779/10000 [3:34:53<37:38:28, 14.70s/it]

Total reward after episode 779 is 1428.0


  8%|▊         | 780/10000 [3:35:17<44:58:00, 17.56s/it]

Total reward after episode 780 is 1342.0


  8%|▊         | 781/10000 [3:35:28<39:47:31, 15.54s/it]

Total reward after episode 781 is 606.0


  8%|▊         | 782/10000 [3:35:38<35:37:56, 13.92s/it]

Total reward after episode 782 is 611.0


  8%|▊         | 783/10000 [3:35:48<32:53:25, 12.85s/it]

Total reward after episode 783 is 610.0


  8%|▊         | 784/10000 [3:35:59<30:51:33, 12.05s/it]

Total reward after episode 784 is 608.0


  8%|▊         | 785/10000 [3:36:10<30:41:47, 11.99s/it]

Total reward after episode 785 is 632.0


  8%|▊         | 786/10000 [3:36:20<29:12:49, 11.41s/it]

Total reward after episode 786 is 608.0


  8%|▊         | 787/10000 [3:36:31<28:24:42, 11.10s/it]

Total reward after episode 787 is 611.0


  8%|▊         | 788/10000 [3:36:42<28:21:49, 11.08s/it]

Total reward after episode 788 is 621.0


  8%|▊         | 789/10000 [3:37:11<42:04:44, 16.45s/it]

Total reward after episode 789 is 1434.0


  8%|▊         | 790/10000 [3:37:16<33:07:41, 12.95s/it]

Total reward after episode 790 is 252.0


  8%|▊         | 791/10000 [3:38:32<82:04:16, 32.08s/it]

Total reward after episode 791 is 1273.0


  8%|▊         | 792/10000 [3:38:43<65:32:03, 25.62s/it]

Total reward after episode 792 is 606.0


  8%|▊         | 793/10000 [3:38:48<49:32:56, 19.37s/it]

Total reward after episode 793 is 252.0


  8%|▊         | 794/10000 [3:38:58<42:25:55, 16.59s/it]

Total reward after episode 794 is 606.0


  8%|▊         | 795/10000 [3:39:08<37:28:28, 14.66s/it]

Total reward after episode 795 is 606.0


  8%|▊         | 796/10000 [3:39:14<30:35:15, 11.96s/it]

Total reward after episode 796 is 235.0


  8%|▊         | 797/10000 [3:39:24<29:07:04, 11.39s/it]

Total reward after episode 797 is 606.0


  8%|▊         | 798/10000 [3:39:34<28:24:17, 11.11s/it]

Total reward after episode 798 is 607.0


  8%|▊         | 799/10000 [3:39:44<27:38:41, 10.82s/it]

Total reward after episode 799 is 611.0


  8%|▊         | 800/10000 [3:39:54<27:04:16, 10.59s/it]

Total reward after episode 800 is 606.0


  8%|▊         | 801/10000 [3:40:09<30:20:00, 11.87s/it]

Total reward after episode 801 is 617.0


  8%|▊         | 802/10000 [3:40:19<28:57:42, 11.34s/it]

Total reward after episode 802 is 609.0


  8%|▊         | 803/10000 [3:40:30<28:08:19, 11.01s/it]

Total reward after episode 803 is 611.0


  8%|▊         | 804/10000 [3:40:41<28:23:53, 11.12s/it]

Total reward after episode 804 is 624.0


  8%|▊         | 805/10000 [3:40:51<27:54:34, 10.93s/it]

Total reward after episode 805 is 608.0


  8%|▊         | 806/10000 [3:41:02<27:24:29, 10.73s/it]

Total reward after episode 806 is 611.0


  8%|▊         | 807/10000 [3:41:12<27:04:41, 10.60s/it]

Total reward after episode 807 is 611.0


  8%|▊         | 808/10000 [3:41:36<37:23:42, 14.65s/it]

Total reward after episode 808 is 816.0


  8%|▊         | 809/10000 [3:41:59<44:05:53, 17.27s/it]

Total reward after episode 809 is 1047.0


  8%|▊         | 810/10000 [3:42:11<39:25:03, 15.44s/it]

Total reward after episode 810 is 608.0


  8%|▊         | 811/10000 [3:42:21<35:19:46, 13.84s/it]

Total reward after episode 811 is 606.0


  8%|▊         | 812/10000 [3:42:31<32:51:18, 12.87s/it]

Total reward after episode 812 is 615.0


  8%|▊         | 813/10000 [3:42:41<30:45:39, 12.05s/it]

Total reward after episode 813 is 606.0


  8%|▊         | 814/10000 [3:42:52<29:23:41, 11.52s/it]

Total reward after episode 814 is 610.0


  8%|▊         | 815/10000 [3:43:04<29:40:50, 11.63s/it]

Total reward after episode 815 is 606.0


  8%|▊         | 816/10000 [3:43:14<28:41:22, 11.25s/it]

Total reward after episode 816 is 611.0


  8%|▊         | 817/10000 [3:43:20<24:26:17,  9.58s/it]

Total reward after episode 817 is 238.0


  8%|▊         | 818/10000 [3:43:25<21:23:56,  8.39s/it]

Total reward after episode 818 is 247.0


  8%|▊         | 819/10000 [3:43:36<23:04:31,  9.05s/it]

Total reward after episode 819 is 608.0


  8%|▊         | 820/10000 [3:43:46<23:53:57,  9.37s/it]

Total reward after episode 820 is 608.0


  8%|▊         | 821/10000 [3:43:56<24:44:04,  9.70s/it]

Total reward after episode 821 is 611.0


  8%|▊         | 822/10000 [3:44:03<22:14:18,  8.72s/it]

Total reward after episode 822 is 238.0


  8%|▊         | 823/10000 [3:44:15<24:32:57,  9.63s/it]

Total reward after episode 823 is 622.0


  8%|▊         | 824/10000 [3:44:44<39:26:44, 15.48s/it]

Total reward after episode 824 is 1585.0


  8%|▊         | 825/10000 [3:44:55<35:52:58, 14.08s/it]

Total reward after episode 825 is 605.0


  8%|▊         | 826/10000 [3:45:19<43:24:46, 17.04s/it]

Total reward after episode 826 is 1352.0


  8%|▊         | 827/10000 [3:45:23<34:03:52, 13.37s/it]

Total reward after episode 827 is 252.0


  8%|▊         | 828/10000 [3:45:33<31:34:30, 12.39s/it]

Total reward after episode 828 is 607.0


  8%|▊         | 829/10000 [3:45:44<29:50:12, 11.71s/it]

Total reward after episode 829 is 609.0


  8%|▊         | 830/10000 [3:45:55<29:58:04, 11.76s/it]

Total reward after episode 830 is 629.0


  8%|▊         | 831/10000 [3:46:00<24:38:35,  9.68s/it]

Total reward after episode 831 is 252.0


  8%|▊         | 832/10000 [3:46:05<20:56:22,  8.22s/it]

Total reward after episode 832 is 252.0


  8%|▊         | 833/10000 [3:46:25<29:36:37, 11.63s/it]

Total reward after episode 833 is 748.0


  8%|▊         | 834/10000 [3:46:35<28:29:44, 11.19s/it]

Total reward after episode 834 is 611.0


  8%|▊         | 835/10000 [3:46:45<27:42:32, 10.88s/it]

Total reward after episode 835 is 607.0


  8%|▊         | 836/10000 [3:46:55<27:09:25, 10.67s/it]

Total reward after episode 836 is 611.0


  8%|▊         | 837/10000 [3:47:18<36:15:34, 14.25s/it]

Total reward after episode 837 is 1047.0


  8%|▊         | 838/10000 [3:47:28<33:16:29, 13.07s/it]

Total reward after episode 838 is 610.0


  8%|▊         | 839/10000 [3:47:56<44:28:45, 17.48s/it]

Total reward after episode 839 is 1582.0


  8%|▊         | 840/10000 [3:48:32<58:32:18, 23.01s/it]

Total reward after episode 840 is 721.0


  8%|▊         | 841/10000 [3:48:59<61:51:35, 24.31s/it]

Total reward after episode 841 is 1330.0


  8%|▊         | 842/10000 [3:49:09<51:04:24, 20.08s/it]

Total reward after episode 842 is 606.0


  8%|▊         | 843/10000 [3:50:08<80:15:09, 31.55s/it]

Total reward after episode 843 is 1303.0


  8%|▊         | 844/10000 [3:50:32<74:50:20, 29.43s/it]

Total reward after episode 844 is 1348.0


  8%|▊         | 845/10000 [3:50:44<61:49:11, 24.31s/it]

Total reward after episode 845 is 654.0


  8%|▊         | 846/10000 [3:50:55<51:37:03, 20.30s/it]

Total reward after episode 846 is 606.0


  8%|▊         | 847/10000 [3:51:06<43:54:22, 17.27s/it]

Total reward after episode 847 is 611.0


  8%|▊         | 848/10000 [3:51:30<49:07:32, 19.32s/it]

Total reward after episode 848 is 816.0


  8%|▊         | 849/10000 [3:51:40<42:08:45, 16.58s/it]

Total reward after episode 849 is 610.0


  8%|▊         | 850/10000 [3:51:53<39:46:14, 15.65s/it]

Total reward after episode 850 is 736.0


  9%|▊         | 851/10000 [3:52:04<35:38:56, 14.03s/it]

Total reward after episode 851 is 604.0


  9%|▊         | 852/10000 [3:52:14<32:45:20, 12.89s/it]

Total reward after episode 852 is 610.0


  9%|▊         | 853/10000 [3:52:26<32:10:25, 12.66s/it]

Total reward after episode 853 is 631.0


  9%|▊         | 854/10000 [3:52:36<30:21:40, 11.95s/it]

Total reward after episode 854 is 606.0


  9%|▊         | 855/10000 [3:52:48<30:03:08, 11.83s/it]

Total reward after episode 855 is 623.0


  9%|▊         | 856/10000 [3:53:16<42:28:44, 16.72s/it]

Total reward after episode 856 is 1326.0


  9%|▊         | 857/10000 [3:53:21<33:28:10, 13.18s/it]

Total reward after episode 857 is 252.0


  9%|▊         | 858/10000 [3:53:32<31:34:39, 12.43s/it]

Total reward after episode 858 is 608.0


  9%|▊         | 859/10000 [3:53:52<38:00:50, 14.97s/it]

Total reward after episode 859 is 1050.0


  9%|▊         | 860/10000 [3:54:21<48:30:24, 19.11s/it]

Total reward after episode 860 is 1433.0


  9%|▊         | 861/10000 [3:54:26<37:40:15, 14.84s/it]

Total reward after episode 861 is 252.0


  9%|▊         | 862/10000 [3:54:38<35:19:34, 13.92s/it]

Total reward after episode 862 is 606.0


  9%|▊         | 863/10000 [3:54:44<29:39:57, 11.69s/it]

Total reward after episode 863 is 238.0


  9%|▊         | 864/10000 [3:54:55<28:51:48, 11.37s/it]

Total reward after episode 864 is 611.0


  9%|▊         | 865/10000 [3:55:39<53:34:14, 21.11s/it]

Total reward after episode 865 is 1695.0


  9%|▊         | 866/10000 [3:55:50<45:58:44, 18.12s/it]

Total reward after episode 866 is 616.0


  9%|▊         | 867/10000 [3:56:01<40:15:34, 15.87s/it]

Total reward after episode 867 is 608.0


  9%|▊         | 868/10000 [3:56:22<44:49:46, 17.67s/it]

Total reward after episode 868 is 1048.0


  9%|▊         | 869/10000 [3:56:45<48:34:35, 19.15s/it]

Total reward after episode 869 is 1068.0


  9%|▊         | 870/10000 [3:57:10<52:52:15, 20.85s/it]

Total reward after episode 870 is 1336.0


  9%|▊         | 871/10000 [3:57:21<45:18:59, 17.87s/it]

Total reward after episode 871 is 605.0


  9%|▊         | 872/10000 [4:03:21<305:46:04, 120.59s/it]

Total reward after episode 872 is 2582.0


  9%|▊         | 873/10000 [4:03:26<217:41:51, 85.87s/it] 

Total reward after episode 873 is 252.0


  9%|▊         | 874/10000 [4:03:31<156:03:31, 61.56s/it]

Total reward after episode 874 is 248.0


  9%|▉         | 875/10000 [4:03:41<117:18:56, 46.28s/it]

Total reward after episode 875 is 611.0


  9%|▉         | 876/10000 [4:04:00<96:18:32, 38.00s/it] 

Total reward after episode 876 is 729.0


  9%|▉         | 877/10000 [4:04:13<77:04:37, 30.42s/it]

Total reward after episode 877 is 640.0


  9%|▉         | 878/10000 [4:04:36<71:48:46, 28.34s/it]

Total reward after episode 878 is 817.0


  9%|▉         | 879/10000 [4:04:47<58:03:01, 22.91s/it]

Total reward after episode 879 is 607.0


  9%|▉         | 880/10000 [4:05:09<57:49:08, 22.82s/it]

Total reward after episode 880 is 1047.0


  9%|▉         | 881/10000 [4:05:19<48:15:55, 19.05s/it]

Total reward after episode 881 is 607.0


  9%|▉         | 882/10000 [4:05:31<42:17:22, 16.70s/it]

Total reward after episode 882 is 606.0


  9%|▉         | 883/10000 [4:05:41<37:27:04, 14.79s/it]

Total reward after episode 883 is 606.0


  9%|▉         | 884/10000 [4:06:06<44:58:17, 17.76s/it]

Total reward after episode 884 is 1336.0


  9%|▉         | 885/10000 [4:06:16<39:15:57, 15.51s/it]

Total reward after episode 885 is 609.0


  9%|▉         | 886/10000 [4:06:42<47:08:35, 18.62s/it]

Total reward after episode 886 is 1331.0


  9%|▉         | 887/10000 [4:06:55<43:21:56, 17.13s/it]

Total reward after episode 887 is 624.0


  9%|▉         | 888/10000 [4:07:06<38:35:34, 15.25s/it]

Total reward after episode 888 is 605.0


  9%|▉         | 889/10000 [4:07:17<34:47:03, 13.74s/it]

Total reward after episode 889 is 607.0


  9%|▉         | 890/10000 [4:07:28<32:40:41, 12.91s/it]

Total reward after episode 890 is 614.0


  9%|▉         | 891/10000 [4:07:33<26:41:14, 10.55s/it]

Total reward after episode 891 is 251.0


  9%|▉         | 892/10000 [4:07:54<34:47:32, 13.75s/it]

Total reward after episode 892 is 1048.0


  9%|▉         | 893/10000 [4:08:04<32:17:18, 12.76s/it]

Total reward after episode 893 is 610.0


  9%|▉         | 894/10000 [4:08:14<30:23:03, 12.01s/it]

Total reward after episode 894 is 611.0


  9%|▉         | 895/10000 [4:08:43<43:12:43, 17.09s/it]

Total reward after episode 895 is 811.0


  9%|▉         | 896/10000 [4:08:55<38:42:05, 15.30s/it]

Total reward after episode 896 is 606.0


  9%|▉         | 897/10000 [4:09:05<34:53:02, 13.80s/it]

Total reward after episode 897 is 606.0


  9%|▉         | 898/10000 [4:09:16<32:51:49, 13.00s/it]

Total reward after episode 898 is 617.0


  9%|▉         | 899/10000 [4:09:27<31:18:58, 12.39s/it]

Total reward after episode 899 is 610.0


  9%|▉         | 900/10000 [4:10:02<48:38:27, 19.24s/it]

Total reward after episode 900 is 1319.0


  9%|▉         | 901/10000 [4:10:13<41:57:40, 16.60s/it]

Total reward after episode 901 is 613.0


  9%|▉         | 902/10000 [4:10:41<50:32:26, 20.00s/it]

Total reward after episode 902 is 1040.0


  9%|▉         | 903/10000 [4:11:04<53:30:32, 21.18s/it]

Total reward after episode 903 is 1067.0


  9%|▉         | 904/10000 [4:11:15<45:13:33, 17.90s/it]

Total reward after episode 904 is 607.0


  9%|▉         | 905/10000 [4:11:28<41:28:15, 16.42s/it]

Total reward after episode 905 is 604.0


  9%|▉         | 906/10000 [4:11:38<36:47:17, 14.56s/it]

Total reward after episode 906 is 607.0


  9%|▉         | 907/10000 [4:12:18<55:52:04, 22.12s/it]

Total reward after episode 907 is 1315.0


  9%|▉         | 908/10000 [4:12:29<47:49:59, 18.94s/it]

Total reward after episode 908 is 615.0


  9%|▉         | 909/10000 [4:12:41<42:45:04, 16.93s/it]

Total reward after episode 909 is 632.0


  9%|▉         | 910/10000 [4:12:55<40:16:57, 15.95s/it]

Total reward after episode 910 is 638.0


  9%|▉         | 911/10000 [4:13:00<31:53:39, 12.63s/it]

Total reward after episode 911 is 252.0


  9%|▉         | 912/10000 [4:13:06<26:42:46, 10.58s/it]

Total reward after episode 912 is 236.0


  9%|▉         | 913/10000 [4:13:11<22:55:19,  9.08s/it]

Total reward after episode 913 is 247.0


  9%|▉         | 914/10000 [4:13:22<24:07:18,  9.56s/it]

Total reward after episode 914 is 610.0


  9%|▉         | 915/10000 [4:13:32<24:40:00,  9.77s/it]

Total reward after episode 915 is 607.0


  9%|▉         | 916/10000 [4:13:43<25:11:44,  9.99s/it]

Total reward after episode 916 is 607.0


  9%|▉         | 917/10000 [4:13:53<25:23:29, 10.06s/it]

Total reward after episode 917 is 606.0


  9%|▉         | 918/10000 [4:14:28<44:19:39, 17.57s/it]

Total reward after episode 918 is 1420.0


  9%|▉         | 919/10000 [4:14:34<35:07:38, 13.93s/it]

Total reward after episode 919 is 252.0


  9%|▉         | 920/10000 [4:14:39<28:42:37, 11.38s/it]

Total reward after episode 920 is 251.0


  9%|▉         | 921/10000 [4:15:03<38:32:21, 15.28s/it]

Total reward after episode 921 is 817.0


  9%|▉         | 922/10000 [4:15:39<54:09:37, 21.48s/it]

Total reward after episode 922 is 1033.0


  9%|▉         | 923/10000 [4:15:50<45:40:55, 18.12s/it]

Total reward after episode 923 is 606.0


  9%|▉         | 924/10000 [4:16:01<40:42:43, 16.15s/it]

Total reward after episode 924 is 624.0


  9%|▉         | 925/10000 [4:16:14<38:18:51, 15.20s/it]

Total reward after episode 925 is 641.0


  9%|▉         | 926/10000 [4:16:24<34:36:58, 13.73s/it]

Total reward after episode 926 is 608.0


  9%|▉         | 927/10000 [4:16:35<32:00:20, 12.70s/it]

Total reward after episode 927 is 606.0


  9%|▉         | 928/10000 [4:16:45<30:10:26, 11.97s/it]

Total reward after episode 928 is 610.0


  9%|▉         | 929/10000 [4:16:55<28:55:26, 11.48s/it]

Total reward after episode 929 is 606.0


  9%|▉         | 930/10000 [4:17:06<28:36:00, 11.35s/it]

Total reward after episode 930 is 609.0


  9%|▉         | 931/10000 [4:17:32<39:01:50, 15.49s/it]

Total reward after episode 931 is 1350.0


  9%|▉         | 932/10000 [4:17:43<35:47:47, 14.21s/it]

Total reward after episode 932 is 619.0


  9%|▉         | 933/10000 [4:17:53<32:51:50, 13.05s/it]

Total reward after episode 933 is 606.0


  9%|▉         | 934/10000 [4:18:04<30:54:30, 12.27s/it]

Total reward after episode 934 is 610.0


  9%|▉         | 935/10000 [4:18:26<38:26:48, 15.27s/it]

Total reward after episode 935 is 1048.0


  9%|▉         | 936/10000 [4:18:37<35:06:46, 13.95s/it]

Total reward after episode 936 is 605.0


  9%|▉         | 937/10000 [4:18:49<33:53:07, 13.46s/it]

Total reward after episode 937 is 624.0


  9%|▉         | 938/10000 [4:18:59<31:38:45, 12.57s/it]

Total reward after episode 938 is 608.0


  9%|▉         | 939/10000 [4:19:23<39:43:45, 15.78s/it]

Total reward after episode 939 is 1046.0


  9%|▉         | 940/10000 [4:19:43<43:05:34, 17.12s/it]

Total reward after episode 940 is 1075.0


  9%|▉         | 941/10000 [4:19:48<34:15:45, 13.62s/it]

Total reward after episode 941 is 240.0


  9%|▉         | 942/10000 [4:19:59<31:44:40, 12.62s/it]

Total reward after episode 942 is 610.0


  9%|▉         | 943/10000 [4:20:26<42:29:28, 16.89s/it]

Total reward after episode 943 is 1426.0


  9%|▉         | 944/10000 [4:20:36<37:35:27, 14.94s/it]

Total reward after episode 944 is 606.0


  9%|▉         | 945/10000 [4:20:41<30:26:35, 12.10s/it]

Total reward after episode 945 is 250.0


  9%|▉         | 946/10000 [4:21:05<39:10:15, 15.57s/it]

Total reward after episode 946 is 1047.0


  9%|▉         | 947/10000 [4:21:18<37:00:11, 14.71s/it]

Total reward after episode 947 is 636.0


  9%|▉         | 948/10000 [4:21:28<33:41:53, 13.40s/it]

Total reward after episode 948 is 611.0


  9%|▉         | 949/10000 [4:21:56<44:28:28, 17.69s/it]

Total reward after episode 949 is 1425.0


 10%|▉         | 950/10000 [4:22:06<39:05:46, 15.55s/it]

Total reward after episode 950 is 607.0


 10%|▉         | 951/10000 [4:22:17<35:09:05, 13.98s/it]

Total reward after episode 951 is 606.0


 10%|▉         | 952/10000 [4:22:28<33:03:50, 13.16s/it]

Total reward after episode 952 is 609.0


 10%|▉         | 953/10000 [4:22:39<31:05:39, 12.37s/it]

Total reward after episode 953 is 609.0


 10%|▉         | 954/10000 [4:22:49<29:33:04, 11.76s/it]

Total reward after episode 954 is 608.0


 10%|▉         | 955/10000 [4:23:02<30:49:06, 12.27s/it]

Total reward after episode 955 is 640.0


 10%|▉         | 956/10000 [4:23:13<29:29:31, 11.74s/it]

Total reward after episode 956 is 606.0


 10%|▉         | 957/10000 [4:23:18<24:19:17,  9.68s/it]

Total reward after episode 957 is 252.0


 10%|▉         | 958/10000 [4:23:31<27:14:29, 10.85s/it]

Total reward after episode 958 is 632.0


 10%|▉         | 959/10000 [4:23:36<22:46:10,  9.07s/it]

Total reward after episode 959 is 249.0


 10%|▉         | 960/10000 [4:23:47<23:51:39,  9.50s/it]

Total reward after episode 960 is 610.0


 10%|▉         | 961/10000 [4:23:59<25:50:26, 10.29s/it]

Total reward after episode 961 is 621.0


 10%|▉         | 962/10000 [4:24:12<27:56:14, 11.13s/it]

Total reward after episode 962 is 635.0


 10%|▉         | 963/10000 [4:24:37<38:46:54, 15.45s/it]

Total reward after episode 963 is 1439.0


 10%|▉         | 964/10000 [4:25:02<45:42:31, 18.21s/it]

Total reward after episode 964 is 1328.0


 10%|▉         | 965/10000 [4:25:13<39:51:41, 15.88s/it]

Total reward after episode 965 is 609.0


 10%|▉         | 966/10000 [4:25:18<32:20:12, 12.89s/it]

Total reward after episode 966 is 238.0


 10%|▉         | 967/10000 [4:25:29<30:28:51, 12.15s/it]

Total reward after episode 967 is 606.0


 10%|▉         | 968/10000 [4:26:30<67:05:47, 26.74s/it]

Total reward after episode 968 is 2642.0


 10%|▉         | 969/10000 [4:26:40<55:02:53, 21.94s/it]

Total reward after episode 969 is 611.0


 10%|▉         | 970/10000 [4:26:51<46:21:25, 18.48s/it]

Total reward after episode 970 is 610.0


 10%|▉         | 971/10000 [4:27:26<58:41:48, 23.40s/it]

Total reward after episode 971 is 1676.0


 10%|▉         | 972/10000 [4:27:37<49:34:23, 19.77s/it]

Total reward after episode 972 is 609.0


 10%|▉         | 973/10000 [4:28:37<80:10:26, 31.97s/it]

Total reward after episode 973 is 1902.0


 10%|▉         | 974/10000 [4:28:48<63:57:36, 25.51s/it]

Total reward after episode 974 is 607.0


 10%|▉         | 975/10000 [4:29:06<58:28:31, 23.33s/it]

Total reward after episode 975 is 1050.0


 10%|▉         | 976/10000 [4:29:26<56:14:42, 22.44s/it]

Total reward after episode 976 is 1051.0


 10%|▉         | 977/10000 [4:29:53<59:37:21, 23.79s/it]

Total reward after episode 977 is 1348.0


 10%|▉         | 978/10000 [4:30:07<51:34:29, 20.58s/it]

Total reward after episode 978 is 641.0


 10%|▉         | 979/10000 [4:30:18<44:25:44, 17.73s/it]

Total reward after episode 979 is 609.0


 10%|▉         | 980/10000 [4:30:32<42:10:56, 16.84s/it]

Total reward after episode 980 is 734.0


 10%|▉         | 981/10000 [4:30:43<37:29:09, 14.96s/it]

Total reward after episode 981 is 611.0


 10%|▉         | 982/10000 [4:30:49<30:37:19, 12.22s/it]

Total reward after episode 982 is 238.0


 10%|▉         | 983/10000 [4:31:14<40:25:09, 16.14s/it]

Total reward after episode 983 is 1335.0


 10%|▉         | 984/10000 [4:31:25<36:38:05, 14.63s/it]

Total reward after episode 984 is 607.0


 10%|▉         | 985/10000 [4:32:24<69:29:25, 27.75s/it]

Total reward after episode 985 is 779.0


 10%|▉         | 986/10000 [4:32:34<56:28:14, 22.55s/it]

Total reward after episode 986 is 606.0


 10%|▉         | 987/10000 [4:33:00<59:24:01, 23.73s/it]

Total reward after episode 987 is 1045.0


 10%|▉         | 988/10000 [4:33:06<45:57:02, 18.36s/it]

Total reward after episode 988 is 236.0


 10%|▉         | 989/10000 [4:33:20<42:10:16, 16.85s/it]

Total reward after episode 989 is 635.0


 10%|▉         | 990/10000 [4:33:30<37:17:22, 14.90s/it]

Total reward after episode 990 is 610.0


 10%|▉         | 991/10000 [4:33:40<33:55:10, 13.55s/it]

Total reward after episode 991 is 610.0


 10%|▉         | 992/10000 [4:34:05<42:38:07, 17.04s/it]

Total reward after episode 992 is 1334.0


 10%|▉         | 993/10000 [4:34:36<53:06:05, 21.22s/it]

Total reward after episode 993 is 1315.0


 10%|▉         | 994/10000 [4:34:47<45:15:08, 18.09s/it]

Total reward after episode 994 is 607.0


 10%|▉         | 995/10000 [4:34:58<39:35:44, 15.83s/it]

Total reward after episode 995 is 605.0


 10%|▉         | 996/10000 [4:35:08<35:30:42, 14.20s/it]

Total reward after episode 996 is 611.0


 10%|▉         | 997/10000 [4:35:19<32:47:54, 13.12s/it]

Total reward after episode 997 is 611.0


 10%|▉         | 998/10000 [4:35:31<31:45:53, 12.70s/it]

Total reward after episode 998 is 607.0


 10%|▉         | 999/10000 [4:35:59<43:19:43, 17.33s/it]

Total reward after episode 999 is 1583.0


 10%|█         | 1000/10000 [4:36:11<39:14:19, 15.70s/it]

Total reward after episode 1000 is 627.0


 10%|█         | 1001/10000 [4:36:21<35:39:53, 14.27s/it]

Total reward after episode 1001 is 615.0


 10%|█         | 1002/10000 [4:36:47<44:04:22, 17.63s/it]

Total reward after episode 1002 is 1349.0


 10%|█         | 1003/10000 [4:37:07<45:49:38, 18.34s/it]

Total reward after episode 1003 is 1052.0


 10%|█         | 1004/10000 [4:37:26<46:37:21, 18.66s/it]

Total reward after episode 1004 is 740.0


 10%|█         | 1005/10000 [4:37:50<50:10:11, 20.08s/it]

Total reward after episode 1005 is 602.0


 10%|█         | 1006/10000 [4:38:01<43:44:43, 17.51s/it]

Total reward after episode 1006 is 610.0


 10%|█         | 1007/10000 [4:38:12<38:57:20, 15.59s/it]

Total reward after episode 1007 is 606.0


 10%|█         | 1008/10000 [4:38:24<36:16:56, 14.53s/it]

Total reward after episode 1008 is 609.0


 10%|█         | 1009/10000 [4:39:41<82:28:01, 33.02s/it]

Total reward after episode 1009 is 2333.0


 10%|█         | 1010/10000 [4:40:45<105:42:01, 42.33s/it]

Total reward after episode 1010 is 773.0


 10%|█         | 1011/10000 [4:40:59<84:30:49, 33.85s/it] 

Total reward after episode 1011 is 621.0


 10%|█         | 1012/10000 [4:41:24<78:10:03, 31.31s/it]

Total reward after episode 1012 is 1355.0


 10%|█         | 1013/10000 [4:41:49<73:43:54, 29.54s/it]

Total reward after episode 1013 is 1338.0


 10%|█         | 1014/10000 [4:42:19<73:44:35, 29.54s/it]

Total reward after episode 1014 is 1151.0


 10%|█         | 1015/10000 [4:42:43<69:46:06, 27.95s/it]

Total reward after episode 1015 is 817.0


 10%|█         | 1016/10000 [4:42:48<52:32:16, 21.05s/it]

Total reward after episode 1016 is 252.0


 10%|█         | 1017/10000 [4:43:01<46:31:19, 18.64s/it]

Total reward after episode 1017 is 641.0


 10%|█         | 1018/10000 [4:43:24<49:56:08, 20.01s/it]

Total reward after episode 1018 is 1045.0


 10%|█         | 1019/10000 [4:43:36<43:34:34, 17.47s/it]

Total reward after episode 1019 is 606.0


 10%|█         | 1020/10000 [4:43:47<38:27:47, 15.42s/it]

Total reward after episode 1020 is 609.0


 10%|█         | 1021/10000 [4:44:14<47:12:35, 18.93s/it]

Total reward after episode 1021 is 1042.0


 10%|█         | 1022/10000 [4:44:42<54:08:52, 21.71s/it]

Total reward after episode 1022 is 1587.0


 10%|█         | 1023/10000 [4:44:52<45:41:50, 18.33s/it]

Total reward after episode 1023 is 606.0


 10%|█         | 1024/10000 [4:45:03<39:47:08, 15.96s/it]

Total reward after episode 1024 is 611.0


 10%|█         | 1025/10000 [4:45:15<36:53:43, 14.80s/it]

Total reward after episode 1025 is 627.0


 10%|█         | 1026/10000 [4:45:27<34:35:25, 13.88s/it]

Total reward after episode 1026 is 625.0


 10%|█         | 1027/10000 [4:45:53<43:55:09, 17.62s/it]

Total reward after episode 1027 is 1437.0


 10%|█         | 1028/10000 [4:46:07<41:33:01, 16.67s/it]

Total reward after episode 1028 is 633.0


 10%|█         | 1029/10000 [4:46:29<45:22:43, 18.21s/it]

Total reward after episode 1029 is 1050.0


 10%|█         | 1030/10000 [4:46:40<39:37:39, 15.90s/it]

Total reward after episode 1030 is 607.0


 10%|█         | 1031/10000 [4:46:50<35:34:14, 14.28s/it]

Total reward after episode 1031 is 611.0


 10%|█         | 1032/10000 [4:47:01<32:44:58, 13.15s/it]

Total reward after episode 1032 is 610.0


 10%|█         | 1033/10000 [4:47:14<32:42:17, 13.13s/it]

Total reward after episode 1033 is 639.0


 10%|█         | 1034/10000 [4:47:42<44:03:52, 17.69s/it]

Total reward after episode 1034 is 1435.0


 10%|█         | 1035/10000 [4:47:53<38:55:36, 15.63s/it]

Total reward after episode 1035 is 609.0


 10%|█         | 1036/10000 [4:48:05<36:01:26, 14.47s/it]

Total reward after episode 1036 is 625.0


 10%|█         | 1037/10000 [4:48:15<33:01:13, 13.26s/it]

Total reward after episode 1037 is 608.0


 10%|█         | 1038/10000 [4:48:44<44:16:07, 17.78s/it]

Total reward after episode 1038 is 1349.0


 10%|█         | 1039/10000 [4:48:55<39:29:42, 15.87s/it]

Total reward after episode 1039 is 605.0


 10%|█         | 1040/10000 [4:49:01<32:11:10, 12.93s/it]

Total reward after episode 1040 is 232.0


 10%|█         | 1041/10000 [4:49:07<27:04:01, 10.88s/it]

Total reward after episode 1041 is 237.0


 10%|█         | 1042/10000 [4:49:18<26:46:04, 10.76s/it]

Total reward after episode 1042 is 608.0


 10%|█         | 1043/10000 [4:49:28<26:32:33, 10.67s/it]

Total reward after episode 1043 is 606.0


 10%|█         | 1044/10000 [4:49:39<26:24:25, 10.61s/it]

Total reward after episode 1044 is 610.0


 10%|█         | 1045/10000 [4:50:06<39:17:53, 15.80s/it]

Total reward after episode 1045 is 1041.0


 10%|█         | 1046/10000 [4:50:18<36:13:53, 14.57s/it]

Total reward after episode 1046 is 625.0


 10%|█         | 1047/10000 [4:50:46<46:24:31, 18.66s/it]

Total reward after episode 1047 is 1065.0


 10%|█         | 1048/10000 [4:50:57<40:37:41, 16.34s/it]

Total reward after episode 1048 is 607.0


 10%|█         | 1049/10000 [4:51:22<46:34:04, 18.73s/it]

Total reward after episode 1049 is 1352.0


 10%|█         | 1050/10000 [4:51:32<40:38:38, 16.35s/it]

Total reward after episode 1050 is 608.0


 11%|█         | 1051/10000 [4:51:38<32:27:08, 13.05s/it]

Total reward after episode 1051 is 246.0


 11%|█         | 1052/10000 [4:52:08<45:07:19, 18.15s/it]

Total reward after episode 1052 is 1041.0


 11%|█         | 1053/10000 [4:52:51<63:27:50, 25.54s/it]

Total reward after episode 1053 is 1330.0


 11%|█         | 1054/10000 [4:52:56<48:09:04, 19.38s/it]

Total reward after episode 1054 is 248.0


 11%|█         | 1055/10000 [4:53:08<43:10:52, 17.38s/it]

Total reward after episode 1055 is 637.0


 11%|█         | 1056/10000 [4:53:32<48:06:38, 19.36s/it]

Total reward after episode 1056 is 597.0


 11%|█         | 1057/10000 [4:53:57<51:54:31, 20.90s/it]

Total reward after episode 1057 is 1067.0


 11%|█         | 1058/10000 [4:54:25<57:40:44, 23.22s/it]

Total reward after episode 1058 is 1439.0


 11%|█         | 1059/10000 [4:54:37<48:46:47, 19.64s/it]

Total reward after episode 1059 is 610.0


 11%|█         | 1060/10000 [4:55:02<53:04:43, 21.37s/it]

Total reward after episode 1060 is 1440.0


 11%|█         | 1061/10000 [4:55:07<40:52:41, 16.46s/it]

Total reward after episode 1061 is 248.0


 11%|█         | 1062/10000 [4:55:39<52:19:01, 21.07s/it]

Total reward after episode 1062 is 1697.0


 11%|█         | 1063/10000 [4:56:11<60:33:05, 24.39s/it]

Total reward after episode 1063 is 1317.0


 11%|█         | 1064/10000 [4:56:34<59:08:51, 23.83s/it]

Total reward after episode 1064 is 819.0


 11%|█         | 1065/10000 [4:56:47<51:25:28, 20.72s/it]

Total reward after episode 1065 is 637.0


 11%|█         | 1066/10000 [4:57:13<55:20:42, 22.30s/it]

Total reward after episode 1066 is 1438.0


 11%|█         | 1067/10000 [4:57:37<56:38:20, 22.83s/it]

Total reward after episode 1067 is 1047.0


 11%|█         | 1068/10000 [4:57:50<49:30:31, 19.95s/it]

Total reward after episode 1068 is 639.0


 11%|█         | 1069/10000 [4:58:01<42:28:43, 17.12s/it]

Total reward after episode 1069 is 611.0


 11%|█         | 1070/10000 [4:58:07<34:14:27, 13.80s/it]

Total reward after episode 1070 is 244.0


 11%|█         | 1071/10000 [4:58:37<46:00:11, 18.55s/it]

Total reward after episode 1071 is 1583.0


 11%|█         | 1072/10000 [4:58:47<40:01:35, 16.14s/it]

Total reward after episode 1072 is 609.0


 11%|█         | 1073/10000 [4:59:01<38:33:53, 15.55s/it]

Total reward after episode 1073 is 735.0


 11%|█         | 1074/10000 [4:59:29<47:23:54, 19.12s/it]

Total reward after episode 1074 is 1437.0


 11%|█         | 1075/10000 [4:59:40<41:40:31, 16.81s/it]

Total reward after episode 1075 is 603.0


 11%|█         | 1076/10000 [4:59:53<39:03:08, 15.75s/it]

Total reward after episode 1076 is 634.0


 11%|█         | 1077/10000 [4:59:59<31:44:11, 12.80s/it]

Total reward after episode 1077 is 242.0


 11%|█         | 1078/10000 [5:00:24<40:32:16, 16.36s/it]

Total reward after episode 1078 is 817.0


 11%|█         | 1079/10000 [5:00:37<38:12:53, 15.42s/it]

Total reward after episode 1079 is 606.0


 11%|█         | 1080/10000 [5:00:50<35:56:31, 14.51s/it]

Total reward after episode 1080 is 604.0


 11%|█         | 1081/10000 [5:01:14<43:38:13, 17.61s/it]

Total reward after episode 1081 is 1331.0


 11%|█         | 1082/10000 [5:01:41<50:32:58, 20.41s/it]

Total reward after episode 1082 is 1438.0


 11%|█         | 1083/10000 [5:01:52<43:31:07, 17.57s/it]

Total reward after episode 1083 is 608.0


 11%|█         | 1084/10000 [5:02:23<53:33:00, 21.62s/it]

Total reward after episode 1084 is 1040.0


 11%|█         | 1085/10000 [5:02:38<48:21:07, 19.53s/it]

Total reward after episode 1085 is 625.0


 11%|█         | 1086/10000 [5:02:49<41:40:14, 16.83s/it]

Total reward after episode 1086 is 611.0


 11%|█         | 1087/10000 [5:03:02<38:51:51, 15.70s/it]

Total reward after episode 1087 is 609.0


 11%|█         | 1088/10000 [5:03:14<36:14:34, 14.64s/it]

Total reward after episode 1088 is 611.0


 11%|█         | 1089/10000 [5:03:25<33:43:07, 13.62s/it]

Total reward after episode 1089 is 610.0


 11%|█         | 1090/10000 [5:03:36<31:39:41, 12.79s/it]

Total reward after episode 1090 is 611.0


 11%|█         | 1091/10000 [5:03:48<31:13:08, 12.62s/it]

Total reward after episode 1091 is 606.0


 11%|█         | 1092/10000 [5:03:59<29:39:28, 11.99s/it]

Total reward after episode 1092 is 611.0


 11%|█         | 1093/10000 [5:04:09<28:32:48, 11.54s/it]

Total reward after episode 1093 is 611.0


 11%|█         | 1094/10000 [5:04:20<27:54:20, 11.28s/it]

Total reward after episode 1094 is 610.0


 11%|█         | 1095/10000 [5:04:33<29:38:16, 11.98s/it]

Total reward after episode 1095 is 633.0


 11%|█         | 1096/10000 [5:04:44<28:32:06, 11.54s/it]

Total reward after episode 1096 is 608.0


 11%|█         | 1097/10000 [5:05:11<40:01:17, 16.18s/it]

Total reward after episode 1097 is 1352.0


 11%|█         | 1098/10000 [5:05:24<37:59:21, 15.36s/it]

Total reward after episode 1098 is 635.0


 11%|█         | 1099/10000 [5:05:29<30:18:11, 12.26s/it]

Total reward after episode 1099 is 249.0


 11%|█         | 1100/10000 [5:05:44<31:46:24, 12.85s/it]

Total reward after episode 1100 is 614.0


 11%|█         | 1101/10000 [5:06:05<38:26:41, 15.55s/it]

Total reward after episode 1101 is 1050.0


 11%|█         | 1102/10000 [5:06:32<46:14:04, 18.71s/it]

Total reward after episode 1102 is 639.0


 11%|█         | 1103/10000 [5:06:42<40:16:56, 16.30s/it]

Total reward after episode 1103 is 612.0


 11%|█         | 1104/10000 [5:06:48<32:12:29, 13.03s/it]

Total reward after episode 1104 is 252.0


 11%|█         | 1105/10000 [5:07:13<41:02:14, 16.61s/it]

Total reward after episode 1105 is 1330.0


 11%|█         | 1106/10000 [5:07:26<38:23:52, 15.54s/it]

Total reward after episode 1106 is 637.0


 11%|█         | 1107/10000 [5:07:46<42:10:05, 17.07s/it]

Total reward after episode 1107 is 1051.0


 11%|█         | 1108/10000 [5:08:10<47:25:26, 19.20s/it]

Total reward after episode 1108 is 1325.0


 11%|█         | 1109/10000 [5:08:39<54:11:58, 21.95s/it]

Total reward after episode 1109 is 1351.0


 11%|█         | 1110/10000 [5:09:15<64:59:03, 26.32s/it]

Total reward after episode 1110 is 1912.0


 11%|█         | 1111/10000 [5:09:42<65:08:53, 26.38s/it]

Total reward after episode 1111 is 1348.0


 11%|█         | 1112/10000 [5:10:03<61:26:54, 24.89s/it]

Total reward after episode 1112 is 1051.0


 11%|█         | 1113/10000 [5:10:27<60:28:10, 24.50s/it]

Total reward after episode 1113 is 1046.0


 11%|█         | 1114/10000 [5:10:39<51:20:38, 20.80s/it]

Total reward after episode 1114 is 608.0


 11%|█         | 1115/10000 [5:11:05<54:53:40, 22.24s/it]

Total reward after episode 1115 is 1331.0


 11%|█         | 1116/10000 [5:11:36<61:48:32, 25.05s/it]

Total reward after episode 1116 is 1714.0


 11%|█         | 1117/10000 [5:12:03<63:06:30, 25.58s/it]

Total reward after episode 1117 is 1347.0


 11%|█         | 1118/10000 [5:13:54<126:22:26, 51.22s/it]

Total reward after episode 1118 is 547.0


 11%|█         | 1119/10000 [5:14:05<96:24:44, 39.08s/it] 

Total reward after episode 1119 is 610.0


 11%|█         | 1120/10000 [5:14:17<76:46:06, 31.12s/it]

Total reward after episode 1120 is 654.0


 11%|█         | 1121/10000 [5:14:43<72:50:24, 29.53s/it]

Total reward after episode 1121 is 1349.0


 11%|█         | 1122/10000 [5:14:55<59:51:49, 24.27s/it]

Total reward after episode 1122 is 628.0


 11%|█         | 1123/10000 [5:15:07<50:31:34, 20.49s/it]

Total reward after episode 1123 is 617.0


 11%|█         | 1124/10000 [5:15:54<70:10:31, 28.46s/it]

Total reward after episode 1124 is 1023.0


 11%|█▏        | 1125/10000 [5:16:05<57:03:15, 23.14s/it]

Total reward after episode 1125 is 611.0


 11%|█▏        | 1126/10000 [5:16:27<56:21:23, 22.86s/it]

Total reward after episode 1126 is 1048.0


 11%|█▏        | 1127/10000 [5:16:56<60:40:05, 24.61s/it]

Total reward after episode 1127 is 1338.0


 11%|█▏        | 1128/10000 [5:17:32<69:38:59, 28.26s/it]

Total reward after episode 1128 is 1864.0


 11%|█▏        | 1129/10000 [5:17:43<56:41:51, 23.01s/it]

Total reward after episode 1129 is 609.0


 11%|█▏        | 1130/10000 [5:18:08<58:00:02, 23.54s/it]

Total reward after episode 1130 is 1343.0


 11%|█▏        | 1131/10000 [5:18:36<61:23:16, 24.92s/it]

Total reward after episode 1131 is 1043.0


 11%|█▏        | 1132/10000 [5:19:37<88:22:57, 35.88s/it]

Total reward after episode 1132 is 1661.0


 11%|█▏        | 1133/10000 [5:19:57<76:29:05, 31.05s/it]

Total reward after episode 1133 is 1052.0


 11%|█▏        | 1134/10000 [5:20:09<62:01:33, 25.19s/it]

Total reward after episode 1134 is 606.0


 11%|█▏        | 1135/10000 [5:20:14<47:15:06, 19.19s/it]

Total reward after episode 1135 is 249.0


 11%|█▏        | 1136/10000 [5:20:54<63:00:47, 25.59s/it]

Total reward after episode 1136 is 1030.0


 11%|█▏        | 1137/10000 [5:21:07<53:28:50, 21.72s/it]

Total reward after episode 1137 is 654.0


 11%|█▏        | 1138/10000 [5:21:31<54:52:36, 22.29s/it]

Total reward after episode 1138 is 1048.0


 11%|█▏        | 1139/10000 [5:22:08<66:01:02, 26.82s/it]

Total reward after episode 1139 is 1858.0


 11%|█▏        | 1140/10000 [5:22:30<62:27:56, 25.38s/it]

Total reward after episode 1140 is 1048.0


 11%|█▏        | 1141/10000 [5:22:41<51:31:00, 20.93s/it]

Total reward after episode 1141 is 608.0


 11%|█▏        | 1142/10000 [5:23:05<54:05:28, 21.98s/it]

Total reward after episode 1142 is 1334.0


 11%|█▏        | 1143/10000 [5:23:27<53:40:03, 21.81s/it]

Total reward after episode 1143 is 1049.0


 11%|█▏        | 1144/10000 [5:23:45<50:57:14, 20.71s/it]

Total reward after episode 1144 is 743.0


 11%|█▏        | 1145/10000 [5:24:11<55:11:18, 22.44s/it]

Total reward after episode 1145 is 1341.0


 11%|█▏        | 1146/10000 [5:24:42<61:15:21, 24.91s/it]

Total reward after episode 1146 is 1327.0


 11%|█▏        | 1147/10000 [5:24:53<51:12:32, 20.82s/it]

Total reward after episode 1147 is 609.0


 11%|█▏        | 1148/10000 [5:25:04<43:55:45, 17.87s/it]

Total reward after episode 1148 is 610.0


 11%|█▏        | 1149/10000 [5:25:32<51:17:26, 20.86s/it]

Total reward after episode 1149 is 1344.0


 12%|█▏        | 1150/10000 [5:25:37<39:35:23, 16.10s/it]

Total reward after episode 1150 is 251.0


 12%|█▏        | 1151/10000 [5:26:03<46:49:24, 19.05s/it]

Total reward after episode 1151 is 1343.0


 12%|█▏        | 1152/10000 [5:26:29<51:44:50, 21.05s/it]

Total reward after episode 1152 is 1351.0


 12%|█▏        | 1153/10000 [5:26:52<53:10:33, 21.64s/it]

Total reward after episode 1153 is 1049.0


 12%|█▏        | 1154/10000 [5:27:18<56:24:24, 22.96s/it]

Total reward after episode 1154 is 593.0


 12%|█▏        | 1155/10000 [5:27:51<64:04:02, 26.08s/it]

Total reward after episode 1155 is 1700.0


 12%|█▏        | 1156/10000 [5:28:14<61:58:01, 25.22s/it]

Total reward after episode 1156 is 1348.0


 12%|█▏        | 1157/10000 [5:28:27<52:51:16, 21.52s/it]

Total reward after episode 1157 is 654.0


 12%|█▏        | 1158/10000 [5:29:01<61:55:17, 25.21s/it]

Total reward after episode 1158 is 1415.0


 12%|█▏        | 1159/10000 [5:29:25<60:55:24, 24.81s/it]

Total reward after episode 1159 is 1048.0


 12%|█▏        | 1160/10000 [5:29:38<52:10:10, 21.25s/it]

Total reward after episode 1160 is 654.0


 12%|█▏        | 1161/10000 [5:29:59<52:20:52, 21.32s/it]

Total reward after episode 1161 is 1050.0


 12%|█▏        | 1162/10000 [5:30:36<63:40:11, 25.93s/it]

Total reward after episode 1162 is 1928.0


 12%|█▏        | 1163/10000 [5:30:55<58:53:52, 23.99s/it]

Total reward after episode 1163 is 1052.0


 12%|█▏        | 1164/10000 [5:31:06<49:06:49, 20.01s/it]

Total reward after episode 1164 is 610.0


 12%|█▏        | 1165/10000 [5:31:31<52:28:39, 21.38s/it]

Total reward after episode 1165 is 1353.0


 12%|█▏        | 1166/10000 [5:31:41<44:28:55, 18.13s/it]

Total reward after episode 1166 is 610.0


 12%|█▏        | 1167/10000 [5:31:52<38:54:13, 15.86s/it]

Total reward after episode 1167 is 611.0


 12%|█▏        | 1168/10000 [5:32:03<35:08:55, 14.33s/it]

Total reward after episode 1168 is 607.0


 12%|█▏        | 1169/10000 [5:32:16<34:43:25, 14.16s/it]

Total reward after episode 1169 is 618.0


 12%|█▏        | 1170/10000 [5:32:41<42:35:06, 17.36s/it]

Total reward after episode 1170 is 1333.0


 12%|█▏        | 1171/10000 [5:32:53<38:32:58, 15.72s/it]

Total reward after episode 1171 is 609.0


 12%|█▏        | 1172/10000 [5:33:22<48:23:01, 19.73s/it]

Total reward after episode 1172 is 1583.0


 12%|█▏        | 1173/10000 [5:33:49<53:31:11, 21.83s/it]

Total reward after episode 1173 is 1347.0


 12%|█▏        | 1174/10000 [5:33:54<41:08:30, 16.78s/it]

Total reward after episode 1174 is 250.0


 12%|█▏        | 1175/10000 [5:34:06<38:03:07, 15.52s/it]

Total reward after episode 1175 is 609.0


 12%|█▏        | 1176/10000 [5:34:33<45:50:54, 18.71s/it]

Total reward after episode 1176 is 1347.0


 12%|█▏        | 1177/10000 [5:34:38<35:47:36, 14.60s/it]

Total reward after episode 1177 is 251.0


 12%|█▏        | 1178/10000 [5:34:49<33:38:28, 13.73s/it]

Total reward after episode 1178 is 609.0


 12%|█▏        | 1179/10000 [5:35:15<42:29:24, 17.34s/it]

Total reward after episode 1179 is 1336.0


 12%|█▏        | 1180/10000 [5:35:47<52:57:52, 21.62s/it]

Total reward after episode 1180 is 1326.0


 12%|█▏        | 1181/10000 [5:36:08<52:41:13, 21.51s/it]

Total reward after episode 1181 is 1050.0


 12%|█▏        | 1182/10000 [5:42:19<309:28:31, 126.35s/it]

Total reward after episode 1182 is 1771.0


 12%|█▏        | 1183/10000 [5:44:08<297:06:06, 121.31s/it]

Total reward after episode 1183 is 1251.0


 12%|█▏        | 1184/10000 [5:44:44<234:03:30, 95.58s/it] 

Total reward after episode 1184 is 1035.0


 12%|█▏        | 1185/10000 [5:44:55<171:44:13, 70.14s/it]

Total reward after episode 1185 is 609.0


 12%|█▏        | 1186/10000 [5:45:20<138:40:42, 56.64s/it]

Total reward after episode 1186 is 1332.0


 12%|█▏        | 1187/10000 [5:45:40<112:02:05, 45.76s/it]

Total reward after episode 1187 is 1051.0


 12%|█▏        | 1188/10000 [5:45:52<87:07:39, 35.59s/it] 

Total reward after episode 1188 is 608.0


 12%|█▏        | 1189/10000 [5:46:04<69:26:16, 28.37s/it]

Total reward after episode 1189 is 609.0


 12%|█▏        | 1190/10000 [5:52:16<321:51:34, 131.52s/it]

Total reward after episode 1190 is 443.0


 12%|█▏        | 1191/10000 [5:52:27<233:05:10, 95.26s/it] 

Total reward after episode 1191 is 608.0


 12%|█▏        | 1192/10000 [5:52:39<172:25:16, 70.47s/it]

Total reward after episode 1192 is 654.0


 12%|█▏        | 1193/10000 [5:52:51<129:10:52, 52.80s/it]

Total reward after episode 1193 is 609.0


 12%|█▏        | 1194/10000 [5:52:56<94:24:38, 38.60s/it] 

Total reward after episode 1194 is 241.0


 12%|█▏        | 1195/10000 [5:53:09<75:14:14, 30.76s/it]

Total reward after episode 1195 is 603.0


 12%|█▏        | 1196/10000 [5:53:35<72:05:36, 29.48s/it]

Total reward after episode 1196 is 1333.0


 12%|█▏        | 1197/10000 [5:53:48<59:54:29, 24.50s/it]

Total reward after episode 1197 is 607.0


 12%|█▏        | 1198/10000 [5:54:06<54:52:51, 22.45s/it]

Total reward after episode 1198 is 605.0


 12%|█▏        | 1199/10000 [5:54:31<57:16:26, 23.43s/it]

Total reward after episode 1199 is 1355.0


 12%|█▏        | 1200/10000 [5:54:42<48:04:11, 19.66s/it]

Total reward after episode 1200 is 609.0


 12%|█▏        | 1201/10000 [5:54:53<41:35:19, 17.02s/it]

Total reward after episode 1201 is 608.0


 12%|█▏        | 1202/10000 [5:55:15<45:25:48, 18.59s/it]

Total reward after episode 1202 is 1050.0


 12%|█▏        | 1203/10000 [5:55:27<40:32:03, 16.59s/it]

Total reward after episode 1203 is 608.0


 12%|█▏        | 1204/10000 [5:56:00<52:07:14, 21.33s/it]

Total reward after episode 1204 is 1325.0


 12%|█▏        | 1205/10000 [5:56:38<64:11:42, 26.28s/it]

Total reward after episode 1205 is 1688.0


 12%|█▏        | 1206/10000 [5:57:02<62:58:01, 25.78s/it]

Total reward after episode 1206 is 1347.0


 12%|█▏        | 1207/10000 [5:57:18<55:33:34, 22.75s/it]

Total reward after episode 1207 is 651.0


 12%|█▏        | 1208/10000 [5:57:32<49:05:02, 20.10s/it]

Total reward after episode 1208 is 652.0


 12%|█▏        | 1209/10000 [5:57:59<54:03:49, 22.14s/it]

Total reward after episode 1209 is 1438.0


 12%|█▏        | 1210/10000 [5:58:04<42:04:02, 17.23s/it]

Total reward after episode 1210 is 245.0


 12%|█▏        | 1211/10000 [5:58:39<54:42:50, 22.41s/it]

Total reward after episode 1211 is 585.0


 12%|█▏        | 1212/10000 [5:58:50<46:36:56, 19.10s/it]

Total reward after episode 1212 is 610.0


 12%|█▏        | 1213/10000 [5:59:24<57:30:45, 23.56s/it]

Total reward after episode 1213 is 1881.0


 12%|█▏        | 1214/10000 [5:59:29<43:56:10, 18.00s/it]

Total reward after episode 1214 is 251.0


 12%|█▏        | 1215/10000 [5:59:42<40:07:57, 16.45s/it]

Total reward after episode 1215 is 635.0


 12%|█▏        | 1216/10000 [6:00:22<57:10:02, 23.43s/it]

Total reward after episode 1216 is 1334.0


 12%|█▏        | 1217/10000 [6:00:48<58:55:11, 24.15s/it]

Total reward after episode 1217 is 1350.0


 12%|█▏        | 1218/10000 [6:01:05<54:11:10, 22.21s/it]

Total reward after episode 1218 is 731.0


 12%|█▏        | 1219/10000 [6:01:17<46:37:27, 19.11s/it]

Total reward after episode 1219 is 610.0


 12%|█▏        | 1220/10000 [6:01:28<40:29:52, 16.61s/it]

Total reward after episode 1220 is 610.0


 12%|█▏        | 1221/10000 [6:01:41<37:35:32, 15.42s/it]

Total reward after episode 1221 is 654.0


 12%|█▏        | 1222/10000 [6:02:11<48:36:03, 19.93s/it]

Total reward after episode 1222 is 745.0


 12%|█▏        | 1223/10000 [6:02:23<42:26:45, 17.41s/it]

Total reward after episode 1223 is 611.0


 12%|█▏        | 1224/10000 [6:02:35<38:52:46, 15.95s/it]

Total reward after episode 1224 is 602.0


 12%|█▏        | 1225/10000 [6:03:00<45:23:47, 18.62s/it]

Total reward after episode 1225 is 1343.0


 12%|█▏        | 1226/10000 [6:03:12<40:10:36, 16.48s/it]

Total reward after episode 1226 is 611.0


 12%|█▏        | 1227/10000 [6:03:29<41:07:53, 16.88s/it]

Total reward after episode 1227 is 609.0


 12%|█▏        | 1228/10000 [6:03:44<39:50:05, 16.35s/it]

Total reward after episode 1228 is 635.0


 12%|█▏        | 1229/10000 [6:04:04<42:00:24, 17.24s/it]

Total reward after episode 1229 is 1052.0


 12%|█▏        | 1230/10000 [6:04:15<37:16:50, 15.30s/it]

Total reward after episode 1230 is 608.0


 12%|█▏        | 1231/10000 [6:04:52<53:28:19, 21.95s/it]

Total reward after episode 1231 is 1948.0


 12%|█▏        | 1232/10000 [6:05:07<48:24:14, 19.87s/it]

Total reward after episode 1232 is 651.0


 12%|█▏        | 1233/10000 [6:05:23<45:33:52, 18.71s/it]

Total reward after episode 1233 is 650.0


 12%|█▏        | 1234/10000 [6:05:35<40:29:23, 16.63s/it]

Total reward after episode 1234 is 610.0


 12%|█▏        | 1235/10000 [6:05:50<39:22:57, 16.18s/it]

Total reward after episode 1235 is 622.0


 12%|█▏        | 1236/10000 [6:06:16<46:16:20, 19.01s/it]

Total reward after episode 1236 is 1352.0


 12%|█▏        | 1237/10000 [6:06:41<51:11:04, 21.03s/it]

Total reward after episode 1237 is 1341.0


 12%|█▏        | 1238/10000 [6:07:06<54:08:32, 22.25s/it]

Total reward after episode 1238 is 1352.0


 12%|█▏        | 1239/10000 [6:07:26<52:29:00, 21.57s/it]

Total reward after episode 1239 is 740.0


 12%|█▏        | 1240/10000 [6:07:47<52:10:09, 21.44s/it]

Total reward after episode 1240 is 1073.0


 12%|█▏        | 1241/10000 [6:07:58<44:17:05, 18.20s/it]

Total reward after episode 1241 is 611.0


 12%|█▏        | 1242/10000 [6:08:10<39:25:53, 16.21s/it]

Total reward after episode 1242 is 617.0


 12%|█▏        | 1243/10000 [6:08:39<49:06:21, 20.19s/it]

Total reward after episode 1243 is 614.0


 12%|█▏        | 1244/10000 [6:08:54<45:28:15, 18.70s/it]

Total reward after episode 1244 is 617.0


 12%|█▏        | 1245/10000 [6:09:05<39:42:52, 16.33s/it]

Total reward after episode 1245 is 611.0


 12%|█▏        | 1246/10000 [6:09:17<36:11:03, 14.88s/it]

Total reward after episode 1246 is 605.0


 12%|█▏        | 1247/10000 [6:09:27<33:04:09, 13.60s/it]

Total reward after episode 1247 is 610.0


 12%|█▏        | 1248/10000 [6:09:38<31:08:44, 12.81s/it]

Total reward after episode 1248 is 607.0


 12%|█▏        | 1249/10000 [6:09:49<29:42:05, 12.22s/it]

Total reward after episode 1249 is 609.0


 12%|█▎        | 1250/10000 [6:10:02<30:21:14, 12.49s/it]

Total reward after episode 1250 is 654.0


 13%|█▎        | 1251/10000 [6:10:28<40:14:32, 16.56s/it]

Total reward after episode 1251 is 1046.0


 13%|█▎        | 1252/10000 [6:10:50<43:49:31, 18.04s/it]

Total reward after episode 1252 is 1049.0


 13%|█▎        | 1253/10000 [6:11:11<46:27:45, 19.12s/it]

Total reward after episode 1253 is 1050.0


 13%|█▎        | 1254/10000 [6:11:43<55:11:51, 22.72s/it]

Total reward after episode 1254 is 1580.0


 13%|█▎        | 1255/10000 [6:17:55<309:54:20, 127.58s/it]

Total reward after episode 1255 is 1771.0


 13%|█▎        | 1256/10000 [6:18:20<235:25:13, 96.93s/it] 

Total reward after episode 1256 is 1331.0


 13%|█▎        | 1257/10000 [6:19:51<230:59:05, 95.11s/it]

Total reward after episode 1257 is 2320.0


 13%|█▎        | 1258/10000 [6:20:42<198:35:23, 81.78s/it]

Total reward after episode 1258 is 2363.0


 13%|█▎        | 1259/10000 [6:21:17<164:59:10, 67.95s/it]

Total reward after episode 1259 is 1854.0


 13%|█▎        | 1260/10000 [6:21:46<136:08:52, 56.08s/it]

Total reward after episode 1260 is 1445.0


 13%|█▎        | 1261/10000 [6:21:59<105:16:06, 43.37s/it]

Total reward after episode 1261 is 608.0


 13%|█▎        | 1262/10000 [6:22:27<93:36:11, 38.56s/it] 

Total reward after episode 1262 is 1428.0


 13%|█▎        | 1263/10000 [6:23:37<116:42:47, 48.09s/it]

Total reward after episode 1263 is 2339.0


 13%|█▎        | 1264/10000 [6:24:09<104:39:57, 43.13s/it]

Total reward after episode 1264 is 1708.0


 13%|█▎        | 1265/10000 [6:24:40<96:11:53, 39.65s/it] 

Total reward after episode 1265 is 1702.0


 13%|█▎        | 1266/10000 [6:25:42<112:13:45, 46.26s/it]

Total reward after episode 1266 is 2347.0


 13%|█▎        | 1267/10000 [6:26:06<96:24:35, 39.74s/it] 

Total reward after episode 1267 is 817.0


 13%|█▎        | 1268/10000 [6:26:18<76:08:13, 31.39s/it]

Total reward after episode 1268 is 621.0


 13%|█▎        | 1269/10000 [6:27:33<107:43:24, 44.42s/it]

Total reward after episode 1269 is 2334.0


 13%|█▎        | 1270/10000 [6:28:03<97:16:48, 40.12s/it] 

Total reward after episode 1270 is 1576.0


 13%|█▎        | 1271/10000 [6:28:45<98:16:08, 40.53s/it]

Total reward after episode 1271 is 1333.0


 13%|█▎        | 1272/10000 [6:28:57<77:46:03, 32.08s/it]

Total reward after episode 1272 is 616.0


 13%|█▎        | 1273/10000 [6:29:23<73:07:51, 30.17s/it]

Total reward after episode 1273 is 1346.0


 13%|█▎        | 1274/10000 [6:29:36<60:31:33, 24.97s/it]

Total reward after episode 1274 is 654.0


 13%|█▎        | 1275/10000 [6:29:50<52:31:30, 21.67s/it]

Total reward after episode 1275 is 620.0
