In [1]:
!pip install nes-py==0.2.6
!pip install gym-super-mario-bros

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nes-py==0.2.6
  Downloading nes_py-0.2.6.tar.gz (75 kB)
[K     |████████████████████████████████| 75 kB 5.3 MB/s 
Collecting pygame>=1.9.3
  Downloading pygame-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.8 MB)
[K     |████████████████████████████████| 21.8 MB 1.4 MB/s 
Building wheels for collected packages: nes-py
  Building wheel for nes-py (setup.py) ... [?25l[?25hdone
  Created wheel for nes-py: filename=nes_py-0.2.6-cp37-cp37m-linux_x86_64.whl size=168848 sha256=6566151859fb3da53258fe94e66ca4ca8df40c45314aac3d8af480fc57b3d251
  Stored in directory: /root/.cache/pip/wheels/cf/87/a9/d777bc0614683325afc2501fe16a01ae29a9bf6c5650cffbad
Successfully built nes-py
Installing collected packages: pygame, nes-py
Successfully installed nes-py-0.2.6 pygame-2.1.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple

In [1]:
import torch
from tqdm import tqdm
import torch.nn as nn
import random
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
import pickle
from gym_super_mario_bros.actions import RIGHT_ONLY
import gym
import numpy as np
import collections
import cv2
import matplotlib.pyplot as plt
from IPython import display
import time
%matplotlib inline

In [None]:
class MaxAndSkipEnv(gym.Wrapper):
    """Repeat every action for several frames and max-pool the latest frames.

    Each call to ``step`` applies the same action up to ``skip`` times, sums
    the rewards, and returns the element-wise maximum over the frames held in
    a two-slot buffer.
    """

    def __init__(self, env=None, skip=4):
        super().__init__(env)
        self._skip = skip
        # Only the two most recent raw frames are retained for max-pooling.
        self._obs_buffer = collections.deque(maxlen=2)

    def step(self, action):
        """Apply ``action`` for up to ``skip`` frames.

        Returns:
            (max-pooled frame, summed reward, done flag, info of the last
            inner step).
        """
        reward_sum = 0.0
        done = None
        for _ in range(self._skip):
            frame, frame_reward, done, info = self.env.step(action)
            self._obs_buffer.append(frame)
            reward_sum += frame_reward
            if done:
                # Stop repeating the action once the episode has ended.
                break
        pooled_frame = np.stack(self._obs_buffer).max(axis=0)
        return pooled_frame, reward_sum, done, info

    def reset(self):
        """Empty the frame buffer, reset the wrapped env, and return its first frame."""
        self._obs_buffer.clear()
        first_frame = self.env.reset()
        self._obs_buffer.append(first_frame)
        return first_frame

class ProcessFrame84(gym.ObservationWrapper):
    """Convert a 240x256 RGB frame into an 84x84x1 uint8 grayscale image."""

    def __init__(self, env=None):
        super().__init__(env)
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)

    def observation(self, obs):
        """Preprocess one raw frame coming from the wrapped environment."""
        return ProcessFrame84.process(obs)

    @staticmethod
    def process(frame):
        """Grayscale, downsample and crop ``frame``.

        Only inputs holding exactly 240 * 256 * 3 values are accepted; any
        other size trips the assertion below.
        """
        if frame.size != 240 * 256 * 3:
            assert False, "Unknown resolution."
        else:
            img = np.reshape(frame, [240, 256, 3]).astype(np.float32)

        # Weighted sum of the three colour channels gives a single luminance plane.
        red = img[:, :, 0]
        green = img[:, :, 1]
        blue = img[:, :, 2]
        gray = red * 0.299 + green * 0.587 + blue * 0.114

        # cv2.resize takes (width, height): shrink to 84 wide by 110 tall,
        # then keep rows 18..101 to end up with an 84x84 image.
        shrunk = cv2.resize(gray, (84, 110), interpolation=cv2.INTER_AREA)
        cropped = shrunk[18:102, :]
        return np.reshape(cropped, [84, 84, 1]).astype(np.uint8)

class ImageToPyTorch(gym.ObservationWrapper):
    """Move the last (channel) axis of each observation to the front."""

    def __init__(self, env):
        super().__init__(env)
        source_shape = self.observation_space.shape
        # New shape is (last axis, first axis, second axis) of the wrapped space.
        channel_first_shape = (source_shape[-1], source_shape[0], source_shape[1])
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=channel_first_shape, dtype=np.float32)

    def observation(self, observation):
        """Return ``observation`` with axis 2 moved to position 0."""
        return np.moveaxis(observation, 2, 0)

class ScaledFloatFrame(gym.ObservationWrapper):
    """Cast observations to float32 and divide them by 255."""

    def observation(self, obs):
        """Return a new float32 array equal to ``obs / 255``."""
        as_float = np.array(obs).astype(np.float32)
        return as_float / 255.0

class BufferWrapper(gym.ObservationWrapper):
    """Maintain a rolling stack of the most recent observations along axis 0.

    The observation space is the wrapped space with its bounds repeated
    ``n_steps`` times along the first axis.
    """

    def __init__(self, env, n_steps, dtype=np.float32):
        super().__init__(env)
        self.dtype = dtype
        base_space = env.observation_space
        stacked_low = base_space.low.repeat(n_steps, axis=0)
        stacked_high = base_space.high.repeat(n_steps, axis=0)
        self.observation_space = gym.spaces.Box(stacked_low, stacked_high, dtype=dtype)

    def reset(self):
        """Zero the stack, reset the wrapped env, and push its first observation."""
        self.buffer = np.zeros_like(self.observation_space.low, dtype=self.dtype)
        first_observation = self.env.reset()
        return self.observation(first_observation)

    def observation(self, observation):
        """Shift the stack by one slot and store ``observation`` in the last slot.

        The internal buffer itself is returned (not a copy), so it is
        overwritten by the next call.
        """
        # Drop the oldest entry by sliding everything one position forward.
        self.buffer[:-1] = self.buffer[1:]
        self.buffer[-1] = observation
        return self.buffer

def make_env(env):
    """Wrap ``env`` with the preprocessing stack used by the agent.

    Order, innermost first: MaxAndSkipEnv, ProcessFrame84, ImageToPyTorch,
    BufferWrapper (4 steps), ScaledFloatFrame, and finally JoypadSpace
    restricted to the RIGHT_ONLY action list.
    """
    preprocessed = ImageToPyTorch(ProcessFrame84(MaxAndSkipEnv(env)))
    stacked = BufferWrapper(preprocessed, 4)
    scaled = ScaledFloatFrame(stacked)
    return JoypadSpace(scaled, RIGHT_ONLY)

In [None]:
def show_state(env, ep=0, info=""):
    """Render the current frame of ``env`` inline in the notebook.

    Args:
        env: environment supporting ``render(mode='rgb_array')``.
        ep: episode number shown in the title.
        info: extra text appended to the title.
    """
    figure = plt.figure(3)
    plt.clf()
    frame = env.render(mode='rgb_array')
    plt.imshow(frame)
    plt.title("Episode: %d %s" % (ep, info))
    plt.axis('off')

    # Show the figure, then schedule the output to be replaced by the next one.
    display.display(figure)
    display.clear_output(wait=True)
    time.sleep(0.1)

In [None]:
# Build the World 1-1 environment and sanity-check the preprocessing stack.
env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
env = make_env(env)  # frame skip, 84x84 grayscale, 4-frame stack, /255 scaling, RIGHT_ONLY actions
# As the last expression in the cell, the first stacked observation is echoed as output.
env.reset()

array([[[0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ]],

       [[0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0. 

In [None]:
class Deep_Q_Learning(nn.Module):
    """Convolutional Q-network: three conv layers followed by two linear layers.

    Args:
        input_shape: observation shape; ``input_shape[0]`` is the number of
            input channels.
        n_actions: number of Q-values produced per input.
    """

    def __init__(self, input_shape, n_actions):
        super().__init__()
        in_channels = input_shape[0]
        feature_layers = [
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # The size of the flattened conv output depends on the input shape,
        # so it is measured with a dummy forward pass.
        flat_features = self._get_conv_out(input_shape)
        head_layers = [
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions),
        ]
        self.fc = nn.Sequential(*head_layers)

    def _get_conv_out(self, shape):
        """Return the number of elements the conv stack emits for one zero input of ``shape``."""
        probe = torch.zeros(1, *shape)
        return self.conv(probe).numel()

    def forward(self, x):
        """Return one row of Q-values per item in the batch ``x``."""
        batch_size = x.size()[0]
        features = self.conv(x)
        return self.fc(features.view(batch_size, -1))
    

class DQNAgent:
    """DQN agent with a local (online) network, a target network and a replay memory.

    Args:
        state_space: shape of one observation, used to size the networks and
            the state replay tensors.
        action_space: number of discrete actions.
        max_mem: capacity of the replay memory (number of transitions).
        batch_size: number of transitions sampled per learning step.
        gamma: discount factor used in the TD target.
        lr: learning rate of the Adam optimizer.
        epsilon_greedy: initial exploration rate.
        epsilon_min: lower bound for the exploration rate.
        epsilon_decay: multiplicative decay applied after every learning step.
        pretrained: when true, network weights and the replay memory are
            restored from files in the current working directory.
    """

    def __init__(self, state_space, action_space, max_mem, batch_size, gamma, lr,
                 epsilon_greedy, epsilon_min, epsilon_decay, pretrained):

        # Define DQN Layers
        self.state_space = state_space
        self.action_space = action_space
        self.pretrained = pretrained
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.local_net = Deep_Q_Learning(state_space, action_space).to(self.device)
        self.target_net = Deep_Q_Learning(state_space, action_space).to(self.device)

        # NOTE: torch.load / pickle.load below unpickle arbitrary objects;
        # only restore checkpoints that this notebook produced itself.
        if self.pretrained:
            self.local_net.load_state_dict(torch.load("dq1.pt", map_location=torch.device(self.device)))
            self.target_net.load_state_dict(torch.load("dq2.pt", map_location=torch.device(self.device)))

        self.optimizer = torch.optim.Adam(self.local_net.parameters(), lr=lr)
        self.copy = 5000  # Copy the local model weights into the target network every 5000 steps
        self.step = 0

        # Create memory
        self.max_mem = max_mem
        if self.pretrained:
            self.State_Memory = torch.load("State_Memory.pt")
            self.Action_Memory = torch.load("Action_Memory.pt")
            self.Reward_Memory = torch.load("Reward_Memory.pt")
            self.State2_Memory = torch.load("State2_Memory.pt")
            self.Done_Memory = torch.load("Done_Memory.pt")
            with open("ending_position.pkl", 'rb') as f:
                self.ending_position = pickle.load(f)
            with open("num_in_queue.pkl", 'rb') as f:
                self.num_in_queue = pickle.load(f)
        else:
            self.State_Memory = torch.zeros(max_mem, *self.state_space)
            self.Action_Memory = torch.zeros(max_mem, 1)
            self.Reward_Memory = torch.zeros(max_mem, 1)
            self.State2_Memory = torch.zeros(max_mem, *self.state_space)
            self.Done_Memory = torch.zeros(max_mem, 1)
            self.ending_position = 0  # next slot to write in the circular buffer
            self.num_in_queue = 0     # number of valid transitions stored so far

        # Despite the name, this is the sampling batch size, not the memory capacity.
        self.mem_size = batch_size

        # Learning parameters
        self.gamma = gamma
        self.l1 = nn.SmoothL1Loss().to(self.device)  # Also known as Huber loss
        self.epsilon_greedy = epsilon_greedy
        self.exploration_rate = epsilon_greedy
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

    def remember(self, state, action, reward, state2, done):
        """Store one transition, overwriting the oldest one once the memory is full."""
        slot = self.ending_position
        self.State_Memory[slot] = state.float()
        self.Action_Memory[slot] = action.float()
        self.Reward_Memory[slot] = reward.float()
        self.State2_Memory[slot] = state2.float()
        self.Done_Memory[slot] = done.float()
        self.ending_position = (self.ending_position + 1) % self.max_mem  # FIFO tensor
        self.num_in_queue = min(self.num_in_queue + 1, self.max_mem)

    def recall(self):
        """Sample ``mem_size`` stored transitions uniformly, with replacement.

        Returns:
            Tuple ``(STATE, ACTION, REWARD, STATE2, DONE)`` of batched tensors.
        """
        idx = random.choices(range(self.num_in_queue), k=self.mem_size)

        STATE = self.State_Memory[idx]
        ACTION = self.Action_Memory[idx]
        REWARD = self.Reward_Memory[idx]
        STATE2 = self.State2_Memory[idx]
        DONE = self.Done_Memory[idx]

        return STATE, ACTION, REWARD, STATE2, DONE

    def act(self, state):
        """Choose an action epsilon-greedily and count the step.

        Returns:
            A ``(1, 1)`` tensor holding the chosen action index.
        """
        self.step += 1
        if random.random() < self.exploration_rate:
            return torch.tensor([[random.randrange(self.action_space)]])

        # Local net is used for the policy. Action selection is never
        # differentiated, so skip building an autograd graph for it.
        with torch.no_grad():
            q_values = self.local_net(state.to(self.device))
        return torch.argmax(q_values).unsqueeze(0).unsqueeze(0).cpu()

    def copy_model(self):
        """Copy the local net weights into the target net."""
        self.target_net.load_state_dict(self.local_net.state_dict())

    def experience_replay(self):
        """Run one learning step on a sampled batch and decay the exploration rate.

        The target network is synchronised whenever the step counter is a
        multiple of ``self.copy``. Nothing is learned until the memory holds
        at least one batch worth of transitions.
        """
        if self.step % self.copy == 0:
            self.copy_model()

        if self.mem_size > self.num_in_queue:
            return

        STATE, ACTION, REWARD, STATE2, DONE = self.recall()
        STATE = STATE.to(self.device)
        ACTION = ACTION.to(self.device)
        REWARD = REWARD.to(self.device)
        STATE2 = STATE2.to(self.device)
        DONE = DONE.to(self.device)

        self.optimizer.zero_grad()
        # DQN target: r + gamma * max_a Q_target(S', a), with the bootstrap
        # term zeroed for terminal transitions. The target is a constant for
        # the loss, so it is computed without autograd; otherwise backward()
        # would also run through target_net and pile up gradients there that
        # no optimizer ever zeroes or applies.
        with torch.no_grad():
            next_q = self.target_net(STATE2).max(1).values.unsqueeze(1)
            target = REWARD + torch.mul(self.gamma * next_q, 1 - DONE)

        current = self.local_net(STATE).gather(1, ACTION.long())  # Local net approximation of Q-value

        loss = self.l1(current, target)
        loss.backward()  # Compute gradients
        self.optimizer.step()  # Apply the parameter update

        self.exploration_rate *= self.epsilon_decay

        # Makes sure that exploration rate is always at least 'exploration min'
        self.exploration_rate = max(self.exploration_rate, self.epsilon_min)

In [None]:
def run(training_mode, pretrained, num_episodes=10000):
    """Train or evaluate the DQN agent on SuperMarioBros-1-1-v0.

    Args:
        training_mode: when true, transitions are stored, the agent learns
            after every step, and weights, replay memory and rewards are
            written to the working directory at the end. When false, each
            frame is rendered inline and nothing is learned or saved.
        pretrained: forwarded to ``DQNAgent``; when true the agent restores
            its weights and replay memory from disk.
        num_episodes: number of episodes to play.
    """
    reward_window = 500  # episodes per moving-average window in the final plot
    epsilon_min = 0.02

    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
    env = make_env(env)  # frame skip, 84x84 grayscale, 4-frame stack, /255 scaling
    observation_space = env.observation_space.shape
    action_space = env.action_space.n
    agent = DQNAgent(state_space=observation_space,
                     action_space=action_space,
                     max_mem=30000,
                     batch_size=32,
                     gamma=0.90,
                     lr=0.00025,
                     # Epsilon only decays inside experience_replay(), which is
                     # skipped during evaluation. Starting evaluation at 1.0
                     # would make every action random and never consult the
                     # network, so evaluation starts at the floor instead.
                     epsilon_greedy=1.0 if training_mode else epsilon_min,
                     epsilon_min=epsilon_min,
                     epsilon_decay=0.99,
                     pretrained=pretrained)

    total_rewards = []

    try:
        for ep_num in tqdm(range(num_episodes)):
            state = env.reset()
            state = torch.Tensor(np.array([state]))
            total_reward = 0
            while True:
                if not training_mode:
                    show_state(env, ep_num)
                action = agent.act(state)

                state_next, reward, terminal, info = env.step(int(action[0]))
                total_reward += reward
                state_next = torch.Tensor(np.array([state_next]))
                reward = torch.tensor(np.array([reward])).unsqueeze(0)

                terminal = torch.tensor([int(terminal)]).unsqueeze(0)

                if training_mode:
                    agent.remember(state, action, reward, state_next, terminal)
                    agent.experience_replay()

                state = state_next
                if terminal:
                    break

            total_rewards.append(total_reward)

            print("Total reward after episode {} is {}".format(ep_num + 1, total_rewards[-1]))
    finally:
        # Release the emulator even if the loop raises or is interrupted.
        env.close()

    if training_mode:
        with open("ending_position.pkl", "wb") as f:
            pickle.dump(agent.ending_position, f)
        with open("num_in_queue.pkl", "wb") as f:
            pickle.dump(agent.num_in_queue, f)
        with open("total_rewards.pkl", "wb") as f:
            pickle.dump(total_rewards, f)

        torch.save(agent.local_net.state_dict(), "dq1.pt")
        torch.save(agent.target_net.state_dict(), "dq2.pt")
        torch.save(agent.State_Memory,  "State_Memory.pt")
        torch.save(agent.Action_Memory, "Action_Memory.pt")
        torch.save(agent.Reward_Memory, "Reward_Memory.pt")
        torch.save(agent.State2_Memory, "State2_Memory.pt")
        torch.save(agent.Done_Memory,   "Done_Memory.pt")

    # Gate the plot on the episodes actually recorded: a "valid" convolution
    # needs more samples than the window to produce a meaningful average.
    if len(total_rewards) > reward_window:
        averaged = np.convolve(total_rewards,
                               np.ones((reward_window,)) / reward_window,
                               mode="valid").tolist()
        plt.title("Episodes trained vs. Average Rewards (per 500 eps)")
        # Left-pad with zeros so the x axis lines up with the episode index.
        plt.plot([0 for _ in range(reward_window)] + averaged)
        plt.show()

# Train from scratch: no checkpoint is loaded, and artifacts are written when the run completes.
run(training_mode=True, pretrained=False)

  0%|          | 1/10000 [00:04<13:12:59,  4.76s/it]

Total reward after episode 1 is 751.0


  0%|          | 2/10000 [00:05<6:33:14,  2.36s/it] 

Total reward after episode 2 is 250.0


  0%|          | 3/10000 [00:08<7:45:00,  2.79s/it]

Total reward after episode 3 is 578.0


  0%|          | 4/10000 [00:09<5:36:39,  2.02s/it]

Total reward after episode 4 is 235.0


  0%|          | 5/10000 [00:10<4:27:58,  1.61s/it]

Total reward after episode 5 is 231.0


  0%|          | 6/10000 [00:11<3:46:03,  1.36s/it]

Total reward after episode 6 is 231.0


  0%|          | 7/10000 [00:12<3:20:23,  1.20s/it]

Total reward after episode 7 is 231.0


  0%|          | 8/10000 [00:13<3:02:54,  1.10s/it]

Total reward after episode 8 is 231.0


  0%|          | 9/10000 [00:13<2:50:39,  1.02s/it]

Total reward after episode 9 is 231.0


  0%|          | 10/10000 [00:14<2:41:22,  1.03it/s]

Total reward after episode 10 is 233.0


  0%|          | 11/10000 [00:15<2:33:27,  1.08it/s]

Total reward after episode 11 is 236.0


  0%|          | 12/10000 [00:16<2:28:48,  1.12it/s]

Total reward after episode 12 is 233.0


  0%|          | 13/10000 [00:17<2:27:38,  1.13it/s]

Total reward after episode 13 is 230.0


  0%|          | 14/10000 [00:18<2:26:18,  1.14it/s]

Total reward after episode 14 is 233.0


  0%|          | 15/10000 [00:19<2:26:20,  1.14it/s]

Total reward after episode 15 is 231.0


  0%|          | 16/10000 [00:19<2:24:41,  1.15it/s]

Total reward after episode 16 is 233.0


  0%|          | 17/10000 [00:20<2:25:09,  1.15it/s]

Total reward after episode 17 is 231.0


  0%|          | 18/10000 [00:21<2:23:23,  1.16it/s]

Total reward after episode 18 is 233.0


  0%|          | 19/10000 [00:22<2:23:30,  1.16it/s]

Total reward after episode 19 is 231.0


  0%|          | 20/10000 [00:24<3:04:34,  1.11s/it]

Total reward after episode 20 is 231.0


  0%|          | 21/10000 [00:25<3:24:53,  1.23s/it]

Total reward after episode 21 is 231.0


  0%|          | 22/10000 [00:34<9:42:43,  3.50s/it]

Total reward after episode 22 is 593.0


  0%|          | 23/10000 [00:35<7:18:25,  2.64s/it]

Total reward after episode 23 is 251.0


  0%|          | 24/10000 [00:35<5:37:41,  2.03s/it]

Total reward after episode 24 is 249.0


  0%|          | 25/10000 [00:36<4:26:54,  1.61s/it]

Total reward after episode 25 is 251.0


  0%|          | 26/10000 [00:37<3:46:26,  1.36s/it]

Total reward after episode 26 is 248.0


  0%|          | 27/10000 [00:38<3:40:30,  1.33s/it]

Total reward after episode 27 is 248.0


  0%|          | 28/10000 [00:39<3:19:01,  1.20s/it]

Total reward after episode 28 is 238.0


  0%|          | 29/10000 [00:39<2:47:49,  1.01s/it]

Total reward after episode 29 is 248.0


  0%|          | 30/10000 [00:40<2:27:21,  1.13it/s]

Total reward after episode 30 is 248.0


  0%|          | 31/10000 [00:41<2:14:03,  1.24it/s]

Total reward after episode 31 is 251.0


  0%|          | 32/10000 [00:41<2:05:15,  1.33it/s]

Total reward after episode 32 is 251.0


  0%|          | 33/10000 [00:42<1:57:04,  1.42it/s]

Total reward after episode 33 is 252.0


  0%|          | 34/10000 [00:42<1:52:56,  1.47it/s]

Total reward after episode 34 is 251.0


  0%|          | 35/10000 [00:43<1:48:35,  1.53it/s]

Total reward after episode 35 is 248.0


  0%|          | 36/10000 [00:44<1:45:40,  1.57it/s]

Total reward after episode 36 is 248.0


  0%|          | 37/10000 [00:44<1:44:45,  1.59it/s]

Total reward after episode 37 is 251.0


  0%|          | 38/10000 [00:45<1:43:50,  1.60it/s]

Total reward after episode 38 is 251.0


  0%|          | 39/10000 [00:45<1:42:03,  1.63it/s]

Total reward after episode 39 is 252.0


  0%|          | 40/10000 [00:46<1:40:46,  1.65it/s]

Total reward after episode 40 is 248.0


  0%|          | 41/10000 [00:47<1:42:27,  1.62it/s]

Total reward after episode 41 is 251.0


  0%|          | 42/10000 [00:47<1:41:20,  1.64it/s]

Total reward after episode 42 is 248.0


  0%|          | 43/10000 [00:48<1:40:29,  1.65it/s]

Total reward after episode 43 is 252.0


  0%|          | 44/10000 [00:48<1:39:18,  1.67it/s]

Total reward after episode 44 is 248.0


  0%|          | 45/10000 [00:49<1:38:37,  1.68it/s]

Total reward after episode 45 is 248.0


  0%|          | 46/10000 [00:50<1:39:47,  1.66it/s]

Total reward after episode 46 is 251.0


  0%|          | 47/10000 [00:50<1:39:36,  1.67it/s]

Total reward after episode 47 is 249.0


  0%|          | 48/10000 [00:51<1:40:17,  1.65it/s]

Total reward after episode 48 is 251.0


  0%|          | 49/10000 [00:51<1:43:09,  1.61it/s]

Total reward after episode 49 is 250.0


  0%|          | 50/10000 [00:52<1:42:29,  1.62it/s]

Total reward after episode 50 is 251.0


  1%|          | 51/10000 [00:53<1:41:02,  1.64it/s]

Total reward after episode 51 is 252.0


  1%|          | 52/10000 [00:53<1:39:12,  1.67it/s]

Total reward after episode 52 is 248.0


  1%|          | 53/10000 [00:54<1:39:29,  1.67it/s]

Total reward after episode 53 is 251.0


  1%|          | 54/10000 [00:54<1:40:19,  1.65it/s]

Total reward after episode 54 is 251.0


  1%|          | 55/10000 [00:59<4:52:19,  1.76s/it]

Total reward after episode 55 is 802.0


  1%|          | 56/10000 [01:00<3:54:16,  1.41s/it]

Total reward after episode 56 is 251.0


  1%|          | 57/10000 [01:00<3:14:36,  1.17s/it]

Total reward after episode 57 is 251.0


  1%|          | 58/10000 [01:01<2:44:43,  1.01it/s]

Total reward after episode 58 is 252.0


  1%|          | 59/10000 [01:03<3:59:41,  1.45s/it]

Total reward after episode 59 is 629.0


  1%|          | 60/10000 [01:04<3:16:24,  1.19s/it]

Total reward after episode 60 is 248.0


  1%|          | 61/10000 [01:06<3:48:36,  1.38s/it]

Total reward after episode 61 is 638.0


  1%|          | 62/10000 [01:07<3:22:43,  1.22s/it]

Total reward after episode 62 is 252.0


  1%|          | 63/10000 [01:09<4:07:36,  1.50s/it]

Total reward after episode 63 is 632.0


  1%|          | 64/10000 [01:09<3:23:01,  1.23s/it]

Total reward after episode 64 is 252.0


  1%|          | 65/10000 [01:11<3:46:50,  1.37s/it]

Total reward after episode 65 is 629.0


  1%|          | 66/10000 [01:12<3:08:19,  1.14s/it]

Total reward after episode 66 is 252.0


  1%|          | 67/10000 [01:12<2:41:14,  1.03it/s]

Total reward after episode 67 is 252.0


  1%|          | 68/10000 [01:13<2:24:33,  1.15it/s]

Total reward after episode 68 is 251.0


  1%|          | 69/10000 [01:13<2:11:54,  1.25it/s]

Total reward after episode 69 is 252.0


  1%|          | 70/10000 [01:14<2:05:25,  1.32it/s]

Total reward after episode 70 is 247.0


  1%|          | 71/10000 [01:15<1:58:43,  1.39it/s]

Total reward after episode 71 is 252.0


  1%|          | 72/10000 [01:15<1:53:51,  1.45it/s]

Total reward after episode 72 is 252.0


  1%|          | 73/10000 [01:17<2:44:58,  1.00it/s]

Total reward after episode 73 is 630.0


  1%|          | 74/10000 [01:19<3:21:28,  1.22s/it]

Total reward after episode 74 is 637.0


  1%|          | 75/10000 [01:19<2:51:34,  1.04s/it]

Total reward after episode 75 is 252.0


  1%|          | 76/10000 [01:20<2:30:01,  1.10it/s]

Total reward after episode 76 is 252.0


  1%|          | 77/10000 [01:22<3:10:58,  1.15s/it]

Total reward after episode 77 is 637.0


  1%|          | 78/10000 [01:22<2:43:21,  1.01it/s]

Total reward after episode 78 is 248.0


  1%|          | 79/10000 [01:23<2:23:57,  1.15it/s]

Total reward after episode 79 is 252.0


  1%|          | 80/10000 [01:25<3:02:57,  1.11s/it]

Total reward after episode 80 is 632.0


  1%|          | 81/10000 [01:26<3:34:57,  1.30s/it]

Total reward after episode 81 is 639.0


  1%|          | 82/10000 [01:27<3:01:16,  1.10s/it]

Total reward after episode 82 is 252.0


  1%|          | 83/10000 [01:29<3:32:00,  1.28s/it]

Total reward after episode 83 is 635.0


  1%|          | 84/10000 [01:29<2:57:54,  1.08s/it]

Total reward after episode 84 is 248.0


  1%|          | 85/10000 [01:33<5:08:46,  1.87s/it]

Total reward after episode 85 is 739.0


  1%|          | 86/10000 [01:36<6:07:59,  2.23s/it]

Total reward after episode 86 is 640.0


  1%|          | 87/10000 [01:38<6:20:41,  2.30s/it]

Total reward after episode 87 is 630.0


  1%|          | 88/10000 [01:40<5:49:45,  2.12s/it]

Total reward after episode 88 is 628.0


  1%|          | 89/10000 [01:41<4:34:02,  1.66s/it]

Total reward after episode 89 is 252.0


  1%|          | 90/10000 [01:42<4:36:39,  1.67s/it]

Total reward after episode 90 is 633.0


  1%|          | 91/10000 [01:46<5:48:32,  2.11s/it]

Total reward after episode 91 is 620.0


  1%|          | 92/10000 [01:49<6:35:32,  2.40s/it]

Total reward after episode 92 is 765.0


  1%|          | 93/10000 [01:49<5:06:10,  1.85s/it]

Total reward after episode 93 is 248.0


  1%|          | 94/10000 [01:54<7:31:49,  2.74s/it]

Total reward after episode 94 is 739.0


  1%|          | 95/10000 [01:56<6:40:58,  2.43s/it]

Total reward after episode 95 is 640.0


  1%|          | 96/10000 [01:56<5:11:47,  1.89s/it]

Total reward after episode 96 is 251.0


  1%|          | 97/10000 [01:57<4:10:22,  1.52s/it]

Total reward after episode 97 is 250.0


  1%|          | 98/10000 [01:59<4:25:04,  1.61s/it]

Total reward after episode 98 is 629.0


  1%|          | 99/10000 [02:00<3:37:55,  1.32s/it]

Total reward after episode 99 is 252.0


  1%|          | 100/10000 [02:05<6:55:10,  2.52s/it]

Total reward after episode 100 is 795.0


  1%|          | 101/10000 [02:12<10:44:06,  3.90s/it]

Total reward after episode 101 is 778.0


  1%|          | 102/10000 [02:15<9:59:15,  3.63s/it] 

Total reward after episode 102 is 640.0


  1%|          | 103/10000 [02:19<10:20:51,  3.76s/it]

Total reward after episode 103 is 807.0


  1%|          | 104/10000 [02:22<9:40:09,  3.52s/it] 

Total reward after episode 104 is 641.0


  1%|          | 105/10000 [02:23<7:56:51,  2.89s/it]

Total reward after episode 105 is 622.0


  1%|          | 106/10000 [02:25<7:09:47,  2.61s/it]

Total reward after episode 106 is 610.0


  1%|          | 107/10000 [02:27<6:08:28,  2.23s/it]

Total reward after episode 107 is 621.0


  1%|          | 108/10000 [02:28<5:36:44,  2.04s/it]

Total reward after episode 108 is 629.0


  1%|          | 109/10000 [02:30<5:14:33,  1.91s/it]

Total reward after episode 109 is 626.0


  1%|          | 110/10000 [02:32<4:59:36,  1.82s/it]

Total reward after episode 110 is 629.0


  1%|          | 111/10000 [02:34<5:55:55,  2.16s/it]

Total reward after episode 111 is 817.0


  1%|          | 112/10000 [02:36<5:37:26,  2.05s/it]

Total reward after episode 112 is 627.0


  1%|          | 113/10000 [02:37<4:26:16,  1.62s/it]

Total reward after episode 113 is 252.0


  1%|          | 114/10000 [02:39<4:30:17,  1.64s/it]

Total reward after episode 114 is 637.0


  1%|          | 115/10000 [02:41<5:29:18,  2.00s/it]

Total reward after episode 115 is 818.0


  1%|          | 116/10000 [02:43<5:11:02,  1.89s/it]

Total reward after episode 116 is 626.0


  1%|          | 117/10000 [02:44<4:45:17,  1.73s/it]

Total reward after episode 117 is 622.0


  1%|          | 118/10000 [02:46<4:37:31,  1.69s/it]

Total reward after episode 118 is 624.0


  1%|          | 119/10000 [02:49<5:49:07,  2.12s/it]

Total reward after episode 119 is 815.0


  1%|          | 120/10000 [02:53<7:07:31,  2.60s/it]

Total reward after episode 120 is 734.0


  1%|          | 121/10000 [02:56<7:24:12,  2.70s/it]

Total reward after episode 121 is 741.0


  1%|          | 122/10000 [03:39<41:03:20, 14.96s/it]

Total reward after episode 122 is 443.0


  1%|          | 123/10000 [03:40<29:16:14, 10.67s/it]

Total reward after episode 123 is 240.0


  1%|          | 124/10000 [03:41<21:03:36,  7.68s/it]

Total reward after episode 124 is 243.0


  1%|▏         | 125/10000 [03:46<19:12:44,  7.00s/it]

Total reward after episode 125 is 744.0


  1%|▏         | 126/10000 [03:48<14:40:37,  5.35s/it]

Total reward after episode 126 is 630.0


  1%|▏         | 127/10000 [03:50<12:02:30,  4.39s/it]

Total reward after episode 127 is 636.0


  1%|▏         | 128/10000 [03:53<10:55:05,  3.98s/it]

Total reward after episode 128 is 640.0


  1%|▏         | 129/10000 [03:56<10:33:05,  3.85s/it]

Total reward after episode 129 is 811.0


  1%|▏         | 130/10000 [04:02<11:53:10,  4.34s/it]

Total reward after episode 130 is 1130.0


  1%|▏         | 131/10000 [04:03<9:41:09,  3.53s/it] 

Total reward after episode 131 is 626.0


  1%|▏         | 132/10000 [04:04<7:20:30,  2.68s/it]

Total reward after episode 132 is 241.0


  1%|▏         | 133/10000 [04:05<5:45:35,  2.10s/it]

Total reward after episode 133 is 232.0


  1%|▏         | 134/10000 [04:07<5:42:37,  2.08s/it]

Total reward after episode 134 is 604.0


  1%|▏         | 135/10000 [04:09<5:27:44,  1.99s/it]

Total reward after episode 135 is 733.0


  1%|▏         | 136/10000 [04:12<6:29:35,  2.37s/it]

Total reward after episode 136 is 814.0


  1%|▏         | 137/10000 [04:16<7:28:56,  2.73s/it]

Total reward after episode 137 is 811.0


  1%|▏         | 138/10000 [04:16<5:47:44,  2.12s/it]

Total reward after episode 138 is 240.0


  1%|▏         | 139/10000 [04:19<6:29:14,  2.37s/it]

Total reward after episode 139 is 593.0


  1%|▏         | 140/10000 [04:20<5:01:04,  1.83s/it]

Total reward after episode 140 is 248.0


  1%|▏         | 141/10000 [04:21<4:42:33,  1.72s/it]

Total reward after episode 141 is 610.0


  1%|▏         | 142/10000 [04:24<6:00:01,  2.19s/it]

Total reward after episode 142 is 813.0


  1%|▏         | 143/10000 [04:27<6:32:39,  2.39s/it]

Total reward after episode 143 is 723.0


  1%|▏         | 144/10000 [04:29<5:51:10,  2.14s/it]

Total reward after episode 144 is 621.0


  1%|▏         | 145/10000 [04:32<6:20:22,  2.32s/it]

Total reward after episode 145 is 744.0


  1%|▏         | 146/10000 [04:35<7:06:32,  2.60s/it]

Total reward after episode 146 is 814.0


  1%|▏         | 147/10000 [04:37<6:20:34,  2.32s/it]

Total reward after episode 147 is 625.0


  1%|▏         | 148/10000 [04:41<7:41:56,  2.81s/it]

Total reward after episode 148 is 1340.0


  1%|▏         | 149/10000 [04:41<5:56:25,  2.17s/it]

Total reward after episode 149 is 235.0


  2%|▏         | 150/10000 [04:44<6:41:12,  2.44s/it]

Total reward after episode 150 is 815.0


  2%|▏         | 151/10000 [04:50<9:24:27,  3.44s/it]

Total reward after episode 151 is 1566.0


  2%|▏         | 152/10000 [04:51<7:04:04,  2.58s/it]

Total reward after episode 152 is 252.0


  2%|▏         | 153/10000 [04:53<7:11:22,  2.63s/it]

Total reward after episode 153 is 819.0


  2%|▏         | 154/10000 [04:55<6:10:57,  2.26s/it]

Total reward after episode 154 is 626.0


  2%|▏         | 155/10000 [05:38<39:53:45, 14.59s/it]

Total reward after episode 155 is 443.0


  2%|▏         | 156/10000 [05:40<29:27:51, 10.78s/it]

Total reward after episode 156 is 622.0


  2%|▏         | 157/10000 [05:44<24:17:58,  8.89s/it]

Total reward after episode 157 is 1320.0


  2%|▏         | 158/10000 [05:48<19:37:30,  7.18s/it]

Total reward after episode 158 is 815.0


  2%|▏         | 159/10000 [05:48<14:13:17,  5.20s/it]

Total reward after episode 159 is 252.0


  2%|▏         | 160/10000 [05:51<12:29:48,  4.57s/it]

Total reward after episode 160 is 639.0


  2%|▏         | 161/10000 [05:52<9:19:00,  3.41s/it] 

Total reward after episode 161 is 237.0


  2%|▏         | 162/10000 [05:53<7:05:44,  2.60s/it]

Total reward after episode 162 is 236.0


  2%|▏         | 163/10000 [05:56<7:37:51,  2.79s/it]

Total reward after episode 163 is 814.0


  2%|▏         | 164/10000 [06:30<32:58:15, 12.07s/it]

Total reward after episode 164 is 335.0


  2%|▏         | 165/10000 [06:30<23:33:52,  8.63s/it]

Total reward after episode 165 is 251.0


  2%|▏         | 166/10000 [06:31<16:58:45,  6.22s/it]

Total reward after episode 166 is 247.0


  2%|▏         | 167/10000 [06:34<14:30:58,  5.31s/it]

Total reward after episode 167 is 1150.0


  2%|▏         | 168/10000 [06:35<10:38:16,  3.90s/it]

Total reward after episode 168 is 248.0


  2%|▏         | 169/10000 [06:39<11:19:20,  4.15s/it]

Total reward after episode 169 is 1425.0


  2%|▏         | 170/10000 [06:40<8:24:18,  3.08s/it] 

Total reward after episode 170 is 248.0


  2%|▏         | 171/10000 [06:45<9:44:29,  3.57s/it]

Total reward after episode 171 is 1430.0


  2%|▏         | 172/10000 [06:45<7:17:34,  2.67s/it]

Total reward after episode 172 is 252.0


  2%|▏         | 173/10000 [06:46<5:35:03,  2.05s/it]

Total reward after episode 173 is 252.0


  2%|▏         | 174/10000 [06:46<4:23:36,  1.61s/it]

Total reward after episode 174 is 248.0


  2%|▏         | 175/10000 [06:50<6:13:42,  2.28s/it]

Total reward after episode 175 is 1434.0


  2%|▏         | 176/10000 [06:54<7:09:50,  2.63s/it]

Total reward after episode 176 is 1329.0


  2%|▏         | 177/10000 [07:00<10:11:33,  3.74s/it]

Total reward after episode 177 is 737.0


  2%|▏         | 178/10000 [07:04<10:28:23,  3.84s/it]

Total reward after episode 178 is 1325.0


  2%|▏         | 179/10000 [07:05<7:49:13,  2.87s/it] 

Total reward after episode 179 is 252.0


  2%|▏         | 180/10000 [07:06<6:49:04,  2.50s/it]

Total reward after episode 180 is 621.0


  2%|▏         | 181/10000 [07:07<5:15:37,  1.93s/it]

Total reward after episode 181 is 252.0


  2%|▏         | 182/10000 [07:08<4:09:37,  1.53s/it]

Total reward after episode 182 is 248.0


  2%|▏         | 183/10000 [07:08<3:23:21,  1.24s/it]

Total reward after episode 183 is 252.0


  2%|▏         | 184/10000 [07:16<8:57:43,  3.29s/it]

Total reward after episode 184 is 1288.0


  2%|▏         | 185/10000 [07:17<6:45:05,  2.48s/it]

Total reward after episode 185 is 248.0


  2%|▏         | 186/10000 [07:17<5:12:56,  1.91s/it]

Total reward after episode 186 is 252.0


  2%|▏         | 187/10000 [07:22<7:24:09,  2.72s/it]

Total reward after episode 187 is 1031.0


  2%|▏         | 188/10000 [07:25<7:37:45,  2.80s/it]

Total reward after episode 188 is 1045.0


  2%|▏         | 189/10000 [07:49<25:17:30,  9.28s/it]

Total reward after episode 189 is 564.0


  2%|▏         | 190/10000 [07:56<22:53:28,  8.40s/it]

Total reward after episode 190 is 1413.0


  2%|▏         | 191/10000 [07:56<16:33:41,  6.08s/it]

Total reward after episode 191 is 243.0


  2%|▏         | 192/10000 [07:58<12:54:08,  4.74s/it]

Total reward after episode 192 is 632.0


  2%|▏         | 193/10000 [08:03<12:47:21,  4.69s/it]

Total reward after episode 193 is 801.0


  2%|▏         | 194/10000 [08:07<12:18:39,  4.52s/it]

Total reward after episode 194 is 732.0


  2%|▏         | 195/10000 [08:10<11:41:55,  4.30s/it]

Total reward after episode 195 is 1154.0


  2%|▏         | 196/10000 [08:15<11:58:40,  4.40s/it]

Total reward after episode 196 is 1031.0


  2%|▏         | 197/10000 [08:20<12:05:21,  4.44s/it]

Total reward after episode 197 is 1429.0


  2%|▏         | 198/10000 [08:22<10:10:34,  3.74s/it]

Total reward after episode 198 is 603.0


  2%|▏         | 199/10000 [08:25<9:51:09,  3.62s/it] 

Total reward after episode 199 is 1436.0


  2%|▏         | 200/10000 [08:28<9:13:13,  3.39s/it]

Total reward after episode 200 is 617.0


  2%|▏         | 201/10000 [08:29<6:58:29,  2.56s/it]

Total reward after episode 201 is 242.0


  2%|▏         | 202/10000 [08:31<6:34:24,  2.42s/it]

Total reward after episode 202 is 606.0


  2%|▏         | 203/10000 [08:32<5:58:33,  2.20s/it]

Total reward after episode 203 is 629.0


  2%|▏         | 204/10000 [08:33<4:44:52,  1.74s/it]

Total reward after episode 204 is 241.0


  2%|▏         | 205/10000 [08:34<4:20:10,  1.59s/it]

Total reward after episode 205 is 608.0


  2%|▏         | 206/10000 [08:39<7:01:17,  2.58s/it]

Total reward after episode 206 is 723.0


  2%|▏         | 207/10000 [08:41<6:26:31,  2.37s/it]

Total reward after episode 207 is 619.0


  2%|▏         | 208/10000 [08:44<6:31:57,  2.40s/it]

Total reward after episode 208 is 727.0


  2%|▏         | 209/10000 [08:48<8:14:31,  3.03s/it]

Total reward after episode 209 is 1423.0


  2%|▏         | 210/10000 [08:50<7:01:55,  2.59s/it]

Total reward after episode 210 is 653.0


  2%|▏         | 211/10000 [08:58<11:57:34,  4.40s/it]

Total reward after episode 211 is 1387.0


  2%|▏         | 212/10000 [09:02<11:11:31,  4.12s/it]

Total reward after episode 212 is 1437.0


  2%|▏         | 213/10000 [09:05<10:36:18,  3.90s/it]

Total reward after episode 213 is 758.0


  2%|▏         | 214/10000 [09:06<7:57:28,  2.93s/it] 

Total reward after episode 214 is 237.0


  2%|▏         | 215/10000 [09:08<7:03:14,  2.60s/it]

Total reward after episode 215 is 620.0


  2%|▏         | 216/10000 [09:08<5:25:00,  1.99s/it]

Total reward after episode 216 is 248.0


  2%|▏         | 217/10000 [09:09<4:20:41,  1.60s/it]

Total reward after episode 217 is 241.0


  2%|▏         | 218/10000 [09:11<4:36:43,  1.70s/it]

Total reward after episode 218 is 595.0


  2%|▏         | 219/10000 [09:14<5:45:19,  2.12s/it]

Total reward after episode 219 is 1439.0


  2%|▏         | 220/10000 [09:18<7:39:50,  2.82s/it]

Total reward after episode 220 is 803.0


  2%|▏         | 221/10000 [09:20<6:35:35,  2.43s/it]

Total reward after episode 221 is 625.0


  2%|▏         | 222/10000 [09:21<5:37:33,  2.07s/it]

Total reward after episode 222 is 607.0


  2%|▏         | 223/10000 [09:22<5:03:08,  1.86s/it]

Total reward after episode 223 is 621.0


  2%|▏         | 224/10000 [09:24<4:40:33,  1.72s/it]

Total reward after episode 224 is 618.0


  2%|▏         | 225/10000 [09:27<5:55:38,  2.18s/it]

Total reward after episode 225 is 1151.0


  2%|▏         | 226/10000 [09:29<5:34:32,  2.05s/it]

Total reward after episode 226 is 626.0


  2%|▏         | 227/10000 [09:34<8:05:12,  2.98s/it]

Total reward after episode 227 is 1329.0


  2%|▏         | 228/10000 [09:36<6:57:33,  2.56s/it]

Total reward after episode 228 is 631.0


  2%|▏         | 229/10000 [09:40<8:44:41,  3.22s/it]

Total reward after episode 229 is 1423.0


  2%|▏         | 230/10000 [09:46<11:04:11,  4.08s/it]

Total reward after episode 230 is 1416.0


  2%|▏         | 231/10000 [09:47<8:14:12,  3.04s/it] 

Total reward after episode 231 is 252.0


  2%|▏         | 232/10000 [09:49<7:23:25,  2.72s/it]

Total reward after episode 232 is 632.0


  2%|▏         | 233/10000 [09:51<6:40:58,  2.46s/it]

Total reward after episode 233 is 619.0


  2%|▏         | 234/10000 [09:55<8:03:48,  2.97s/it]

Total reward after episode 234 is 1431.0


  2%|▏         | 235/10000 [09:57<7:06:25,  2.62s/it]

Total reward after episode 235 is 612.0


  2%|▏         | 236/10000 [10:00<7:31:35,  2.78s/it]

Total reward after episode 236 is 746.0


  2%|▏         | 237/10000 [10:02<7:03:14,  2.60s/it]

Total reward after episode 237 is 609.0


  2%|▏         | 238/10000 [10:06<8:03:44,  2.97s/it]

Total reward after episode 238 is 808.0


  2%|▏         | 239/10000 [10:08<7:08:16,  2.63s/it]

Total reward after episode 239 is 736.0


  2%|▏         | 240/10000 [10:12<8:45:19,  3.23s/it]

Total reward after episode 240 is 1139.0


  2%|▏         | 241/10000 [10:13<6:36:59,  2.44s/it]

Total reward after episode 241 is 248.0


  2%|▏         | 242/10000 [10:17<7:43:21,  2.85s/it]

Total reward after episode 242 is 1351.0


  2%|▏         | 243/10000 [10:21<8:44:41,  3.23s/it]

Total reward after episode 243 is 1431.0


  2%|▏         | 244/10000 [10:22<6:35:36,  2.43s/it]

Total reward after episode 244 is 252.0


  2%|▏         | 245/10000 [10:26<7:57:40,  2.94s/it]

Total reward after episode 245 is 1433.0


  2%|▏         | 246/10000 [10:29<8:27:34,  3.12s/it]

Total reward after episode 246 is 1437.0


  2%|▏         | 247/10000 [10:31<7:07:54,  2.63s/it]

Total reward after episode 247 is 621.0


  2%|▏         | 248/10000 [10:31<5:33:57,  2.05s/it]

Total reward after episode 248 is 237.0


  2%|▏         | 249/10000 [10:32<4:26:47,  1.64s/it]

Total reward after episode 249 is 239.0


  2%|▎         | 250/10000 [10:36<6:32:01,  2.41s/it]

Total reward after episode 250 is 1428.0


  3%|▎         | 251/10000 [10:40<7:46:23,  2.87s/it]

Total reward after episode 251 is 1438.0


  3%|▎         | 252/10000 [10:41<6:00:14,  2.22s/it]

Total reward after episode 252 is 235.0


  3%|▎         | 253/10000 [10:45<7:12:46,  2.66s/it]

Total reward after episode 253 is 1433.0


  3%|▎         | 254/10000 [10:46<6:26:31,  2.38s/it]

Total reward after episode 254 is 608.0


  3%|▎         | 255/10000 [10:49<7:05:32,  2.62s/it]

Total reward after episode 255 is 1044.0


  3%|▎         | 256/10000 [10:54<8:18:31,  3.07s/it]

Total reward after episode 256 is 1339.0


  3%|▎         | 257/10000 [11:00<11:00:10,  4.07s/it]

Total reward after episode 257 is 1406.0


  3%|▎         | 258/10000 [11:05<11:23:28,  4.21s/it]

Total reward after episode 258 is 802.0


  3%|▎         | 259/10000 [11:06<9:20:39,  3.45s/it] 

Total reward after episode 259 is 652.0


  3%|▎         | 260/10000 [11:08<8:08:52,  3.01s/it]

Total reward after episode 260 is 631.0


  3%|▎         | 261/10000 [11:09<6:11:46,  2.29s/it]

Total reward after episode 261 is 251.0


  3%|▎         | 262/10000 [11:10<5:37:32,  2.08s/it]

Total reward after episode 262 is 636.0


  3%|▎         | 263/10000 [11:12<5:20:44,  1.98s/it]

Total reward after episode 263 is 738.0


  3%|▎         | 264/10000 [11:13<4:13:55,  1.56s/it]

Total reward after episode 264 is 247.0


  3%|▎         | 265/10000 [11:13<3:31:51,  1.31s/it]

Total reward after episode 265 is 235.0


  3%|▎         | 266/10000 [11:14<2:57:56,  1.10s/it]

Total reward after episode 266 is 250.0


  3%|▎         | 267/10000 [11:19<6:11:31,  2.29s/it]

Total reward after episode 267 is 1422.0


  3%|▎         | 268/10000 [11:24<8:07:21,  3.00s/it]

Total reward after episode 268 is 1427.0


  3%|▎         | 269/10000 [11:25<7:03:01,  2.61s/it]

Total reward after episode 269 is 652.0


  3%|▎         | 270/10000 [11:29<7:58:54,  2.95s/it]

Total reward after episode 270 is 734.0


  3%|▎         | 271/10000 [11:32<7:59:06,  2.95s/it]

Total reward after episode 271 is 1046.0


  3%|▎         | 272/10000 [11:33<6:03:54,  2.24s/it]

Total reward after episode 272 is 248.0


  3%|▎         | 273/10000 [11:37<7:33:46,  2.80s/it]

Total reward after episode 273 is 1434.0


  3%|▎         | 274/10000 [11:39<7:13:41,  2.68s/it]

Total reward after episode 274 is 729.0


  3%|▎         | 275/10000 [11:43<8:14:02,  3.05s/it]

Total reward after episode 275 is 1704.0


  3%|▎         | 276/10000 [11:44<6:19:30,  2.34s/it]

Total reward after episode 276 is 237.0


  3%|▎         | 277/10000 [11:45<4:58:03,  1.84s/it]

Total reward after episode 277 is 237.0


  3%|▎         | 278/10000 [11:46<4:34:33,  1.69s/it]

Total reward after episode 278 is 618.0


  3%|▎         | 279/10000 [11:48<4:36:55,  1.71s/it]

Total reward after episode 279 is 735.0


  3%|▎         | 280/10000 [11:49<4:14:17,  1.57s/it]

Total reward after episode 280 is 607.0


  3%|▎         | 281/10000 [11:51<4:27:42,  1.65s/it]

Total reward after episode 281 is 630.0


  3%|▎         | 282/10000 [11:55<6:38:02,  2.46s/it]

Total reward after episode 282 is 1571.0


  3%|▎         | 283/10000 [11:57<5:57:00,  2.20s/it]

Total reward after episode 283 is 653.0


  3%|▎         | 284/10000 [12:00<6:42:19,  2.48s/it]

Total reward after episode 284 is 1065.0


  3%|▎         | 285/10000 [12:03<7:14:01,  2.68s/it]

Total reward after episode 285 is 639.0


  3%|▎         | 286/10000 [12:05<6:25:26,  2.38s/it]

Total reward after episode 286 is 622.0


  3%|▎         | 287/10000 [12:10<8:50:12,  3.28s/it]

Total reward after episode 287 is 1428.0


  3%|▎         | 288/10000 [12:12<7:36:10,  2.82s/it]

Total reward after episode 288 is 652.0


  3%|▎         | 289/10000 [12:12<5:47:38,  2.15s/it]

Total reward after episode 289 is 248.0


  3%|▎         | 290/10000 [12:14<5:16:36,  1.96s/it]

Total reward after episode 290 is 628.0


  3%|▎         | 291/10000 [12:17<6:08:58,  2.28s/it]

Total reward after episode 291 is 625.0


  3%|▎         | 292/10000 [12:20<7:10:34,  2.66s/it]

Total reward after episode 292 is 1333.0


  3%|▎         | 293/10000 [12:22<6:07:23,  2.27s/it]

Total reward after episode 293 is 603.0


  3%|▎         | 294/10000 [12:24<6:02:20,  2.24s/it]

Total reward after episode 294 is 603.0


  3%|▎         | 295/10000 [12:28<7:44:14,  2.87s/it]

Total reward after episode 295 is 1034.0


  3%|▎         | 296/10000 [12:33<9:24:03,  3.49s/it]

Total reward after episode 296 is 1426.0


  3%|▎         | 297/10000 [12:36<9:08:18,  3.39s/it]

Total reward after episode 297 is 619.0


  3%|▎         | 298/10000 [12:39<8:08:07,  3.02s/it]

Total reward after episode 298 is 648.0


  3%|▎         | 299/10000 [12:39<6:09:56,  2.29s/it]

Total reward after episode 299 is 248.0


  3%|▎         | 300/10000 [12:40<4:47:22,  1.78s/it]

Total reward after episode 300 is 247.0


  3%|▎         | 301/10000 [12:44<7:10:17,  2.66s/it]

Total reward after episode 301 is 1428.0


  3%|▎         | 302/10000 [12:45<5:29:54,  2.04s/it]

Total reward after episode 302 is 248.0


  3%|▎         | 303/10000 [12:49<7:10:27,  2.66s/it]

Total reward after episode 303 is 1430.0


  3%|▎         | 304/10000 [12:51<6:33:53,  2.44s/it]

Total reward after episode 304 is 609.0


  3%|▎         | 305/10000 [12:53<5:55:25,  2.20s/it]

Total reward after episode 305 is 602.0


  3%|▎         | 306/10000 [12:53<4:37:05,  1.72s/it]

Total reward after episode 306 is 248.0


  3%|▎         | 307/10000 [12:54<3:42:38,  1.38s/it]

Total reward after episode 307 is 252.0


  3%|▎         | 308/10000 [12:57<5:22:33,  2.00s/it]

Total reward after episode 308 is 1061.0


  3%|▎         | 309/10000 [12:58<4:14:22,  1.57s/it]

Total reward after episode 309 is 252.0


  3%|▎         | 310/10000 [13:00<4:26:22,  1.65s/it]

Total reward after episode 310 is 651.0


  3%|▎         | 311/10000 [13:00<3:35:17,  1.33s/it]

Total reward after episode 311 is 252.0


  3%|▎         | 312/10000 [13:01<2:59:31,  1.11s/it]

Total reward after episode 312 is 252.0


  3%|▎         | 313/10000 [13:03<3:28:00,  1.29s/it]

Total reward after episode 313 is 610.0


  3%|▎         | 314/10000 [13:07<6:04:02,  2.26s/it]

Total reward after episode 314 is 1700.0


  3%|▎         | 315/10000 [13:08<4:43:24,  1.76s/it]

Total reward after episode 315 is 252.0


  3%|▎         | 316/10000 [13:08<3:47:17,  1.41s/it]

Total reward after episode 316 is 252.0


  3%|▎         | 317/10000 [13:11<4:25:05,  1.64s/it]

Total reward after episode 317 is 605.0


  3%|▎         | 318/10000 [13:13<5:04:35,  1.89s/it]

Total reward after episode 318 is 727.0


  3%|▎         | 319/10000 [13:17<6:58:05,  2.59s/it]

Total reward after episode 319 is 1035.0


  3%|▎         | 320/10000 [13:18<5:23:24,  2.00s/it]

Total reward after episode 320 is 243.0


  3%|▎         | 321/10000 [13:22<6:47:14,  2.52s/it]

Total reward after episode 321 is 810.0


  3%|▎         | 322/10000 [13:27<9:20:49,  3.48s/it]

Total reward after episode 322 is 725.0


  3%|▎         | 323/10000 [13:29<8:07:30,  3.02s/it]

Total reward after episode 323 is 606.0


  3%|▎         | 324/10000 [13:30<6:09:39,  2.29s/it]

Total reward after episode 324 is 252.0


  3%|▎         | 325/10000 [13:30<4:47:41,  1.78s/it]

Total reward after episode 325 is 248.0


  3%|▎         | 326/10000 [13:35<7:21:49,  2.74s/it]

Total reward after episode 326 is 582.0


  3%|▎         | 327/10000 [13:37<6:34:35,  2.45s/it]

Total reward after episode 327 is 620.0


  3%|▎         | 328/10000 [13:38<5:36:31,  2.09s/it]

Total reward after episode 328 is 606.0


  3%|▎         | 329/10000 [13:49<12:11:52,  4.54s/it]

Total reward after episode 329 is 2321.0


  3%|▎         | 330/10000 [13:53<12:24:44,  4.62s/it]

Total reward after episode 330 is 1030.0


  3%|▎         | 331/10000 [13:57<11:36:11,  4.32s/it]

Total reward after episode 331 is 1040.0


  3%|▎         | 332/10000 [14:01<10:52:51,  4.05s/it]

Total reward after episode 332 is 1040.0


  3%|▎         | 333/10000 [14:02<8:39:25,  3.22s/it] 

Total reward after episode 333 is 607.0


  3%|▎         | 334/10000 [14:06<9:34:45,  3.57s/it]

Total reward after episode 334 is 1430.0


  3%|▎         | 335/10000 [14:10<9:32:22,  3.55s/it]

Total reward after episode 335 is 1039.0


  3%|▎         | 336/10000 [14:16<11:33:39,  4.31s/it]

Total reward after episode 336 is 1038.0


  3%|▎         | 337/10000 [14:21<12:29:05,  4.65s/it]

Total reward after episode 337 is 794.0


  3%|▎         | 338/10000 [14:26<12:30:48,  4.66s/it]

Total reward after episode 338 is 1575.0


  3%|▎         | 339/10000 [14:29<11:05:47,  4.13s/it]

Total reward after episode 339 is 1048.0


  3%|▎         | 340/10000 [14:29<8:17:48,  3.09s/it] 

Total reward after episode 340 is 242.0


  3%|▎         | 341/10000 [14:33<9:01:32,  3.36s/it]

Total reward after episode 341 is 1430.0


  3%|▎         | 342/10000 [14:36<8:42:10,  3.24s/it]

Total reward after episode 342 is 1046.0


  3%|▎         | 343/10000 [14:39<8:23:33,  3.13s/it]

Total reward after episode 343 is 775.0


  3%|▎         | 344/10000 [14:43<8:46:01,  3.27s/it]

Total reward after episode 344 is 760.0


  3%|▎         | 345/10000 [14:45<7:33:30,  2.82s/it]

Total reward after episode 345 is 651.0


  3%|▎         | 346/10000 [14:47<7:15:46,  2.71s/it]

Total reward after episode 346 is 729.0


  3%|▎         | 347/10000 [14:50<7:20:12,  2.74s/it]

Total reward after episode 347 is 1047.0


  3%|▎         | 348/10000 [14:58<11:53:05,  4.43s/it]

Total reward after episode 348 is 998.0


  3%|▎         | 349/10000 [14:59<8:48:17,  3.28s/it] 

Total reward after episode 349 is 252.0


  4%|▎         | 350/10000 [15:00<7:11:50,  2.69s/it]

Total reward after episode 350 is 607.0


  4%|▎         | 351/10000 [15:04<8:18:58,  3.10s/it]

Total reward after episode 351 is 1036.0


  4%|▎         | 352/10000 [15:05<6:20:17,  2.36s/it]

Total reward after episode 352 is 243.0


  4%|▎         | 353/10000 [15:08<7:10:55,  2.68s/it]

Total reward after episode 353 is 813.0


  4%|▎         | 354/10000 [15:09<5:34:36,  2.08s/it]

Total reward after episode 354 is 243.0


  4%|▎         | 355/10000 [15:12<5:56:28,  2.22s/it]

Total reward after episode 355 is 757.0


  4%|▎         | 356/10000 [15:15<7:10:43,  2.68s/it]

Total reward after episode 356 is 1038.0


  4%|▎         | 357/10000 [15:17<6:13:00,  2.32s/it]

Total reward after episode 357 is 627.0


  4%|▎         | 358/10000 [15:17<4:52:12,  1.82s/it]

Total reward after episode 358 is 243.0


  4%|▎         | 359/10000 [15:18<3:55:19,  1.46s/it]

Total reward after episode 359 is 243.0


  4%|▎         | 360/10000 [15:22<5:50:50,  2.18s/it]

Total reward after episode 360 is 1438.0


  4%|▎         | 361/10000 [15:24<5:19:55,  1.99s/it]

Total reward after episode 361 is 615.0


  4%|▎         | 362/10000 [15:25<4:45:19,  1.78s/it]

Total reward after episode 362 is 610.0


  4%|▎         | 363/10000 [15:26<4:31:32,  1.69s/it]

Total reward after episode 363 is 622.0


  4%|▎         | 364/10000 [15:35<10:04:58,  3.77s/it]

Total reward after episode 364 is 765.0


  4%|▎         | 365/10000 [15:36<7:34:33,  2.83s/it] 

Total reward after episode 365 is 243.0


  4%|▎         | 366/10000 [15:37<6:18:37,  2.36s/it]

Total reward after episode 366 is 608.0


  4%|▎         | 367/10000 [15:42<8:48:23,  3.29s/it]

Total reward after episode 367 is 1041.0


  4%|▎         | 368/10000 [15:43<6:38:35,  2.48s/it]

Total reward after episode 368 is 245.0


  4%|▎         | 369/10000 [15:46<6:51:52,  2.57s/it]

Total reward after episode 369 is 1046.0


  4%|▎         | 370/10000 [15:50<7:59:31,  2.99s/it]

Total reward after episode 370 is 1341.0


  4%|▎         | 371/10000 [15:50<6:04:17,  2.27s/it]

Total reward after episode 371 is 252.0


  4%|▎         | 372/10000 [15:51<4:43:08,  1.76s/it]

Total reward after episode 372 is 248.0


  4%|▎         | 373/10000 [15:52<4:37:50,  1.73s/it]

Total reward after episode 373 is 629.0


  4%|▎         | 374/10000 [15:56<5:54:11,  2.21s/it]

Total reward after episode 374 is 1042.0


  4%|▍         | 375/10000 [15:56<4:36:42,  1.72s/it]

Total reward after episode 375 is 245.0


  4%|▍         | 376/10000 [16:00<6:00:19,  2.25s/it]

Total reward after episode 376 is 1042.0


  4%|▍         | 377/10000 [16:01<5:12:33,  1.95s/it]

Total reward after episode 377 is 608.0


  4%|▍         | 378/10000 [16:04<6:24:53,  2.40s/it]

Total reward after episode 378 is 1039.0


  4%|▍         | 379/10000 [16:10<9:05:01,  3.40s/it]

Total reward after episode 379 is 1027.0


  4%|▍         | 380/10000 [16:12<7:30:57,  2.81s/it]

Total reward after episode 380 is 602.0


  4%|▍         | 381/10000 [16:14<7:11:12,  2.69s/it]

Total reward after episode 381 is 742.0


  4%|▍         | 382/10000 [16:18<8:21:18,  3.13s/it]

Total reward after episode 382 is 1055.0


  4%|▍         | 383/10000 [16:19<6:19:25,  2.37s/it]

Total reward after episode 383 is 248.0


  4%|▍         | 384/10000 [16:21<6:09:40,  2.31s/it]

Total reward after episode 384 is 742.0


  4%|▍         | 385/10000 [16:24<6:58:03,  2.61s/it]

Total reward after episode 385 is 1350.0


  4%|▍         | 386/10000 [16:26<5:54:29,  2.21s/it]

Total reward after episode 386 is 608.0


  4%|▍         | 387/10000 [16:27<5:22:50,  2.02s/it]

Total reward after episode 387 is 640.0


  4%|▍         | 388/10000 [16:31<6:40:21,  2.50s/it]

Total reward after episode 388 is 810.0


  4%|▍         | 389/10000 [16:34<7:27:32,  2.79s/it]

Total reward after episode 389 is 812.0


  4%|▍         | 390/10000 [16:35<5:42:13,  2.14s/it]

Total reward after episode 390 is 252.0


  4%|▍         | 391/10000 [16:40<8:14:40,  3.09s/it]

Total reward after episode 391 is 766.0


  4%|▍         | 392/10000 [16:42<7:23:25,  2.77s/it]

Total reward after episode 392 is 627.0


  4%|▍         | 393/10000 [16:44<6:23:37,  2.40s/it]

Total reward after episode 393 is 637.0


  4%|▍         | 394/10000 [16:45<5:30:36,  2.06s/it]

Total reward after episode 394 is 613.0


  4%|▍         | 395/10000 [16:48<6:30:38,  2.44s/it]

Total reward after episode 395 is 1042.0


  4%|▍         | 396/10000 [16:50<6:03:48,  2.27s/it]

Total reward after episode 396 is 733.0


  4%|▍         | 397/10000 [16:52<5:31:32,  2.07s/it]

Total reward after episode 397 is 637.0


  4%|▍         | 398/10000 [16:55<6:10:05,  2.31s/it]

Total reward after episode 398 is 787.0


  4%|▍         | 399/10000 [16:58<6:51:05,  2.57s/it]

Total reward after episode 399 is 724.0


  4%|▍         | 400/10000 [17:01<7:03:44,  2.65s/it]

Total reward after episode 400 is 790.0


  4%|▍         | 401/10000 [17:02<6:11:34,  2.32s/it]

Total reward after episode 401 is 619.0


  4%|▍         | 402/10000 [17:05<6:34:19,  2.47s/it]

Total reward after episode 402 is 725.0


  4%|▍         | 403/10000 [17:09<7:56:12,  2.98s/it]

Total reward after episode 403 is 1698.0


  4%|▍         | 404/10000 [17:14<9:24:32,  3.53s/it]

Total reward after episode 404 is 591.0


  4%|▍         | 405/10000 [17:17<9:11:59,  3.45s/it]

Total reward after episode 405 is 1043.0


  4%|▍         | 406/10000 [17:21<9:22:32,  3.52s/it]

Total reward after episode 406 is 817.0


  4%|▍         | 407/10000 [17:24<8:41:58,  3.26s/it]

Total reward after episode 407 is 1048.0


  4%|▍         | 408/10000 [17:27<8:30:15,  3.19s/it]

Total reward after episode 408 is 785.0


  4%|▍         | 409/10000 [17:28<7:10:48,  2.70s/it]

Total reward after episode 409 is 637.0


  4%|▍         | 410/10000 [17:31<7:05:57,  2.67s/it]

Total reward after episode 410 is 1049.0


  4%|▍         | 411/10000 [17:34<7:49:52,  2.94s/it]

Total reward after episode 411 is 1060.0


  4%|▍         | 412/10000 [17:36<6:46:42,  2.55s/it]

Total reward after episode 412 is 609.0


  4%|▍         | 413/10000 [17:49<14:47:47,  5.56s/it]

Total reward after episode 413 is 728.0


  4%|▍         | 414/10000 [17:51<11:53:04,  4.46s/it]

Total reward after episode 414 is 632.0


  4%|▍         | 415/10000 [17:52<9:22:37,  3.52s/it] 

Total reward after episode 415 is 614.0


  4%|▍         | 416/10000 [17:57<11:04:27,  4.16s/it]

Total reward after episode 416 is 1022.0


  4%|▍         | 417/10000 [18:01<10:37:52,  3.99s/it]

Total reward after episode 417 is 1038.0


  4%|▍         | 418/10000 [18:05<10:16:51,  3.86s/it]

Total reward after episode 418 is 1345.0


  4%|▍         | 419/10000 [18:06<8:16:50,  3.11s/it] 

Total reward after episode 419 is 606.0


  4%|▍         | 420/10000 [18:07<6:15:52,  2.35s/it]

Total reward after episode 420 is 248.0


  4%|▍         | 421/10000 [18:09<5:58:30,  2.25s/it]

Total reward after episode 421 is 733.0


  4%|▍         | 422/10000 [18:12<6:58:35,  2.62s/it]

Total reward after episode 422 is 601.0


  4%|▍         | 423/10000 [18:15<7:21:39,  2.77s/it]

Total reward after episode 423 is 1045.0


  4%|▍         | 424/10000 [18:17<6:22:01,  2.39s/it]

Total reward after episode 424 is 614.0


  4%|▍         | 425/10000 [18:18<5:45:35,  2.17s/it]

Total reward after episode 425 is 652.0


  4%|▍         | 426/10000 [18:21<6:21:14,  2.39s/it]

Total reward after episode 426 is 641.0


  4%|▍         | 427/10000 [18:23<5:43:27,  2.15s/it]

Total reward after episode 427 is 611.0


  4%|▍         | 428/10000 [18:30<9:35:58,  3.61s/it]

Total reward after episode 428 is 1009.0


  4%|▍         | 429/10000 [18:32<8:14:46,  3.10s/it]

Total reward after episode 429 is 627.0


  4%|▍         | 430/10000 [18:33<6:22:14,  2.40s/it]

Total reward after episode 430 is 231.0


  4%|▍         | 431/10000 [18:33<4:57:01,  1.86s/it]

Total reward after episode 431 is 248.0


  4%|▍         | 432/10000 [18:36<6:05:29,  2.29s/it]

Total reward after episode 432 is 1044.0


  4%|▍         | 433/10000 [18:39<6:41:53,  2.52s/it]

Total reward after episode 433 is 1045.0


  4%|▍         | 434/10000 [18:41<6:01:15,  2.27s/it]

Total reward after episode 434 is 635.0


  4%|▍         | 435/10000 [18:44<6:48:19,  2.56s/it]

Total reward after episode 435 is 1044.0


  4%|▍         | 436/10000 [18:48<7:16:29,  2.74s/it]

Total reward after episode 436 is 1046.0


  4%|▍         | 437/10000 [18:49<6:18:59,  2.38s/it]

Total reward after episode 437 is 600.0


  4%|▍         | 438/10000 [18:50<5:28:48,  2.06s/it]

Total reward after episode 438 is 608.0


  4%|▍         | 439/10000 [18:51<4:18:24,  1.62s/it]

Total reward after episode 439 is 252.0


  4%|▍         | 440/10000 [18:52<4:04:28,  1.53s/it]

Total reward after episode 440 is 605.0


  4%|▍         | 441/10000 [18:53<3:20:20,  1.26s/it]

Total reward after episode 441 is 248.0


  4%|▍         | 442/10000 [18:56<4:56:07,  1.86s/it]

Total reward after episode 442 is 1041.0


  4%|▍         | 443/10000 [18:59<5:38:35,  2.13s/it]

Total reward after episode 443 is 1048.0


  4%|▍         | 444/10000 [19:00<5:09:48,  1.95s/it]

Total reward after episode 444 is 636.0


  4%|▍         | 445/10000 [19:02<4:49:35,  1.82s/it]

Total reward after episode 445 is 639.0


  4%|▍         | 446/10000 [19:06<6:23:02,  2.41s/it]

Total reward after episode 446 is 1341.0


  4%|▍         | 447/10000 [19:09<7:13:12,  2.72s/it]

Total reward after episode 447 is 1041.0


  4%|▍         | 448/10000 [19:11<6:07:20,  2.31s/it]

Total reward after episode 448 is 608.0


  4%|▍         | 449/10000 [19:13<6:33:32,  2.47s/it]

Total reward after episode 449 is 1049.0


  4%|▍         | 450/10000 [19:14<5:04:19,  1.91s/it]

Total reward after episode 450 is 249.0


  5%|▍         | 451/10000 [19:15<4:03:21,  1.53s/it]

Total reward after episode 451 is 252.0


  5%|▍         | 452/10000 [19:15<3:22:45,  1.27s/it]

Total reward after episode 452 is 238.0


  5%|▍         | 453/10000 [19:17<3:48:31,  1.44s/it]

Total reward after episode 453 is 633.0


  5%|▍         | 454/10000 [19:25<8:32:12,  3.22s/it]

Total reward after episode 454 is 1667.0


  5%|▍         | 455/10000 [19:28<8:38:36,  3.26s/it]

Total reward after episode 455 is 1063.0


  5%|▍         | 456/10000 [19:29<6:33:43,  2.48s/it]

Total reward after episode 456 is 242.0


  5%|▍         | 457/10000 [19:32<6:59:19,  2.64s/it]

Total reward after episode 457 is 1046.0


  5%|▍         | 458/10000 [19:33<5:53:30,  2.22s/it]

Total reward after episode 458 is 610.0


  5%|▍         | 459/10000 [19:37<7:42:00,  2.91s/it]

Total reward after episode 459 is 718.0


  5%|▍         | 460/10000 [19:39<6:42:32,  2.53s/it]

Total reward after episode 460 is 607.0


  5%|▍         | 461/10000 [19:57<18:40:39,  7.05s/it]

Total reward after episode 461 is 682.0


  5%|▍         | 462/10000 [19:58<14:04:55,  5.32s/it]

Total reward after episode 462 is 610.0


  5%|▍         | 463/10000 [20:02<13:12:51,  4.99s/it]

Total reward after episode 463 is 1033.0


  5%|▍         | 464/10000 [20:04<10:30:22,  3.97s/it]

Total reward after episode 464 is 605.0


  5%|▍         | 465/10000 [20:09<11:48:15,  4.46s/it]

Total reward after episode 465 is 1018.0


  5%|▍         | 466/10000 [20:12<10:35:56,  4.00s/it]

Total reward after episode 466 is 1041.0


  5%|▍         | 467/10000 [20:14<8:40:50,  3.28s/it] 

Total reward after episode 467 is 607.0


  5%|▍         | 468/10000 [20:15<7:05:24,  2.68s/it]

Total reward after episode 468 is 610.0


  5%|▍         | 469/10000 [20:16<5:25:54,  2.05s/it]

Total reward after episode 469 is 252.0


  5%|▍         | 470/10000 [20:20<7:31:04,  2.84s/it]

Total reward after episode 470 is 1572.0


  5%|▍         | 471/10000 [20:22<6:15:17,  2.36s/it]

Total reward after episode 471 is 608.0


  5%|▍         | 472/10000 [20:22<4:55:11,  1.86s/it]

Total reward after episode 472 is 241.0


  5%|▍         | 473/10000 [20:25<5:50:45,  2.21s/it]

Total reward after episode 473 is 816.0


  5%|▍         | 474/10000 [20:28<5:52:24,  2.22s/it]

Total reward after episode 474 is 604.0


  5%|▍         | 475/10000 [20:29<5:33:56,  2.10s/it]

Total reward after episode 475 is 638.0


  5%|▍         | 476/10000 [20:30<4:24:23,  1.67s/it]

Total reward after episode 476 is 241.0


  5%|▍         | 477/10000 [20:34<6:14:43,  2.36s/it]

Total reward after episode 477 is 1039.0


  5%|▍         | 478/10000 [20:37<6:42:35,  2.54s/it]

Total reward after episode 478 is 1047.0


  5%|▍         | 479/10000 [20:38<5:09:47,  1.95s/it]

Total reward after episode 479 is 248.0


  5%|▍         | 480/10000 [20:38<4:04:58,  1.54s/it]

Total reward after episode 480 is 248.0


  5%|▍         | 481/10000 [20:42<6:18:42,  2.39s/it]

Total reward after episode 481 is 1055.0


  5%|▍         | 482/10000 [20:44<5:38:21,  2.13s/it]

Total reward after episode 482 is 634.0


  5%|▍         | 483/10000 [20:48<6:54:52,  2.62s/it]

Total reward after episode 483 is 1040.0


  5%|▍         | 484/10000 [20:51<7:07:11,  2.69s/it]

Total reward after episode 484 is 1046.0


  5%|▍         | 485/10000 [20:54<7:37:35,  2.89s/it]

Total reward after episode 485 is 813.0


  5%|▍         | 486/10000 [20:55<5:48:29,  2.20s/it]

Total reward after episode 486 is 245.0


  5%|▍         | 487/10000 [20:57<6:11:22,  2.34s/it]

Total reward after episode 487 is 1047.0


  5%|▍         | 488/10000 [20:59<5:21:00,  2.02s/it]

Total reward after episode 488 is 614.0


  5%|▍         | 489/10000 [21:02<6:07:45,  2.32s/it]

Total reward after episode 489 is 1045.0


  5%|▍         | 490/10000 [21:05<7:08:58,  2.71s/it]

Total reward after episode 490 is 1343.0


  5%|▍         | 491/10000 [21:07<6:11:27,  2.34s/it]

Total reward after episode 491 is 610.0


  5%|▍         | 492/10000 [21:10<6:54:09,  2.61s/it]

Total reward after episode 492 is 1042.0


  5%|▍         | 493/10000 [21:11<5:19:56,  2.02s/it]

Total reward after episode 493 is 242.0


  5%|▍         | 494/10000 [21:11<4:15:11,  1.61s/it]

Total reward after episode 494 is 242.0


  5%|▍         | 495/10000 [21:12<3:26:11,  1.30s/it]

Total reward after episode 495 is 247.0


  5%|▍         | 496/10000 [21:14<4:07:40,  1.56s/it]

Total reward after episode 496 is 627.0


  5%|▍         | 497/10000 [21:15<3:20:59,  1.27s/it]

Total reward after episode 497 is 252.0


  5%|▍         | 498/10000 [21:16<3:33:18,  1.35s/it]

Total reward after episode 498 is 632.0


  5%|▍         | 499/10000 [21:17<3:02:35,  1.15s/it]

Total reward after episode 499 is 233.0


  5%|▌         | 500/10000 [21:20<4:46:44,  1.81s/it]

Total reward after episode 500 is 1042.0


  5%|▌         | 501/10000 [21:24<6:45:50,  2.56s/it]

Total reward after episode 501 is 1034.0


  5%|▌         | 502/10000 [21:28<7:24:14,  2.81s/it]

Total reward after episode 502 is 1037.0


  5%|▌         | 503/10000 [21:28<5:39:09,  2.14s/it]

Total reward after episode 503 is 252.0


  5%|▌         | 504/10000 [21:32<7:09:28,  2.71s/it]

Total reward after episode 504 is 1326.0


  5%|▌         | 505/10000 [21:34<6:13:18,  2.36s/it]

Total reward after episode 505 is 635.0


  5%|▌         | 506/10000 [21:38<7:32:21,  2.86s/it]

Total reward after episode 506 is 1339.0


  5%|▌         | 507/10000 [21:39<5:48:08,  2.20s/it]

Total reward after episode 507 is 242.0


  5%|▌         | 508/10000 [21:42<6:21:29,  2.41s/it]

Total reward after episode 508 is 1047.0


  5%|▌         | 509/10000 [21:45<7:15:56,  2.76s/it]

Total reward after episode 509 is 1035.0


  5%|▌         | 510/10000 [21:48<7:15:11,  2.75s/it]

Total reward after episode 510 is 1049.0


  5%|▌         | 511/10000 [21:51<7:51:06,  2.98s/it]

Total reward after episode 511 is 812.0


  5%|▌         | 512/10000 [21:53<6:45:45,  2.57s/it]

Total reward after episode 512 is 608.0


  5%|▌         | 513/10000 [21:56<7:28:43,  2.84s/it]

Total reward after episode 513 is 1043.0


  5%|▌         | 514/10000 [21:57<5:42:48,  2.17s/it]

Total reward after episode 514 is 245.0


  5%|▌         | 515/10000 [22:00<6:02:28,  2.29s/it]

Total reward after episode 515 is 1071.0


  5%|▌         | 516/10000 [22:00<4:46:57,  1.82s/it]

Total reward after episode 516 is 233.0


  5%|▌         | 517/10000 [22:03<5:47:27,  2.20s/it]

Total reward after episode 517 is 1043.0


  5%|▌         | 518/10000 [22:04<4:31:49,  1.72s/it]

Total reward after episode 518 is 252.0


  5%|▌         | 519/10000 [22:06<4:22:34,  1.66s/it]

Total reward after episode 519 is 602.0


  5%|▌         | 520/10000 [22:07<4:19:35,  1.64s/it]

Total reward after episode 520 is 634.0


  5%|▌         | 521/10000 [22:11<5:42:11,  2.17s/it]

Total reward after episode 521 is 636.0


  5%|▌         | 522/10000 [22:14<6:24:07,  2.43s/it]

Total reward after episode 522 is 1045.0


  5%|▌         | 523/10000 [22:16<6:34:10,  2.50s/it]

Total reward after episode 523 is 1047.0


  5%|▌         | 524/10000 [22:19<6:56:01,  2.63s/it]

Total reward after episode 524 is 1045.0


  5%|▌         | 525/10000 [22:20<5:22:02,  2.04s/it]

Total reward after episode 525 is 242.0


  5%|▌         | 526/10000 [22:24<7:16:24,  2.76s/it]

Total reward after episode 526 is 1031.0


  5%|▌         | 527/10000 [22:28<7:56:51,  3.02s/it]

Total reward after episode 527 is 1582.0


  5%|▌         | 528/10000 [22:31<7:48:20,  2.97s/it]

Total reward after episode 528 is 817.0


  5%|▌         | 529/10000 [22:31<5:57:05,  2.26s/it]

Total reward after episode 529 is 250.0


  5%|▌         | 530/10000 [22:33<5:09:12,  1.96s/it]

Total reward after episode 530 is 611.0


  5%|▌         | 531/10000 [22:34<4:51:17,  1.85s/it]

Total reward after episode 531 is 641.0


  5%|▌         | 532/10000 [22:36<4:44:42,  1.80s/it]

Total reward after episode 532 is 633.0


  5%|▌         | 533/10000 [22:39<5:50:13,  2.22s/it]

Total reward after episode 533 is 1346.0


  5%|▌         | 534/10000 [22:41<5:22:15,  2.04s/it]

Total reward after episode 534 is 602.0


  5%|▌         | 535/10000 [22:44<6:17:59,  2.40s/it]

Total reward after episode 535 is 814.0


  5%|▌         | 536/10000 [22:45<4:52:28,  1.85s/it]

Total reward after episode 536 is 245.0


  5%|▌         | 537/10000 [22:46<4:38:28,  1.77s/it]

Total reward after episode 537 is 608.0


  5%|▌         | 538/10000 [22:47<4:15:11,  1.62s/it]

Total reward after episode 538 is 608.0


  5%|▌         | 539/10000 [22:48<3:27:51,  1.32s/it]

Total reward after episode 539 is 246.0


  5%|▌         | 540/10000 [22:53<6:22:38,  2.43s/it]

Total reward after episode 540 is 1865.0


  5%|▌         | 541/10000 [22:57<7:14:56,  2.76s/it]

Total reward after episode 541 is 1427.0


  5%|▌         | 542/10000 [22:58<6:03:35,  2.31s/it]

Total reward after episode 542 is 608.0


  5%|▌         | 543/10000 [22:59<5:27:30,  2.08s/it]

Total reward after episode 543 is 610.0


  5%|▌         | 544/10000 [23:03<6:27:56,  2.46s/it]

Total reward after episode 544 is 1043.0


  5%|▌         | 545/10000 [23:04<5:31:10,  2.10s/it]

Total reward after episode 545 is 611.0


  5%|▌         | 546/10000 [23:05<5:02:21,  1.92s/it]

Total reward after episode 546 is 634.0


  5%|▌         | 547/10000 [23:07<5:00:49,  1.91s/it]

Total reward after episode 547 is 636.0


  5%|▌         | 548/10000 [23:11<6:03:51,  2.31s/it]

Total reward after episode 548 is 814.0


  5%|▌         | 549/10000 [23:13<6:21:55,  2.42s/it]

Total reward after episode 549 is 819.0


  6%|▌         | 550/10000 [23:18<8:20:08,  3.18s/it]

Total reward after episode 550 is 1863.0


  6%|▌         | 551/10000 [23:19<6:18:38,  2.40s/it]

Total reward after episode 551 is 252.0


  6%|▌         | 552/10000 [23:20<5:24:18,  2.06s/it]

Total reward after episode 552 is 608.0


  6%|▌         | 553/10000 [23:22<5:01:17,  1.91s/it]

Total reward after episode 553 is 638.0


  6%|▌         | 554/10000 [23:22<4:06:42,  1.57s/it]

Total reward after episode 554 is 230.0


  6%|▌         | 555/10000 [23:26<5:35:46,  2.13s/it]

Total reward after episode 555 is 1042.0


  6%|▌         | 556/10000 [23:30<7:09:18,  2.73s/it]

Total reward after episode 556 is 1340.0


  6%|▌         | 557/10000 [23:31<5:29:30,  2.09s/it]

Total reward after episode 557 is 250.0


  6%|▌         | 558/10000 [23:33<6:04:50,  2.32s/it]

Total reward after episode 558 is 1066.0


  6%|▌         | 559/10000 [23:35<5:16:43,  2.01s/it]

Total reward after episode 559 is 612.0


  6%|▌         | 560/10000 [23:36<4:54:44,  1.87s/it]

Total reward after episode 560 is 605.0


  6%|▌         | 561/10000 [23:38<4:25:23,  1.69s/it]

Total reward after episode 561 is 613.0


  6%|▌         | 562/10000 [23:39<4:10:09,  1.59s/it]

Total reward after episode 562 is 606.0


  6%|▌         | 563/10000 [23:43<6:25:58,  2.45s/it]

Total reward after episode 563 is 1687.0


  6%|▌         | 564/10000 [23:46<6:44:55,  2.57s/it]

Total reward after episode 564 is 817.0


  6%|▌         | 565/10000 [23:52<9:03:17,  3.45s/it]

Total reward after episode 565 is 794.0


  6%|▌         | 566/10000 [23:53<7:31:37,  2.87s/it]

Total reward after episode 566 is 654.0


  6%|▌         | 567/10000 [23:56<7:21:05,  2.81s/it]

Total reward after episode 567 is 630.0


  6%|▌         | 568/10000 [23:58<6:37:14,  2.53s/it]

Total reward after episode 568 is 631.0


  6%|▌         | 569/10000 [24:01<7:02:49,  2.69s/it]

Total reward after episode 569 is 1066.0


  6%|▌         | 570/10000 [24:02<6:07:47,  2.34s/it]

Total reward after episode 570 is 615.0


  6%|▌         | 571/10000 [24:07<7:56:05,  3.03s/it]

Total reward after episode 571 is 1690.0


  6%|▌         | 572/10000 [24:08<6:35:43,  2.52s/it]

Total reward after episode 572 is 610.0


  6%|▌         | 573/10000 [24:11<6:43:16,  2.57s/it]

Total reward after episode 573 is 742.0


  6%|▌         | 574/10000 [24:14<7:09:35,  2.73s/it]

Total reward after episode 574 is 1045.0


  6%|▌         | 575/10000 [24:15<5:28:49,  2.09s/it]

Total reward after episode 575 is 252.0


  6%|▌         | 576/10000 [24:19<7:09:44,  2.74s/it]

Total reward after episode 576 is 1032.0


  6%|▌         | 577/10000 [24:21<6:21:12,  2.43s/it]

Total reward after episode 577 is 621.0


  6%|▌         | 578/10000 [24:21<4:54:37,  1.88s/it]

Total reward after episode 578 is 249.0


  6%|▌         | 579/10000 [24:24<5:20:10,  2.04s/it]

Total reward after episode 579 is 1050.0


  6%|▌         | 580/10000 [24:24<4:11:34,  1.60s/it]

Total reward after episode 580 is 247.0


  6%|▌         | 581/10000 [24:25<3:53:52,  1.49s/it]

Total reward after episode 581 is 606.0


  6%|▌         | 582/10000 [24:27<4:10:59,  1.60s/it]

Total reward after episode 582 is 607.0


  6%|▌         | 583/10000 [24:30<4:59:17,  1.91s/it]

Total reward after episode 583 is 1156.0


  6%|▌         | 584/10000 [24:31<4:04:19,  1.56s/it]

Total reward after episode 584 is 239.0


  6%|▌         | 585/10000 [24:32<3:49:33,  1.46s/it]

Total reward after episode 585 is 608.0


  6%|▌         | 586/10000 [24:33<3:41:06,  1.41s/it]

Total reward after episode 586 is 610.0


  6%|▌         | 587/10000 [24:36<5:05:23,  1.95s/it]

Total reward after episode 587 is 1044.0


  6%|▌         | 588/10000 [24:39<5:40:11,  2.17s/it]

Total reward after episode 588 is 1070.0


  6%|▌         | 589/10000 [24:41<5:06:18,  1.95s/it]

Total reward after episode 589 is 610.0


  6%|▌         | 590/10000 [24:44<6:06:17,  2.34s/it]

Total reward after episode 590 is 814.0


  6%|▌         | 591/10000 [24:47<6:37:55,  2.54s/it]

Total reward after episode 591 is 1040.0


  6%|▌         | 592/10000 [24:49<6:33:47,  2.51s/it]

Total reward after episode 592 is 1051.0


  6%|▌         | 593/10000 [24:50<5:33:40,  2.13s/it]

Total reward after episode 593 is 611.0


  6%|▌         | 594/10000 [24:53<6:04:08,  2.32s/it]

Total reward after episode 594 is 609.0


  6%|▌         | 595/10000 [24:58<7:53:35,  3.02s/it]

Total reward after episode 595 is 1328.0


  6%|▌         | 596/10000 [24:59<6:29:43,  2.49s/it]

Total reward after episode 596 is 606.0


  6%|▌         | 597/10000 [25:00<5:31:49,  2.12s/it]

Total reward after episode 597 is 608.0


  6%|▌         | 598/10000 [25:03<5:43:32,  2.19s/it]

Total reward after episode 598 is 1048.0


  6%|▌         | 599/10000 [25:04<5:00:11,  1.92s/it]

Total reward after episode 599 is 610.0


  6%|▌         | 600/10000 [25:05<4:29:03,  1.72s/it]

Total reward after episode 600 is 608.0


  6%|▌         | 601/10000 [25:09<6:05:48,  2.34s/it]

Total reward after episode 601 is 1037.0


  6%|▌         | 602/10000 [25:12<6:31:05,  2.50s/it]

Total reward after episode 602 is 1047.0


  6%|▌         | 603/10000 [25:13<5:05:01,  1.95s/it]

Total reward after episode 603 is 239.0


  6%|▌         | 604/10000 [25:13<4:07:21,  1.58s/it]

Total reward after episode 604 is 237.0


  6%|▌         | 605/10000 [25:16<4:58:34,  1.91s/it]

Total reward after episode 605 is 1155.0


  6%|▌         | 606/10000 [25:19<5:50:35,  2.24s/it]

Total reward after episode 606 is 816.0


  6%|▌         | 607/10000 [25:20<5:03:09,  1.94s/it]

Total reward after episode 607 is 606.0


  6%|▌         | 608/10000 [25:21<4:00:16,  1.53s/it]

Total reward after episode 608 is 249.0


  6%|▌         | 609/10000 [25:23<4:27:41,  1.71s/it]

Total reward after episode 609 is 742.0


  6%|▌         | 610/10000 [25:25<4:35:59,  1.76s/it]

Total reward after episode 610 is 610.0


  6%|▌         | 611/10000 [25:26<4:13:25,  1.62s/it]

Total reward after episode 611 is 607.0


  6%|▌         | 612/10000 [25:28<4:07:11,  1.58s/it]

Total reward after episode 612 is 612.0


  6%|▌         | 613/10000 [25:31<5:31:03,  2.12s/it]

Total reward after episode 613 is 1337.0


  6%|▌         | 614/10000 [25:36<7:59:40,  3.07s/it]

Total reward after episode 614 is 1025.0


  6%|▌         | 615/10000 [25:40<8:39:54,  3.32s/it]

Total reward after episode 615 is 807.0


  6%|▌         | 616/10000 [25:43<8:12:28,  3.15s/it]

Total reward after episode 616 is 1046.0


  6%|▌         | 617/10000 [25:44<6:15:12,  2.40s/it]

Total reward after episode 617 is 239.0


  6%|▌         | 618/10000 [25:49<8:27:43,  3.25s/it]

Total reward after episode 618 is 1850.0


  6%|▌         | 619/10000 [25:53<9:03:00,  3.47s/it]

Total reward after episode 619 is 1432.0


  6%|▌         | 620/10000 [25:54<7:21:04,  2.82s/it]

Total reward after episode 620 is 607.0


  6%|▌         | 621/10000 [25:55<6:12:48,  2.39s/it]

Total reward after episode 621 is 605.0


  6%|▌         | 622/10000 [26:00<8:13:18,  3.16s/it]

Total reward after episode 622 is 1849.0


  6%|▌         | 623/10000 [26:01<6:13:17,  2.39s/it]

Total reward after episode 623 is 249.0


  6%|▌         | 624/10000 [26:02<5:22:49,  2.07s/it]

Total reward after episode 624 is 614.0


  6%|▋         | 625/10000 [26:04<5:12:04,  2.00s/it]

Total reward after episode 625 is 633.0


  6%|▋         | 626/10000 [26:05<4:07:04,  1.58s/it]

Total reward after episode 626 is 249.0


  6%|▋         | 627/10000 [26:10<6:52:38,  2.64s/it]

Total reward after episode 627 is 1925.0


  6%|▋         | 628/10000 [26:12<6:15:11,  2.40s/it]

Total reward after episode 628 is 633.0


  6%|▋         | 629/10000 [26:13<5:32:03,  2.13s/it]

Total reward after episode 629 is 738.0


  6%|▋         | 630/10000 [26:15<5:17:09,  2.03s/it]

Total reward after episode 630 is 605.0


  6%|▋         | 631/10000 [26:17<5:06:29,  1.96s/it]

Total reward after episode 631 is 606.0


  6%|▋         | 632/10000 [26:17<4:02:02,  1.55s/it]

Total reward after episode 632 is 248.0


  6%|▋         | 633/10000 [26:22<5:59:56,  2.31s/it]

Total reward after episode 633 is 1584.0


  6%|▋         | 634/10000 [26:22<4:39:47,  1.79s/it]

Total reward after episode 634 is 248.0


  6%|▋         | 635/10000 [26:24<4:26:24,  1.71s/it]

Total reward after episode 635 is 654.0


  6%|▋         | 636/10000 [26:24<3:34:09,  1.37s/it]

Total reward after episode 636 is 248.0


  6%|▋         | 637/10000 [26:26<3:39:03,  1.40s/it]

Total reward after episode 637 is 654.0


  6%|▋         | 638/10000 [26:28<4:29:19,  1.73s/it]

Total reward after episode 638 is 1046.0


  6%|▋         | 639/10000 [26:30<4:23:01,  1.69s/it]

Total reward after episode 639 is 611.0


  6%|▋         | 640/10000 [26:33<5:39:21,  2.18s/it]

Total reward after episode 640 is 1042.0


  6%|▋         | 641/10000 [26:34<4:55:11,  1.89s/it]

Total reward after episode 641 is 606.0


  6%|▋         | 642/10000 [26:38<6:00:11,  2.31s/it]

Total reward after episode 642 is 814.0


  6%|▋         | 643/10000 [26:39<5:27:36,  2.10s/it]

Total reward after episode 643 is 636.0


  6%|▋         | 644/10000 [26:41<5:03:12,  1.94s/it]

Total reward after episode 644 is 633.0


  6%|▋         | 645/10000 [26:44<6:21:14,  2.45s/it]

Total reward after episode 645 is 1425.0


  6%|▋         | 646/10000 [26:49<7:51:28,  3.02s/it]

Total reward after episode 646 is 1849.0


  6%|▋         | 647/10000 [26:50<6:43:06,  2.59s/it]

Total reward after episode 647 is 600.0


  6%|▋         | 648/10000 [26:53<6:46:47,  2.61s/it]

Total reward after episode 648 is 1047.0


  6%|▋         | 649/10000 [26:56<7:07:50,  2.75s/it]

Total reward after episode 649 is 1332.0


  6%|▋         | 650/10000 [26:58<6:14:29,  2.40s/it]

Total reward after episode 650 is 609.0


  7%|▋         | 651/10000 [26:59<5:35:54,  2.16s/it]

Total reward after episode 651 is 638.0


  7%|▋         | 652/10000 [27:01<5:33:27,  2.14s/it]

Total reward after episode 652 is 1053.0


  7%|▋         | 653/10000 [27:04<5:54:03,  2.27s/it]

Total reward after episode 653 is 1069.0


  7%|▋         | 654/10000 [27:05<5:06:31,  1.97s/it]

Total reward after episode 654 is 606.0


  7%|▋         | 655/10000 [27:06<4:02:31,  1.56s/it]

Total reward after episode 655 is 251.0


  7%|▋         | 656/10000 [27:06<3:17:19,  1.27s/it]

Total reward after episode 656 is 249.0


  7%|▋         | 657/10000 [27:08<3:33:08,  1.37s/it]

Total reward after episode 657 is 630.0


  7%|▋         | 658/10000 [27:10<4:26:58,  1.71s/it]

Total reward after episode 658 is 1046.0


  7%|▋         | 659/10000 [27:12<4:06:27,  1.58s/it]

Total reward after episode 659 is 607.0


  7%|▋         | 660/10000 [27:13<3:50:46,  1.48s/it]

Total reward after episode 660 is 607.0


  7%|▋         | 661/10000 [27:15<3:53:29,  1.50s/it]

Total reward after episode 661 is 639.0


  7%|▋         | 662/10000 [27:16<3:55:32,  1.51s/it]

Total reward after episode 662 is 612.0


  7%|▋         | 663/10000 [27:17<3:12:21,  1.24s/it]

Total reward after episode 663 is 252.0


  7%|▋         | 664/10000 [27:18<3:24:47,  1.32s/it]

Total reward after episode 664 is 738.0


  7%|▋         | 665/10000 [27:20<3:39:13,  1.41s/it]

Total reward after episode 665 is 629.0


  7%|▋         | 666/10000 [27:23<4:51:28,  1.87s/it]

Total reward after episode 666 is 816.0


  7%|▋         | 667/10000 [27:26<5:34:53,  2.15s/it]

Total reward after episode 667 is 1045.0


  7%|▋         | 668/10000 [27:27<5:07:13,  1.98s/it]

Total reward after episode 668 is 609.0


  7%|▋         | 669/10000 [27:29<4:46:55,  1.85s/it]

Total reward after episode 669 is 738.0


  7%|▋         | 670/10000 [27:31<5:18:53,  2.05s/it]

Total reward after episode 670 is 1049.0


  7%|▋         | 671/10000 [27:33<4:52:39,  1.88s/it]

Total reward after episode 671 is 633.0


  7%|▋         | 672/10000 [27:36<6:09:01,  2.37s/it]

Total reward after episode 672 is 1580.0


  7%|▋         | 673/10000 [27:38<5:55:27,  2.29s/it]

Total reward after episode 673 is 622.0


  7%|▋         | 674/10000 [27:40<5:22:23,  2.07s/it]

Total reward after episode 674 is 639.0


  7%|▋         | 675/10000 [27:42<5:01:53,  1.94s/it]

Total reward after episode 675 is 607.0


  7%|▋         | 676/10000 [27:42<4:04:30,  1.57s/it]

Total reward after episode 676 is 232.0


  7%|▋         | 677/10000 [27:44<4:12:06,  1.62s/it]

Total reward after episode 677 is 618.0


  7%|▋         | 678/10000 [27:46<4:08:03,  1.60s/it]

Total reward after episode 678 is 604.0


  7%|▋         | 679/10000 [27:47<4:00:37,  1.55s/it]

Total reward after episode 679 is 602.0


  7%|▋         | 680/10000 [27:51<5:50:41,  2.26s/it]

Total reward after episode 680 is 1585.0


  7%|▋         | 681/10000 [27:52<5:10:29,  2.00s/it]

Total reward after episode 681 is 623.0


  7%|▋         | 682/10000 [27:56<6:42:51,  2.59s/it]

Total reward after episode 682 is 1583.0


  7%|▋         | 683/10000 [27:58<5:53:36,  2.28s/it]

Total reward after episode 683 is 737.0


  7%|▋         | 684/10000 [28:01<6:28:29,  2.50s/it]

Total reward after episode 684 is 816.0


  7%|▋         | 685/10000 [28:03<5:55:23,  2.29s/it]

Total reward after episode 685 is 627.0


  7%|▋         | 686/10000 [28:06<7:08:53,  2.76s/it]

Total reward after episode 686 is 1338.0


  7%|▋         | 687/10000 [28:09<6:52:20,  2.66s/it]

Total reward after episode 687 is 1051.0


  7%|▋         | 688/10000 [28:12<7:01:57,  2.72s/it]

Total reward after episode 688 is 1046.0


  7%|▋         | 689/10000 [28:15<7:38:31,  2.95s/it]

Total reward after episode 689 is 1331.0


  7%|▋         | 690/10000 [28:18<7:42:46,  2.98s/it]

Total reward after episode 690 is 816.0


  7%|▋         | 691/10000 [28:20<6:24:39,  2.48s/it]

Total reward after episode 691 is 614.0


  7%|▋         | 692/10000 [28:21<5:40:08,  2.19s/it]

Total reward after episode 692 is 608.0


  7%|▋         | 693/10000 [28:22<5:01:05,  1.94s/it]

Total reward after episode 693 is 606.0


  7%|▋         | 694/10000 [28:23<3:59:01,  1.54s/it]

Total reward after episode 694 is 252.0


  7%|▋         | 695/10000 [28:25<4:16:41,  1.66s/it]

Total reward after episode 695 is 614.0


  7%|▋         | 696/10000 [28:27<4:13:46,  1.64s/it]

Total reward after episode 696 is 606.0


  7%|▋         | 697/10000 [28:28<4:13:15,  1.63s/it]

Total reward after episode 697 is 630.0


  7%|▋         | 698/10000 [28:32<6:10:54,  2.39s/it]

Total reward after episode 698 is 1338.0


  7%|▋         | 699/10000 [28:34<5:17:26,  2.05s/it]

Total reward after episode 699 is 610.0


  7%|▋         | 700/10000 [28:34<4:09:24,  1.61s/it]

Total reward after episode 700 is 248.0


  7%|▋         | 701/10000 [28:37<4:52:03,  1.88s/it]

Total reward after episode 701 is 1164.0


  7%|▋         | 702/10000 [28:38<4:38:33,  1.80s/it]

Total reward after episode 702 is 607.0


  7%|▋         | 703/10000 [28:40<4:21:07,  1.69s/it]

Total reward after episode 703 is 602.0


  7%|▋         | 704/10000 [28:43<5:38:06,  2.18s/it]

Total reward after episode 704 is 1585.0


  7%|▋         | 705/10000 [28:44<4:25:42,  1.72s/it]

Total reward after episode 705 is 247.0


  7%|▋         | 706/10000 [28:46<4:28:46,  1.74s/it]

Total reward after episode 706 is 638.0


  7%|▋         | 707/10000 [28:48<5:07:54,  1.99s/it]

Total reward after episode 707 is 1161.0


  7%|▋         | 708/10000 [28:51<6:10:04,  2.39s/it]

Total reward after episode 708 is 1584.0


  7%|▋         | 709/10000 [28:55<6:59:36,  2.71s/it]

Total reward after episode 709 is 1325.0


  7%|▋         | 710/10000 [28:56<6:02:41,  2.34s/it]

Total reward after episode 710 is 611.0


  7%|▋         | 711/10000 [28:58<5:26:14,  2.11s/it]

Total reward after episode 711 is 637.0


  7%|▋         | 712/10000 [29:00<5:41:24,  2.21s/it]

Total reward after episode 712 is 1049.0


  7%|▋         | 713/10000 [29:03<5:56:52,  2.31s/it]

Total reward after episode 713 is 1162.0


  7%|▋         | 714/10000 [29:05<6:02:29,  2.34s/it]

Total reward after episode 714 is 1052.0


  7%|▋         | 715/10000 [29:07<5:29:28,  2.13s/it]

Total reward after episode 715 is 633.0


  7%|▋         | 716/10000 [29:10<5:49:51,  2.26s/it]

Total reward after episode 716 is 1048.0


  7%|▋         | 717/10000 [29:11<5:27:17,  2.12s/it]

Total reward after episode 717 is 638.0


  7%|▋         | 718/10000 [29:13<5:01:07,  1.95s/it]

Total reward after episode 718 is 736.0


  7%|▋         | 719/10000 [29:14<4:29:44,  1.74s/it]

Total reward after episode 719 is 613.0


  7%|▋         | 720/10000 [29:18<5:50:14,  2.26s/it]

Total reward after episode 720 is 1344.0


  7%|▋         | 721/10000 [29:20<6:02:40,  2.35s/it]

Total reward after episode 721 is 761.0


  7%|▋         | 722/10000 [29:21<5:15:11,  2.04s/it]

Total reward after episode 722 is 613.0


  7%|▋         | 723/10000 [29:24<5:50:00,  2.26s/it]

Total reward after episode 723 is 1044.0


  7%|▋         | 724/10000 [29:25<5:03:24,  1.96s/it]

Total reward after episode 724 is 607.0


  7%|▋         | 725/10000 [29:29<6:14:53,  2.43s/it]

Total reward after episode 725 is 1343.0


  7%|▋         | 726/10000 [29:30<4:49:21,  1.87s/it]

Total reward after episode 726 is 252.0


  7%|▋         | 727/10000 [29:35<7:17:48,  2.83s/it]

Total reward after episode 727 is 1572.0


  7%|▋         | 728/10000 [29:36<6:13:18,  2.42s/it]

Total reward after episode 728 is 606.0


  7%|▋         | 729/10000 [29:38<5:37:46,  2.19s/it]

Total reward after episode 729 is 652.0


  7%|▋         | 730/10000 [29:41<6:43:19,  2.61s/it]

Total reward after episode 730 is 1343.0


  7%|▋         | 731/10000 [29:45<7:43:54,  3.00s/it]

Total reward after episode 731 is 1707.0


  7%|▋         | 732/10000 [29:47<6:38:08,  2.58s/it]

Total reward after episode 732 is 736.0


  7%|▋         | 733/10000 [29:48<5:48:33,  2.26s/it]

Total reward after episode 733 is 654.0


  7%|▋         | 734/10000 [29:49<4:31:49,  1.76s/it]

Total reward after episode 734 is 251.0


  7%|▋         | 735/10000 [29:52<5:37:25,  2.19s/it]

Total reward after episode 735 is 814.0


  7%|▋         | 736/10000 [29:53<4:27:44,  1.73s/it]

Total reward after episode 736 is 237.0


  7%|▋         | 737/10000 [29:54<4:09:27,  1.62s/it]

Total reward after episode 737 is 606.0


  7%|▋         | 738/10000 [29:55<3:53:13,  1.51s/it]

Total reward after episode 738 is 610.0


  7%|▋         | 739/10000 [29:59<5:38:00,  2.19s/it]

Total reward after episode 739 is 1426.0


  7%|▋         | 740/10000 [30:04<7:33:05,  2.94s/it]

Total reward after episode 740 is 1692.0


  7%|▋         | 741/10000 [30:09<9:00:23,  3.50s/it]

Total reward after episode 741 is 1135.0


  7%|▋         | 742/10000 [30:10<7:31:54,  2.93s/it]

Total reward after episode 742 is 608.0


  7%|▋         | 743/10000 [30:16<9:20:52,  3.64s/it]

Total reward after episode 743 is 1557.0


  7%|▋         | 744/10000 [30:21<10:54:55,  4.25s/it]

Total reward after episode 744 is 1915.0


  7%|▋         | 745/10000 [30:23<8:51:51,  3.45s/it] 

Total reward after episode 745 is 618.0


  7%|▋         | 746/10000 [30:24<6:45:36,  2.63s/it]

Total reward after episode 746 is 236.0


  7%|▋         | 747/10000 [30:24<5:15:44,  2.05s/it]

Total reward after episode 747 is 235.0


  7%|▋         | 748/10000 [30:38<14:30:23,  5.64s/it]

Total reward after episode 748 is 2288.0


  7%|▋         | 749/10000 [30:40<11:18:26,  4.40s/it]

Total reward after episode 749 is 609.0


  8%|▊         | 750/10000 [30:42<9:50:53,  3.83s/it] 

Total reward after episode 750 is 1161.0


  8%|▊         | 751/10000 [30:43<7:25:00,  2.89s/it]

Total reward after episode 751 is 235.0


  8%|▊         | 752/10000 [30:46<7:46:45,  3.03s/it]

Total reward after episode 752 is 1425.0


  8%|▊         | 753/10000 [30:47<5:56:03,  2.31s/it]

Total reward after episode 753 is 246.0


  8%|▊         | 754/10000 [30:48<5:16:28,  2.05s/it]

Total reward after episode 754 is 608.0


  8%|▊         | 755/10000 [30:51<6:03:24,  2.36s/it]

Total reward after episode 755 is 1430.0


  8%|▊         | 756/10000 [30:53<5:37:12,  2.19s/it]

Total reward after episode 756 is 736.0


  8%|▊         | 757/10000 [30:56<6:13:15,  2.42s/it]

Total reward after episode 757 is 1042.0


  8%|▊         | 758/10000 [31:00<7:25:42,  2.89s/it]

Total reward after episode 758 is 1429.0


  8%|▊         | 759/10000 [31:03<7:14:54,  2.82s/it]

Total reward after episode 759 is 601.0


  8%|▊         | 760/10000 [31:04<6:18:06,  2.46s/it]

Total reward after episode 760 is 638.0


  8%|▊         | 761/10000 [31:07<6:35:11,  2.57s/it]

Total reward after episode 761 is 1155.0


  8%|▊         | 762/10000 [31:09<5:48:42,  2.26s/it]

Total reward after episode 762 is 639.0


  8%|▊         | 763/10000 [31:12<6:46:11,  2.64s/it]

Total reward after episode 763 is 1338.0


  8%|▊         | 764/10000 [31:14<5:42:25,  2.22s/it]

Total reward after episode 764 is 606.0


  8%|▊         | 765/10000 [31:15<5:15:05,  2.05s/it]

Total reward after episode 765 is 631.0


  8%|▊         | 766/10000 [31:18<5:28:35,  2.14s/it]

Total reward after episode 766 is 1049.0


  8%|▊         | 767/10000 [31:21<6:27:21,  2.52s/it]

Total reward after episode 767 is 1423.0


  8%|▊         | 768/10000 [31:23<5:47:29,  2.26s/it]

Total reward after episode 768 is 637.0


  8%|▊         | 769/10000 [31:24<5:15:48,  2.05s/it]

Total reward after episode 769 is 638.0


  8%|▊         | 770/10000 [31:26<4:51:11,  1.89s/it]

Total reward after episode 770 is 738.0


  8%|▊         | 771/10000 [31:27<4:42:57,  1.84s/it]

Total reward after episode 771 is 618.0


  8%|▊         | 772/10000 [31:29<4:28:40,  1.75s/it]

Total reward after episode 772 is 607.0


  8%|▊         | 773/10000 [31:32<5:21:38,  2.09s/it]

Total reward after episode 773 is 1349.0


  8%|▊         | 774/10000 [31:35<6:19:13,  2.47s/it]

Total reward after episode 774 is 1583.0


  8%|▊         | 775/10000 [31:37<5:34:59,  2.18s/it]

Total reward after episode 775 is 654.0


  8%|▊         | 776/10000 [31:40<6:19:07,  2.47s/it]

Total reward after episode 776 is 1347.0


  8%|▊         | 777/10000 [31:41<5:22:38,  2.10s/it]

Total reward after episode 777 is 607.0


  8%|▊         | 778/10000 [31:45<6:47:52,  2.65s/it]

Total reward after episode 778 is 1691.0


  8%|▊         | 779/10000 [31:49<7:41:49,  3.01s/it]

Total reward after episode 779 is 1419.0


  8%|▊         | 780/10000 [31:53<8:31:02,  3.33s/it]

Total reward after episode 780 is 1857.0


  8%|▊         | 781/10000 [31:54<7:03:50,  2.76s/it]

Total reward after episode 781 is 609.0


  8%|▊         | 782/10000 [31:56<6:19:55,  2.47s/it]

Total reward after episode 782 is 632.0


  8%|▊         | 783/10000 [32:12<16:44:33,  6.54s/it]

Total reward after episode 783 is 2266.0


  8%|▊         | 784/10000 [32:14<12:41:14,  4.96s/it]

Total reward after episode 784 is 609.0


  8%|▊         | 785/10000 [32:17<11:24:55,  4.46s/it]

Total reward after episode 785 is 1346.0


  8%|▊         | 786/10000 [32:18<9:04:05,  3.54s/it] 

Total reward after episode 786 is 603.0


  8%|▊         | 787/10000 [32:20<7:34:58,  2.96s/it]

Total reward after episode 787 is 653.0


  8%|▊         | 788/10000 [32:30<12:52:23,  5.03s/it]

Total reward after episode 788 is 3030.0


  8%|▊         | 789/10000 [32:31<9:58:48,  3.90s/it] 

Total reward after episode 789 is 609.0


  8%|▊         | 790/10000 [32:33<8:13:55,  3.22s/it]

Total reward after episode 790 is 653.0


  8%|▊         | 791/10000 [32:36<8:21:53,  3.27s/it]

Total reward after episode 791 is 1345.0


  8%|▊         | 792/10000 [32:38<7:41:17,  3.01s/it]

Total reward after episode 792 is 1046.0


  8%|▊         | 793/10000 [32:43<9:05:45,  3.56s/it]

Total reward after episode 793 is 1843.0


  8%|▊         | 794/10000 [32:45<7:34:50,  2.96s/it]

Total reward after episode 794 is 628.0


  8%|▊         | 795/10000 [32:48<8:01:16,  3.14s/it]

Total reward after episode 795 is 1426.0


  8%|▊         | 796/10000 [32:50<6:35:26,  2.58s/it]

Total reward after episode 796 is 611.0


  8%|▊         | 797/10000 [32:51<5:51:54,  2.29s/it]

Total reward after episode 797 is 619.0


  8%|▊         | 798/10000 [32:53<5:16:33,  2.06s/it]

Total reward after episode 798 is 621.0


  8%|▊         | 799/10000 [32:57<7:14:09,  2.83s/it]

Total reward after episode 799 is 1864.0


  8%|▊         | 800/10000 [32:59<6:10:48,  2.42s/it]

Total reward after episode 800 is 610.0


  8%|▊         | 801/10000 [33:03<7:29:46,  2.93s/it]

Total reward after episode 801 is 1867.0


  8%|▊         | 802/10000 [33:06<7:33:06,  2.96s/it]

Total reward after episode 802 is 1043.0


  8%|▊         | 803/10000 [33:09<7:31:35,  2.95s/it]

Total reward after episode 803 is 1351.0


  8%|▊         | 804/10000 [33:14<8:52:12,  3.47s/it]

Total reward after episode 804 is 1862.0


  8%|▊         | 805/10000 [33:14<6:43:46,  2.63s/it]

Total reward after episode 805 is 236.0


  8%|▊         | 806/10000 [33:18<7:31:57,  2.95s/it]

Total reward after episode 806 is 1034.0


  8%|▊         | 807/10000 [33:20<6:35:22,  2.58s/it]

Total reward after episode 807 is 640.0


  8%|▊         | 808/10000 [33:21<5:58:31,  2.34s/it]

Total reward after episode 808 is 611.0


  8%|▊         | 809/10000 [33:23<5:18:50,  2.08s/it]

Total reward after episode 809 is 739.0


  8%|▊         | 810/10000 [33:27<6:51:14,  2.68s/it]

Total reward after episode 810 is 1696.0


  8%|▊         | 811/10000 [33:29<6:36:41,  2.59s/it]

Total reward after episode 811 is 1049.0


  8%|▊         | 812/10000 [33:33<7:20:04,  2.87s/it]

Total reward after episode 812 is 1345.0


  8%|▊         | 813/10000 [33:35<7:02:11,  2.76s/it]

Total reward after episode 813 is 1045.0


  8%|▊         | 814/10000 [33:37<6:13:23,  2.44s/it]

Total reward after episode 814 is 637.0


  8%|▊         | 815/10000 [33:39<5:26:59,  2.14s/it]

Total reward after episode 815 is 610.0


  8%|▊         | 816/10000 [33:40<5:02:37,  1.98s/it]

Total reward after episode 816 is 630.0


  8%|▊         | 817/10000 [33:41<4:30:06,  1.76s/it]

Total reward after episode 817 is 611.0


  8%|▊         | 818/10000 [33:43<4:24:09,  1.73s/it]

Total reward after episode 818 is 626.0


  8%|▊         | 819/10000 [33:44<4:03:01,  1.59s/it]

Total reward after episode 819 is 610.0


  8%|▊         | 820/10000 [33:46<4:13:05,  1.65s/it]

Total reward after episode 820 is 636.0


  8%|▊         | 821/10000 [33:48<4:07:02,  1.61s/it]

Total reward after episode 821 is 654.0


  8%|▊         | 822/10000 [33:49<4:14:54,  1.67s/it]

Total reward after episode 822 is 640.0


  8%|▊         | 823/10000 [33:51<3:55:38,  1.54s/it]

Total reward after episode 823 is 610.0


  8%|▊         | 824/10000 [33:52<4:05:32,  1.61s/it]

Total reward after episode 824 is 651.0


  8%|▊         | 825/10000 [33:55<5:05:16,  2.00s/it]

Total reward after episode 825 is 609.0


  8%|▊         | 826/10000 [33:58<5:43:11,  2.24s/it]

Total reward after episode 826 is 1161.0


  8%|▊         | 827/10000 [33:59<4:32:54,  1.79s/it]

Total reward after episode 827 is 239.0


  8%|▊         | 828/10000 [34:00<4:19:24,  1.70s/it]

Total reward after episode 828 is 654.0


  8%|▊         | 829/10000 [34:02<4:10:40,  1.64s/it]

Total reward after episode 829 is 654.0


  8%|▊         | 830/10000 [34:03<4:05:26,  1.61s/it]

Total reward after episode 830 is 654.0


  8%|▊         | 831/10000 [34:05<3:50:13,  1.51s/it]

Total reward after episode 831 is 610.0


  8%|▊         | 832/10000 [34:06<3:39:50,  1.44s/it]

Total reward after episode 832 is 608.0


  8%|▊         | 833/10000 [34:09<5:00:19,  1.97s/it]

Total reward after episode 833 is 1423.0


  8%|▊         | 834/10000 [34:12<5:53:36,  2.31s/it]

Total reward after episode 834 is 1041.0


  8%|▊         | 835/10000 [34:14<5:21:35,  2.11s/it]

Total reward after episode 835 is 612.0


  8%|▊         | 836/10000 [34:16<5:01:28,  1.97s/it]

Total reward after episode 836 is 611.0


  8%|▊         | 837/10000 [34:17<4:31:43,  1.78s/it]

Total reward after episode 837 is 615.0


  8%|▊         | 838/10000 [34:20<5:35:20,  2.20s/it]

Total reward after episode 838 is 1348.0


  8%|▊         | 839/10000 [34:21<4:52:25,  1.92s/it]

Total reward after episode 839 is 611.0


  8%|▊         | 840/10000 [34:25<6:24:02,  2.52s/it]

Total reward after episode 840 is 1423.0


  8%|▊         | 841/10000 [34:27<5:36:58,  2.21s/it]

Total reward after episode 841 is 608.0


  8%|▊         | 842/10000 [34:28<5:06:18,  2.01s/it]

Total reward after episode 842 is 607.0


  8%|▊         | 843/10000 [34:30<4:32:07,  1.78s/it]

Total reward after episode 843 is 610.0


  8%|▊         | 844/10000 [34:31<4:08:31,  1.63s/it]

Total reward after episode 844 is 611.0


  8%|▊         | 845/10000 [34:32<3:51:34,  1.52s/it]

Total reward after episode 845 is 610.0


  8%|▊         | 846/10000 [34:34<3:50:18,  1.51s/it]

Total reward after episode 846 is 654.0


  8%|▊         | 847/10000 [34:34<3:09:29,  1.24s/it]

Total reward after episode 847 is 248.0


  8%|▊         | 848/10000 [34:35<3:12:39,  1.26s/it]

Total reward after episode 848 is 615.0


  8%|▊         | 849/10000 [34:37<3:13:16,  1.27s/it]

Total reward after episode 849 is 606.0


  8%|▊         | 850/10000 [34:39<4:12:07,  1.65s/it]

Total reward after episode 850 is 1046.0


  9%|▊         | 851/10000 [34:41<4:03:11,  1.59s/it]

Total reward after episode 851 is 611.0


  9%|▊         | 852/10000 [34:44<5:23:58,  2.12s/it]

Total reward after episode 852 is 1422.0


  9%|▊         | 853/10000 [34:45<4:44:43,  1.87s/it]

Total reward after episode 853 is 606.0


  9%|▊         | 854/10000 [34:48<5:36:16,  2.21s/it]

Total reward after episode 854 is 1331.0


  9%|▊         | 855/10000 [34:50<5:03:32,  1.99s/it]

Total reward after episode 855 is 611.0


  9%|▊         | 856/10000 [34:51<4:30:43,  1.78s/it]

Total reward after episode 856 is 611.0


  9%|▊         | 857/10000 [34:54<5:31:08,  2.17s/it]

Total reward after episode 857 is 1586.0


  9%|▊         | 858/10000 [34:56<4:59:24,  1.97s/it]

Total reward after episode 858 is 654.0


  9%|▊         | 859/10000 [35:09<13:46:53,  5.43s/it]

Total reward after episode 859 is 2290.0


  9%|▊         | 860/10000 [35:11<10:44:35,  4.23s/it]

Total reward after episode 860 is 612.0


  9%|▊         | 861/10000 [35:15<10:33:39,  4.16s/it]

Total reward after episode 861 is 1931.0


  9%|▊         | 862/10000 [35:16<8:29:01,  3.34s/it] 

Total reward after episode 862 is 612.0


  9%|▊         | 863/10000 [35:18<7:06:17,  2.80s/it]

Total reward after episode 863 is 653.0


  9%|▊         | 864/10000 [35:19<6:10:20,  2.43s/it]

Total reward after episode 864 is 612.0


  9%|▊         | 865/10000 [35:20<5:15:44,  2.07s/it]

Total reward after episode 865 is 608.0


  9%|▊         | 866/10000 [35:22<4:41:04,  1.85s/it]

Total reward after episode 866 is 614.0


  9%|▊         | 867/10000 [35:23<4:15:18,  1.68s/it]

Total reward after episode 867 is 607.0


  9%|▊         | 868/10000 [35:27<6:06:37,  2.41s/it]

Total reward after episode 868 is 1700.0


  9%|▊         | 869/10000 [35:30<6:20:44,  2.50s/it]

Total reward after episode 869 is 1043.0


  9%|▊         | 870/10000 [35:33<6:53:52,  2.72s/it]

Total reward after episode 870 is 1348.0


  9%|▊         | 871/10000 [35:37<7:57:40,  3.14s/it]

Total reward after episode 871 is 1853.0


  9%|▊         | 872/10000 [35:39<6:45:04,  2.66s/it]

Total reward after episode 872 is 607.0


  9%|▊         | 873/10000 [35:44<8:42:16,  3.43s/it]

Total reward after episode 873 is 1919.0


  9%|▊         | 874/10000 [35:46<7:57:32,  3.14s/it]

Total reward after episode 874 is 1050.0


  9%|▉         | 875/10000 [35:47<6:03:20,  2.39s/it]

Total reward after episode 875 is 247.0


  9%|▉         | 876/10000 [35:48<4:41:46,  1.85s/it]

Total reward after episode 876 is 248.0


  9%|▉         | 877/10000 [35:50<4:58:42,  1.96s/it]

Total reward after episode 877 is 603.0


  9%|▉         | 878/10000 [35:51<4:35:45,  1.81s/it]

Total reward after episode 878 is 609.0


  9%|▉         | 879/10000 [35:53<4:36:41,  1.82s/it]

Total reward after episode 879 is 651.0


  9%|▉         | 880/10000 [35:55<4:22:45,  1.73s/it]

Total reward after episode 880 is 654.0


  9%|▉         | 881/10000 [35:57<5:07:28,  2.02s/it]

Total reward after episode 881 is 1049.0


  9%|▉         | 882/10000 [35:59<4:32:31,  1.79s/it]

Total reward after episode 882 is 610.0


  9%|▉         | 883/10000 [36:00<4:16:11,  1.69s/it]

Total reward after episode 883 is 613.0


  9%|▉         | 884/10000 [36:02<4:08:17,  1.63s/it]

Total reward after episode 884 is 609.0


  9%|▉         | 885/10000 [36:05<5:19:25,  2.10s/it]

Total reward after episode 885 is 1330.0


  9%|▉         | 886/10000 [36:06<4:41:15,  1.85s/it]

Total reward after episode 886 is 607.0


  9%|▉         | 887/10000 [36:08<4:31:25,  1.79s/it]

Total reward after episode 887 is 630.0


  9%|▉         | 888/10000 [36:10<4:55:36,  1.95s/it]

Total reward after episode 888 is 1049.0


  9%|▉         | 889/10000 [36:11<4:24:17,  1.74s/it]

Total reward after episode 889 is 610.0


  9%|▉         | 890/10000 [36:14<5:11:36,  2.05s/it]

Total reward after episode 890 is 1046.0


  9%|▉         | 891/10000 [36:17<5:29:47,  2.17s/it]

Total reward after episode 891 is 1050.0


  9%|▉         | 892/10000 [36:20<6:11:50,  2.45s/it]

Total reward after episode 892 is 1427.0


  9%|▉         | 893/10000 [36:23<6:36:08,  2.61s/it]

Total reward after episode 893 is 1349.0


  9%|▉         | 894/10000 [36:25<6:31:07,  2.58s/it]

Total reward after episode 894 is 1048.0


  9%|▉         | 895/10000 [36:30<8:15:50,  3.27s/it]

Total reward after episode 895 is 1849.0


  9%|▉         | 896/10000 [36:32<6:57:06,  2.75s/it]

Total reward after episode 896 is 607.0


  9%|▉         | 897/10000 [36:33<6:04:23,  2.40s/it]

Total reward after episode 897 is 635.0


  9%|▉         | 898/10000 [36:36<6:07:29,  2.42s/it]

Total reward after episode 898 is 1047.0


  9%|▉         | 899/10000 [36:39<6:59:15,  2.76s/it]

Total reward after episode 899 is 1321.0


  9%|▉         | 900/10000 [36:41<6:03:33,  2.40s/it]

Total reward after episode 900 is 624.0


  9%|▉         | 901/10000 [36:42<5:18:02,  2.10s/it]

Total reward after episode 901 is 608.0


  9%|▉         | 902/10000 [36:43<4:39:29,  1.84s/it]

Total reward after episode 902 is 610.0


  9%|▉         | 903/10000 [36:46<5:25:08,  2.14s/it]

Total reward after episode 903 is 1159.0


  9%|▉         | 904/10000 [36:51<7:30:43,  2.97s/it]

Total reward after episode 904 is 1928.0


  9%|▉         | 905/10000 [36:54<7:29:00,  2.96s/it]

Total reward after episode 905 is 1349.0


  9%|▉         | 906/10000 [36:58<8:28:49,  3.36s/it]

Total reward after episode 906 is 1881.0


  9%|▉         | 907/10000 [37:02<8:42:08,  3.45s/it]

Total reward after episode 907 is 1437.0


  9%|▉         | 908/10000 [37:04<7:14:34,  2.87s/it]

Total reward after episode 908 is 654.0


  9%|▉         | 909/10000 [37:08<8:31:28,  3.38s/it]

Total reward after episode 909 is 1026.0


  9%|▉         | 910/10000 [37:10<7:04:56,  2.80s/it]

Total reward after episode 910 is 609.0


  9%|▉         | 911/10000 [37:11<6:17:52,  2.49s/it]

Total reward after episode 911 is 635.0


  9%|▉         | 912/10000 [37:13<5:33:30,  2.20s/it]

Total reward after episode 912 is 654.0


  9%|▉         | 913/10000 [37:16<5:53:18,  2.33s/it]

Total reward after episode 913 is 1044.0


  9%|▉         | 914/10000 [37:18<5:55:18,  2.35s/it]

Total reward after episode 914 is 1050.0


  9%|▉         | 915/10000 [37:19<5:16:54,  2.09s/it]

Total reward after episode 915 is 608.0


  9%|▉         | 916/10000 [37:22<5:37:48,  2.23s/it]

Total reward after episode 916 is 1044.0


  9%|▉         | 917/10000 [37:23<4:52:21,  1.93s/it]

Total reward after episode 917 is 606.0


  9%|▉         | 918/10000 [37:26<5:15:55,  2.09s/it]

Total reward after episode 918 is 1051.0


  9%|▉         | 919/10000 [37:27<4:37:59,  1.84s/it]

Total reward after episode 919 is 608.0


  9%|▉         | 920/10000 [37:32<7:00:20,  2.78s/it]

Total reward after episode 920 is 1857.0


  9%|▉         | 921/10000 [37:34<6:43:16,  2.67s/it]

Total reward after episode 921 is 745.0


  9%|▉         | 922/10000 [37:38<7:15:57,  2.88s/it]

Total reward after episode 922 is 1156.0


  9%|▉         | 923/10000 [37:42<8:39:38,  3.43s/it]

Total reward after episode 923 is 1926.0


  9%|▉         | 924/10000 [37:47<9:36:55,  3.81s/it]

Total reward after episode 924 is 1337.0


  9%|▉         | 925/10000 [37:49<7:51:05,  3.11s/it]

Total reward after episode 925 is 619.0


  9%|▉         | 926/10000 [37:50<6:31:48,  2.59s/it]

Total reward after episode 926 is 607.0


  9%|▉         | 927/10000 [37:54<7:18:05,  2.90s/it]

Total reward after episode 927 is 1586.0


  9%|▉         | 928/10000 [37:55<6:04:10,  2.41s/it]

Total reward after episode 928 is 607.0


  9%|▉         | 929/10000 [37:57<5:54:52,  2.35s/it]

Total reward after episode 929 is 634.0


  9%|▉         | 930/10000 [37:59<6:00:44,  2.39s/it]

Total reward after episode 930 is 1051.0


  9%|▉         | 931/10000 [38:01<5:09:34,  2.05s/it]

Total reward after episode 931 is 607.0


  9%|▉         | 932/10000 [38:03<5:01:33,  2.00s/it]

Total reward after episode 932 is 621.0


  9%|▉         | 933/10000 [38:04<4:30:14,  1.79s/it]

Total reward after episode 933 is 613.0


  9%|▉         | 934/10000 [38:05<4:07:10,  1.64s/it]

Total reward after episode 934 is 612.0


  9%|▉         | 935/10000 [38:06<3:51:18,  1.53s/it]

Total reward after episode 935 is 611.0


  9%|▉         | 936/10000 [38:08<3:40:28,  1.46s/it]

Total reward after episode 936 is 612.0


  9%|▉         | 937/10000 [38:09<3:32:09,  1.40s/it]

Total reward after episode 937 is 609.0


  9%|▉         | 938/10000 [38:11<3:35:54,  1.43s/it]

Total reward after episode 938 is 609.0


  9%|▉         | 939/10000 [38:14<4:48:03,  1.91s/it]

Total reward after episode 939 is 1340.0


  9%|▉         | 940/10000 [38:15<4:20:36,  1.73s/it]

Total reward after episode 940 is 615.0


  9%|▉         | 941/10000 [38:17<4:47:36,  1.90s/it]

Total reward after episode 941 is 822.0


  9%|▉         | 942/10000 [38:18<4:18:47,  1.71s/it]

Total reward after episode 942 is 610.0


  9%|▉         | 943/10000 [38:43<21:41:49,  8.62s/it]

Total reward after episode 943 is 2892.0


  9%|▉         | 944/10000 [38:45<16:28:46,  6.55s/it]

Total reward after episode 944 is 736.0


  9%|▉         | 945/10000 [38:47<13:25:48,  5.34s/it]

Total reward after episode 945 is 1164.0


  9%|▉         | 946/10000 [38:49<10:42:50,  4.26s/it]

Total reward after episode 946 is 616.0


  9%|▉         | 947/10000 [38:50<8:27:05,  3.36s/it] 

Total reward after episode 947 is 610.0


  9%|▉         | 948/10000 [38:54<8:18:03,  3.30s/it]

Total reward after episode 948 is 600.0


  9%|▉         | 949/10000 [38:56<7:33:19,  3.01s/it]

Total reward after episode 949 is 1059.0


 10%|▉         | 950/10000 [39:03<10:25:34,  4.15s/it]

Total reward after episode 950 is 2642.0


 10%|▉         | 951/10000 [39:06<9:25:00,  3.75s/it] 

Total reward after episode 951 is 597.0


 10%|▉         | 952/10000 [39:07<7:52:53,  3.14s/it]

Total reward after episode 952 is 735.0


 10%|▉         | 953/10000 [39:10<7:19:29,  2.91s/it]

Total reward after episode 953 is 744.0


 10%|▉         | 954/10000 [39:11<6:04:05,  2.41s/it]

Total reward after episode 954 is 608.0


 10%|▉         | 955/10000 [39:15<7:18:26,  2.91s/it]

Total reward after episode 955 is 1338.0


 10%|▉         | 956/10000 [39:17<6:57:50,  2.77s/it]

Total reward after episode 956 is 1052.0


 10%|▉         | 957/10000 [39:19<6:02:10,  2.40s/it]

Total reward after episode 957 is 607.0


 10%|▉         | 958/10000 [39:26<9:41:57,  3.86s/it]

Total reward after episode 958 is 1295.0


 10%|▉         | 959/10000 [39:29<8:54:39,  3.55s/it]

Total reward after episode 959 is 1341.0


 10%|▉         | 960/10000 [39:32<8:39:00,  3.44s/it]

Total reward after episode 960 is 1330.0


 10%|▉         | 961/10000 [39:34<7:13:27,  2.88s/it]

Total reward after episode 961 is 611.0


 10%|▉         | 962/10000 [39:35<6:01:30,  2.40s/it]

Total reward after episode 962 is 611.0


 10%|▉         | 963/10000 [39:37<5:27:22,  2.17s/it]

Total reward after episode 963 is 608.0


 10%|▉         | 964/10000 [39:38<5:00:59,  2.00s/it]

Total reward after episode 964 is 610.0


 10%|▉         | 965/10000 [39:40<5:07:14,  2.04s/it]

Total reward after episode 965 is 1060.0


 10%|▉         | 966/10000 [39:44<6:13:35,  2.48s/it]

Total reward after episode 966 is 1578.0


 10%|▉         | 967/10000 [39:46<5:36:30,  2.24s/it]

Total reward after episode 967 is 625.0


 10%|▉         | 968/10000 [39:50<6:59:49,  2.79s/it]

Total reward after episode 968 is 1930.0


 10%|▉         | 969/10000 [39:53<7:06:18,  2.83s/it]

Total reward after episode 969 is 1054.0


 10%|▉         | 970/10000 [39:54<5:55:45,  2.36s/it]

Total reward after episode 970 is 610.0


 10%|▉         | 971/10000 [39:55<5:16:19,  2.10s/it]

Total reward after episode 971 is 654.0


 10%|▉         | 972/10000 [39:57<4:38:13,  1.85s/it]

Total reward after episode 972 is 611.0


 10%|▉         | 973/10000 [39:58<4:11:11,  1.67s/it]

Total reward after episode 973 is 608.0


 10%|▉         | 974/10000 [40:01<5:03:54,  2.02s/it]

Total reward after episode 974 is 1352.0


 10%|▉         | 975/10000 [40:02<4:29:02,  1.79s/it]

Total reward after episode 975 is 610.0


 10%|▉         | 976/10000 [40:04<4:21:46,  1.74s/it]

Total reward after episode 976 is 603.0


 10%|▉         | 977/10000 [40:06<4:46:02,  1.90s/it]

Total reward after episode 977 is 1052.0


 10%|▉         | 978/10000 [40:09<5:20:14,  2.13s/it]

Total reward after episode 978 is 1160.0


 10%|▉         | 979/10000 [40:10<4:41:39,  1.87s/it]

Total reward after episode 979 is 612.0


 10%|▉         | 980/10000 [40:13<5:43:32,  2.29s/it]

Total reward after episode 980 is 638.0


 10%|▉         | 981/10000 [40:15<5:06:26,  2.04s/it]

Total reward after episode 981 is 604.0


 10%|▉         | 982/10000 [40:16<4:50:30,  1.93s/it]

Total reward after episode 982 is 625.0


 10%|▉         | 983/10000 [40:18<4:24:24,  1.76s/it]

Total reward after episode 983 is 607.0


 10%|▉         | 984/10000 [40:20<5:16:26,  2.11s/it]

Total reward after episode 984 is 1349.0


 10%|▉         | 985/10000 [40:25<6:57:40,  2.78s/it]

Total reward after episode 985 is 1863.0


 10%|▉         | 986/10000 [40:28<7:06:38,  2.84s/it]

Total reward after episode 986 is 1045.0


 10%|▉         | 987/10000 [40:29<6:04:31,  2.43s/it]

Total reward after episode 987 is 617.0


 10%|▉         | 988/10000 [40:32<5:55:51,  2.37s/it]

Total reward after episode 988 is 1052.0


 10%|▉         | 989/10000 [40:33<5:11:03,  2.07s/it]

Total reward after episode 989 is 605.0


 10%|▉         | 990/10000 [40:34<4:35:30,  1.83s/it]

Total reward after episode 990 is 610.0


 10%|▉         | 991/10000 [40:36<4:19:15,  1.73s/it]

Total reward after episode 991 is 737.0


 10%|▉         | 992/10000 [40:36<3:29:06,  1.39s/it]

Total reward after episode 992 is 248.0


 10%|▉         | 993/10000 [40:39<4:38:38,  1.86s/it]

Total reward after episode 993 is 1161.0


 10%|▉         | 994/10000 [40:41<4:49:52,  1.93s/it]

Total reward after episode 994 is 1053.0


 10%|▉         | 995/10000 [40:43<4:24:29,  1.76s/it]

Total reward after episode 995 is 608.0


 10%|▉         | 996/10000 [40:46<5:24:16,  2.16s/it]

Total reward after episode 996 is 1044.0


 10%|▉         | 997/10000 [40:47<4:47:39,  1.92s/it]

Total reward after episode 997 is 616.0


 10%|▉         | 998/10000 [41:30<35:36:32, 14.24s/it]

Total reward after episode 998 is 1771.0


 10%|▉         | 999/10000 [41:33<27:23:15, 10.95s/it]

Total reward after episode 999 is 1345.0


 10%|█         | 1000/10000 [41:37<21:34:54,  8.63s/it]

Total reward after episode 1000 is 1436.0


 10%|█         | 1001/10000 [41:39<16:50:03,  6.73s/it]

Total reward after episode 1001 is 1046.0


 10%|█         | 1002/10000 [41:40<12:44:18,  5.10s/it]

Total reward after episode 1002 is 609.0


 10%|█         | 1003/10000 [41:41<9:53:35,  3.96s/it] 

Total reward after episode 1003 is 612.0


 10%|█         | 1004/10000 [41:43<7:57:42,  3.19s/it]

Total reward after episode 1004 is 609.0


 10%|█         | 1005/10000 [41:46<7:35:34,  3.04s/it]

Total reward after episode 1005 is 1043.0


 10%|█         | 1006/10000 [41:51<9:15:07,  3.70s/it]

Total reward after episode 1006 is 1851.0


 10%|█         | 1007/10000 [41:52<7:25:08,  2.97s/it]

Total reward after episode 1007 is 611.0


 10%|█         | 1008/10000 [41:54<6:58:40,  2.79s/it]

Total reward after episode 1008 is 1156.0


 10%|█         | 1009/10000 [42:00<9:21:44,  3.75s/it]

Total reward after episode 1009 is 2652.0


 10%|█         | 1010/10000 [42:02<7:49:19,  3.13s/it]

Total reward after episode 1010 is 606.0


 10%|█         | 1011/10000 [42:06<8:43:35,  3.49s/it]

Total reward after episode 1011 is 1335.0


 10%|█         | 1012/10000 [42:08<7:03:00,  2.82s/it]

Total reward after episode 1012 is 610.0


 10%|█         | 1013/10000 [42:11<7:21:29,  2.95s/it]

Total reward after episode 1013 is 1347.0


 10%|█         | 1014/10000 [42:12<6:10:24,  2.47s/it]

Total reward after episode 1014 is 606.0


 10%|█         | 1015/10000 [42:14<5:29:20,  2.20s/it]

Total reward after episode 1015 is 611.0


 10%|█         | 1016/10000 [42:15<4:18:08,  1.72s/it]

Total reward after episode 1016 is 248.0


 10%|█         | 1017/10000 [42:16<3:59:25,  1.60s/it]

Total reward after episode 1017 is 613.0


 10%|█         | 1018/10000 [42:20<6:12:49,  2.49s/it]

Total reward after episode 1018 is 802.0


 10%|█         | 1019/10000 [42:23<6:35:03,  2.64s/it]

Total reward after episode 1019 is 1350.0


 10%|█         | 1020/10000 [42:27<7:33:47,  3.03s/it]

Total reward after episode 1020 is 1581.0


 10%|█         | 1021/10000 [42:31<8:01:48,  3.22s/it]

Total reward after episode 1021 is 1038.0


 10%|█         | 1022/10000 [42:34<7:37:24,  3.06s/it]

Total reward after episode 1022 is 1161.0


 10%|█         | 1023/10000 [42:37<7:45:46,  3.11s/it]

Total reward after episode 1023 is 1441.0


 10%|█         | 1024/10000 [42:38<6:32:11,  2.62s/it]

Total reward after episode 1024 is 607.0


 10%|█         | 1025/10000 [42:40<5:33:15,  2.23s/it]

Total reward after episode 1025 is 614.0


 10%|█         | 1026/10000 [42:44<7:04:12,  2.84s/it]

Total reward after episode 1026 is 1333.0


 10%|█         | 1027/10000 [42:47<7:16:30,  2.92s/it]

Total reward after episode 1027 is 1349.0


 10%|█         | 1028/10000 [42:49<6:21:15,  2.55s/it]

Total reward after episode 1028 is 620.0


 10%|█         | 1029/10000 [42:55<9:04:23,  3.64s/it]

Total reward after episode 1029 is 1313.0


 10%|█         | 1030/10000 [42:56<6:51:28,  2.75s/it]

Total reward after episode 1030 is 236.0


 10%|█         | 1031/10000 [42:57<5:45:31,  2.31s/it]

Total reward after episode 1031 is 616.0


 10%|█         | 1032/10000 [42:59<5:44:47,  2.31s/it]

Total reward after episode 1032 is 1059.0


 10%|█         | 1033/10000 [43:01<5:16:26,  2.12s/it]

Total reward after episode 1033 is 735.0


 10%|█         | 1034/10000 [43:05<6:32:47,  2.63s/it]

Total reward after episode 1034 is 1886.0


 10%|█         | 1035/10000 [43:10<8:42:11,  3.49s/it]

Total reward after episode 1035 is 1677.0


 10%|█         | 1036/10000 [43:12<7:17:41,  2.93s/it]

Total reward after episode 1036 is 610.0


 10%|█         | 1037/10000 [43:13<6:12:17,  2.49s/it]

Total reward after episode 1037 is 603.0


 10%|█         | 1038/10000 [43:16<6:36:04,  2.65s/it]

Total reward after episode 1038 is 1147.0


 10%|█         | 1039/10000 [43:18<5:47:23,  2.33s/it]

Total reward after episode 1039 is 601.0


 10%|█         | 1040/10000 [43:19<4:34:01,  1.83s/it]

Total reward after episode 1040 is 237.0


 10%|█         | 1041/10000 [43:21<4:55:50,  1.98s/it]

Total reward after episode 1041 is 1050.0


 10%|█         | 1042/10000 [43:23<5:19:53,  2.14s/it]

Total reward after episode 1042 is 1155.0


 10%|█         | 1043/10000 [43:25<4:49:19,  1.94s/it]

Total reward after episode 1043 is 605.0


 10%|█         | 1044/10000 [43:27<5:17:03,  2.12s/it]

Total reward after episode 1044 is 1044.0


 10%|█         | 1045/10000 [43:29<4:39:53,  1.88s/it]

Total reward after episode 1045 is 610.0


 10%|█         | 1046/10000 [43:32<5:30:55,  2.22s/it]

Total reward after episode 1046 is 1350.0


 10%|█         | 1047/10000 [43:33<4:58:04,  2.00s/it]

Total reward after episode 1047 is 605.0


 10%|█         | 1048/10000 [43:37<5:55:51,  2.39s/it]

Total reward after episode 1048 is 1344.0


 10%|█         | 1049/10000 [43:38<5:29:44,  2.21s/it]

Total reward after episode 1049 is 631.0


 10%|█         | 1050/10000 [43:41<5:54:34,  2.38s/it]

Total reward after episode 1050 is 817.0


 11%|█         | 1051/10000 [43:42<5:05:58,  2.05s/it]

Total reward after episode 1051 is 604.0


 11%|█         | 1052/10000 [43:44<4:30:55,  1.82s/it]

Total reward after episode 1052 is 616.0


 11%|█         | 1053/10000 [43:45<4:04:26,  1.64s/it]

Total reward after episode 1053 is 610.0


 11%|█         | 1054/10000 [43:47<4:04:59,  1.64s/it]

Total reward after episode 1054 is 737.0


 11%|█         | 1055/10000 [43:48<3:46:55,  1.52s/it]

Total reward after episode 1055 is 610.0


 11%|█         | 1056/10000 [43:49<3:42:25,  1.49s/it]

Total reward after episode 1056 is 605.0


 11%|█         | 1057/10000 [43:51<3:40:15,  1.48s/it]

Total reward after episode 1057 is 739.0


 11%|█         | 1058/10000 [43:51<3:00:10,  1.21s/it]

Total reward after episode 1058 is 252.0


 11%|█         | 1059/10000 [43:53<3:04:31,  1.24s/it]

Total reward after episode 1059 is 608.0


 11%|█         | 1060/10000 [43:55<3:49:17,  1.54s/it]

Total reward after episode 1060 is 1049.0


 11%|█         | 1061/10000 [43:56<3:52:56,  1.56s/it]

Total reward after episode 1061 is 603.0


 11%|█         | 1062/10000 [43:59<4:36:18,  1.85s/it]

Total reward after episode 1062 is 1047.0


 11%|█         | 1063/10000 [44:02<5:09:29,  2.08s/it]

Total reward after episode 1063 is 1056.0


 11%|█         | 1064/10000 [44:03<4:34:10,  1.84s/it]

Total reward after episode 1064 is 608.0


 11%|█         | 1065/10000 [44:06<5:27:07,  2.20s/it]

Total reward after episode 1065 is 1039.0


 11%|█         | 1066/10000 [44:07<4:46:39,  1.93s/it]

Total reward after episode 1066 is 604.0


 11%|█         | 1067/10000 [44:12<6:42:00,  2.70s/it]

Total reward after episode 1067 is 1874.0


 11%|█         | 1068/10000 [44:14<6:14:45,  2.52s/it]

Total reward after episode 1068 is 1048.0


 11%|█         | 1069/10000 [44:15<5:25:56,  2.19s/it]

Total reward after episode 1069 is 609.0


 11%|█         | 1070/10000 [44:18<5:42:05,  2.30s/it]

Total reward after episode 1070 is 1048.0


 11%|█         | 1071/10000 [44:19<5:01:53,  2.03s/it]

Total reward after episode 1071 is 616.0


 11%|█         | 1072/10000 [44:23<6:16:53,  2.53s/it]

Total reward after episode 1072 is 1340.0


 11%|█         | 1073/10000 [44:23<4:52:48,  1.97s/it]

Total reward after episode 1073 is 240.0


 11%|█         | 1074/10000 [44:25<4:22:49,  1.77s/it]

Total reward after episode 1074 is 602.0


 11%|█         | 1075/10000 [44:28<5:15:01,  2.12s/it]

Total reward after episode 1075 is 1329.0


 11%|█         | 1076/10000 [44:31<5:55:08,  2.39s/it]

Total reward after episode 1076 is 1350.0


 11%|█         | 1077/10000 [44:34<6:57:48,  2.81s/it]

Total reward after episode 1077 is 1340.0


 11%|█         | 1078/10000 [44:38<7:15:03,  2.93s/it]

Total reward after episode 1078 is 1579.0


 11%|█         | 1079/10000 [44:39<6:00:31,  2.42s/it]

Total reward after episode 1079 is 618.0


 11%|█         | 1080/10000 [44:40<5:16:28,  2.13s/it]

Total reward after episode 1080 is 739.0


 11%|█         | 1081/10000 [44:43<5:35:56,  2.26s/it]

Total reward after episode 1081 is 774.0


 11%|█         | 1082/10000 [44:45<5:26:44,  2.20s/it]

Total reward after episode 1082 is 1053.0


 11%|█         | 1083/10000 [44:46<4:43:58,  1.91s/it]

Total reward after episode 1083 is 611.0


 11%|█         | 1084/10000 [44:50<6:05:35,  2.46s/it]

Total reward after episode 1084 is 1143.0


 11%|█         | 1085/10000 [44:51<5:13:40,  2.11s/it]

Total reward after episode 1085 is 607.0


 11%|█         | 1086/10000 [44:56<6:50:46,  2.76s/it]

Total reward after episode 1086 is 1925.0


 11%|█         | 1087/10000 [44:56<5:14:44,  2.12s/it]

Total reward after episode 1087 is 242.0


 11%|█         | 1088/10000 [44:57<4:37:18,  1.87s/it]

Total reward after episode 1088 is 617.0


 11%|█         | 1089/10000 [44:59<4:14:51,  1.72s/it]

Total reward after episode 1089 is 600.0


 11%|█         | 1090/10000 [45:05<7:39:56,  3.10s/it]

Total reward after episode 1090 is 1315.0


 11%|█         | 1091/10000 [45:06<6:17:41,  2.54s/it]

Total reward after episode 1091 is 614.0


 11%|█         | 1092/10000 [45:07<4:52:20,  1.97s/it]

Total reward after episode 1092 is 242.0


 11%|█         | 1093/10000 [45:09<4:41:27,  1.90s/it]

Total reward after episode 1093 is 606.0


 11%|█         | 1094/10000 [45:12<5:19:24,  2.15s/it]

Total reward after episode 1094 is 1155.0


 11%|█         | 1095/10000 [45:14<5:40:22,  2.29s/it]

Total reward after episode 1095 is 1042.0


 11%|█         | 1096/10000 [45:15<4:55:37,  1.99s/it]

Total reward after episode 1096 is 609.0


 11%|█         | 1097/10000 [45:17<4:22:28,  1.77s/it]

Total reward after episode 1097 is 614.0


 11%|█         | 1098/10000 [45:18<4:10:50,  1.69s/it]

Total reward after episode 1098 is 619.0


 11%|█         | 1099/10000 [45:21<5:14:57,  2.12s/it]

Total reward after episode 1099 is 1325.0


 11%|█         | 1100/10000 [45:24<5:59:58,  2.43s/it]

Total reward after episode 1100 is 1327.0


 11%|█         | 1101/10000 [45:28<7:10:22,  2.90s/it]

Total reward after episode 1101 is 1331.0


 11%|█         | 1102/10000 [45:30<5:54:22,  2.39s/it]

Total reward after episode 1102 is 607.0


 11%|█         | 1103/10000 [45:31<5:14:29,  2.12s/it]

Total reward after episode 1103 is 609.0


 11%|█         | 1104/10000 [45:32<4:36:45,  1.87s/it]

Total reward after episode 1104 is 610.0


 11%|█         | 1105/10000 [45:36<5:48:40,  2.35s/it]

Total reward after episode 1105 is 1696.0


 11%|█         | 1106/10000 [45:57<19:45:51,  8.00s/it]

Total reward after episode 1106 is 2215.0


 11%|█         | 1107/10000 [45:58<14:44:40,  5.97s/it]

Total reward after episode 1107 is 608.0


 11%|█         | 1108/10000 [46:01<12:02:05,  4.87s/it]

Total reward after episode 1108 is 1051.0


 11%|█         | 1109/10000 [46:06<12:43:59,  5.16s/it]

Total reward after episode 1109 is 1914.0


 11%|█         | 1110/10000 [46:08<9:57:31,  4.03s/it] 

Total reward after episode 1110 is 604.0


 11%|█         | 1111/10000 [46:09<8:00:09,  3.24s/it]

Total reward after episode 1111 is 603.0


 11%|█         | 1112/10000 [46:11<6:50:59,  2.77s/it]

Total reward after episode 1112 is 737.0


 11%|█         | 1113/10000 [46:13<6:39:06,  2.69s/it]

Total reward after episode 1113 is 1049.0


 11%|█         | 1114/10000 [46:15<6:10:38,  2.50s/it]

Total reward after episode 1114 is 620.0


 11%|█         | 1115/10000 [46:16<4:49:35,  1.96s/it]

Total reward after episode 1115 is 241.0


 11%|█         | 1116/10000 [46:20<6:07:30,  2.48s/it]

Total reward after episode 1116 is 1701.0


 11%|█         | 1117/10000 [46:21<5:13:21,  2.12s/it]

Total reward after episode 1117 is 610.0


 11%|█         | 1118/10000 [46:22<4:07:58,  1.68s/it]

Total reward after episode 1118 is 242.0


 11%|█         | 1119/10000 [46:25<5:26:16,  2.20s/it]

Total reward after episode 1119 is 1437.0


 11%|█         | 1120/10000 [46:26<4:17:12,  1.74s/it]

Total reward after episode 1120 is 244.0


 11%|█         | 1121/10000 [46:28<4:52:44,  1.98s/it]

Total reward after episode 1121 is 1048.0


 11%|█         | 1122/10000 [46:31<5:36:26,  2.27s/it]

Total reward after episode 1122 is 1348.0


 11%|█         | 1123/10000 [46:33<5:01:03,  2.03s/it]

Total reward after episode 1123 is 608.0


 11%|█         | 1124/10000 [46:34<4:32:45,  1.84s/it]

Total reward after episode 1124 is 604.0


 11%|█▏        | 1125/10000 [46:37<5:06:36,  2.07s/it]

Total reward after episode 1125 is 1070.0


 11%|█▏        | 1126/10000 [46:38<4:30:02,  1.83s/it]

Total reward after episode 1126 is 609.0


 11%|█▏        | 1127/10000 [46:39<3:37:40,  1.47s/it]

Total reward after episode 1127 is 242.0


 11%|█▏        | 1128/10000 [46:39<3:00:30,  1.22s/it]

Total reward after episode 1128 is 242.0


 11%|█▏        | 1129/10000 [46:41<3:12:52,  1.30s/it]

Total reward after episode 1129 is 608.0


 11%|█▏        | 1130/10000 [46:42<3:11:38,  1.30s/it]

Total reward after episode 1130 is 607.0


 11%|█▏        | 1131/10000 [46:45<4:12:06,  1.71s/it]

Total reward after episode 1131 is 1043.0


 11%|█▏        | 1132/10000 [46:47<4:46:00,  1.94s/it]

Total reward after episode 1132 is 1057.0


 11%|█▏        | 1133/10000 [46:49<4:25:32,  1.80s/it]

Total reward after episode 1133 is 654.0


 11%|█▏        | 1134/10000 [46:50<4:20:30,  1.76s/it]

Total reward after episode 1134 is 621.0


 11%|█▏        | 1135/10000 [46:54<5:17:29,  2.15s/it]

Total reward after episode 1135 is 1040.0


 11%|█▏        | 1136/10000 [46:56<5:27:20,  2.22s/it]

Total reward after episode 1136 is 1051.0


 11%|█▏        | 1137/10000 [46:57<4:48:39,  1.95s/it]

Total reward after episode 1137 is 606.0


 11%|█▏        | 1138/10000 [46:59<4:20:47,  1.77s/it]

Total reward after episode 1138 is 604.0


 11%|█▏        | 1139/10000 [47:01<5:04:53,  2.06s/it]

Total reward after episode 1139 is 1329.0


 11%|█▏        | 1140/10000 [47:05<6:20:25,  2.58s/it]

Total reward after episode 1140 is 1340.0


 11%|█▏        | 1141/10000 [47:08<6:29:27,  2.64s/it]

Total reward after episode 1141 is 1350.0


 11%|█▏        | 1142/10000 [47:09<5:39:46,  2.30s/it]

Total reward after episode 1142 is 654.0


 11%|█▏        | 1143/10000 [47:11<5:06:46,  2.08s/it]

Total reward after episode 1143 is 638.0


 11%|█▏        | 1144/10000 [47:12<4:38:05,  1.88s/it]

Total reward after episode 1144 is 609.0


 11%|█▏        | 1145/10000 [47:14<4:14:59,  1.73s/it]

Total reward after episode 1145 is 606.0


 11%|█▏        | 1146/10000 [47:17<5:14:09,  2.13s/it]

Total reward after episode 1146 is 1356.0


 11%|█▏        | 1147/10000 [47:21<6:28:19,  2.63s/it]

Total reward after episode 1147 is 1702.0


 11%|█▏        | 1148/10000 [47:22<5:29:39,  2.23s/it]

Total reward after episode 1148 is 607.0


 11%|█▏        | 1149/10000 [47:24<5:33:17,  2.26s/it]

Total reward after episode 1149 is 1058.0


 12%|█▏        | 1150/10000 [47:26<4:57:57,  2.02s/it]

Total reward after episode 1150 is 654.0


 12%|█▏        | 1151/10000 [47:29<5:36:14,  2.28s/it]

Total reward after episode 1151 is 1350.0


 12%|█▏        | 1152/10000 [47:30<4:54:14,  2.00s/it]

Total reward after episode 1152 is 618.0


 12%|█▏        | 1153/10000 [47:31<4:20:51,  1.77s/it]

Total reward after episode 1153 is 610.0


 12%|█▏        | 1154/10000 [47:34<5:06:14,  2.08s/it]

Total reward after episode 1154 is 1440.0


 12%|█▏        | 1155/10000 [47:37<5:46:09,  2.35s/it]

Total reward after episode 1155 is 1440.0


 12%|█▏        | 1156/10000 [47:38<5:00:38,  2.04s/it]

Total reward after episode 1156 is 614.0


 12%|█▏        | 1157/10000 [47:40<4:26:48,  1.81s/it]

Total reward after episode 1157 is 613.0


 12%|█▏        | 1158/10000 [47:41<4:09:14,  1.69s/it]

Total reward after episode 1158 is 604.0


 12%|█▏        | 1159/10000 [47:42<3:51:52,  1.57s/it]

Total reward after episode 1159 is 612.0


 12%|█▏        | 1160/10000 [47:51<8:50:14,  3.60s/it]

Total reward after episode 1160 is 2338.0


 12%|█▏        | 1161/10000 [47:55<9:40:00,  3.94s/it]

Total reward after episode 1161 is 1327.0


 12%|█▏        | 1162/10000 [47:58<9:00:27,  3.67s/it]

Total reward after episode 1162 is 1439.0


 12%|█▏        | 1163/10000 [48:03<9:49:46,  4.00s/it]

Total reward after episode 1163 is 1859.0


 12%|█▏        | 1164/10000 [48:06<9:14:12,  3.76s/it]

Total reward after episode 1164 is 1439.0


 12%|█▏        | 1165/10000 [48:08<7:33:01,  3.08s/it]

Total reward after episode 1165 is 612.0


 12%|█▏        | 1166/10000 [48:09<6:13:32,  2.54s/it]

Total reward after episode 1166 is 610.0


 12%|█▏        | 1167/10000 [48:11<5:31:22,  2.25s/it]

Total reward after episode 1167 is 618.0


 12%|█▏        | 1168/10000 [48:12<4:51:01,  1.98s/it]

Total reward after episode 1168 is 615.0


 12%|█▏        | 1169/10000 [48:13<4:18:00,  1.75s/it]

Total reward after episode 1169 is 608.0


 12%|█▏        | 1170/10000 [48:16<4:43:04,  1.92s/it]

Total reward after episode 1170 is 1052.0


 12%|█▏        | 1171/10000 [48:35<17:28:16,  7.12s/it]

Total reward after episode 1171 is 2935.0


 12%|█▏        | 1172/10000 [48:36<13:09:14,  5.36s/it]

Total reward after episode 1172 is 608.0


 12%|█▏        | 1173/10000 [48:40<12:15:20,  5.00s/it]

Total reward after episode 1173 is 1690.0


 12%|█▏        | 1174/10000 [48:42<9:49:00,  4.00s/it] 

Total reward after episode 1174 is 736.0


 12%|█▏        | 1175/10000 [48:47<10:33:24,  4.31s/it]

Total reward after episode 1175 is 1848.0


 12%|█▏        | 1176/10000 [48:48<8:20:55,  3.41s/it] 

Total reward after episode 1176 is 608.0


 12%|█▏        | 1177/10000 [48:51<7:39:59,  3.13s/it]

Total reward after episode 1177 is 1048.0


 12%|█▏        | 1178/10000 [48:52<6:17:03,  2.56s/it]

Total reward after episode 1178 is 608.0


 12%|█▏        | 1179/10000 [48:54<6:10:28,  2.52s/it]

Total reward after episode 1179 is 1050.0


 12%|█▏        | 1180/10000 [48:57<6:18:20,  2.57s/it]

Total reward after episode 1180 is 1044.0


 12%|█▏        | 1181/10000 [49:00<6:23:37,  2.61s/it]

Total reward after episode 1181 is 819.0


 12%|█▏        | 1182/10000 [49:01<5:24:10,  2.21s/it]

Total reward after episode 1182 is 607.0


 12%|█▏        | 1183/10000 [49:02<4:12:52,  1.72s/it]

Total reward after episode 1183 is 251.0


 12%|█▏        | 1184/10000 [49:05<5:11:48,  2.12s/it]

Total reward after episode 1184 is 1063.0


 12%|█▏        | 1185/10000 [49:06<4:34:12,  1.87s/it]

Total reward after episode 1185 is 613.0


 12%|█▏        | 1186/10000 [49:07<4:06:36,  1.68s/it]

Total reward after episode 1186 is 611.0


 12%|█▏        | 1187/10000 [49:11<6:00:43,  2.46s/it]

Total reward after episode 1187 is 1848.0


 12%|█▏        | 1188/10000 [49:13<5:09:35,  2.11s/it]

Total reward after episode 1188 is 609.0


 12%|█▏        | 1189/10000 [49:15<5:24:59,  2.21s/it]

Total reward after episode 1189 is 1049.0


 12%|█▏        | 1190/10000 [49:17<4:49:26,  1.97s/it]

Total reward after episode 1190 is 606.0


 12%|█▏        | 1191/10000 [49:18<4:20:02,  1.77s/it]

Total reward after episode 1191 is 611.0


 12%|█▏        | 1192/10000 [49:20<4:45:42,  1.95s/it]

Total reward after episode 1192 is 1059.0


 12%|█▏        | 1193/10000 [49:22<4:17:44,  1.76s/it]

Total reward after episode 1193 is 608.0


 12%|█▏        | 1194/10000 [49:23<3:57:13,  1.62s/it]

Total reward after episode 1194 is 610.0


 12%|█▏        | 1195/10000 [49:28<6:19:46,  2.59s/it]

Total reward after episode 1195 is 1928.0


 12%|█▏        | 1196/10000 [49:29<5:23:28,  2.20s/it]

Total reward after episode 1196 is 616.0


 12%|█▏        | 1197/10000 [49:33<6:57:10,  2.84s/it]

Total reward after episode 1197 is 1863.0


 12%|█▏        | 1198/10000 [49:35<5:47:03,  2.37s/it]

Total reward after episode 1198 is 606.0


 12%|█▏        | 1199/10000 [49:36<4:59:31,  2.04s/it]

Total reward after episode 1199 is 613.0


 12%|█▏        | 1200/10000 [49:39<5:44:31,  2.35s/it]

Total reward after episode 1200 is 1438.0


 12%|█▏        | 1201/10000 [49:40<5:00:00,  2.05s/it]

Total reward after episode 1201 is 614.0


 12%|█▏        | 1202/10000 [49:41<4:00:18,  1.64s/it]

Total reward after episode 1202 is 244.0


 12%|█▏        | 1203/10000 [49:42<3:45:37,  1.54s/it]

Total reward after episode 1203 is 609.0


 12%|█▏        | 1204/10000 [49:45<4:47:19,  1.96s/it]

Total reward after episode 1204 is 1439.0


 12%|█▏        | 1205/10000 [49:49<6:12:40,  2.54s/it]

Total reward after episode 1205 is 1694.0


 12%|█▏        | 1206/10000 [49:50<5:18:10,  2.17s/it]

Total reward after episode 1206 is 606.0


 12%|█▏        | 1207/10000 [49:54<6:11:08,  2.53s/it]

Total reward after episode 1207 is 1344.0


 12%|█▏        | 1208/10000 [49:56<5:56:04,  2.43s/it]

Total reward after episode 1208 is 731.0


 12%|█▏        | 1209/10000 [49:57<5:05:21,  2.08s/it]

Total reward after episode 1209 is 607.0


 12%|█▏        | 1210/10000 [50:01<6:19:17,  2.59s/it]

Total reward after episode 1210 is 1703.0


 12%|█▏        | 1211/10000 [50:02<5:24:30,  2.22s/it]

Total reward after episode 1211 is 607.0


 12%|█▏        | 1212/10000 [50:04<4:49:16,  1.98s/it]

Total reward after episode 1212 is 604.0


 12%|█▏        | 1213/10000 [50:08<6:20:15,  2.60s/it]

Total reward after episode 1213 is 1694.0


 12%|█▏        | 1214/10000 [50:09<5:31:26,  2.26s/it]

Total reward after episode 1214 is 610.0


 12%|█▏        | 1215/10000 [50:12<5:44:17,  2.35s/it]

Total reward after episode 1215 is 1049.0


 12%|█▏        | 1216/10000 [50:13<4:55:39,  2.02s/it]

Total reward after episode 1216 is 606.0


 12%|█▏        | 1217/10000 [50:16<5:39:05,  2.32s/it]

Total reward after episode 1217 is 1337.0


 12%|█▏        | 1218/10000 [50:17<4:35:29,  1.88s/it]

Total reward after episode 1218 is 251.0


 12%|█▏        | 1219/10000 [50:18<3:46:05,  1.54s/it]

Total reward after episode 1219 is 242.0


 12%|█▏        | 1220/10000 [50:19<3:53:21,  1.59s/it]

Total reward after episode 1220 is 736.0


 12%|█▏        | 1221/10000 [50:21<3:38:39,  1.49s/it]

Total reward after episode 1221 is 608.0


 12%|█▏        | 1222/10000 [50:24<5:00:48,  2.06s/it]

Total reward after episode 1222 is 1585.0


 12%|█▏        | 1223/10000 [50:25<4:28:38,  1.84s/it]

Total reward after episode 1223 is 609.0


 12%|█▏        | 1224/10000 [50:29<6:01:23,  2.47s/it]

Total reward after episode 1224 is 1699.0


 12%|█▏        | 1225/10000 [50:31<5:15:09,  2.15s/it]

Total reward after episode 1225 is 603.0


 12%|█▏        | 1226/10000 [50:31<4:07:10,  1.69s/it]

Total reward after episode 1226 is 251.0


 12%|█▏        | 1227/10000 [50:33<3:59:37,  1.64s/it]

Total reward after episode 1227 is 654.0


 12%|█▏        | 1228/10000 [50:34<3:53:41,  1.60s/it]

Total reward after episode 1228 is 608.0


 12%|█▏        | 1229/10000 [50:36<4:06:06,  1.68s/it]

Total reward after episode 1229 is 626.0


 12%|█▏        | 1230/10000 [51:04<23:11:44,  9.52s/it]

Total reward after episode 1230 is 2871.0


 12%|█▏        | 1231/10000 [51:06<17:14:35,  7.08s/it]

Total reward after episode 1231 is 603.0


 12%|█▏        | 1232/10000 [51:09<14:19:08,  5.88s/it]

Total reward after episode 1232 is 1440.0


 12%|█▏        | 1233/10000 [51:09<10:31:17,  4.32s/it]

Total reward after episode 1233 is 243.0


 12%|█▏        | 1234/10000 [51:12<9:32:44,  3.92s/it] 

Total reward after episode 1234 is 1334.0


 12%|█▏        | 1235/10000 [51:15<8:49:46,  3.63s/it]

Total reward after episode 1235 is 1351.0


 12%|█▏        | 1236/10000 [51:18<7:57:06,  3.27s/it]

Total reward after episode 1236 is 1048.0


 12%|█▏        | 1237/10000 [51:19<6:38:44,  2.73s/it]

Total reward after episode 1237 is 605.0


 12%|█▏        | 1238/10000 [51:20<5:37:44,  2.31s/it]

Total reward after episode 1238 is 608.0


 12%|█▏        | 1239/10000 [51:22<4:55:35,  2.02s/it]

Total reward after episode 1239 is 604.0


 12%|█▏        | 1240/10000 [51:23<4:37:07,  1.90s/it]

Total reward after episode 1240 is 606.0


 12%|█▏        | 1241/10000 [51:28<6:28:11,  2.66s/it]

Total reward after episode 1241 is 1923.0


 12%|█▏        | 1242/10000 [51:29<5:43:14,  2.35s/it]

Total reward after episode 1242 is 737.0


 12%|█▏        | 1243/10000 [51:32<6:06:14,  2.51s/it]

Total reward after episode 1243 is 1351.0


 12%|█▏        | 1244/10000 [51:34<5:13:54,  2.15s/it]

Total reward after episode 1244 is 608.0


 12%|█▏        | 1245/10000 [51:35<4:50:11,  1.99s/it]

Total reward after episode 1245 is 607.0


 12%|█▏        | 1246/10000 [51:37<5:00:03,  2.06s/it]

Total reward after episode 1246 is 1047.0


 12%|█▏        | 1247/10000 [51:39<4:42:58,  1.94s/it]

Total reward after episode 1247 is 636.0


 12%|█▏        | 1248/10000 [51:41<4:21:05,  1.79s/it]

Total reward after episode 1248 is 617.0


 12%|█▏        | 1249/10000 [51:42<3:56:48,  1.62s/it]

Total reward after episode 1249 is 611.0


 12%|█▎        | 1250/10000 [51:42<3:10:38,  1.31s/it]

Total reward after episode 1250 is 248.0


 13%|█▎        | 1251/10000 [51:43<2:39:08,  1.09s/it]

Total reward after episode 1251 is 252.0


 13%|█▎        | 1252/10000 [51:45<3:35:16,  1.48s/it]

Total reward after episode 1252 is 1057.0


 13%|█▎        | 1253/10000 [51:48<4:39:27,  1.92s/it]

Total reward after episode 1253 is 817.0


 13%|█▎        | 1254/10000 [51:52<5:44:24,  2.36s/it]

Total reward after episode 1254 is 1344.0


 13%|█▎        | 1255/10000 [51:52<4:26:16,  1.83s/it]

Total reward after episode 1255 is 252.0


 13%|█▎        | 1256/10000 [51:54<4:02:35,  1.66s/it]

Total reward after episode 1256 is 615.0


 13%|█▎        | 1257/10000 [51:55<3:58:35,  1.64s/it]

Total reward after episode 1257 is 604.0


 13%|█▎        | 1258/10000 [51:58<4:55:08,  2.03s/it]

Total reward after episode 1258 is 1431.0


 13%|█▎        | 1259/10000 [52:02<6:38:33,  2.74s/it]

Total reward after episode 1259 is 1423.0


 13%|█▎        | 1260/10000 [52:07<8:09:32,  3.36s/it]

Total reward after episode 1260 is 1687.0


 13%|█▎        | 1261/10000 [52:10<7:36:49,  3.14s/it]

Total reward after episode 1261 is 1046.0


 13%|█▎        | 1262/10000 [52:11<6:16:44,  2.59s/it]

Total reward after episode 1262 is 607.0


 13%|█▎        | 1263/10000 [52:13<5:27:13,  2.25s/it]

Total reward after episode 1263 is 607.0


 13%|█▎        | 1264/10000 [52:13<4:16:27,  1.76s/it]

Total reward after episode 1264 is 247.0


 13%|█▎        | 1265/10000 [52:16<5:14:26,  2.16s/it]

Total reward after episode 1265 is 1039.0


 13%|█▎        | 1266/10000 [52:19<5:48:17,  2.39s/it]

Total reward after episode 1266 is 1054.0


 13%|█▎        | 1267/10000 [52:21<4:59:14,  2.06s/it]

Total reward after episode 1267 is 609.0


 13%|█▎        | 1268/10000 [52:25<6:43:09,  2.77s/it]

Total reward after episode 1268 is 1699.0


 13%|█▎        | 1269/10000 [52:27<5:48:26,  2.39s/it]

Total reward after episode 1269 is 610.0


 13%|█▎        | 1270/10000 [52:30<6:19:08,  2.61s/it]

Total reward after episode 1270 is 1348.0


 13%|█▎        | 1271/10000 [52:33<6:48:43,  2.81s/it]

Total reward after episode 1271 is 1427.0


 13%|█▎        | 1272/10000 [52:36<6:59:26,  2.88s/it]

Total reward after episode 1272 is 1347.0


 13%|█▎        | 1273/10000 [52:41<8:12:56,  3.39s/it]

Total reward after episode 1273 is 1919.0


 13%|█▎        | 1274/10000 [52:45<8:42:00,  3.59s/it]

Total reward after episode 1274 is 1929.0


 13%|█▎        | 1275/10000 [52:46<7:13:38,  2.98s/it]

Total reward after episode 1275 is 631.0


 13%|█▎        | 1276/10000 [52:50<7:52:04,  3.25s/it]

Total reward after episode 1276 is 1428.0


 13%|█▎        | 1277/10000 [52:54<8:43:53,  3.60s/it]

Total reward after episode 1277 is 1928.0


 13%|█▎        | 1278/10000 [52:56<7:09:33,  2.96s/it]

Total reward after episode 1278 is 609.0


 13%|█▎        | 1279/10000 [52:56<5:26:00,  2.24s/it]

Total reward after episode 1279 is 250.0


 13%|█▎        | 1280/10000 [52:59<5:32:09,  2.29s/it]

Total reward after episode 1280 is 1058.0


 13%|█▎        | 1281/10000 [53:04<7:28:54,  3.09s/it]

Total reward after episode 1281 is 1420.0


 13%|█▎        | 1282/10000 [53:04<5:39:54,  2.34s/it]

Total reward after episode 1282 is 249.0


 13%|█▎        | 1283/10000 [53:07<5:30:23,  2.27s/it]

Total reward after episode 1283 is 1051.0


 13%|█▎        | 1284/10000 [53:09<5:57:34,  2.46s/it]

Total reward after episode 1284 is 1437.0


 13%|█▎        | 1285/10000 [53:12<6:19:32,  2.61s/it]

Total reward after episode 1285 is 1348.0


 13%|█▎        | 1286/10000 [53:15<6:37:12,  2.73s/it]

Total reward after episode 1286 is 1438.0


 13%|█▎        | 1287/10000 [53:22<9:02:17,  3.73s/it]

Total reward after episode 1287 is 1301.0


 13%|█▎        | 1288/10000 [53:25<8:31:34,  3.52s/it]

Total reward after episode 1288 is 1439.0


 13%|█▎        | 1289/10000 [53:26<7:05:09,  2.93s/it]

Total reward after episode 1289 is 654.0


 13%|█▎        | 1290/10000 [53:28<6:13:18,  2.57s/it]

Total reward after episode 1290 is 736.0


 13%|█▎        | 1291/10000 [53:29<5:16:37,  2.18s/it]

Total reward after episode 1291 is 606.0


 13%|█▎        | 1292/10000 [53:32<6:07:02,  2.53s/it]

Total reward after episode 1292 is 1050.0


 13%|█▎        | 1293/10000 [53:35<5:55:37,  2.45s/it]

Total reward after episode 1293 is 1047.0


 13%|█▎        | 1294/10000 [53:36<5:15:20,  2.17s/it]

Total reward after episode 1294 is 637.0


 13%|█▎        | 1295/10000 [53:38<5:08:23,  2.13s/it]

Total reward after episode 1295 is 736.0


 13%|█▎        | 1296/10000 [53:39<4:03:57,  1.68s/it]

Total reward after episode 1296 is 242.0


 13%|█▎        | 1297/10000 [53:41<4:04:33,  1.69s/it]

Total reward after episode 1297 is 737.0


 13%|█▎        | 1298/10000 [53:42<4:01:12,  1.66s/it]

Total reward after episode 1298 is 604.0


 13%|█▎        | 1299/10000 [53:45<4:41:13,  1.94s/it]

Total reward after episode 1299 is 1043.0


 13%|█▎        | 1300/10000 [53:48<5:49:10,  2.41s/it]

Total reward after episode 1300 is 1434.0


 13%|█▎        | 1301/10000 [53:52<6:27:18,  2.67s/it]

Total reward after episode 1301 is 1348.0


 13%|█▎        | 1302/10000 [53:55<7:09:41,  2.96s/it]

Total reward after episode 1302 is 1580.0


 13%|█▎        | 1303/10000 [53:58<6:47:37,  2.81s/it]

Total reward after episode 1303 is 1049.0


 13%|█▎        | 1304/10000 [54:01<6:54:34,  2.86s/it]

Total reward after episode 1304 is 1342.0


 13%|█▎        | 1305/10000 [54:02<6:00:56,  2.49s/it]

Total reward after episode 1305 is 606.0


 13%|█▎        | 1306/10000 [54:03<4:38:24,  1.92s/it]

Total reward after episode 1306 is 250.0


 13%|█▎        | 1307/10000 [54:03<3:42:37,  1.54s/it]

Total reward after episode 1307 is 245.0


 13%|█▎        | 1308/10000 [54:05<3:41:44,  1.53s/it]

Total reward after episode 1308 is 617.0


 13%|█▎        | 1309/10000 [54:06<3:01:08,  1.25s/it]

Total reward after episode 1309 is 251.0


 13%|█▎        | 1310/10000 [54:07<3:01:24,  1.25s/it]

Total reward after episode 1310 is 611.0


 13%|█▎        | 1311/10000 [54:08<3:01:43,  1.25s/it]

Total reward after episode 1311 is 610.0


 13%|█▎        | 1312/10000 [54:09<2:33:07,  1.06s/it]

Total reward after episode 1312 is 250.0


 13%|█▎        | 1313/10000 [54:10<2:52:51,  1.19s/it]

Total reward after episode 1313 is 737.0


 13%|█▎        | 1314/10000 [54:12<3:04:03,  1.27s/it]

Total reward after episode 1314 is 602.0


 13%|█▎        | 1315/10000 [54:14<3:53:10,  1.61s/it]

Total reward after episode 1315 is 1045.0


 13%|█▎        | 1316/10000 [54:16<3:55:31,  1.63s/it]

Total reward after episode 1316 is 612.0


 13%|█▎        | 1317/10000 [54:19<5:06:39,  2.12s/it]

Total reward after episode 1317 is 1345.0


 13%|█▎        | 1318/10000 [54:22<5:51:03,  2.43s/it]

Total reward after episode 1318 is 1436.0


 13%|█▎        | 1319/10000 [54:25<6:13:14,  2.58s/it]

Total reward after episode 1319 is 1349.0


 13%|█▎        | 1320/10000 [54:26<4:49:33,  2.00s/it]

Total reward after episode 1320 is 248.0


 13%|█▎        | 1321/10000 [54:29<5:34:08,  2.31s/it]

Total reward after episode 1321 is 1068.0


 13%|█▎        | 1322/10000 [54:30<4:53:28,  2.03s/it]

Total reward after episode 1322 is 613.0


 13%|█▎        | 1323/10000 [54:33<5:22:36,  2.23s/it]

Total reward after episode 1323 is 1045.0


 13%|█▎        | 1324/10000 [54:34<4:40:10,  1.94s/it]

Total reward after episode 1324 is 609.0


 13%|█▎        | 1325/10000 [54:36<4:20:12,  1.80s/it]

Total reward after episode 1325 is 603.0


 13%|█▎        | 1326/10000 [54:38<4:59:11,  2.07s/it]

Total reward after episode 1326 is 1341.0


 13%|█▎        | 1327/10000 [54:40<5:01:18,  2.08s/it]

Total reward after episode 1327 is 1052.0


 13%|█▎        | 1328/10000 [54:41<3:58:58,  1.65s/it]

Total reward after episode 1328 is 245.0


 13%|█▎        | 1329/10000 [54:42<3:42:33,  1.54s/it]

Total reward after episode 1329 is 609.0


 13%|█▎        | 1330/10000 [54:44<3:32:16,  1.47s/it]

Total reward after episode 1330 is 614.0


 13%|█▎        | 1331/10000 [54:46<4:08:59,  1.72s/it]

Total reward after episode 1331 is 1047.0


 13%|█▎        | 1332/10000 [54:49<4:53:50,  2.03s/it]

Total reward after episode 1332 is 818.0


 13%|█▎        | 1333/10000 [54:50<4:19:40,  1.80s/it]

Total reward after episode 1333 is 609.0


 13%|█▎        | 1334/10000 [54:51<3:55:34,  1.63s/it]

Total reward after episode 1334 is 611.0


 13%|█▎        | 1335/10000 [54:54<4:47:38,  1.99s/it]

Total reward after episode 1335 is 818.0


 13%|█▎        | 1336/10000 [54:57<5:21:00,  2.22s/it]

Total reward after episode 1336 is 1332.0


 13%|█▎        | 1337/10000 [54:58<4:38:28,  1.93s/it]

Total reward after episode 1337 is 607.0


 13%|█▎        | 1338/10000 [55:01<5:06:36,  2.12s/it]

Total reward after episode 1338 is 820.0


 13%|█▎        | 1339/10000 [55:02<4:28:37,  1.86s/it]

Total reward after episode 1339 is 609.0


 13%|█▎        | 1340/10000 [55:03<4:02:18,  1.68s/it]

Total reward after episode 1340 is 609.0


 13%|█▎        | 1341/10000 [55:06<4:50:00,  2.01s/it]

Total reward after episode 1341 is 1352.0


 13%|█▎        | 1342/10000 [55:10<6:22:42,  2.65s/it]

Total reward after episode 1342 is 1861.0


 13%|█▎        | 1343/10000 [55:13<6:22:43,  2.65s/it]

Total reward after episode 1343 is 1045.0


 13%|█▎        | 1344/10000 [55:14<5:36:20,  2.33s/it]

Total reward after episode 1344 is 607.0


 13%|█▎        | 1345/10000 [55:17<6:10:31,  2.57s/it]

Total reward after episode 1345 is 1437.0


 13%|█▎        | 1346/10000 [55:19<5:46:42,  2.40s/it]

Total reward after episode 1346 is 634.0


 13%|█▎        | 1347/10000 [55:22<5:37:36,  2.34s/it]

Total reward after episode 1347 is 1050.0


 13%|█▎        | 1348/10000 [55:23<4:57:07,  2.06s/it]

Total reward after episode 1348 is 603.0


 13%|█▎        | 1349/10000 [55:24<4:21:28,  1.81s/it]

Total reward after episode 1349 is 611.0


 14%|█▎        | 1350/10000 [55:26<3:58:22,  1.65s/it]

Total reward after episode 1350 is 610.0


 14%|█▎        | 1351/10000 [55:28<4:32:09,  1.89s/it]

Total reward after episode 1351 is 1047.0


 14%|█▎        | 1352/10000 [55:30<4:38:56,  1.94s/it]

Total reward after episode 1352 is 734.0


 14%|█▎        | 1353/10000 [55:33<5:13:04,  2.17s/it]

Total reward after episode 1353 is 1161.0


 14%|█▎        | 1354/10000 [55:33<4:04:42,  1.70s/it]

Total reward after episode 1354 is 248.0


 14%|█▎        | 1355/10000 [55:37<5:19:58,  2.22s/it]

Total reward after episode 1355 is 1427.0


 14%|█▎        | 1356/10000 [55:38<4:52:36,  2.03s/it]

Total reward after episode 1356 is 603.0


 14%|█▎        | 1357/10000 [55:43<6:35:44,  2.75s/it]

Total reward after episode 1357 is 1923.0


 14%|█▎        | 1358/10000 [55:46<7:03:16,  2.94s/it]

Total reward after episode 1358 is 1350.0


 14%|█▎        | 1359/10000 [55:48<6:04:02,  2.53s/it]

Total reward after episode 1359 is 640.0


 14%|█▎        | 1360/10000 [55:54<8:34:51,  3.58s/it]

Total reward after episode 1360 is 1407.0


 14%|█▎        | 1361/10000 [55:57<8:06:35,  3.38s/it]

Total reward after episode 1361 is 1437.0


 14%|█▎        | 1362/10000 [56:00<7:59:32,  3.33s/it]

Total reward after episode 1362 is 1347.0


 14%|█▎        | 1363/10000 [56:01<6:38:02,  2.77s/it]

Total reward after episode 1363 is 636.0


 14%|█▎        | 1364/10000 [56:04<6:45:43,  2.82s/it]

Total reward after episode 1364 is 1349.0


 14%|█▎        | 1365/10000 [56:08<7:06:11,  2.96s/it]

Total reward after episode 1365 is 1438.0


 14%|█▎        | 1366/10000 [56:10<7:02:40,  2.94s/it]

Total reward after episode 1366 is 1338.0


 14%|█▎        | 1367/10000 [56:12<5:59:03,  2.50s/it]

Total reward after episode 1367 is 602.0


 14%|█▎        | 1368/10000 [56:13<5:08:01,  2.14s/it]

Total reward after episode 1368 is 617.0


 14%|█▎        | 1369/10000 [56:15<4:47:06,  2.00s/it]

Total reward after episode 1369 is 738.0


 14%|█▎        | 1370/10000 [56:16<4:15:45,  1.78s/it]

Total reward after episode 1370 is 611.0


 14%|█▎        | 1371/10000 [56:19<5:06:37,  2.13s/it]

Total reward after episode 1371 is 1349.0


 14%|█▎        | 1372/10000 [56:21<4:42:20,  1.96s/it]

Total reward after episode 1372 is 607.0


 14%|█▎        | 1373/10000 [56:23<4:37:44,  1.93s/it]

Total reward after episode 1373 is 735.0


 14%|█▎        | 1374/10000 [56:24<4:25:18,  1.85s/it]

Total reward after episode 1374 is 616.0


 14%|█▍        | 1375/10000 [56:27<5:01:55,  2.10s/it]

Total reward after episode 1375 is 1161.0


 14%|█▍        | 1376/10000 [56:30<6:03:38,  2.53s/it]

Total reward after episode 1376 is 1584.0


 14%|█▍        | 1377/10000 [56:33<6:06:09,  2.55s/it]

Total reward after episode 1377 is 1043.0


 14%|█▍        | 1378/10000 [56:37<7:04:58,  2.96s/it]

Total reward after episode 1378 is 1035.0


 14%|█▍        | 1379/10000 [56:39<6:46:18,  2.83s/it]

Total reward after episode 1379 is 1044.0


 14%|█▍        | 1380/10000 [56:41<6:09:06,  2.57s/it]

Total reward after episode 1380 is 624.0


 14%|█▍        | 1381/10000 [56:43<5:23:37,  2.25s/it]

Total reward after episode 1381 is 635.0


 14%|█▍        | 1382/10000 [56:48<7:19:33,  3.06s/it]

Total reward after episode 1382 is 1861.0


 14%|█▍        | 1383/10000 [56:51<7:15:09,  3.03s/it]

Total reward after episode 1383 is 816.0


 14%|█▍        | 1384/10000 [56:52<6:01:25,  2.52s/it]

Total reward after episode 1384 is 607.0


 14%|█▍        | 1385/10000 [56:55<6:25:41,  2.69s/it]

Total reward after episode 1385 is 1336.0


 14%|█▍        | 1386/10000 [56:57<5:39:36,  2.37s/it]

Total reward after episode 1386 is 602.0


 14%|█▍        | 1387/10000 [56:58<4:55:40,  2.06s/it]

Total reward after episode 1387 is 604.0


 14%|█▍        | 1388/10000 [57:00<4:24:50,  1.85s/it]

Total reward after episode 1388 is 608.0


 14%|█▍        | 1389/10000 [57:03<5:38:26,  2.36s/it]

Total reward after episode 1389 is 1577.0


 14%|█▍        | 1390/10000 [57:04<4:51:42,  2.03s/it]

Total reward after episode 1390 is 609.0


 14%|█▍        | 1391/10000 [57:09<6:29:34,  2.72s/it]

Total reward after episode 1391 is 1860.0


 14%|█▍        | 1392/10000 [57:16<9:47:51,  4.10s/it]

Total reward after episode 1392 is 1897.0


 14%|█▍        | 1393/10000 [57:20<9:36:06,  4.02s/it]

Total reward after episode 1393 is 1418.0


 14%|█▍        | 1394/10000 [57:24<9:51:10,  4.12s/it]

Total reward after episode 1394 is 1692.0


 14%|█▍        | 1395/10000 [57:25<7:20:18,  3.07s/it]

Total reward after episode 1395 is 247.0


 14%|█▍        | 1396/10000 [57:31<9:40:22,  4.05s/it]

Total reward after episode 1396 is 1836.0


 14%|█▍        | 1397/10000 [57:35<9:14:53,  3.87s/it]

Total reward after episode 1397 is 1695.0


 14%|█▍        | 1398/10000 [57:36<7:35:09,  3.17s/it]

Total reward after episode 1398 is 607.0


 14%|█▍        | 1399/10000 [57:40<7:58:01,  3.33s/it]

Total reward after episode 1399 is 1697.0


 14%|█▍        | 1400/10000 [57:43<8:05:18,  3.39s/it]

Total reward after episode 1400 is 1436.0


 14%|█▍        | 1401/10000 [57:44<6:05:11,  2.55s/it]

Total reward after episode 1401 is 252.0


 14%|█▍        | 1402/10000 [57:45<5:15:52,  2.20s/it]

Total reward after episode 1402 is 604.0


 14%|█▍        | 1403/10000 [57:47<4:34:45,  1.92s/it]

Total reward after episode 1403 is 607.0


 14%|█▍        | 1404/10000 [57:48<4:21:17,  1.82s/it]

Total reward after episode 1404 is 631.0


 14%|█▍        | 1405/10000 [57:49<3:56:50,  1.65s/it]

Total reward after episode 1405 is 613.0


 14%|█▍        | 1406/10000 [57:52<4:15:03,  1.78s/it]

Total reward after episode 1406 is 624.0


 14%|█▍        | 1407/10000 [57:55<5:17:10,  2.21s/it]

Total reward after episode 1407 is 1341.0


 14%|█▍        | 1408/10000 [58:02<9:12:55,  3.86s/it]

Total reward after episode 1408 is 3050.0


 14%|█▍        | 1409/10000 [58:06<8:59:25,  3.77s/it]

Total reward after episode 1409 is 1568.0


 14%|█▍        | 1410/10000 [58:07<7:14:06,  3.03s/it]

Total reward after episode 1410 is 616.0


 14%|█▍        | 1411/10000 [58:12<8:08:27,  3.41s/it]

Total reward after episode 1411 is 1924.0


 14%|█▍        | 1412/10000 [58:17<9:14:41,  3.88s/it]

Total reward after episode 1412 is 1843.0


 14%|█▍        | 1413/10000 [58:18<7:21:55,  3.09s/it]

Total reward after episode 1413 is 610.0


 14%|█▍        | 1414/10000 [58:19<6:11:44,  2.60s/it]

Total reward after episode 1414 is 604.0


 14%|█▍        | 1415/10000 [58:23<6:55:38,  2.90s/it]

Total reward after episode 1415 is 1340.0


 14%|█▍        | 1416/10000 [58:24<5:44:49,  2.41s/it]

Total reward after episode 1416 is 611.0


 14%|█▍        | 1417/10000 [58:27<5:59:45,  2.51s/it]

Total reward after episode 1417 is 1043.0


 14%|█▍        | 1418/10000 [58:31<7:16:17,  3.05s/it]

Total reward after episode 1418 is 1563.0


 14%|█▍        | 1419/10000 [58:35<7:52:59,  3.31s/it]

Total reward after episode 1419 is 1182.0


 14%|█▍        | 1420/10000 [58:39<8:05:07,  3.39s/it]

Total reward after episode 1420 is 1702.0


 14%|█▍        | 1421/10000 [58:40<6:32:42,  2.75s/it]

Total reward after episode 1421 is 606.0


 14%|█▍        | 1422/10000 [58:41<5:32:54,  2.33s/it]

Total reward after episode 1422 is 619.0


 14%|█▍        | 1423/10000 [58:45<6:31:23,  2.74s/it]

Total reward after episode 1423 is 1694.0


 14%|█▍        | 1424/10000 [58:47<5:50:02,  2.45s/it]

Total reward after episode 1424 is 635.0


 14%|█▍        | 1425/10000 [58:50<6:04:10,  2.55s/it]

Total reward after episode 1425 is 1338.0


 14%|█▍        | 1426/10000 [58:52<5:41:41,  2.39s/it]

Total reward after episode 1426 is 635.0


 14%|█▍        | 1427/10000 [58:53<4:57:11,  2.08s/it]

Total reward after episode 1427 is 604.0


 14%|█▍        | 1428/10000 [58:56<5:46:51,  2.43s/it]

Total reward after episode 1428 is 1331.0


 14%|█▍        | 1429/10000 [58:59<5:42:48,  2.40s/it]

Total reward after episode 1429 is 1046.0


 14%|█▍        | 1430/10000 [59:00<4:58:05,  2.09s/it]

Total reward after episode 1430 is 607.0


 14%|█▍        | 1431/10000 [59:01<4:31:37,  1.90s/it]

Total reward after episode 1431 is 613.0


 14%|█▍        | 1432/10000 [59:04<4:51:20,  2.04s/it]

Total reward after episode 1432 is 1049.0


 14%|█▍        | 1433/10000 [59:07<5:28:34,  2.30s/it]

Total reward after episode 1433 is 817.0


 14%|█▍        | 1434/10000 [59:10<6:10:16,  2.59s/it]

Total reward after episode 1434 is 1160.0


 14%|█▍        | 1435/10000 [59:15<7:44:33,  3.25s/it]

Total reward after episode 1435 is 1685.0


 14%|█▍        | 1436/10000 [59:16<6:18:45,  2.65s/it]

Total reward after episode 1436 is 611.0


 14%|█▍        | 1437/10000 [59:21<7:52:10,  3.31s/it]

Total reward after episode 1437 is 736.0


 14%|█▍        | 1438/10000 [59:23<7:14:20,  3.04s/it]

Total reward after episode 1438 is 1058.0


 14%|█▍        | 1439/10000 [59:26<7:14:38,  3.05s/it]

Total reward after episode 1439 is 1348.0


 14%|█▍        | 1440/10000 [59:30<7:31:57,  3.17s/it]

Total reward after episode 1440 is 1437.0


 14%|█▍        | 1441/10000 [59:33<7:31:23,  3.16s/it]

Total reward after episode 1441 is 1347.0


 14%|█▍        | 1442/10000 [59:33<5:41:42,  2.40s/it]

Total reward after episode 1442 is 251.0


 14%|█▍        | 1443/10000 [59:35<5:05:23,  2.14s/it]

Total reward after episode 1443 is 613.0


 14%|█▍        | 1444/10000 [59:36<4:00:27,  1.69s/it]

Total reward after episode 1444 is 252.0


 14%|█▍        | 1445/10000 [59:39<5:02:22,  2.12s/it]

Total reward after episode 1445 is 1347.0


 14%|█▍        | 1446/10000 [59:40<4:25:13,  1.86s/it]

Total reward after episode 1446 is 608.0


 14%|█▍        | 1447/10000 [59:43<5:19:56,  2.24s/it]

Total reward after episode 1447 is 1440.0


 14%|█▍        | 1448/10000 [59:44<4:37:55,  1.95s/it]

Total reward after episode 1448 is 609.0


 14%|█▍        | 1449/10000 [59:47<5:18:39,  2.24s/it]

Total reward after episode 1449 is 817.0


 14%|█▍        | 1450/10000 [59:50<5:41:55,  2.40s/it]

Total reward after episode 1450 is 818.0


 15%|█▍        | 1451/10000 [59:53<6:21:54,  2.68s/it]

Total reward after episode 1451 is 1429.0


 15%|█▍        | 1452/10000 [59:54<4:53:32,  2.06s/it]

Total reward after episode 1452 is 251.0


 15%|█▍        | 1453/10000 [59:55<4:20:32,  1.83s/it]

Total reward after episode 1453 is 611.0


 15%|█▍        | 1454/10000 [59:57<4:06:43,  1.73s/it]

Total reward after episode 1454 is 615.0


 15%|█▍        | 1455/10000 [59:57<3:18:46,  1.40s/it]

Total reward after episode 1455 is 243.0


 15%|█▍        | 1456/10000 [59:58<2:49:57,  1.19s/it]

Total reward after episode 1456 is 245.0


 15%|█▍        | 1457/10000 [59:59<2:25:01,  1.02s/it]

Total reward after episode 1457 is 243.0


 15%|█▍        | 1458/10000 [59:59<2:08:55,  1.10it/s]

Total reward after episode 1458 is 245.0


 15%|█▍        | 1459/10000 [1:00:01<2:24:49,  1.02s/it]

Total reward after episode 1459 is 612.0


 15%|█▍        | 1460/10000 [1:00:02<2:42:33,  1.14s/it]

Total reward after episode 1460 is 609.0


 15%|█▍        | 1461/10000 [1:00:03<2:20:57,  1.01it/s]

Total reward after episode 1461 is 247.0


 15%|█▍        | 1462/10000 [1:00:06<3:53:08,  1.64s/it]

Total reward after episode 1462 is 592.0


 15%|█▍        | 1463/10000 [1:00:07<3:44:36,  1.58s/it]

Total reward after episode 1463 is 618.0


 15%|█▍        | 1464/10000 [1:00:12<5:51:45,  2.47s/it]

Total reward after episode 1464 is 1856.0


 15%|█▍        | 1465/10000 [1:00:13<5:10:56,  2.19s/it]

Total reward after episode 1465 is 654.0


 15%|█▍        | 1466/10000 [1:00:15<4:43:56,  2.00s/it]

Total reward after episode 1466 is 642.0


 15%|█▍        | 1467/10000 [1:00:18<5:04:04,  2.14s/it]

Total reward after episode 1467 is 1049.0


 15%|█▍        | 1468/10000 [1:00:36<16:47:57,  7.09s/it]

Total reward after episode 1468 is 2949.0


 15%|█▍        | 1469/10000 [1:00:41<14:54:35,  6.29s/it]

Total reward after episode 1469 is 1335.0


 15%|█▍        | 1470/10000 [1:00:42<11:21:37,  4.79s/it]

Total reward after episode 1470 is 607.0


 15%|█▍        | 1471/10000 [1:00:43<8:52:56,  3.75s/it] 

Total reward after episode 1471 is 613.0


 15%|█▍        | 1472/10000 [1:00:47<9:07:45,  3.85s/it]

Total reward after episode 1472 is 1931.0


 15%|█▍        | 1473/10000 [1:00:48<6:50:21,  2.89s/it]

Total reward after episode 1473 is 248.0


 15%|█▍        | 1474/10000 [1:00:50<6:31:17,  2.75s/it]

Total reward after episode 1474 is 1069.0


 15%|█▍        | 1475/10000 [1:00:54<6:56:30,  2.93s/it]

Total reward after episode 1475 is 1572.0


 15%|█▍        | 1476/10000 [1:00:55<5:46:04,  2.44s/it]

Total reward after episode 1476 is 609.0


 15%|█▍        | 1477/10000 [1:00:56<4:56:01,  2.08s/it]

Total reward after episode 1477 is 608.0


 15%|█▍        | 1478/10000 [1:00:58<4:30:49,  1.91s/it]

Total reward after episode 1478 is 738.0


 15%|█▍        | 1479/10000 [1:00:58<3:35:23,  1.52s/it]

Total reward after episode 1479 is 251.0


 15%|█▍        | 1480/10000 [1:01:01<4:06:55,  1.74s/it]

Total reward after episode 1480 is 1050.0


 15%|█▍        | 1481/10000 [1:01:02<3:46:50,  1.60s/it]

Total reward after episode 1481 is 607.0


 15%|█▍        | 1482/10000 [1:01:15<12:00:11,  5.07s/it]

Total reward after episode 1482 is 3007.0


 15%|█▍        | 1483/10000 [1:01:17<10:01:50,  4.24s/it]

Total reward after episode 1483 is 1052.0


 15%|█▍        | 1484/10000 [1:01:19<7:55:18,  3.35s/it] 

Total reward after episode 1484 is 606.0


 15%|█▍        | 1485/10000 [1:01:21<7:04:50,  2.99s/it]

Total reward after episode 1485 is 1051.0


 15%|█▍        | 1486/10000 [1:01:24<7:09:51,  3.03s/it]

Total reward after episode 1486 is 1351.0


 15%|█▍        | 1487/10000 [1:01:25<5:54:35,  2.50s/it]

Total reward after episode 1487 is 607.0


 15%|█▍        | 1488/10000 [1:01:26<4:34:22,  1.93s/it]

Total reward after episode 1488 is 251.0


 15%|█▍        | 1489/10000 [1:01:27<4:06:22,  1.74s/it]

Total reward after episode 1489 is 608.0


 15%|█▍        | 1490/10000 [1:01:30<4:40:52,  1.98s/it]

Total reward after episode 1490 is 1047.0


 15%|█▍        | 1491/10000 [1:01:33<5:44:13,  2.43s/it]

Total reward after episode 1491 is 1571.0


 15%|█▍        | 1492/10000 [1:01:34<4:55:34,  2.08s/it]

Total reward after episode 1492 is 609.0


 15%|█▍        | 1493/10000 [1:01:37<5:11:21,  2.20s/it]

Total reward after episode 1493 is 1070.0


 15%|█▍        | 1494/10000 [1:01:37<4:03:24,  1.72s/it]

Total reward after episode 1494 is 252.0


 15%|█▍        | 1495/10000 [1:01:40<4:53:06,  2.07s/it]

Total reward after episode 1495 is 1186.0


 15%|█▍        | 1496/10000 [1:01:44<6:07:53,  2.60s/it]

Total reward after episode 1496 is 1692.0


 15%|█▍        | 1497/10000 [1:01:47<6:35:45,  2.79s/it]

Total reward after episode 1497 is 1329.0


 15%|█▍        | 1498/10000 [1:01:50<6:49:38,  2.89s/it]

Total reward after episode 1498 is 1348.0


 15%|█▍        | 1499/10000 [1:01:52<5:41:34,  2.41s/it]

Total reward after episode 1499 is 612.0


 15%|█▌        | 1500/10000 [1:01:53<4:52:51,  2.07s/it]

Total reward after episode 1500 is 606.0


 15%|█▌        | 1501/10000 [1:01:54<4:18:15,  1.82s/it]

Total reward after episode 1501 is 606.0


 15%|█▌        | 1502/10000 [1:01:59<6:19:57,  2.68s/it]

Total reward after episode 1502 is 1860.0


 15%|█▌        | 1503/10000 [1:02:00<5:27:24,  2.31s/it]

Total reward after episode 1503 is 606.0


 15%|█▌        | 1504/10000 [1:02:03<5:43:24,  2.43s/it]

Total reward after episode 1504 is 1162.0


 15%|█▌        | 1505/10000 [1:02:05<5:04:22,  2.15s/it]

Total reward after episode 1505 is 738.0


 15%|█▌        | 1506/10000 [1:02:06<4:26:49,  1.88s/it]

Total reward after episode 1506 is 607.0


 15%|█▌        | 1507/10000 [1:02:08<4:17:09,  1.82s/it]

Total reward after episode 1507 is 606.0


 15%|█▌        | 1508/10000 [1:02:09<3:53:33,  1.65s/it]

Total reward after episode 1508 is 606.0


 15%|█▌        | 1509/10000 [1:02:10<3:43:30,  1.58s/it]

Total reward after episode 1509 is 607.0


 15%|█▌        | 1510/10000 [1:02:14<5:01:13,  2.13s/it]

Total reward after episode 1510 is 1324.0


 15%|█▌        | 1511/10000 [1:02:15<4:24:02,  1.87s/it]

Total reward after episode 1511 is 606.0


 15%|█▌        | 1512/10000 [1:02:18<5:13:18,  2.21s/it]

Total reward after episode 1512 is 1428.0


 15%|█▌        | 1513/10000 [1:02:20<4:58:46,  2.11s/it]

Total reward after episode 1513 is 623.0


 15%|█▌        | 1514/10000 [1:02:21<4:32:57,  1.93s/it]

Total reward after episode 1514 is 634.0


 15%|█▌        | 1515/10000 [1:02:24<5:24:12,  2.29s/it]

Total reward after episode 1515 is 1329.0


 15%|█▌        | 1516/10000 [1:02:26<5:03:12,  2.14s/it]

Total reward after episode 1516 is 606.0


 15%|█▌        | 1517/10000 [1:02:28<4:26:28,  1.88s/it]

Total reward after episode 1517 is 611.0


 15%|█▌        | 1518/10000 [1:02:32<6:01:16,  2.56s/it]

Total reward after episode 1518 is 1417.0


 15%|█▌        | 1519/10000 [1:02:35<6:36:01,  2.80s/it]

Total reward after episode 1519 is 1438.0


 15%|█▌        | 1520/10000 [1:02:36<5:31:29,  2.35s/it]

Total reward after episode 1520 is 608.0


 15%|█▌        | 1521/10000 [1:02:41<6:59:30,  2.97s/it]

Total reward after episode 1521 is 1863.0


 15%|█▌        | 1522/10000 [1:02:42<5:58:07,  2.53s/it]

Total reward after episode 1522 is 634.0


 15%|█▌        | 1523/10000 [1:02:45<6:13:55,  2.65s/it]

Total reward after episode 1523 is 817.0


 15%|█▌        | 1524/10000 [1:02:48<6:02:07,  2.56s/it]

Total reward after episode 1524 is 1072.0


 15%|█▌        | 1525/10000 [1:02:51<6:45:30,  2.87s/it]

Total reward after episode 1525 is 1328.0


 15%|█▌        | 1526/10000 [1:02:52<5:37:24,  2.39s/it]

Total reward after episode 1526 is 610.0


 15%|█▌        | 1527/10000 [1:02:53<4:22:09,  1.86s/it]

Total reward after episode 1527 is 251.0


 15%|█▌        | 1528/10000 [1:02:57<6:05:26,  2.59s/it]

Total reward after episode 1528 is 1688.0


 15%|█▌        | 1529/10000 [1:03:00<6:20:11,  2.69s/it]

Total reward after episode 1529 is 1331.0


 15%|█▌        | 1530/10000 [1:03:01<4:53:03,  2.08s/it]

Total reward after episode 1530 is 250.0


 15%|█▌        | 1531/10000 [1:03:04<5:33:33,  2.36s/it]

Total reward after episode 1531 is 1431.0


 15%|█▌        | 1532/10000 [1:03:05<4:46:09,  2.03s/it]

Total reward after episode 1532 is 606.0


 15%|█▌        | 1533/10000 [1:03:06<3:46:51,  1.61s/it]

Total reward after episode 1533 is 251.0


 15%|█▌        | 1534/10000 [1:03:07<3:32:43,  1.51s/it]

Total reward after episode 1534 is 611.0


 15%|█▌        | 1535/10000 [1:03:08<3:30:26,  1.49s/it]

Total reward after episode 1535 is 605.0


 15%|█▌        | 1536/10000 [1:03:13<5:18:00,  2.25s/it]

Total reward after episode 1536 is 1575.0


 15%|█▌        | 1537/10000 [1:03:15<5:38:09,  2.40s/it]

Total reward after episode 1537 is 1055.0


 15%|█▌        | 1538/10000 [1:03:17<4:55:11,  2.09s/it]

Total reward after episode 1538 is 608.0


 15%|█▌        | 1539/10000 [1:03:18<4:20:05,  1.84s/it]

Total reward after episode 1539 is 608.0


 15%|█▌        | 1540/10000 [1:03:20<4:50:56,  2.06s/it]

Total reward after episode 1540 is 1071.0


 15%|█▌        | 1541/10000 [1:03:22<4:26:53,  1.89s/it]

Total reward after episode 1541 is 613.0


 15%|█▌        | 1542/10000 [1:03:23<4:11:04,  1.78s/it]

Total reward after episode 1542 is 634.0


 15%|█▌        | 1543/10000 [1:03:24<3:23:47,  1.45s/it]

Total reward after episode 1543 is 249.0


 15%|█▌        | 1544/10000 [1:03:25<2:48:09,  1.19s/it]

Total reward after episode 1544 is 251.0


 15%|█▌        | 1545/10000 [1:03:27<3:40:06,  1.56s/it]

Total reward after episode 1545 is 1046.0


 15%|█▌        | 1546/10000 [1:03:28<2:59:10,  1.27s/it]

Total reward after episode 1546 is 251.0


 15%|█▌        | 1547/10000 [1:03:29<2:58:47,  1.27s/it]

Total reward after episode 1547 is 611.0


 15%|█▌        | 1548/10000 [1:03:30<2:32:06,  1.08s/it]

Total reward after episode 1548 is 249.0


 15%|█▌        | 1549/10000 [1:03:32<3:04:42,  1.31s/it]

Total reward after episode 1549 is 599.0


 16%|█▌        | 1550/10000 [1:03:33<3:09:19,  1.34s/it]

Total reward after episode 1550 is 603.0


 16%|█▌        | 1551/10000 [1:03:34<3:06:23,  1.32s/it]

Total reward after episode 1551 is 606.0


 16%|█▌        | 1552/10000 [1:03:36<3:04:14,  1.31s/it]

Total reward after episode 1552 is 612.0


 16%|█▌        | 1553/10000 [1:03:37<3:02:49,  1.30s/it]

Total reward after episode 1553 is 611.0


 16%|█▌        | 1554/10000 [1:03:40<4:39:43,  1.99s/it]

Total reward after episode 1554 is 1040.0


 16%|█▌        | 1555/10000 [1:03:42<4:12:59,  1.80s/it]

Total reward after episode 1555 is 604.0


 16%|█▌        | 1556/10000 [1:03:43<4:09:59,  1.78s/it]

Total reward after episode 1556 is 611.0


 16%|█▌        | 1557/10000 [1:03:45<3:47:20,  1.62s/it]

Total reward after episode 1557 is 606.0


 16%|█▌        | 1558/10000 [1:03:48<4:42:31,  2.01s/it]

Total reward after episode 1558 is 817.0


 16%|█▌        | 1559/10000 [1:03:53<6:46:09,  2.89s/it]

Total reward after episode 1559 is 1919.0


 16%|█▌        | 1560/10000 [1:03:53<5:12:06,  2.22s/it]

Total reward after episode 1560 is 249.0


 16%|█▌        | 1561/10000 [1:03:54<4:31:37,  1.93s/it]

Total reward after episode 1561 is 606.0


 16%|█▌        | 1562/10000 [1:03:56<4:12:15,  1.79s/it]

Total reward after episode 1562 is 610.0


 16%|█▌        | 1563/10000 [1:04:00<5:58:22,  2.55s/it]

Total reward after episode 1563 is 1426.0


 16%|█▌        | 1564/10000 [1:04:02<5:18:18,  2.26s/it]

Total reward after episode 1564 is 604.0


 16%|█▌        | 1565/10000 [1:04:05<5:39:21,  2.41s/it]

Total reward after episode 1565 is 1338.0


 16%|█▌        | 1566/10000 [1:04:08<6:21:25,  2.71s/it]

Total reward after episode 1566 is 1157.0


 16%|█▌        | 1567/10000 [1:04:11<6:39:08,  2.84s/it]

Total reward after episode 1567 is 1343.0


 16%|█▌        | 1568/10000 [1:04:12<5:33:16,  2.37s/it]

Total reward after episode 1568 is 609.0


 16%|█▌        | 1569/10000 [1:04:14<4:59:19,  2.13s/it]

Total reward after episode 1569 is 608.0


 16%|█▌        | 1570/10000 [1:04:15<4:22:32,  1.87s/it]

Total reward after episode 1570 is 610.0


 16%|█▌        | 1571/10000 [1:04:17<3:56:52,  1.69s/it]

Total reward after episode 1571 is 611.0


 16%|█▌        | 1572/10000 [1:04:21<5:38:03,  2.41s/it]

Total reward after episode 1572 is 1864.0


 16%|█▌        | 1573/10000 [1:04:22<4:51:37,  2.08s/it]

Total reward after episode 1573 is 613.0


 16%|█▌        | 1574/10000 [1:04:24<4:31:19,  1.93s/it]

Total reward after episode 1574 is 608.0


 16%|█▌        | 1575/10000 [1:04:26<5:15:13,  2.24s/it]

Total reward after episode 1575 is 817.0


 16%|█▌        | 1576/10000 [1:04:28<4:33:24,  1.95s/it]

Total reward after episode 1576 is 607.0


 16%|█▌        | 1577/10000 [1:04:28<3:36:25,  1.54s/it]

Total reward after episode 1577 is 249.0


 16%|█▌        | 1578/10000 [1:04:33<5:45:03,  2.46s/it]

Total reward after episode 1578 is 1696.0


 16%|█▌        | 1579/10000 [1:04:34<4:54:06,  2.10s/it]

Total reward after episode 1579 is 607.0


 16%|█▌        | 1580/10000 [1:04:35<3:51:57,  1.65s/it]

Total reward after episode 1580 is 251.0


 16%|█▌        | 1581/10000 [1:04:38<4:52:08,  2.08s/it]

Total reward after episode 1581 is 1350.0


 16%|█▌        | 1582/10000 [1:04:40<5:07:10,  2.19s/it]

Total reward after episode 1582 is 1047.0


 16%|█▌        | 1583/10000 [1:04:44<5:49:19,  2.49s/it]

Total reward after episode 1583 is 1349.0


 16%|█▌        | 1584/10000 [1:04:47<6:51:30,  2.93s/it]

Total reward after episode 1584 is 1861.0


 16%|█▌        | 1585/10000 [1:04:51<7:03:21,  3.02s/it]

Total reward after episode 1585 is 1039.0


 16%|█▌        | 1586/10000 [1:04:53<6:12:16,  2.65s/it]

Total reward after episode 1586 is 613.0


 16%|█▌        | 1587/10000 [1:04:55<6:18:59,  2.70s/it]

Total reward after episode 1587 is 1348.0


 16%|█▌        | 1588/10000 [1:04:57<5:44:22,  2.46s/it]

Total reward after episode 1588 is 639.0


 16%|█▌        | 1589/10000 [1:05:00<6:01:50,  2.58s/it]

Total reward after episode 1589 is 1347.0


 16%|█▌        | 1590/10000 [1:05:04<6:51:45,  2.94s/it]

Total reward after episode 1590 is 1573.0


 16%|█▌        | 1591/10000 [1:05:05<5:51:34,  2.51s/it]

Total reward after episode 1591 is 605.0


 16%|█▌        | 1592/10000 [1:05:09<6:48:21,  2.91s/it]

Total reward after episode 1592 is 1571.0


 16%|█▌        | 1593/10000 [1:05:10<5:38:54,  2.42s/it]

Total reward after episode 1593 is 608.0


 16%|█▌        | 1594/10000 [1:05:11<4:22:31,  1.87s/it]

Total reward after episode 1594 is 251.0


 16%|█▌        | 1595/10000 [1:05:13<4:24:23,  1.89s/it]

Total reward after episode 1595 is 602.0


 16%|█▌        | 1596/10000 [1:05:15<4:27:36,  1.91s/it]

Total reward after episode 1596 is 614.0


 16%|█▌        | 1597/10000 [1:05:18<5:23:50,  2.31s/it]

Total reward after episode 1597 is 1440.0


 16%|█▌        | 1598/10000 [1:05:20<4:56:24,  2.12s/it]

Total reward after episode 1598 is 606.0


 16%|█▌        | 1599/10000 [1:05:21<3:56:07,  1.69s/it]

Total reward after episode 1599 is 240.0


 16%|█▌        | 1600/10000 [1:05:22<3:49:46,  1.64s/it]

Total reward after episode 1600 is 623.0


 16%|█▌        | 1601/10000 [1:05:24<3:45:04,  1.61s/it]

Total reward after episode 1601 is 654.0


 16%|█▌        | 1602/10000 [1:05:27<4:50:17,  2.07s/it]

Total reward after episode 1602 is 1347.0


 16%|█▌        | 1603/10000 [1:05:28<4:27:15,  1.91s/it]

Total reward after episode 1603 is 608.0


 16%|█▌        | 1604/10000 [1:05:29<3:34:24,  1.53s/it]

Total reward after episode 1604 is 250.0


 16%|█▌        | 1605/10000 [1:05:33<5:16:58,  2.27s/it]

Total reward after episode 1605 is 1429.0


 16%|█▌        | 1606/10000 [1:05:35<5:27:49,  2.34s/it]

Total reward after episode 1606 is 738.0


 16%|█▌        | 1607/10000 [1:05:37<4:43:41,  2.03s/it]

Total reward after episode 1607 is 611.0


 16%|█▌        | 1608/10000 [1:05:50<12:36:21,  5.41s/it]

Total reward after episode 1608 is 2999.0


 16%|█▌        | 1609/10000 [1:05:53<10:49:47,  4.65s/it]

Total reward after episode 1609 is 1344.0


 16%|█▌        | 1610/10000 [1:05:56<9:41:33,  4.16s/it] 

Total reward after episode 1610 is 1330.0


 16%|█▌        | 1611/10000 [1:06:00<9:16:05,  3.98s/it]

Total reward after episode 1611 is 1435.0


 16%|█▌        | 1612/10000 [1:06:02<8:13:14,  3.53s/it]

Total reward after episode 1612 is 1045.0


 16%|█▌        | 1613/10000 [1:06:03<6:38:33,  2.85s/it]

Total reward after episode 1613 is 606.0


 16%|█▌        | 1614/10000 [1:06:05<6:00:21,  2.58s/it]

Total reward after episode 1614 is 609.0


 16%|█▌        | 1615/10000 [1:06:06<5:04:28,  2.18s/it]

Total reward after episode 1615 is 608.0


 16%|█▌        | 1616/10000 [1:06:07<3:58:58,  1.71s/it]

Total reward after episode 1616 is 251.0


 16%|█▌        | 1617/10000 [1:06:13<6:56:00,  2.98s/it]

Total reward after episode 1617 is 1324.0


 16%|█▌        | 1618/10000 [1:06:15<5:54:53,  2.54s/it]

Total reward after episode 1618 is 637.0


 16%|█▌        | 1619/10000 [1:06:15<4:33:25,  1.96s/it]

Total reward after episode 1619 is 249.0


 16%|█▌        | 1620/10000 [1:06:16<4:08:21,  1.78s/it]

Total reward after episode 1620 is 615.0


 16%|█▌        | 1621/10000 [1:06:17<3:19:12,  1.43s/it]

Total reward after episode 1621 is 251.0


 16%|█▌        | 1622/10000 [1:06:18<3:17:28,  1.41s/it]

Total reward after episode 1622 is 624.0


 16%|█▌        | 1623/10000 [1:06:22<4:50:51,  2.08s/it]

Total reward after episode 1623 is 1059.0


 16%|█▌        | 1624/10000 [1:06:23<4:16:10,  1.84s/it]

Total reward after episode 1624 is 608.0


 16%|█▋        | 1625/10000 [1:06:25<4:06:27,  1.77s/it]

Total reward after episode 1625 is 653.0


 16%|█▋        | 1626/10000 [1:06:28<5:20:05,  2.29s/it]

Total reward after episode 1626 is 1152.0


 16%|█▋        | 1627/10000 [1:06:36<8:58:12,  3.86s/it]

Total reward after episode 1627 is 1671.0


 16%|█▋        | 1628/10000 [1:06:37<7:19:12,  3.15s/it]

Total reward after episode 1628 is 654.0


 16%|█▋        | 1629/10000 [1:06:41<7:53:01,  3.39s/it]

Total reward after episode 1629 is 1031.0


 16%|█▋        | 1630/10000 [1:06:43<6:37:48,  2.85s/it]

Total reward after episode 1630 is 603.0


 16%|█▋        | 1631/10000 [1:06:46<6:58:12,  3.00s/it]

Total reward after episode 1631 is 1429.0


 16%|█▋        | 1632/10000 [1:06:50<7:10:01,  3.08s/it]

Total reward after episode 1632 is 1434.0


 16%|█▋        | 1633/10000 [1:06:50<5:30:27,  2.37s/it]

Total reward after episode 1633 is 247.0


 16%|█▋        | 1634/10000 [1:06:54<6:20:05,  2.73s/it]

Total reward after episode 1634 is 1038.0


 16%|█▋        | 1635/10000 [1:06:55<5:29:14,  2.36s/it]

Total reward after episode 1635 is 618.0


 16%|█▋        | 1636/10000 [1:06:59<6:07:52,  2.64s/it]

Total reward after episode 1636 is 588.0


 16%|█▋        | 1637/10000 [1:07:02<6:48:30,  2.93s/it]

Total reward after episode 1637 is 1331.0


 16%|█▋        | 1638/10000 [1:07:07<7:42:38,  3.32s/it]

Total reward after episode 1638 is 1698.0


 16%|█▋        | 1639/10000 [1:07:10<7:51:59,  3.39s/it]

Total reward after episode 1639 is 1696.0


 16%|█▋        | 1640/10000 [1:07:11<5:55:41,  2.55s/it]

Total reward after episode 1640 is 251.0


 16%|█▋        | 1641/10000 [1:07:12<4:42:33,  2.03s/it]

Total reward after episode 1641 is 234.0


 16%|█▋        | 1642/10000 [1:07:13<4:10:28,  1.80s/it]

Total reward after episode 1642 is 611.0


 16%|█▋        | 1643/10000 [1:07:14<3:48:10,  1.64s/it]

Total reward after episode 1643 is 608.0


 16%|█▋        | 1644/10000 [1:07:17<4:45:54,  2.05s/it]

Total reward after episode 1644 is 1441.0


 16%|█▋        | 1645/10000 [1:07:18<4:16:31,  1.84s/it]

Total reward after episode 1645 is 605.0


 16%|█▋        | 1646/10000 [1:07:20<3:55:44,  1.69s/it]

Total reward after episode 1646 is 606.0


 16%|█▋        | 1647/10000 [1:07:21<3:53:24,  1.68s/it]

Total reward after episode 1647 is 637.0


 16%|█▋        | 1648/10000 [1:07:23<3:39:56,  1.58s/it]

Total reward after episode 1648 is 607.0


 16%|█▋        | 1649/10000 [1:07:24<3:42:09,  1.60s/it]

Total reward after episode 1649 is 603.0


 16%|█▋        | 1650/10000 [1:07:25<3:01:45,  1.31s/it]

Total reward after episode 1650 is 251.0


 17%|█▋        | 1651/10000 [1:07:28<4:20:37,  1.87s/it]

Total reward after episode 1651 is 1342.0


 17%|█▋        | 1652/10000 [1:07:31<5:16:10,  2.27s/it]

Total reward after episode 1652 is 1346.0


 17%|█▋        | 1653/10000 [1:07:35<6:09:07,  2.65s/it]

Total reward after episode 1653 is 1436.0


 17%|█▋        | 1654/10000 [1:07:38<6:28:18,  2.79s/it]

Total reward after episode 1654 is 1436.0


 17%|█▋        | 1655/10000 [1:07:40<5:38:02,  2.43s/it]

Total reward after episode 1655 is 614.0


 17%|█▋        | 1656/10000 [1:07:48<9:34:33,  4.13s/it]

Total reward after episode 1656 is 1303.0


 17%|█▋        | 1657/10000 [1:07:50<8:31:22,  3.68s/it]

Total reward after episode 1657 is 1156.0


 17%|█▋        | 1658/10000 [1:07:54<8:49:42,  3.81s/it]

Total reward after episode 1658 is 1927.0


 17%|█▋        | 1659/10000 [1:07:57<7:50:55,  3.39s/it]

Total reward after episode 1659 is 1049.0


 17%|█▋        | 1660/10000 [1:07:58<6:21:54,  2.75s/it]

Total reward after episode 1660 is 606.0


 17%|█▋        | 1661/10000 [1:08:01<6:38:55,  2.87s/it]

Total reward after episode 1661 is 1346.0


 17%|█▋        | 1662/10000 [1:08:05<7:15:11,  3.13s/it]

Total reward after episode 1662 is 1424.0


 17%|█▋        | 1663/10000 [1:08:08<7:05:33,  3.06s/it]

Total reward after episode 1663 is 1336.0


 17%|█▋        | 1664/10000 [1:08:10<6:03:45,  2.62s/it]

Total reward after episode 1664 is 627.0


 17%|█▋        | 1665/10000 [1:08:11<5:08:00,  2.22s/it]

Total reward after episode 1665 is 608.0


 17%|█▋        | 1666/10000 [1:08:12<4:27:52,  1.93s/it]

Total reward after episode 1666 is 610.0


 17%|█▋        | 1667/10000 [1:08:14<4:16:53,  1.85s/it]

Total reward after episode 1667 is 735.0


 17%|█▋        | 1668/10000 [1:08:15<3:54:01,  1.69s/it]

Total reward after episode 1668 is 609.0


 17%|█▋        | 1669/10000 [1:08:16<3:11:25,  1.38s/it]

Total reward after episode 1669 is 244.0


 17%|█▋        | 1670/10000 [1:08:19<4:14:30,  1.83s/it]

Total reward after episode 1670 is 1054.0


 17%|█▋        | 1671/10000 [1:08:22<5:08:15,  2.22s/it]

Total reward after episode 1671 is 1439.0


 17%|█▋        | 1672/10000 [1:08:24<5:12:01,  2.25s/it]

Total reward after episode 1672 is 1050.0


 17%|█▋        | 1673/10000 [1:08:27<5:33:02,  2.40s/it]

Total reward after episode 1673 is 1056.0


 17%|█▋        | 1674/10000 [1:08:28<4:53:54,  2.12s/it]

Total reward after episode 1674 is 605.0


 17%|█▋        | 1675/10000 [1:08:31<5:20:17,  2.31s/it]

Total reward after episode 1675 is 1046.0


 17%|█▋        | 1676/10000 [1:08:34<5:48:33,  2.51s/it]

Total reward after episode 1676 is 1440.0


 17%|█▋        | 1677/10000 [1:08:35<4:56:50,  2.14s/it]

Total reward after episode 1677 is 608.0


 17%|█▋        | 1678/10000 [1:08:37<4:19:47,  1.87s/it]

Total reward after episode 1678 is 610.0


 17%|█▋        | 1679/10000 [1:08:38<3:54:16,  1.69s/it]

Total reward after episode 1679 is 606.0


 17%|█▋        | 1680/10000 [1:08:39<3:49:56,  1.66s/it]

Total reward after episode 1680 is 617.0


 17%|█▋        | 1681/10000 [1:08:41<3:36:04,  1.56s/it]

Total reward after episode 1681 is 606.0


 17%|█▋        | 1682/10000 [1:08:44<4:30:30,  1.95s/it]

Total reward after episode 1682 is 1046.0


 17%|█▋        | 1683/10000 [1:08:47<5:29:20,  2.38s/it]

Total reward after episode 1683 is 1438.0


 17%|█▋        | 1684/10000 [1:08:48<4:15:56,  1.85s/it]

Total reward after episode 1684 is 251.0


 17%|█▋        | 1685/10000 [1:08:52<6:02:56,  2.62s/it]

Total reward after episode 1685 is 1870.0


 17%|█▋        | 1686/10000 [1:08:55<6:12:17,  2.69s/it]

Total reward after episode 1686 is 1349.0


 17%|█▋        | 1687/10000 [1:08:58<6:31:56,  2.83s/it]

Total reward after episode 1687 is 1439.0


 17%|█▋        | 1688/10000 [1:09:00<6:17:15,  2.72s/it]

Total reward after episode 1688 is 1045.0


 17%|█▋        | 1689/10000 [1:09:03<6:19:30,  2.74s/it]

Total reward after episode 1689 is 1350.0


 17%|█▋        | 1690/10000 [1:09:04<4:51:18,  2.10s/it]

Total reward after episode 1690 is 252.0


 17%|█▋        | 1691/10000 [1:09:05<4:30:06,  1.95s/it]

Total reward after episode 1691 is 603.0


 17%|█▋        | 1692/10000 [1:09:08<5:10:31,  2.24s/it]

Total reward after episode 1692 is 1441.0


 17%|█▋        | 1693/10000 [1:09:09<4:07:12,  1.79s/it]

Total reward after episode 1693 is 251.0


 17%|█▋        | 1694/10000 [1:09:11<4:00:42,  1.74s/it]

Total reward after episode 1694 is 605.0


 17%|█▋        | 1695/10000 [1:09:14<4:52:40,  2.11s/it]

Total reward after episode 1695 is 1043.0


 17%|█▋        | 1696/10000 [1:09:15<4:17:43,  1.86s/it]

Total reward after episode 1696 is 610.0


 17%|█▋        | 1697/10000 [1:09:16<3:25:57,  1.49s/it]

Total reward after episode 1697 is 251.0


 17%|█▋        | 1698/10000 [1:09:17<3:31:23,  1.53s/it]

Total reward after episode 1698 is 614.0


 17%|█▋        | 1699/10000 [1:09:19<3:35:39,  1.56s/it]

Total reward after episode 1699 is 612.0


 17%|█▋        | 1700/10000 [1:09:22<4:37:27,  2.01s/it]

Total reward after episode 1700 is 1442.0


 17%|█▋        | 1701/10000 [1:09:26<5:50:27,  2.53s/it]

Total reward after episode 1701 is 1329.0


 17%|█▋        | 1702/10000 [1:09:30<7:07:33,  3.09s/it]

Total reward after episode 1702 is 1850.0


 17%|█▋        | 1703/10000 [1:09:31<5:51:14,  2.54s/it]

Total reward after episode 1703 is 611.0


 17%|█▋        | 1704/10000 [1:09:34<5:59:56,  2.60s/it]

Total reward after episode 1704 is 1351.0


 17%|█▋        | 1705/10000 [1:09:36<5:18:48,  2.31s/it]

Total reward after episode 1705 is 608.0


 17%|█▋        | 1706/10000 [1:09:40<6:32:41,  2.84s/it]

Total reward after episode 1706 is 1925.0


 17%|█▋        | 1707/10000 [1:09:41<5:29:17,  2.38s/it]

Total reward after episode 1707 is 607.0


 17%|█▋        | 1708/10000 [1:09:42<4:19:32,  1.88s/it]

Total reward after episode 1708 is 238.0


 17%|█▋        | 1709/10000 [1:09:42<3:28:42,  1.51s/it]

Total reward after episode 1709 is 249.0


 17%|█▋        | 1710/10000 [1:09:47<5:34:19,  2.42s/it]

Total reward after episode 1710 is 1869.0


 17%|█▋        | 1711/10000 [1:09:48<4:45:30,  2.07s/it]

Total reward after episode 1711 is 607.0


 17%|█▋        | 1712/10000 [1:09:50<4:22:27,  1.90s/it]

Total reward after episode 1712 is 610.0


 17%|█▋        | 1713/10000 [1:09:53<5:03:23,  2.20s/it]

Total reward after episode 1713 is 1350.0


 17%|█▋        | 1714/10000 [1:10:08<14:24:52,  6.26s/it]

Total reward after episode 1714 is 2270.0


 17%|█▋        | 1715/10000 [1:10:11<12:04:08,  5.24s/it]

Total reward after episode 1715 is 1352.0


 17%|█▋        | 1716/10000 [1:10:15<11:04:26,  4.81s/it]

Total reward after episode 1716 is 1692.0


 17%|█▋        | 1717/10000 [1:10:18<9:46:44,  4.25s/it] 

Total reward after episode 1717 is 1334.0


 17%|█▋        | 1718/10000 [1:10:19<7:49:33,  3.40s/it]

Total reward after episode 1718 is 607.0


 17%|█▋        | 1719/10000 [1:10:22<7:20:46,  3.19s/it]

Total reward after episode 1719 is 1354.0


 17%|█▋        | 1720/10000 [1:10:25<7:01:06,  3.05s/it]

Total reward after episode 1720 is 1354.0


 17%|█▋        | 1721/10000 [1:10:28<7:04:03,  3.07s/it]

Total reward after episode 1721 is 1436.0


 17%|█▋        | 1722/10000 [1:10:57<24:57:45, 10.86s/it]

Total reward after episode 1722 is 2853.0


 17%|█▋        | 1723/10000 [1:10:58<18:20:02,  7.97s/it]

Total reward after episode 1723 is 606.0


 17%|█▋        | 1724/10000 [1:11:01<14:55:35,  6.49s/it]

Total reward after episode 1724 is 1333.0


 17%|█▋        | 1725/10000 [1:11:05<13:18:13,  5.79s/it]

Total reward after episode 1725 is 1865.0


 17%|█▋        | 1726/10000 [1:11:09<11:54:43,  5.18s/it]

Total reward after episode 1726 is 1706.0


 17%|█▋        | 1727/10000 [1:11:13<10:52:12,  4.73s/it]

Total reward after episode 1727 is 1433.0


 17%|█▋        | 1728/10000 [1:11:17<10:26:58,  4.55s/it]

Total reward after episode 1728 is 1697.0


 17%|█▋        | 1729/10000 [1:11:18<7:46:53,  3.39s/it] 

Total reward after episode 1729 is 240.0


 17%|█▋        | 1730/10000 [1:11:21<7:59:00,  3.48s/it]

Total reward after episode 1730 is 1702.0


 17%|█▋        | 1731/10000 [1:11:23<6:36:12,  2.87s/it]

Total reward after episode 1731 is 607.0


 17%|█▋        | 1732/10000 [1:11:24<5:30:00,  2.39s/it]

Total reward after episode 1732 is 610.0


 17%|█▋        | 1733/10000 [1:11:25<4:45:59,  2.08s/it]

Total reward after episode 1733 is 614.0


 17%|█▋        | 1734/10000 [1:11:27<4:25:01,  1.92s/it]

Total reward after episode 1734 is 637.0


 17%|█▋        | 1735/10000 [1:11:30<5:11:40,  2.26s/it]

Total reward after episode 1735 is 1443.0


 17%|█▋        | 1736/10000 [1:11:33<5:36:45,  2.44s/it]

Total reward after episode 1736 is 1351.0


 17%|█▋        | 1737/10000 [1:11:36<6:25:20,  2.80s/it]

Total reward after episode 1737 is 1347.0


 17%|█▋        | 1738/10000 [1:11:39<6:31:17,  2.84s/it]

Total reward after episode 1738 is 1335.0


 17%|█▋        | 1739/10000 [1:11:45<8:40:04,  3.78s/it]

Total reward after episode 1739 is 1317.0


 17%|█▋        | 1740/10000 [1:11:48<8:06:56,  3.54s/it]

Total reward after episode 1740 is 1442.0


 17%|█▋        | 1741/10000 [1:11:50<6:34:50,  2.87s/it]

Total reward after episode 1741 is 609.0


 17%|█▋        | 1742/10000 [1:11:51<5:42:00,  2.48s/it]

Total reward after episode 1742 is 628.0


 17%|█▋        | 1743/10000 [1:11:52<4:26:59,  1.94s/it]

Total reward after episode 1743 is 244.0


 17%|█▋        | 1744/10000 [1:11:55<5:16:14,  2.30s/it]

Total reward after episode 1744 is 1356.0


 17%|█▋        | 1745/10000 [1:11:58<5:27:50,  2.38s/it]

Total reward after episode 1745 is 1153.0


 17%|█▋        | 1746/10000 [1:11:59<4:55:23,  2.15s/it]

Total reward after episode 1746 is 622.0


 17%|█▋        | 1747/10000 [1:12:01<4:29:02,  1.96s/it]

Total reward after episode 1747 is 633.0


 17%|█▋        | 1748/10000 [1:12:05<6:02:55,  2.64s/it]

Total reward after episode 1748 is 1867.0


 17%|█▋        | 1749/10000 [1:12:09<6:40:41,  2.91s/it]

Total reward after episode 1749 is 1572.0


 18%|█▊        | 1750/10000 [1:12:11<6:41:24,  2.92s/it]

Total reward after episode 1750 is 1332.0


 18%|█▊        | 1751/10000 [1:12:16<7:48:56,  3.41s/it]

Total reward after episode 1751 is 1688.0


 18%|█▊        | 1752/10000 [1:12:20<7:55:55,  3.46s/it]

Total reward after episode 1752 is 1702.0


 18%|█▊        | 1753/10000 [1:12:21<6:28:43,  2.83s/it]

Total reward after episode 1753 is 608.0


 18%|█▊        | 1754/10000 [1:12:23<5:42:28,  2.49s/it]

Total reward after episode 1754 is 609.0


 18%|█▊        | 1755/10000 [1:12:26<6:27:00,  2.82s/it]

Total reward after episode 1755 is 1435.0


 18%|█▊        | 1756/10000 [1:12:30<7:18:14,  3.19s/it]

Total reward after episode 1756 is 1930.0


 18%|█▊        | 1757/10000 [1:12:33<7:16:42,  3.18s/it]

Total reward after episode 1757 is 1348.0


 18%|█▊        | 1758/10000 [1:12:37<7:14:48,  3.17s/it]

Total reward after episode 1758 is 1439.0


 18%|█▊        | 1759/10000 [1:12:38<6:02:50,  2.64s/it]

Total reward after episode 1759 is 606.0


 18%|█▊        | 1760/10000 [1:12:42<6:56:07,  3.03s/it]

Total reward after episode 1760 is 1859.0


 18%|█▊        | 1761/10000 [1:12:45<6:52:15,  3.00s/it]

Total reward after episode 1761 is 1334.0


 18%|█▊        | 1762/10000 [1:12:48<7:02:01,  3.07s/it]

Total reward after episode 1762 is 1434.0


 18%|█▊        | 1763/10000 [1:12:49<5:48:04,  2.54s/it]

Total reward after episode 1763 is 610.0


 18%|█▊        | 1764/10000 [1:12:52<6:07:09,  2.67s/it]

Total reward after episode 1764 is 1438.0


 18%|█▊        | 1765/10000 [1:12:57<7:44:18,  3.38s/it]

Total reward after episode 1765 is 797.0


 18%|█▊        | 1766/10000 [1:13:10<14:06:59,  6.17s/it]

Total reward after episode 1766 is 3004.0


 18%|█▊        | 1767/10000 [1:13:11<10:44:47,  4.70s/it]

Total reward after episode 1767 is 609.0


 18%|█▊        | 1768/10000 [1:13:14<9:25:52,  4.12s/it] 

Total reward after episode 1768 is 1341.0


 18%|█▊        | 1769/10000 [1:13:17<8:35:21,  3.76s/it]

Total reward after episode 1769 is 1350.0


 18%|█▊        | 1770/10000 [1:13:20<7:57:10,  3.48s/it]

Total reward after episode 1770 is 1338.0


 18%|█▊        | 1771/10000 [1:13:25<8:55:37,  3.91s/it]

Total reward after episode 1771 is 1335.0


 18%|█▊        | 1772/10000 [1:13:26<7:07:51,  3.12s/it]

Total reward after episode 1772 is 608.0


 18%|█▊        | 1773/10000 [1:13:30<7:31:39,  3.29s/it]

Total reward after episode 1773 is 1428.0


 18%|█▊        | 1774/10000 [1:13:31<6:07:33,  2.68s/it]

Total reward after episode 1774 is 606.0


 18%|█▊        | 1775/10000 [1:13:34<6:19:16,  2.77s/it]

Total reward after episode 1775 is 1350.0


 18%|█▊        | 1776/10000 [1:13:35<5:19:52,  2.33s/it]

Total reward after episode 1776 is 612.0


 18%|█▊        | 1777/10000 [1:13:38<5:22:06,  2.35s/it]

Total reward after episode 1777 is 1058.0


 18%|█▊        | 1778/10000 [1:13:38<4:10:07,  1.83s/it]

Total reward after episode 1778 is 247.0


 18%|█▊        | 1779/10000 [1:13:40<3:48:59,  1.67s/it]

Total reward after episode 1779 is 610.0


 18%|█▊        | 1780/10000 [1:13:41<3:44:33,  1.64s/it]

Total reward after episode 1780 is 637.0


 18%|█▊        | 1781/10000 [1:13:43<3:46:17,  1.65s/it]

Total reward after episode 1781 is 652.0


 18%|█▊        | 1782/10000 [1:13:44<3:34:05,  1.56s/it]

Total reward after episode 1782 is 606.0


 18%|█▊        | 1783/10000 [1:13:47<4:14:55,  1.86s/it]

Total reward after episode 1783 is 1049.0


 18%|█▊        | 1784/10000 [1:13:48<3:51:51,  1.69s/it]

Total reward after episode 1784 is 609.0


 18%|█▊        | 1785/10000 [1:13:50<3:48:02,  1.67s/it]

Total reward after episode 1785 is 611.0


 18%|█▊        | 1786/10000 [1:13:51<3:43:54,  1.64s/it]

Total reward after episode 1786 is 617.0


 18%|█▊        | 1787/10000 [1:13:54<4:13:41,  1.85s/it]

Total reward after episode 1787 is 1046.0


 18%|█▊        | 1788/10000 [1:13:56<4:32:36,  1.99s/it]

Total reward after episode 1788 is 1050.0


 18%|█▊        | 1789/10000 [1:14:00<6:03:35,  2.66s/it]

Total reward after episode 1789 is 1859.0


 18%|█▊        | 1790/10000 [1:14:01<5:08:16,  2.25s/it]

Total reward after episode 1790 is 611.0


 18%|█▊        | 1791/10000 [1:14:05<6:18:05,  2.76s/it]

Total reward after episode 1791 is 1855.0


 18%|█▊        | 1792/10000 [1:14:07<5:17:06,  2.32s/it]

Total reward after episode 1792 is 607.0


 18%|█▊        | 1793/10000 [1:14:07<4:07:32,  1.81s/it]

Total reward after episode 1793 is 251.0


 18%|█▊        | 1794/10000 [1:14:11<5:08:05,  2.25s/it]

Total reward after episode 1794 is 1349.0


 18%|█▊        | 1795/10000 [1:14:12<4:37:39,  2.03s/it]

Total reward after episode 1795 is 632.0


 18%|█▊        | 1796/10000 [1:14:13<3:39:15,  1.60s/it]

Total reward after episode 1796 is 251.0


 18%|█▊        | 1797/10000 [1:14:14<3:35:09,  1.57s/it]

Total reward after episode 1797 is 631.0


 18%|█▊        | 1798/10000 [1:14:17<4:35:40,  2.02s/it]

Total reward after episode 1798 is 1332.0


 18%|█▊        | 1799/10000 [1:14:19<4:13:44,  1.86s/it]

Total reward after episode 1799 is 610.0


 18%|█▊        | 1800/10000 [1:14:20<4:01:30,  1.77s/it]

Total reward after episode 1800 is 610.0


 18%|█▊        | 1801/10000 [1:14:22<3:42:16,  1.63s/it]

Total reward after episode 1801 is 608.0


 18%|█▊        | 1802/10000 [1:14:23<3:36:54,  1.59s/it]

Total reward after episode 1802 is 614.0


 18%|█▊        | 1803/10000 [1:14:24<3:24:42,  1.50s/it]

Total reward after episode 1803 is 610.0


 18%|█▊        | 1804/10000 [1:14:37<11:16:02,  4.95s/it]

Total reward after episode 1804 is 3002.0


 18%|█▊        | 1805/10000 [1:14:39<8:45:30,  3.85s/it] 

Total reward after episode 1805 is 608.0


 18%|█▊        | 1806/10000 [1:14:40<6:59:42,  3.07s/it]

Total reward after episode 1806 is 610.0


 18%|█▊        | 1807/10000 [1:14:41<5:45:41,  2.53s/it]

Total reward after episode 1807 is 609.0


 18%|█▊        | 1808/10000 [1:14:44<5:56:29,  2.61s/it]

Total reward after episode 1808 is 1351.0


 18%|█▊        | 1809/10000 [1:14:47<6:20:27,  2.79s/it]

Total reward after episode 1809 is 1335.0


 18%|█▊        | 1810/10000 [1:14:49<5:30:40,  2.42s/it]

Total reward after episode 1810 is 638.0


 18%|█▊        | 1811/10000 [1:14:51<5:32:55,  2.44s/it]

Total reward after episode 1811 is 1050.0


 18%|█▊        | 1812/10000 [1:14:54<5:39:35,  2.49s/it]

Total reward after episode 1812 is 1044.0


 18%|█▊        | 1813/10000 [1:14:56<5:32:54,  2.44s/it]

Total reward after episode 1813 is 728.0


 18%|█▊        | 1814/10000 [1:14:58<4:52:52,  2.15s/it]

Total reward after episode 1814 is 605.0


 18%|█▊        | 1815/10000 [1:14:59<4:16:18,  1.88s/it]

Total reward after episode 1815 is 606.0


 18%|█▊        | 1816/10000 [1:15:00<3:24:23,  1.50s/it]

Total reward after episode 1816 is 252.0


 18%|█▊        | 1817/10000 [1:15:01<3:15:48,  1.44s/it]

Total reward after episode 1817 is 607.0


 18%|█▊        | 1818/10000 [1:15:04<4:41:24,  2.06s/it]

Total reward after episode 1818 is 1699.0


 18%|█▊        | 1819/10000 [1:15:07<5:23:15,  2.37s/it]

Total reward after episode 1819 is 1442.0


 18%|█▊        | 1820/10000 [1:15:10<5:30:34,  2.42s/it]

Total reward after episode 1820 is 1153.0


 18%|█▊        | 1821/10000 [1:15:13<6:00:05,  2.64s/it]

Total reward after episode 1821 is 1432.0


 18%|█▊        | 1822/10000 [1:15:17<7:04:04,  3.11s/it]

Total reward after episode 1822 is 1860.0


 18%|█▊        | 1823/10000 [1:15:19<6:05:03,  2.68s/it]

Total reward after episode 1823 is 637.0


 18%|█▊        | 1824/10000 [1:15:22<6:11:09,  2.72s/it]

Total reward after episode 1824 is 1351.0


 18%|█▊        | 1825/10000 [1:15:26<7:23:59,  3.26s/it]

Total reward after episode 1825 is 1861.0


 18%|█▊        | 1826/10000 [1:15:28<6:11:38,  2.73s/it]

Total reward after episode 1826 is 608.0


 18%|█▊        | 1827/10000 [1:15:31<6:13:30,  2.74s/it]

Total reward after episode 1827 is 1046.0


 18%|█▊        | 1828/10000 [1:15:34<6:40:42,  2.94s/it]

Total reward after episode 1828 is 1345.0


 18%|█▊        | 1829/10000 [1:15:35<5:40:53,  2.50s/it]

Total reward after episode 1829 is 606.0


 18%|█▊        | 1830/10000 [1:15:38<5:40:52,  2.50s/it]

Total reward after episode 1830 is 1047.0


 18%|█▊        | 1831/10000 [1:15:40<5:19:01,  2.34s/it]

Total reward after episode 1831 is 725.0


 18%|█▊        | 1832/10000 [1:15:41<4:45:27,  2.10s/it]

Total reward after episode 1832 is 607.0


 18%|█▊        | 1833/10000 [1:15:46<6:07:12,  2.70s/it]

Total reward after episode 1833 is 1866.0


 18%|█▊        | 1834/10000 [1:15:48<5:56:21,  2.62s/it]

Total reward after episode 1834 is 1047.0


 18%|█▊        | 1835/10000 [1:15:51<6:02:16,  2.66s/it]

Total reward after episode 1835 is 1345.0


 18%|█▊        | 1836/10000 [1:15:55<7:05:46,  3.13s/it]

Total reward after episode 1836 is 1925.0


 18%|█▊        | 1837/10000 [1:15:59<7:32:06,  3.32s/it]

Total reward after episode 1837 is 1701.0


 18%|█▊        | 1838/10000 [1:16:00<6:07:33,  2.70s/it]

Total reward after episode 1838 is 610.0


 18%|█▊        | 1839/10000 [1:16:01<5:08:27,  2.27s/it]

Total reward after episode 1839 is 606.0


 18%|█▊        | 1840/10000 [1:16:04<5:34:53,  2.46s/it]

Total reward after episode 1840 is 1352.0


 18%|█▊        | 1841/10000 [1:16:06<4:51:49,  2.15s/it]

Total reward after episode 1841 is 610.0


 18%|█▊        | 1842/10000 [1:16:08<5:14:09,  2.31s/it]

Total reward after episode 1842 is 1045.0


 18%|█▊        | 1843/10000 [1:16:10<4:41:31,  2.07s/it]

Total reward after episode 1843 is 612.0


 18%|█▊        | 1844/10000 [1:16:13<5:16:20,  2.33s/it]

Total reward after episode 1844 is 1349.0


 18%|█▊        | 1845/10000 [1:16:15<5:08:24,  2.27s/it]

Total reward after episode 1845 is 1073.0


 18%|█▊        | 1846/10000 [1:16:16<4:38:54,  2.05s/it]

Total reward after episode 1846 is 607.0


 18%|█▊        | 1847/10000 [1:16:18<4:09:09,  1.83s/it]

Total reward after episode 1847 is 614.0


 18%|█▊        | 1848/10000 [1:16:21<5:20:07,  2.36s/it]

Total reward after episode 1848 is 1433.0


 18%|█▊        | 1849/10000 [1:16:24<5:48:19,  2.56s/it]

Total reward after episode 1849 is 1332.0


 18%|█▊        | 1850/10000 [1:16:27<5:53:24,  2.60s/it]

Total reward after episode 1850 is 1180.0


 19%|█▊        | 1851/10000 [1:16:30<6:13:27,  2.75s/it]

Total reward after episode 1851 is 1341.0


 19%|█▊        | 1852/10000 [1:16:33<6:21:15,  2.81s/it]

Total reward after episode 1852 is 817.0


 19%|█▊        | 1853/10000 [1:16:37<7:02:07,  3.11s/it]

Total reward after episode 1853 is 1695.0


 19%|█▊        | 1854/10000 [1:16:40<7:01:33,  3.11s/it]

Total reward after episode 1854 is 1341.0


 19%|█▊        | 1855/10000 [1:16:41<5:22:59,  2.38s/it]

Total reward after episode 1855 is 239.0


 19%|█▊        | 1856/10000 [1:16:42<4:40:36,  2.07s/it]

Total reward after episode 1856 is 602.0


 19%|█▊        | 1857/10000 [1:16:43<4:09:24,  1.84s/it]

Total reward after episode 1857 is 607.0


 19%|█▊        | 1858/10000 [1:16:45<3:46:01,  1.67s/it]

Total reward after episode 1858 is 607.0


 19%|█▊        | 1859/10000 [1:16:48<4:40:44,  2.07s/it]

Total reward after episode 1859 is 1350.0


 19%|█▊        | 1860/10000 [1:16:48<3:43:01,  1.64s/it]

Total reward after episode 1860 is 251.0


 19%|█▊        | 1861/10000 [1:16:52<5:22:49,  2.38s/it]

Total reward after episode 1861 is 1693.0


 19%|█▊        | 1862/10000 [1:16:58<7:54:47,  3.50s/it]

Total reward after episode 1862 is 1319.0


 19%|█▊        | 1863/10000 [1:17:01<7:14:54,  3.21s/it]

Total reward after episode 1863 is 1049.0


 19%|█▊        | 1864/10000 [1:17:04<7:14:04,  3.20s/it]

Total reward after episode 1864 is 1443.0


 19%|█▊        | 1865/10000 [1:17:05<5:42:04,  2.52s/it]

Total reward after episode 1865 is 253.0


 19%|█▊        | 1866/10000 [1:17:09<6:31:11,  2.89s/it]

Total reward after episode 1866 is 1438.0


 19%|█▊        | 1867/10000 [1:17:11<6:11:03,  2.74s/it]

Total reward after episode 1867 is 742.0


 19%|█▊        | 1868/10000 [1:17:40<24:01:22, 10.63s/it]

Total reward after episode 1868 is 2852.0


 19%|█▊        | 1869/10000 [1:17:42<17:47:56,  7.88s/it]

Total reward after episode 1869 is 612.0


 19%|█▊        | 1870/10000 [1:17:45<14:56:03,  6.61s/it]

Total reward after episode 1870 is 1433.0


 19%|█▊        | 1871/10000 [1:17:48<12:17:28,  5.44s/it]

Total reward after episode 1871 is 819.0


 19%|█▊        | 1872/10000 [1:17:52<11:22:41,  5.04s/it]

Total reward after episode 1872 is 1034.0


 19%|█▊        | 1873/10000 [1:17:54<9:05:03,  4.02s/it] 

Total reward after episode 1873 is 652.0


 19%|█▊        | 1874/10000 [1:17:58<9:08:31,  4.05s/it]

Total reward after episode 1874 is 1573.0


 19%|█▉        | 1875/10000 [1:18:00<7:25:58,  3.29s/it]

Total reward after episode 1875 is 738.0


 19%|█▉        | 1876/10000 [1:18:03<7:27:11,  3.30s/it]

Total reward after episode 1876 is 1427.0


 19%|█▉        | 1877/10000 [1:18:04<5:41:13,  2.52s/it]

Total reward after episode 1877 is 250.0


 19%|█▉        | 1878/10000 [1:18:08<6:49:19,  3.02s/it]

Total reward after episode 1878 is 1925.0


 19%|█▉        | 1879/10000 [1:18:11<7:16:28,  3.22s/it]

Total reward after episode 1879 is 1586.0


 19%|█▉        | 1880/10000 [1:18:13<5:59:26,  2.66s/it]

Total reward after episode 1880 is 614.0


 19%|█▉        | 1881/10000 [1:18:13<4:36:14,  2.04s/it]

Total reward after episode 1881 is 249.0


 19%|█▉        | 1882/10000 [1:18:15<4:13:01,  1.87s/it]

Total reward after episode 1882 is 609.0


 19%|█▉        | 1883/10000 [1:18:15<3:23:14,  1.50s/it]

Total reward after episode 1883 is 249.0


 19%|█▉        | 1884/10000 [1:18:16<2:57:27,  1.31s/it]

Total reward after episode 1884 is 233.0


 19%|█▉        | 1885/10000 [1:18:20<4:52:11,  2.16s/it]

Total reward after episode 1885 is 1421.0


 19%|█▉        | 1886/10000 [1:18:24<5:42:30,  2.53s/it]

Total reward after episode 1886 is 1337.0


 19%|█▉        | 1887/10000 [1:18:25<4:51:45,  2.16s/it]

Total reward after episode 1887 is 611.0


 19%|█▉        | 1888/10000 [1:18:28<5:16:44,  2.34s/it]

Total reward after episode 1888 is 1335.0


 19%|█▉        | 1889/10000 [1:18:29<4:43:34,  2.10s/it]

Total reward after episode 1889 is 610.0


 19%|█▉        | 1890/10000 [1:18:33<5:33:25,  2.47s/it]

Total reward after episode 1890 is 1336.0


 19%|█▉        | 1891/10000 [1:18:36<6:08:57,  2.73s/it]

Total reward after episode 1891 is 1327.0


 19%|█▉        | 1892/10000 [1:18:39<6:30:26,  2.89s/it]

Total reward after episode 1892 is 1425.0


 19%|█▉        | 1893/10000 [1:19:16<29:20:41, 13.03s/it]

Total reward after episode 1893 is 2780.0


 19%|█▉        | 1894/10000 [1:19:18<21:40:33,  9.63s/it]

Total reward after episode 1894 is 636.0


 19%|█▉        | 1895/10000 [1:19:20<16:50:29,  7.48s/it]

Total reward after episode 1895 is 1075.0


 19%|█▉        | 1896/10000 [1:19:24<14:02:20,  6.24s/it]

Total reward after episode 1896 is 1426.0


 19%|█▉        | 1897/10000 [1:19:25<10:50:36,  4.82s/it]

Total reward after episode 1897 is 737.0


 19%|█▉        | 1898/10000 [1:19:28<9:49:47,  4.37s/it] 

Total reward after episode 1898 is 1428.0


 19%|█▉        | 1899/10000 [1:19:32<9:17:24,  4.13s/it]

Total reward after episode 1899 is 1434.0


 19%|█▉        | 1900/10000 [1:19:34<7:34:01,  3.36s/it]

Total reward after episode 1900 is 616.0


 19%|█▉        | 1901/10000 [1:19:35<6:09:11,  2.74s/it]

Total reward after episode 1901 is 609.0


 19%|█▉        | 1902/10000 [1:19:38<6:19:08,  2.81s/it]

Total reward after episode 1902 is 1358.0


 19%|█▉        | 1903/10000 [1:19:56<16:57:58,  7.54s/it]

Total reward after episode 1903 is 2949.0


 19%|█▉        | 1904/10000 [1:20:01<14:43:22,  6.55s/it]

Total reward after episode 1904 is 1852.0


 19%|█▉        | 1905/10000 [1:20:04<12:24:44,  5.52s/it]

Total reward after episode 1905 is 1353.0


 19%|█▉        | 1906/10000 [1:20:07<11:06:39,  4.94s/it]

Total reward after episode 1906 is 1434.0


 19%|█▉        | 1907/10000 [1:20:11<9:59:22,  4.44s/it] 

Total reward after episode 1907 is 1348.0


 19%|█▉        | 1908/10000 [1:20:11<7:26:01,  3.31s/it]

Total reward after episode 1908 is 250.0


 19%|█▉        | 1909/10000 [1:20:14<6:45:14,  3.01s/it]

Total reward after episode 1909 is 604.0


 19%|█▉        | 1910/10000 [1:20:17<7:00:55,  3.12s/it]

Total reward after episode 1910 is 1346.0


 19%|█▉        | 1911/10000 [1:20:19<6:33:32,  2.92s/it]

Total reward after episode 1911 is 1046.0


 19%|█▉        | 1912/10000 [1:20:21<5:36:12,  2.49s/it]

Total reward after episode 1912 is 738.0


 19%|█▉        | 1913/10000 [1:20:24<5:53:34,  2.62s/it]

Total reward after episode 1913 is 1349.0


 19%|█▉        | 1914/10000 [1:20:25<5:08:28,  2.29s/it]

Total reward after episode 1914 is 738.0


 19%|█▉        | 1915/10000 [1:20:30<6:29:14,  2.89s/it]

Total reward after episode 1915 is 1696.0


 19%|█▉        | 1916/10000 [1:20:33<7:06:33,  3.17s/it]

Total reward after episode 1916 is 1709.0


 19%|█▉        | 1917/10000 [1:20:37<7:34:24,  3.37s/it]

Total reward after episode 1917 is 1695.0


 19%|█▉        | 1918/10000 [1:20:39<6:19:44,  2.82s/it]

Total reward after episode 1918 is 738.0


 19%|█▉        | 1919/10000 [1:20:40<5:27:32,  2.43s/it]

Total reward after episode 1919 is 737.0


 19%|█▉        | 1920/10000 [1:20:43<5:43:30,  2.55s/it]

Total reward after episode 1920 is 1348.0


 19%|█▉        | 1921/10000 [1:20:46<5:38:08,  2.51s/it]

Total reward after episode 1921 is 1155.0


 19%|█▉        | 1922/10000 [1:20:47<4:51:22,  2.16s/it]

Total reward after episode 1922 is 608.0


 19%|█▉        | 1923/10000 [1:20:51<6:02:07,  2.69s/it]

Total reward after episode 1923 is 1691.0


 19%|█▉        | 1924/10000 [1:20:55<6:43:46,  3.00s/it]

Total reward after episode 1924 is 1714.0


 19%|█▉        | 1925/10000 [1:20:58<6:40:30,  2.98s/it]

Total reward after episode 1925 is 1351.0


 19%|█▉        | 1926/10000 [1:21:02<7:27:03,  3.32s/it]

Total reward after episode 1926 is 1431.0


 19%|█▉        | 1927/10000 [1:21:03<6:03:38,  2.70s/it]

Total reward after episode 1927 is 608.0


 19%|█▉        | 1928/10000 [1:21:05<5:49:29,  2.60s/it]

Total reward after episode 1928 is 1049.0


 19%|█▉        | 1929/10000 [1:21:06<4:29:47,  2.01s/it]

Total reward after episode 1929 is 248.0


 19%|█▉        | 1930/10000 [1:21:09<5:16:43,  2.35s/it]

Total reward after episode 1930 is 1584.0


 19%|█▉        | 1931/10000 [1:21:12<5:47:59,  2.59s/it]

Total reward after episode 1931 is 1330.0


 19%|█▉        | 1932/10000 [1:21:15<6:05:03,  2.71s/it]

Total reward after episode 1932 is 1350.0


 19%|█▉        | 1933/10000 [1:21:18<6:18:28,  2.82s/it]

Total reward after episode 1933 is 1349.0


 19%|█▉        | 1934/10000 [1:21:21<6:04:40,  2.71s/it]

Total reward after episode 1934 is 1069.0


 19%|█▉        | 1935/10000 [1:21:24<6:29:20,  2.90s/it]

Total reward after episode 1935 is 1335.0


 19%|█▉        | 1936/10000 [1:21:27<6:49:34,  3.05s/it]

Total reward after episode 1936 is 1344.0


 19%|█▉        | 1937/10000 [1:21:29<5:39:10,  2.52s/it]

Total reward after episode 1937 is 609.0


 19%|█▉        | 1938/10000 [1:21:29<4:21:51,  1.95s/it]

Total reward after episode 1938 is 251.0


 19%|█▉        | 1939/10000 [1:21:32<4:43:35,  2.11s/it]

Total reward after episode 1939 is 1157.0


 19%|█▉        | 1940/10000 [1:21:33<4:09:34,  1.86s/it]

Total reward after episode 1940 is 611.0


 19%|█▉        | 1941/10000 [1:21:36<4:58:31,  2.22s/it]

Total reward after episode 1941 is 1354.0


 19%|█▉        | 1942/10000 [1:21:39<5:03:50,  2.26s/it]

Total reward after episode 1942 is 1152.0


 19%|█▉        | 1943/10000 [1:21:39<3:57:36,  1.77s/it]

Total reward after episode 1943 is 251.0


 19%|█▉        | 1944/10000 [1:21:40<3:37:24,  1.62s/it]

Total reward after episode 1944 is 606.0


 19%|█▉        | 1945/10000 [1:21:43<4:06:21,  1.84s/it]

Total reward after episode 1945 is 1073.0


 19%|█▉        | 1946/10000 [1:21:43<3:19:03,  1.48s/it]

Total reward after episode 1946 is 244.0


 19%|█▉        | 1947/10000 [1:21:45<3:20:31,  1.49s/it]

Total reward after episode 1947 is 605.0


 19%|█▉        | 1948/10000 [1:21:48<4:28:59,  2.00s/it]

Total reward after episode 1948 is 1434.0


 19%|█▉        | 1949/10000 [1:21:52<5:38:25,  2.52s/it]

Total reward after episode 1949 is 1695.0


 20%|█▉        | 1950/10000 [1:21:56<6:40:57,  2.99s/it]

Total reward after episode 1950 is 1865.0


 20%|█▉        | 1951/10000 [1:21:57<5:09:33,  2.31s/it]

Total reward after episode 1951 is 243.0


 20%|█▉        | 1952/10000 [1:22:01<6:30:21,  2.91s/it]

Total reward after episode 1952 is 1923.0


 20%|█▉        | 1953/10000 [1:22:02<5:29:50,  2.46s/it]

Total reward after episode 1953 is 604.0


 20%|█▉        | 1954/10000 [1:22:05<5:48:11,  2.60s/it]

Total reward after episode 1954 is 1350.0


 20%|█▉        | 1955/10000 [1:22:09<6:21:58,  2.85s/it]

Total reward after episode 1955 is 1698.0


 20%|█▉        | 1956/10000 [1:22:12<6:20:46,  2.84s/it]

Total reward after episode 1956 is 1350.0


 20%|█▉        | 1957/10000 [1:22:16<7:13:21,  3.23s/it]

Total reward after episode 1957 is 1861.0


 20%|█▉        | 1958/10000 [1:22:18<6:15:27,  2.80s/it]

Total reward after episode 1958 is 736.0


 20%|█▉        | 1959/10000 [1:22:20<5:51:43,  2.62s/it]

Total reward after episode 1959 is 1048.0


 20%|█▉        | 1960/10000 [1:22:20<4:30:23,  2.02s/it]

Total reward after episode 1960 is 251.0


 20%|█▉        | 1961/10000 [1:22:22<4:11:38,  1.88s/it]

Total reward after episode 1961 is 736.0


 20%|█▉        | 1962/10000 [1:22:24<4:33:10,  2.04s/it]

Total reward after episode 1962 is 1153.0


 20%|█▉        | 1963/10000 [1:22:27<5:20:07,  2.39s/it]

Total reward after episode 1963 is 1438.0


 20%|█▉        | 1964/10000 [1:22:32<6:41:59,  3.00s/it]

Total reward after episode 1964 is 1341.0


 20%|█▉        | 1965/10000 [1:22:36<7:09:16,  3.21s/it]

Total reward after episode 1965 is 1432.0


 20%|█▉        | 1966/10000 [1:22:36<5:28:56,  2.46s/it]

Total reward after episode 1966 is 242.0


 20%|█▉        | 1967/10000 [1:22:39<5:49:45,  2.61s/it]

Total reward after episode 1967 is 1348.0


 20%|█▉        | 1968/10000 [1:22:42<6:08:17,  2.75s/it]

Total reward after episode 1968 is 1348.0


 20%|█▉        | 1969/10000 [1:22:45<6:19:52,  2.84s/it]

Total reward after episode 1969 is 1349.0


 20%|█▉        | 1970/10000 [1:22:47<5:29:09,  2.46s/it]

Total reward after episode 1970 is 630.0


 20%|█▉        | 1971/10000 [1:22:50<5:45:01,  2.58s/it]

Total reward after episode 1971 is 1067.0


 20%|█▉        | 1972/10000 [1:22:51<5:04:01,  2.27s/it]

Total reward after episode 1972 is 613.0


 20%|█▉        | 1973/10000 [1:22:53<4:25:23,  1.98s/it]

Total reward after episode 1973 is 614.0


 20%|█▉        | 1974/10000 [1:22:56<5:35:11,  2.51s/it]

Total reward after episode 1974 is 1700.0


 20%|█▉        | 1975/10000 [1:22:57<4:19:18,  1.94s/it]

Total reward after episode 1975 is 251.0


 20%|█▉        | 1976/10000 [1:23:01<5:35:53,  2.51s/it]

Total reward after episode 1976 is 1343.0


 20%|█▉        | 1977/10000 [1:23:02<4:22:20,  1.96s/it]

Total reward after episode 1977 is 243.0


 20%|█▉        | 1978/10000 [1:23:04<4:54:38,  2.20s/it]

Total reward after episode 1978 is 1346.0


 20%|█▉        | 1979/10000 [1:23:08<5:33:57,  2.50s/it]

Total reward after episode 1979 is 1437.0


 20%|█▉        | 1980/10000 [1:23:10<5:22:57,  2.42s/it]

Total reward after episode 1980 is 1061.0


 20%|█▉        | 1981/10000 [1:23:11<4:47:06,  2.15s/it]

Total reward after episode 1981 is 608.0


 20%|█▉        | 1982/10000 [1:23:16<6:10:54,  2.78s/it]

Total reward after episode 1982 is 1925.0


 20%|█▉        | 1983/10000 [1:23:17<5:20:14,  2.40s/it]

Total reward after episode 1983 is 738.0


 20%|█▉        | 1984/10000 [1:23:18<4:08:38,  1.86s/it]

Total reward after episode 1984 is 249.0


 20%|█▉        | 1985/10000 [1:23:21<5:14:23,  2.35s/it]

Total reward after episode 1985 is 1343.0


 20%|█▉        | 1986/10000 [1:23:25<6:26:12,  2.89s/it]

Total reward after episode 1986 is 1928.0


 20%|█▉        | 1987/10000 [1:23:30<7:23:47,  3.32s/it]

Total reward after episode 1987 is 1852.0


 20%|█▉        | 1988/10000 [1:23:32<7:04:37,  3.18s/it]

Total reward after episode 1988 is 1348.0


 20%|█▉        | 1989/10000 [1:23:36<7:08:22,  3.21s/it]

Total reward after episode 1989 is 1347.0


 20%|█▉        | 1990/10000 [1:23:39<7:00:23,  3.15s/it]

Total reward after episode 1990 is 1337.0


 20%|█▉        | 1991/10000 [1:23:41<6:33:19,  2.95s/it]

Total reward after episode 1991 is 1058.0


 20%|█▉        | 1992/10000 [1:23:43<5:29:50,  2.47s/it]

Total reward after episode 1992 is 602.0


 20%|█▉        | 1993/10000 [1:23:46<6:12:25,  2.79s/it]

Total reward after episode 1993 is 1686.0


 20%|█▉        | 1994/10000 [1:23:47<4:46:08,  2.14s/it]

Total reward after episode 1994 is 251.0


 20%|█▉        | 1995/10000 [1:23:50<5:20:12,  2.40s/it]

Total reward after episode 1995 is 1349.0


 20%|█▉        | 1996/10000 [1:23:54<6:26:50,  2.90s/it]

Total reward after episode 1996 is 1698.0


 20%|█▉        | 1997/10000 [1:23:55<5:25:35,  2.44s/it]

Total reward after episode 1997 is 607.0


 20%|█▉        | 1998/10000 [1:24:01<7:57:29,  3.58s/it]

Total reward after episode 1998 is 2649.0


 20%|█▉        | 1999/10000 [1:24:05<7:49:14,  3.52s/it]

Total reward after episode 1999 is 1346.0


 20%|██        | 2000/10000 [1:24:07<7:16:09,  3.27s/it]

Total reward after episode 2000 is 1347.0


 20%|██        | 2001/10000 [1:24:12<8:17:28,  3.73s/it]

Total reward after episode 2001 is 1858.0


 20%|██        | 2002/10000 [1:24:15<7:46:29,  3.50s/it]

Total reward after episode 2002 is 1351.0


 20%|██        | 2003/10000 [1:24:20<8:22:58,  3.77s/it]

Total reward after episode 2003 is 1327.0


 20%|██        | 2004/10000 [1:24:23<7:55:21,  3.57s/it]

Total reward after episode 2004 is 1427.0


 20%|██        | 2005/10000 [1:24:24<6:06:40,  2.75s/it]

Total reward after episode 2005 is 236.0


 20%|██        | 2006/10000 [1:24:27<6:13:38,  2.80s/it]

Total reward after episode 2006 is 1353.0


 20%|██        | 2007/10000 [1:24:30<6:22:59,  2.87s/it]

Total reward after episode 2007 is 1442.0


 20%|██        | 2008/10000 [1:24:34<7:13:12,  3.25s/it]

Total reward after episode 2008 is 1691.0


 20%|██        | 2009/10000 [1:24:37<7:23:00,  3.33s/it]

Total reward after episode 2009 is 1422.0


 20%|██        | 2010/10000 [1:24:39<6:10:15,  2.78s/it]

Total reward after episode 2010 is 608.0


 20%|██        | 2011/10000 [1:24:39<4:43:53,  2.13s/it]

Total reward after episode 2011 is 251.0


 20%|██        | 2012/10000 [1:24:44<6:14:08,  2.81s/it]

Total reward after episode 2012 is 1692.0


 20%|██        | 2013/10000 [1:24:47<6:26:14,  2.90s/it]

Total reward after episode 2013 is 1331.0


 20%|██        | 2014/10000 [1:24:47<4:54:59,  2.22s/it]

Total reward after episode 2014 is 251.0


 20%|██        | 2015/10000 [1:24:52<6:33:19,  2.96s/it]

Total reward after episode 2015 is 1921.0


 20%|██        | 2016/10000 [1:24:53<5:00:32,  2.26s/it]

Total reward after episode 2016 is 249.0


 20%|██        | 2017/10000 [1:24:55<5:14:47,  2.37s/it]

Total reward after episode 2017 is 1153.0


 20%|██        | 2018/10000 [1:24:57<4:31:48,  2.04s/it]

Total reward after episode 2018 is 611.0


 20%|██        | 2019/10000 [1:25:00<5:10:03,  2.33s/it]

Total reward after episode 2019 is 1343.0


 20%|██        | 2020/10000 [1:25:03<5:59:52,  2.71s/it]

Total reward after episode 2020 is 1561.0


 20%|██        | 2021/10000 [1:25:05<5:10:41,  2.34s/it]

Total reward after episode 2021 is 625.0


 20%|██        | 2022/10000 [1:25:06<4:31:58,  2.05s/it]

Total reward after episode 2022 is 608.0


 20%|██        | 2023/10000 [1:25:09<5:09:30,  2.33s/it]

Total reward after episode 2023 is 1351.0


 20%|██        | 2024/10000 [1:25:12<5:49:30,  2.63s/it]

Total reward after episode 2024 is 1330.0


 20%|██        | 2025/10000 [1:25:17<7:03:48,  3.19s/it]

Total reward after episode 2025 is 1923.0


 20%|██        | 2026/10000 [1:25:20<6:56:14,  3.13s/it]

Total reward after episode 2026 is 1440.0


 20%|██        | 2027/10000 [1:25:24<7:48:30,  3.53s/it]

Total reward after episode 2027 is 1863.0


 20%|██        | 2028/10000 [1:25:26<6:32:19,  2.95s/it]

Total reward after episode 2028 is 636.0


 20%|██        | 2029/10000 [1:25:28<6:10:41,  2.79s/it]

Total reward after episode 2029 is 1058.0


 20%|██        | 2030/10000 [1:25:29<4:44:28,  2.14s/it]

Total reward after episode 2030 is 252.0


 20%|██        | 2031/10000 [1:25:32<5:14:52,  2.37s/it]

Total reward after episode 2031 is 1350.0


 20%|██        | 2032/10000 [1:25:33<4:31:32,  2.04s/it]

Total reward after episode 2032 is 609.0


 20%|██        | 2033/10000 [1:25:34<3:34:31,  1.62s/it]

Total reward after episode 2033 is 251.0


 20%|██        | 2034/10000 [1:25:36<3:37:08,  1.64s/it]

Total reward after episode 2034 is 625.0


 20%|██        | 2035/10000 [1:25:38<4:29:17,  2.03s/it]

Total reward after episode 2035 is 1343.0


 20%|██        | 2036/10000 [1:25:39<3:35:42,  1.63s/it]

Total reward after episode 2036 is 242.0


 20%|██        | 2037/10000 [1:25:41<3:41:46,  1.67s/it]

Total reward after episode 2037 is 736.0


 20%|██        | 2038/10000 [1:25:44<4:30:15,  2.04s/it]

Total reward after episode 2038 is 1331.0


 20%|██        | 2039/10000 [1:25:47<5:19:34,  2.41s/it]

Total reward after episode 2039 is 1441.0


 20%|██        | 2040/10000 [1:25:49<4:52:22,  2.20s/it]

Total reward after episode 2040 is 652.0


 20%|██        | 2041/10000 [1:25:52<5:35:02,  2.53s/it]

Total reward after episode 2041 is 1064.0


 20%|██        | 2042/10000 [1:26:02<10:31:44,  4.76s/it]

Total reward after episode 2042 is 3031.0


 20%|██        | 2043/10000 [1:26:06<10:01:25,  4.54s/it]

Total reward after episode 2043 is 1569.0


 20%|██        | 2044/10000 [1:26:09<9:06:04,  4.12s/it] 

Total reward after episode 2044 is 1347.0


 20%|██        | 2045/10000 [1:26:13<8:34:09,  3.88s/it]

Total reward after episode 2045 is 1436.0


 20%|██        | 2046/10000 [1:26:17<8:49:48,  4.00s/it]

Total reward after episode 2046 is 1929.0


 20%|██        | 2047/10000 [1:26:20<8:04:42,  3.66s/it]

Total reward after episode 2047 is 1339.0


 20%|██        | 2048/10000 [1:26:24<8:20:49,  3.78s/it]

Total reward after episode 2048 is 1699.0


 20%|██        | 2049/10000 [1:26:24<6:14:46,  2.83s/it]

Total reward after episode 2049 is 251.0


 20%|██        | 2050/10000 [1:26:25<4:49:09,  2.18s/it]

Total reward after episode 2050 is 244.0


 21%|██        | 2051/10000 [1:26:26<4:13:13,  1.91s/it]

Total reward after episode 2051 is 610.0


 21%|██        | 2052/10000 [1:26:30<5:25:19,  2.46s/it]

Total reward after episode 2052 is 1707.0


 21%|██        | 2053/10000 [1:26:33<5:51:46,  2.66s/it]

Total reward after episode 2053 is 1429.0


 21%|██        | 2054/10000 [1:26:37<6:49:15,  3.09s/it]

Total reward after episode 2054 is 1692.0


 21%|██        | 2055/10000 [1:26:40<6:41:26,  3.03s/it]

Total reward after episode 2055 is 817.0


 21%|██        | 2056/10000 [1:26:44<7:31:42,  3.41s/it]

Total reward after episode 2056 is 1880.0


 21%|██        | 2057/10000 [1:26:45<5:40:52,  2.57s/it]

Total reward after episode 2057 is 251.0


 21%|██        | 2058/10000 [1:26:49<6:48:56,  3.09s/it]

Total reward after episode 2058 is 1928.0


 21%|██        | 2059/10000 [1:26:54<7:35:48,  3.44s/it]

Total reward after episode 2059 is 1924.0


 21%|██        | 2060/10000 [1:26:55<6:14:12,  2.83s/it]

Total reward after episode 2060 is 605.0


 21%|██        | 2061/10000 [1:26:58<6:21:28,  2.88s/it]

Total reward after episode 2061 is 1350.0


 21%|██        | 2062/10000 [1:26:59<4:51:16,  2.20s/it]

Total reward after episode 2062 is 250.0


 21%|██        | 2063/10000 [1:27:03<6:35:41,  2.99s/it]

Total reward after episode 2063 is 1924.0


 21%|██        | 2064/10000 [1:27:05<5:43:32,  2.60s/it]

Total reward after episode 2064 is 619.0


 21%|██        | 2065/10000 [1:27:08<5:56:07,  2.69s/it]

Total reward after episode 2065 is 1438.0


 21%|██        | 2066/10000 [1:27:09<4:59:18,  2.26s/it]

Total reward after episode 2066 is 606.0


 21%|██        | 2067/10000 [1:27:13<6:03:10,  2.75s/it]

Total reward after episode 2067 is 1691.0


 21%|██        | 2068/10000 [1:27:16<6:07:56,  2.78s/it]

Total reward after episode 2068 is 1330.0


 21%|██        | 2069/10000 [1:27:20<6:37:30,  3.01s/it]

Total reward after episode 2069 is 1699.0


 21%|██        | 2070/10000 [1:27:20<5:01:52,  2.28s/it]

Total reward after episode 2070 is 251.0


 21%|██        | 2071/10000 [1:27:22<4:30:43,  2.05s/it]

Total reward after episode 2071 is 636.0


 21%|██        | 2072/10000 [1:27:25<5:07:28,  2.33s/it]

Total reward after episode 2072 is 1349.0


 21%|██        | 2073/10000 [1:27:26<4:28:43,  2.03s/it]

Total reward after episode 2073 is 607.0


 21%|██        | 2074/10000 [1:27:27<3:32:42,  1.61s/it]

Total reward after episode 2074 is 251.0


 21%|██        | 2075/10000 [1:27:31<5:15:55,  2.39s/it]

Total reward after episode 2075 is 1929.0


 21%|██        | 2076/10000 [1:27:34<5:35:56,  2.54s/it]

Total reward after episode 2076 is 1353.0


 21%|██        | 2077/10000 [1:27:38<6:59:49,  3.18s/it]

Total reward after episode 2077 is 1927.0


 21%|██        | 2078/10000 [1:27:43<7:40:45,  3.49s/it]

Total reward after episode 2078 is 1933.0


 21%|██        | 2079/10000 [1:27:44<6:17:16,  2.86s/it]

Total reward after episode 2079 is 607.0


 21%|██        | 2080/10000 [1:27:45<5:18:41,  2.41s/it]

Total reward after episode 2080 is 604.0


 21%|██        | 2081/10000 [1:27:49<6:08:21,  2.79s/it]

Total reward after episode 2081 is 1038.0


 21%|██        | 2082/10000 [1:27:51<5:18:31,  2.41s/it]

Total reward after episode 2082 is 654.0


 21%|██        | 2083/10000 [1:27:52<4:42:26,  2.14s/it]

Total reward after episode 2083 is 654.0


 21%|██        | 2084/10000 [1:27:55<5:16:57,  2.40s/it]

Total reward after episode 2084 is 1441.0


 21%|██        | 2085/10000 [1:27:58<5:41:17,  2.59s/it]

Total reward after episode 2085 is 1438.0


 21%|██        | 2086/10000 [1:28:00<4:58:44,  2.26s/it]

Total reward after episode 2086 is 634.0


 21%|██        | 2087/10000 [1:28:00<3:53:22,  1.77s/it]

Total reward after episode 2087 is 252.0


 21%|██        | 2088/10000 [1:28:02<3:42:56,  1.69s/it]

Total reward after episode 2088 is 634.0


 21%|██        | 2089/10000 [1:28:03<3:28:05,  1.58s/it]

Total reward after episode 2089 is 614.0


 21%|██        | 2090/10000 [1:28:07<5:11:30,  2.36s/it]

Total reward after episode 2090 is 712.0


 21%|██        | 2091/10000 [1:28:11<6:15:00,  2.84s/it]

Total reward after episode 2091 is 1697.0


 21%|██        | 2092/10000 [1:28:15<6:54:22,  3.14s/it]

Total reward after episode 2092 is 1695.0


 21%|██        | 2093/10000 [1:28:18<6:45:22,  3.08s/it]

Total reward after episode 2093 is 1354.0


 21%|██        | 2094/10000 [1:28:19<5:40:40,  2.59s/it]

Total reward after episode 2094 is 627.0


 21%|██        | 2095/10000 [1:28:23<6:18:47,  2.88s/it]

Total reward after episode 2095 is 1698.0


 21%|██        | 2096/10000 [1:28:24<4:48:59,  2.19s/it]

Total reward after episode 2096 is 250.0


 21%|██        | 2097/10000 [1:28:27<5:38:49,  2.57s/it]

Total reward after episode 2097 is 1437.0


 21%|██        | 2098/10000 [1:28:30<6:06:31,  2.78s/it]

Total reward after episode 2098 is 1426.0


 21%|██        | 2099/10000 [1:28:32<5:21:21,  2.44s/it]

Total reward after episode 2099 is 621.0


 21%|██        | 2100/10000 [1:28:35<5:57:33,  2.72s/it]

Total reward after episode 2100 is 1345.0


 21%|██        | 2101/10000 [1:28:37<5:14:01,  2.39s/it]

Total reward after episode 2101 is 633.0


 21%|██        | 2102/10000 [1:28:39<4:47:00,  2.18s/it]

Total reward after episode 2102 is 633.0


 21%|██        | 2103/10000 [1:28:40<4:10:43,  1.91s/it]

Total reward after episode 2103 is 606.0


 21%|██        | 2104/10000 [1:28:44<5:41:43,  2.60s/it]

Total reward after episode 2104 is 1711.0


 21%|██        | 2105/10000 [1:28:47<5:54:55,  2.70s/it]

Total reward after episode 2105 is 1338.0


 21%|██        | 2106/10000 [1:28:50<5:50:18,  2.66s/it]

Total reward after episode 2106 is 1046.0


 21%|██        | 2107/10000 [1:28:57<8:47:41,  4.01s/it]

Total reward after episode 2107 is 2348.0


 21%|██        | 2108/10000 [1:28:57<6:33:09,  2.99s/it]

Total reward after episode 2108 is 251.0


 21%|██        | 2109/10000 [1:29:01<6:55:46,  3.16s/it]

Total reward after episode 2109 is 1433.0


 21%|██        | 2110/10000 [1:29:04<6:37:55,  3.03s/it]

Total reward after episode 2110 is 1163.0


 21%|██        | 2111/10000 [1:29:15<12:07:21,  5.53s/it]

Total reward after episode 2111 is 2598.0


 21%|██        | 2112/10000 [1:29:18<10:22:29,  4.73s/it]

Total reward after episode 2112 is 1330.0


 21%|██        | 2113/10000 [1:29:21<9:07:36,  4.17s/it] 

Total reward after episode 2113 is 1330.0


 21%|██        | 2114/10000 [1:29:25<9:07:37,  4.17s/it]

Total reward after episode 2114 is 1931.0


 21%|██        | 2115/10000 [1:29:28<8:28:53,  3.87s/it]

Total reward after episode 2115 is 1328.0


 21%|██        | 2116/10000 [1:29:31<7:57:54,  3.64s/it]

Total reward after episode 2116 is 1333.0


 21%|██        | 2117/10000 [1:29:34<7:38:35,  3.49s/it]

Total reward after episode 2117 is 1427.0


 21%|██        | 2118/10000 [1:29:38<8:01:40,  3.67s/it]

Total reward after episode 2118 is 1929.0


 21%|██        | 2119/10000 [1:29:39<6:03:03,  2.76s/it]

Total reward after episode 2119 is 242.0


 21%|██        | 2120/10000 [1:29:42<6:09:13,  2.81s/it]

Total reward after episode 2120 is 1348.0


 21%|██        | 2121/10000 [1:29:45<6:22:29,  2.91s/it]

Total reward after episode 2121 is 1330.0


 21%|██        | 2122/10000 [1:29:49<6:41:04,  3.05s/it]

Total reward after episode 2122 is 1345.0


 21%|██        | 2123/10000 [1:29:49<5:06:21,  2.33s/it]

Total reward after episode 2123 is 242.0


 21%|██        | 2124/10000 [1:29:52<5:21:51,  2.45s/it]

Total reward after episode 2124 is 1043.0


 21%|██▏       | 2125/10000 [1:29:56<6:11:06,  2.83s/it]

Total reward after episode 2125 is 1435.0


 21%|██▏       | 2126/10000 [1:29:57<5:20:52,  2.45s/it]

Total reward after episode 2126 is 609.0


 21%|██▏       | 2127/10000 [1:29:58<4:08:58,  1.90s/it]

Total reward after episode 2127 is 251.0


 21%|██▏       | 2128/10000 [1:29:59<3:47:03,  1.73s/it]

Total reward after episode 2128 is 607.0


 21%|██▏       | 2129/10000 [1:30:01<3:42:13,  1.69s/it]

Total reward after episode 2129 is 604.0


 21%|██▏       | 2130/10000 [1:30:02<3:26:19,  1.57s/it]

Total reward after episode 2130 is 610.0


 21%|██▏       | 2131/10000 [1:30:05<4:19:58,  1.98s/it]

Total reward after episode 2131 is 1058.0


 21%|██▏       | 2132/10000 [1:30:16<10:10:16,  4.65s/it]

Total reward after episode 2132 is 3015.0


 21%|██▏       | 2133/10000 [1:30:17<7:57:26,  3.64s/it] 

Total reward after episode 2133 is 608.0


 21%|██▏       | 2134/10000 [1:30:18<6:24:52,  2.94s/it]

Total reward after episode 2134 is 608.0


 21%|██▏       | 2135/10000 [1:30:22<6:40:01,  3.05s/it]

Total reward after episode 2135 is 1346.0


 21%|██▏       | 2136/10000 [1:30:25<6:50:56,  3.14s/it]

Total reward after episode 2136 is 1346.0


 21%|██▏       | 2137/10000 [1:30:26<5:37:14,  2.57s/it]

Total reward after episode 2137 is 608.0


 21%|██▏       | 2138/10000 [1:30:30<6:33:21,  3.00s/it]

Total reward after episode 2138 is 1703.0


 21%|██▏       | 2139/10000 [1:30:32<5:26:23,  2.49s/it]

Total reward after episode 2139 is 612.0


 21%|██▏       | 2140/10000 [1:30:33<4:59:30,  2.29s/it]

Total reward after episode 2140 is 605.0


 21%|██▏       | 2141/10000 [1:30:37<5:41:24,  2.61s/it]

Total reward after episode 2141 is 1440.0


 21%|██▏       | 2142/10000 [1:30:40<6:10:54,  2.83s/it]

Total reward after episode 2142 is 1420.0


 21%|██▏       | 2143/10000 [1:30:43<6:14:40,  2.86s/it]

Total reward after episode 2143 is 1332.0


 21%|██▏       | 2144/10000 [1:30:44<5:12:36,  2.39s/it]

Total reward after episode 2144 is 611.0


 21%|██▏       | 2145/10000 [1:30:49<6:42:24,  3.07s/it]

Total reward after episode 2145 is 1689.0


 21%|██▏       | 2146/10000 [1:30:53<7:08:57,  3.28s/it]

Total reward after episode 2146 is 1865.0


 21%|██▏       | 2147/10000 [1:30:56<7:00:35,  3.21s/it]

Total reward after episode 2147 is 1330.0


 21%|██▏       | 2148/10000 [1:30:57<5:19:52,  2.44s/it]

Total reward after episode 2148 is 242.0


 21%|██▏       | 2149/10000 [1:30:58<4:39:35,  2.14s/it]

Total reward after episode 2149 is 619.0


 22%|██▏       | 2150/10000 [1:31:42<31:51:53, 14.61s/it]

Total reward after episode 2150 is 267.0


 22%|██▏       | 2151/10000 [1:31:43<23:19:02, 10.69s/it]

Total reward after episode 2151 is 610.0


 22%|██▏       | 2152/10000 [1:31:45<17:10:33,  7.88s/it]

Total reward after episode 2152 is 610.0


 22%|██▏       | 2153/10000 [1:31:47<13:52:34,  6.37s/it]

Total reward after episode 2153 is 1342.0


 22%|██▏       | 2154/10000 [1:31:51<12:05:35,  5.55s/it]

Total reward after episode 2154 is 1866.0


 22%|██▏       | 2155/10000 [1:31:53<9:32:08,  4.38s/it] 

Total reward after episode 2155 is 619.0


 22%|██▏       | 2156/10000 [1:31:54<7:30:00,  3.44s/it]

Total reward after episode 2156 is 608.0


 22%|██▏       | 2157/10000 [1:31:55<5:41:36,  2.61s/it]

Total reward after episode 2157 is 240.0


 22%|██▏       | 2158/10000 [1:31:55<4:27:13,  2.04s/it]

Total reward after episode 2158 is 241.0


 22%|██▏       | 2159/10000 [1:31:57<3:56:56,  1.81s/it]

Total reward after episode 2159 is 608.0


 22%|██▏       | 2160/10000 [1:31:58<3:38:07,  1.67s/it]

Total reward after episode 2160 is 615.0


 22%|██▏       | 2161/10000 [1:32:01<4:32:17,  2.08s/it]

Total reward after episode 2161 is 1328.0


 22%|██▏       | 2162/10000 [1:32:05<5:43:45,  2.63s/it]

Total reward after episode 2162 is 1702.0


 22%|██▏       | 2163/10000 [1:32:06<5:02:48,  2.32s/it]

Total reward after episode 2163 is 623.0


 22%|██▏       | 2164/10000 [1:32:09<5:23:03,  2.47s/it]

Total reward after episode 2164 is 1333.0


 22%|██▏       | 2165/10000 [1:32:11<4:55:19,  2.26s/it]

Total reward after episode 2165 is 603.0


 22%|██▏       | 2166/10000 [1:32:12<4:16:36,  1.97s/it]

Total reward after episode 2166 is 610.0


 22%|██▏       | 2167/10000 [1:32:15<4:57:07,  2.28s/it]

Total reward after episode 2167 is 1046.0


 22%|██▏       | 2168/10000 [1:32:18<5:14:49,  2.41s/it]

Total reward after episode 2168 is 1337.0


 22%|██▏       | 2169/10000 [1:32:20<4:43:28,  2.17s/it]

Total reward after episode 2169 is 606.0


 22%|██▏       | 2170/10000 [1:32:21<4:07:24,  1.90s/it]

Total reward after episode 2170 is 608.0


 22%|██▏       | 2171/10000 [1:32:22<3:49:05,  1.76s/it]

Total reward after episode 2171 is 608.0


 22%|██▏       | 2172/10000 [1:32:24<3:29:09,  1.60s/it]

Total reward after episode 2172 is 611.0


 22%|██▏       | 2173/10000 [1:32:25<3:15:31,  1.50s/it]

Total reward after episode 2173 is 611.0


 22%|██▏       | 2174/10000 [1:32:29<4:59:06,  2.29s/it]

Total reward after episode 2174 is 1691.0


 22%|██▏       | 2175/10000 [1:32:30<4:18:54,  1.99s/it]

Total reward after episode 2175 is 611.0


 22%|██▏       | 2176/10000 [1:32:31<3:27:20,  1.59s/it]

Total reward after episode 2176 is 244.0


 22%|██▏       | 2177/10000 [1:32:32<3:18:25,  1.52s/it]

Total reward after episode 2177 is 607.0


 22%|██▏       | 2178/10000 [1:32:34<3:21:35,  1.55s/it]

Total reward after episode 2178 is 603.0


 22%|██▏       | 2179/10000 [1:32:37<4:16:04,  1.96s/it]

Total reward after episode 2179 is 1344.0


 22%|██▏       | 2180/10000 [1:32:39<4:40:48,  2.15s/it]

Total reward after episode 2180 is 1048.0


 22%|██▏       | 2181/10000 [1:32:41<4:05:33,  1.88s/it]

Total reward after episode 2181 is 611.0


 22%|██▏       | 2182/10000 [1:32:43<4:32:27,  2.09s/it]

Total reward after episode 2182 is 1163.0


 22%|██▏       | 2183/10000 [1:32:45<4:09:22,  1.91s/it]

Total reward after episode 2183 is 608.0


 22%|██▏       | 2184/10000 [1:32:48<4:55:12,  2.27s/it]

Total reward after episode 2184 is 1449.0


 22%|██▏       | 2185/10000 [1:32:49<4:24:26,  2.03s/it]

Total reward after episode 2185 is 606.0


 22%|██▏       | 2186/10000 [1:32:51<3:54:50,  1.80s/it]

Total reward after episode 2186 is 610.0


 22%|██▏       | 2187/10000 [1:32:54<5:02:37,  2.32s/it]

Total reward after episode 2187 is 1700.0


 22%|██▏       | 2188/10000 [1:32:57<5:41:39,  2.62s/it]

Total reward after episode 2188 is 1333.0


 22%|██▏       | 2189/10000 [1:32:59<4:49:40,  2.23s/it]

Total reward after episode 2189 is 611.0


 22%|██▏       | 2190/10000 [1:33:00<4:12:16,  1.94s/it]

Total reward after episode 2190 is 611.0


 22%|██▏       | 2191/10000 [1:33:03<5:02:13,  2.32s/it]

Total reward after episode 2191 is 1355.0


 22%|██▏       | 2192/10000 [1:33:05<4:22:33,  2.02s/it]

Total reward after episode 2192 is 610.0


 22%|██▏       | 2193/10000 [1:33:06<4:00:50,  1.85s/it]

Total reward after episode 2193 is 612.0


 22%|██▏       | 2194/10000 [1:33:07<3:41:08,  1.70s/it]

Total reward after episode 2194 is 602.0


 22%|██▏       | 2195/10000 [1:33:10<4:25:30,  2.04s/it]

Total reward after episode 2195 is 1055.0


 22%|██▏       | 2196/10000 [1:33:11<3:30:00,  1.61s/it]

Total reward after episode 2196 is 251.0


 22%|██▏       | 2197/10000 [1:33:12<3:18:27,  1.53s/it]

Total reward after episode 2197 is 613.0


 22%|██▏       | 2198/10000 [1:33:13<3:10:20,  1.46s/it]

Total reward after episode 2198 is 611.0


 22%|██▏       | 2199/10000 [1:33:17<4:18:28,  1.99s/it]

Total reward after episode 2199 is 1038.0


 22%|██▏       | 2200/10000 [1:33:18<3:53:59,  1.80s/it]

Total reward after episode 2200 is 616.0


 22%|██▏       | 2201/10000 [1:33:21<4:30:48,  2.08s/it]

Total reward after episode 2201 is 1346.0


 22%|██▏       | 2202/10000 [1:33:26<6:18:14,  2.91s/it]

Total reward after episode 2202 is 1859.0


 22%|██▏       | 2203/10000 [1:33:27<5:14:25,  2.42s/it]

Total reward after episode 2203 is 611.0


 22%|██▏       | 2204/10000 [1:33:28<4:35:08,  2.12s/it]

Total reward after episode 2204 is 608.0


 22%|██▏       | 2205/10000 [1:33:32<5:27:41,  2.52s/it]

Total reward after episode 2205 is 1338.0


 22%|██▏       | 2206/10000 [1:33:33<4:48:42,  2.22s/it]

Total reward after episode 2206 is 603.0


 22%|██▏       | 2207/10000 [1:33:36<5:22:38,  2.48s/it]

Total reward after episode 2207 is 1340.0


 22%|██▏       | 2208/10000 [1:33:39<5:34:41,  2.58s/it]

Total reward after episode 2208 is 1336.0


 22%|██▏       | 2209/10000 [1:33:43<6:06:32,  2.82s/it]

Total reward after episode 2209 is 1584.0


 22%|██▏       | 2210/10000 [1:33:47<7:22:18,  3.41s/it]

Total reward after episode 2210 is 1920.0


 22%|██▏       | 2211/10000 [1:33:49<6:00:53,  2.78s/it]

Total reward after episode 2211 is 613.0


 22%|██▏       | 2212/10000 [1:33:50<5:11:25,  2.40s/it]

Total reward after episode 2212 is 609.0


 22%|██▏       | 2213/10000 [1:33:52<4:37:24,  2.14s/it]

Total reward after episode 2213 is 604.0


 22%|██▏       | 2214/10000 [1:33:55<5:29:38,  2.54s/it]

Total reward after episode 2214 is 1345.0


 22%|██▏       | 2215/10000 [1:33:57<4:57:26,  2.29s/it]

Total reward after episode 2215 is 734.0


 22%|██▏       | 2216/10000 [1:34:00<5:28:24,  2.53s/it]

Total reward after episode 2216 is 1355.0


 22%|██▏       | 2217/10000 [1:34:04<6:44:54,  3.12s/it]

Total reward after episode 2217 is 1336.0


 22%|██▏       | 2218/10000 [1:34:07<6:37:39,  3.07s/it]

Total reward after episode 2218 is 1054.0


 22%|██▏       | 2219/10000 [1:34:10<6:33:03,  3.03s/it]

Total reward after episode 2219 is 1330.0


 22%|██▏       | 2220/10000 [1:34:14<7:06:35,  3.29s/it]

Total reward after episode 2220 is 1695.0


 22%|██▏       | 2221/10000 [1:34:25<11:37:31,  5.38s/it]

Total reward after episode 2221 is 3027.0


 22%|██▏       | 2222/10000 [1:34:25<8:32:31,  3.95s/it] 

Total reward after episode 2222 is 250.0


 22%|██▏       | 2223/10000 [1:34:26<6:23:33,  2.96s/it]

Total reward after episode 2223 is 251.0


 22%|██▏       | 2224/10000 [1:34:30<7:10:13,  3.32s/it]

Total reward after episode 2224 is 1698.0


 22%|██▏       | 2225/10000 [1:34:33<7:00:46,  3.25s/it]

Total reward after episode 2225 is 1350.0


 22%|██▏       | 2226/10000 [1:34:34<5:18:19,  2.46s/it]

Total reward after episode 2226 is 251.0


 22%|██▏       | 2227/10000 [1:34:37<5:52:56,  2.72s/it]

Total reward after episode 2227 is 1348.0


 22%|██▏       | 2228/10000 [1:34:39<5:09:43,  2.39s/it]

Total reward after episode 2228 is 736.0


 22%|██▏       | 2229/10000 [1:34:41<5:22:57,  2.49s/it]

Total reward after episode 2229 is 1340.0


 22%|██▏       | 2230/10000 [1:34:45<6:19:58,  2.93s/it]

Total reward after episode 2230 is 1325.0


 22%|██▏       | 2231/10000 [1:34:49<6:39:42,  3.09s/it]

Total reward after episode 2231 is 1441.0


 22%|██▏       | 2232/10000 [1:34:53<7:18:43,  3.39s/it]

Total reward after episode 2232 is 1931.0


 22%|██▏       | 2233/10000 [1:34:57<7:31:23,  3.49s/it]

Total reward after episode 2233 is 1580.0


 22%|██▏       | 2234/10000 [1:34:58<6:06:35,  2.83s/it]

Total reward after episode 2234 is 611.0


 22%|██▏       | 2235/10000 [1:35:01<6:03:06,  2.81s/it]

Total reward after episode 2235 is 1339.0


 22%|██▏       | 2236/10000 [1:35:05<7:06:15,  3.29s/it]

Total reward after episode 2236 is 1425.0


 22%|██▏       | 2237/10000 [1:35:06<5:52:05,  2.72s/it]

Total reward after episode 2237 is 613.0


 22%|██▏       | 2238/10000 [1:35:50<32:15:38, 14.96s/it]

Total reward after episode 2238 is 139.0


 22%|██▏       | 2239/10000 [1:35:53<24:30:22, 11.37s/it]

Total reward after episode 2239 is 1329.0


 22%|██▏       | 2240/10000 [1:35:54<18:08:32,  8.42s/it]

Total reward after episode 2240 is 616.0


 22%|██▏       | 2241/10000 [1:35:57<14:13:52,  6.60s/it]

Total reward after episode 2241 is 1047.0


 22%|██▏       | 2242/10000 [1:36:00<12:03:08,  5.59s/it]

Total reward after episode 2242 is 1349.0


 22%|██▏       | 2243/10000 [1:36:01<9:17:24,  4.31s/it] 

Total reward after episode 2243 is 612.0


 22%|██▏       | 2244/10000 [1:36:04<7:52:32,  3.66s/it]

Total reward after episode 2244 is 733.0


 22%|██▏       | 2245/10000 [1:36:08<8:17:54,  3.85s/it]

Total reward after episode 2245 is 1430.0


 22%|██▏       | 2246/10000 [1:36:11<8:02:30,  3.73s/it]

Total reward after episode 2246 is 1344.0


 22%|██▏       | 2247/10000 [1:36:15<7:57:08,  3.69s/it]

Total reward after episode 2247 is 1439.0


 22%|██▏       | 2248/10000 [1:36:58<33:30:16, 15.56s/it]

Total reward after episode 2248 is 443.0


 22%|██▏       | 2249/10000 [1:37:00<24:27:30, 11.36s/it]

Total reward after episode 2249 is 620.0


 22%|██▎       | 2250/10000 [1:37:04<19:47:56,  9.20s/it]

Total reward after episode 2250 is 728.0


 23%|██▎       | 2251/10000 [1:37:07<15:48:22,  7.34s/it]

Total reward after episode 2251 is 1331.0


 23%|██▎       | 2252/10000 [1:37:11<13:34:05,  6.30s/it]

Total reward after episode 2252 is 1430.0


 23%|██▎       | 2253/10000 [1:37:14<11:18:47,  5.26s/it]

Total reward after episode 2253 is 1336.0


 23%|██▎       | 2254/10000 [1:37:18<10:38:30,  4.95s/it]

Total reward after episode 2254 is 1925.0


 23%|██▎       | 2255/10000 [1:37:20<9:10:45,  4.27s/it] 

Total reward after episode 2255 is 1046.0


 23%|██▎       | 2256/10000 [1:37:23<8:15:40,  3.84s/it]

Total reward after episode 2256 is 1330.0


 23%|██▎       | 2257/10000 [1:37:27<7:58:07,  3.70s/it]

Total reward after episode 2257 is 1329.0


 23%|██▎       | 2258/10000 [1:37:29<7:22:10,  3.43s/it]

Total reward after episode 2258 is 1340.0


 23%|██▎       | 2259/10000 [1:37:32<7:01:08,  3.26s/it]

Total reward after episode 2259 is 1331.0


 23%|██▎       | 2260/10000 [1:37:34<5:46:12,  2.68s/it]

Total reward after episode 2260 is 615.0


 23%|██▎       | 2261/10000 [1:37:37<5:53:41,  2.74s/it]

Total reward after episode 2261 is 1329.0


 23%|██▎       | 2262/10000 [1:37:40<6:04:16,  2.82s/it]

Total reward after episode 2262 is 1330.0


 23%|██▎       | 2263/10000 [1:37:43<6:19:11,  2.94s/it]

Total reward after episode 2263 is 1329.0


 23%|██▎       | 2264/10000 [1:37:44<5:04:38,  2.36s/it]

Total reward after episode 2264 is 250.0


 23%|██▎       | 2265/10000 [1:37:47<5:52:25,  2.73s/it]

Total reward after episode 2265 is 1345.0


 23%|██▎       | 2266/10000 [1:37:52<6:45:40,  3.15s/it]

Total reward after episode 2266 is 1691.0


 23%|██▎       | 2267/10000 [1:37:53<5:34:46,  2.60s/it]

Total reward after episode 2267 is 615.0


 23%|██▎       | 2268/10000 [1:38:03<10:11:41,  4.75s/it]

Total reward after episode 2268 is 3032.0


 23%|██▎       | 2269/10000 [1:38:05<8:58:08,  4.18s/it] 

Total reward after episode 2269 is 1330.0


 23%|██▎       | 2270/10000 [1:38:10<9:06:55,  4.25s/it]

Total reward after episode 2270 is 1693.0


 23%|██▎       | 2271/10000 [1:38:12<7:51:49,  3.66s/it]

Total reward after episode 2271 is 756.0


 23%|██▎       | 2272/10000 [1:38:17<8:28:18,  3.95s/it]

Total reward after episode 2272 is 1690.0


 23%|██▎       | 2273/10000 [1:38:17<6:19:25,  2.95s/it]

Total reward after episode 2273 is 251.0


 23%|██▎       | 2274/10000 [1:38:19<5:23:57,  2.52s/it]

Total reward after episode 2274 is 738.0


 23%|██▎       | 2275/10000 [1:38:23<6:22:45,  2.97s/it]

Total reward after episode 2275 is 1696.0


 23%|██▎       | 2276/10000 [1:38:26<6:27:49,  3.01s/it]

Total reward after episode 2276 is 1427.0


 23%|██▎       | 2277/10000 [1:38:27<5:26:46,  2.54s/it]

Total reward after episode 2277 is 627.0


 23%|██▎       | 2278/10000 [1:38:30<5:29:58,  2.56s/it]

Total reward after episode 2278 is 1044.0


 23%|██▎       | 2279/10000 [1:38:34<6:04:06,  2.83s/it]

Total reward after episode 2279 is 1429.0


 23%|██▎       | 2280/10000 [1:38:37<6:22:23,  2.97s/it]

Total reward after episode 2280 is 1439.0


 23%|██▎       | 2281/10000 [1:38:40<6:31:34,  3.04s/it]

Total reward after episode 2281 is 1062.0


 23%|██▎       | 2282/10000 [1:38:44<7:10:27,  3.35s/it]

Total reward after episode 2282 is 1930.0


 23%|██▎       | 2283/10000 [1:38:45<5:24:40,  2.52s/it]

Total reward after episode 2283 is 252.0


 23%|██▎       | 2284/10000 [1:38:48<5:39:30,  2.64s/it]

Total reward after episode 2284 is 1329.0


 23%|██▎       | 2285/10000 [1:38:51<5:49:24,  2.72s/it]

Total reward after episode 2285 is 1333.0


 23%|██▎       | 2286/10000 [1:38:52<4:56:23,  2.31s/it]

Total reward after episode 2286 is 609.0


 23%|██▎       | 2287/10000 [1:38:52<3:51:46,  1.80s/it]

Total reward after episode 2287 is 251.0


 23%|██▎       | 2288/10000 [1:38:56<4:58:25,  2.32s/it]

Total reward after episode 2288 is 1344.0


 23%|██▎       | 2289/10000 [1:39:00<6:13:34,  2.91s/it]

Total reward after episode 2289 is 1698.0


 23%|██▎       | 2290/10000 [1:39:04<6:28:53,  3.03s/it]

Total reward after episode 2290 is 1323.0


 23%|██▎       | 2291/10000 [1:39:07<6:34:01,  3.07s/it]

Total reward after episode 2291 is 1331.0


 23%|██▎       | 2292/10000 [1:39:10<6:43:10,  3.14s/it]

Total reward after episode 2292 is 1440.0


 23%|██▎       | 2293/10000 [1:39:11<5:33:12,  2.59s/it]

Total reward after episode 2293 is 611.0


 23%|██▎       | 2294/10000 [1:39:12<4:18:50,  2.02s/it]

Total reward after episode 2294 is 250.0


 23%|██▎       | 2295/10000 [1:39:15<5:00:41,  2.34s/it]

Total reward after episode 2295 is 1327.0


 23%|██▎       | 2296/10000 [1:39:16<4:18:53,  2.02s/it]

Total reward after episode 2296 is 608.0


 23%|██▎       | 2297/10000 [1:39:17<3:27:46,  1.62s/it]

Total reward after episode 2297 is 246.0


 23%|██▎       | 2298/10000 [1:39:20<4:06:52,  1.92s/it]

Total reward after episode 2298 is 748.0


 23%|██▎       | 2299/10000 [1:39:20<3:18:30,  1.55s/it]

Total reward after episode 2299 is 247.0


 23%|██▎       | 2300/10000 [1:39:24<4:27:44,  2.09s/it]

Total reward after episode 2300 is 1427.0


 23%|██▎       | 2301/10000 [1:39:27<5:19:36,  2.49s/it]

Total reward after episode 2301 is 1344.0


 23%|██▎       | 2302/10000 [1:39:31<6:21:30,  2.97s/it]

Total reward after episode 2302 is 1926.0


 23%|██▎       | 2303/10000 [1:39:33<5:18:39,  2.48s/it]

Total reward after episode 2303 is 615.0


 23%|██▎       | 2304/10000 [1:39:36<5:39:07,  2.64s/it]

Total reward after episode 2304 is 1329.0


 23%|██▎       | 2305/10000 [1:39:44<9:16:06,  4.34s/it]

Total reward after episode 2305 is 2340.0


 23%|██▎       | 2306/10000 [1:39:47<8:27:50,  3.96s/it]

Total reward after episode 2306 is 1350.0


 23%|██▎       | 2307/10000 [1:39:50<7:58:12,  3.73s/it]

Total reward after episode 2307 is 1353.0


 23%|██▎       | 2308/10000 [1:39:53<7:23:19,  3.46s/it]

Total reward after episode 2308 is 1333.0


 23%|██▎       | 2309/10000 [1:39:56<7:10:40,  3.36s/it]

Total reward after episode 2309 is 1327.0


 23%|██▎       | 2310/10000 [1:39:59<7:02:22,  3.30s/it]

Total reward after episode 2310 is 1349.0


 23%|██▎       | 2311/10000 [1:40:02<6:56:43,  3.25s/it]

Total reward after episode 2311 is 1348.0


 23%|██▎       | 2312/10000 [1:40:06<6:59:23,  3.27s/it]

Total reward after episode 2312 is 1355.0


 23%|██▎       | 2313/10000 [1:40:07<5:44:11,  2.69s/it]

Total reward after episode 2313 is 615.0


 23%|██▎       | 2314/10000 [1:40:09<4:57:55,  2.33s/it]

Total reward after episode 2314 is 630.0


 23%|██▎       | 2315/10000 [1:40:11<4:51:01,  2.27s/it]

Total reward after episode 2315 is 1076.0


 23%|██▎       | 2316/10000 [1:40:12<4:16:35,  2.00s/it]

Total reward after episode 2316 is 616.0


 23%|██▎       | 2317/10000 [1:40:13<3:22:56,  1.58s/it]

Total reward after episode 2317 is 250.0


 23%|██▎       | 2318/10000 [1:40:16<4:21:35,  2.04s/it]

Total reward after episode 2318 is 1348.0


 23%|██▎       | 2319/10000 [1:40:19<5:11:12,  2.43s/it]

Total reward after episode 2319 is 1588.0


 23%|██▎       | 2320/10000 [1:40:22<5:32:23,  2.60s/it]

Total reward after episode 2320 is 776.0


 23%|██▎       | 2321/10000 [1:40:26<6:20:01,  2.97s/it]

Total reward after episode 2321 is 1431.0


 23%|██▎       | 2322/10000 [1:40:31<7:20:29,  3.44s/it]

Total reward after episode 2322 is 1695.0


 23%|██▎       | 2323/10000 [1:40:36<8:54:04,  4.17s/it]

Total reward after episode 2323 is 1672.0


 23%|██▎       | 2324/10000 [1:40:43<10:16:25,  4.82s/it]

Total reward after episode 2324 is 2356.0


 23%|██▎       | 2325/10000 [1:40:46<9:09:07,  4.29s/it] 

Total reward after episode 2325 is 1357.0


 23%|██▎       | 2326/10000 [1:40:48<8:04:24,  3.79s/it]

Total reward after episode 2326 is 1046.0


 23%|██▎       | 2327/10000 [1:40:51<7:36:42,  3.57s/it]

Total reward after episode 2327 is 1437.0


 23%|██▎       | 2328/10000 [1:40:52<5:43:22,  2.69s/it]

Total reward after episode 2328 is 251.0


 23%|██▎       | 2329/10000 [1:40:55<6:08:06,  2.88s/it]

Total reward after episode 2329 is 1345.0


 23%|██▎       | 2330/10000 [1:40:57<5:23:21,  2.53s/it]

Total reward after episode 2330 is 615.0


 23%|██▎       | 2331/10000 [1:41:00<5:34:58,  2.62s/it]

Total reward after episode 2331 is 1352.0


 23%|██▎       | 2332/10000 [1:41:03<5:41:38,  2.67s/it]

Total reward after episode 2332 is 1042.0


 23%|██▎       | 2333/10000 [1:41:04<4:54:53,  2.31s/it]

Total reward after episode 2333 is 627.0


 23%|██▎       | 2334/10000 [1:41:08<5:47:10,  2.72s/it]

Total reward after episode 2334 is 1581.0


 23%|██▎       | 2335/10000 [1:41:19<11:05:57,  5.21s/it]

Total reward after episode 2335 is 3020.0


 23%|██▎       | 2336/10000 [1:42:03<35:39:35, 16.75s/it]

Total reward after episode 2336 is 267.0


 23%|██▎       | 2337/10000 [1:42:06<27:19:18, 12.84s/it]

Total reward after episode 2337 is 1705.0


 23%|██▎       | 2338/10000 [1:42:09<20:54:02,  9.82s/it]

Total reward after episode 2338 is 1352.0


 23%|██▎       | 2339/10000 [1:42:10<15:00:58,  7.06s/it]

Total reward after episode 2339 is 251.0


 23%|██▎       | 2340/10000 [1:42:12<12:09:15,  5.71s/it]

Total reward after episode 2340 is 1046.0


 23%|██▎       | 2341/10000 [1:42:15<10:27:00,  4.91s/it]

Total reward after episode 2341 is 1348.0


 23%|██▎       | 2342/10000 [1:42:18<9:16:32,  4.36s/it] 

Total reward after episode 2342 is 1348.0


 23%|██▎       | 2343/10000 [1:42:23<9:09:00,  4.30s/it]

Total reward after episode 2343 is 1692.0


 23%|██▎       | 2344/10000 [1:42:25<7:50:50,  3.69s/it]

Total reward after episode 2344 is 1048.0


 23%|██▎       | 2345/10000 [1:42:28<7:50:31,  3.69s/it]

Total reward after episode 2345 is 1707.0


 23%|██▎       | 2346/10000 [1:42:29<5:53:02,  2.77s/it]

Total reward after episode 2346 is 251.0


 23%|██▎       | 2347/10000 [1:42:31<5:29:54,  2.59s/it]

Total reward after episode 2347 is 1051.0


 23%|██▎       | 2348/10000 [1:42:32<4:14:32,  2.00s/it]

Total reward after episode 2348 is 249.0


 23%|██▎       | 2349/10000 [1:42:32<3:21:09,  1.58s/it]

Total reward after episode 2349 is 251.0


 24%|██▎       | 2350/10000 [1:42:35<4:11:33,  1.97s/it]

Total reward after episode 2350 is 1334.0


 24%|██▎       | 2351/10000 [1:42:38<4:45:09,  2.24s/it]

Total reward after episode 2351 is 1332.0


 24%|██▎       | 2352/10000 [1:42:41<5:19:57,  2.51s/it]

Total reward after episode 2352 is 1345.0


 24%|██▎       | 2353/10000 [1:42:45<6:10:51,  2.91s/it]

Total reward after episode 2353 is 1694.0


 24%|██▎       | 2354/10000 [1:42:46<4:45:02,  2.24s/it]

Total reward after episode 2354 is 251.0


 24%|██▎       | 2355/10000 [1:42:50<5:47:56,  2.73s/it]

Total reward after episode 2355 is 1583.0


 24%|██▎       | 2356/10000 [1:42:53<5:51:37,  2.76s/it]

Total reward after episode 2356 is 1350.0


 24%|██▎       | 2357/10000 [1:42:54<5:04:20,  2.39s/it]

Total reward after episode 2357 is 609.0


 24%|██▎       | 2358/10000 [1:42:58<6:19:45,  2.98s/it]

Total reward after episode 2358 is 1928.0


 24%|██▎       | 2359/10000 [1:42:59<4:49:17,  2.27s/it]

Total reward after episode 2359 is 251.0


 24%|██▎       | 2360/10000 [1:43:02<5:21:51,  2.53s/it]

Total reward after episode 2360 is 1331.0


 24%|██▎       | 2361/10000 [1:43:05<5:24:27,  2.55s/it]

Total reward after episode 2361 is 1047.0


 24%|██▎       | 2362/10000 [1:43:05<4:12:18,  1.98s/it]

Total reward after episode 2362 is 250.0


 24%|██▎       | 2363/10000 [1:43:06<3:20:08,  1.57s/it]

Total reward after episode 2363 is 251.0


 24%|██▎       | 2364/10000 [1:43:09<4:21:51,  2.06s/it]

Total reward after episode 2364 is 1348.0


 24%|██▎       | 2365/10000 [1:43:11<3:54:06,  1.84s/it]

Total reward after episode 2365 is 615.0


 24%|██▎       | 2366/10000 [1:43:12<3:37:49,  1.71s/it]

Total reward after episode 2366 is 614.0


 24%|██▎       | 2367/10000 [1:43:15<4:28:40,  2.11s/it]

Total reward after episode 2367 is 1439.0


 24%|██▎       | 2368/10000 [1:43:18<5:09:04,  2.43s/it]

Total reward after episode 2368 is 1440.0


 24%|██▎       | 2369/10000 [1:43:21<5:35:13,  2.64s/it]

Total reward after episode 2369 is 1427.0


 24%|██▎       | 2370/10000 [1:43:26<6:45:27,  3.19s/it]

Total reward after episode 2370 is 1870.0


 24%|██▎       | 2371/10000 [1:43:36<11:03:43,  5.22s/it]

Total reward after episode 2371 is 2613.0


 24%|██▎       | 2372/10000 [1:43:37<8:36:08,  4.06s/it] 

Total reward after episode 2372 is 612.0


 24%|██▎       | 2373/10000 [1:43:41<8:17:48,  3.92s/it]

Total reward after episode 2373 is 1701.0


 24%|██▎       | 2374/10000 [1:43:43<7:16:12,  3.43s/it]

Total reward after episode 2374 is 1052.0


 24%|██▍       | 2375/10000 [1:43:46<7:00:19,  3.31s/it]

Total reward after episode 2375 is 1356.0


 24%|██▍       | 2376/10000 [1:43:50<7:16:32,  3.44s/it]

Total reward after episode 2376 is 1706.0


 24%|██▍       | 2377/10000 [1:43:54<7:34:55,  3.58s/it]

Total reward after episode 2377 is 1583.0


 24%|██▍       | 2378/10000 [1:44:05<12:12:32,  5.77s/it]

Total reward after episode 2378 is 2598.0


 24%|██▍       | 2379/10000 [1:44:09<11:11:11,  5.28s/it]

Total reward after episode 2379 is 1338.0


 24%|██▍       | 2380/10000 [1:44:09<8:13:24,  3.89s/it] 

Total reward after episode 2380 is 250.0


 24%|██▍       | 2381/10000 [1:44:11<6:35:48,  3.12s/it]

Total reward after episode 2381 is 611.0


 24%|██▍       | 2382/10000 [1:44:11<5:01:30,  2.37s/it]

Total reward after episode 2382 is 251.0


 24%|██▍       | 2383/10000 [1:44:14<5:16:37,  2.49s/it]

Total reward after episode 2383 is 1046.0


 24%|██▍       | 2384/10000 [1:44:17<5:40:17,  2.68s/it]

Total reward after episode 2384 is 1439.0


 24%|██▍       | 2385/10000 [1:44:20<6:00:32,  2.84s/it]

Total reward after episode 2385 is 1346.0


 24%|██▍       | 2386/10000 [1:44:24<6:25:06,  3.03s/it]

Total reward after episode 2386 is 1433.0


 24%|██▍       | 2387/10000 [1:44:25<5:19:30,  2.52s/it]

Total reward after episode 2387 is 611.0


 24%|██▍       | 2388/10000 [1:44:28<5:35:18,  2.64s/it]

Total reward after episode 2388 is 1440.0


 24%|██▍       | 2389/10000 [1:44:32<6:03:11,  2.86s/it]

Total reward after episode 2389 is 1436.0


 24%|██▍       | 2390/10000 [1:44:35<6:38:55,  3.15s/it]

Total reward after episode 2390 is 1692.0


 24%|██▍       | 2391/10000 [1:44:38<6:33:56,  3.11s/it]

Total reward after episode 2391 is 1350.0


 24%|██▍       | 2392/10000 [1:44:42<6:36:46,  3.13s/it]

Total reward after episode 2392 is 1347.0


 24%|██▍       | 2393/10000 [1:44:45<6:45:56,  3.20s/it]

Total reward after episode 2393 is 1437.0


 24%|██▍       | 2394/10000 [1:44:48<6:43:30,  3.18s/it]

Total reward after episode 2394 is 1441.0


 24%|██▍       | 2395/10000 [1:44:51<6:30:29,  3.08s/it]

Total reward after episode 2395 is 1356.0


 24%|██▍       | 2396/10000 [1:44:52<5:21:39,  2.54s/it]

Total reward after episode 2396 is 609.0


 24%|██▍       | 2397/10000 [1:44:55<5:50:55,  2.77s/it]

Total reward after episode 2397 is 1590.0


 24%|██▍       | 2398/10000 [1:44:58<5:57:13,  2.82s/it]

Total reward after episode 2398 is 1348.0


 24%|██▍       | 2399/10000 [1:45:02<6:37:35,  3.14s/it]

Total reward after episode 2399 is 1695.0


 24%|██▍       | 2400/10000 [1:45:05<6:36:11,  3.13s/it]

Total reward after episode 2400 is 1441.0


 24%|██▍       | 2401/10000 [1:45:07<5:36:09,  2.65s/it]

Total reward after episode 2401 is 635.0


 24%|██▍       | 2402/10000 [1:45:11<6:36:17,  3.13s/it]

Total reward after episode 2402 is 1929.0


 24%|██▍       | 2403/10000 [1:45:15<7:09:10,  3.39s/it]

Total reward after episode 2403 is 1712.0


 24%|██▍       | 2404/10000 [1:45:28<12:52:15,  6.10s/it]

Total reward after episode 2404 is 2586.0


 24%|██▍       | 2405/10000 [1:45:29<10:03:19,  4.77s/it]

Total reward after episode 2405 is 607.0


 24%|██▍       | 2406/10000 [1:45:34<9:48:05,  4.65s/it] 

Total reward after episode 2406 is 1926.0


 24%|██▍       | 2407/10000 [1:45:36<8:37:08,  4.09s/it]

Total reward after episode 2407 is 1344.0


 24%|██▍       | 2408/10000 [1:45:40<8:19:22,  3.95s/it]

Total reward after episode 2408 is 1693.0


 24%|██▍       | 2409/10000 [1:45:43<7:46:59,  3.69s/it]

Total reward after episode 2409 is 1440.0


 24%|██▍       | 2410/10000 [1:45:44<6:15:14,  2.97s/it]

Total reward after episode 2410 is 611.0


 24%|██▍       | 2411/10000 [1:45:47<6:08:58,  2.92s/it]

Total reward after episode 2411 is 1350.0


 24%|██▍       | 2412/10000 [1:45:50<6:07:21,  2.90s/it]

Total reward after episode 2412 is 1350.0


 24%|██▍       | 2413/10000 [1:45:52<5:47:50,  2.75s/it]

Total reward after episode 2413 is 1066.0


 24%|██▍       | 2414/10000 [1:45:56<5:58:11,  2.83s/it]

Total reward after episode 2414 is 1348.0


 24%|██▍       | 2415/10000 [1:45:59<6:29:57,  3.08s/it]

Total reward after episode 2415 is 1692.0


 24%|██▍       | 2416/10000 [1:46:02<6:38:01,  3.15s/it]

Total reward after episode 2416 is 1440.0


 24%|██▍       | 2417/10000 [1:46:05<6:05:05,  2.89s/it]

Total reward after episode 2417 is 1074.0


 24%|██▍       | 2418/10000 [1:46:08<6:04:40,  2.89s/it]

Total reward after episode 2418 is 1046.0


 24%|██▍       | 2419/10000 [1:46:11<6:31:54,  3.10s/it]

Total reward after episode 2419 is 1696.0


 24%|██▍       | 2420/10000 [1:46:15<6:40:53,  3.17s/it]

Total reward after episode 2420 is 1437.0


 24%|██▍       | 2421/10000 [1:46:18<6:35:06,  3.13s/it]

Total reward after episode 2421 is 1429.0


 24%|██▍       | 2422/10000 [1:46:21<6:50:19,  3.25s/it]

Total reward after episode 2422 is 1588.0


 24%|██▍       | 2423/10000 [1:46:26<7:57:20,  3.78s/it]

Total reward after episode 2423 is 1933.0


 24%|██▍       | 2424/10000 [1:46:30<7:41:48,  3.66s/it]

Total reward after episode 2424 is 1355.0


 24%|██▍       | 2425/10000 [1:46:32<7:12:44,  3.43s/it]

Total reward after episode 2425 is 1359.0


 24%|██▍       | 2426/10000 [1:46:36<7:29:54,  3.56s/it]

Total reward after episode 2426 is 1702.0


 24%|██▍       | 2427/10000 [1:46:40<7:30:15,  3.57s/it]

Total reward after episode 2427 is 1437.0


 24%|██▍       | 2428/10000 [1:46:44<7:48:11,  3.71s/it]

Total reward after episode 2428 is 1695.0


 24%|██▍       | 2429/10000 [1:46:47<7:23:31,  3.51s/it]

Total reward after episode 2429 is 1440.0


 24%|██▍       | 2430/10000 [1:46:50<6:54:51,  3.29s/it]

Total reward after episode 2430 is 818.0


 24%|██▍       | 2431/10000 [1:46:51<5:49:20,  2.77s/it]

Total reward after episode 2431 is 606.0


 24%|██▍       | 2432/10000 [1:46:52<4:28:06,  2.13s/it]

Total reward after episode 2432 is 250.0


 24%|██▍       | 2433/10000 [1:46:53<3:30:38,  1.67s/it]

Total reward after episode 2433 is 249.0


 24%|██▍       | 2434/10000 [1:46:56<4:33:34,  2.17s/it]

Total reward after episode 2434 is 1588.0


 24%|██▍       | 2435/10000 [1:46:58<4:49:12,  2.29s/it]

Total reward after episode 2435 is 1047.0


 24%|██▍       | 2436/10000 [1:47:02<5:19:40,  2.54s/it]

Total reward after episode 2436 is 1157.0


 24%|██▍       | 2437/10000 [1:47:04<5:30:51,  2.62s/it]

Total reward after episode 2437 is 1352.0


 24%|██▍       | 2438/10000 [1:47:08<5:54:32,  2.81s/it]

Total reward after episode 2438 is 1356.0


 24%|██▍       | 2439/10000 [1:47:11<5:59:49,  2.86s/it]

Total reward after episode 2439 is 1349.0


 24%|██▍       | 2440/10000 [1:47:11<4:34:59,  2.18s/it]

Total reward after episode 2440 is 251.0


 24%|██▍       | 2441/10000 [1:47:14<4:40:49,  2.23s/it]

Total reward after episode 2441 is 1049.0


 24%|██▍       | 2442/10000 [1:47:18<6:21:24,  3.03s/it]

Total reward after episode 2442 is 1842.0


 24%|██▍       | 2443/10000 [1:47:22<6:47:23,  3.23s/it]

Total reward after episode 2443 is 1581.0


 24%|██▍       | 2444/10000 [1:47:23<5:08:27,  2.45s/it]

Total reward after episode 2444 is 251.0


 24%|██▍       | 2445/10000 [1:47:26<5:23:36,  2.57s/it]

Total reward after episode 2445 is 1335.0


 24%|██▍       | 2446/10000 [1:47:26<4:09:51,  1.98s/it]

Total reward after episode 2446 is 251.0


 24%|██▍       | 2447/10000 [1:47:27<3:19:25,  1.58s/it]

Total reward after episode 2447 is 251.0


 24%|██▍       | 2448/10000 [1:47:31<5:07:21,  2.44s/it]

Total reward after episode 2448 is 1862.0


 24%|██▍       | 2449/10000 [1:47:35<5:52:16,  2.80s/it]

Total reward after episode 2449 is 1435.0


 24%|██▍       | 2450/10000 [1:47:39<6:20:25,  3.02s/it]

Total reward after episode 2450 is 1703.0


 25%|██▍       | 2451/10000 [1:47:42<6:20:33,  3.02s/it]

Total reward after episode 2451 is 1440.0


 25%|██▍       | 2452/10000 [1:47:45<6:31:29,  3.11s/it]

Total reward after episode 2452 is 1439.0


 25%|██▍       | 2453/10000 [1:47:48<6:40:29,  3.18s/it]

Total reward after episode 2453 is 1587.0


 25%|██▍       | 2454/10000 [1:47:51<6:38:27,  3.17s/it]

Total reward after episode 2454 is 1347.0


 25%|██▍       | 2455/10000 [1:48:02<11:37:53,  5.55s/it]

Total reward after episode 2455 is 3019.0


 25%|██▍       | 2456/10000 [1:48:06<10:28:30,  5.00s/it]

Total reward after episode 2456 is 1584.0


 25%|██▍       | 2457/10000 [1:48:11<10:14:29,  4.89s/it]

Total reward after episode 2457 is 1688.0


 25%|██▍       | 2458/10000 [1:48:14<9:14:26,  4.41s/it] 

Total reward after episode 2458 is 1586.0


 25%|██▍       | 2459/10000 [1:48:17<8:21:27,  3.99s/it]

Total reward after episode 2459 is 1331.0


 25%|██▍       | 2460/10000 [1:48:20<7:34:20,  3.62s/it]

Total reward after episode 2460 is 1354.0


 25%|██▍       | 2461/10000 [1:48:23<7:28:59,  3.57s/it]

Total reward after episode 2461 is 1588.0


 25%|██▍       | 2462/10000 [1:48:26<7:13:23,  3.45s/it]

Total reward after episode 2462 is 1347.0


 25%|██▍       | 2463/10000 [1:48:32<8:14:08,  3.93s/it]

Total reward after episode 2463 is 797.0


 25%|██▍       | 2464/10000 [1:48:35<7:44:46,  3.70s/it]

Total reward after episode 2464 is 1349.0


 25%|██▍       | 2465/10000 [1:48:38<7:12:07,  3.44s/it]

Total reward after episode 2465 is 1056.0


 25%|██▍       | 2466/10000 [1:48:41<7:05:10,  3.39s/it]

Total reward after episode 2466 is 1346.0


 25%|██▍       | 2467/10000 [1:48:41<5:21:00,  2.56s/it]

Total reward after episode 2467 is 251.0


 25%|██▍       | 2468/10000 [1:48:45<6:18:35,  3.02s/it]

Total reward after episode 2468 is 1926.0


 25%|██▍       | 2469/10000 [1:48:49<6:27:00,  3.08s/it]

Total reward after episode 2469 is 1330.0


 25%|██▍       | 2470/10000 [1:48:52<6:17:28,  3.01s/it]

Total reward after episode 2470 is 818.0


 25%|██▍       | 2471/10000 [1:48:52<4:48:40,  2.30s/it]

Total reward after episode 2471 is 252.0


 25%|██▍       | 2472/10000 [1:48:56<5:28:43,  2.62s/it]

Total reward after episode 2472 is 1586.0


 25%|██▍       | 2473/10000 [1:48:59<6:13:59,  2.98s/it]

Total reward after episode 2473 is 1861.0


 25%|██▍       | 2474/10000 [1:49:04<7:02:37,  3.37s/it]

Total reward after episode 2474 is 1924.0


 25%|██▍       | 2475/10000 [1:49:07<7:07:06,  3.41s/it]

Total reward after episode 2475 is 1707.0


 25%|██▍       | 2476/10000 [1:49:11<7:17:01,  3.49s/it]

Total reward after episode 2476 is 1425.0


 25%|██▍       | 2477/10000 [1:49:15<7:27:54,  3.57s/it]

Total reward after episode 2477 is 1697.0


 25%|██▍       | 2478/10000 [1:49:15<5:37:16,  2.69s/it]

Total reward after episode 2478 is 251.0


 25%|██▍       | 2479/10000 [1:49:18<5:57:17,  2.85s/it]

Total reward after episode 2479 is 1590.0


 25%|██▍       | 2480/10000 [1:49:22<6:24:35,  3.07s/it]

Total reward after episode 2480 is 1587.0


 25%|██▍       | 2481/10000 [1:49:25<6:30:07,  3.11s/it]

Total reward after episode 2481 is 1347.0


 25%|██▍       | 2482/10000 [1:49:27<5:38:54,  2.70s/it]

Total reward after episode 2482 is 632.0


 25%|██▍       | 2483/10000 [1:49:30<5:55:48,  2.84s/it]

Total reward after episode 2483 is 1439.0


 25%|██▍       | 2484/10000 [1:49:33<5:57:09,  2.85s/it]

Total reward after episode 2484 is 1350.0


 25%|██▍       | 2485/10000 [1:49:37<6:29:59,  3.11s/it]

Total reward after episode 2485 is 1326.0


 25%|██▍       | 2486/10000 [1:49:40<6:22:13,  3.05s/it]

Total reward after episode 2486 is 1354.0


 25%|██▍       | 2487/10000 [1:49:43<6:16:42,  3.01s/it]

Total reward after episode 2487 is 1334.0


 25%|██▍       | 2488/10000 [1:49:47<6:51:30,  3.29s/it]

Total reward after episode 2488 is 1699.0


 25%|██▍       | 2489/10000 [1:49:50<6:42:19,  3.21s/it]

Total reward after episode 2489 is 1441.0


 25%|██▍       | 2490/10000 [1:49:53<6:34:58,  3.16s/it]

Total reward after episode 2490 is 1438.0


 25%|██▍       | 2491/10000 [1:49:56<6:35:38,  3.16s/it]

Total reward after episode 2491 is 1357.0


 25%|██▍       | 2492/10000 [1:49:59<6:32:31,  3.14s/it]

Total reward after episode 2492 is 1437.0


 25%|██▍       | 2493/10000 [1:50:02<6:27:14,  3.10s/it]

Total reward after episode 2493 is 1349.0


 25%|██▍       | 2494/10000 [1:50:05<6:46:15,  3.25s/it]

Total reward after episode 2494 is 1588.0


 25%|██▍       | 2495/10000 [1:50:09<7:05:11,  3.40s/it]

Total reward after episode 2495 is 1691.0


 25%|██▍       | 2496/10000 [1:50:12<6:50:06,  3.28s/it]

Total reward after episode 2496 is 1350.0


 25%|██▍       | 2497/10000 [1:50:16<7:20:41,  3.52s/it]

Total reward after episode 2497 is 1926.0


 25%|██▍       | 2498/10000 [1:50:20<7:17:00,  3.50s/it]

Total reward after episode 2498 is 1588.0


 25%|██▍       | 2499/10000 [1:50:21<5:55:02,  2.84s/it]

Total reward after episode 2499 is 608.0


 25%|██▌       | 2500/10000 [1:50:24<6:12:51,  2.98s/it]

Total reward after episode 2500 is 1437.0


 25%|██▌       | 2501/10000 [1:50:28<6:38:32,  3.19s/it]

Total reward after episode 2501 is 1709.0


 25%|██▌       | 2502/10000 [1:50:31<6:36:07,  3.17s/it]

Total reward after episode 2502 is 1442.0


 25%|██▌       | 2503/10000 [1:50:35<6:57:54,  3.34s/it]

Total reward after episode 2503 is 1695.0


 25%|██▌       | 2504/10000 [1:50:37<6:29:14,  3.12s/it]

Total reward after episode 2504 is 1047.0


 25%|██▌       | 2505/10000 [1:50:40<6:21:53,  3.06s/it]

Total reward after episode 2505 is 1350.0


 25%|██▌       | 2506/10000 [1:50:43<6:11:50,  2.98s/it]

Total reward after episode 2506 is 1157.0


 25%|██▌       | 2507/10000 [1:50:46<6:14:37,  3.00s/it]

Total reward after episode 2507 is 1443.0


 25%|██▌       | 2508/10000 [1:50:49<6:18:29,  3.03s/it]

Total reward after episode 2508 is 1441.0


 25%|██▌       | 2509/10000 [1:50:53<6:48:03,  3.27s/it]

Total reward after episode 2509 is 1695.0


 25%|██▌       | 2510/10000 [1:50:56<6:22:52,  3.07s/it]

Total reward after episode 2510 is 1048.0


 25%|██▌       | 2511/10000 [1:50:57<5:17:45,  2.55s/it]

Total reward after episode 2511 is 606.0


 25%|██▌       | 2512/10000 [1:51:00<5:41:49,  2.74s/it]

Total reward after episode 2512 is 1438.0


 25%|██▌       | 2513/10000 [1:51:04<6:23:50,  3.08s/it]

Total reward after episode 2513 is 1434.0


 25%|██▌       | 2514/10000 [1:51:05<5:16:42,  2.54s/it]

Total reward after episode 2514 is 607.0


 25%|██▌       | 2515/10000 [1:51:10<6:38:19,  3.19s/it]

Total reward after episode 2515 is 1925.0


 25%|██▌       | 2516/10000 [1:51:13<6:29:48,  3.13s/it]

Total reward after episode 2516 is 1349.0


 25%|██▌       | 2517/10000 [1:51:16<6:21:32,  3.06s/it]

Total reward after episode 2517 is 1331.0


 25%|██▌       | 2518/10000 [1:51:18<5:53:09,  2.83s/it]

Total reward after episode 2518 is 1077.0


 25%|██▌       | 2519/10000 [1:51:21<6:03:26,  2.91s/it]

Total reward after episode 2519 is 1348.0


 25%|██▌       | 2520/10000 [1:51:24<6:03:04,  2.91s/it]

Total reward after episode 2520 is 1346.0


 25%|██▌       | 2521/10000 [1:51:27<5:51:13,  2.82s/it]

Total reward after episode 2521 is 1153.0


 25%|██▌       | 2522/10000 [1:51:30<6:03:55,  2.92s/it]

Total reward after episode 2522 is 1356.0


 25%|██▌       | 2523/10000 [1:51:33<5:56:49,  2.86s/it]

Total reward after episode 2523 is 1043.0


 25%|██▌       | 2524/10000 [1:51:33<4:33:29,  2.20s/it]

Total reward after episode 2524 is 251.0


 25%|██▌       | 2525/10000 [1:51:35<4:01:13,  1.94s/it]

Total reward after episode 2525 is 607.0


 25%|██▌       | 2526/10000 [1:51:35<3:12:04,  1.54s/it]

Total reward after episode 2526 is 248.0


 25%|██▌       | 2527/10000 [1:51:37<3:14:08,  1.56s/it]

Total reward after episode 2527 is 654.0


 25%|██▌       | 2528/10000 [1:51:40<3:57:29,  1.91s/it]

Total reward after episode 2528 is 819.0


 25%|██▌       | 2529/10000 [1:51:43<4:30:48,  2.17s/it]

Total reward after episode 2529 is 1350.0


 25%|██▌       | 2530/10000 [1:51:44<3:58:19,  1.91s/it]

Total reward after episode 2530 is 608.0


 25%|██▌       | 2531/10000 [1:51:47<4:57:05,  2.39s/it]

Total reward after episode 2531 is 1351.0


 25%|██▌       | 2532/10000 [1:51:56<8:34:06,  4.13s/it]

Total reward after episode 2532 is 2623.0


 25%|██▌       | 2533/10000 [1:51:58<7:50:24,  3.78s/it]

Total reward after episode 2533 is 1439.0


 25%|██▌       | 2534/10000 [1:52:02<7:24:08,  3.57s/it]

Total reward after episode 2534 is 1348.0


 25%|██▌       | 2535/10000 [1:52:06<8:07:31,  3.92s/it]

Total reward after episode 2535 is 1925.0


 25%|██▌       | 2536/10000 [1:52:10<8:05:40,  3.90s/it]

Total reward after episode 2536 is 1695.0


 25%|██▌       | 2537/10000 [1:52:14<8:10:01,  3.94s/it]

Total reward after episode 2537 is 1925.0


 25%|██▌       | 2538/10000 [1:52:17<7:25:58,  3.59s/it]

Total reward after episode 2538 is 1335.0


 25%|██▌       | 2539/10000 [1:52:21<7:59:45,  3.86s/it]

Total reward after episode 2539 is 1926.0


 25%|██▌       | 2540/10000 [1:52:25<8:00:10,  3.86s/it]

Total reward after episode 2540 is 1585.0


 25%|██▌       | 2541/10000 [1:52:28<7:05:32,  3.42s/it]

Total reward after episode 2541 is 1049.0


 25%|██▌       | 2542/10000 [1:52:31<6:56:40,  3.35s/it]

Total reward after episode 2542 is 1438.0


 25%|██▌       | 2543/10000 [1:52:32<5:37:45,  2.72s/it]

Total reward after episode 2543 is 606.0


 25%|██▌       | 2544/10000 [1:52:33<4:23:51,  2.12s/it]

Total reward after episode 2544 is 232.0


 25%|██▌       | 2545/10000 [1:52:36<5:16:45,  2.55s/it]

Total reward after episode 2545 is 1707.0


 25%|██▌       | 2546/10000 [1:52:47<10:34:02,  5.10s/it]

Total reward after episode 2546 is 3018.0


 25%|██▌       | 2547/10000 [1:52:50<8:51:15,  4.28s/it] 

Total reward after episode 2547 is 1046.0


 25%|██▌       | 2548/10000 [1:52:53<7:56:43,  3.84s/it]

Total reward after episode 2548 is 1333.0


 25%|██▌       | 2549/10000 [1:52:56<7:23:18,  3.57s/it]

Total reward after episode 2549 is 1430.0


 26%|██▌       | 2550/10000 [1:52:59<7:27:04,  3.60s/it]

Total reward after episode 2550 is 1339.0


 26%|██▌       | 2551/10000 [1:53:01<5:59:15,  2.89s/it]

Total reward after episode 2551 is 608.0


 26%|██▌       | 2552/10000 [1:53:04<6:32:59,  3.17s/it]

Total reward after episode 2552 is 1583.0


 26%|██▌       | 2553/10000 [1:53:08<7:09:58,  3.46s/it]

Total reward after episode 2553 is 1871.0


 26%|██▌       | 2554/10000 [1:53:12<7:03:25,  3.41s/it]

Total reward after episode 2554 is 1441.0


 26%|██▌       | 2555/10000 [1:53:15<7:10:11,  3.47s/it]

Total reward after episode 2555 is 1318.0


 26%|██▌       | 2556/10000 [1:53:19<6:58:58,  3.38s/it]

Total reward after episode 2556 is 1346.0


 26%|██▌       | 2557/10000 [1:53:22<7:04:03,  3.42s/it]

Total reward after episode 2557 is 1699.0


 26%|██▌       | 2558/10000 [1:53:25<6:37:11,  3.20s/it]

Total reward after episode 2558 is 1334.0


 26%|██▌       | 2559/10000 [1:53:29<7:24:03,  3.58s/it]

Total reward after episode 2559 is 1858.0


 26%|██▌       | 2560/10000 [1:53:33<7:15:41,  3.51s/it]

Total reward after episode 2560 is 1708.0


 26%|██▌       | 2561/10000 [1:53:35<6:42:59,  3.25s/it]

Total reward after episode 2561 is 1155.0


 26%|██▌       | 2562/10000 [1:53:39<7:08:00,  3.45s/it]

Total reward after episode 2562 is 1703.0


 26%|██▌       | 2563/10000 [1:53:43<7:13:13,  3.50s/it]

Total reward after episode 2563 is 1700.0


 26%|██▌       | 2564/10000 [1:53:46<6:50:06,  3.31s/it]

Total reward after episode 2564 is 1333.0


 26%|██▌       | 2565/10000 [1:53:49<6:45:00,  3.27s/it]

Total reward after episode 2565 is 742.0


 26%|██▌       | 2566/10000 [1:53:53<7:24:30,  3.59s/it]

Total reward after episode 2566 is 1928.0


 26%|██▌       | 2567/10000 [1:53:57<7:29:35,  3.63s/it]

Total reward after episode 2567 is 1688.0


 26%|██▌       | 2568/10000 [1:53:57<5:35:54,  2.71s/it]

Total reward after episode 2568 is 247.0


 26%|██▌       | 2569/10000 [1:54:00<5:35:18,  2.71s/it]

Total reward after episode 2569 is 1359.0


 26%|██▌       | 2570/10000 [1:54:03<5:25:14,  2.63s/it]

Total reward after episode 2570 is 1047.0


 26%|██▌       | 2571/10000 [1:54:07<6:24:27,  3.11s/it]

Total reward after episode 2571 is 1859.0


 26%|██▌       | 2572/10000 [1:54:11<7:01:11,  3.40s/it]

Total reward after episode 2572 is 1860.0


 26%|██▌       | 2573/10000 [1:54:14<6:52:13,  3.33s/it]

Total reward after episode 2573 is 1426.0


 26%|██▌       | 2574/10000 [1:54:17<6:40:47,  3.24s/it]

Total reward after episode 2574 is 1437.0


 26%|██▌       | 2575/10000 [1:54:21<7:01:03,  3.40s/it]

Total reward after episode 2575 is 1694.0


 26%|██▌       | 2576/10000 [1:54:24<7:07:43,  3.46s/it]

Total reward after episode 2576 is 1427.0


 26%|██▌       | 2577/10000 [1:54:26<5:44:22,  2.78s/it]

Total reward after episode 2577 is 608.0


 26%|██▌       | 2578/10000 [1:54:26<4:23:04,  2.13s/it]

Total reward after episode 2578 is 245.0


 26%|██▌       | 2579/10000 [1:54:29<4:35:25,  2.23s/it]

Total reward after episode 2579 is 1151.0


 26%|██▌       | 2580/10000 [1:54:30<4:09:54,  2.02s/it]

Total reward after episode 2580 is 639.0


 26%|██▌       | 2581/10000 [1:54:34<5:25:54,  2.64s/it]

Total reward after episode 2581 is 1861.0


 26%|██▌       | 2582/10000 [1:54:37<5:33:17,  2.70s/it]

Total reward after episode 2582 is 1350.0


 26%|██▌       | 2583/10000 [1:54:45<8:43:22,  4.23s/it]

Total reward after episode 2583 is 3046.0


 26%|██▌       | 2584/10000 [1:54:48<8:00:53,  3.89s/it]

Total reward after episode 2584 is 1437.0


 26%|██▌       | 2585/10000 [1:54:50<7:04:53,  3.44s/it]

Total reward after episode 2585 is 1057.0


 26%|██▌       | 2586/10000 [1:54:54<6:55:02,  3.36s/it]

Total reward after episode 2586 is 1588.0


 26%|██▌       | 2587/10000 [1:54:58<7:31:34,  3.66s/it]

Total reward after episode 2587 is 1426.0


 26%|██▌       | 2588/10000 [1:55:02<8:02:42,  3.91s/it]

Total reward after episode 2588 is 1920.0


 26%|██▌       | 2589/10000 [1:55:06<7:33:48,  3.67s/it]

Total reward after episode 2589 is 1346.0


 26%|██▌       | 2590/10000 [1:55:08<6:43:29,  3.27s/it]

Total reward after episode 2590 is 1060.0


 26%|██▌       | 2591/10000 [1:55:10<6:05:57,  2.96s/it]

Total reward after episode 2591 is 1058.0


 26%|██▌       | 2592/10000 [1:55:13<5:57:37,  2.90s/it]

Total reward after episode 2592 is 1339.0


 26%|██▌       | 2593/10000 [1:55:17<6:37:26,  3.22s/it]

Total reward after episode 2593 is 1716.0


 26%|██▌       | 2594/10000 [1:55:20<6:37:26,  3.22s/it]

Total reward after episode 2594 is 1437.0


 26%|██▌       | 2595/10000 [1:55:27<8:50:22,  4.30s/it]

Total reward after episode 2595 is 2354.0


 26%|██▌       | 2596/10000 [1:55:28<6:59:40,  3.40s/it]

Total reward after episode 2596 is 611.0


 26%|██▌       | 2597/10000 [1:55:32<7:03:46,  3.43s/it]

Total reward after episode 2597 is 1696.0


 26%|██▌       | 2598/10000 [1:55:35<6:57:19,  3.38s/it]

Total reward after episode 2598 is 1351.0


 26%|██▌       | 2599/10000 [1:55:38<6:33:40,  3.19s/it]

Total reward after episode 2599 is 1338.0


 26%|██▌       | 2600/10000 [1:55:39<5:21:24,  2.61s/it]

Total reward after episode 2600 is 606.0


 26%|██▌       | 2601/10000 [1:55:40<4:31:15,  2.20s/it]

Total reward after episode 2601 is 608.0


 26%|██▌       | 2602/10000 [1:55:43<5:11:11,  2.52s/it]

Total reward after episode 2602 is 1331.0


 26%|██▌       | 2603/10000 [1:55:45<4:24:06,  2.14s/it]

Total reward after episode 2603 is 607.0


 26%|██▌       | 2604/10000 [1:55:46<3:50:58,  1.87s/it]

Total reward after episode 2604 is 608.0


 26%|██▌       | 2605/10000 [1:55:56<9:10:45,  4.47s/it]

Total reward after episode 2605 is 3024.0


 26%|██▌       | 2606/10000 [1:56:00<8:26:12,  4.11s/it]

Total reward after episode 2606 is 1438.0


 26%|██▌       | 2607/10000 [1:56:02<7:18:32,  3.56s/it]

Total reward after episode 2607 is 1047.0


 26%|██▌       | 2608/10000 [1:56:05<6:57:01,  3.38s/it]

Total reward after episode 2608 is 1350.0


 26%|██▌       | 2609/10000 [1:56:08<6:35:03,  3.21s/it]

Total reward after episode 2609 is 1336.0


 26%|██▌       | 2610/10000 [1:56:11<6:46:33,  3.30s/it]

Total reward after episode 2610 is 1709.0


 26%|██▌       | 2611/10000 [1:56:14<6:28:05,  3.15s/it]

Total reward after episode 2611 is 1334.0


 26%|██▌       | 2612/10000 [1:56:17<6:21:00,  3.09s/it]

Total reward after episode 2612 is 1350.0


 26%|██▌       | 2613/10000 [1:56:28<10:59:46,  5.36s/it]

Total reward after episode 2613 is 3022.0


 26%|██▌       | 2614/10000 [1:56:32<10:06:36,  4.93s/it]

Total reward after episode 2614 is 1931.0


 26%|██▌       | 2615/10000 [1:56:33<7:51:20,  3.83s/it] 

Total reward after episode 2615 is 608.0


 26%|██▌       | 2616/10000 [1:56:37<8:06:22,  3.95s/it]

Total reward after episode 2616 is 1860.0


 26%|██▌       | 2617/10000 [1:56:38<6:27:14,  3.15s/it]

Total reward after episode 2617 is 611.0


 26%|██▌       | 2618/10000 [1:56:41<5:53:43,  2.88s/it]

Total reward after episode 2618 is 1049.0


 26%|██▌       | 2619/10000 [1:56:42<4:55:07,  2.40s/it]

Total reward after episode 2619 is 606.0


 26%|██▌       | 2620/10000 [1:56:46<6:03:42,  2.96s/it]

Total reward after episode 2620 is 1922.0


 26%|██▌       | 2621/10000 [1:56:50<6:30:34,  3.18s/it]

Total reward after episode 2621 is 1431.0


 26%|██▌       | 2622/10000 [1:56:53<6:39:27,  3.25s/it]

Total reward after episode 2622 is 1331.0


 26%|██▌       | 2623/10000 [1:56:57<6:46:30,  3.31s/it]

Total reward after episode 2623 is 1335.0


 26%|██▌       | 2624/10000 [1:56:58<5:38:53,  2.76s/it]

Total reward after episode 2624 is 627.0


 26%|██▋       | 2625/10000 [1:57:00<4:44:20,  2.31s/it]

Total reward after episode 2625 is 610.0


 26%|██▋       | 2626/10000 [1:57:06<7:11:31,  3.51s/it]

Total reward after episode 2626 is 2643.0


 26%|██▋       | 2627/10000 [1:57:09<6:46:48,  3.31s/it]

Total reward after episode 2627 is 1347.0


 26%|██▋       | 2628/10000 [1:57:12<6:35:10,  3.22s/it]

Total reward after episode 2628 is 1333.0


 26%|██▋       | 2629/10000 [1:57:15<6:22:26,  3.11s/it]

Total reward after episode 2629 is 1340.0


 26%|██▋       | 2630/10000 [1:57:17<6:00:56,  2.94s/it]

Total reward after episode 2630 is 1071.0


 26%|██▋       | 2631/10000 [1:57:21<6:50:08,  3.34s/it]

Total reward after episode 2631 is 1925.0


 26%|██▋       | 2632/10000 [1:57:24<6:30:27,  3.18s/it]

Total reward after episode 2632 is 1350.0


 26%|██▋       | 2633/10000 [1:57:25<5:19:14,  2.60s/it]

Total reward after episode 2633 is 609.0


 26%|██▋       | 2634/10000 [1:57:27<4:29:29,  2.20s/it]

Total reward after episode 2634 is 608.0


 26%|██▋       | 2635/10000 [1:57:34<7:31:31,  3.68s/it]

Total reward after episode 2635 is 2640.0


 26%|██▋       | 2636/10000 [1:57:36<6:53:30,  3.37s/it]

Total reward after episode 2636 is 819.0


 26%|██▋       | 2637/10000 [1:57:38<5:36:25,  2.74s/it]

Total reward after episode 2637 is 611.0


 26%|██▋       | 2638/10000 [1:57:42<6:40:40,  3.27s/it]

Total reward after episode 2638 is 1687.0


 26%|██▋       | 2639/10000 [1:57:45<6:08:30,  3.00s/it]

Total reward after episode 2639 is 1046.0


 26%|██▋       | 2640/10000 [1:57:48<6:24:57,  3.14s/it]

Total reward after episode 2640 is 1344.0


 26%|██▋       | 2641/10000 [1:57:52<6:56:12,  3.39s/it]

Total reward after episode 2641 is 1693.0


 26%|██▋       | 2642/10000 [1:57:53<5:13:42,  2.56s/it]

Total reward after episode 2642 is 251.0


 26%|██▋       | 2643/10000 [1:57:56<5:29:46,  2.69s/it]

Total reward after episode 2643 is 1355.0


 26%|██▋       | 2644/10000 [1:57:58<5:35:03,  2.73s/it]

Total reward after episode 2644 is 1347.0


 26%|██▋       | 2645/10000 [1:58:06<8:22:44,  4.10s/it]

Total reward after episode 2645 is 3054.0


 26%|██▋       | 2646/10000 [1:58:07<6:47:58,  3.33s/it]

Total reward after episode 2646 is 635.0


 26%|██▋       | 2647/10000 [1:58:11<7:06:55,  3.48s/it]

Total reward after episode 2647 is 1717.0


 26%|██▋       | 2648/10000 [1:58:14<6:43:00,  3.29s/it]

Total reward after episode 2648 is 1331.0


 26%|██▋       | 2649/10000 [1:58:18<6:59:11,  3.42s/it]

Total reward after episode 2649 is 1697.0


 26%|██▋       | 2650/10000 [1:58:20<6:21:21,  3.11s/it]

Total reward after episode 2650 is 1055.0


 27%|██▋       | 2651/10000 [1:58:21<4:50:07,  2.37s/it]

Total reward after episode 2651 is 248.0


 27%|██▋       | 2652/10000 [1:58:24<5:11:30,  2.54s/it]

Total reward after episode 2652 is 1357.0


 27%|██▋       | 2653/10000 [1:58:27<5:27:50,  2.68s/it]

Total reward after episode 2653 is 1354.0


 27%|██▋       | 2654/10000 [1:58:29<5:14:38,  2.57s/it]

Total reward after episode 2654 is 1046.0


 27%|██▋       | 2655/10000 [1:58:32<5:28:07,  2.68s/it]

Total reward after episode 2655 is 1440.0


 27%|██▋       | 2656/10000 [1:58:33<4:36:59,  2.26s/it]

Total reward after episode 2656 is 606.0


 27%|██▋       | 2657/10000 [1:58:34<3:36:29,  1.77s/it]

Total reward after episode 2657 is 251.0


 27%|██▋       | 2658/10000 [1:58:35<3:18:04,  1.62s/it]

Total reward after episode 2658 is 611.0


 27%|██▋       | 2659/10000 [1:58:38<4:08:17,  2.03s/it]

Total reward after episode 2659 is 1438.0


 27%|██▋       | 2660/10000 [1:58:41<4:49:43,  2.37s/it]

Total reward after episode 2660 is 1437.0


 27%|██▋       | 2661/10000 [1:58:44<5:06:03,  2.50s/it]

Total reward after episode 2661 is 1325.0


 27%|██▋       | 2662/10000 [1:59:25<28:20:49, 13.91s/it]

Total reward after episode 2662 is 382.0


 27%|██▋       | 2663/10000 [1:59:28<21:44:10, 10.67s/it]

Total reward after episode 2663 is 1440.0


 27%|██▋       | 2664/10000 [1:59:30<16:36:52,  8.15s/it]

Total reward after episode 2664 is 1049.0


 27%|██▋       | 2665/10000 [2:00:10<36:11:44, 17.76s/it]

Total reward after episode 2665 is 991.0


 27%|██▋       | 2666/10000 [2:00:12<26:45:04, 13.13s/it]

Total reward after episode 2666 is 1075.0


 27%|██▋       | 2667/10000 [2:00:16<20:39:44, 10.14s/it]

Total reward after episode 2667 is 1588.0


 27%|██▋       | 2668/10000 [2:00:16<14:50:44,  7.29s/it]

Total reward after episode 2668 is 250.0


 27%|██▋       | 2669/10000 [2:00:21<13:12:11,  6.48s/it]

Total reward after episode 2669 is 1850.0


 27%|██▋       | 2670/10000 [2:00:22<9:38:01,  4.73s/it] 

Total reward after episode 2670 is 249.0


 27%|██▋       | 2671/10000 [2:00:24<8:32:50,  4.20s/it]

Total reward after episode 2671 is 1442.0


 27%|██▋       | 2672/10000 [2:00:28<7:59:10,  3.92s/it]

Total reward after episode 2672 is 1326.0


 27%|██▋       | 2673/10000 [2:00:29<6:23:25,  3.14s/it]

Total reward after episode 2673 is 608.0


 27%|██▋       | 2674/10000 [2:00:32<6:26:39,  3.17s/it]

Total reward after episode 2674 is 1348.0


 27%|██▋       | 2675/10000 [2:00:34<5:20:39,  2.63s/it]

Total reward after episode 2675 is 610.0


 27%|██▋       | 2676/10000 [2:00:36<5:24:45,  2.66s/it]

Total reward after episode 2676 is 1160.0


 27%|██▋       | 2677/10000 [2:00:39<5:38:10,  2.77s/it]

Total reward after episode 2677 is 1439.0


 27%|██▋       | 2678/10000 [2:00:42<5:41:17,  2.80s/it]

Total reward after episode 2678 is 1352.0


 27%|██▋       | 2679/10000 [2:00:43<4:21:45,  2.15s/it]

Total reward after episode 2679 is 249.0


 27%|██▋       | 2680/10000 [2:00:47<5:21:57,  2.64s/it]

Total reward after episode 2680 is 1706.0


 27%|██▋       | 2681/10000 [2:00:48<4:45:02,  2.34s/it]

Total reward after episode 2681 is 613.0


 27%|██▋       | 2682/10000 [2:00:52<5:24:47,  2.66s/it]

Total reward after episode 2682 is 1434.0


 27%|██▋       | 2683/10000 [2:00:56<6:11:25,  3.05s/it]

Total reward after episode 2683 is 1885.0


 27%|██▋       | 2684/10000 [2:00:59<6:09:21,  3.03s/it]

Total reward after episode 2684 is 1348.0


 27%|██▋       | 2685/10000 [2:01:02<6:08:30,  3.02s/it]

Total reward after episode 2685 is 1438.0


 27%|██▋       | 2686/10000 [2:01:04<5:59:50,  2.95s/it]

Total reward after episode 2686 is 1344.0


 27%|██▋       | 2687/10000 [2:01:06<5:01:28,  2.47s/it]

Total reward after episode 2687 is 610.0


 27%|██▋       | 2688/10000 [2:01:09<5:41:56,  2.81s/it]

Total reward after episode 2688 is 1709.0


 27%|██▋       | 2689/10000 [2:01:12<5:47:15,  2.85s/it]

Total reward after episode 2689 is 1436.0


 27%|██▋       | 2690/10000 [2:01:14<4:48:42,  2.37s/it]

Total reward after episode 2690 is 608.0


 27%|██▋       | 2691/10000 [2:01:15<4:07:21,  2.03s/it]

Total reward after episode 2691 is 608.0


 27%|██▋       | 2692/10000 [2:01:19<5:10:08,  2.55s/it]

Total reward after episode 2692 is 1697.0


 27%|██▋       | 2693/10000 [2:01:22<5:55:49,  2.92s/it]

Total reward after episode 2693 is 1865.0


 27%|██▋       | 2694/10000 [2:01:24<4:54:20,  2.42s/it]

Total reward after episode 2694 is 608.0


 27%|██▋       | 2695/10000 [2:01:27<5:32:35,  2.73s/it]

Total reward after episode 2695 is 1704.0


 27%|██▋       | 2696/10000 [2:01:28<4:42:56,  2.32s/it]

Total reward after episode 2696 is 607.0


 27%|██▋       | 2697/10000 [2:01:30<4:02:21,  1.99s/it]

Total reward after episode 2697 is 608.0


 27%|██▋       | 2698/10000 [2:01:31<3:34:22,  1.76s/it]

Total reward after episode 2698 is 608.0


 27%|██▋       | 2699/10000 [2:01:32<2:51:49,  1.41s/it]

Total reward after episode 2699 is 248.0


 27%|██▋       | 2700/10000 [2:01:33<2:45:01,  1.36s/it]

Total reward after episode 2700 is 607.0


 27%|██▋       | 2701/10000 [2:01:33<2:18:23,  1.14s/it]

Total reward after episode 2701 is 247.0


 27%|██▋       | 2702/10000 [2:01:36<3:18:18,  1.63s/it]

Total reward after episode 2702 is 1348.0


 27%|██▋       | 2703/10000 [2:01:40<4:40:32,  2.31s/it]

Total reward after episode 2703 is 1699.0


 27%|██▋       | 2704/10000 [2:01:46<6:42:14,  3.31s/it]

Total reward after episode 2704 is 2366.0


 27%|██▋       | 2705/10000 [2:01:58<11:52:55,  5.86s/it]

Total reward after episode 2705 is 2594.0


 27%|██▋       | 2706/10000 [2:02:00<10:05:40,  4.98s/it]

Total reward after episode 2706 is 598.0


 27%|██▋       | 2707/10000 [2:02:02<7:50:10,  3.87s/it] 

Total reward after episode 2707 is 608.0


 27%|██▋       | 2708/10000 [2:02:05<7:15:16,  3.58s/it]

Total reward after episode 2708 is 1357.0


 27%|██▋       | 2709/10000 [2:02:05<5:29:01,  2.71s/it]

Total reward after episode 2709 is 243.0


 27%|██▋       | 2710/10000 [2:02:08<5:42:10,  2.82s/it]

Total reward after episode 2710 is 1042.0


 27%|██▋       | 2711/10000 [2:02:10<4:53:03,  2.41s/it]

Total reward after episode 2711 is 611.0


 27%|██▋       | 2712/10000 [2:02:13<5:24:42,  2.67s/it]

Total reward after episode 2712 is 1347.0


 27%|██▋       | 2713/10000 [2:02:56<30:03:16, 14.85s/it]

Total reward after episode 2713 is 267.0


 27%|██▋       | 2714/10000 [2:03:00<23:23:22, 11.56s/it]

Total reward after episode 2714 is 1584.0


 27%|██▋       | 2715/10000 [2:03:03<18:13:49,  9.01s/it]

Total reward after episode 2715 is 1042.0


 27%|██▋       | 2716/10000 [2:03:06<14:40:37,  7.25s/it]

Total reward after episode 2716 is 1331.0


 27%|██▋       | 2717/10000 [2:03:14<14:47:31,  7.31s/it]

Total reward after episode 2717 is 2344.0


 27%|██▋       | 2718/10000 [2:03:17<12:21:19,  6.11s/it]

Total reward after episode 2718 is 1587.0


 27%|██▋       | 2719/10000 [2:03:22<11:36:26,  5.74s/it]

Total reward after episode 2719 is 1423.0


 27%|██▋       | 2720/10000 [2:03:37<17:15:46,  8.54s/it]

Total reward after episode 2720 is 2560.0


 27%|██▋       | 2721/10000 [2:03:41<14:19:28,  7.08s/it]

Total reward after episode 2721 is 1582.0


 27%|██▋       | 2722/10000 [2:03:51<16:28:47,  8.15s/it]

Total reward after episode 2722 is 2603.0


 27%|██▋       | 2723/10000 [2:04:08<21:42:07, 10.74s/it]

Total reward after episode 2723 is 2964.0


 27%|██▋       | 2724/10000 [2:04:09<15:33:37,  7.70s/it]

Total reward after episode 2724 is 249.0


 27%|██▋       | 2725/10000 [2:04:12<13:02:56,  6.46s/it]

Total reward after episode 2725 is 1432.0


 27%|██▋       | 2726/10000 [2:04:16<11:07:24,  5.51s/it]

Total reward after episode 2726 is 1438.0


 27%|██▋       | 2727/10000 [2:04:19<9:58:41,  4.94s/it] 

Total reward after episode 2727 is 1584.0


 27%|██▋       | 2728/10000 [2:04:21<7:49:39,  3.88s/it]

Total reward after episode 2728 is 627.0


 27%|██▋       | 2729/10000 [2:04:22<6:19:42,  3.13s/it]

Total reward after episode 2729 is 627.0


 27%|██▋       | 2730/10000 [2:04:26<6:54:19,  3.42s/it]

Total reward after episode 2730 is 1923.0


 27%|██▋       | 2731/10000 [2:04:27<5:35:04,  2.77s/it]

Total reward after episode 2731 is 608.0


 27%|██▋       | 2732/10000 [2:04:32<6:28:04,  3.20s/it]

Total reward after episode 2732 is 1921.0


 27%|██▋       | 2733/10000 [2:04:36<7:00:08,  3.47s/it]

Total reward after episode 2733 is 1868.0


 27%|██▋       | 2734/10000 [2:04:39<6:44:05,  3.34s/it]

Total reward after episode 2734 is 1348.0


 27%|██▋       | 2735/10000 [2:04:43<7:17:03,  3.61s/it]

Total reward after episode 2735 is 1698.0


 27%|██▋       | 2736/10000 [2:04:49<8:31:09,  4.22s/it]

Total reward after episode 2736 is 2359.0


 27%|██▋       | 2737/10000 [2:04:52<7:43:27,  3.83s/it]

Total reward after episode 2737 is 1348.0


 27%|██▋       | 2738/10000 [2:04:55<7:24:21,  3.67s/it]

Total reward after episode 2738 is 1436.0


 27%|██▋       | 2739/10000 [2:04:56<5:32:57,  2.75s/it]

Total reward after episode 2739 is 249.0


 27%|██▋       | 2740/10000 [2:04:56<4:14:59,  2.11s/it]

Total reward after episode 2740 is 251.0


 27%|██▋       | 2741/10000 [2:04:59<4:59:23,  2.47s/it]

Total reward after episode 2741 is 1328.0


 27%|██▋       | 2742/10000 [2:05:01<4:21:24,  2.16s/it]

Total reward after episode 2742 is 609.0


 27%|██▋       | 2743/10000 [2:05:02<3:26:11,  1.70s/it]

Total reward after episode 2743 is 249.0


 27%|██▋       | 2744/10000 [2:05:12<8:57:22,  4.44s/it]

Total reward after episode 2744 is 3021.0


 27%|██▋       | 2745/10000 [2:05:16<8:32:47,  4.24s/it]

Total reward after episode 2745 is 1706.0


 27%|██▋       | 2746/10000 [2:05:19<7:44:28,  3.84s/it]

Total reward after episode 2746 is 1440.0


 27%|██▋       | 2747/10000 [2:05:22<7:30:00,  3.72s/it]

Total reward after episode 2747 is 1438.0


 27%|██▋       | 2748/10000 [2:05:27<7:42:26,  3.83s/it]

Total reward after episode 2748 is 1860.0


 27%|██▋       | 2749/10000 [2:05:30<7:43:26,  3.83s/it]

Total reward after episode 2749 is 1867.0


 28%|██▊       | 2750/10000 [2:05:53<18:48:03,  9.34s/it]

Total reward after episode 2750 is 2912.0


 28%|██▊       | 2751/10000 [2:05:56<15:17:25,  7.59s/it]

Total reward after episode 2751 is 1435.0


 28%|██▊       | 2752/10000 [2:06:00<13:08:20,  6.53s/it]

Total reward after episode 2752 is 1427.0


 28%|██▊       | 2753/10000 [2:06:04<11:32:46,  5.74s/it]

Total reward after episode 2753 is 1871.0


 28%|██▊       | 2754/10000 [2:06:07<9:58:31,  4.96s/it] 

Total reward after episode 2754 is 1349.0


 28%|██▊       | 2755/10000 [2:06:10<8:55:24,  4.43s/it]

Total reward after episode 2755 is 1348.0


 28%|██▊       | 2756/10000 [2:06:11<6:37:35,  3.29s/it]

Total reward after episode 2756 is 246.0


 28%|██▊       | 2757/10000 [2:06:14<6:26:19,  3.20s/it]

Total reward after episode 2757 is 1437.0


 28%|██▊       | 2758/10000 [2:06:19<7:47:49,  3.88s/it]

Total reward after episode 2758 is 1323.0


 28%|██▊       | 2759/10000 [2:06:22<7:13:34,  3.59s/it]

Total reward after episode 2759 is 1438.0


 28%|██▊       | 2760/10000 [2:06:27<7:36:44,  3.79s/it]

Total reward after episode 2760 is 1703.0


 28%|██▊       | 2761/10000 [2:06:30<7:26:11,  3.70s/it]

Total reward after episode 2761 is 1329.0


 28%|██▊       | 2762/10000 [2:06:33<6:59:20,  3.48s/it]

Total reward after episode 2762 is 1338.0


 28%|██▊       | 2763/10000 [2:06:36<6:23:56,  3.18s/it]

Total reward after episode 2763 is 1047.0


 28%|██▊       | 2764/10000 [2:06:40<6:50:56,  3.41s/it]

Total reward after episode 2764 is 1865.0


 28%|██▊       | 2765/10000 [2:06:43<6:46:53,  3.37s/it]

Total reward after episode 2765 is 1348.0


 28%|██▊       | 2766/10000 [2:06:47<7:15:47,  3.61s/it]

Total reward after episode 2766 is 1932.0


 28%|██▊       | 2767/10000 [2:06:50<7:05:12,  3.53s/it]

Total reward after episode 2767 is 1439.0


 28%|██▊       | 2768/10000 [2:06:54<7:02:40,  3.51s/it]

Total reward after episode 2768 is 1452.0


 28%|██▊       | 2769/10000 [2:06:58<7:28:31,  3.72s/it]

Total reward after episode 2769 is 1338.0


 28%|██▊       | 2770/10000 [2:07:03<8:28:26,  4.22s/it]

Total reward after episode 2770 is 2370.0


 28%|██▊       | 2771/10000 [2:07:10<10:10:54,  5.07s/it]

Total reward after episode 2771 is 2352.0


 28%|██▊       | 2772/10000 [2:07:13<8:34:53,  4.27s/it] 

Total reward after episode 2772 is 1164.0


 28%|██▊       | 2773/10000 [2:07:17<8:26:00,  4.20s/it]

Total reward after episode 2773 is 1691.0


 28%|██▊       | 2774/10000 [2:07:20<7:38:04,  3.80s/it]

Total reward after episode 2774 is 1346.0


 28%|██▊       | 2775/10000 [2:07:23<7:35:57,  3.79s/it]

Total reward after episode 2775 is 1442.0


 28%|██▊       | 2776/10000 [2:07:31<9:51:59,  4.92s/it]

Total reward after episode 2776 is 2637.0


 28%|██▊       | 2777/10000 [2:07:34<8:49:53,  4.40s/it]

Total reward after episode 2777 is 1346.0


 28%|██▊       | 2778/10000 [2:07:37<8:07:55,  4.05s/it]

Total reward after episode 2778 is 1584.0


 28%|██▊       | 2779/10000 [2:07:41<7:39:56,  3.82s/it]

Total reward after episode 2779 is 1438.0


 28%|██▊       | 2780/10000 [2:07:42<6:23:50,  3.19s/it]

Total reward after episode 2780 is 652.0


 28%|██▊       | 2781/10000 [2:07:45<5:58:45,  2.98s/it]

Total reward after episode 2781 is 1045.0


 28%|██▊       | 2782/10000 [2:07:49<6:36:17,  3.29s/it]

Total reward after episode 2782 is 1861.0


 28%|██▊       | 2783/10000 [2:07:52<6:33:29,  3.27s/it]

Total reward after episode 2783 is 1339.0


 28%|██▊       | 2784/10000 [2:07:55<6:24:33,  3.20s/it]

Total reward after episode 2784 is 1344.0


 28%|██▊       | 2785/10000 [2:08:09<12:31:32,  6.25s/it]

Total reward after episode 2785 is 946.0


 28%|██▊       | 2786/10000 [2:08:11<10:17:56,  5.14s/it]

Total reward after episode 2786 is 1044.0


 28%|██▊       | 2787/10000 [2:08:13<8:27:59,  4.23s/it] 

Total reward after episode 2787 is 742.0


 28%|██▊       | 2788/10000 [2:08:17<8:07:16,  4.05s/it]

Total reward after episode 2788 is 1702.0


 28%|██▊       | 2789/10000 [2:08:23<9:25:05,  4.70s/it]

Total reward after episode 2789 is 1908.0


 28%|██▊       | 2790/10000 [2:08:27<8:47:32,  4.39s/it]

Total reward after episode 2790 is 1587.0


 28%|██▊       | 2791/10000 [2:08:28<6:57:05,  3.47s/it]

Total reward after episode 2791 is 607.0


 28%|██▊       | 2792/10000 [2:08:32<6:59:52,  3.50s/it]

Total reward after episode 2792 is 1580.0


 28%|██▊       | 2793/10000 [2:08:35<6:45:44,  3.38s/it]

Total reward after episode 2793 is 1436.0


 28%|██▊       | 2794/10000 [2:08:38<6:25:54,  3.21s/it]

Total reward after episode 2794 is 1068.0


 28%|██▊       | 2795/10000 [2:08:40<6:09:37,  3.08s/it]

Total reward after episode 2795 is 1352.0


 28%|██▊       | 2796/10000 [2:08:45<6:49:10,  3.41s/it]

Total reward after episode 2796 is 1860.0


 28%|██▊       | 2797/10000 [2:08:46<5:32:14,  2.77s/it]

Total reward after episode 2797 is 608.0


 28%|██▊       | 2798/10000 [2:08:57<10:17:30,  5.14s/it]

Total reward after episode 2798 is 2318.0


 28%|██▊       | 2799/10000 [2:08:59<8:34:43,  4.29s/it] 

Total reward after episode 2799 is 1047.0


 28%|██▊       | 2800/10000 [2:09:06<10:03:49,  5.03s/it]

Total reward after episode 2800 is 2644.0


 28%|██▊       | 2801/10000 [2:09:09<9:02:05,  4.52s/it] 

Total reward after episode 2801 is 1585.0


 28%|██▊       | 2802/10000 [2:09:10<7:07:44,  3.57s/it]

Total reward after episode 2802 is 604.0


 28%|██▊       | 2803/10000 [2:09:12<5:50:24,  2.92s/it]

Total reward after episode 2803 is 607.0


 28%|██▊       | 2804/10000 [2:09:15<6:16:37,  3.14s/it]

Total reward after episode 2804 is 1693.0


 28%|██▊       | 2805/10000 [2:09:20<6:55:09,  3.46s/it]

Total reward after episode 2805 is 1927.0


 28%|██▊       | 2806/10000 [2:09:24<7:33:07,  3.78s/it]

Total reward after episode 2806 is 1921.0


 28%|██▊       | 2807/10000 [2:09:28<7:29:58,  3.75s/it]

Total reward after episode 2807 is 1693.0


 28%|██▊       | 2808/10000 [2:09:32<7:50:04,  3.92s/it]

Total reward after episode 2808 is 1925.0


 28%|██▊       | 2809/10000 [2:09:37<8:10:45,  4.09s/it]

Total reward after episode 2809 is 1700.0


 28%|██▊       | 2810/10000 [2:09:40<7:46:31,  3.89s/it]

Total reward after episode 2810 is 1348.0


 28%|██▊       | 2811/10000 [2:09:44<7:51:06,  3.93s/it]

Total reward after episode 2811 is 1924.0


 28%|██▊       | 2812/10000 [2:09:48<8:11:47,  4.11s/it]

Total reward after episode 2812 is 1877.0


 28%|██▊       | 2813/10000 [2:09:50<6:30:40,  3.26s/it]

Total reward after episode 2813 is 610.0


 28%|██▊       | 2814/10000 [2:09:53<6:35:39,  3.30s/it]

Total reward after episode 2814 is 1342.0


 28%|██▊       | 2815/10000 [2:09:54<4:58:52,  2.50s/it]

Total reward after episode 2815 is 251.0


 28%|██▊       | 2816/10000 [2:09:57<5:12:51,  2.61s/it]

Total reward after episode 2816 is 817.0


 28%|██▊       | 2817/10000 [2:09:59<5:17:36,  2.65s/it]

Total reward after episode 2817 is 1347.0


 28%|██▊       | 2818/10000 [2:10:01<4:28:37,  2.24s/it]

Total reward after episode 2818 is 610.0


 28%|██▊       | 2819/10000 [2:10:04<5:03:10,  2.53s/it]

Total reward after episode 2819 is 1587.0


 28%|██▊       | 2820/10000 [2:10:07<5:33:39,  2.79s/it]

Total reward after episode 2820 is 1329.0


 28%|██▊       | 2821/10000 [2:10:09<4:44:36,  2.38s/it]

Total reward after episode 2821 is 621.0


 28%|██▊       | 2822/10000 [2:10:10<4:07:30,  2.07s/it]

Total reward after episode 2822 is 614.0


 28%|██▊       | 2823/10000 [2:10:13<4:33:43,  2.29s/it]

Total reward after episode 2823 is 1349.0


 28%|██▊       | 2824/10000 [2:10:16<4:49:29,  2.42s/it]

Total reward after episode 2824 is 818.0


 28%|██▊       | 2825/10000 [2:10:18<5:00:11,  2.51s/it]

Total reward after episode 2825 is 1068.0


 28%|██▊       | 2826/10000 [2:10:21<5:09:28,  2.59s/it]

Total reward after episode 2826 is 1345.0


 28%|██▊       | 2827/10000 [2:10:24<5:19:26,  2.67s/it]

Total reward after episode 2827 is 1334.0


 28%|██▊       | 2828/10000 [2:10:28<5:56:44,  2.98s/it]

Total reward after episode 2828 is 1706.0


 28%|██▊       | 2829/10000 [2:10:28<4:34:35,  2.30s/it]

Total reward after episode 2829 is 240.0


 28%|██▊       | 2830/10000 [2:10:37<8:36:19,  4.32s/it]

Total reward after episode 2830 is 2623.0


 28%|██▊       | 2831/10000 [2:10:42<8:34:36,  4.31s/it]

Total reward after episode 2831 is 1884.0


 28%|██▊       | 2832/10000 [2:10:46<8:29:18,  4.26s/it]

Total reward after episode 2832 is 1855.0


 28%|██▊       | 2833/10000 [2:10:49<7:32:10,  3.79s/it]

Total reward after episode 2833 is 1068.0


 28%|██▊       | 2834/10000 [2:10:53<7:44:18,  3.89s/it]

Total reward after episode 2834 is 1858.0


 28%|██▊       | 2835/10000 [2:10:54<6:16:26,  3.15s/it]

Total reward after episode 2835 is 626.0


 28%|██▊       | 2836/10000 [2:10:55<4:46:10,  2.40s/it]

Total reward after episode 2836 is 251.0


 28%|██▊       | 2837/10000 [2:10:56<4:06:03,  2.06s/it]

Total reward after episode 2837 is 608.0


 28%|██▊       | 2838/10000 [2:10:59<4:22:43,  2.20s/it]

Total reward after episode 2838 is 1044.0


 28%|██▊       | 2839/10000 [2:11:02<5:15:31,  2.64s/it]

Total reward after episode 2839 is 1588.0


 28%|██▊       | 2840/10000 [2:11:05<5:37:03,  2.82s/it]

Total reward after episode 2840 is 1587.0


 28%|██▊       | 2841/10000 [2:11:08<5:19:22,  2.68s/it]

Total reward after episode 2841 is 1049.0


 28%|██▊       | 2842/10000 [2:11:12<6:27:18,  3.25s/it]

Total reward after episode 2842 is 1930.0


 28%|██▊       | 2843/10000 [2:11:16<6:51:11,  3.45s/it]

Total reward after episode 2843 is 1928.0


 28%|██▊       | 2844/10000 [2:11:20<6:46:36,  3.41s/it]

Total reward after episode 2844 is 1586.0


 28%|██▊       | 2845/10000 [2:11:21<5:30:23,  2.77s/it]

Total reward after episode 2845 is 608.0


 28%|██▊       | 2846/10000 [2:11:32<10:42:07,  5.39s/it]

Total reward after episode 2846 is 3016.0


 28%|██▊       | 2847/10000 [2:11:36<9:41:04,  4.87s/it] 

Total reward after episode 2847 is 1704.0


 28%|██▊       | 2848/10000 [2:11:39<8:41:15,  4.37s/it]

Total reward after episode 2848 is 1349.0


 28%|██▊       | 2849/10000 [2:11:41<6:51:34,  3.45s/it]

Total reward after episode 2849 is 613.0


 28%|██▊       | 2850/10000 [2:11:43<6:29:40,  3.27s/it]

Total reward after episode 2850 is 1349.0


 29%|██▊       | 2851/10000 [2:11:50<8:39:17,  4.36s/it]

Total reward after episode 2851 is 1828.0


 29%|██▊       | 2852/10000 [2:11:54<8:16:28,  4.17s/it]

Total reward after episode 2852 is 1698.0


 29%|██▊       | 2853/10000 [2:11:58<7:55:44,  3.99s/it]

Total reward after episode 2853 is 1705.0


 29%|██▊       | 2854/10000 [2:12:01<7:23:26,  3.72s/it]

Total reward after episode 2854 is 1348.0


 29%|██▊       | 2855/10000 [2:12:04<7:06:13,  3.58s/it]

Total reward after episode 2855 is 1353.0


 29%|██▊       | 2856/10000 [2:12:07<6:41:17,  3.37s/it]

Total reward after episode 2856 is 1355.0


 29%|██▊       | 2857/10000 [2:12:11<7:25:32,  3.74s/it]

Total reward after episode 2857 is 1925.0


 29%|██▊       | 2858/10000 [2:12:16<7:59:17,  4.03s/it]

Total reward after episode 2858 is 1854.0


 29%|██▊       | 2859/10000 [2:12:19<7:17:46,  3.68s/it]

Total reward after episode 2859 is 1340.0


 29%|██▊       | 2860/10000 [2:12:22<6:48:18,  3.43s/it]

Total reward after episode 2860 is 1346.0


 29%|██▊       | 2861/10000 [2:12:27<7:38:49,  3.86s/it]

Total reward after episode 2861 is 1702.0


 29%|██▊       | 2862/10000 [2:12:31<8:02:47,  4.06s/it]

Total reward after episode 2862 is 1926.0


 29%|██▊       | 2863/10000 [2:12:35<7:49:49,  3.95s/it]

Total reward after episode 2863 is 1698.0


 29%|██▊       | 2864/10000 [2:12:39<7:54:46,  3.99s/it]

Total reward after episode 2864 is 1691.0


 29%|██▊       | 2865/10000 [2:12:42<7:35:25,  3.83s/it]

Total reward after episode 2865 is 1346.0


 29%|██▊       | 2866/10000 [2:12:45<7:04:13,  3.57s/it]

Total reward after episode 2866 is 1350.0


 29%|██▊       | 2867/10000 [2:12:50<7:51:51,  3.97s/it]

Total reward after episode 2867 is 1567.0


 29%|██▊       | 2868/10000 [2:12:54<7:31:54,  3.80s/it]

Total reward after episode 2868 is 1425.0


 29%|██▊       | 2869/10000 [2:12:57<7:29:26,  3.78s/it]

Total reward after episode 2869 is 1587.0


 29%|██▊       | 2870/10000 [2:13:00<6:53:04,  3.48s/it]

Total reward after episode 2870 is 1353.0


 29%|██▊       | 2871/10000 [2:13:01<5:10:45,  2.62s/it]

Total reward after episode 2871 is 252.0


 29%|██▊       | 2872/10000 [2:13:04<5:22:15,  2.71s/it]

Total reward after episode 2872 is 1355.0


 29%|██▊       | 2873/10000 [2:13:07<5:32:16,  2.80s/it]

Total reward after episode 2873 is 1351.0


 29%|██▊       | 2874/10000 [2:13:10<6:05:15,  3.08s/it]

Total reward after episode 2874 is 1698.0


 29%|██▉       | 2875/10000 [2:13:13<5:54:51,  2.99s/it]

Total reward after episode 2875 is 1338.0


 29%|██▉       | 2876/10000 [2:13:17<6:15:32,  3.16s/it]

Total reward after episode 2876 is 1582.0


 29%|██▉       | 2877/10000 [2:13:20<6:22:01,  3.22s/it]

Total reward after episode 2877 is 1588.0


 29%|██▉       | 2878/10000 [2:13:27<8:26:43,  4.27s/it]

Total reward after episode 2878 is 2641.0


 29%|██▉       | 2879/10000 [2:13:29<7:26:07,  3.76s/it]

Total reward after episode 2879 is 1046.0


 29%|██▉       | 2880/10000 [2:13:33<7:30:33,  3.80s/it]

Total reward after episode 2880 is 1694.0


 29%|██▉       | 2881/10000 [2:13:41<9:56:43,  5.03s/it]

Total reward after episode 2881 is 1542.0


 29%|██▉       | 2882/10000 [2:13:44<8:47:00,  4.44s/it]

Total reward after episode 2882 is 1437.0


 29%|██▉       | 2883/10000 [2:13:49<8:49:46,  4.47s/it]

Total reward after episode 2883 is 1917.0


 29%|██▉       | 2884/10000 [2:13:53<8:25:24,  4.26s/it]

Total reward after episode 2884 is 1698.0


 29%|██▉       | 2885/10000 [2:13:59<9:51:44,  4.99s/it]

Total reward after episode 2885 is 2353.0


 29%|██▉       | 2886/10000 [2:14:04<9:33:11,  4.83s/it]

Total reward after episode 2886 is 1850.0


 29%|██▉       | 2887/10000 [2:14:48<32:38:34, 16.52s/it]

Total reward after episode 2887 is 267.0


 29%|██▉       | 2888/10000 [2:14:52<25:26:35, 12.88s/it]

Total reward after episode 2888 is 1848.0


 29%|██▉       | 2889/10000 [2:14:56<20:16:12, 10.26s/it]

Total reward after episode 2889 is 1575.0


 29%|██▉       | 2890/10000 [2:14:57<14:59:10,  7.59s/it]

Total reward after episode 2890 is 612.0


 29%|██▉       | 2891/10000 [2:14:59<11:16:02,  5.71s/it]

Total reward after episode 2891 is 613.0


 29%|██▉       | 2892/10000 [2:15:01<9:25:06,  4.77s/it] 

Total reward after episode 2892 is 1049.0


 29%|██▉       | 2893/10000 [2:15:04<8:25:13,  4.27s/it]

Total reward after episode 2893 is 1342.0


 29%|██▉       | 2894/10000 [2:15:08<8:15:04,  4.18s/it]

Total reward after episode 2894 is 1928.0


 29%|██▉       | 2895/10000 [2:15:12<7:49:54,  3.97s/it]

Total reward after episode 2895 is 1710.0


 29%|██▉       | 2896/10000 [2:15:15<7:08:59,  3.62s/it]

Total reward after episode 2896 is 1349.0


 29%|██▉       | 2897/10000 [2:15:18<6:42:08,  3.40s/it]

Total reward after episode 2897 is 1335.0


 29%|██▉       | 2898/10000 [2:15:22<7:21:53,  3.73s/it]

Total reward after episode 2898 is 1925.0


 29%|██▉       | 2899/10000 [2:15:25<6:44:58,  3.42s/it]

Total reward after episode 2899 is 1046.0


 29%|██▉       | 2900/10000 [2:15:27<6:14:35,  3.17s/it]

Total reward after episode 2900 is 1045.0


 29%|██▉       | 2901/10000 [2:15:32<6:56:53,  3.52s/it]

Total reward after episode 2901 is 1930.0


 29%|██▉       | 2902/10000 [2:15:35<6:50:57,  3.47s/it]

Total reward after episode 2902 is 1588.0


 29%|██▉       | 2903/10000 [2:15:39<7:16:15,  3.69s/it]

Total reward after episode 2903 is 1690.0


 29%|██▉       | 2904/10000 [2:15:42<6:47:30,  3.45s/it]

Total reward after episode 2904 is 1340.0


 29%|██▉       | 2905/10000 [2:15:45<6:39:10,  3.38s/it]

Total reward after episode 2905 is 1042.0


 29%|██▉       | 2906/10000 [2:15:55<10:26:17,  5.30s/it]

Total reward after episode 2906 is 679.0


 29%|██▉       | 2907/10000 [2:15:59<9:30:29,  4.83s/it] 

Total reward after episode 2907 is 1701.0


 29%|██▉       | 2908/10000 [2:16:03<8:50:22,  4.49s/it]

Total reward after episode 2908 is 1697.0


 29%|██▉       | 2909/10000 [2:16:04<7:06:21,  3.61s/it]

Total reward after episode 2909 is 737.0


 29%|██▉       | 2910/10000 [2:16:08<6:57:48,  3.54s/it]

Total reward after episode 2910 is 1347.0


 29%|██▉       | 2911/10000 [2:16:10<6:33:58,  3.33s/it]

Total reward after episode 2911 is 1342.0


 29%|██▉       | 2912/10000 [2:16:14<6:45:46,  3.43s/it]

Total reward after episode 2912 is 1695.0


 29%|██▉       | 2913/10000 [2:16:18<6:50:51,  3.48s/it]

Total reward after episode 2913 is 1707.0


 29%|██▉       | 2914/10000 [2:16:19<5:33:45,  2.83s/it]

Total reward after episode 2914 is 611.0


 29%|██▉       | 2915/10000 [2:16:22<5:31:37,  2.81s/it]

Total reward after episode 2915 is 781.0


 29%|██▉       | 2916/10000 [2:16:25<5:32:52,  2.82s/it]

Total reward after episode 2916 is 1330.0


 29%|██▉       | 2917/10000 [2:16:29<6:19:49,  3.22s/it]

Total reward after episode 2917 is 1703.0


 29%|██▉       | 2918/10000 [2:16:33<6:49:13,  3.47s/it]

Total reward after episode 2918 is 1928.0


 29%|██▉       | 2919/10000 [2:16:36<6:35:06,  3.35s/it]

Total reward after episode 2919 is 1590.0


 29%|██▉       | 2920/10000 [2:16:37<5:01:13,  2.55s/it]

Total reward after episode 2920 is 242.0


 29%|██▉       | 2921/10000 [2:16:39<5:08:39,  2.62s/it]

Total reward after episode 2921 is 1352.0


 29%|██▉       | 2922/10000 [2:16:44<6:09:22,  3.13s/it]

Total reward after episode 2922 is 1875.0


 29%|██▉       | 2923/10000 [2:16:48<6:42:18,  3.41s/it]

Total reward after episode 2923 is 1927.0


 29%|██▉       | 2924/10000 [2:16:48<5:03:21,  2.57s/it]

Total reward after episode 2924 is 252.0


 29%|██▉       | 2925/10000 [2:16:50<4:17:11,  2.18s/it]

Total reward after episode 2925 is 610.0


 29%|██▉       | 2926/10000 [2:16:59<8:25:38,  4.29s/it]

Total reward after episode 2926 is 759.0


 29%|██▉       | 2927/10000 [2:17:02<7:38:37,  3.89s/it]

Total reward after episode 2927 is 1349.0


 29%|██▉       | 2928/10000 [2:17:02<5:43:22,  2.91s/it]

Total reward after episode 2928 is 249.0


 29%|██▉       | 2929/10000 [2:17:08<7:19:39,  3.73s/it]

Total reward after episode 2929 is 799.0


 29%|██▉       | 2930/10000 [2:17:11<6:51:16,  3.49s/it]

Total reward after episode 2930 is 1352.0


 29%|██▉       | 2931/10000 [2:17:15<7:06:36,  3.62s/it]

Total reward after episode 2931 is 1696.0


 29%|██▉       | 2932/10000 [2:17:19<7:33:37,  3.85s/it]

Total reward after episode 2932 is 1849.0


 29%|██▉       | 2933/10000 [2:17:29<11:02:18,  5.62s/it]

Total reward after episode 2933 is 1286.0


 29%|██▉       | 2934/10000 [2:17:33<9:54:51,  5.05s/it] 

Total reward after episode 2934 is 1708.0


 29%|██▉       | 2935/10000 [2:17:35<8:21:25,  4.26s/it]

Total reward after episode 2935 is 744.0


 29%|██▉       | 2936/10000 [2:17:42<10:06:11,  5.15s/it]

Total reward after episode 2936 is 1005.0


 29%|██▉       | 2937/10000 [2:17:47<9:31:17,  4.85s/it] 

Total reward after episode 2937 is 1928.0


 29%|██▉       | 2938/10000 [2:17:49<8:14:43,  4.20s/it]

Total reward after episode 2938 is 1064.0


 29%|██▉       | 2939/10000 [2:17:50<6:07:47,  3.13s/it]

Total reward after episode 2939 is 250.0


 29%|██▉       | 2940/10000 [2:17:54<6:55:08,  3.53s/it]

Total reward after episode 2940 is 1925.0


 29%|██▉       | 2941/10000 [2:17:59<7:28:24,  3.81s/it]

Total reward after episode 2941 is 1928.0


 29%|██▉       | 2942/10000 [2:18:03<7:38:08,  3.89s/it]

Total reward after episode 2942 is 1867.0


 29%|██▉       | 2943/10000 [2:18:06<7:27:20,  3.80s/it]

Total reward after episode 2943 is 1696.0


 29%|██▉       | 2944/10000 [2:18:10<7:36:36,  3.88s/it]

Total reward after episode 2944 is 1932.0


 29%|██▉       | 2945/10000 [2:18:15<7:48:41,  3.99s/it]

Total reward after episode 2945 is 1923.0


 29%|██▉       | 2946/10000 [2:18:17<7:04:00,  3.61s/it]

Total reward after episode 2946 is 819.0


 29%|██▉       | 2947/10000 [2:18:22<7:31:54,  3.84s/it]

Total reward after episode 2947 is 1856.0


 29%|██▉       | 2948/10000 [2:18:25<7:15:33,  3.71s/it]

Total reward after episode 2948 is 1585.0


 29%|██▉       | 2949/10000 [2:18:41<14:37:12,  7.46s/it]

Total reward after episode 2949 is 1203.0


 30%|██▉       | 2950/10000 [2:18:45<12:31:14,  6.39s/it]

Total reward after episode 2950 is 1697.0


 30%|██▉       | 2951/10000 [2:18:48<10:31:07,  5.37s/it]

Total reward after episode 2951 is 1340.0


 30%|██▉       | 2952/10000 [2:18:52<9:29:25,  4.85s/it] 

Total reward after episode 2952 is 1691.0


 30%|██▉       | 2953/10000 [2:18:55<8:26:06,  4.31s/it]

Total reward after episode 2953 is 1330.0


 30%|██▉       | 2954/10000 [2:18:57<7:13:40,  3.69s/it]

Total reward after episode 2954 is 1052.0


 30%|██▉       | 2955/10000 [2:19:00<6:45:12,  3.45s/it]

Total reward after episode 2955 is 1325.0


 30%|██▉       | 2956/10000 [2:19:03<6:24:49,  3.28s/it]

Total reward after episode 2956 is 1334.0


 30%|██▉       | 2957/10000 [2:19:04<4:53:12,  2.50s/it]

Total reward after episode 2957 is 250.0


 30%|██▉       | 2958/10000 [2:19:04<3:46:40,  1.93s/it]

Total reward after episode 2958 is 251.0


 30%|██▉       | 2959/10000 [2:19:07<4:19:54,  2.21s/it]

Total reward after episode 2959 is 1354.0


 30%|██▉       | 2960/10000 [2:19:11<5:29:19,  2.81s/it]

Total reward after episode 2960 is 1929.0


 30%|██▉       | 2961/10000 [2:19:15<6:14:35,  3.19s/it]

Total reward after episode 2961 is 1933.0


 30%|██▉       | 2962/10000 [2:19:19<6:31:20,  3.34s/it]

Total reward after episode 2962 is 1585.0


 30%|██▉       | 2963/10000 [2:19:23<6:32:31,  3.35s/it]

Total reward after episode 2963 is 1352.0


 30%|██▉       | 2964/10000 [2:19:32<9:58:35,  5.10s/it]

Total reward after episode 2964 is 988.0


 30%|██▉       | 2965/10000 [2:19:36<9:13:28,  4.72s/it]

Total reward after episode 2965 is 1700.0


 30%|██▉       | 2966/10000 [2:19:38<7:58:20,  4.08s/it]

Total reward after episode 2966 is 1160.0


 30%|██▉       | 2967/10000 [2:19:42<7:56:10,  4.06s/it]

Total reward after episode 2967 is 1865.0


 30%|██▉       | 2968/10000 [2:19:45<7:23:28,  3.78s/it]

Total reward after episode 2968 is 1348.0


 30%|██▉       | 2969/10000 [2:19:46<5:32:12,  2.83s/it]

Total reward after episode 2969 is 252.0


 30%|██▉       | 2970/10000 [2:19:47<4:44:20,  2.43s/it]

Total reward after episode 2970 is 625.0


 30%|██▉       | 2971/10000 [2:19:57<8:41:04,  4.45s/it]

Total reward after episode 2971 is 2334.0


 30%|██▉       | 2972/10000 [2:19:59<7:31:16,  3.85s/it]

Total reward after episode 2972 is 1048.0


 30%|██▉       | 2973/10000 [2:20:02<7:09:10,  3.66s/it]

Total reward after episode 2973 is 1347.0


 30%|██▉       | 2974/10000 [2:20:05<6:39:55,  3.42s/it]

Total reward after episode 2974 is 1353.0


 30%|██▉       | 2975/10000 [2:20:09<7:10:12,  3.67s/it]

Total reward after episode 2975 is 1872.0


 30%|██▉       | 2976/10000 [2:20:14<7:41:45,  3.94s/it]

Total reward after episode 2976 is 1944.0


 30%|██▉       | 2977/10000 [2:20:17<7:13:29,  3.70s/it]

Total reward after episode 2977 is 1331.0


 30%|██▉       | 2978/10000 [2:20:18<5:25:16,  2.78s/it]

Total reward after episode 2978 is 251.0


 30%|██▉       | 2979/10000 [2:20:21<6:00:24,  3.08s/it]

Total reward after episode 2979 is 1698.0


 30%|██▉       | 2980/10000 [2:20:22<4:33:46,  2.34s/it]

Total reward after episode 2980 is 251.0


 30%|██▉       | 2981/10000 [2:20:23<3:33:31,  1.83s/it]

Total reward after episode 2981 is 251.0


 30%|██▉       | 2982/10000 [2:20:25<4:00:19,  2.05s/it]

Total reward after episode 2982 is 1044.0


 30%|██▉       | 2983/10000 [2:20:30<5:35:02,  2.86s/it]

Total reward after episode 2983 is 1920.0


 30%|██▉       | 2984/10000 [2:20:31<4:16:13,  2.19s/it]

Total reward after episode 2984 is 250.0


 30%|██▉       | 2985/10000 [2:20:35<5:25:59,  2.79s/it]

Total reward after episode 2985 is 1932.0


 30%|██▉       | 2986/10000 [2:20:35<4:10:00,  2.14s/it]

Total reward after episode 2986 is 250.0


 30%|██▉       | 2987/10000 [2:20:40<5:18:30,  2.72s/it]

Total reward after episode 2987 is 1864.0


 30%|██▉       | 2988/10000 [2:20:44<6:05:32,  3.13s/it]

Total reward after episode 2988 is 1929.0


 30%|██▉       | 2989/10000 [2:20:47<6:06:09,  3.13s/it]

Total reward after episode 2989 is 1335.0


 30%|██▉       | 2990/10000 [2:20:49<5:47:52,  2.98s/it]

Total reward after episode 2990 is 1068.0


 30%|██▉       | 2991/10000 [2:20:51<4:48:50,  2.47s/it]

Total reward after episode 2991 is 611.0


 30%|██▉       | 2992/10000 [2:20:52<4:08:11,  2.12s/it]

Total reward after episode 2992 is 610.0


 30%|██▉       | 2993/10000 [2:20:56<4:59:10,  2.56s/it]

Total reward after episode 2993 is 1702.0


 30%|██▉       | 2994/10000 [2:20:57<4:20:55,  2.23s/it]

Total reward after episode 2994 is 619.0


 30%|██▉       | 2995/10000 [2:20:58<3:24:27,  1.75s/it]

Total reward after episode 2995 is 252.0


 30%|██▉       | 2996/10000 [2:21:01<4:08:36,  2.13s/it]

Total reward after episode 2996 is 1354.0


 30%|██▉       | 2997/10000 [2:21:04<4:36:06,  2.37s/it]

Total reward after episode 2997 is 1325.0


 30%|██▉       | 2998/10000 [2:21:07<4:54:29,  2.52s/it]

Total reward after episode 2998 is 1333.0


 30%|██▉       | 2999/10000 [2:21:11<5:58:06,  3.07s/it]

Total reward after episode 2999 is 1925.0


 30%|███       | 3000/10000 [2:21:14<5:49:38,  3.00s/it]

Total reward after episode 3000 is 1351.0


 30%|███       | 3001/10000 [2:21:18<6:24:10,  3.29s/it]

Total reward after episode 3001 is 1862.0


 30%|███       | 3002/10000 [2:21:21<6:10:41,  3.18s/it]

Total reward after episode 3002 is 817.0


 30%|███       | 3003/10000 [2:21:25<6:43:01,  3.46s/it]

Total reward after episode 3003 is 1866.0


 30%|███       | 3004/10000 [2:21:28<6:34:43,  3.39s/it]

Total reward after episode 3004 is 1345.0


 30%|███       | 3005/10000 [2:21:32<6:42:55,  3.46s/it]

Total reward after episode 3005 is 1697.0


 30%|███       | 3006/10000 [2:21:35<6:57:35,  3.58s/it]

Total reward after episode 3006 is 1928.0


 30%|███       | 3007/10000 [2:21:38<6:36:43,  3.40s/it]

Total reward after episode 3007 is 1348.0


 30%|███       | 3008/10000 [2:21:40<5:23:15,  2.77s/it]

Total reward after episode 3008 is 611.0


 30%|███       | 3009/10000 [2:21:43<5:24:54,  2.79s/it]

Total reward after episode 3009 is 1351.0


 30%|███       | 3010/10000 [2:21:44<4:33:18,  2.35s/it]

Total reward after episode 3010 is 610.0


 30%|███       | 3011/10000 [2:21:44<3:33:19,  1.83s/it]

Total reward after episode 3011 is 251.0


 30%|███       | 3012/10000 [2:21:48<4:43:28,  2.43s/it]

Total reward after episode 3012 is 1337.0


 30%|███       | 3013/10000 [2:21:51<5:08:42,  2.65s/it]

Total reward after episode 3013 is 1351.0


 30%|███       | 3014/10000 [2:21:55<5:45:27,  2.97s/it]

Total reward after episode 3014 is 1695.0


 30%|███       | 3015/10000 [2:21:59<6:07:09,  3.15s/it]

Total reward after episode 3015 is 1696.0


 30%|███       | 3016/10000 [2:22:02<5:57:19,  3.07s/it]

Total reward after episode 3016 is 1351.0


 30%|███       | 3017/10000 [2:22:06<6:31:44,  3.37s/it]

Total reward after episode 3017 is 1933.0


 30%|███       | 3018/10000 [2:22:09<6:16:16,  3.23s/it]

Total reward after episode 3018 is 1437.0


 30%|███       | 3019/10000 [2:22:09<4:44:48,  2.45s/it]

Total reward after episode 3019 is 249.0


 30%|███       | 3020/10000 [2:22:13<5:16:32,  2.72s/it]

Total reward after episode 3020 is 1345.0


 30%|███       | 3021/10000 [2:22:15<5:17:19,  2.73s/it]

Total reward after episode 3021 is 1350.0


 30%|███       | 3022/10000 [2:22:19<5:40:10,  2.93s/it]

Total reward after episode 3022 is 1589.0


 30%|███       | 3023/10000 [2:22:23<6:21:53,  3.28s/it]

Total reward after episode 3023 is 1926.0


 30%|███       | 3024/10000 [2:22:27<6:43:39,  3.47s/it]

Total reward after episode 3024 is 1702.0


 30%|███       | 3025/10000 [2:22:30<6:37:09,  3.42s/it]

Total reward after episode 3025 is 1590.0


 30%|███       | 3026/10000 [2:22:33<6:23:18,  3.30s/it]

Total reward after episode 3026 is 1349.0


 30%|███       | 3027/10000 [2:22:36<6:16:51,  3.24s/it]

Total reward after episode 3027 is 1428.0


 30%|███       | 3028/10000 [2:22:39<5:48:22,  3.00s/it]

Total reward after episode 3028 is 1049.0


 30%|███       | 3029/10000 [2:22:43<6:53:33,  3.56s/it]

Total reward after episode 3029 is 1923.0


 30%|███       | 3030/10000 [2:22:46<6:31:04,  3.37s/it]

Total reward after episode 3030 is 1350.0


 30%|███       | 3031/10000 [2:22:50<6:31:48,  3.37s/it]

Total reward after episode 3031 is 1587.0


 30%|███       | 3032/10000 [2:22:50<4:56:58,  2.56s/it]

Total reward after episode 3032 is 252.0


 30%|███       | 3033/10000 [2:22:55<6:00:15,  3.10s/it]

Total reward after episode 3033 is 1930.0


 30%|███       | 3034/10000 [2:23:02<8:08:57,  4.21s/it]

Total reward after episode 3034 is 2635.0


 30%|███       | 3035/10000 [2:23:04<7:07:51,  3.69s/it]

Total reward after episode 3035 is 1049.0


 30%|███       | 3036/10000 [2:23:07<6:35:41,  3.41s/it]

Total reward after episode 3036 is 1054.0


 30%|███       | 3037/10000 [2:23:13<8:08:56,  4.21s/it]

Total reward after episode 3037 is 2360.0


 30%|███       | 3038/10000 [2:23:16<7:19:13,  3.79s/it]

Total reward after episode 3038 is 1350.0


 30%|███       | 3039/10000 [2:23:18<6:30:36,  3.37s/it]

Total reward after episode 3039 is 1047.0


 30%|███       | 3040/10000 [2:23:19<4:55:02,  2.54s/it]

Total reward after episode 3040 is 252.0


 30%|███       | 3041/10000 [2:23:19<3:48:01,  1.97s/it]

Total reward after episode 3041 is 251.0


 30%|███       | 3042/10000 [2:23:22<4:19:57,  2.24s/it]

Total reward after episode 3042 is 1350.0


 30%|███       | 3043/10000 [2:23:25<4:39:56,  2.41s/it]

Total reward after episode 3043 is 1351.0


 30%|███       | 3044/10000 [2:23:26<3:36:56,  1.87s/it]

Total reward after episode 3044 is 251.0


 30%|███       | 3045/10000 [2:23:29<4:40:46,  2.42s/it]

Total reward after episode 3045 is 1709.0


 30%|███       | 3046/10000 [2:23:30<3:38:06,  1.88s/it]

Total reward after episode 3046 is 251.0


 30%|███       | 3047/10000 [2:23:33<4:34:23,  2.37s/it]

Total reward after episode 3047 is 1339.0


 30%|███       | 3048/10000 [2:23:37<5:28:37,  2.84s/it]

Total reward after episode 3048 is 1931.0


 30%|███       | 3049/10000 [2:23:41<6:00:14,  3.11s/it]

Total reward after episode 3049 is 1696.0


 30%|███       | 3050/10000 [2:23:42<4:34:10,  2.37s/it]

Total reward after episode 3050 is 252.0


 31%|███       | 3051/10000 [2:23:46<5:29:43,  2.85s/it]

Total reward after episode 3051 is 1934.0


 31%|███       | 3052/10000 [2:23:49<5:39:10,  2.93s/it]

Total reward after episode 3052 is 1330.0


 31%|███       | 3053/10000 [2:23:49<4:19:38,  2.24s/it]

Total reward after episode 3053 is 246.0


 31%|███       | 3054/10000 [2:23:52<4:41:44,  2.43s/it]

Total reward after episode 3054 is 1350.0


 31%|███       | 3055/10000 [2:23:55<4:55:41,  2.55s/it]

Total reward after episode 3055 is 1327.0


 31%|███       | 3056/10000 [2:23:59<5:45:34,  2.99s/it]

Total reward after episode 3056 is 1701.0


 31%|███       | 3057/10000 [2:24:00<4:45:27,  2.47s/it]

Total reward after episode 3057 is 608.0


 31%|███       | 3058/10000 [2:24:01<3:41:44,  1.92s/it]

Total reward after episode 3058 is 250.0


 31%|███       | 3059/10000 [2:24:02<2:56:50,  1.53s/it]

Total reward after episode 3059 is 252.0


 31%|███       | 3060/10000 [2:24:04<3:39:41,  1.90s/it]

Total reward after episode 3060 is 1352.0


 31%|███       | 3061/10000 [2:24:08<4:38:55,  2.41s/it]

Total reward after episode 3061 is 1584.0


 31%|███       | 3062/10000 [2:24:12<5:40:55,  2.95s/it]

Total reward after episode 3062 is 1925.0


 31%|███       | 3063/10000 [2:24:16<6:12:07,  3.22s/it]

Total reward after episode 3063 is 1695.0


 31%|███       | 3064/10000 [2:24:19<5:56:38,  3.09s/it]

Total reward after episode 3064 is 1352.0


 31%|███       | 3065/10000 [2:24:22<5:47:04,  3.00s/it]

Total reward after episode 3065 is 1351.0


 31%|███       | 3066/10000 [2:24:22<4:24:14,  2.29s/it]

Total reward after episode 3066 is 252.0


 31%|███       | 3067/10000 [2:24:25<4:47:35,  2.49s/it]

Total reward after episode 3067 is 1331.0


 31%|███       | 3068/10000 [2:24:30<5:47:10,  3.00s/it]

Total reward after episode 3068 is 1851.0


 31%|███       | 3069/10000 [2:24:32<5:41:57,  2.96s/it]

Total reward after episode 3069 is 1351.0


 31%|███       | 3070/10000 [2:24:37<6:27:24,  3.35s/it]

Total reward after episode 3070 is 1924.0


 31%|███       | 3071/10000 [2:24:38<5:14:50,  2.73s/it]

Total reward after episode 3071 is 608.0


 31%|███       | 3072/10000 [2:24:41<5:16:18,  2.74s/it]

Total reward after episode 3072 is 1352.0


 31%|███       | 3073/10000 [2:24:42<4:30:41,  2.34s/it]

Total reward after episode 3073 is 628.0


 31%|███       | 3074/10000 [2:24:43<3:53:51,  2.03s/it]

Total reward after episode 3074 is 608.0


 31%|███       | 3075/10000 [2:24:46<4:26:05,  2.31s/it]

Total reward after episode 3075 is 1438.0


 31%|███       | 3076/10000 [2:24:50<5:00:56,  2.61s/it]

Total reward after episode 3076 is 1585.0


 31%|███       | 3077/10000 [2:24:53<5:11:03,  2.70s/it]

Total reward after episode 3077 is 1350.0


 31%|███       | 3078/10000 [2:24:55<5:14:31,  2.73s/it]

Total reward after episode 3078 is 1348.0


 31%|███       | 3079/10000 [2:24:59<5:42:07,  2.97s/it]

Total reward after episode 3079 is 1698.0


 31%|███       | 3080/10000 [2:25:02<5:55:42,  3.08s/it]

Total reward after episode 3080 is 1591.0


 31%|███       | 3081/10000 [2:25:03<4:31:58,  2.36s/it]

Total reward after episode 3081 is 242.0


 31%|███       | 3082/10000 [2:25:06<4:50:18,  2.52s/it]

Total reward after episode 3082 is 1350.0


 31%|███       | 3083/10000 [2:25:09<5:00:03,  2.60s/it]

Total reward after episode 3083 is 1339.0


 31%|███       | 3084/10000 [2:25:09<3:51:44,  2.01s/it]

Total reward after episode 3084 is 250.0


 31%|███       | 3085/10000 [2:25:13<4:50:22,  2.52s/it]

Total reward after episode 3085 is 1696.0


 31%|███       | 3086/10000 [2:25:15<4:43:16,  2.46s/it]

Total reward after episode 3086 is 1052.0


 31%|███       | 3087/10000 [2:25:16<3:39:16,  1.90s/it]

Total reward after episode 3087 is 252.0


 31%|███       | 3088/10000 [2:25:18<4:00:58,  2.09s/it]

Total reward after episode 3088 is 1065.0


 31%|███       | 3089/10000 [2:25:20<3:32:26,  1.84s/it]

Total reward after episode 3089 is 608.0


 31%|███       | 3090/10000 [2:25:20<2:49:57,  1.48s/it]

Total reward after episode 3090 is 252.0


 31%|███       | 3091/10000 [2:25:22<2:57:56,  1.55s/it]

Total reward after episode 3091 is 652.0


 31%|███       | 3092/10000 [2:25:26<4:13:21,  2.20s/it]

Total reward after episode 3092 is 1692.0


 31%|███       | 3093/10000 [2:25:30<5:26:06,  2.83s/it]

Total reward after episode 3093 is 1856.0


 31%|███       | 3094/10000 [2:25:34<6:01:25,  3.14s/it]

Total reward after episode 3094 is 1583.0


 31%|███       | 3095/10000 [2:25:37<6:10:15,  3.22s/it]

Total reward after episode 3095 is 1336.0


 31%|███       | 3096/10000 [2:25:40<6:00:30,  3.13s/it]

Total reward after episode 3096 is 1351.0


 31%|███       | 3097/10000 [2:25:42<5:12:32,  2.72s/it]

Total reward after episode 3097 is 737.0


 31%|███       | 3098/10000 [2:25:45<5:22:53,  2.81s/it]

Total reward after episode 3098 is 1350.0


 31%|███       | 3099/10000 [2:25:46<4:29:39,  2.34s/it]

Total reward after episode 3099 is 607.0


 31%|███       | 3100/10000 [2:25:49<4:46:44,  2.49s/it]

Total reward after episode 3100 is 1352.0


 31%|███       | 3101/10000 [2:25:50<4:06:46,  2.15s/it]

Total reward after episode 3101 is 616.0


 31%|███       | 3102/10000 [2:25:54<5:07:31,  2.67s/it]

Total reward after episode 3102 is 1855.0


 31%|███       | 3103/10000 [2:25:57<4:55:43,  2.57s/it]

Total reward after episode 3103 is 1057.0


 31%|███       | 3104/10000 [2:25:59<4:50:07,  2.52s/it]

Total reward after episode 3104 is 1048.0


 31%|███       | 3105/10000 [2:26:23<16:52:58,  8.81s/it]

Total reward after episode 3105 is 2194.0


 31%|███       | 3106/10000 [2:26:27<14:12:46,  7.42s/it]

Total reward after episode 3106 is 1926.0


 31%|███       | 3107/10000 [2:26:31<12:15:31,  6.40s/it]

Total reward after episode 3107 is 1327.0


 31%|███       | 3108/10000 [2:26:31<8:56:07,  4.67s/it] 

Total reward after episode 3108 is 249.0


 31%|███       | 3109/10000 [2:26:34<7:59:25,  4.17s/it]

Total reward after episode 3109 is 1348.0


 31%|███       | 3110/10000 [2:26:36<6:19:11,  3.30s/it]

Total reward after episode 3110 is 607.0


 31%|███       | 3111/10000 [2:26:36<4:49:17,  2.52s/it]

Total reward after episode 3111 is 237.0


 31%|███       | 3112/10000 [2:26:41<5:47:13,  3.02s/it]

Total reward after episode 3112 is 1942.0


 31%|███       | 3113/10000 [2:26:41<4:24:01,  2.30s/it]

Total reward after episode 3113 is 252.0


 31%|███       | 3114/10000 [2:26:45<5:14:42,  2.74s/it]

Total reward after episode 3114 is 1703.0


 31%|███       | 3115/10000 [2:26:48<5:24:35,  2.83s/it]

Total reward after episode 3115 is 1350.0


 31%|███       | 3116/10000 [2:26:51<5:25:48,  2.84s/it]

Total reward after episode 3116 is 1351.0


 31%|███       | 3117/10000 [2:26:52<4:33:33,  2.38s/it]

Total reward after episode 3117 is 607.0


 31%|███       | 3118/10000 [2:26:55<4:57:34,  2.59s/it]

Total reward after episode 3118 is 1350.0


 31%|███       | 3119/10000 [2:26:57<4:12:06,  2.20s/it]

Total reward after episode 3119 is 607.0


 31%|███       | 3120/10000 [2:26:59<4:36:45,  2.41s/it]

Total reward after episode 3120 is 1326.0


 31%|███       | 3121/10000 [2:27:00<3:34:39,  1.87s/it]

Total reward after episode 3121 is 252.0


 31%|███       | 3122/10000 [2:27:01<2:51:20,  1.49s/it]

Total reward after episode 3122 is 252.0


 31%|███       | 3123/10000 [2:27:05<4:21:08,  2.28s/it]

Total reward after episode 3123 is 1931.0


 31%|███       | 3124/10000 [2:27:05<3:23:42,  1.78s/it]

Total reward after episode 3124 is 251.0


 31%|███▏      | 3125/10000 [2:27:07<3:08:09,  1.64s/it]

Total reward after episode 3125 is 608.0


 31%|███▏      | 3126/10000 [2:27:10<3:57:49,  2.08s/it]

Total reward after episode 3126 is 1427.0


 31%|███▏      | 3127/10000 [2:27:13<4:32:34,  2.38s/it]

Total reward after episode 3127 is 1350.0


 31%|███▏      | 3128/10000 [2:27:16<4:51:15,  2.54s/it]

Total reward after episode 3128 is 1044.0


 31%|███▏      | 3129/10000 [2:27:19<5:00:25,  2.62s/it]

Total reward after episode 3129 is 1350.0


 31%|███▏      | 3130/10000 [2:27:21<5:00:53,  2.63s/it]

Total reward after episode 3130 is 1346.0


 31%|███▏      | 3131/10000 [2:27:24<4:57:32,  2.60s/it]

Total reward after episode 3131 is 1050.0


 31%|███▏      | 3132/10000 [2:27:24<3:49:11,  2.00s/it]

Total reward after episode 3132 is 252.0


 31%|███▏      | 3133/10000 [2:27:27<4:18:31,  2.26s/it]

Total reward after episode 3133 is 1360.0


 31%|███▏      | 3134/10000 [2:27:30<4:52:14,  2.55s/it]

Total reward after episode 3134 is 1344.0


 31%|███▏      | 3135/10000 [2:27:35<5:48:59,  3.05s/it]

Total reward after episode 3135 is 1928.0


 31%|███▏      | 3136/10000 [2:27:38<5:44:56,  3.02s/it]

Total reward after episode 3136 is 1350.0


 31%|███▏      | 3137/10000 [2:27:38<4:22:23,  2.29s/it]

Total reward after episode 3137 is 246.0


 31%|███▏      | 3138/10000 [2:27:41<4:24:19,  2.31s/it]

Total reward after episode 3138 is 1055.0


 31%|███▏      | 3139/10000 [2:27:45<5:24:22,  2.84s/it]

Total reward after episode 3139 is 1703.0


 31%|███▏      | 3140/10000 [2:27:47<5:22:55,  2.82s/it]

Total reward after episode 3140 is 1352.0


 31%|███▏      | 3141/10000 [2:27:50<5:24:41,  2.84s/it]

Total reward after episode 3141 is 1350.0


 31%|███▏      | 3142/10000 [2:27:52<4:31:47,  2.38s/it]

Total reward after episode 3142 is 613.0


 31%|███▏      | 3143/10000 [2:27:57<6:06:38,  3.21s/it]

Total reward after episode 3143 is 1310.0


 31%|███▏      | 3144/10000 [2:28:00<6:23:43,  3.36s/it]

Total reward after episode 3144 is 1693.0


 31%|███▏      | 3145/10000 [2:28:03<6:06:16,  3.21s/it]

Total reward after episode 3145 is 1352.0


 31%|███▏      | 3146/10000 [2:28:07<6:23:48,  3.36s/it]

Total reward after episode 3146 is 1693.0


 31%|███▏      | 3147/10000 [2:28:11<7:01:22,  3.69s/it]

Total reward after episode 3147 is 1926.0


 31%|███▏      | 3148/10000 [2:28:16<7:15:06,  3.81s/it]

Total reward after episode 3148 is 1928.0


 31%|███▏      | 3149/10000 [2:28:19<6:44:39,  3.54s/it]

Total reward after episode 3149 is 1332.0


 32%|███▏      | 3150/10000 [2:28:25<8:22:45,  4.40s/it]

Total reward after episode 3150 is 2638.0


 32%|███▏      | 3151/10000 [2:28:28<7:29:07,  3.93s/it]

Total reward after episode 3151 is 1352.0


 32%|███▏      | 3152/10000 [2:28:33<8:26:21,  4.44s/it]

Total reward after episode 3152 is 2367.0


 32%|███▏      | 3153/10000 [2:28:37<7:50:22,  4.12s/it]

Total reward after episode 3153 is 1354.0


 32%|███▏      | 3154/10000 [2:28:40<7:22:32,  3.88s/it]

Total reward after episode 3154 is 1445.0


 32%|███▏      | 3155/10000 [2:28:41<5:30:50,  2.90s/it]

Total reward after episode 3155 is 248.0


 32%|███▏      | 3156/10000 [2:28:45<6:07:49,  3.22s/it]

Total reward after episode 3156 is 1583.0


 32%|███▏      | 3157/10000 [2:28:48<6:23:24,  3.36s/it]

Total reward after episode 3157 is 1692.0


 32%|███▏      | 3158/10000 [2:28:51<6:11:19,  3.26s/it]

Total reward after episode 3158 is 1350.0


 32%|███▏      | 3159/10000 [2:28:52<4:41:13,  2.47s/it]

Total reward after episode 3159 is 252.0


 32%|███▏      | 3160/10000 [2:28:55<4:46:58,  2.52s/it]

Total reward after episode 3160 is 1048.0


 32%|███▏      | 3161/10000 [2:29:38<27:51:55, 14.67s/it]

Total reward after episode 3161 is 443.0


 32%|███▏      | 3162/10000 [2:29:40<21:05:04, 11.10s/it]

Total reward after episode 3162 is 1356.0


 32%|███▏      | 3163/10000 [2:29:43<16:24:25,  8.64s/it]

Total reward after episode 3163 is 1350.0


 32%|███▏      | 3164/10000 [2:29:47<13:38:13,  7.18s/it]

Total reward after episode 3164 is 1704.0


 32%|███▏      | 3165/10000 [2:29:50<11:10:17,  5.88s/it]

Total reward after episode 3165 is 1352.0


 32%|███▏      | 3166/10000 [2:29:51<8:36:07,  4.53s/it] 

Total reward after episode 3166 is 606.0


 32%|███▏      | 3167/10000 [2:29:52<6:24:01,  3.37s/it]

Total reward after episode 3167 is 252.0


 32%|███▏      | 3168/10000 [2:29:55<6:00:44,  3.17s/it]

Total reward after episode 3168 is 1048.0


 32%|███▏      | 3169/10000 [2:29:55<4:35:06,  2.42s/it]

Total reward after episode 3169 is 252.0


 32%|███▏      | 3170/10000 [2:29:58<4:48:07,  2.53s/it]

Total reward after episode 3170 is 1051.0


 32%|███▏      | 3171/10000 [2:30:01<5:01:09,  2.65s/it]

Total reward after episode 3171 is 1350.0


 32%|███▏      | 3172/10000 [2:30:02<3:51:49,  2.04s/it]

Total reward after episode 3172 is 251.0


 32%|███▏      | 3173/10000 [2:30:05<4:44:24,  2.50s/it]

Total reward after episode 3173 is 1703.0


 32%|███▏      | 3174/10000 [2:30:08<4:38:18,  2.45s/it]

Total reward after episode 3174 is 1050.0


 32%|███▏      | 3175/10000 [2:30:09<4:14:38,  2.24s/it]

Total reward after episode 3175 is 652.0


 32%|███▏      | 3176/10000 [2:30:11<3:49:17,  2.02s/it]

Total reward after episode 3176 is 605.0


 32%|███▏      | 3177/10000 [2:30:12<3:33:25,  1.88s/it]

Total reward after episode 3177 is 635.0


 32%|███▏      | 3178/10000 [2:30:15<3:52:14,  2.04s/it]

Total reward after episode 3178 is 1049.0


 32%|███▏      | 3179/10000 [2:30:19<5:00:31,  2.64s/it]

Total reward after episode 3179 is 1323.0


 32%|███▏      | 3180/10000 [2:30:19<3:51:35,  2.04s/it]

Total reward after episode 3180 is 252.0


 32%|███▏      | 3181/10000 [2:30:28<7:45:54,  4.10s/it]

Total reward after episode 3181 is 1307.0


 32%|███▏      | 3182/10000 [2:30:31<7:07:34,  3.76s/it]

Total reward after episode 3182 is 1342.0


 32%|███▏      | 3183/10000 [2:30:35<6:52:47,  3.63s/it]

Total reward after episode 3183 is 1582.0


 32%|███▏      | 3184/10000 [2:30:35<5:10:06,  2.73s/it]

Total reward after episode 3184 is 252.0


 32%|███▏      | 3185/10000 [2:30:38<5:21:47,  2.83s/it]

Total reward after episode 3185 is 1431.0


 32%|███▏      | 3186/10000 [2:30:41<5:15:49,  2.78s/it]

Total reward after episode 3186 is 739.0


 32%|███▏      | 3187/10000 [2:30:45<5:41:26,  3.01s/it]

Total reward after episode 3187 is 1345.0


 32%|███▏      | 3188/10000 [2:30:47<5:36:29,  2.96s/it]

Total reward after episode 3188 is 1350.0


 32%|███▏      | 3189/10000 [2:30:48<4:16:25,  2.26s/it]

Total reward after episode 3189 is 252.0


 32%|███▏      | 3190/10000 [2:30:49<3:45:19,  1.99s/it]

Total reward after episode 3190 is 612.0


 32%|███▏      | 3191/10000 [2:31:03<10:24:04,  5.50s/it]

Total reward after episode 3191 is 2994.0


 32%|███▏      | 3192/10000 [2:31:05<8:34:56,  4.54s/it] 

Total reward after episode 3192 is 1049.0


 32%|███▏      | 3193/10000 [2:31:07<6:46:29,  3.58s/it]

Total reward after episode 3193 is 616.0


 32%|███▏      | 3194/10000 [2:31:10<6:42:17,  3.55s/it]

Total reward after episode 3194 is 1705.0


 32%|███▏      | 3195/10000 [2:31:15<7:12:57,  3.82s/it]

Total reward after episode 3195 is 1927.0


 32%|███▏      | 3196/10000 [2:31:17<6:29:43,  3.44s/it]

Total reward after episode 3196 is 1048.0


 32%|███▏      | 3197/10000 [2:31:20<6:08:38,  3.25s/it]

Total reward after episode 3197 is 1348.0


 32%|███▏      | 3198/10000 [2:31:24<6:18:33,  3.34s/it]

Total reward after episode 3198 is 1428.0


 32%|███▏      | 3199/10000 [2:31:24<4:47:09,  2.53s/it]

Total reward after episode 3199 is 251.0


 32%|███▏      | 3200/10000 [2:31:28<5:40:47,  3.01s/it]

Total reward after episode 3200 is 1885.0


 32%|███▏      | 3201/10000 [2:31:39<10:11:41,  5.40s/it]

Total reward after episode 3201 is 2310.0


 32%|███▏      | 3202/10000 [2:32:23<31:50:03, 16.86s/it]

Total reward after episode 3202 is 1771.0


 32%|███▏      | 3203/10000 [2:32:24<23:00:18, 12.18s/it]

Total reward after episode 3203 is 607.0


 32%|███▏      | 3204/10000 [2:32:27<17:44:08,  9.39s/it]

Total reward after episode 3204 is 1330.0


 32%|███▏      | 3205/10000 [2:32:28<12:49:10,  6.79s/it]

Total reward after episode 3205 is 248.0


 32%|███▏      | 3206/10000 [2:32:30<9:57:21,  5.28s/it] 

Total reward after episode 3206 is 652.0


 32%|███▏      | 3207/10000 [2:32:31<7:41:09,  4.07s/it]

Total reward after episode 3207 is 608.0


 32%|███▏      | 3208/10000 [2:32:33<6:49:14,  3.62s/it]

Total reward after episode 3208 is 1047.0


 32%|███▏      | 3209/10000 [2:32:38<7:23:23,  3.92s/it]

Total reward after episode 3209 is 1857.0


 32%|███▏      | 3210/10000 [2:32:41<6:48:01,  3.61s/it]

Total reward after episode 3210 is 1331.0


 32%|███▏      | 3211/10000 [2:32:45<6:56:52,  3.68s/it]

Total reward after episode 3211 is 1322.0


 32%|███▏      | 3212/10000 [2:32:48<6:29:01,  3.44s/it]

Total reward after episode 3212 is 1352.0


 32%|███▏      | 3213/10000 [2:32:52<6:58:07,  3.70s/it]

Total reward after episode 3213 is 1923.0


 32%|███▏      | 3214/10000 [2:32:55<6:25:13,  3.41s/it]

Total reward after episode 3214 is 819.0


 32%|███▏      | 3215/10000 [2:32:59<6:45:26,  3.59s/it]

Total reward after episode 3215 is 1336.0


 32%|███▏      | 3216/10000 [2:33:02<6:36:13,  3.50s/it]

Total reward after episode 3216 is 1585.0


 32%|███▏      | 3217/10000 [2:33:04<6:01:49,  3.20s/it]

Total reward after episode 3217 is 1058.0


 32%|███▏      | 3218/10000 [2:33:07<5:49:48,  3.09s/it]

Total reward after episode 3218 is 1044.0


 32%|███▏      | 3219/10000 [2:33:10<5:44:24,  3.05s/it]

Total reward after episode 3219 is 1350.0


 32%|███▏      | 3220/10000 [2:33:14<6:22:42,  3.39s/it]

Total reward after episode 3220 is 1865.0


 32%|███▏      | 3221/10000 [2:33:17<6:00:11,  3.19s/it]

Total reward after episode 3221 is 819.0


 32%|███▏      | 3222/10000 [2:33:21<6:19:00,  3.36s/it]

Total reward after episode 3222 is 1691.0


 32%|███▏      | 3223/10000 [2:33:24<5:55:40,  3.15s/it]

Total reward after episode 3223 is 1055.0


 32%|███▏      | 3224/10000 [2:33:26<5:46:46,  3.07s/it]

Total reward after episode 3224 is 1350.0


 32%|███▏      | 3225/10000 [2:33:29<5:27:32,  2.90s/it]

Total reward after episode 3225 is 1048.0


 32%|███▏      | 3226/10000 [2:33:31<5:14:04,  2.78s/it]

Total reward after episode 3226 is 1047.0


 32%|███▏      | 3227/10000 [2:33:36<6:07:48,  3.26s/it]

Total reward after episode 3227 is 1926.0


 32%|███▏      | 3228/10000 [2:33:41<7:19:46,  3.90s/it]

Total reward after episode 3228 is 1917.0


 32%|███▏      | 3229/10000 [2:33:43<6:26:11,  3.42s/it]

Total reward after episode 3229 is 1049.0


 32%|███▏      | 3230/10000 [2:33:47<6:29:33,  3.45s/it]

Total reward after episode 3230 is 812.0


 32%|███▏      | 3231/10000 [2:33:50<6:28:50,  3.45s/it]

Total reward after episode 3231 is 1340.0


 32%|███▏      | 3232/10000 [2:33:55<6:59:24,  3.72s/it]

Total reward after episode 3232 is 1942.0


 32%|███▏      | 3233/10000 [2:33:57<6:18:32,  3.36s/it]

Total reward after episode 3233 is 1045.0


 32%|███▏      | 3234/10000 [2:34:06<9:07:23,  4.85s/it]

Total reward after episode 3234 is 1295.0


 32%|███▏      | 3235/10000 [2:34:10<8:40:13,  4.61s/it]

Total reward after episode 3235 is 1869.0


 32%|███▏      | 3236/10000 [2:34:13<7:58:03,  4.24s/it]

Total reward after episode 3236 is 1587.0


 32%|███▏      | 3237/10000 [2:34:22<10:40:27,  5.68s/it]

Total reward after episode 3237 is 3039.0


 32%|███▏      | 3238/10000 [2:34:25<9:04:07,  4.83s/it] 

Total reward after episode 3238 is 818.0


 32%|███▏      | 3239/10000 [2:34:31<9:37:16,  5.12s/it]

Total reward after episode 3239 is 2361.0


 32%|███▏      | 3240/10000 [2:34:33<8:05:01,  4.30s/it]

Total reward after episode 3240 is 1046.0


 32%|███▏      | 3241/10000 [2:34:36<7:29:02,  3.99s/it]

Total reward after episode 3241 is 1344.0


 32%|███▏      | 3242/10000 [2:34:39<6:53:23,  3.67s/it]

Total reward after episode 3242 is 1351.0


 32%|███▏      | 3243/10000 [2:34:42<6:23:27,  3.40s/it]

Total reward after episode 3243 is 1338.0


 32%|███▏      | 3244/10000 [2:34:46<6:52:34,  3.66s/it]

Total reward after episode 3244 is 1338.0


 32%|███▏      | 3245/10000 [2:34:47<5:10:27,  2.76s/it]

Total reward after episode 3245 is 251.0


 32%|███▏      | 3246/10000 [2:34:49<4:55:06,  2.62s/it]

Total reward after episode 3246 is 1046.0


 32%|███▏      | 3247/10000 [2:34:52<5:00:05,  2.67s/it]

Total reward after episode 3247 is 1350.0


 32%|███▏      | 3248/10000 [2:34:54<4:47:28,  2.55s/it]

Total reward after episode 3248 is 1050.0


 32%|███▏      | 3249/10000 [2:34:56<4:09:53,  2.22s/it]

Total reward after episode 3249 is 628.0


 32%|███▎      | 3250/10000 [2:35:00<5:08:58,  2.75s/it]

Total reward after episode 3250 is 1863.0


 33%|███▎      | 3251/10000 [2:35:01<4:19:53,  2.31s/it]

Total reward after episode 3251 is 611.0


 33%|███▎      | 3252/10000 [2:35:06<5:32:35,  2.96s/it]

Total reward after episode 3252 is 1923.0


 33%|███▎      | 3253/10000 [2:35:10<6:12:30,  3.31s/it]

Total reward after episode 3253 is 1570.0


 33%|███▎      | 3254/10000 [2:35:13<6:17:54,  3.36s/it]

Total reward after episode 3254 is 1571.0


 33%|███▎      | 3255/10000 [2:35:17<6:33:25,  3.50s/it]

Total reward after episode 3255 is 1698.0


 33%|███▎      | 3256/10000 [2:35:21<6:41:42,  3.57s/it]

Total reward after episode 3256 is 1700.0


 33%|███▎      | 3257/10000 [2:35:25<7:17:01,  3.89s/it]

Total reward after episode 3257 is 1927.0


 33%|███▎      | 3258/10000 [2:35:29<7:12:54,  3.85s/it]

Total reward after episode 3258 is 1703.0


 33%|███▎      | 3259/10000 [2:35:31<5:48:57,  3.11s/it]

Total reward after episode 3259 is 608.0


 33%|███▎      | 3260/10000 [2:35:34<5:48:13,  3.10s/it]

Total reward after episode 3260 is 816.0


 33%|███▎      | 3261/10000 [2:35:38<6:27:28,  3.45s/it]

Total reward after episode 3261 is 1924.0


 33%|███▎      | 3262/10000 [2:35:39<5:23:24,  2.88s/it]

Total reward after episode 3262 is 738.0


 33%|███▎      | 3263/10000 [2:35:41<4:42:05,  2.51s/it]

Total reward after episode 3263 is 636.0


 33%|███▎      | 3264/10000 [2:35:45<5:44:27,  3.07s/it]

Total reward after episode 3264 is 1928.0


 33%|███▎      | 3265/10000 [2:35:48<5:27:27,  2.92s/it]

Total reward after episode 3265 is 1049.0


 33%|███▎      | 3266/10000 [2:35:51<5:13:23,  2.79s/it]

Total reward after episode 3266 is 1049.0


 33%|███▎      | 3267/10000 [2:35:55<6:14:35,  3.34s/it]

Total reward after episode 3267 is 1849.0


 33%|███▎      | 3268/10000 [2:35:59<6:16:27,  3.36s/it]

Total reward after episode 3268 is 1334.0


 33%|███▎      | 3269/10000 [2:36:03<6:46:42,  3.63s/it]

Total reward after episode 3269 is 1932.0


 33%|███▎      | 3270/10000 [2:36:05<6:09:04,  3.29s/it]

Total reward after episode 3270 is 1047.0


 33%|███▎      | 3271/10000 [2:36:06<4:39:36,  2.49s/it]

Total reward after episode 3271 is 250.0


 33%|███▎      | 3272/10000 [2:36:11<5:57:42,  3.19s/it]

Total reward after episode 3272 is 1698.0


 33%|███▎      | 3273/10000 [2:36:18<8:04:00,  4.32s/it]

Total reward after episode 3273 is 2353.0


 33%|███▎      | 3274/10000 [2:36:21<7:13:17,  3.87s/it]

Total reward after episode 3274 is 1345.0


 33%|███▎      | 3275/10000 [2:36:23<6:22:54,  3.42s/it]

Total reward after episode 3275 is 1046.0


 33%|███▎      | 3276/10000 [2:36:26<6:11:54,  3.32s/it]

Total reward after episode 3276 is 1349.0


 33%|███▎      | 3277/10000 [2:36:30<6:46:00,  3.62s/it]

Total reward after episode 3277 is 1858.0


 33%|███▎      | 3278/10000 [2:36:33<6:27:05,  3.46s/it]

Total reward after episode 3278 is 1331.0


 33%|███▎      | 3279/10000 [2:36:36<5:49:58,  3.12s/it]

Total reward after episode 3279 is 1049.0


 33%|███▎      | 3280/10000 [2:36:39<6:00:36,  3.22s/it]

Total reward after episode 3280 is 1346.0


 33%|███▎      | 3281/10000 [2:36:44<6:40:37,  3.58s/it]

Total reward after episode 3281 is 1943.0


 33%|███▎      | 3282/10000 [2:36:46<6:17:01,  3.37s/it]

Total reward after episode 3282 is 1352.0


 33%|███▎      | 3283/10000 [2:36:51<7:05:32,  3.80s/it]

Total reward after episode 3283 is 1697.0


 33%|███▎      | 3284/10000 [2:36:55<7:14:58,  3.89s/it]

Total reward after episode 3284 is 1696.0


 33%|███▎      | 3285/10000 [2:36:59<7:00:40,  3.76s/it]

Total reward after episode 3285 is 1571.0


 33%|███▎      | 3286/10000 [2:37:03<6:59:12,  3.75s/it]

Total reward after episode 3286 is 1726.0


 33%|███▎      | 3287/10000 [2:37:06<7:02:53,  3.78s/it]

Total reward after episode 3287 is 1697.0


 33%|███▎      | 3288/10000 [2:37:14<9:11:34,  4.93s/it]

Total reward after episode 3288 is 2347.0


 33%|███▎      | 3289/10000 [2:37:17<8:01:48,  4.31s/it]

Total reward after episode 3289 is 1333.0


 33%|███▎      | 3290/10000 [2:37:20<7:37:18,  4.09s/it]

Total reward after episode 3290 is 1586.0


 33%|███▎      | 3291/10000 [2:37:24<7:14:33,  3.89s/it]

Total reward after episode 3291 is 1436.0


 33%|███▎      | 3292/10000 [2:37:27<6:35:40,  3.54s/it]

Total reward after episode 3292 is 1046.0


 33%|███▎      | 3293/10000 [2:37:30<6:32:21,  3.51s/it]

Total reward after episode 3293 is 1345.0


 33%|███▎      | 3294/10000 [2:37:35<7:15:56,  3.90s/it]

Total reward after episode 3294 is 1689.0


 33%|███▎      | 3295/10000 [2:37:35<5:25:55,  2.92s/it]

Total reward after episode 3295 is 251.0


 33%|███▎      | 3296/10000 [2:37:39<5:32:37,  2.98s/it]

Total reward after episode 3296 is 1339.0


 33%|███▎      | 3297/10000 [2:37:41<5:25:28,  2.91s/it]

Total reward after episode 3297 is 1340.0


 33%|███▎      | 3298/10000 [2:37:45<5:54:56,  3.18s/it]

Total reward after episode 3298 is 1701.0


 33%|███▎      | 3299/10000 [2:37:48<5:41:57,  3.06s/it]

Total reward after episode 3299 is 1043.0


 33%|███▎      | 3300/10000 [2:37:50<5:12:04,  2.79s/it]

Total reward after episode 3300 is 1053.0


 33%|███▎      | 3301/10000 [2:37:52<4:53:16,  2.63s/it]

Total reward after episode 3301 is 1059.0


 33%|███▎      | 3302/10000 [2:37:58<6:26:38,  3.46s/it]

Total reward after episode 3302 is 2366.0


 33%|███▎      | 3303/10000 [2:38:02<6:36:53,  3.56s/it]

Total reward after episode 3303 is 1582.0


 33%|███▎      | 3304/10000 [2:38:09<9:00:16,  4.84s/it]

Total reward after episode 3304 is 772.0


 33%|███▎      | 3305/10000 [2:38:12<7:47:39,  4.19s/it]

Total reward after episode 3305 is 1046.0


 33%|███▎      | 3306/10000 [2:38:16<7:32:30,  4.06s/it]

Total reward after episode 3306 is 1710.0


 33%|███▎      | 3307/10000 [2:38:31<13:53:59,  7.48s/it]

Total reward after episode 3307 is 2274.0


 33%|███▎      | 3308/10000 [2:38:35<11:44:38,  6.32s/it]

Total reward after episode 3308 is 1580.0


 33%|███▎      | 3309/10000 [2:39:03<24:09:08, 12.99s/it]

Total reward after episode 3309 is 2150.0


 33%|███▎      | 3310/10000 [2:39:09<20:15:45, 10.90s/it]

Total reward after episode 3310 is 2363.0


 33%|███▎      | 3311/10000 [2:39:10<14:39:06,  7.89s/it]

Total reward after episode 3311 is 235.0


 33%|███▎      | 3312/10000 [2:39:13<11:57:06,  6.43s/it]

Total reward after episode 3312 is 1070.0


 33%|███▎      | 3313/10000 [2:39:16<10:04:12,  5.42s/it]

Total reward after episode 3313 is 1336.0


 33%|███▎      | 3314/10000 [2:39:19<8:23:35,  4.52s/it] 

Total reward after episode 3314 is 1060.0


 33%|███▎      | 3315/10000 [2:39:21<7:10:21,  3.86s/it]

Total reward after episode 3315 is 1050.0


 33%|███▎      | 3316/10000 [2:39:25<7:13:07,  3.89s/it]

Total reward after episode 3316 is 1702.0


 33%|███▎      | 3317/10000 [2:39:26<5:47:46,  3.12s/it]

Total reward after episode 3317 is 608.0


 33%|███▎      | 3318/10000 [2:39:29<5:39:02,  3.04s/it]

Total reward after episode 3318 is 1352.0


 33%|███▎      | 3319/10000 [2:39:32<5:33:21,  2.99s/it]

Total reward after episode 3319 is 1351.0


 33%|███▎      | 3320/10000 [2:39:33<4:14:13,  2.28s/it]

Total reward after episode 3320 is 252.0


 33%|███▎      | 3321/10000 [2:39:37<5:14:50,  2.83s/it]

Total reward after episode 3321 is 1924.0


 33%|███▎      | 3322/10000 [2:39:38<4:24:24,  2.38s/it]

Total reward after episode 3322 is 611.0


 33%|███▎      | 3323/10000 [2:39:41<4:47:52,  2.59s/it]

Total reward after episode 3323 is 1353.0


 33%|███▎      | 3324/10000 [2:39:46<5:54:04,  3.18s/it]

Total reward after episode 3324 is 1930.0


 33%|███▎      | 3325/10000 [2:39:51<6:50:08,  3.69s/it]

Total reward after episode 3325 is 1925.0


 33%|███▎      | 3326/10000 [2:39:52<5:31:08,  2.98s/it]

Total reward after episode 3326 is 611.0


 33%|███▎      | 3327/10000 [2:39:56<6:00:24,  3.24s/it]

Total reward after episode 3327 is 1324.0


 33%|███▎      | 3328/10000 [2:40:02<7:41:39,  4.15s/it]

Total reward after episode 3328 is 2359.0


 33%|███▎      | 3329/10000 [2:40:06<7:27:19,  4.02s/it]

Total reward after episode 3329 is 1726.0


 33%|███▎      | 3330/10000 [2:40:07<5:34:17,  3.01s/it]

Total reward after episode 3330 is 250.0


 33%|███▎      | 3331/10000 [2:40:10<5:39:58,  3.06s/it]

Total reward after episode 3331 is 1335.0


 33%|███▎      | 3332/10000 [2:40:10<4:19:03,  2.33s/it]

Total reward after episode 3332 is 251.0


 33%|███▎      | 3333/10000 [2:40:18<7:23:38,  3.99s/it]

Total reward after episode 3333 is 3050.0


 33%|███▎      | 3334/10000 [2:40:19<5:52:52,  3.18s/it]

Total reward after episode 3334 is 610.0


 33%|███▎      | 3335/10000 [2:40:24<6:34:09,  3.55s/it]

Total reward after episode 3335 is 1927.0


 33%|███▎      | 3336/10000 [2:40:27<6:09:49,  3.33s/it]

Total reward after episode 3336 is 1339.0


 33%|███▎      | 3337/10000 [2:40:40<11:48:41,  6.38s/it]

Total reward after episode 3337 is 2579.0


 33%|███▎      | 3338/10000 [2:40:43<9:52:27,  5.34s/it] 

Total reward after episode 3338 is 1041.0


 33%|███▎      | 3339/10000 [2:40:49<10:24:44,  5.63s/it]

Total reward after episode 3339 is 2358.0


 33%|███▎      | 3340/10000 [2:40:54<9:48:55,  5.31s/it] 

Total reward after episode 3340 is 1858.0


 33%|███▎      | 3341/10000 [2:40:56<8:07:30,  4.39s/it]

Total reward after episode 3341 is 1053.0


 33%|███▎      | 3342/10000 [2:41:00<7:55:16,  4.28s/it]

Total reward after episode 3342 is 1869.0


 33%|███▎      | 3343/10000 [2:41:07<9:00:55,  4.88s/it]

Total reward after episode 3343 is 1014.0


 33%|███▎      | 3344/10000 [2:41:09<7:36:51,  4.12s/it]

Total reward after episode 3344 is 1051.0


 33%|███▎      | 3345/10000 [2:41:12<7:17:14,  3.94s/it]

Total reward after episode 3345 is 1705.0


 33%|███▎      | 3346/10000 [2:41:15<6:30:48,  3.52s/it]

Total reward after episode 3346 is 1044.0


 33%|███▎      | 3347/10000 [2:41:17<5:52:54,  3.18s/it]

Total reward after episode 3347 is 1074.0


 33%|███▎      | 3348/10000 [2:41:21<6:06:45,  3.31s/it]

Total reward after episode 3348 is 1581.0


 33%|███▎      | 3349/10000 [2:41:24<5:53:40,  3.19s/it]

Total reward after episode 3349 is 1351.0


 34%|███▎      | 3350/10000 [2:41:26<5:34:10,  3.02s/it]

Total reward after episode 3350 is 1056.0


 34%|███▎      | 3351/10000 [2:41:30<6:03:27,  3.28s/it]

Total reward after episode 3351 is 1704.0


 34%|███▎      | 3352/10000 [2:41:36<7:27:11,  4.04s/it]

Total reward after episode 3352 is 2365.0


 34%|███▎      | 3353/10000 [2:41:39<6:37:40,  3.59s/it]

Total reward after episode 3353 is 1044.0


 34%|███▎      | 3354/10000 [2:41:43<6:59:54,  3.79s/it]

Total reward after episode 3354 is 1925.0


 34%|███▎      | 3355/10000 [2:41:47<6:58:18,  3.78s/it]

Total reward after episode 3355 is 1718.0


 34%|███▎      | 3356/10000 [2:41:49<6:16:40,  3.40s/it]

Total reward after episode 3356 is 1047.0


 34%|███▎      | 3357/10000 [2:41:50<4:44:20,  2.57s/it]

Total reward after episode 3357 is 249.0


 34%|███▎      | 3358/10000 [2:41:53<4:54:09,  2.66s/it]

Total reward after episode 3358 is 818.0


 34%|███▎      | 3359/10000 [2:41:53<3:46:45,  2.05s/it]

Total reward after episode 3359 is 251.0


 34%|███▎      | 3360/10000 [2:41:58<5:00:32,  2.72s/it]

Total reward after episode 3360 is 1947.0


 34%|███▎      | 3361/10000 [2:41:58<3:51:26,  2.09s/it]

Total reward after episode 3361 is 251.0


 34%|███▎      | 3362/10000 [2:42:01<4:21:35,  2.36s/it]

Total reward after episode 3362 is 1041.0


 34%|███▎      | 3363/10000 [2:42:04<4:47:05,  2.60s/it]

Total reward after episode 3363 is 1327.0


 34%|███▎      | 3364/10000 [2:42:07<4:37:39,  2.51s/it]

Total reward after episode 3364 is 1051.0


 34%|███▎      | 3365/10000 [2:42:11<5:26:33,  2.95s/it]

Total reward after episode 3365 is 1866.0


 34%|███▎      | 3366/10000 [2:42:15<6:03:17,  3.29s/it]

Total reward after episode 3366 is 1934.0


 34%|███▎      | 3367/10000 [2:42:17<5:30:07,  2.99s/it]

Total reward after episode 3367 is 1072.0


 34%|███▎      | 3368/10000 [2:42:21<6:06:48,  3.32s/it]

Total reward after episode 3368 is 1923.0


 34%|███▎      | 3369/10000 [2:42:24<5:38:41,  3.06s/it]

Total reward after episode 3369 is 1047.0


 34%|███▎      | 3370/10000 [2:42:26<5:13:28,  2.84s/it]

Total reward after episode 3370 is 1049.0


 34%|███▎      | 3371/10000 [2:42:29<5:17:51,  2.88s/it]

Total reward after episode 3371 is 1326.0


 34%|███▎      | 3372/10000 [2:42:32<5:40:30,  3.08s/it]

Total reward after episode 3372 is 1351.0


 34%|███▎      | 3373/10000 [2:42:35<5:18:16,  2.88s/it]

Total reward after episode 3373 is 1049.0


 34%|███▎      | 3374/10000 [2:42:37<5:04:33,  2.76s/it]

Total reward after episode 3374 is 1048.0


 34%|███▍      | 3375/10000 [2:42:40<5:15:43,  2.86s/it]

Total reward after episode 3375 is 1351.0


 34%|███▍      | 3376/10000 [2:42:44<5:32:53,  3.02s/it]

Total reward after episode 3376 is 1585.0


 34%|███▍      | 3377/10000 [2:42:47<5:52:12,  3.19s/it]

Total reward after episode 3377 is 1706.0


 34%|███▍      | 3378/10000 [2:42:52<6:23:43,  3.48s/it]

Total reward after episode 3378 is 1946.0


 34%|███▍      | 3379/10000 [2:42:55<6:33:33,  3.57s/it]

Total reward after episode 3379 is 1726.0


 34%|███▍      | 3380/10000 [2:42:57<5:19:01,  2.89s/it]

Total reward after episode 3380 is 610.0


 34%|███▍      | 3381/10000 [2:43:01<6:03:05,  3.29s/it]

Total reward after episode 3381 is 1923.0


 34%|███▍      | 3382/10000 [2:43:05<6:29:54,  3.53s/it]

Total reward after episode 3382 is 1694.0


 34%|███▍      | 3383/10000 [2:43:08<6:07:48,  3.34s/it]

Total reward after episode 3383 is 1352.0


 34%|███▍      | 3384/10000 [2:43:11<6:06:18,  3.32s/it]

Total reward after episode 3384 is 1586.0


 34%|███▍      | 3385/10000 [2:43:12<4:59:55,  2.72s/it]

Total reward after episode 3385 is 613.0


 34%|███▍      | 3386/10000 [2:43:17<6:11:11,  3.37s/it]

Total reward after episode 3386 is 2371.0


 34%|███▍      | 3387/10000 [2:43:18<4:40:16,  2.54s/it]

Total reward after episode 3387 is 247.0


 34%|███▍      | 3388/10000 [2:43:21<4:50:02,  2.63s/it]

Total reward after episode 3388 is 1330.0


 34%|███▍      | 3389/10000 [2:43:22<4:05:28,  2.23s/it]

Total reward after episode 3389 is 610.0


 34%|███▍      | 3390/10000 [2:43:24<3:41:38,  2.01s/it]

Total reward after episode 3390 is 631.0


 34%|███▍      | 3391/10000 [2:43:27<4:40:19,  2.54s/it]

Total reward after episode 3391 is 1698.0


 34%|███▍      | 3392/10000 [2:43:31<5:28:59,  2.99s/it]

Total reward after episode 3392 is 1927.0


 34%|███▍      | 3393/10000 [2:43:34<5:07:53,  2.80s/it]

Total reward after episode 3393 is 1049.0


 34%|███▍      | 3394/10000 [2:43:35<4:19:58,  2.36s/it]

Total reward after episode 3394 is 610.0


 34%|███▍      | 3395/10000 [2:43:37<4:01:02,  2.19s/it]

Total reward after episode 3395 is 725.0


 34%|███▍      | 3396/10000 [2:43:41<4:53:18,  2.66s/it]

Total reward after episode 3396 is 1698.0


 34%|███▍      | 3397/10000 [2:43:45<6:01:48,  3.29s/it]

Total reward after episode 3397 is 1921.0


 34%|███▍      | 3398/10000 [2:43:49<6:05:00,  3.32s/it]

Total reward after episode 3398 is 1589.0


 34%|███▍      | 3399/10000 [2:43:52<6:10:42,  3.37s/it]

Total reward after episode 3399 is 1704.0


 34%|███▍      | 3400/10000 [2:43:56<6:31:42,  3.56s/it]

Total reward after episode 3400 is 1863.0


 34%|███▍      | 3401/10000 [2:43:59<5:50:55,  3.19s/it]

Total reward after episode 3401 is 1047.0


 34%|███▍      | 3402/10000 [2:43:59<4:26:01,  2.42s/it]

Total reward after episode 3402 is 251.0


 34%|███▍      | 3403/10000 [2:44:03<5:21:06,  2.92s/it]

Total reward after episode 3403 is 1847.0


 34%|███▍      | 3404/10000 [2:44:06<5:19:10,  2.90s/it]

Total reward after episode 3404 is 1338.0


 34%|███▍      | 3405/10000 [2:44:14<8:13:28,  4.49s/it]

Total reward after episode 3405 is 2342.0


 34%|███▍      | 3406/10000 [2:44:15<6:06:45,  3.34s/it]

Total reward after episode 3406 is 250.0


 34%|███▍      | 3407/10000 [2:44:18<6:08:57,  3.36s/it]

Total reward after episode 3407 is 1322.0


 34%|███▍      | 3408/10000 [2:44:21<5:42:48,  3.12s/it]

Total reward after episode 3408 is 1057.0


 34%|███▍      | 3409/10000 [2:44:24<5:51:22,  3.20s/it]

Total reward after episode 3409 is 1345.0


 34%|███▍      | 3410/10000 [2:44:27<5:41:08,  3.11s/it]

Total reward after episode 3410 is 1351.0


 34%|███▍      | 3411/10000 [2:44:33<7:10:38,  3.92s/it]

Total reward after episode 3411 is 2365.0


 34%|███▍      | 3412/10000 [2:44:39<8:13:52,  4.50s/it]

Total reward after episode 3412 is 2362.0


 34%|███▍      | 3413/10000 [2:44:43<7:58:44,  4.36s/it]

Total reward after episode 3413 is 1862.0


 34%|███▍      | 3414/10000 [2:44:46<7:19:23,  4.00s/it]

Total reward after episode 3414 is 815.0


 34%|███▍      | 3415/10000 [2:44:50<7:27:46,  4.08s/it]

Total reward after episode 3415 is 1862.0


 34%|███▍      | 3416/10000 [2:44:55<7:32:04,  4.12s/it]

Total reward after episode 3416 is 1691.0


 34%|███▍      | 3417/10000 [2:44:55<5:37:38,  3.08s/it]

Total reward after episode 3417 is 252.0


 34%|███▍      | 3418/10000 [2:44:59<5:58:39,  3.27s/it]

Total reward after episode 3418 is 1699.0


 34%|███▍      | 3419/10000 [2:45:00<4:31:24,  2.47s/it]

Total reward after episode 3419 is 249.0


 34%|███▍      | 3420/10000 [2:45:03<5:03:04,  2.76s/it]

Total reward after episode 3420 is 812.0


 34%|███▍      | 3421/10000 [2:45:11<8:09:29,  4.46s/it]

Total reward after episode 3421 is 2341.0


 34%|███▍      | 3422/10000 [2:45:15<7:44:06,  4.23s/it]

Total reward after episode 3422 is 1703.0


 34%|███▍      | 3423/10000 [2:45:19<7:24:32,  4.06s/it]

Total reward after episode 3423 is 1695.0


 34%|███▍      | 3424/10000 [2:45:25<8:45:15,  4.79s/it]

Total reward after episode 3424 is 2352.0


 34%|███▍      | 3425/10000 [2:45:28<7:47:38,  4.27s/it]

Total reward after episode 3425 is 816.0


 34%|███▍      | 3426/10000 [2:45:31<6:56:45,  3.80s/it]

Total reward after episode 3426 is 1047.0


 34%|███▍      | 3427/10000 [2:45:33<6:09:37,  3.37s/it]

Total reward after episode 3427 is 1049.0


 34%|███▍      | 3428/10000 [2:45:36<5:46:51,  3.17s/it]

Total reward after episode 3428 is 1045.0


 34%|███▍      | 3429/10000 [2:45:39<5:27:59,  2.99s/it]

Total reward after episode 3429 is 1047.0


 34%|███▍      | 3430/10000 [2:45:43<5:57:02,  3.26s/it]

Total reward after episode 3430 is 1862.0


 34%|███▍      | 3431/10000 [2:45:46<6:00:27,  3.29s/it]

Total reward after episode 3431 is 1322.0


 34%|███▍      | 3432/10000 [2:45:50<6:29:25,  3.56s/it]

Total reward after episode 3432 is 1924.0


 34%|███▍      | 3433/10000 [2:45:52<5:46:52,  3.17s/it]

Total reward after episode 3433 is 1050.0


 34%|███▍      | 3434/10000 [2:45:58<6:58:00,  3.82s/it]

Total reward after episode 3434 is 2364.0


 34%|███▍      | 3435/10000 [2:46:01<6:25:13,  3.52s/it]

Total reward after episode 3435 is 1043.0


 34%|███▍      | 3436/10000 [2:46:03<5:45:48,  3.16s/it]

Total reward after episode 3436 is 1052.0


 34%|███▍      | 3437/10000 [2:46:03<4:22:21,  2.40s/it]

Total reward after episode 3437 is 251.0


 34%|███▍      | 3438/10000 [2:46:07<4:49:01,  2.64s/it]

Total reward after episode 3438 is 1341.0


 34%|███▍      | 3439/10000 [2:46:10<5:24:22,  2.97s/it]

Total reward after episode 3439 is 1693.0


 34%|███▍      | 3440/10000 [2:46:11<4:07:17,  2.26s/it]

Total reward after episode 3440 is 250.0


 34%|███▍      | 3441/10000 [2:46:15<5:10:03,  2.84s/it]

Total reward after episode 3441 is 1949.0


 34%|███▍      | 3442/10000 [2:46:25<9:12:11,  5.05s/it]

Total reward after episode 3442 is 2325.0


 34%|███▍      | 3443/10000 [2:46:28<8:00:06,  4.39s/it]

Total reward after episode 3443 is 1356.0


 34%|███▍      | 3444/10000 [2:46:30<6:17:45,  3.46s/it]

Total reward after episode 3444 is 607.0


 34%|███▍      | 3445/10000 [2:46:36<7:55:02,  4.35s/it]

Total reward after episode 3445 is 2357.0


 34%|███▍      | 3446/10000 [2:46:37<6:14:19,  3.43s/it]

Total reward after episode 3446 is 608.0


 34%|███▍      | 3447/10000 [2:46:39<5:03:58,  2.78s/it]

Total reward after episode 3447 is 611.0


 34%|███▍      | 3448/10000 [2:46:40<4:14:37,  2.33s/it]

Total reward after episode 3448 is 607.0


 34%|███▍      | 3449/10000 [2:46:46<6:04:19,  3.34s/it]

Total reward after episode 3449 is 2364.0


 34%|███▍      | 3450/10000 [2:46:53<8:10:13,  4.49s/it]

Total reward after episode 3450 is 2351.0


 35%|███▍      | 3451/10000 [2:46:55<7:06:49,  3.91s/it]

Total reward after episode 3451 is 1049.0


 35%|███▍      | 3452/10000 [2:46:56<5:19:38,  2.93s/it]

Total reward after episode 3452 is 247.0


 35%|███▍      | 3453/10000 [2:46:57<4:28:32,  2.46s/it]

Total reward after episode 3453 is 608.0


 35%|███▍      | 3454/10000 [2:46:59<3:59:27,  2.19s/it]

Total reward after episode 3454 is 607.0


 35%|███▍      | 3455/10000 [2:47:03<4:49:23,  2.65s/it]

Total reward after episode 3455 is 1699.0


 35%|███▍      | 3456/10000 [2:47:05<4:39:03,  2.56s/it]

Total reward after episode 3456 is 1053.0


 35%|███▍      | 3457/10000 [2:47:09<5:20:59,  2.94s/it]

Total reward after episode 3457 is 1698.0


 35%|███▍      | 3458/10000 [2:47:10<4:26:32,  2.44s/it]

Total reward after episode 3458 is 608.0


 35%|███▍      | 3459/10000 [2:47:11<3:49:24,  2.10s/it]

Total reward after episode 3459 is 608.0


 35%|███▍      | 3460/10000 [2:47:13<3:23:27,  1.87s/it]

Total reward after episode 3460 is 612.0


 35%|███▍      | 3461/10000 [2:47:17<4:33:45,  2.51s/it]

Total reward after episode 3461 is 1323.0


 35%|███▍      | 3462/10000 [2:47:22<6:04:13,  3.34s/it]

Total reward after episode 3462 is 1858.0


 35%|███▍      | 3463/10000 [2:47:29<8:12:34,  4.52s/it]

Total reward after episode 3463 is 2347.0


 35%|███▍      | 3464/10000 [2:47:30<6:04:52,  3.35s/it]

Total reward after episode 3464 is 251.0


 35%|███▍      | 3465/10000 [2:47:31<4:57:26,  2.73s/it]

Total reward after episode 3465 is 610.0


 35%|███▍      | 3466/10000 [2:47:45<11:08:27,  6.14s/it]

Total reward after episode 3466 is 2986.0


 35%|███▍      | 3467/10000 [2:47:48<9:23:12,  5.17s/it] 

Total reward after episode 3467 is 1042.0


 35%|███▍      | 3468/10000 [2:47:54<9:38:07,  5.31s/it]

Total reward after episode 3468 is 2365.0


 35%|███▍      | 3469/10000 [2:47:58<8:56:17,  4.93s/it]

Total reward after episode 3469 is 1862.0


 35%|███▍      | 3470/10000 [2:48:01<8:13:57,  4.54s/it]

Total reward after episode 3470 is 1696.0


 35%|███▍      | 3471/10000 [2:48:04<7:02:55,  3.89s/it]

Total reward after episode 3471 is 1058.0


 35%|███▍      | 3472/10000 [2:48:04<5:16:08,  2.91s/it]

Total reward after episode 3472 is 250.0


 35%|███▍      | 3473/10000 [2:48:07<4:58:40,  2.75s/it]

Total reward after episode 3473 is 1058.0


 35%|███▍      | 3474/10000 [2:48:11<5:40:33,  3.13s/it]

Total reward after episode 3474 is 1700.0


 35%|███▍      | 3475/10000 [2:48:14<5:28:40,  3.02s/it]

Total reward after episode 3475 is 1043.0


 35%|███▍      | 3476/10000 [2:48:16<5:19:32,  2.94s/it]

Total reward after episode 3476 is 1043.0


 35%|███▍      | 3477/10000 [2:48:18<4:25:45,  2.44s/it]

Total reward after episode 3477 is 607.0


 35%|███▍      | 3478/10000 [2:48:20<4:22:48,  2.42s/it]

Total reward after episode 3478 is 1046.0


 35%|███▍      | 3479/10000 [2:48:26<6:24:41,  3.54s/it]

Total reward after episode 3479 is 2644.0


 35%|███▍      | 3480/10000 [2:48:30<6:28:40,  3.58s/it]

Total reward after episode 3480 is 1702.0


 35%|███▍      | 3481/10000 [2:48:34<6:35:00,  3.64s/it]

Total reward after episode 3481 is 1711.0


 35%|███▍      | 3482/10000 [2:48:40<8:14:04,  4.55s/it]

Total reward after episode 3482 is 2353.0


 35%|███▍      | 3483/10000 [2:48:44<7:46:55,  4.30s/it]

Total reward after episode 3483 is 1693.0


 35%|███▍      | 3484/10000 [2:48:48<7:42:24,  4.26s/it]

Total reward after episode 3484 is 1692.0


 35%|███▍      | 3485/10000 [2:48:51<6:56:37,  3.84s/it]

Total reward after episode 3485 is 1350.0


 35%|███▍      | 3486/10000 [2:48:58<8:35:47,  4.75s/it]

Total reward after episode 3486 is 2353.0


 35%|███▍      | 3487/10000 [2:49:05<9:57:00,  5.50s/it]

Total reward after episode 3487 is 2354.0


 35%|███▍      | 3488/10000 [2:49:12<10:44:54,  5.94s/it]

Total reward after episode 3488 is 2355.0


 35%|███▍      | 3489/10000 [2:49:13<8:15:19,  4.56s/it] 

Total reward after episode 3489 is 616.0


 35%|███▍      | 3490/10000 [2:49:22<10:16:54,  5.69s/it]

Total reward after episode 3490 is 3040.0


 35%|███▍      | 3491/10000 [2:49:23<7:55:10,  4.38s/it] 

Total reward after episode 3491 is 611.0


 35%|███▍      | 3492/10000 [2:49:26<7:03:17,  3.90s/it]

Total reward after episode 3492 is 1069.0


 35%|███▍      | 3493/10000 [2:49:30<7:09:35,  3.96s/it]

Total reward after episode 3493 is 1864.0


 35%|███▍      | 3494/10000 [2:49:36<8:08:24,  4.50s/it]

Total reward after episode 3494 is 2362.0


 35%|███▍      | 3495/10000 [2:49:41<8:31:45,  4.72s/it]

Total reward after episode 3495 is 2372.0


 35%|███▍      | 3496/10000 [2:49:43<7:15:59,  4.02s/it]

Total reward after episode 3496 is 1059.0


 35%|███▍      | 3497/10000 [2:49:47<7:00:26,  3.88s/it]

Total reward after episode 3497 is 1585.0


 35%|███▍      | 3498/10000 [2:49:49<6:07:32,  3.39s/it]

Total reward after episode 3498 is 1060.0


 35%|███▍      | 3499/10000 [2:49:50<5:00:19,  2.77s/it]

Total reward after episode 3499 is 608.0


 35%|███▌      | 3500/10000 [2:49:51<3:50:38,  2.13s/it]

Total reward after episode 3500 is 251.0


 35%|███▌      | 3501/10000 [2:49:52<3:24:02,  1.88s/it]

Total reward after episode 3501 is 611.0


 35%|███▌      | 3502/10000 [2:49:53<2:43:26,  1.51s/it]

Total reward after episode 3502 is 251.0


 35%|███▌      | 3503/10000 [2:49:57<4:06:47,  2.28s/it]

Total reward after episode 3503 is 1706.0


 35%|███▌      | 3504/10000 [2:50:07<8:13:52,  4.56s/it]

Total reward after episode 3504 is 2328.0


 35%|███▌      | 3505/10000 [2:50:10<7:25:19,  4.11s/it]

Total reward after episode 3505 is 1346.0


 35%|███▌      | 3506/10000 [2:50:11<5:31:53,  3.07s/it]

Total reward after episode 3506 is 250.0


 35%|███▌      | 3507/10000 [2:50:14<5:55:46,  3.29s/it]

Total reward after episode 3507 is 1691.0


 35%|███▌      | 3508/10000 [2:50:16<5:04:00,  2.81s/it]

Total reward after episode 3508 is 632.0


 35%|███▌      | 3509/10000 [2:50:20<5:33:41,  3.08s/it]

Total reward after episode 3509 is 1700.0


 35%|███▌      | 3510/10000 [2:50:22<4:51:54,  2.70s/it]

Total reward after episode 3510 is 615.0


 35%|███▌      | 3511/10000 [2:50:27<6:16:32,  3.48s/it]

Total reward after episode 3511 is 2369.0


 35%|███▌      | 3512/10000 [2:50:31<6:27:57,  3.59s/it]

Total reward after episode 3512 is 1727.0


 35%|███▌      | 3513/10000 [2:50:37<8:01:48,  4.46s/it]

Total reward after episode 3513 is 2356.0


 35%|███▌      | 3514/10000 [2:50:43<8:31:38,  4.73s/it]

Total reward after episode 3514 is 795.0


 35%|███▌      | 3515/10000 [2:50:47<8:09:13,  4.53s/it]

Total reward after episode 3515 is 1852.0


 35%|███▌      | 3516/10000 [2:50:53<9:04:41,  5.04s/it]

Total reward after episode 3516 is 2362.0


 35%|███▌      | 3517/10000 [2:51:00<10:00:52,  5.56s/it]

Total reward after episode 3517 is 2640.0


 35%|███▌      | 3518/10000 [2:51:02<8:25:16,  4.68s/it] 

Total reward after episode 3518 is 1044.0


 35%|███▌      | 3519/10000 [2:51:06<7:36:04,  4.22s/it]

Total reward after episode 3519 is 639.0


 35%|███▌      | 3520/10000 [2:51:09<7:19:02,  4.07s/it]

Total reward after episode 3520 is 1728.0


 35%|███▌      | 3521/10000 [2:51:13<7:20:04,  4.08s/it]

Total reward after episode 3521 is 1931.0


 35%|███▌      | 3522/10000 [2:51:17<7:21:19,  4.09s/it]

Total reward after episode 3522 is 1929.0


 35%|███▌      | 3523/10000 [2:51:21<7:12:35,  4.01s/it]

Total reward after episode 3523 is 1691.0


 35%|███▌      | 3524/10000 [2:51:25<6:54:06,  3.84s/it]

Total reward after episode 3524 is 1584.0


 35%|███▌      | 3525/10000 [2:51:33<9:24:16,  5.23s/it]

Total reward after episode 3525 is 2338.0


 35%|███▌      | 3526/10000 [2:51:36<8:15:56,  4.60s/it]

Total reward after episode 3526 is 1348.0


 35%|███▌      | 3527/10000 [2:51:39<7:06:00,  3.95s/it]

Total reward after episode 3527 is 1051.0


 35%|███▌      | 3528/10000 [2:51:40<5:40:11,  3.15s/it]

Total reward after episode 3528 is 607.0


 35%|███▌      | 3529/10000 [2:51:43<5:24:11,  3.01s/it]

Total reward after episode 3529 is 1048.0


 35%|███▌      | 3530/10000 [2:51:47<5:50:07,  3.25s/it]

Total reward after episode 3530 is 1712.0


 35%|███▌      | 3531/10000 [2:51:50<6:13:13,  3.46s/it]

Total reward after episode 3531 is 1849.0


 35%|███▌      | 3532/10000 [2:51:52<5:16:41,  2.94s/it]

Total reward after episode 3532 is 637.0


 35%|███▌      | 3533/10000 [2:51:55<4:57:34,  2.76s/it]

Total reward after episode 3533 is 1050.0


 35%|███▌      | 3534/10000 [2:51:59<5:36:25,  3.12s/it]

Total reward after episode 3534 is 1689.0


 35%|███▌      | 3535/10000 [2:52:13<11:34:40,  6.45s/it]

Total reward after episode 3535 is 2991.0


 35%|███▌      | 3536/10000 [2:52:14<8:47:17,  4.89s/it] 

Total reward after episode 3536 is 607.0


 35%|███▌      | 3537/10000 [2:52:15<6:51:43,  3.82s/it]

Total reward after episode 3537 is 611.0


 35%|███▌      | 3538/10000 [2:52:20<7:21:35,  4.10s/it]

Total reward after episode 3538 is 1923.0


 35%|███▌      | 3539/10000 [2:52:24<7:04:31,  3.94s/it]

Total reward after episode 3539 is 1701.0


 35%|███▌      | 3540/10000 [2:52:27<6:57:36,  3.88s/it]

Total reward after episode 3540 is 810.0


 35%|███▌      | 3541/10000 [2:52:30<6:23:06,  3.56s/it]

Total reward after episode 3541 is 1351.0


 35%|███▌      | 3542/10000 [2:52:31<4:48:25,  2.68s/it]

Total reward after episode 3542 is 252.0


 35%|███▌      | 3543/10000 [2:52:35<5:31:25,  3.08s/it]

Total reward after episode 3543 is 1722.0


 35%|███▌      | 3544/10000 [2:52:36<4:44:00,  2.64s/it]

Total reward after episode 3544 is 603.0


 35%|███▌      | 3545/10000 [2:52:38<4:08:59,  2.31s/it]

Total reward after episode 3545 is 737.0


 35%|███▌      | 3546/10000 [2:52:40<3:52:22,  2.16s/it]

Total reward after episode 3546 is 652.0


 35%|███▌      | 3547/10000 [2:52:44<4:51:50,  2.71s/it]

Total reward after episode 3547 is 1685.0


 35%|███▌      | 3548/10000 [2:52:47<5:14:07,  2.92s/it]

Total reward after episode 3548 is 1586.0


 35%|███▌      | 3549/10000 [2:52:51<5:41:03,  3.17s/it]

Total reward after episode 3549 is 1693.0


 36%|███▌      | 3550/10000 [2:52:56<6:45:01,  3.77s/it]

Total reward after episode 3550 is 2373.0


 36%|███▌      | 3551/10000 [2:52:57<5:24:10,  3.02s/it]

Total reward after episode 3551 is 608.0


 36%|███▌      | 3552/10000 [2:52:59<4:27:58,  2.49s/it]

Total reward after episode 3552 is 607.0


 36%|███▌      | 3553/10000 [2:53:04<6:07:19,  3.42s/it]

Total reward after episode 3553 is 2370.0


 36%|███▌      | 3554/10000 [2:53:18<11:40:58,  6.52s/it]

Total reward after episode 3554 is 2995.0


 36%|███▌      | 3555/10000 [2:53:21<9:43:56,  5.44s/it] 

Total reward after episode 3555 is 1041.0


 36%|███▌      | 3556/10000 [2:53:27<9:56:06,  5.55s/it]

Total reward after episode 3556 is 2365.0


 36%|███▌      | 3557/10000 [2:53:30<8:55:41,  4.99s/it]

Total reward after episode 3557 is 1694.0


 36%|███▌      | 3558/10000 [2:53:34<7:57:06,  4.44s/it]

Total reward after episode 3558 is 1327.0


 36%|███▌      | 3559/10000 [2:53:39<8:19:13,  4.65s/it]

Total reward after episode 3559 is 1917.0


 36%|███▌      | 3560/10000 [2:53:42<7:21:35,  4.11s/it]

Total reward after episode 3560 is 1045.0


 36%|███▌      | 3561/10000 [2:53:48<8:30:40,  4.76s/it]

Total reward after episode 3561 is 2641.0


 36%|███▌      | 3562/10000 [2:53:53<8:36:44,  4.82s/it]

Total reward after episode 3562 is 2374.0


 36%|███▌      | 3563/10000 [2:53:57<8:02:15,  4.50s/it]

Total reward after episode 3563 is 1697.0


 36%|███▌      | 3564/10000 [2:54:00<7:37:43,  4.27s/it]

Total reward after episode 3564 is 1344.0


 36%|███▌      | 3565/10000 [2:54:04<7:12:16,  4.03s/it]

Total reward after episode 3565 is 1698.0


 36%|███▌      | 3566/10000 [2:54:07<6:44:45,  3.77s/it]

Total reward after episode 3566 is 1587.0


 36%|███▌      | 3567/10000 [2:54:10<6:36:09,  3.70s/it]

Total reward after episode 3567 is 1583.0


 36%|███▌      | 3568/10000 [2:54:14<6:45:01,  3.78s/it]

Total reward after episode 3568 is 1863.0


 36%|███▌      | 3569/10000 [2:54:18<6:48:30,  3.81s/it]

Total reward after episode 3569 is 1689.0


 36%|███▌      | 3570/10000 [2:54:26<9:03:43,  5.07s/it]

Total reward after episode 3570 is 3048.0


 36%|███▌      | 3571/10000 [2:54:32<9:37:46,  5.39s/it]

Total reward after episode 3571 is 2359.0


 36%|███▌      | 3572/10000 [2:54:38<9:34:55,  5.37s/it]

Total reward after episode 3572 is 2367.0


 36%|███▌      | 3573/10000 [2:54:41<8:39:13,  4.85s/it]

Total reward after episode 3573 is 1723.0


 36%|███▌      | 3574/10000 [2:54:43<6:44:04,  3.77s/it]

Total reward after episode 3574 is 606.0


 36%|███▌      | 3575/10000 [2:54:46<6:43:44,  3.77s/it]

Total reward after episode 3575 is 1725.0


 36%|███▌      | 3576/10000 [2:54:50<6:41:20,  3.75s/it]

Total reward after episode 3576 is 1690.0


 36%|███▌      | 3577/10000 [2:54:51<5:04:53,  2.85s/it]

Total reward after episode 3577 is 243.0


 36%|███▌      | 3578/10000 [2:54:54<5:18:41,  2.98s/it]

Total reward after episode 3578 is 1586.0


 36%|███▌      | 3579/10000 [2:54:58<6:00:14,  3.37s/it]

Total reward after episode 3579 is 1930.0


 36%|███▌      | 3580/10000 [2:55:02<6:13:17,  3.49s/it]

Total reward after episode 3580 is 1706.0


 36%|███▌      | 3581/10000 [2:55:04<5:04:05,  2.84s/it]

Total reward after episode 3581 is 616.0


 36%|███▌      | 3582/10000 [2:55:07<5:16:38,  2.96s/it]

Total reward after episode 3582 is 1586.0


 36%|███▌      | 3583/10000 [2:55:07<4:02:44,  2.27s/it]

Total reward after episode 3583 is 252.0


 36%|███▌      | 3584/10000 [2:55:13<5:35:44,  3.14s/it]

Total reward after episode 3584 is 2365.0


 36%|███▌      | 3585/10000 [2:55:18<6:35:56,  3.70s/it]

Total reward after episode 3585 is 2371.0


 36%|███▌      | 3586/10000 [2:55:20<6:05:53,  3.42s/it]

Total reward after episode 3586 is 1339.0


 36%|███▌      | 3587/10000 [2:55:25<6:29:29,  3.64s/it]

Total reward after episode 3587 is 1931.0


 36%|███▌      | 3588/10000 [2:55:33<8:58:27,  5.04s/it]

Total reward after episode 3588 is 3044.0


 36%|███▌      | 3589/10000 [2:55:36<8:10:28,  4.59s/it]

Total reward after episode 3589 is 1581.0


 36%|███▌      | 3590/10000 [2:55:44<9:56:35,  5.58s/it]

Total reward after episode 3590 is 3049.0


 36%|███▌      | 3591/10000 [2:55:48<8:40:59,  4.88s/it]

Total reward after episode 3591 is 1337.0


 36%|███▌      | 3592/10000 [2:55:51<7:56:00,  4.46s/it]

Total reward after episode 3592 is 1689.0


 36%|███▌      | 3593/10000 [2:55:54<7:07:23,  4.00s/it]

Total reward after episode 3593 is 1350.0


 36%|███▌      | 3594/10000 [2:55:55<5:38:33,  3.17s/it]

Total reward after episode 3594 is 611.0


 36%|███▌      | 3595/10000 [2:56:05<9:06:09,  5.12s/it]

Total reward after episode 3595 is 2328.0


 36%|███▌      | 3596/10000 [2:56:06<7:11:00,  4.04s/it]

Total reward after episode 3596 is 607.0


 36%|███▌      | 3597/10000 [2:56:08<5:55:20,  3.33s/it]

Total reward after episode 3597 is 633.0


 36%|███▌      | 3598/10000 [2:56:14<7:20:28,  4.13s/it]

Total reward after episode 3598 is 2364.0


 36%|███▌      | 3599/10000 [2:56:15<5:47:32,  3.26s/it]

Total reward after episode 3599 is 608.0


 36%|███▌      | 3600/10000 [2:56:18<5:29:31,  3.09s/it]

Total reward after episode 3600 is 1339.0


 36%|███▌      | 3601/10000 [2:56:25<7:30:39,  4.23s/it]

Total reward after episode 3601 is 2634.0


 36%|███▌      | 3602/10000 [2:56:27<6:32:36,  3.68s/it]

Total reward after episode 3602 is 1049.0


 36%|███▌      | 3603/10000 [2:56:31<6:40:35,  3.76s/it]

Total reward after episode 3603 is 1853.0


 36%|███▌      | 3604/10000 [2:56:35<6:44:41,  3.80s/it]

Total reward after episode 3604 is 1866.0


 36%|███▌      | 3605/10000 [2:56:36<5:22:33,  3.03s/it]

Total reward after episode 3605 is 610.0


 36%|███▌      | 3606/10000 [2:56:42<6:37:52,  3.73s/it]

Total reward after episode 3606 is 2361.0


 36%|███▌      | 3607/10000 [2:56:45<6:17:54,  3.55s/it]

Total reward after episode 3607 is 1040.0


 36%|███▌      | 3608/10000 [2:56:48<6:22:34,  3.59s/it]

Total reward after episode 3608 is 1699.0


 36%|███▌      | 3609/10000 [2:56:55<7:58:44,  4.49s/it]

Total reward after episode 3609 is 1676.0


 36%|███▌      | 3610/10000 [2:56:56<6:14:30,  3.52s/it]

Total reward after episode 3610 is 609.0


 36%|███▌      | 3611/10000 [2:57:00<6:24:19,  3.61s/it]

Total reward after episode 3611 is 1693.0


 36%|███▌      | 3612/10000 [2:57:02<5:43:55,  3.23s/it]

Total reward after episode 3612 is 1049.0


 36%|███▌      | 3613/10000 [2:57:05<5:30:30,  3.10s/it]

Total reward after episode 3613 is 1340.0


 36%|███▌      | 3614/10000 [2:57:09<5:40:58,  3.20s/it]

Total reward after episode 3614 is 1337.0


 36%|███▌      | 3615/10000 [2:57:14<6:50:57,  3.86s/it]

Total reward after episode 3615 is 2368.0


 36%|███▌      | 3616/10000 [2:57:21<8:11:54,  4.62s/it]

Total reward after episode 3616 is 2354.0


 36%|███▌      | 3617/10000 [2:57:23<7:06:06,  4.01s/it]

Total reward after episode 3617 is 1056.0


 36%|███▌      | 3618/10000 [2:57:27<6:50:12,  3.86s/it]

Total reward after episode 3618 is 1697.0


 36%|███▌      | 3619/10000 [2:57:30<6:47:37,  3.83s/it]

Total reward after episode 3619 is 1864.0


 36%|███▌      | 3620/10000 [2:57:34<6:38:02,  3.74s/it]

Total reward after episode 3620 is 1695.0


 36%|███▌      | 3621/10000 [2:57:41<8:10:47,  4.62s/it]

Total reward after episode 3621 is 1306.0


 36%|███▌      | 3622/10000 [2:57:43<6:52:28,  3.88s/it]

Total reward after episode 3622 is 1059.0


 36%|███▌      | 3623/10000 [2:57:44<5:34:55,  3.15s/it]

Total reward after episode 3623 is 632.0


 36%|███▌      | 3624/10000 [2:57:49<6:16:37,  3.54s/it]

Total reward after episode 3624 is 1334.0


 36%|███▋      | 3625/10000 [2:57:55<7:32:52,  4.26s/it]

Total reward after episode 3625 is 2357.0


 36%|███▋      | 3626/10000 [2:58:02<9:11:52,  5.19s/it]

Total reward after episode 3626 is 2349.0


 36%|███▋      | 3627/10000 [2:58:06<8:36:59,  4.87s/it]

Total reward after episode 3627 is 1841.0


 36%|███▋      | 3628/10000 [2:58:10<8:20:50,  4.72s/it]

Total reward after episode 3628 is 1847.0


 36%|███▋      | 3629/10000 [2:58:12<6:32:56,  3.70s/it]

Total reward after episode 3629 is 609.0


 36%|███▋      | 3630/10000 [2:58:17<7:10:31,  4.06s/it]

Total reward after episode 3630 is 1689.0


 36%|███▋      | 3631/10000 [2:58:20<6:52:08,  3.88s/it]

Total reward after episode 3631 is 1695.0


 36%|███▋      | 3632/10000 [2:58:24<7:07:02,  4.02s/it]

Total reward after episode 3632 is 1929.0


 36%|███▋      | 3633/10000 [2:58:28<6:50:36,  3.87s/it]

Total reward after episode 3633 is 1342.0


 36%|███▋      | 3634/10000 [2:58:33<7:40:43,  4.34s/it]

Total reward after episode 3634 is 2361.0


 36%|███▋      | 3635/10000 [2:58:37<7:05:16,  4.01s/it]

Total reward after episode 3635 is 1590.0


 36%|███▋      | 3636/10000 [2:58:41<7:13:16,  4.08s/it]

Total reward after episode 3636 is 1859.0


 36%|███▋      | 3637/10000 [2:58:46<7:56:27,  4.49s/it]

Total reward after episode 3637 is 2362.0


 36%|███▋      | 3638/10000 [2:58:52<8:35:18,  4.86s/it]

Total reward after episode 3638 is 2646.0


 36%|███▋      | 3639/10000 [2:58:56<7:57:39,  4.51s/it]

Total reward after episode 3639 is 1696.0


 36%|███▋      | 3640/10000 [2:58:57<6:15:28,  3.54s/it]

Total reward after episode 3640 is 609.0


 36%|███▋      | 3641/10000 [2:59:07<9:30:32,  5.38s/it]

Total reward after episode 3641 is 3031.0


 36%|███▋      | 3642/10000 [2:59:10<8:09:23,  4.62s/it]

Total reward after episode 3642 is 1340.0


 36%|███▋      | 3643/10000 [2:59:15<8:37:20,  4.88s/it]

Total reward after episode 3643 is 2365.0


 36%|███▋      | 3644/10000 [2:59:21<9:08:03,  5.17s/it]

Total reward after episode 3644 is 2362.0


 36%|███▋      | 3645/10000 [2:59:24<8:16:59,  4.69s/it]

Total reward after episode 3645 is 1692.0


 36%|███▋      | 3646/10000 [2:59:30<8:30:05,  4.82s/it]

Total reward after episode 3646 is 2373.0


 36%|███▋      | 3647/10000 [2:59:35<8:42:11,  4.93s/it]

Total reward after episode 3647 is 2372.0


 36%|███▋      | 3648/10000 [2:59:42<9:57:42,  5.65s/it]

Total reward after episode 3648 is 2350.0


 36%|███▋      | 3649/10000 [2:59:46<9:16:23,  5.26s/it]

Total reward after episode 3649 is 1578.0


 36%|███▋      | 3650/10000 [2:59:50<8:19:43,  4.72s/it]

Total reward after episode 3650 is 811.0


 37%|███▋      | 3651/10000 [3:00:02<12:27:35,  7.06s/it]

Total reward after episode 3651 is 3004.0


 37%|███▋      | 3652/10000 [3:00:06<10:42:16,  6.07s/it]

Total reward after episode 3652 is 1580.0


 37%|███▋      | 3653/10000 [3:00:12<10:37:42,  6.03s/it]

Total reward after episode 3653 is 2363.0


 37%|███▋      | 3654/10000 [3:00:13<8:07:15,  4.61s/it] 

Total reward after episode 3654 is 610.0


 37%|███▋      | 3655/10000 [3:00:14<6:00:58,  3.41s/it]

Total reward after episode 3655 is 252.0


 37%|███▋      | 3656/10000 [3:00:28<11:38:12,  6.60s/it]

Total reward after episode 3656 is 2990.0


 37%|███▋      | 3657/10000 [3:00:36<12:08:56,  6.90s/it]

Total reward after episode 3657 is 2344.0


 37%|███▋      | 3658/10000 [3:00:42<11:48:56,  6.71s/it]

Total reward after episode 3658 is 2359.0


 37%|███▋      | 3659/10000 [3:00:47<10:52:35,  6.18s/it]

Total reward after episode 3659 is 2371.0


 37%|███▋      | 3660/10000 [3:00:51<9:46:59,  5.56s/it] 

Total reward after episode 3660 is 1694.0


 37%|███▋      | 3661/10000 [3:00:53<8:03:50,  4.58s/it]

Total reward after episode 3661 is 1046.0


 37%|███▋      | 3662/10000 [3:00:57<7:52:10,  4.47s/it]

Total reward after episode 3662 is 1934.0


 37%|███▋      | 3663/10000 [3:01:02<7:41:02,  4.37s/it]

Total reward after episode 3663 is 1691.0


 37%|███▋      | 3664/10000 [3:01:09<9:17:25,  5.28s/it]

Total reward after episode 3664 is 2618.0


 37%|███▋      | 3665/10000 [3:01:13<8:37:01,  4.90s/it]

Total reward after episode 3665 is 1692.0


 37%|███▋      | 3666/10000 [3:01:17<8:14:24,  4.68s/it]

Total reward after episode 3666 is 1932.0


 37%|███▋      | 3667/10000 [3:01:28<11:40:36,  6.64s/it]

Total reward after episode 3667 is 2307.0


 37%|███▋      | 3668/10000 [3:01:30<8:54:19,  5.06s/it] 

Total reward after episode 3668 is 612.0


 37%|███▋      | 3669/10000 [3:01:38<10:22:39,  5.90s/it]

Total reward after episode 3669 is 2345.0


 37%|███▋      | 3670/10000 [3:01:38<7:35:42,  4.32s/it] 

Total reward after episode 3670 is 249.0


 37%|███▋      | 3671/10000 [3:01:44<8:35:53,  4.89s/it]

Total reward after episode 3671 is 2360.0


 37%|███▋      | 3672/10000 [3:01:48<8:07:47,  4.63s/it]

Total reward after episode 3672 is 1925.0


 37%|███▋      | 3673/10000 [3:01:52<7:47:41,  4.44s/it]

Total reward after episode 3673 is 1690.0


 37%|███▋      | 3674/10000 [3:01:57<7:38:48,  4.35s/it]

Total reward after episode 3674 is 1700.0


 37%|███▋      | 3675/10000 [3:02:03<8:39:04,  4.92s/it]

Total reward after episode 3675 is 2357.0


 37%|███▋      | 3676/10000 [3:02:07<8:02:59,  4.58s/it]

Total reward after episode 3676 is 1869.0


 37%|███▋      | 3677/10000 [3:02:10<7:36:10,  4.33s/it]

Total reward after episode 3677 is 1719.0


 37%|███▋      | 3678/10000 [3:02:16<8:03:47,  4.59s/it]

Total reward after episode 3678 is 2373.0


 37%|███▋      | 3679/10000 [3:02:19<7:23:06,  4.21s/it]

Total reward after episode 3679 is 1589.0


 37%|███▋      | 3680/10000 [3:02:23<7:04:32,  4.03s/it]

Total reward after episode 3680 is 1324.0


 37%|███▋      | 3681/10000 [3:02:24<5:38:02,  3.21s/it]

Total reward after episode 3681 is 614.0


 37%|███▋      | 3682/10000 [3:02:35<9:48:25,  5.59s/it]

Total reward after episode 3682 is 3018.0


 37%|███▋      | 3683/10000 [3:02:38<8:28:18,  4.83s/it]

Total reward after episode 3683 is 1342.0


 37%|███▋      | 3684/10000 [3:02:44<9:17:51,  5.30s/it]

Total reward after episode 3684 is 2358.0


 37%|███▋      | 3685/10000 [3:02:46<7:11:35,  4.10s/it]

Total reward after episode 3685 is 612.0


 37%|███▋      | 3686/10000 [3:02:55<9:56:25,  5.67s/it]

Total reward after episode 3686 is 2332.0


 37%|███▋      | 3687/10000 [3:03:02<10:31:57,  6.01s/it]

Total reward after episode 3687 is 3053.0


 37%|███▋      | 3688/10000 [3:03:03<7:44:46,  4.42s/it] 

Total reward after episode 3688 is 238.0


 37%|███▋      | 3689/10000 [3:03:08<8:16:48,  4.72s/it]

Total reward after episode 3689 is 2366.0


 37%|███▋      | 3690/10000 [3:03:11<7:26:41,  4.25s/it]

Total reward after episode 3690 is 1347.0


 37%|███▋      | 3691/10000 [3:03:12<5:53:57,  3.37s/it]

Total reward after episode 3691 is 611.0


 37%|███▋      | 3692/10000 [3:03:19<7:27:08,  4.25s/it]

Total reward after episode 3692 is 2638.0


 37%|███▋      | 3693/10000 [3:03:23<7:10:39,  4.10s/it]

Total reward after episode 3693 is 1695.0


 37%|███▋      | 3694/10000 [3:03:25<6:24:52,  3.66s/it]

Total reward after episode 3694 is 1052.0


 37%|███▋      | 3695/10000 [3:03:26<4:53:38,  2.79s/it]

Total reward after episode 3695 is 230.0


 37%|███▋      | 3696/10000 [3:03:30<5:33:00,  3.17s/it]

Total reward after episode 3696 is 1931.0


 37%|███▋      | 3697/10000 [3:03:34<6:00:40,  3.43s/it]

Total reward after episode 3697 is 1703.0


 37%|███▋      | 3698/10000 [3:03:38<6:09:16,  3.52s/it]

Total reward after episode 3698 is 1036.0


 37%|███▋      | 3699/10000 [3:03:52<11:40:24,  6.67s/it]

Total reward after episode 3699 is 2993.0


 37%|███▋      | 3700/10000 [3:03:58<11:31:43,  6.59s/it]

Total reward after episode 3700 is 2360.0


 37%|███▋      | 3701/10000 [3:04:01<9:34:24,  5.47s/it] 

Total reward after episode 3701 is 1350.0


 37%|███▋      | 3702/10000 [3:04:02<7:22:32,  4.22s/it]

Total reward after episode 3702 is 613.0


 37%|███▋      | 3703/10000 [3:04:07<7:52:31,  4.50s/it]

Total reward after episode 3703 is 2371.0


 37%|███▋      | 3704/10000 [3:04:13<8:25:45,  4.82s/it]

Total reward after episode 3704 is 2368.0


 37%|███▋      | 3705/10000 [3:04:16<7:17:14,  4.17s/it]

Total reward after episode 3705 is 1048.0


 37%|███▋      | 3706/10000 [3:04:22<8:12:16,  4.69s/it]

Total reward after episode 3706 is 2650.0


 37%|███▋      | 3707/10000 [3:04:27<8:38:27,  4.94s/it]

Total reward after episode 3707 is 2366.0


 37%|███▋      | 3708/10000 [3:04:33<9:14:52,  5.29s/it]

Total reward after episode 3708 is 2358.0


 37%|███▋      | 3709/10000 [3:04:36<7:50:08,  4.48s/it]

Total reward after episode 3709 is 1046.0


 37%|███▋      | 3710/10000 [3:04:38<6:38:24,  3.80s/it]

Total reward after episode 3710 is 1053.0


 37%|███▋      | 3711/10000 [3:04:41<6:09:59,  3.53s/it]

Total reward after episode 3711 is 1441.0


 37%|███▋      | 3712/10000 [3:04:47<7:23:49,  4.23s/it]

Total reward after episode 3712 is 2366.0


 37%|███▋      | 3713/10000 [3:04:51<7:20:07,  4.20s/it]

Total reward after episode 3713 is 1692.0


 37%|███▋      | 3714/10000 [3:04:54<6:33:42,  3.76s/it]

Total reward after episode 3714 is 1048.0


 37%|███▋      | 3715/10000 [3:04:55<5:15:48,  3.01s/it]

Total reward after episode 3715 is 611.0


 37%|███▋      | 3716/10000 [3:04:59<5:51:58,  3.36s/it]

Total reward after episode 3716 is 1691.0


 37%|███▋      | 3717/10000 [3:05:02<5:49:58,  3.34s/it]

Total reward after episode 3717 is 1584.0


 37%|███▋      | 3718/10000 [3:05:05<5:27:00,  3.12s/it]

Total reward after episode 3718 is 1050.0


 37%|███▋      | 3719/10000 [3:05:09<5:39:06,  3.24s/it]

Total reward after episode 3719 is 1703.0


 37%|███▋      | 3720/10000 [3:05:17<8:28:28,  4.86s/it]

Total reward after episode 3720 is 3040.0


 37%|███▋      | 3721/10000 [3:05:24<9:43:29,  5.58s/it]

Total reward after episode 3721 is 3053.0


 37%|███▋      | 3722/10000 [3:05:27<8:19:25,  4.77s/it]

Total reward after episode 3722 is 1045.0


 37%|███▋      | 3723/10000 [3:05:28<6:10:42,  3.54s/it]

Total reward after episode 3723 is 240.0


 37%|███▋      | 3724/10000 [3:05:32<6:11:00,  3.55s/it]

Total reward after episode 3724 is 1690.0


 37%|███▋      | 3725/10000 [3:05:37<6:57:52,  4.00s/it]

Total reward after episode 3725 is 1854.0


 37%|███▋      | 3726/10000 [3:05:42<7:30:19,  4.31s/it]

Total reward after episode 3726 is 1940.0


 37%|███▋      | 3727/10000 [3:05:44<6:25:02,  3.68s/it]

Total reward after episode 3727 is 1050.0


 37%|███▋      | 3728/10000 [3:05:46<5:37:40,  3.23s/it]

Total reward after episode 3728 is 733.0


 37%|███▋      | 3729/10000 [3:05:49<5:24:37,  3.11s/it]

Total reward after episode 3729 is 1328.0


 37%|███▋      | 3730/10000 [3:05:53<5:44:54,  3.30s/it]

Total reward after episode 3730 is 1860.0


 37%|███▋      | 3731/10000 [3:05:56<5:55:26,  3.40s/it]

Total reward after episode 3731 is 1695.0


 37%|███▋      | 3732/10000 [3:06:00<6:04:50,  3.49s/it]

Total reward after episode 3732 is 1692.0


 37%|███▋      | 3733/10000 [3:06:08<8:12:19,  4.71s/it]

Total reward after episode 3733 is 2345.0


 37%|███▋      | 3734/10000 [3:06:10<7:07:12,  4.09s/it]

Total reward after episode 3734 is 1046.0


 37%|███▋      | 3735/10000 [3:06:13<6:37:38,  3.81s/it]

Total reward after episode 3735 is 1589.0


 37%|███▋      | 3736/10000 [3:06:16<6:01:00,  3.46s/it]

Total reward after episode 3736 is 1047.0


 37%|███▋      | 3737/10000 [3:06:22<7:29:03,  4.30s/it]

Total reward after episode 3737 is 2358.0


 37%|███▋      | 3738/10000 [3:06:26<7:11:07,  4.13s/it]

Total reward after episode 3738 is 1701.0


 37%|███▋      | 3739/10000 [3:06:29<6:32:30,  3.76s/it]

Total reward after episode 3739 is 1350.0


 37%|███▋      | 3740/10000 [3:06:33<6:54:49,  3.98s/it]

Total reward after episode 3740 is 1925.0


 37%|███▋      | 3741/10000 [3:06:38<7:14:10,  4.16s/it]

Total reward after episode 3741 is 1690.0


 37%|███▋      | 3742/10000 [3:06:39<5:42:50,  3.29s/it]

Total reward after episode 3742 is 611.0


 37%|███▋      | 3743/10000 [3:06:43<5:59:11,  3.44s/it]

Total reward after episode 3743 is 1863.0


 37%|███▋      | 3744/10000 [3:06:50<7:52:14,  4.53s/it]

Total reward after episode 3744 is 2346.0


 37%|███▋      | 3745/10000 [3:06:51<6:10:22,  3.55s/it]

Total reward after episode 3745 is 610.0


 37%|███▋      | 3746/10000 [3:06:57<7:11:47,  4.14s/it]

Total reward after episode 3746 is 2369.0


 37%|███▋      | 3747/10000 [3:07:03<8:08:15,  4.69s/it]

Total reward after episode 3747 is 2361.0


 37%|███▋      | 3748/10000 [3:07:07<7:40:38,  4.42s/it]

Total reward after episode 3748 is 1695.0


 37%|███▋      | 3749/10000 [3:07:15<9:50:21,  5.67s/it]

Total reward after episode 3749 is 3041.0


 38%|███▊      | 3750/10000 [3:07:22<10:11:46,  5.87s/it]

Total reward after episode 3750 is 2356.0


 38%|███▊      | 3751/10000 [3:07:23<7:55:15,  4.56s/it] 

Total reward after episode 3751 is 738.0


 38%|███▊      | 3752/10000 [3:07:27<7:24:52,  4.27s/it]

Total reward after episode 3752 is 1580.0


 38%|███▊      | 3753/10000 [3:07:31<7:29:46,  4.32s/it]

Total reward after episode 3753 is 1863.0


 38%|███▊      | 3754/10000 [3:07:39<9:07:48,  5.26s/it]

Total reward after episode 3754 is 3052.0


 38%|███▊      | 3755/10000 [3:07:40<7:13:27,  4.16s/it]

Total reward after episode 3755 is 629.0


 38%|███▊      | 3756/10000 [3:07:46<7:58:33,  4.60s/it]

Total reward after episode 3756 is 1420.0


 38%|███▊      | 3757/10000 [3:07:55<10:32:58,  6.08s/it]

Total reward after episode 3757 is 3034.0


 38%|███▊      | 3758/10000 [3:07:56<7:42:39,  4.45s/it] 

Total reward after episode 3758 is 251.0


 38%|███▊      | 3759/10000 [3:08:01<8:16:58,  4.78s/it]

Total reward after episode 3759 is 2370.0


 38%|███▊      | 3760/10000 [3:08:09<9:40:46,  5.58s/it]

Total reward after episode 3760 is 2631.0


 38%|███▊      | 3761/10000 [3:08:11<8:06:20,  4.68s/it]

Total reward after episode 3761 is 1046.0


 38%|███▊      | 3762/10000 [3:08:15<7:42:47,  4.45s/it]

Total reward after episode 3762 is 1865.0


 38%|███▊      | 3763/10000 [3:08:17<6:03:46,  3.50s/it]

Total reward after episode 3763 is 606.0


 38%|███▊      | 3764/10000 [3:08:18<5:01:23,  2.90s/it]

Total reward after episode 3764 is 654.0


 38%|███▊      | 3765/10000 [3:08:25<7:07:15,  4.11s/it]

Total reward after episode 3765 is 3057.0


 38%|███▊      | 3766/10000 [3:08:26<5:38:26,  3.26s/it]

Total reward after episode 3766 is 612.0


 38%|███▊      | 3767/10000 [3:08:34<8:02:25,  4.64s/it]

Total reward after episode 3767 is 2346.0


 38%|███▊      | 3768/10000 [3:08:38<7:25:46,  4.29s/it]

Total reward after episode 3768 is 1694.0


 38%|███▊      | 3769/10000 [3:08:45<8:45:59,  5.06s/it]

Total reward after episode 3769 is 2354.0


 38%|███▊      | 3770/10000 [3:08:50<9:08:11,  5.28s/it]

Total reward after episode 3770 is 2365.0


 38%|███▊      | 3771/10000 [3:08:52<7:05:33,  4.10s/it]

Total reward after episode 3771 is 615.0


 38%|███▊      | 3772/10000 [3:08:53<5:37:35,  3.25s/it]

Total reward after episode 3772 is 612.0


 38%|███▊      | 3773/10000 [3:08:56<5:30:00,  3.18s/it]

Total reward after episode 3773 is 1431.0


 38%|███▊      | 3774/10000 [3:08:58<5:01:56,  2.91s/it]

Total reward after episode 3774 is 1052.0


 38%|███▊      | 3775/10000 [3:09:02<5:29:44,  3.18s/it]

Total reward after episode 3775 is 1864.0


 38%|███▊      | 3776/10000 [3:09:05<5:22:19,  3.11s/it]

Total reward after episode 3776 is 1433.0


 38%|███▊      | 3777/10000 [3:09:08<5:24:15,  3.13s/it]

Total reward after episode 3777 is 1346.0


 38%|███▊      | 3778/10000 [3:09:11<5:18:52,  3.08s/it]

Total reward after episode 3778 is 1341.0


 38%|███▊      | 3779/10000 [3:09:14<5:15:57,  3.05s/it]

Total reward after episode 3779 is 1428.0


 38%|███▊      | 3780/10000 [3:09:19<6:09:34,  3.57s/it]

Total reward after episode 3780 is 1922.0


 38%|███▊      | 3781/10000 [3:09:22<5:54:35,  3.42s/it]

Total reward after episode 3781 is 1424.0


 38%|███▊      | 3782/10000 [3:09:23<4:49:24,  2.79s/it]

Total reward after episode 3782 is 612.0


 38%|███▊      | 3783/10000 [3:09:25<4:03:24,  2.35s/it]

Total reward after episode 3783 is 611.0


 38%|███▊      | 3784/10000 [3:09:28<4:42:01,  2.72s/it]

Total reward after episode 3784 is 1342.0


 38%|███▊      | 3785/10000 [3:09:33<5:44:26,  3.33s/it]

Total reward after episode 3785 is 1919.0


 38%|███▊      | 3786/10000 [3:09:38<6:35:10,  3.82s/it]

Total reward after episode 3786 is 2374.0


 38%|███▊      | 3787/10000 [3:09:45<8:16:22,  4.79s/it]

Total reward after episode 3787 is 2731.0


 38%|███▊      | 3788/10000 [3:09:49<7:57:57,  4.62s/it]

Total reward after episode 3788 is 1583.0


 38%|███▊      | 3789/10000 [3:09:53<7:42:24,  4.47s/it]

Total reward after episode 3789 is 1933.0


 38%|███▊      | 3790/10000 [3:09:57<7:16:09,  4.21s/it]

Total reward after episode 3790 is 1038.0


 38%|███▊      | 3791/10000 [3:10:01<7:10:06,  4.16s/it]

Total reward after episode 3791 is 1694.0


 38%|███▊      | 3792/10000 [3:10:05<6:53:04,  3.99s/it]

Total reward after episode 3792 is 1717.0


 38%|███▊      | 3793/10000 [3:10:08<6:22:32,  3.70s/it]

Total reward after episode 3793 is 1432.0


 38%|███▊      | 3794/10000 [3:10:11<6:17:02,  3.65s/it]

Total reward after episode 3794 is 1585.0


 38%|███▊      | 3795/10000 [3:10:15<6:13:44,  3.61s/it]

Total reward after episode 3795 is 1695.0


 38%|███▊      | 3796/10000 [3:10:21<7:24:18,  4.30s/it]

Total reward after episode 3796 is 2361.0


 38%|███▊      | 3797/10000 [3:10:27<8:31:27,  4.95s/it]

Total reward after episode 3797 is 2357.0


 38%|███▊      | 3798/10000 [3:10:31<8:14:53,  4.79s/it]

Total reward after episode 3798 is 1931.0


 38%|███▊      | 3799/10000 [3:10:35<7:37:34,  4.43s/it]

Total reward after episode 3799 is 1715.0


 38%|███▊      | 3800/10000 [3:10:41<8:12:00,  4.76s/it]

Total reward after episode 3800 is 2361.0


 38%|███▊      | 3801/10000 [3:10:45<7:48:14,  4.53s/it]

Total reward after episode 3801 is 1888.0


 38%|███▊      | 3802/10000 [3:10:55<10:43:22,  6.23s/it]

Total reward after episode 3802 is 3028.0


 38%|███▊      | 3803/10000 [3:10:57<8:45:41,  5.09s/it] 

Total reward after episode 3803 is 1051.0


 38%|███▊      | 3804/10000 [3:11:01<8:09:57,  4.74s/it]

Total reward after episode 3804 is 1929.0


 38%|███▊      | 3805/10000 [3:11:05<7:37:11,  4.43s/it]

Total reward after episode 3805 is 1694.0


 38%|███▊      | 3806/10000 [3:11:11<8:41:05,  5.05s/it]

Total reward after episode 3806 is 1667.0


 38%|███▊      | 3807/10000 [3:11:13<6:54:41,  4.02s/it]

Total reward after episode 3807 is 632.0


 38%|███▊      | 3808/10000 [3:11:21<9:10:24,  5.33s/it]

Total reward after episode 3808 is 2635.0


 38%|███▊      | 3809/10000 [3:11:24<7:53:33,  4.59s/it]

Total reward after episode 3809 is 1350.0


 38%|███▊      | 3810/10000 [3:11:30<8:24:10,  4.89s/it]

Total reward after episode 3810 is 2361.0


 38%|███▊      | 3811/10000 [3:11:38<10:20:24,  6.01s/it]

Total reward after episode 3811 is 3038.0


 38%|███▊      | 3812/10000 [3:11:41<8:29:12,  4.94s/it] 

Total reward after episode 3812 is 1049.0


 38%|███▊      | 3813/10000 [3:11:50<10:35:56,  6.17s/it]

Total reward after episode 3813 is 3040.0


 38%|███▊      | 3814/10000 [3:11:56<10:50:10,  6.31s/it]

Total reward after episode 3814 is 3061.0


 38%|███▊      | 3815/10000 [3:12:04<11:27:16,  6.67s/it]

Total reward after episode 3815 is 3053.0


 38%|███▊      | 3816/10000 [3:12:15<13:33:41,  7.89s/it]

Total reward after episode 3816 is 3017.0


 38%|███▊      | 3817/10000 [3:12:20<12:14:06,  7.12s/it]

Total reward after episode 3817 is 2371.0


 38%|███▊      | 3818/10000 [3:12:23<9:58:09,  5.81s/it] 

Total reward after episode 3818 is 1044.0


 38%|███▊      | 3819/10000 [3:12:28<9:25:47,  5.49s/it]

Total reward after episode 3819 is 1925.0


 38%|███▊      | 3820/10000 [3:12:36<11:00:02,  6.41s/it]

Total reward after episode 3820 is 3043.0


 38%|███▊      | 3821/10000 [3:12:40<9:53:59,  5.77s/it] 

Total reward after episode 3821 is 1720.0


 38%|███▊      | 3822/10000 [3:12:46<9:52:26,  5.75s/it]

Total reward after episode 3822 is 2364.0


 38%|███▊      | 3823/10000 [3:12:52<9:47:20,  5.71s/it]

Total reward after episode 3823 is 2368.0


 38%|███▊      | 3824/10000 [3:13:00<10:56:02,  6.37s/it]

Total reward after episode 3824 is 3049.0


 38%|███▊      | 3825/10000 [3:13:01<8:26:15,  4.92s/it] 

Total reward after episode 3825 is 654.0


 38%|███▊      | 3826/10000 [3:13:04<7:08:28,  4.16s/it]

Total reward after episode 3826 is 1076.0


 38%|███▊      | 3827/10000 [3:13:08<7:02:43,  4.11s/it]

Total reward after episode 3827 is 1929.0


 38%|███▊      | 3828/10000 [3:13:13<7:37:05,  4.44s/it]

Total reward after episode 3828 is 2370.0


 38%|███▊      | 3829/10000 [3:13:18<8:10:19,  4.77s/it]

Total reward after episode 3829 is 2365.0


 38%|███▊      | 3830/10000 [3:13:21<7:20:21,  4.28s/it]

Total reward after episode 3830 is 1347.0


 38%|███▊      | 3831/10000 [3:13:28<8:34:11,  5.00s/it]

Total reward after episode 3831 is 3055.0


 38%|███▊      | 3832/10000 [3:13:34<8:49:21,  5.15s/it]

Total reward after episode 3832 is 2363.0


 38%|███▊      | 3833/10000 [3:13:35<6:58:13,  4.07s/it]

Total reward after episode 3833 is 654.0


 38%|███▊      | 3834/10000 [3:13:40<7:07:00,  4.16s/it]

Total reward after episode 3834 is 1926.0


 38%|███▊      | 3835/10000 [3:13:44<7:20:59,  4.29s/it]

Total reward after episode 3835 is 1926.0


 38%|███▊      | 3836/10000 [3:13:48<7:12:52,  4.21s/it]

Total reward after episode 3836 is 1688.0


 38%|███▊      | 3837/10000 [3:13:54<8:05:17,  4.72s/it]

Total reward after episode 3837 is 2363.0


 38%|███▊      | 3838/10000 [3:14:05<11:12:34,  6.55s/it]

Total reward after episode 3838 is 3023.0


 38%|███▊      | 3839/10000 [3:14:12<11:22:34,  6.65s/it]

Total reward after episode 3839 is 3060.0


 38%|███▊      | 3840/10000 [3:14:17<10:47:12,  6.30s/it]

Total reward after episode 3840 is 2367.0


 38%|███▊      | 3841/10000 [3:14:18<7:51:56,  4.60s/it] 

Total reward after episode 3841 is 252.0


 38%|███▊      | 3842/10000 [3:14:29<11:02:07,  6.45s/it]

Total reward after episode 3842 is 3024.0


 38%|███▊      | 3843/10000 [3:14:41<13:58:11,  8.17s/it]

Total reward after episode 3843 is 3011.0


 38%|███▊      | 3844/10000 [3:14:42<10:31:21,  6.15s/it]

Total reward after episode 3844 is 608.0


 38%|███▊      | 3845/10000 [3:14:46<9:30:26,  5.56s/it] 

Total reward after episode 3845 is 1933.0


 38%|███▊      | 3846/10000 [3:14:49<8:05:33,  4.73s/it]

Total reward after episode 3846 is 1340.0


 38%|███▊      | 3847/10000 [3:14:57<9:32:08,  5.58s/it]

Total reward after episode 3847 is 3053.0


 38%|███▊      | 3848/10000 [3:15:02<9:33:22,  5.59s/it]

Total reward after episode 3848 is 2367.0


 38%|███▊      | 3849/10000 [3:15:08<9:44:43,  5.70s/it]

Total reward after episode 3849 is 2363.0


 38%|███▊      | 3850/10000 [3:15:13<9:02:52,  5.30s/it]

Total reward after episode 3850 is 1929.0


 39%|███▊      | 3851/10000 [3:15:14<6:43:24,  3.94s/it]

Total reward after episode 3851 is 234.0


 39%|███▊      | 3852/10000 [3:15:16<6:09:03,  3.60s/it]

Total reward after episode 3852 is 1350.0


 39%|███▊      | 3853/10000 [3:15:31<11:35:20,  6.79s/it]

Total reward after episode 3853 is 2991.0


 39%|███▊      | 3854/10000 [3:15:36<10:43:38,  6.28s/it]

Total reward after episode 3854 is 2374.0


 39%|███▊      | 3855/10000 [3:15:41<10:11:56,  5.98s/it]

Total reward after episode 3855 is 1919.0


 39%|███▊      | 3856/10000 [3:15:44<8:49:43,  5.17s/it] 

Total reward after episode 3856 is 1589.0


 39%|███▊      | 3857/10000 [3:15:47<7:31:20,  4.41s/it]

Total reward after episode 3857 is 1049.0


 39%|███▊      | 3858/10000 [3:15:50<6:57:00,  4.07s/it]

Total reward after episode 3858 is 1586.0


 39%|███▊      | 3859/10000 [3:15:54<6:59:01,  4.09s/it]

Total reward after episode 3859 is 1863.0


 39%|███▊      | 3860/10000 [3:15:57<6:02:59,  3.55s/it]

Total reward after episode 3860 is 1052.0


 39%|███▊      | 3861/10000 [3:16:01<6:20:57,  3.72s/it]

Total reward after episode 3861 is 1929.0


 39%|███▊      | 3862/10000 [3:16:03<5:39:20,  3.32s/it]

Total reward after episode 3862 is 1051.0


 39%|███▊      | 3863/10000 [3:16:07<6:08:24,  3.60s/it]

Total reward after episode 3863 is 1697.0


 39%|███▊      | 3864/10000 [3:16:13<7:22:51,  4.33s/it]

Total reward after episode 3864 is 2362.0


 39%|███▊      | 3865/10000 [3:16:16<6:26:33,  3.78s/it]

Total reward after episode 3865 is 1057.0


 39%|███▊      | 3866/10000 [3:16:20<6:23:27,  3.75s/it]

Total reward after episode 3866 is 1716.0


 39%|███▊      | 3867/10000 [3:16:23<6:26:32,  3.78s/it]

Total reward after episode 3867 is 1703.0


 39%|███▊      | 3868/10000 [3:16:26<5:39:29,  3.32s/it]

Total reward after episode 3868 is 1055.0


 39%|███▊      | 3869/10000 [3:16:31<6:33:20,  3.85s/it]

Total reward after episode 3869 is 2373.0


 39%|███▊      | 3870/10000 [3:16:39<9:02:08,  5.31s/it]

Total reward after episode 3870 is 2335.0


 39%|███▊      | 3871/10000 [3:16:40<6:38:44,  3.90s/it]

Total reward after episode 3871 is 250.0


 39%|███▊      | 3872/10000 [3:16:44<6:44:36,  3.96s/it]

Total reward after episode 3872 is 1927.0


 39%|███▊      | 3873/10000 [3:16:50<7:33:04,  4.44s/it]

Total reward after episode 3873 is 2365.0


 39%|███▊      | 3874/10000 [3:16:57<9:09:42,  5.38s/it]

Total reward after episode 3874 is 3046.0


 39%|███▉      | 3875/10000 [3:17:01<8:23:35,  4.93s/it]

Total reward after episode 3875 is 1694.0


 39%|███▉      | 3876/10000 [3:17:02<6:31:59,  3.84s/it]

Total reward after episode 3876 is 606.0


 39%|███▉      | 3877/10000 [3:17:05<6:06:37,  3.59s/it]

Total reward after episode 3877 is 1045.0


 39%|███▉      | 3878/10000 [3:17:09<6:10:32,  3.63s/it]

Total reward after episode 3878 is 1695.0


 39%|███▉      | 3879/10000 [3:17:13<6:02:19,  3.55s/it]

Total reward after episode 3879 is 1584.0


 39%|███▉      | 3880/10000 [3:17:18<7:10:37,  4.22s/it]

Total reward after episode 3880 is 2362.0


 39%|███▉      | 3881/10000 [3:17:21<6:31:47,  3.84s/it]

Total reward after episode 3881 is 1344.0


 39%|███▉      | 3882/10000 [3:17:27<7:33:12,  4.44s/it]

Total reward after episode 3882 is 2365.0


 39%|███▉      | 3883/10000 [3:17:29<6:03:56,  3.57s/it]

Total reward after episode 3883 is 654.0


 39%|███▉      | 3884/10000 [3:17:30<5:01:44,  2.96s/it]

Total reward after episode 3884 is 654.0


 39%|███▉      | 3885/10000 [3:17:36<6:18:44,  3.72s/it]

Total reward after episode 3885 is 2367.0


 39%|███▉      | 3886/10000 [3:17:44<8:40:34,  5.11s/it]

Total reward after episode 3886 is 3046.0


 39%|███▉      | 3887/10000 [3:17:46<7:18:22,  4.30s/it]

Total reward after episode 3887 is 1056.0


 39%|███▉      | 3888/10000 [3:17:48<5:54:12,  3.48s/it]

Total reward after episode 3888 is 654.0


 39%|███▉      | 3889/10000 [3:17:51<5:51:59,  3.46s/it]

Total reward after episode 3889 is 1585.0


 39%|███▉      | 3890/10000 [3:17:55<6:02:41,  3.56s/it]

Total reward after episode 3890 is 1712.0


 39%|███▉      | 3891/10000 [3:17:59<6:15:27,  3.69s/it]

Total reward after episode 3891 is 1690.0


 39%|███▉      | 3892/10000 [3:18:03<6:26:25,  3.80s/it]

Total reward after episode 3892 is 1861.0


 39%|███▉      | 3893/10000 [3:18:05<5:20:03,  3.14s/it]

Total reward after episode 3893 is 653.0


 39%|███▉      | 3894/10000 [3:18:25<14:03:17,  8.29s/it]

Total reward after episode 3894 is 2936.0


 39%|███▉      | 3895/10000 [3:18:32<13:33:01,  7.99s/it]

Total reward after episode 3895 is 2352.0


 39%|███▉      | 3896/10000 [3:18:39<12:58:56,  7.66s/it]

Total reward after episode 3896 is 3060.0


 39%|███▉      | 3897/10000 [3:18:43<10:49:19,  6.38s/it]

Total reward after episode 3897 is 1325.0


 39%|███▉      | 3898/10000 [3:18:46<9:18:03,  5.49s/it] 

Total reward after episode 3898 is 1586.0


 39%|███▉      | 3899/10000 [3:18:53<10:05:12,  5.95s/it]

Total reward after episode 3899 is 3059.0


 39%|███▉      | 3900/10000 [3:18:55<7:44:01,  4.56s/it] 

Total reward after episode 3900 is 613.0


 39%|███▉      | 3901/10000 [3:18:59<7:27:45,  4.40s/it]

Total reward after episode 3901 is 1861.0


 39%|███▉      | 3902/10000 [3:19:04<8:02:07,  4.74s/it]

Total reward after episode 3902 is 2368.0


 39%|███▉      | 3903/10000 [3:19:08<7:49:01,  4.62s/it]

Total reward after episode 3903 is 1862.0


 39%|███▉      | 3904/10000 [3:19:13<7:49:26,  4.62s/it]

Total reward after episode 3904 is 1930.0


 39%|███▉      | 3905/10000 [3:19:20<8:51:22,  5.23s/it]

Total reward after episode 3905 is 2356.0


 39%|███▉      | 3906/10000 [3:19:23<7:53:45,  4.66s/it]

Total reward after episode 3906 is 1585.0


 39%|███▉      | 3907/10000 [3:19:37<12:31:30,  7.40s/it]

Total reward after episode 3907 is 2996.0


 39%|███▉      | 3908/10000 [3:19:41<10:43:14,  6.34s/it]

Total reward after episode 3908 is 1863.0


 39%|███▉      | 3909/10000 [3:19:45<9:56:41,  5.88s/it] 

Total reward after episode 3909 is 1682.0


 39%|███▉      | 3910/10000 [3:19:47<7:37:41,  4.51s/it]

Total reward after episode 3910 is 614.0


 39%|███▉      | 3911/10000 [3:19:49<6:15:59,  3.70s/it]

Total reward after episode 3911 is 737.0


 39%|███▉      | 3912/10000 [3:19:50<5:04:37,  3.00s/it]

Total reward after episode 3912 is 611.0


 39%|███▉      | 3913/10000 [3:19:59<8:22:27,  4.95s/it]

Total reward after episode 3913 is 3034.0


 39%|███▉      | 3914/10000 [3:20:05<8:41:27,  5.14s/it]

Total reward after episode 3914 is 2364.0


 39%|███▉      | 3915/10000 [3:20:14<10:45:58,  6.37s/it]

Total reward after episode 3915 is 3038.0


 39%|███▉      | 3916/10000 [3:20:18<9:18:00,  5.50s/it] 

Total reward after episode 3916 is 1321.0


 39%|███▉      | 3917/10000 [3:20:21<8:16:14,  4.89s/it]

Total reward after episode 3917 is 1579.0


 39%|███▉      | 3918/10000 [3:20:27<8:35:44,  5.09s/it]

Total reward after episode 3918 is 1680.0


 39%|███▉      | 3919/10000 [3:20:30<7:51:06,  4.65s/it]

Total reward after episode 3919 is 1728.0


 39%|███▉      | 3920/10000 [3:20:36<8:19:04,  4.93s/it]

Total reward after episode 3920 is 2653.0


 39%|███▉      | 3921/10000 [3:20:39<7:31:40,  4.46s/it]

Total reward after episode 3921 is 1438.0


 39%|███▉      | 3922/10000 [3:20:46<8:48:30,  5.22s/it]

Total reward after episode 3922 is 2651.0


 39%|███▉      | 3923/10000 [3:20:50<8:06:53,  4.81s/it]

Total reward after episode 3923 is 1703.0


 39%|███▉      | 3924/10000 [3:20:54<7:38:55,  4.53s/it]

Total reward after episode 3924 is 1719.0


 39%|███▉      | 3925/10000 [3:21:02<9:10:34,  5.44s/it]

Total reward after episode 3925 is 3054.0


 39%|███▉      | 3926/10000 [3:21:06<8:39:41,  5.13s/it]

Total reward after episode 3926 is 1927.0


 39%|███▉      | 3927/10000 [3:21:16<11:17:52,  6.70s/it]

Total reward after episode 3927 is 2318.0


 39%|███▉      | 3928/10000 [3:21:21<10:00:25,  5.93s/it]

Total reward after episode 3928 is 1927.0


 39%|███▉      | 3929/10000 [3:21:25<9:13:22,  5.47s/it] 

Total reward after episode 3929 is 1694.0


 39%|███▉      | 3930/10000 [3:21:32<10:03:34,  5.97s/it]

Total reward after episode 3930 is 3057.0


 39%|███▉      | 3931/10000 [3:21:35<8:16:12,  4.91s/it] 

Total reward after episode 3931 is 1052.0


 39%|███▉      | 3932/10000 [3:21:39<7:52:07,  4.67s/it]

Total reward after episode 3932 is 1700.0


 39%|███▉      | 3933/10000 [3:21:42<7:21:55,  4.37s/it]

Total reward after episode 3933 is 1716.0


 39%|███▉      | 3934/10000 [3:21:51<9:30:56,  5.65s/it]

Total reward after episode 3934 is 3043.0


 39%|███▉      | 3935/10000 [3:21:55<8:55:57,  5.30s/it]

Total reward after episode 3935 is 1925.0


 39%|███▉      | 3936/10000 [3:22:02<9:21:46,  5.56s/it]

Total reward after episode 3936 is 2359.0


 39%|███▉      | 3937/10000 [3:22:10<10:56:04,  6.49s/it]

Total reward after episode 3937 is 2338.0


 39%|███▉      | 3938/10000 [3:22:14<9:39:44,  5.74s/it] 

Total reward after episode 3938 is 1931.0


 39%|███▉      | 3939/10000 [3:22:18<8:52:32,  5.27s/it]

Total reward after episode 3939 is 1923.0


 39%|███▉      | 3940/10000 [3:22:27<10:40:03,  6.34s/it]

Total reward after episode 3940 is 3041.0


 39%|███▉      | 3941/10000 [3:22:31<9:19:09,  5.54s/it] 

Total reward after episode 3941 is 1694.0


 39%|███▉      | 3942/10000 [3:22:35<8:24:56,  5.00s/it]

Total reward after episode 3942 is 1703.0


 39%|███▉      | 3943/10000 [3:22:39<7:59:55,  4.75s/it]

Total reward after episode 3943 is 1928.0


 39%|███▉      | 3944/10000 [3:22:43<7:52:20,  4.68s/it]

Total reward after episode 3944 is 1925.0


 39%|███▉      | 3945/10000 [3:22:46<6:51:08,  4.07s/it]

Total reward after episode 3945 is 1153.0


 39%|███▉      | 3946/10000 [3:22:50<6:43:03,  3.99s/it]

Total reward after episode 3946 is 1705.0


 39%|███▉      | 3947/10000 [3:22:52<5:54:09,  3.51s/it]

Total reward after episode 3947 is 1076.0


 39%|███▉      | 3948/10000 [3:22:59<7:34:47,  4.51s/it]

Total reward after episode 3948 is 3059.0


 39%|███▉      | 3949/10000 [3:23:07<9:19:48,  5.55s/it]

Total reward after episode 3949 is 3049.0


 40%|███▉      | 3950/10000 [3:23:11<8:21:01,  4.97s/it]

Total reward after episode 3950 is 1727.0


 40%|███▉      | 3951/10000 [3:23:15<7:54:22,  4.71s/it]

Total reward after episode 3951 is 1931.0


 40%|███▉      | 3952/10000 [3:23:19<7:42:13,  4.59s/it]

Total reward after episode 3952 is 1863.0


 40%|███▉      | 3953/10000 [3:23:25<8:10:46,  4.87s/it]

Total reward after episode 3953 is 2364.0


 40%|███▉      | 3954/10000 [3:23:28<7:19:01,  4.36s/it]

Total reward after episode 3954 is 1347.0


 40%|███▉      | 3955/10000 [3:23:29<5:48:12,  3.46s/it]

Total reward after episode 3955 is 610.0


 40%|███▉      | 3956/10000 [3:23:33<5:59:17,  3.57s/it]

Total reward after episode 3956 is 1701.0


 40%|███▉      | 3957/10000 [3:23:36<5:45:22,  3.43s/it]

Total reward after episode 3957 is 1051.0


 40%|███▉      | 3958/10000 [3:23:42<7:05:14,  4.22s/it]

Total reward after episode 3958 is 1320.0


 40%|███▉      | 3959/10000 [3:23:48<8:07:46,  4.84s/it]

Total reward after episode 3959 is 2357.0


 40%|███▉      | 3960/10000 [3:23:53<8:16:05,  4.93s/it]

Total reward after episode 3960 is 1918.0


 40%|███▉      | 3961/10000 [3:23:58<7:50:01,  4.67s/it]

Total reward after episode 3961 is 1931.0


 40%|███▉      | 3962/10000 [3:24:04<8:34:50,  5.12s/it]

Total reward after episode 3962 is 2359.0


 40%|███▉      | 3963/10000 [3:24:07<7:50:55,  4.68s/it]

Total reward after episode 3963 is 1701.0


 40%|███▉      | 3964/10000 [3:24:11<7:29:12,  4.47s/it]

Total reward after episode 3964 is 1868.0


 40%|███▉      | 3965/10000 [3:24:14<6:35:10,  3.93s/it]

Total reward after episode 3965 is 819.0


 40%|███▉      | 3966/10000 [3:24:17<6:00:23,  3.58s/it]

Total reward after episode 3966 is 819.0


 40%|███▉      | 3967/10000 [3:24:24<7:54:41,  4.72s/it]

Total reward after episode 3967 is 3048.0


 40%|███▉      | 3968/10000 [3:24:27<6:54:33,  4.12s/it]

Total reward after episode 3968 is 819.0


 40%|███▉      | 3969/10000 [3:24:38<10:38:30,  6.35s/it]

Total reward after episode 3969 is 3015.0


 40%|███▉      | 3970/10000 [3:25:01<18:54:00, 11.28s/it]

Total reward after episode 3970 is 1747.0


 40%|███▉      | 3971/10000 [3:25:04<14:40:23,  8.76s/it]

Total reward after episode 3971 is 1341.0


 40%|███▉      | 3972/10000 [3:25:12<14:09:21,  8.45s/it]

Total reward after episode 3972 is 1895.0


 40%|███▉      | 3973/10000 [3:25:20<13:59:42,  8.36s/it]

Total reward after episode 3973 is 2340.0


 40%|███▉      | 3974/10000 [3:25:23<11:14:46,  6.72s/it]

Total reward after episode 3974 is 1350.0


 40%|███▉      | 3975/10000 [3:25:26<9:39:32,  5.77s/it] 

Total reward after episode 3975 is 1731.0


 40%|███▉      | 3976/10000 [3:25:29<8:08:06,  4.86s/it]

Total reward after episode 3976 is 1050.0


 40%|███▉      | 3977/10000 [3:25:36<9:01:54,  5.40s/it]

Total reward after episode 3977 is 3055.0


 40%|███▉      | 3978/10000 [3:25:41<8:54:35,  5.33s/it]

Total reward after episode 3978 is 1682.0


 40%|███▉      | 3979/10000 [3:25:45<8:20:53,  4.99s/it]

Total reward after episode 3979 is 1870.0


 40%|███▉      | 3980/10000 [3:25:50<8:06:46,  4.85s/it]

Total reward after episode 3980 is 1699.0


 40%|███▉      | 3981/10000 [3:25:54<7:50:47,  4.69s/it]

Total reward after episode 3981 is 1860.0


 40%|███▉      | 3982/10000 [3:26:04<10:21:50,  6.20s/it]

Total reward after episode 3982 is 3032.0


 40%|███▉      | 3983/10000 [3:26:11<10:52:08,  6.50s/it]

Total reward after episode 3983 is 3056.0


 40%|███▉      | 3984/10000 [3:26:13<8:49:32,  5.28s/it] 

Total reward after episode 3984 is 1073.0


 40%|███▉      | 3985/10000 [3:26:24<11:14:26,  6.73s/it]

Total reward after episode 3985 is 3028.0


 40%|███▉      | 3986/10000 [3:26:30<11:10:32,  6.69s/it]

Total reward after episode 3986 is 3061.0


 40%|███▉      | 3987/10000 [3:26:33<9:08:47,  5.48s/it] 

Total reward after episode 3987 is 1353.0


 40%|███▉      | 3988/10000 [3:26:34<7:03:21,  4.23s/it]

Total reward after episode 3988 is 612.0


 40%|███▉      | 3989/10000 [3:26:38<6:49:16,  4.09s/it]

Total reward after episode 3989 is 1580.0


 40%|███▉      | 3990/10000 [3:26:41<6:24:36,  3.84s/it]

Total reward after episode 3990 is 1327.0


 40%|███▉      | 3991/10000 [3:26:43<5:39:42,  3.39s/it]

Total reward after episode 3991 is 1048.0


 40%|███▉      | 3992/10000 [3:26:48<6:25:59,  3.85s/it]

Total reward after episode 3992 is 2375.0


 40%|███▉      | 3993/10000 [3:26:50<5:23:14,  3.23s/it]

Total reward after episode 3993 is 736.0


 40%|███▉      | 3994/10000 [3:26:53<5:12:36,  3.12s/it]

Total reward after episode 3994 is 1341.0


 40%|███▉      | 3995/10000 [3:26:59<6:47:29,  4.07s/it]

Total reward after episode 3995 is 2651.0


 40%|███▉      | 3996/10000 [3:27:11<10:42:56,  6.43s/it]

Total reward after episode 3996 is 2592.0


 40%|███▉      | 3997/10000 [3:27:14<8:41:56,  5.22s/it] 

Total reward after episode 3997 is 1053.0


 40%|███▉      | 3998/10000 [3:27:21<9:52:37,  5.92s/it]

Total reward after episode 3998 is 2632.0


 40%|███▉      | 3999/10000 [3:27:24<8:11:36,  4.92s/it]

Total reward after episode 3999 is 1048.0


 40%|████      | 4000/10000 [3:27:28<7:51:33,  4.72s/it]

Total reward after episode 4000 is 1844.0


 40%|████      | 4001/10000 [3:27:29<5:49:06,  3.49s/it]

Total reward after episode 4001 is 251.0


 40%|████      | 4002/10000 [3:27:29<4:23:54,  2.64s/it]

Total reward after episode 4002 is 249.0


 40%|████      | 4003/10000 [3:27:30<3:23:18,  2.03s/it]

Total reward after episode 4003 is 252.0


 40%|████      | 4004/10000 [3:27:34<4:13:11,  2.53s/it]

Total reward after episode 4004 is 1696.0


 40%|████      | 4005/10000 [3:27:39<5:33:04,  3.33s/it]

Total reward after episode 4005 is 2364.0


 40%|████      | 4006/10000 [3:27:44<6:41:38,  4.02s/it]

Total reward after episode 4006 is 2363.0


 40%|████      | 4007/10000 [3:27:46<5:19:18,  3.20s/it]

Total reward after episode 4007 is 610.0


 40%|████      | 4008/10000 [3:27:50<5:56:49,  3.57s/it]

Total reward after episode 4008 is 1925.0


 40%|████      | 4009/10000 [3:27:53<5:41:55,  3.42s/it]

Total reward after episode 4009 is 1429.0


 40%|████      | 4010/10000 [3:27:56<5:31:08,  3.32s/it]

Total reward after episode 4010 is 1429.0


 40%|████      | 4011/10000 [3:28:00<5:40:42,  3.41s/it]

Total reward after episode 4011 is 1692.0


 40%|████      | 4012/10000 [3:28:05<6:32:18,  3.93s/it]

Total reward after episode 4012 is 2367.0


 40%|████      | 4013/10000 [3:28:11<7:19:22,  4.40s/it]

Total reward after episode 4013 is 2364.0


 40%|████      | 4014/10000 [3:28:15<7:19:56,  4.41s/it]

Total reward after episode 4014 is 1923.0


 40%|████      | 4015/10000 [3:28:17<6:20:02,  3.81s/it]

Total reward after episode 4015 is 1047.0


 40%|████      | 4016/10000 [3:28:23<7:13:22,  4.35s/it]

Total reward after episode 4016 is 2364.0


 40%|████      | 4017/10000 [3:28:27<6:50:43,  4.12s/it]

Total reward after episode 4017 is 1727.0


 40%|████      | 4018/10000 [3:28:31<6:54:17,  4.16s/it]

Total reward after episode 4018 is 1928.0


 40%|████      | 4019/10000 [3:28:34<6:24:22,  3.86s/it]

Total reward after episode 4019 is 1429.0


 40%|████      | 4020/10000 [3:28:35<4:48:12,  2.89s/it]

Total reward after episode 4020 is 250.0


 40%|████      | 4021/10000 [3:28:37<4:36:58,  2.78s/it]

Total reward after episode 4021 is 1058.0


 40%|████      | 4022/10000 [3:28:50<9:25:21,  5.67s/it]

Total reward after episode 4022 is 3007.0


 40%|████      | 4023/10000 [3:28:57<10:06:11,  6.09s/it]

Total reward after episode 4023 is 2629.0


 40%|████      | 4024/10000 [3:29:04<10:45:22,  6.48s/it]

Total reward after episode 4024 is 3054.0


 40%|████      | 4025/10000 [3:29:07<8:57:31,  5.40s/it] 

Total reward after episode 4025 is 1341.0


 40%|████      | 4026/10000 [3:29:08<6:35:56,  3.98s/it]

Total reward after episode 4026 is 251.0


 40%|████      | 4027/10000 [3:29:12<6:42:13,  4.04s/it]

Total reward after episode 4027 is 1927.0


 40%|████      | 4028/10000 [3:29:20<8:33:07,  5.16s/it]

Total reward after episode 4028 is 3053.0


 40%|████      | 4029/10000 [3:29:23<7:38:02,  4.60s/it]

Total reward after episode 4029 is 1349.0


 40%|████      | 4030/10000 [3:29:32<9:59:08,  6.02s/it]

Total reward after episode 4030 is 2331.0


 40%|████      | 4031/10000 [3:29:36<8:46:31,  5.29s/it]

Total reward after episode 4031 is 1577.0


 40%|████      | 4032/10000 [3:29:37<6:49:36,  4.12s/it]

Total reward after episode 4032 is 613.0


 40%|████      | 4033/10000 [3:29:45<8:50:05,  5.33s/it]

Total reward after episode 4033 is 2623.0


 40%|████      | 4034/10000 [3:29:54<10:31:48,  6.35s/it]

Total reward after episode 4034 is 3042.0


 40%|████      | 4035/10000 [3:29:58<9:26:42,  5.70s/it] 

Total reward after episode 4035 is 1927.0


 40%|████      | 4036/10000 [3:30:19<16:54:24, 10.21s/it]

Total reward after episode 4036 is 2511.0


 40%|████      | 4037/10000 [3:30:29<16:54:10, 10.20s/it]

Total reward after episode 4037 is 2610.0


 40%|████      | 4038/10000 [3:30:33<13:39:16,  8.24s/it]

Total reward after episode 4038 is 1705.0


 40%|████      | 4039/10000 [3:30:44<15:22:03,  9.28s/it]

Total reward after episode 4039 is 3014.0


 40%|████      | 4040/10000 [3:30:50<13:21:02,  8.06s/it]

Total reward after episode 4040 is 2370.0


 40%|████      | 4041/10000 [3:30:58<13:13:53,  7.99s/it]

Total reward after episode 4041 is 3050.0


 40%|████      | 4042/10000 [3:31:00<10:40:15,  6.45s/it]

Total reward after episode 4042 is 1352.0


 40%|████      | 4043/10000 [3:31:05<9:30:52,  5.75s/it] 

Total reward after episode 4043 is 1930.0


 40%|████      | 4044/10000 [3:31:08<8:25:10,  5.09s/it]

Total reward after episode 4044 is 1709.0


 40%|████      | 4045/10000 [3:31:12<7:55:01,  4.79s/it]

Total reward after episode 4045 is 1929.0


 40%|████      | 4046/10000 [3:31:21<10:07:18,  6.12s/it]

Total reward after episode 4046 is 3037.0


 40%|████      | 4047/10000 [3:31:33<12:43:36,  7.70s/it]

Total reward after episode 4047 is 3017.0


 40%|████      | 4048/10000 [3:31:37<11:01:08,  6.66s/it]

Total reward after episode 4048 is 1863.0


 40%|████      | 4049/10000 [3:31:39<8:55:21,  5.40s/it] 

Total reward after episode 4049 is 1052.0


 40%|████      | 4050/10000 [3:31:45<9:05:54,  5.50s/it]

Total reward after episode 4050 is 2364.0


 41%|████      | 4051/10000 [3:31:52<9:49:38,  5.95s/it]

Total reward after episode 4051 is 2353.0


 41%|████      | 4052/10000 [3:32:05<13:17:43,  8.05s/it]

Total reward after episode 4052 is 3002.0


 41%|████      | 4053/10000 [3:32:09<11:18:45,  6.85s/it]

Total reward after episode 4053 is 1929.0


 41%|████      | 4054/10000 [3:32:13<9:58:41,  6.04s/it] 

Total reward after episode 4054 is 1928.0


 41%|████      | 4055/10000 [3:32:17<8:59:46,  5.45s/it]

Total reward after episode 4055 is 1866.0


 41%|████      | 4056/10000 [3:32:26<10:21:24,  6.27s/it]

Total reward after episode 4056 is 3047.0


 41%|████      | 4057/10000 [3:32:35<11:45:30,  7.12s/it]

Total reward after episode 4057 is 3038.0


 41%|████      | 4058/10000 [3:32:38<10:03:31,  6.09s/it]

Total reward after episode 4058 is 1696.0


 41%|████      | 4059/10000 [3:32:42<8:43:58,  5.29s/it] 

Total reward after episode 4059 is 1583.0


 41%|████      | 4060/10000 [3:32:52<10:54:43,  6.61s/it]

Total reward after episode 4060 is 3033.0


 41%|████      | 4061/10000 [3:32:57<10:28:38,  6.35s/it]

Total reward after episode 4061 is 2363.0


 41%|████      | 4062/10000 [3:33:01<9:06:57,  5.53s/it] 

Total reward after episode 4062 is 1588.0


 41%|████      | 4063/10000 [3:33:05<8:17:38,  5.03s/it]

Total reward after episode 4063 is 1863.0


 41%|████      | 4064/10000 [3:33:07<6:58:46,  4.23s/it]

Total reward after episode 4064 is 1052.0


 41%|████      | 4065/10000 [3:33:10<6:33:25,  3.98s/it]

Total reward after episode 4065 is 1585.0


 41%|████      | 4066/10000 [3:33:20<9:03:02,  5.49s/it]

Total reward after episode 4066 is 3040.0


 41%|████      | 4067/10000 [3:33:26<9:32:06,  5.79s/it]

Total reward after episode 4067 is 3062.0


 41%|████      | 4068/10000 [3:33:31<9:07:18,  5.54s/it]

Total reward after episode 4068 is 2371.0


 41%|████      | 4069/10000 [3:33:34<8:07:08,  4.93s/it]

Total reward after episode 4069 is 1699.0


 41%|████      | 4070/10000 [3:33:38<7:23:18,  4.49s/it]

Total reward after episode 4070 is 1587.0


 41%|████      | 4071/10000 [3:33:43<7:48:38,  4.74s/it]

Total reward after episode 4071 is 2365.0


 41%|████      | 4072/10000 [3:33:54<10:49:47,  6.58s/it]

Total reward after episode 4072 is 3016.0


 41%|████      | 4073/10000 [3:33:59<9:48:00,  5.95s/it] 

Total reward after episode 4073 is 1864.0


 41%|████      | 4074/10000 [3:34:09<11:58:06,  7.27s/it]

Total reward after episode 4074 is 3027.0


 41%|████      | 4075/10000 [3:34:12<10:05:55,  6.14s/it]

Total reward after episode 4075 is 1585.0


 41%|████      | 4076/10000 [3:34:16<9:02:41,  5.50s/it] 

Total reward after episode 4076 is 1925.0


 41%|████      | 4077/10000 [3:34:21<8:46:53,  5.34s/it]

Total reward after episode 4077 is 2370.0


 41%|████      | 4078/10000 [3:34:23<6:48:27,  4.14s/it]

Total reward after episode 4078 is 611.0


 41%|████      | 4079/10000 [3:34:28<7:21:25,  4.47s/it]

Total reward after episode 4079 is 2369.0


 41%|████      | 4080/10000 [3:34:32<7:07:29,  4.33s/it]

Total reward after episode 4080 is 1865.0


 41%|████      | 4081/10000 [3:34:35<6:15:33,  3.81s/it]

Total reward after episode 4081 is 1056.0


 41%|████      | 4082/10000 [3:34:35<4:42:19,  2.86s/it]

Total reward after episode 4082 is 250.0


 41%|████      | 4083/10000 [3:34:36<3:37:39,  2.21s/it]

Total reward after episode 4083 is 248.0


 41%|████      | 4084/10000 [3:34:42<5:24:26,  3.29s/it]

Total reward after episode 4084 is 2363.0


 41%|████      | 4085/10000 [3:34:50<8:00:21,  4.87s/it]

Total reward after episode 4085 is 3044.0


 41%|████      | 4086/10000 [3:34:54<7:33:48,  4.60s/it]

Total reward after episode 4086 is 1864.0


 41%|████      | 4087/10000 [3:34:58<7:16:11,  4.43s/it]

Total reward after episode 4087 is 1925.0


 41%|████      | 4088/10000 [3:35:03<7:32:30,  4.59s/it]

Total reward after episode 4088 is 2373.0


 41%|████      | 4089/10000 [3:35:06<6:28:41,  3.95s/it]

Total reward after episode 4089 is 1049.0


 41%|████      | 4090/10000 [3:35:10<6:30:53,  3.97s/it]

Total reward after episode 4090 is 1924.0


 41%|████      | 4091/10000 [3:35:12<5:55:41,  3.61s/it]

Total reward after episode 4091 is 1047.0


 41%|████      | 4092/10000 [3:35:16<6:06:06,  3.72s/it]

Total reward after episode 4092 is 1928.0


 41%|████      | 4093/10000 [3:35:17<4:35:05,  2.79s/it]

Total reward after episode 4093 is 250.0


 41%|████      | 4094/10000 [3:35:55<21:44:37, 13.25s/it]

Total reward after episode 4094 is 2775.0


 41%|████      | 4095/10000 [3:35:59<17:18:58, 10.56s/it]

Total reward after episode 4095 is 1929.0


 41%|████      | 4096/10000 [3:36:01<13:19:28,  8.12s/it]

Total reward after episode 4096 is 1050.0


 41%|████      | 4097/10000 [3:36:09<13:13:26,  8.06s/it]

Total reward after episode 4097 is 3049.0


 41%|████      | 4098/10000 [3:36:13<11:11:02,  6.82s/it]

Total reward after episode 4098 is 1860.0


 41%|████      | 4099/10000 [3:36:25<13:31:35,  8.25s/it]

Total reward after episode 4099 is 3009.0


 41%|████      | 4100/10000 [3:36:28<10:58:35,  6.70s/it]

Total reward after episode 4100 is 1324.0


 41%|████      | 4101/10000 [3:36:30<8:54:40,  5.44s/it] 

Total reward after episode 4101 is 1050.0


 41%|████      | 4102/10000 [3:36:32<6:52:14,  4.19s/it]

Total reward after episode 4102 is 607.0


 41%|████      | 4103/10000 [3:36:35<6:34:02,  4.01s/it]

Total reward after episode 4103 is 1343.0


 41%|████      | 4104/10000 [3:36:39<6:35:31,  4.02s/it]

Total reward after episode 4104 is 1929.0


 41%|████      | 4105/10000 [3:36:41<5:15:01,  3.21s/it]

Total reward after episode 4105 is 610.0


 41%|████      | 4106/10000 [3:36:47<6:57:17,  4.25s/it]

Total reward after episode 4106 is 2353.0


 41%|████      | 4107/10000 [3:36:55<8:27:01,  5.16s/it]

Total reward after episode 4107 is 3055.0


 41%|████      | 4108/10000 [3:36:58<7:28:41,  4.57s/it]

Total reward after episode 4108 is 1585.0


 41%|████      | 4109/10000 [3:37:00<6:24:18,  3.91s/it]

Total reward after episode 4109 is 1052.0


 41%|████      | 4110/10000 [3:37:08<8:26:38,  5.16s/it]

Total reward after episode 4110 is 3048.0


 41%|████      | 4111/10000 [3:37:14<8:39:24,  5.29s/it]

Total reward after episode 4111 is 2363.0


 41%|████      | 4112/10000 [3:37:18<7:52:49,  4.82s/it]

Total reward after episode 4112 is 1702.0


 41%|████      | 4113/10000 [3:37:22<7:29:02,  4.58s/it]

Total reward after episode 4113 is 1923.0


 41%|████      | 4114/10000 [3:37:32<10:21:55,  6.34s/it]

Total reward after episode 4114 is 3025.0


 41%|████      | 4115/10000 [3:37:35<8:39:10,  5.29s/it] 

Total reward after episode 4115 is 1351.0


 41%|████      | 4116/10000 [3:37:44<10:29:38,  6.42s/it]

Total reward after episode 4116 is 3039.0


 41%|████      | 4117/10000 [3:37:48<9:14:51,  5.66s/it] 

Total reward after episode 4117 is 1694.0


 41%|████      | 4118/10000 [3:37:53<8:49:27,  5.40s/it]

Total reward after episode 4118 is 1916.0


 41%|████      | 4119/10000 [3:37:55<7:31:11,  4.60s/it]

Total reward after episode 4119 is 1045.0


 41%|████      | 4120/10000 [3:38:00<7:15:40,  4.45s/it]

Total reward after episode 4120 is 1923.0


 41%|████      | 4121/10000 [3:38:04<7:11:51,  4.41s/it]

Total reward after episode 4121 is 1932.0


 41%|████      | 4122/10000 [3:38:10<7:57:38,  4.88s/it]

Total reward after episode 4122 is 2360.0


 41%|████      | 4123/10000 [3:38:19<10:07:07,  6.20s/it]

Total reward after episode 4123 is 3036.0


 41%|████      | 4124/10000 [3:38:25<10:05:11,  6.18s/it]

Total reward after episode 4124 is 2358.0


 41%|████▏     | 4125/10000 [3:38:33<11:05:50,  6.80s/it]

Total reward after episode 4125 is 3046.0


 41%|████▏     | 4126/10000 [3:38:38<9:55:42,  6.08s/it] 

Total reward after episode 4126 is 1918.0


 41%|████▏     | 4127/10000 [3:38:39<7:30:52,  4.61s/it]

Total reward after episode 4127 is 236.0


 41%|████▏     | 4128/10000 [3:38:46<8:54:27,  5.46s/it]

Total reward after episode 4128 is 3054.0


 41%|████▏     | 4129/10000 [3:38:50<7:57:20,  4.88s/it]

Total reward after episode 4129 is 1727.0


 41%|████▏     | 4130/10000 [3:38:52<6:26:52,  3.95s/it]

Total reward after episode 4130 is 618.0


 41%|████▏     | 4131/10000 [3:38:58<7:23:06,  4.53s/it]

Total reward after episode 4131 is 2651.0


 41%|████▏     | 4132/10000 [3:39:02<7:08:20,  4.38s/it]

Total reward after episode 4132 is 1929.0


 41%|████▏     | 4133/10000 [3:39:06<6:57:33,  4.27s/it]

Total reward after episode 4133 is 1933.0


 41%|████▏     | 4134/10000 [3:39:11<7:21:13,  4.51s/it]

Total reward after episode 4134 is 2369.0


 41%|████▏     | 4135/10000 [3:39:18<8:26:03,  5.18s/it]

Total reward after episode 4135 is 3060.0


 41%|████▏     | 4136/10000 [3:39:22<7:53:19,  4.84s/it]

Total reward after episode 4136 is 1951.0


 41%|████▏     | 4137/10000 [3:39:30<9:28:26,  5.82s/it]

Total reward after episode 4137 is 3047.0


 41%|████▏     | 4138/10000 [3:39:34<8:44:43,  5.37s/it]

Total reward after episode 4138 is 1926.0


 41%|████▏     | 4139/10000 [3:39:38<8:01:15,  4.93s/it]

Total reward after episode 4139 is 1699.0


 41%|████▏     | 4140/10000 [3:39:45<9:14:56,  5.68s/it]

Total reward after episode 4140 is 3054.0


 41%|████▏     | 4141/10000 [3:39:47<7:11:15,  4.42s/it]

Total reward after episode 4141 is 608.0


 41%|████▏     | 4142/10000 [3:39:54<8:17:57,  5.10s/it]

Total reward after episode 4142 is 2354.0


 41%|████▏     | 4143/10000 [3:39:58<7:49:13,  4.81s/it]

Total reward after episode 4143 is 1863.0


 41%|████▏     | 4144/10000 [3:40:01<7:16:27,  4.47s/it]

Total reward after episode 4144 is 1703.0


 41%|████▏     | 4145/10000 [3:40:04<6:32:56,  4.03s/it]

Total reward after episode 4145 is 1336.0


 41%|████▏     | 4146/10000 [3:40:10<7:09:39,  4.40s/it]

Total reward after episode 4146 is 1312.0


 41%|████▏     | 4147/10000 [3:40:14<6:58:47,  4.29s/it]

Total reward after episode 4147 is 1698.0


 41%|████▏     | 4148/10000 [3:40:16<6:01:50,  3.71s/it]

Total reward after episode 4148 is 1050.0


 41%|████▏     | 4149/10000 [3:40:20<6:22:55,  3.93s/it]

Total reward after episode 4149 is 1925.0


 42%|████▏     | 4150/10000 [3:40:24<6:24:01,  3.94s/it]

Total reward after episode 4150 is 1704.0


 42%|████▏     | 4151/10000 [3:40:27<5:38:30,  3.47s/it]

Total reward after episode 4151 is 1075.0


 42%|████▏     | 4152/10000 [3:40:35<7:54:07,  4.86s/it]

Total reward after episode 4152 is 3042.0


 42%|████▏     | 4153/10000 [3:40:38<7:12:16,  4.44s/it]

Total reward after episode 4153 is 1583.0


 42%|████▏     | 4154/10000 [3:40:42<7:03:14,  4.34s/it]

Total reward after episode 4154 is 1924.0


 42%|████▏     | 4155/10000 [3:40:45<6:23:43,  3.94s/it]

Total reward after episode 4155 is 1331.0


 42%|████▏     | 4156/10000 [3:40:53<8:09:30,  5.03s/it]

Total reward after episode 4156 is 3053.0


 42%|████▏     | 4157/10000 [3:40:55<6:30:43,  4.01s/it]

Total reward after episode 4157 is 621.0


 42%|████▏     | 4158/10000 [3:41:06<10:12:03,  6.29s/it]

Total reward after episode 4158 is 3015.0


 42%|████▏     | 4159/10000 [3:41:11<9:18:53,  5.74s/it] 

Total reward after episode 4159 is 1857.0


 42%|████▏     | 4160/10000 [3:41:15<8:41:01,  5.35s/it]

Total reward after episode 4160 is 1686.0


 42%|████▏     | 4161/10000 [3:41:22<9:18:34,  5.74s/it]

Total reward after episode 4161 is 3061.0


 42%|████▏     | 4162/10000 [3:41:26<8:36:05,  5.30s/it]

Total reward after episode 4162 is 1933.0


 42%|████▏     | 4163/10000 [3:41:31<8:38:40,  5.33s/it]

Total reward after episode 4163 is 2368.0


 42%|████▏     | 4164/10000 [3:41:35<7:59:23,  4.93s/it]

Total reward after episode 4164 is 1930.0


 42%|████▏     | 4165/10000 [3:41:39<7:10:42,  4.43s/it]

Total reward after episode 4165 is 1348.0


 42%|████▏     | 4166/10000 [3:41:43<7:19:36,  4.52s/it]

Total reward after episode 4166 is 1927.0


 42%|████▏     | 4167/10000 [3:41:47<6:42:36,  4.14s/it]

Total reward after episode 4167 is 1415.0


 42%|████▏     | 4168/10000 [3:41:49<6:01:55,  3.72s/it]

Total reward after episode 4168 is 819.0


 42%|████▏     | 4169/10000 [3:41:54<6:15:01,  3.86s/it]

Total reward after episode 4169 is 1926.0


 42%|████▏     | 4170/10000 [3:41:56<5:33:59,  3.44s/it]

Total reward after episode 4170 is 1073.0


 42%|████▏     | 4171/10000 [3:42:00<5:38:01,  3.48s/it]

Total reward after episode 4171 is 1691.0


 42%|████▏     | 4172/10000 [3:42:05<6:18:24,  3.90s/it]

Total reward after episode 4172 is 1682.0


 42%|████▏     | 4173/10000 [3:42:08<5:56:00,  3.67s/it]

Total reward after episode 4173 is 1586.0


 42%|████▏     | 4174/10000 [3:42:10<5:12:13,  3.22s/it]

Total reward after episode 4174 is 1051.0


 42%|████▏     | 4175/10000 [3:42:13<4:57:12,  3.06s/it]

Total reward after episode 4175 is 1068.0


 42%|████▏     | 4176/10000 [3:42:16<4:59:43,  3.09s/it]

Total reward after episode 4176 is 1584.0


 42%|████▏     | 4177/10000 [3:42:17<4:10:44,  2.58s/it]

Total reward after episode 4177 is 621.0


 42%|████▏     | 4178/10000 [3:42:24<6:30:43,  4.03s/it]

Total reward after episode 4178 is 3054.0


 42%|████▏     | 4179/10000 [3:42:29<6:32:41,  4.05s/it]

Total reward after episode 4179 is 1869.0


 42%|████▏     | 4180/10000 [3:42:31<5:53:37,  3.65s/it]

Total reward after episode 4180 is 1355.0


 42%|████▏     | 4181/10000 [3:42:35<6:05:46,  3.77s/it]

Total reward after episode 4181 is 1711.0


 42%|████▏     | 4182/10000 [3:42:37<5:01:50,  3.11s/it]

Total reward after episode 4182 is 736.0


 42%|████▏     | 4183/10000 [3:42:45<7:32:42,  4.67s/it]

Total reward after episode 4183 is 2339.0


 42%|████▏     | 4184/10000 [3:42:49<7:15:24,  4.49s/it]

Total reward after episode 4184 is 1926.0


 42%|████▏     | 4185/10000 [3:42:51<5:49:31,  3.61s/it]

Total reward after episode 4185 is 738.0


 42%|████▏     | 4186/10000 [3:43:01<8:59:46,  5.57s/it]

Total reward after episode 4186 is 3028.0


 42%|████▏     | 4187/10000 [3:43:08<9:42:43,  6.01s/it]

Total reward after episode 4187 is 3057.0


 42%|████▏     | 4188/10000 [3:43:12<8:28:46,  5.25s/it]

Total reward after episode 4188 is 1434.0


 42%|████▏     | 4189/10000 [3:43:14<7:12:35,  4.47s/it]

Total reward after episode 4189 is 1052.0


 42%|████▏     | 4190/10000 [3:43:16<5:56:25,  3.68s/it]

Total reward after episode 4190 is 609.0


 42%|████▏     | 4191/10000 [3:43:18<4:55:07,  3.05s/it]

Total reward after episode 4191 is 737.0


 42%|████▏     | 4192/10000 [3:43:21<5:19:45,  3.30s/it]

Total reward after episode 4192 is 1867.0


 42%|████▏     | 4193/10000 [3:43:26<5:41:33,  3.53s/it]

Total reward after episode 4193 is 1928.0


 42%|████▏     | 4194/10000 [3:43:34<8:08:33,  5.05s/it]

Total reward after episode 4194 is 2635.0


 42%|████▏     | 4195/10000 [3:43:35<6:00:27,  3.73s/it]

Total reward after episode 4195 is 252.0


 42%|████▏     | 4196/10000 [3:43:42<7:29:59,  4.65s/it]

Total reward after episode 4196 is 3054.0


 42%|████▏     | 4197/10000 [3:43:45<6:57:21,  4.32s/it]

Total reward after episode 4197 is 1714.0


 42%|████▏     | 4198/10000 [3:43:50<7:21:58,  4.57s/it]

Total reward after episode 4198 is 2369.0


 42%|████▏     | 4199/10000 [3:43:53<6:16:55,  3.90s/it]

Total reward after episode 4199 is 1073.0


 42%|████▏     | 4200/10000 [3:43:54<5:02:05,  3.13s/it]

Total reward after episode 4200 is 614.0


 42%|████▏     | 4201/10000 [3:43:55<3:49:50,  2.38s/it]

Total reward after episode 4201 is 250.0


 42%|████▏     | 4202/10000 [3:43:59<4:44:17,  2.94s/it]

Total reward after episode 4202 is 1844.0


 42%|████▏     | 4203/10000 [3:44:03<5:22:03,  3.33s/it]

Total reward after episode 4203 is 1922.0


 42%|████▏     | 4204/10000 [3:44:05<4:29:47,  2.79s/it]

Total reward after episode 4204 is 738.0


 42%|████▏     | 4205/10000 [3:44:07<4:24:27,  2.74s/it]

Total reward after episode 4205 is 1049.0


 42%|████▏     | 4206/10000 [3:44:10<4:14:51,  2.64s/it]

Total reward after episode 4206 is 1153.0


 42%|████▏     | 4207/10000 [3:44:13<4:50:17,  3.01s/it]

Total reward after episode 4207 is 1928.0


 42%|████▏     | 4208/10000 [3:44:17<4:58:53,  3.10s/it]

Total reward after episode 4208 is 1343.0


 42%|████▏     | 4209/10000 [3:44:19<4:32:14,  2.82s/it]

Total reward after episode 4209 is 1051.0


 42%|████▏     | 4210/10000 [3:44:22<4:49:06,  3.00s/it]

Total reward after episode 4210 is 1330.0


 42%|████▏     | 4211/10000 [3:44:24<4:07:42,  2.57s/it]

Total reward after episode 4211 is 738.0


 42%|████▏     | 4212/10000 [3:44:27<4:11:31,  2.61s/it]

Total reward after episode 4212 is 1067.0


 42%|████▏     | 4213/10000 [3:44:54<16:21:32, 10.18s/it]

Total reward after episode 4213 is 383.0


 42%|████▏     | 4214/10000 [3:45:01<14:49:54,  9.23s/it]

Total reward after episode 4214 is 3058.0


 42%|████▏     | 4215/10000 [3:45:02<10:41:00,  6.65s/it]

Total reward after episode 4215 is 252.0


 42%|████▏     | 4216/10000 [3:45:06<9:28:36,  5.90s/it] 

Total reward after episode 4216 is 1932.0


 42%|████▏     | 4217/10000 [3:45:10<8:35:15,  5.35s/it]

Total reward after episode 4217 is 1934.0


 42%|████▏     | 4218/10000 [3:45:12<6:39:11,  4.14s/it]

Total reward after episode 4218 is 608.0


 42%|████▏     | 4219/10000 [3:45:15<6:10:20,  3.84s/it]

Total reward after episode 4219 is 1332.0


 42%|████▏     | 4220/10000 [3:45:20<6:49:25,  4.25s/it]

Total reward after episode 4220 is 2368.0


 42%|████▏     | 4221/10000 [3:45:23<6:14:51,  3.89s/it]

Total reward after episode 4221 is 1333.0


 42%|████▏     | 4222/10000 [3:45:25<5:30:35,  3.43s/it]

Total reward after episode 4222 is 1070.0


 42%|████▏     | 4223/10000 [3:45:27<4:33:54,  2.84s/it]

Total reward after episode 4223 is 612.0


 42%|████▏     | 4224/10000 [3:45:31<5:18:16,  3.31s/it]

Total reward after episode 4224 is 1927.0


 42%|████▏     | 4225/10000 [3:45:33<4:34:38,  2.85s/it]

Total reward after episode 4225 is 621.0


 42%|████▏     | 4226/10000 [3:45:34<3:51:13,  2.40s/it]

Total reward after episode 4226 is 611.0


 42%|████▏     | 4227/10000 [3:45:36<3:19:08,  2.07s/it]

Total reward after episode 4227 is 611.0


 42%|████▏     | 4228/10000 [3:45:43<5:36:00,  3.49s/it]

Total reward after episode 4228 is 3060.0


 42%|████▏     | 4229/10000 [3:45:44<4:33:02,  2.84s/it]

Total reward after episode 4229 is 610.0


 42%|████▏     | 4230/10000 [3:45:45<3:51:32,  2.41s/it]

Total reward after episode 4230 is 609.0


 42%|████▏     | 4231/10000 [3:45:49<4:44:24,  2.96s/it]

Total reward after episode 4231 is 1925.0


 42%|████▏     | 4232/10000 [3:45:52<4:42:17,  2.94s/it]

Total reward after episode 4232 is 1345.0


 42%|████▏     | 4233/10000 [3:45:56<4:58:03,  3.10s/it]

Total reward after episode 4233 is 1693.0


 42%|████▏     | 4234/10000 [3:45:59<5:00:38,  3.13s/it]

Total reward after episode 4234 is 1342.0


 42%|████▏     | 4235/10000 [3:46:01<4:14:39,  2.65s/it]

Total reward after episode 4235 is 737.0


 42%|████▏     | 4236/10000 [3:46:04<4:47:36,  2.99s/it]

Total reward after episode 4236 is 1694.0


 42%|████▏     | 4237/10000 [3:46:11<6:45:31,  4.22s/it]

Total reward after episode 4237 is 3058.0


 42%|████▏     | 4238/10000 [3:46:14<6:07:08,  3.82s/it]

Total reward after episode 4238 is 1350.0


 42%|████▏     | 4239/10000 [3:46:17<5:34:26,  3.48s/it]

Total reward after episode 4239 is 1353.0


 42%|████▏     | 4240/10000 [3:46:18<4:33:45,  2.85s/it]

Total reward after episode 4240 is 609.0


 42%|████▏     | 4241/10000 [3:46:23<5:09:21,  3.22s/it]

Total reward after episode 4241 is 1931.0


 42%|████▏     | 4242/10000 [3:46:26<5:22:34,  3.36s/it]

Total reward after episode 4242 is 1694.0


 42%|████▏     | 4243/10000 [3:46:41<11:01:43,  6.90s/it]

Total reward after episode 4243 is 2984.0


 42%|████▏     | 4244/10000 [3:46:43<8:21:00,  5.22s/it] 

Total reward after episode 4244 is 611.0


 42%|████▏     | 4245/10000 [3:46:49<9:04:56,  5.68s/it]

Total reward after episode 4245 is 3054.0


 42%|████▏     | 4246/10000 [3:46:52<7:49:43,  4.90s/it]

Total reward after episode 4246 is 1349.0


 42%|████▏     | 4247/10000 [3:46:56<7:12:21,  4.51s/it]

Total reward after episode 4247 is 1695.0


 42%|████▏     | 4248/10000 [3:47:05<9:14:20,  5.78s/it]

Total reward after episode 4248 is 3042.0


 42%|████▏     | 4249/10000 [3:47:09<8:14:49,  5.16s/it]

Total reward after episode 4249 is 1709.0


 42%|████▎     | 4250/10000 [3:47:18<10:20:20,  6.47s/it]

Total reward after episode 4250 is 3029.0


 43%|████▎     | 4251/10000 [3:47:22<8:59:13,  5.63s/it] 

Total reward after episode 4251 is 1706.0


 43%|████▎     | 4252/10000 [3:47:29<9:33:37,  5.99s/it]

Total reward after episode 4252 is 2357.0


 43%|████▎     | 4253/10000 [3:47:34<9:24:45,  5.90s/it]

Total reward after episode 4253 is 2371.0


 43%|████▎     | 4254/10000 [3:47:36<7:14:14,  4.53s/it]

Total reward after episode 4254 is 611.0


 43%|████▎     | 4255/10000 [3:47:45<9:43:46,  6.10s/it]

Total reward after episode 4255 is 3033.0


 43%|████▎     | 4256/10000 [3:47:49<8:25:11,  5.28s/it]

Total reward after episode 4256 is 1576.0


 43%|████▎     | 4257/10000 [3:47:52<7:19:21,  4.59s/it]

Total reward after episode 4257 is 817.0


 43%|████▎     | 4258/10000 [3:48:01<9:23:41,  5.89s/it]

Total reward after episode 4258 is 989.0


 43%|████▎     | 4259/10000 [3:48:02<7:11:49,  4.51s/it]

Total reward after episode 4259 is 611.0


 43%|████▎     | 4260/10000 [3:48:10<9:04:13,  5.69s/it]

Total reward after episode 4260 is 767.0


 43%|████▎     | 4261/10000 [3:48:14<8:11:52,  5.14s/it]

Total reward after episode 4261 is 1695.0


 43%|████▎     | 4262/10000 [3:48:19<8:13:55,  5.16s/it]

Total reward after episode 4262 is 2372.0


 43%|████▎     | 4263/10000 [3:48:22<7:02:31,  4.42s/it]

Total reward after episode 4263 is 1351.0


 43%|████▎     | 4264/10000 [3:48:26<6:41:29,  4.20s/it]

Total reward after episode 4264 is 1710.0


 43%|████▎     | 4265/10000 [3:48:32<7:37:22,  4.79s/it]

Total reward after episode 4265 is 2362.0


 43%|████▎     | 4266/10000 [3:48:33<5:57:11,  3.74s/it]

Total reward after episode 4266 is 610.0


 43%|████▎     | 4267/10000 [3:48:38<6:37:37,  4.16s/it]

Total reward after episode 4267 is 2369.0


 43%|████▎     | 4268/10000 [3:48:42<6:34:28,  4.13s/it]

Total reward after episode 4268 is 1924.0


 43%|████▎     | 4269/10000 [3:48:46<6:11:02,  3.88s/it]

Total reward after episode 4269 is 814.0


 43%|████▎     | 4270/10000 [3:48:50<6:20:49,  3.99s/it]

Total reward after episode 4270 is 1924.0


 43%|████▎     | 4271/10000 [3:48:51<5:04:12,  3.19s/it]

Total reward after episode 4271 is 611.0


 43%|████▎     | 4272/10000 [3:49:00<7:32:07,  4.74s/it]

Total reward after episode 4272 is 3046.0


 43%|████▎     | 4273/10000 [3:49:03<6:43:20,  4.23s/it]

Total reward after episode 4273 is 1437.0


 43%|████▎     | 4274/10000 [3:49:07<6:37:05,  4.16s/it]

Total reward after episode 4274 is 1884.0


 43%|████▎     | 4275/10000 [3:49:08<5:14:45,  3.30s/it]

Total reward after episode 4275 is 610.0


 43%|████▎     | 4276/10000 [3:49:11<5:18:56,  3.34s/it]

Total reward after episode 4276 is 1578.0


 43%|████▎     | 4277/10000 [3:49:13<4:22:08,  2.75s/it]

Total reward after episode 4277 is 611.0


 43%|████▎     | 4278/10000 [3:49:14<3:45:50,  2.37s/it]

Total reward after episode 4278 is 616.0


 43%|████▎     | 4279/10000 [3:49:19<4:54:19,  3.09s/it]

Total reward after episode 4279 is 1688.0


 43%|████▎     | 4280/10000 [3:49:24<5:54:53,  3.72s/it]

Total reward after episode 4280 is 2371.0


 43%|████▎     | 4281/10000 [3:49:26<4:46:35,  3.01s/it]

Total reward after episode 4281 is 610.0


 43%|████▎     | 4282/10000 [3:49:37<8:55:35,  5.62s/it]

Total reward after episode 4282 is 3016.0


 43%|████▎     | 4283/10000 [3:49:40<7:39:07,  4.82s/it]

Total reward after episode 4283 is 1048.0


 43%|████▎     | 4284/10000 [3:49:42<5:58:57,  3.77s/it]

Total reward after episode 4284 is 611.0


 43%|████▎     | 4285/10000 [3:49:45<5:53:09,  3.71s/it]

Total reward after episode 4285 is 1576.0


 43%|████▎     | 4286/10000 [3:49:46<4:45:24,  3.00s/it]

Total reward after episode 4286 is 611.0


 43%|████▎     | 4287/10000 [3:49:57<8:27:22,  5.33s/it]

Total reward after episode 4287 is 3024.0


 43%|████▎     | 4288/10000 [3:49:59<6:33:30,  4.13s/it]

Total reward after episode 4288 is 611.0


 43%|████▎     | 4289/10000 [3:50:00<5:12:45,  3.29s/it]

Total reward after episode 4289 is 611.0


 43%|████▎     | 4290/10000 [3:50:03<4:59:54,  3.15s/it]

Total reward after episode 4290 is 1046.0


 43%|████▎     | 4291/10000 [3:50:05<4:43:05,  2.98s/it]

Total reward after episode 4291 is 1051.0


 43%|████▎     | 4292/10000 [3:50:09<5:11:11,  3.27s/it]

Total reward after episode 4292 is 1692.0


 43%|████▎     | 4293/10000 [3:50:12<4:44:24,  2.99s/it]

Total reward after episode 4293 is 1053.0


 43%|████▎     | 4294/10000 [3:50:12<3:37:41,  2.29s/it]

Total reward after episode 4294 is 252.0


 43%|████▎     | 4295/10000 [3:50:14<3:10:22,  2.00s/it]

Total reward after episode 4295 is 609.0


 43%|████▎     | 4296/10000 [3:50:21<5:50:25,  3.69s/it]

Total reward after episode 4296 is 3052.0


 43%|████▎     | 4297/10000 [3:50:25<6:02:27,  3.81s/it]

Total reward after episode 4297 is 1340.0


 43%|████▎     | 4298/10000 [3:50:33<7:47:57,  4.92s/it]

Total reward after episode 4298 is 3054.0


 43%|████▎     | 4299/10000 [3:50:36<6:46:22,  4.28s/it]

Total reward after episode 4299 is 1351.0


 43%|████▎     | 4300/10000 [3:50:39<6:22:49,  4.03s/it]

Total reward after episode 4300 is 1345.0


 43%|████▎     | 4301/10000 [3:50:43<6:23:17,  4.04s/it]

Total reward after episode 4301 is 1859.0


 43%|████▎     | 4302/10000 [3:50:45<5:15:15,  3.32s/it]

Total reward after episode 4302 is 629.0


 43%|████▎     | 4303/10000 [3:50:46<4:18:23,  2.72s/it]

Total reward after episode 4303 is 610.0


 43%|████▎     | 4304/10000 [3:50:55<7:14:30,  4.58s/it]

Total reward after episode 4304 is 3042.0


 43%|████▎     | 4305/10000 [3:50:56<5:42:21,  3.61s/it]

Total reward after episode 4305 is 611.0


 43%|████▎     | 4306/10000 [3:50:58<4:39:57,  2.95s/it]

Total reward after episode 4306 is 614.0


 43%|████▎     | 4307/10000 [3:51:06<7:12:26,  4.56s/it]

Total reward after episode 4307 is 2343.0


 43%|████▎     | 4308/10000 [3:51:07<5:22:10,  3.40s/it]

Total reward after episode 4308 is 250.0


 43%|████▎     | 4309/10000 [3:51:10<5:28:35,  3.46s/it]

Total reward after episode 4309 is 1330.0


 43%|████▎     | 4310/10000 [3:51:12<4:33:58,  2.89s/it]

Total reward after episode 4310 is 654.0


 43%|████▎     | 4311/10000 [3:51:15<4:27:57,  2.83s/it]

Total reward after episode 4311 is 1351.0


 43%|████▎     | 4312/10000 [3:51:15<3:25:49,  2.17s/it]

Total reward after episode 4312 is 251.0


 43%|████▎     | 4313/10000 [3:51:19<4:05:20,  2.59s/it]

Total reward after episode 4313 is 1715.0


 43%|████▎     | 4314/10000 [3:51:19<3:09:30,  2.00s/it]

Total reward after episode 4314 is 251.0


 43%|████▎     | 4315/10000 [3:51:23<3:55:01,  2.48s/it]

Total reward after episode 4315 is 1726.0


 43%|████▎     | 4316/10000 [3:51:30<6:14:42,  3.96s/it]

Total reward after episode 4316 is 3055.0


 43%|████▎     | 4317/10000 [3:51:31<4:40:47,  2.96s/it]

Total reward after episode 4317 is 250.0


 43%|████▎     | 4318/10000 [3:51:44<9:12:10,  5.83s/it]

Total reward after episode 4318 is 3009.0


 43%|████▎     | 4319/10000 [3:51:45<7:12:24,  4.57s/it]

Total reward after episode 4319 is 609.0


 43%|████▎     | 4320/10000 [3:51:48<6:33:27,  4.16s/it]

Total reward after episode 4320 is 738.0


 43%|████▎     | 4321/10000 [3:51:54<7:24:14,  4.69s/it]

Total reward after episode 4321 is 2360.0


 43%|████▎     | 4322/10000 [3:51:58<7:01:04,  4.45s/it]

Total reward after episode 4322 is 1865.0


 43%|████▎     | 4323/10000 [3:52:00<5:31:23,  3.50s/it]

Total reward after episode 4323 is 611.0


 43%|████▎     | 4324/10000 [3:52:06<7:02:27,  4.47s/it]

Total reward after episode 4324 is 2353.0


 43%|████▎     | 4325/10000 [3:52:11<7:06:41,  4.51s/it]

Total reward after episode 4325 is 1686.0


 43%|████▎     | 4326/10000 [3:52:12<5:17:02,  3.35s/it]

Total reward after episode 4326 is 246.0


 43%|████▎     | 4327/10000 [3:52:21<8:00:36,  5.08s/it]

Total reward after episode 4327 is 2336.0


 43%|████▎     | 4328/10000 [3:52:22<6:13:31,  3.95s/it]

Total reward after episode 4328 is 610.0


 43%|████▎     | 4329/10000 [3:52:29<7:29:13,  4.75s/it]

Total reward after episode 4329 is 3056.0


 43%|████▎     | 4330/10000 [3:52:33<7:16:34,  4.62s/it]

Total reward after episode 4330 is 1847.0


 43%|████▎     | 4331/10000 [3:52:34<5:42:50,  3.63s/it]

Total reward after episode 4331 is 610.0


 43%|████▎     | 4332/10000 [3:52:49<10:53:52,  6.92s/it]

Total reward after episode 4332 is 2989.0


 43%|████▎     | 4333/10000 [3:52:56<10:53:19,  6.92s/it]

Total reward after episode 4333 is 2351.0


 43%|████▎     | 4334/10000 [3:53:02<10:39:22,  6.77s/it]

Total reward after episode 4334 is 2651.0


 43%|████▎     | 4335/10000 [3:53:08<10:13:02,  6.49s/it]

Total reward after episode 4335 is 2365.0


 43%|████▎     | 4336/10000 [3:53:11<8:30:19,  5.41s/it] 

Total reward after episode 4336 is 1350.0


 43%|████▎     | 4337/10000 [3:53:17<8:39:41,  5.51s/it]

Total reward after episode 4337 is 2648.0


 43%|████▎     | 4338/10000 [3:53:17<6:21:32,  4.04s/it]

Total reward after episode 4338 is 251.0


 43%|████▎     | 4339/10000 [3:53:21<6:22:39,  4.06s/it]

Total reward after episode 4339 is 1924.0


 43%|████▎     | 4340/10000 [3:53:28<7:46:41,  4.95s/it]

Total reward after episode 4340 is 3058.0


 43%|████▎     | 4341/10000 [3:53:32<7:01:25,  4.47s/it]

Total reward after episode 4341 is 1589.0


 43%|████▎     | 4342/10000 [3:53:36<6:55:31,  4.41s/it]

Total reward after episode 4342 is 1714.0


 43%|████▎     | 4343/10000 [3:53:39<6:20:02,  4.03s/it]

Total reward after episode 4343 is 1585.0


 43%|████▎     | 4344/10000 [3:53:46<7:32:00,  4.79s/it]

Total reward after episode 4344 is 3062.0


 43%|████▎     | 4345/10000 [3:53:53<8:34:47,  5.46s/it]

Total reward after episode 4345 is 3058.0


 43%|████▎     | 4346/10000 [3:53:53<6:18:27,  4.02s/it]

Total reward after episode 4346 is 252.0


 43%|████▎     | 4347/10000 [3:53:54<4:42:27,  3.00s/it]

Total reward after episode 4347 is 252.0


 43%|████▎     | 4348/10000 [3:53:58<5:07:32,  3.26s/it]

Total reward after episode 4348 is 1696.0


 43%|████▎     | 4349/10000 [3:54:07<7:48:30,  4.97s/it]

Total reward after episode 4349 is 3040.0


 44%|████▎     | 4350/10000 [3:54:11<7:14:27,  4.61s/it]

Total reward after episode 4350 is 1705.0


 44%|████▎     | 4351/10000 [3:54:14<6:42:51,  4.28s/it]

Total reward after episode 4351 is 1709.0


 44%|████▎     | 4352/10000 [3:54:18<6:38:05,  4.23s/it]

Total reward after episode 4352 is 1934.0


 44%|████▎     | 4353/10000 [3:54:22<6:36:42,  4.22s/it]

Total reward after episode 4353 is 1928.0


 44%|████▎     | 4354/10000 [3:54:24<5:13:48,  3.33s/it]

Total reward after episode 4354 is 611.0


 44%|████▎     | 4355/10000 [3:54:32<7:25:08,  4.73s/it]

Total reward after episode 4355 is 3049.0


 44%|████▎     | 4356/10000 [3:54:34<6:24:11,  4.08s/it]

Total reward after episode 4356 is 743.0


 44%|████▎     | 4357/10000 [3:54:40<7:09:23,  4.57s/it]

Total reward after episode 4357 is 2366.0


 44%|████▎     | 4358/10000 [3:54:45<7:27:40,  4.76s/it]

Total reward after episode 4358 is 2367.0


 44%|████▎     | 4359/10000 [3:54:48<6:41:22,  4.27s/it]

Total reward after episode 4359 is 1332.0


 44%|████▎     | 4360/10000 [3:54:52<6:27:08,  4.12s/it]

Total reward after episode 4360 is 1926.0


 44%|████▎     | 4361/10000 [3:54:58<7:15:24,  4.63s/it]

Total reward after episode 4361 is 2365.0


 44%|████▎     | 4362/10000 [3:55:01<6:45:04,  4.31s/it]

Total reward after episode 4362 is 1702.0


 44%|████▎     | 4363/10000 [3:55:04<6:06:04,  3.90s/it]

Total reward after episode 4363 is 1349.0


 44%|████▎     | 4364/10000 [3:55:12<7:43:36,  4.94s/it]

Total reward after episode 4364 is 3054.0


 44%|████▎     | 4365/10000 [3:55:15<7:02:00,  4.49s/it]

Total reward after episode 4365 is 746.0


 44%|████▎     | 4366/10000 [3:55:25<9:38:31,  6.16s/it]

Total reward after episode 4366 is 2323.0


 44%|████▎     | 4367/10000 [3:55:29<8:38:31,  5.52s/it]

Total reward after episode 4367 is 1864.0


 44%|████▎     | 4368/10000 [3:55:33<7:40:30,  4.91s/it]

Total reward after episode 4368 is 1696.0


 44%|████▎     | 4369/10000 [3:55:36<7:07:40,  4.56s/it]

Total reward after episode 4369 is 1701.0


 44%|████▎     | 4370/10000 [3:55:38<5:42:11,  3.65s/it]

Total reward after episode 4370 is 739.0


 44%|████▎     | 4371/10000 [3:55:49<9:19:54,  5.97s/it]

Total reward after episode 4371 is 3011.0


 44%|████▎     | 4372/10000 [3:55:52<7:50:34,  5.02s/it]

Total reward after episode 4372 is 818.0


 44%|████▎     | 4373/10000 [3:56:03<10:48:27,  6.91s/it]

Total reward after episode 4373 is 3024.0


 44%|████▎     | 4374/10000 [3:56:13<12:15:11,  7.84s/it]

Total reward after episode 4374 is 3030.0


 44%|████▍     | 4375/10000 [3:56:15<9:14:17,  5.91s/it] 

Total reward after episode 4375 is 605.0


 44%|████▍     | 4376/10000 [3:56:18<7:51:59,  5.04s/it]

Total reward after episode 4376 is 1342.0


 44%|████▍     | 4377/10000 [3:56:22<7:14:09,  4.63s/it]

Total reward after episode 4377 is 1709.0


 44%|████▍     | 4378/10000 [3:56:23<5:42:33,  3.66s/it]

Total reward after episode 4378 is 608.0


 44%|████▍     | 4379/10000 [3:56:26<5:31:38,  3.54s/it]

Total reward after episode 4379 is 1589.0


 44%|████▍     | 4380/10000 [3:56:29<5:19:48,  3.41s/it]

Total reward after episode 4380 is 1340.0


 44%|████▍     | 4381/10000 [3:56:34<5:46:50,  3.70s/it]

Total reward after episode 4381 is 1927.0


 44%|████▍     | 4382/10000 [3:56:38<5:55:13,  3.79s/it]

Total reward after episode 4382 is 1925.0


 44%|████▍     | 4383/10000 [3:56:47<8:17:23,  5.31s/it]

Total reward after episode 4383 is 3041.0


 44%|████▍     | 4384/10000 [3:56:53<8:57:56,  5.75s/it]

Total reward after episode 4384 is 3060.0


 44%|████▍     | 4385/10000 [3:56:59<8:50:24,  5.67s/it]

Total reward after episode 4385 is 747.0


 44%|████▍     | 4386/10000 [3:57:02<7:46:22,  4.98s/it]

Total reward after episode 4386 is 1728.0


 44%|████▍     | 4387/10000 [3:57:04<6:27:25,  4.14s/it]

Total reward after episode 4387 is 1048.0


 44%|████▍     | 4388/10000 [3:57:13<8:18:35,  5.33s/it]

Total reward after episode 4388 is 2341.0


 44%|████▍     | 4389/10000 [3:57:16<7:17:31,  4.68s/it]

Total reward after episode 4389 is 1590.0


 44%|████▍     | 4390/10000 [3:57:17<5:50:22,  3.75s/it]

Total reward after episode 4390 is 653.0


 44%|████▍     | 4391/10000 [3:57:18<4:23:04,  2.81s/it]

Total reward after episode 4391 is 247.0


 44%|████▍     | 4392/10000 [3:57:23<5:26:46,  3.50s/it]

Total reward after episode 4392 is 2370.0


 44%|████▍     | 4393/10000 [3:57:35<9:29:10,  6.09s/it]

Total reward after episode 4393 is 3010.0


 44%|████▍     | 4394/10000 [3:57:37<7:21:06,  4.72s/it]

Total reward after episode 4394 is 737.0


 44%|████▍     | 4395/10000 [3:57:41<7:19:22,  4.70s/it]

Total reward after episode 4395 is 1877.0


 44%|████▍     | 4396/10000 [3:57:43<6:01:16,  3.87s/it]

Total reward after episode 4396 is 734.0


 44%|████▍     | 4397/10000 [3:57:45<4:53:34,  3.14s/it]

Total reward after episode 4397 is 603.0


 44%|████▍     | 4398/10000 [3:57:48<4:52:43,  3.14s/it]

Total reward after episode 4398 is 1344.0


 44%|████▍     | 4399/10000 [3:57:50<4:32:11,  2.92s/it]

Total reward after episode 4399 is 1046.0


 44%|████▍     | 4400/10000 [3:57:54<4:56:38,  3.18s/it]

Total reward after episode 4400 is 1699.0


 44%|████▍     | 4401/10000 [3:58:00<6:08:16,  3.95s/it]

Total reward after episode 4401 is 2360.0


 44%|████▍     | 4402/10000 [3:58:06<7:25:31,  4.78s/it]

Total reward after episode 4402 is 3061.0


 44%|████▍     | 4403/10000 [3:58:08<5:49:05,  3.74s/it]

Total reward after episode 4403 is 608.0


 44%|████▍     | 4404/10000 [3:58:22<10:48:58,  6.96s/it]

Total reward after episode 4404 is 2987.0


 44%|████▍     | 4405/10000 [3:58:29<10:43:17,  6.90s/it]

Total reward after episode 4405 is 3060.0


 44%|████▍     | 4406/10000 [3:58:33<9:10:12,  5.90s/it] 

Total reward after episode 4406 is 1729.0


 44%|████▍     | 4407/10000 [3:58:36<8:11:09,  5.27s/it]

Total reward after episode 4407 is 1864.0


 44%|████▍     | 4408/10000 [3:58:38<6:26:20,  4.15s/it]

Total reward after episode 4408 is 737.0


 44%|████▍     | 4409/10000 [3:58:39<4:54:33,  3.16s/it]

Total reward after episode 4409 is 232.0


 44%|████▍     | 4410/10000 [3:58:41<4:28:45,  2.88s/it]

Total reward after episode 4410 is 1074.0


 44%|████▍     | 4411/10000 [3:58:45<4:53:40,  3.15s/it]

Total reward after episode 4411 is 1929.0


 44%|████▍     | 4412/10000 [3:58:48<4:43:05,  3.04s/it]

Total reward after episode 4412 is 1334.0


 44%|████▍     | 4413/10000 [3:58:51<5:07:05,  3.30s/it]

Total reward after episode 4413 is 1691.0


 44%|████▍     | 4414/10000 [3:58:55<5:18:48,  3.42s/it]

Total reward after episode 4414 is 1728.0


 44%|████▍     | 4415/10000 [3:58:59<5:20:12,  3.44s/it]

Total reward after episode 4415 is 1714.0


 44%|████▍     | 4416/10000 [3:59:06<6:57:56,  4.49s/it]

Total reward after episode 4416 is 3053.0


 44%|████▍     | 4417/10000 [3:59:06<5:10:25,  3.34s/it]

Total reward after episode 4417 is 247.0


 44%|████▍     | 4418/10000 [3:59:12<6:05:00,  3.92s/it]

Total reward after episode 4418 is 2364.0


 44%|████▍     | 4419/10000 [3:59:15<5:51:57,  3.78s/it]

Total reward after episode 4419 is 1705.0


 44%|████▍     | 4420/10000 [3:59:17<4:50:24,  3.12s/it]

Total reward after episode 4420 is 739.0


 44%|████▍     | 4421/10000 [3:59:22<6:07:17,  3.95s/it]

Total reward after episode 4421 is 2359.0


 44%|████▍     | 4422/10000 [3:59:29<7:24:27,  4.78s/it]

Total reward after episode 4422 is 3061.0


 44%|████▍     | 4423/10000 [3:59:31<5:55:56,  3.83s/it]

Total reward after episode 4423 is 738.0


 44%|████▍     | 4424/10000 [3:59:31<4:27:10,  2.87s/it]

Total reward after episode 4424 is 250.0


 44%|████▍     | 4425/10000 [3:59:33<3:50:12,  2.48s/it]

Total reward after episode 4425 is 737.0


 44%|████▍     | 4426/10000 [3:59:39<5:21:07,  3.46s/it]

Total reward after episode 4426 is 2362.0


 44%|████▍     | 4427/10000 [3:59:43<5:33:04,  3.59s/it]

Total reward after episode 4427 is 1929.0


 44%|████▍     | 4428/10000 [3:59:50<7:26:30,  4.81s/it]

Total reward after episode 4428 is 2346.0


 44%|████▍     | 4429/10000 [3:59:54<6:53:50,  4.46s/it]

Total reward after episode 4429 is 1705.0


 44%|████▍     | 4430/10000 [3:59:56<5:56:36,  3.84s/it]

Total reward after episode 4430 is 1052.0


 44%|████▍     | 4431/10000 [4:00:10<10:44:37,  6.95s/it]

Total reward after episode 4431 is 2992.0


 44%|████▍     | 4432/10000 [4:00:14<9:04:23,  5.87s/it] 

Total reward after episode 4432 is 598.0


 44%|████▍     | 4433/10000 [4:00:15<7:03:39,  4.57s/it]

Total reward after episode 4433 is 738.0


 44%|████▍     | 4434/10000 [4:00:22<8:12:06,  5.30s/it]

Total reward after episode 4434 is 2350.0


 44%|████▍     | 4435/10000 [4:00:27<8:06:57,  5.25s/it]

Total reward after episode 4435 is 2370.0


 44%|████▍     | 4436/10000 [4:00:37<10:11:21,  6.59s/it]

Total reward after episode 4436 is 3039.0


 44%|████▍     | 4437/10000 [4:00:44<10:22:23,  6.71s/it]

Total reward after episode 4437 is 1008.0


 44%|████▍     | 4438/10000 [4:00:48<8:53:48,  5.76s/it] 

Total reward after episode 4438 is 1693.0


 44%|████▍     | 4439/10000 [4:01:01<12:09:18,  7.87s/it]

Total reward after episode 4439 is 3003.0


 44%|████▍     | 4440/10000 [4:01:06<11:09:30,  7.22s/it]

Total reward after episode 4440 is 2366.0


 44%|████▍     | 4441/10000 [4:01:10<9:27:08,  6.12s/it] 

Total reward after episode 4441 is 1692.0


 44%|████▍     | 4442/10000 [4:01:15<9:00:21,  5.83s/it]

Total reward after episode 4442 is 2369.0


 44%|████▍     | 4443/10000 [4:01:16<6:36:11,  4.28s/it]

Total reward after episode 4443 is 251.0


 44%|████▍     | 4444/10000 [4:01:22<7:48:05,  5.05s/it]

Total reward after episode 4444 is 3060.0


 44%|████▍     | 4445/10000 [4:01:26<7:13:05,  4.68s/it]

Total reward after episode 4445 is 1870.0


 44%|████▍     | 4446/10000 [4:01:30<6:47:31,  4.40s/it]

Total reward after episode 4446 is 1929.0


 44%|████▍     | 4447/10000 [4:01:34<6:31:12,  4.23s/it]

Total reward after episode 4447 is 1690.0


 44%|████▍     | 4448/10000 [4:01:35<4:51:45,  3.15s/it]

Total reward after episode 4448 is 251.0


 44%|████▍     | 4449/10000 [4:01:38<5:01:29,  3.26s/it]

Total reward after episode 4449 is 1696.0


 44%|████▍     | 4450/10000 [4:01:46<7:10:08,  4.65s/it]

Total reward after episode 4450 is 2346.0


 45%|████▍     | 4451/10000 [4:01:53<8:28:12,  5.50s/it]

Total reward after episode 4451 is 3053.0


 45%|████▍     | 4452/10000 [4:01:56<7:12:16,  4.67s/it]

Total reward after episode 4452 is 1337.0


 45%|████▍     | 4453/10000 [4:01:58<5:44:33,  3.73s/it]

Total reward after episode 4453 is 739.0


 45%|████▍     | 4454/10000 [4:02:05<7:24:33,  4.81s/it]

Total reward after episode 4454 is 2351.0


 45%|████▍     | 4455/10000 [4:02:08<6:30:35,  4.23s/it]

Total reward after episode 4455 is 1337.0


 45%|████▍     | 4456/10000 [4:02:14<7:22:31,  4.79s/it]

Total reward after episode 4456 is 2363.0


 45%|████▍     | 4457/10000 [4:02:18<6:49:08,  4.43s/it]

Total reward after episode 4457 is 1689.0


 45%|████▍     | 4458/10000 [4:02:21<6:22:39,  4.14s/it]

Total reward after episode 4458 is 1698.0


 45%|████▍     | 4459/10000 [4:02:25<6:06:22,  3.97s/it]

Total reward after episode 4459 is 1702.0


 45%|████▍     | 4460/10000 [4:02:28<5:52:23,  3.82s/it]

Total reward after episode 4460 is 1718.0


 45%|████▍     | 4461/10000 [4:02:32<5:59:42,  3.90s/it]

Total reward after episode 4461 is 1864.0


 45%|████▍     | 4462/10000 [4:02:35<5:44:53,  3.74s/it]

Total reward after episode 4462 is 813.0


 45%|████▍     | 4463/10000 [4:02:49<10:02:09,  6.53s/it]

Total reward after episode 4463 is 3002.0


 45%|████▍     | 4464/10000 [4:02:56<10:15:59,  6.68s/it]

Total reward after episode 4464 is 3058.0


 45%|████▍     | 4465/10000 [4:02:59<8:44:24,  5.68s/it] 

Total reward after episode 4465 is 1705.0


 45%|████▍     | 4466/10000 [4:03:07<9:39:02,  6.28s/it]

Total reward after episode 4466 is 3052.0


 45%|████▍     | 4467/10000 [4:03:19<12:15:06,  7.97s/it]

Total reward after episode 4467 is 3012.0


 45%|████▍     | 4468/10000 [4:03:22<10:20:44,  6.73s/it]

Total reward after episode 4468 is 1949.0


 45%|████▍     | 4469/10000 [4:03:23<7:31:49,  4.90s/it] 

Total reward after episode 4469 is 251.0


 45%|████▍     | 4470/10000 [4:03:30<8:22:36,  5.45s/it]

Total reward after episode 4470 is 2640.0


 45%|████▍     | 4471/10000 [4:03:31<6:35:50,  4.30s/it]

Total reward after episode 4471 is 738.0


 45%|████▍     | 4472/10000 [4:03:33<5:32:22,  3.61s/it]

Total reward after episode 4472 is 634.0


 45%|████▍     | 4473/10000 [4:03:37<5:28:21,  3.56s/it]

Total reward after episode 4473 is 1692.0


 45%|████▍     | 4474/10000 [4:03:40<5:07:52,  3.34s/it]

Total reward after episode 4474 is 1336.0


 45%|████▍     | 4475/10000 [4:03:42<4:48:59,  3.14s/it]

Total reward after episode 4475 is 740.0


 45%|████▍     | 4476/10000 [4:03:44<4:04:45,  2.66s/it]

Total reward after episode 4476 is 739.0


 45%|████▍     | 4477/10000 [4:03:45<3:33:34,  2.32s/it]

Total reward after episode 4477 is 739.0


 45%|████▍     | 4478/10000 [4:04:02<10:09:55,  6.63s/it]

Total reward after episode 4478 is 2968.0


 45%|████▍     | 4479/10000 [4:04:10<10:39:01,  6.94s/it]

Total reward after episode 4479 is 3051.0


 45%|████▍     | 4480/10000 [4:04:18<11:14:38,  7.33s/it]

Total reward after episode 4480 is 3041.0


 45%|████▍     | 4481/10000 [4:04:22<9:39:22,  6.30s/it] 

Total reward after episode 4481 is 1928.0


 45%|████▍     | 4482/10000 [4:04:26<8:31:31,  5.56s/it]

Total reward after episode 4482 is 1949.0


 45%|████▍     | 4483/10000 [4:04:31<8:20:00,  5.44s/it]

Total reward after episode 4483 is 2369.0


 45%|████▍     | 4484/10000 [4:04:41<10:17:13,  6.71s/it]

Total reward after episode 4484 is 3033.0


 45%|████▍     | 4485/10000 [4:04:44<8:59:13,  5.87s/it] 

Total reward after episode 4485 is 1949.0


 45%|████▍     | 4486/10000 [4:04:48<7:54:17,  5.16s/it]

Total reward after episode 4486 is 1698.0


 45%|████▍     | 4487/10000 [4:04:56<9:02:39,  5.91s/it]

Total reward after episode 4487 is 3046.0


 45%|████▍     | 4488/10000 [4:04:58<7:35:03,  4.95s/it]

Total reward after episode 4488 is 1351.0


 45%|████▍     | 4489/10000 [4:05:04<8:03:28,  5.26s/it]

Total reward after episode 4489 is 2371.0


 45%|████▍     | 4490/10000 [4:05:06<6:20:31,  4.14s/it]

Total reward after episode 4490 is 654.0


 45%|████▍     | 4491/10000 [4:05:07<5:08:28,  3.36s/it]

Total reward after episode 4491 is 739.0


 45%|████▍     | 4492/10000 [4:05:18<8:27:56,  5.53s/it]

Total reward after episode 4492 is 645.0


 45%|████▍     | 4493/10000 [4:05:19<6:37:19,  4.33s/it]

Total reward after episode 4493 is 739.0


 45%|████▍     | 4494/10000 [4:05:23<6:27:56,  4.23s/it]

Total reward after episode 4494 is 1870.0


 45%|████▍     | 4495/10000 [4:05:26<5:45:32,  3.77s/it]

Total reward after episode 4495 is 1353.0


 45%|████▍     | 4496/10000 [4:05:31<6:20:47,  4.15s/it]

Total reward after episode 4496 is 2371.0


 45%|████▍     | 4497/10000 [4:05:33<5:08:47,  3.37s/it]

Total reward after episode 4497 is 737.0


 45%|████▍     | 4498/10000 [4:05:35<4:50:25,  3.17s/it]

Total reward after episode 4498 is 1353.0


 45%|████▍     | 4499/10000 [4:05:41<5:44:02,  3.75s/it]

Total reward after episode 4499 is 2365.0


 45%|████▌     | 4500/10000 [4:05:42<4:43:18,  3.09s/it]

Total reward after episode 4500 is 737.0


 45%|████▌     | 4501/10000 [4:05:46<5:08:37,  3.37s/it]

Total reward after episode 4501 is 1928.0


 45%|████▌     | 4502/10000 [4:05:52<6:05:10,  3.99s/it]

Total reward after episode 4502 is 2367.0


 45%|████▌     | 4503/10000 [4:05:52<4:33:10,  2.98s/it]

Total reward after episode 4503 is 252.0


 45%|████▌     | 4504/10000 [4:05:55<4:31:44,  2.97s/it]

Total reward after episode 4504 is 1330.0


 45%|████▌     | 4505/10000 [4:06:02<6:07:07,  4.01s/it]

Total reward after episode 4505 is 702.0


 45%|████▌     | 4506/10000 [4:06:03<4:59:02,  3.27s/it]

Total reward after episode 4506 is 654.0


 45%|████▌     | 4507/10000 [4:06:07<5:19:48,  3.49s/it]

Total reward after episode 4507 is 1947.0


 45%|████▌     | 4508/10000 [4:06:09<4:32:04,  2.97s/it]

Total reward after episode 4508 is 630.0


 45%|████▌     | 4509/10000 [4:06:16<6:27:28,  4.23s/it]

Total reward after episode 4509 is 3056.0


 45%|████▌     | 4510/10000 [4:06:18<5:13:31,  3.43s/it]

Total reward after episode 4510 is 738.0


 45%|████▌     | 4511/10000 [4:06:19<4:21:46,  2.86s/it]

Total reward after episode 4511 is 737.0


 45%|████▌     | 4512/10000 [4:06:23<4:59:50,  3.28s/it]

Total reward after episode 4512 is 1927.0


 45%|████▌     | 4513/10000 [4:06:30<6:32:50,  4.30s/it]

Total reward after episode 4513 is 3061.0


 45%|████▌     | 4514/10000 [4:06:35<6:56:23,  4.55s/it]

Total reward after episode 4514 is 2368.0


 45%|████▌     | 4515/10000 [4:06:37<5:33:38,  3.65s/it]

Total reward after episode 4515 is 738.0


 45%|████▌     | 4516/10000 [4:06:40<5:30:58,  3.62s/it]

Total reward after episode 4516 is 1698.0


 45%|████▌     | 4517/10000 [4:06:42<4:34:14,  3.00s/it]

Total reward after episode 4517 is 738.0


 45%|████▌     | 4518/10000 [4:06:44<4:16:53,  2.81s/it]

Total reward after episode 4518 is 1071.0


 45%|████▌     | 4519/10000 [4:06:48<4:39:29,  3.06s/it]

Total reward after episode 4519 is 1728.0


 45%|████▌     | 4520/10000 [4:06:52<5:03:19,  3.32s/it]

Total reward after episode 4520 is 1677.0


 45%|████▌     | 4521/10000 [4:06:53<4:14:47,  2.79s/it]

Total reward after episode 4521 is 738.0


 45%|████▌     | 4522/10000 [4:06:55<3:40:18,  2.41s/it]

Total reward after episode 4522 is 738.0


 45%|████▌     | 4523/10000 [4:07:16<12:05:23,  7.95s/it]

Total reward after episode 4523 is 2931.0


 45%|████▌     | 4524/10000 [4:07:16<8:45:42,  5.76s/it] 

Total reward after episode 4524 is 249.0


 45%|████▌     | 4525/10000 [4:07:18<6:50:23,  4.50s/it]

Total reward after episode 4525 is 654.0


 45%|████▌     | 4526/10000 [4:07:24<7:38:15,  5.02s/it]

Total reward after episode 4526 is 2359.0


 45%|████▌     | 4527/10000 [4:07:26<6:02:51,  3.98s/it]

Total reward after episode 4527 is 737.0


 45%|████▌     | 4528/10000 [4:07:30<6:13:16,  4.09s/it]

Total reward after episode 4528 is 1929.0


 45%|████▌     | 4529/10000 [4:07:31<4:40:25,  3.08s/it]

Total reward after episode 4529 is 245.0


 45%|████▌     | 4530/10000 [4:07:32<3:59:00,  2.62s/it]

Total reward after episode 4530 is 737.0


 45%|████▌     | 4531/10000 [4:07:35<3:53:48,  2.57s/it]

Total reward after episode 4531 is 1151.0


 45%|████▌     | 4532/10000 [4:07:38<4:01:52,  2.65s/it]

Total reward after episode 4532 is 1333.0


 45%|████▌     | 4533/10000 [4:07:39<3:32:30,  2.33s/it]

Total reward after episode 4533 is 737.0


 45%|████▌     | 4534/10000 [4:07:41<3:10:15,  2.09s/it]

Total reward after episode 4534 is 654.0


 45%|████▌     | 4535/10000 [4:07:55<8:48:22,  5.80s/it]

Total reward after episode 4535 is 2989.0


 45%|████▌     | 4536/10000 [4:07:59<7:42:27,  5.08s/it]

Total reward after episode 4536 is 1698.0


 45%|████▌     | 4537/10000 [4:08:01<6:36:25,  4.35s/it]

Total reward after episode 4537 is 1152.0


 45%|████▌     | 4538/10000 [4:08:04<5:39:54,  3.73s/it]

Total reward after episode 4538 is 1053.0


 45%|████▌     | 4539/10000 [4:08:07<5:35:32,  3.69s/it]

Total reward after episode 4539 is 1693.0


 45%|████▌     | 4540/10000 [4:08:14<6:56:05,  4.57s/it]

Total reward after episode 4540 is 3062.0


 45%|████▌     | 4541/10000 [4:08:34<14:00:31,  9.24s/it]

Total reward after episode 4541 is 2229.0


 45%|████▌     | 4542/10000 [4:08:37<11:22:29,  7.50s/it]

Total reward after episode 4542 is 1700.0


 45%|████▌     | 4543/10000 [4:08:40<9:12:41,  6.08s/it] 

Total reward after episode 4543 is 1352.0


 45%|████▌     | 4544/10000 [4:08:44<8:00:43,  5.29s/it]

Total reward after episode 4544 is 1700.0


 45%|████▌     | 4545/10000 [4:08:47<7:18:30,  4.82s/it]

Total reward after episode 4545 is 1864.0


 45%|████▌     | 4546/10000 [4:08:51<6:36:52,  4.37s/it]

Total reward after episode 4546 is 1589.0


 45%|████▌     | 4547/10000 [4:08:56<7:02:53,  4.65s/it]

Total reward after episode 4547 is 2366.0


 45%|████▌     | 4548/10000 [4:08:57<5:38:22,  3.72s/it]

Total reward after episode 4548 is 738.0


 45%|████▌     | 4549/10000 [4:09:01<5:30:26,  3.64s/it]

Total reward after episode 4549 is 1700.0


 46%|████▌     | 4550/10000 [4:09:02<4:33:15,  3.01s/it]

Total reward after episode 4550 is 737.0


 46%|████▌     | 4551/10000 [4:09:04<3:53:42,  2.57s/it]

Total reward after episode 4551 is 737.0


 46%|████▌     | 4552/10000 [4:09:06<3:25:13,  2.26s/it]

Total reward after episode 4552 is 654.0


 46%|████▌     | 4553/10000 [4:09:09<4:02:46,  2.67s/it]

Total reward after episode 4553 is 1729.0


 46%|████▌     | 4554/10000 [4:09:15<5:41:02,  3.76s/it]

Total reward after episode 4554 is 2359.0


 46%|████▌     | 4555/10000 [4:09:21<6:42:05,  4.43s/it]

Total reward after episode 4555 is 2368.0


 46%|████▌     | 4556/10000 [4:09:27<7:16:25,  4.81s/it]

Total reward after episode 4556 is 2656.0


 46%|████▌     | 4557/10000 [4:09:31<6:40:57,  4.42s/it]

Total reward after episode 4557 is 1692.0


 46%|████▌     | 4558/10000 [4:09:35<6:27:28,  4.27s/it]

Total reward after episode 4558 is 1691.0


 46%|████▌     | 4559/10000 [4:09:38<6:05:30,  4.03s/it]

Total reward after episode 4559 is 1698.0


 46%|████▌     | 4560/10000 [4:09:40<4:59:03,  3.30s/it]

Total reward after episode 4560 is 737.0


 46%|████▌     | 4561/10000 [4:09:43<5:03:23,  3.35s/it]

Total reward after episode 4561 is 1701.0


 46%|████▌     | 4562/10000 [4:09:50<6:50:45,  4.53s/it]

Total reward after episode 4562 is 3055.0


 46%|████▌     | 4563/10000 [4:09:52<5:28:54,  3.63s/it]

Total reward after episode 4563 is 739.0


 46%|████▌     | 4564/10000 [4:09:55<5:26:56,  3.61s/it]

Total reward after episode 4564 is 1693.0


 46%|████▌     | 4565/10000 [4:09:59<5:22:43,  3.56s/it]

Total reward after episode 4565 is 1697.0


 46%|████▌     | 4566/10000 [4:10:04<6:14:52,  4.14s/it]

Total reward after episode 4566 is 2366.0


 46%|████▌     | 4567/10000 [4:10:10<6:45:22,  4.48s/it]

Total reward after episode 4567 is 2364.0


 46%|████▌     | 4568/10000 [4:10:12<5:56:25,  3.94s/it]

Total reward after episode 4568 is 1352.0


 46%|████▌     | 4569/10000 [4:10:14<4:52:25,  3.23s/it]

Total reward after episode 4569 is 739.0


 46%|████▌     | 4570/10000 [4:10:16<4:08:44,  2.75s/it]

Total reward after episode 4570 is 736.0


 46%|████▌     | 4571/10000 [4:10:22<5:50:09,  3.87s/it]

Total reward after episode 4571 is 3063.0


 46%|████▌     | 4572/10000 [4:10:26<5:47:50,  3.84s/it]

Total reward after episode 4572 is 1851.0


 46%|████▌     | 4573/10000 [4:10:32<6:46:31,  4.49s/it]

Total reward after episode 4573 is 2357.0


 46%|████▌     | 4574/10000 [4:10:33<5:26:26,  3.61s/it]

Total reward after episode 4574 is 738.0


 46%|████▌     | 4575/10000 [4:10:35<4:30:05,  2.99s/it]

Total reward after episode 4575 is 737.0


 46%|████▌     | 4576/10000 [4:10:39<4:58:38,  3.30s/it]

Total reward after episode 4576 is 1952.0


 46%|████▌     | 4577/10000 [4:10:49<8:12:25,  5.45s/it]

Total reward after episode 4577 is 1281.0


 46%|████▌     | 4578/10000 [4:10:51<6:26:29,  4.28s/it]

Total reward after episode 4578 is 738.0


 46%|████▌     | 4579/10000 [4:10:57<7:24:57,  4.92s/it]

Total reward after episode 4579 is 3063.0


 46%|████▌     | 4580/10000 [4:11:01<6:47:19,  4.51s/it]

Total reward after episode 4580 is 1696.0


 46%|████▌     | 4581/10000 [4:11:02<5:02:19,  3.35s/it]

Total reward after episode 4581 is 249.0


 46%|████▌     | 4582/10000 [4:11:05<4:59:34,  3.32s/it]

Total reward after episode 4582 is 1346.0


 46%|████▌     | 4583/10000 [4:11:08<4:47:51,  3.19s/it]

Total reward after episode 4583 is 1332.0


 46%|████▌     | 4584/10000 [4:11:09<4:02:30,  2.69s/it]

Total reward after episode 4584 is 737.0


 46%|████▌     | 4585/10000 [4:11:14<5:11:38,  3.45s/it]

Total reward after episode 4585 is 2367.0


 46%|████▌     | 4586/10000 [4:11:17<4:37:35,  3.08s/it]

Total reward after episode 4586 is 1051.0


 46%|████▌     | 4587/10000 [4:11:17<3:31:50,  2.35s/it]

Total reward after episode 4587 is 251.0


 46%|████▌     | 4588/10000 [4:11:20<3:50:59,  2.56s/it]

Total reward after episode 4588 is 1331.0


 46%|████▌     | 4589/10000 [4:11:22<3:22:47,  2.25s/it]

Total reward after episode 4589 is 737.0


 46%|████▌     | 4590/10000 [4:11:23<3:04:00,  2.04s/it]

Total reward after episode 4590 is 738.0


 46%|████▌     | 4591/10000 [4:11:25<2:44:20,  1.82s/it]

Total reward after episode 4591 is 611.0


 46%|████▌     | 4592/10000 [4:11:29<3:47:09,  2.52s/it]

Total reward after episode 4592 is 1932.0


 46%|████▌     | 4593/10000 [4:11:32<3:55:15,  2.61s/it]

Total reward after episode 4593 is 818.0


 46%|████▌     | 4594/10000 [4:11:35<4:24:00,  2.93s/it]

Total reward after episode 4594 is 1690.0


 46%|████▌     | 4595/10000 [4:11:38<4:19:26,  2.88s/it]

Total reward after episode 4595 is 1337.0


 46%|████▌     | 4596/10000 [4:11:44<5:35:55,  3.73s/it]

Total reward after episode 4596 is 2649.0


 46%|████▌     | 4597/10000 [4:12:28<23:34:25, 15.71s/it]

Total reward after episode 4597 is 267.0


 46%|████▌     | 4598/10000 [4:12:30<17:45:30, 11.83s/it]

Total reward after episode 4598 is 818.0


 46%|████▌     | 4599/10000 [4:12:33<13:45:41,  9.17s/it]

Total reward after episode 4599 is 1332.0


 46%|████▌     | 4600/10000 [4:12:41<13:16:18,  8.85s/it]

Total reward after episode 4600 is 2345.0


 46%|████▌     | 4601/10000 [4:12:44<10:39:09,  7.10s/it]

Total reward after episode 4601 is 1350.0


 46%|████▌     | 4602/10000 [4:12:50<9:45:05,  6.50s/it] 

Total reward after episode 4602 is 2367.0


 46%|████▌     | 4603/10000 [4:12:57<9:59:23,  6.66s/it]

Total reward after episode 4603 is 3057.0


 46%|████▌     | 4604/10000 [4:13:00<8:38:38,  5.77s/it]

Total reward after episode 4604 is 1707.0


 46%|████▌     | 4605/10000 [4:13:05<7:59:21,  5.33s/it]

Total reward after episode 4605 is 1861.0


 46%|████▌     | 4606/10000 [4:13:14<9:56:28,  6.63s/it]

Total reward after episode 4606 is 3036.0


 46%|████▌     | 4607/10000 [4:13:21<10:05:04,  6.73s/it]

Total reward after episode 4607 is 3058.0


 46%|████▌     | 4608/10000 [4:13:23<8:03:52,  5.38s/it] 

Total reward after episode 4608 is 1051.0


 46%|████▌     | 4609/10000 [4:13:31<9:14:16,  6.17s/it]

Total reward after episode 4609 is 2631.0


 46%|████▌     | 4610/10000 [4:13:35<7:59:59,  5.34s/it]

Total reward after episode 4610 is 1728.0


 46%|████▌     | 4611/10000 [4:13:36<6:17:28,  4.20s/it]

Total reward after episode 4611 is 738.0


 46%|████▌     | 4612/10000 [4:13:39<5:29:25,  3.67s/it]

Total reward after episode 4612 is 1048.0


 46%|████▌     | 4613/10000 [4:13:41<4:38:02,  3.10s/it]

Total reward after episode 4613 is 735.0


 46%|████▌     | 4614/10000 [4:13:44<4:55:14,  3.29s/it]

Total reward after episode 4614 is 1696.0


 46%|████▌     | 4615/10000 [4:13:51<6:33:48,  4.39s/it]

Total reward after episode 4615 is 2353.0


 46%|████▌     | 4616/10000 [4:13:53<5:20:05,  3.57s/it]

Total reward after episode 4616 is 632.0


 46%|████▌     | 4617/10000 [4:13:56<5:07:05,  3.42s/it]

Total reward after episode 4617 is 816.0


 46%|████▌     | 4618/10000 [4:13:59<4:59:55,  3.34s/it]

Total reward after episode 4618 is 1585.0


 46%|████▌     | 4619/10000 [4:14:06<6:31:00,  4.36s/it]

Total reward after episode 4619 is 3061.0


 46%|████▌     | 4620/10000 [4:14:07<4:50:57,  3.24s/it]

Total reward after episode 4620 is 249.0


 46%|████▌     | 4621/10000 [4:14:09<4:42:28,  3.15s/it]

Total reward after episode 4621 is 1437.0


 46%|████▌     | 4622/10000 [4:14:12<4:34:31,  3.06s/it]

Total reward after episode 4622 is 818.0


 46%|████▌     | 4623/10000 [4:14:18<5:33:06,  3.72s/it]

Total reward after episode 4623 is 2368.0


 46%|████▌     | 4624/10000 [4:14:21<5:28:49,  3.67s/it]

Total reward after episode 4624 is 1687.0


 46%|████▋     | 4625/10000 [4:14:27<6:22:39,  4.27s/it]

Total reward after episode 4625 is 2363.0


 46%|████▋     | 4626/10000 [4:14:30<5:41:29,  3.81s/it]

Total reward after episode 4626 is 1351.0


 46%|████▋     | 4627/10000 [4:14:37<7:10:47,  4.81s/it]

Total reward after episode 4627 is 1551.0


 46%|████▋     | 4628/10000 [4:14:44<8:21:46,  5.60s/it]

Total reward after episode 4628 is 3053.0


 46%|████▋     | 4629/10000 [4:14:48<7:47:14,  5.22s/it]

Total reward after episode 4629 is 1924.0


 46%|████▋     | 4630/10000 [4:14:54<7:50:36,  5.26s/it]

Total reward after episode 4630 is 2367.0


 46%|████▋     | 4631/10000 [4:15:02<8:58:10,  6.01s/it]

Total reward after episode 4631 is 3050.0


 46%|████▋     | 4632/10000 [4:15:07<8:41:52,  5.83s/it]

Total reward after episode 4632 is 2362.0


 46%|████▋     | 4633/10000 [4:15:08<6:23:09,  4.28s/it]

Total reward after episode 4633 is 249.0


 46%|████▋     | 4634/10000 [4:15:11<6:09:23,  4.13s/it]

Total reward after episode 4634 is 1693.0


 46%|████▋     | 4635/10000 [4:15:25<10:29:03,  7.04s/it]

Total reward after episode 4635 is 2994.0


 46%|████▋     | 4636/10000 [4:15:29<9:00:47,  6.05s/it] 

Total reward after episode 4636 is 1862.0


 46%|████▋     | 4637/10000 [4:15:31<7:23:45,  4.96s/it]

Total reward after episode 4637 is 1046.0


 46%|████▋     | 4638/10000 [4:15:37<7:31:04,  5.05s/it]

Total reward after episode 4638 is 2367.0


 46%|████▋     | 4639/10000 [4:15:37<5:34:17,  3.74s/it]

Total reward after episode 4639 is 243.0


 46%|████▋     | 4640/10000 [4:15:41<5:35:03,  3.75s/it]

Total reward after episode 4640 is 1695.0


 46%|████▋     | 4641/10000 [4:15:44<5:15:58,  3.54s/it]

Total reward after episode 4641 is 1330.0


 46%|████▋     | 4642/10000 [4:15:47<4:58:15,  3.34s/it]

Total reward after episode 4642 is 1331.0


 46%|████▋     | 4643/10000 [4:15:51<5:13:47,  3.51s/it]

Total reward after episode 4643 is 1718.0


 46%|████▋     | 4644/10000 [4:16:00<7:52:38,  5.29s/it]

Total reward after episode 4644 is 3036.0


 46%|████▋     | 4645/10000 [4:16:04<7:10:40,  4.83s/it]

Total reward after episode 4645 is 1690.0


 46%|████▋     | 4646/10000 [4:16:10<7:50:05,  5.27s/it]

Total reward after episode 4646 is 2646.0


 46%|████▋     | 4647/10000 [4:16:12<6:10:47,  4.16s/it]

Total reward after episode 4647 is 738.0


 46%|████▋     | 4648/10000 [4:16:18<6:57:15,  4.68s/it]

Total reward after episode 4648 is 2358.0


 46%|████▋     | 4649/10000 [4:16:19<5:10:11,  3.48s/it]

Total reward after episode 4649 is 250.0
