## What is ⅂ᴚ?

So far, we have modelled the policy, value and Q functions. In this session, we will use an approach to RL called Upside-Down RL (⅂ᴚ), in which we model the behaviour function instead.

This was outlined in Schmidhuber's December 2019 paper [Reinforcement Learning Upside Down:
Don’t Predict Rewards - Just Map Them to Actions](https://arxiv.org/pdf/1912.02875.pdf). <br>
The specific implementation we are following is outlined in the following paper: [Training Agents using Upside-Down Reinforcement Learning](https://arxiv.org/pdf/1912.02877.pdf).

### The Behaviour Function
The behaviour function takes as input the current state and a command, and is trained to output a probability distribution over the actions which lead to that command being fulfilled. The command in this implementation takes the form of two scalars - a desired reward to achieve and a time horizon over which to achieve that desired reward.

In [58]:
import time
from copy import deepcopy
import gym
import numpy as np
import torch
import torch.nn.functional as F

In [59]:
# Create the LunarLander-v2 environment. `env` is a module-level global that
# the policy, agent, collection and training code in this file reads directly.
env = gym.make('LunarLander-v2')

In [60]:
# A command takes the form [desired reward, desired horizon].
def random_policy(obs, command):
    """Return a random action index; both `obs` and `command` are ignored.

    The action is drawn with `np.random.randint` from the range
    [0, env.action_space.n).
    """
    n_actions = env.action_space.n
    return np.random.randint(n_actions)

In [61]:
#Visualise agent function
def visualise_agent_command(policy, command, n=5):
    """Render `n` episodes of `policy` acting under `command`.

    Args:
        policy: callable `policy(observation, command) -> action`. It is given
            the observation and the current command, each wrapped in a
            one-element list and converted to a double tensor.
        command: `[desired reward, desired horizon]` used at the start of
            every episode. It is copied per episode and never mutated.
        n: number of episodes to render.

    After every step the reward obtained is subtracted from the desired reward
    and the horizon is decremented (never below 1); the policy is always fed
    this updated command. A KeyboardInterrupt stops the rendering quietly, and
    the environment is closed no matter how the function exits.
    """
    try:
        for trial_i in range(n):
            # Fresh copy so every episode starts from the caller's command.
            current_command = deepcopy(command)
            observation = env.reset()
            done = False
            t = 0
            episode_return = 0
            while not done:
                env.render()
                # Feed the *updated* command: passing the original `command`
                # here would hide the remaining reward/horizon from the agent.
                action = policy(
                    torch.tensor([observation]).double(),
                    torch.tensor([current_command]).double(),
                )
                observation, reward, done, info = env.step(action)
                episode_return += reward
                current_command[0] -= reward
                current_command[1] = max(1, current_command[1] - 1)
                t += 1
            env.render()
            # Hold the final frame briefly so it can be seen.
            time.sleep(1.5)
            print("Episode {} finished after {} timesteps. Return = {}".format(trial_i, t, episode_return))
    except KeyboardInterrupt:
        # Deliberate: interrupting the cell just stops the visualisation.
        pass
    finally:
        # Close the render window on every exit path, including errors.
        env.close()

In [62]:
#visualise_agent_command(random_policy, command=[500, 500], n=3)

In [63]:
class FCNN_AGENT(torch.nn.Module):
    """Fully connected behaviour function mapping (observation, command) to action logits.

    The observation and the scaled command are embedded separately, combined
    by element-wise multiplication, and passed through a small MLP whose
    output has one logit per environment action.
    """

    def __init__(self, command_scale):
        """Build the layers; `command_scale` multiplies the command before it is embedded."""
        super().__init__()
        nn = torch.nn
        embedding_size = 32
        hidden_size = 64
        self.command_scale = command_scale

        # Flattened observation -> tanh embedding.
        self.observation_embedding = nn.Sequential(
            nn.Linear(np.prod(env.observation_space.shape), embedding_size),
            nn.Tanh(),
        )
        # Two-element command -> sigmoid embedding of the same width.
        self.command_embedding = nn.Sequential(
            nn.Linear(2, embedding_size),
            nn.Sigmoid(),
        )
        # Combined embedding -> one logit per action.
        self.to_output = nn.Sequential(
            nn.Linear(embedding_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, env.action_space.n),
        )

    def forward(self, observation, command):
        """Return unnormalised action logits for `observation` under `command`."""
        observation_features = self.observation_embedding(observation)
        command_features = self.command_embedding(command * self.command_scale)
        # Element-wise product of the two embeddings.
        return self.to_output(observation_features * command_features)

    def create_optimizer(self, lr):
        """Attach an Adam optimizer over all parameters as `self.optimizer`."""
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

In [64]:
def collect_experience(policy, replay_buffer, replay_size, last_few, n_episodes=100, log_to_tensorboard=True):
    """Run `policy` for `n_episodes` episodes and add them to the replay buffer.

    Args:
        policy: callable `policy(observation, command) -> action`, given the
            observation and the current command, each wrapped in a one-element
            list and converted to a double tensor.
        replay_buffer: list of episode dicts. New episodes are appended to
            this list in place.
        replay_size: maximum number of episodes in the returned buffer.
        last_few: forwarded to `sample_command`.
        n_episodes: number of episodes to collect.
        log_to_tensorboard: when true, the sampled command and the episode
            return are written to the global `writer`.

    Returns:
        A new list holding the `replay_size` highest-return episodes, sorted
        by ascending return. Each episode is a dict with keys 'observation',
        'action', 'reward', 'return' and 'episode_len'.

    Uses and increments the global episode counter `i_episode`. A
    KeyboardInterrupt stops collection early (the unfinished episode is
    dropped) and the environment is closed on every exit path.
    """
    global i_episode
    # Commands for the whole round are sampled from the buffer as it was on
    # entry. Stored episodes are never modified, so a shallow snapshot is
    # enough to shield sampling from the appends below.
    init_replay_buffer = list(replay_buffer)
    try:
        for _ in range(n_episodes):
            command = sample_command(init_replay_buffer, last_few)
            if log_to_tensorboard:
                # Logged at i_episode + 1 so the command lines up with the
                # 'Return/Episode' point written once the episode finishes.
                writer.add_scalar('Command desired reward/Episode', command[0], i_episode + 1)
                writer.add_scalar('Command horizon/Episode', command[1], i_episode + 1)
            observation = env.reset()
            episode_mem = {'observation': [],
                           'action': [],
                           'reward': []}
            done = False
            while not done:
                action = policy(torch.tensor([observation]).double(), torch.tensor([command]).double())
                new_observation, reward, done, info = env.step(action)

                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)

                observation = new_observation
                # Update the command: less reward left to collect, one step
                # fewer to do it in (the horizon never drops below 1).
                command[0] -= reward
                command[1] = max(1, command[1] - 1)
            episode_mem['return'] = sum(episode_mem['reward'])
            episode_mem['episode_len'] = len(episode_mem['observation'])
            replay_buffer.append(episode_mem)
            i_episode += 1
            if log_to_tensorboard:
                writer.add_scalar('Return/Episode', episode_mem['return'], i_episode)
            print("Episode {} finished after {} timesteps. Return = {}".format(i_episode, episode_mem['episode_len'], episode_mem['return']))
    except KeyboardInterrupt:
        # Deliberate: interrupting the cell keeps the episodes collected so far.
        pass
    finally:
        env.close()
    # Keep only the best `replay_size` episodes, lowest return first.
    replay_buffer = sorted(replay_buffer, key=lambda x: x['return'])[-replay_size:]
    return replay_buffer

def sample_command(replay_buffer, last_few):
    """Sample an exploratory command `[desired reward, desired horizon]`.

    With an empty buffer the command is `[1, 1]`. Otherwise the last
    `last_few` entries of `replay_buffer` are used: the horizon is their mean
    'episode_len', and the desired reward is drawn uniformly between their
    mean 'return' and that mean plus one standard deviation.
    """
    if not replay_buffer:
        return [1, 1]

    recent_episodes = replay_buffer[-last_few:]
    episode_lengths = [episode['episode_len'] for episode in recent_episodes]
    episode_returns = [episode['return'] for episode in recent_episodes]

    return_mean = np.mean(episode_returns)
    return_std = np.std(episode_returns)
    horizon = np.mean(episode_lengths)
    target_reward = np.random.uniform(return_mean, return_mean + return_std)
    return [target_reward, horizon]

In [65]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64, log_to_tensorboard=True):
    """Train `policy_net` by supervised learning on episodes from the replay buffer.

    Each training example is built from a random episode and a random start
    step t1 within it. The input is the observation at t1 plus the command
    `[reward actually collected from t1 to the end of the episode, number of
    remaining steps]`; the label is the action taken at t1. The network is
    trained with cross-entropy on these labels.

    Args:
        policy_net: module called as `policy_net(observations, commands)` that
            returns action logits and exposes an `optimizer` attribute.
        replay_buffer: non-empty list of episode dicts with 'observation',
            'action' and 'reward' lists.
        n_updates: number of gradient steps to take.
        batch_size: number of examples per gradient step.
        log_to_tensorboard: when true, each step's cost is written to the
            global `writer`.

    Returns:
        The mean cost over the `n_updates` steps.

    Raises:
        ValueError: if `replay_buffer` is empty.

    Uses and increments the global update counter `i_updates`.
    """
    global i_updates
    if len(replay_buffer) == 0:
        raise ValueError("replay_buffer is empty; collect experience before calling train_net")
    # Loop-invariant: size of a flattened observation.
    obs_dim = int(np.prod(env.observation_space.shape))
    all_costs = []
    for _ in range(n_updates):
        batch_observations = np.zeros((batch_size, obs_dim))
        batch_commands = np.zeros((batch_size, 2))
        batch_label = np.zeros(batch_size)
        for b in range(batch_size):
            episode = replay_buffer[np.random.randint(0, len(replay_buffer))]
            episode_len = len(episode['observation'])
            sample_t1 = np.random.randint(0, episode_len)
            # The segment always runs to the end of the episode. A random end
            # point, np.random.randint(sample_t1 + 1, episode_len + 1), is the
            # alternative that was left disabled here.
            sample_t2 = episode_len
            sample_horizon = sample_t2 - sample_t1
            sample_desired_reward = sum(episode['reward'][sample_t1:sample_t2])
            batch_observations[b] = episode['observation'][sample_t1]
            batch_commands[b] = [sample_desired_reward, sample_horizon]
            batch_label[b] = episode['action'][sample_t1]
        batch_observations = torch.tensor(batch_observations).double()
        batch_commands = torch.tensor(batch_commands).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_observations, batch_commands)
        cost = F.cross_entropy(pred, batch_label)
        if log_to_tensorboard:
            writer.add_scalar('Cost/NN update', cost.item(), i_updates)
        all_costs.append(cost.item())
        # Clear gradients before backward so leftovers from any earlier
        # backward pass cannot leak into this step.
        policy_net.optimizer.zero_grad()
        cost.backward()
        policy_net.optimizer.step()
        i_updates += 1
    return np.mean(all_costs)

In [66]:
def create_greedy_policy(policy_network):
    """Wrap `policy_network` in a policy that picks the highest-logit action.

    The returned callable takes `(obs, command)`, evaluates the network and
    returns the argmax index of the flattened logits.
    """
    def policy(obs, command):
        logits = policy_network(obs, command).detach().numpy()
        return logits.argmax()
    return policy

def create_stochastic_policy(policy_network):
    """Wrap `policy_network` in a policy that samples from its action distribution.

    The returned callable takes `(obs, command)`, turns the network's logits
    into probabilities with a softmax over the last dimension, and returns
    one sampled action as a Python int.
    """
    def policy(obs, command):
        probabilities = F.softmax(policy_network(obs, command), dim=-1)
        distribution = torch.distributions.Categorical(probabilities)
        return distribution.sample().item()
    return policy

In [67]:
# Global counters and state shared with collect_experience / train_net.
i_episode=0 #number of episodes collected so far
i_updates=0 #number of parameter updates to the neural network
replay_buffer = []
log_to_tensorboard = True 

# Hyperparameters.
replay_size = 600  # max episodes kept after each collection round (highest returns are kept)
last_few = 75  # number of entries from the end of the buffer used when sampling a command
batch_size = 32  # training examples per network update
n_warm_up_episodes = 50  # episodes collected with the random policy before training
n_episodes_per_iter = 50  # episodes collected per training iteration
n_updates_per_iter = 300  # network updates per training iteration
command_scale = 0.01  # factor the command is multiplied by before it is embedded
lr = 0.001  # Adam learning rate

# The network is converted to double precision; the inputs built elsewhere
# in this file are converted with .double() to match.
agent = FCNN_AGENT(command_scale).double()
agent.create_optimizer(lr)

# Both policies wrap the same network: one samples actions, one takes the argmax.
stochastic_policy = create_stochastic_policy(agent)
greedy_policy = create_greedy_policy(agent)

In [68]:
# Set up training visualisation.
if log_to_tensorboard:
    from torch.utils.tensorboard import SummaryWriter

    # `writer` records the model's performance so it can be graphed in TensorBoard.
    writer = SummaryWriter()

In [69]:
#Collect warm up episodes
# The buffer is first filled with episodes from the random policy, then the
# network gets a first round of updates on them.
replay_buffer = collect_experience(random_policy, replay_buffer, replay_size, last_few, n_warm_up_episodes, log_to_tensorboard)
# train_net returns the mean cost over the updates.
train_net(agent, replay_buffer, n_updates_per_iter, batch_size, log_to_tensorboard)

Episode 1 finished after 77 timesteps. Return = -109.3581771685191
Episode 2 finished after 110 timesteps. Return = -40.057786276945635
Episode 3 finished after 110 timesteps. Return = -458.5218342673362
Episode 4 finished after 92 timesteps. Return = -123.916121127219
Episode 5 finished after 80 timesteps. Return = -294.2528616038636
Episode 6 finished after 71 timesteps. Return = -73.26745856209543
Episode 7 finished after 69 timesteps. Return = -69.76011093923816
Episode 8 finished after 110 timesteps. Return = -382.10424441305605
Episode 9 finished after 106 timesteps. Return = -112.88904878624014
Episode 10 finished after 81 timesteps. Return = -241.06721973243847
Episode 11 finished after 89 timesteps. Return = -125.70503870857834
Episode 12 finished after 111 timesteps. Return = -426.1668800460502
Episode 13 finished after 101 timesteps. Return = -177.82784670198959
Episode 14 finished after 76 timesteps. Return = -301.77620067797113
Episode 15 finished after 77 timesteps. Retur

1.3864230495993328

In [70]:
# Main training loop: alternate between collecting episodes with the
# stochastic policy and updating the network on the refreshed buffer.
n_iters = 1000
for i in range(n_iters):
    # collect_experience returns a new, truncated buffer, so it must be reassigned.
    replay_buffer = collect_experience(stochastic_policy, replay_buffer, replay_size, last_few, n_episodes_per_iter, log_to_tensorboard)
    train_net(agent, replay_buffer, n_updates_per_iter, batch_size, log_to_tensorboard)

Episode 51 finished after 93 timesteps. Return = -259.7395306930805
Episode 52 finished after 101 timesteps. Return = -265.709085786938
Episode 53 finished after 59 timesteps. Return = -107.93481798172428
Episode 54 finished after 99 timesteps. Return = -318.8560484862995
Episode 55 finished after 118 timesteps. Return = -210.22779547171552
Episode 56 finished after 96 timesteps. Return = -115.33760537188039
Episode 57 finished after 74 timesteps. Return = -166.58431563692432
Episode 58 finished after 75 timesteps. Return = -220.66415281536587
Episode 59 finished after 100 timesteps. Return = -313.36677581876074
Episode 60 finished after 96 timesteps. Return = -141.602016537834
Episode 61 finished after 77 timesteps. Return = -105.14325661296229
Episode 62 finished after 83 timesteps. Return = -106.1360863622129
Episode 63 finished after 77 timesteps. Return = -193.51219284934848
Episode 64 finished after 75 timesteps. Return = -81.98325121875556
Episode 65 finished after 105 timesteps

Episode 172 finished after 123 timesteps. Return = -149.5605335818325
Episode 173 finished after 69 timesteps. Return = -73.17442740852994
Episode 174 finished after 111 timesteps. Return = -173.11498900637145
Episode 175 finished after 99 timesteps. Return = -298.1627051310083
Episode 176 finished after 63 timesteps. Return = -171.21945615311645
Episode 177 finished after 90 timesteps. Return = -133.5181303653289
Episode 178 finished after 122 timesteps. Return = -190.12130768665233
Episode 179 finished after 83 timesteps. Return = -107.56221978301664
Episode 180 finished after 71 timesteps. Return = -175.25226080910778
Episode 181 finished after 117 timesteps. Return = -119.94840504842179
Episode 182 finished after 123 timesteps. Return = -110.13345292980861
Episode 183 finished after 106 timesteps. Return = -313.70847761174014
Episode 184 finished after 100 timesteps. Return = -142.39934172279777
Episode 185 finished after 75 timesteps. Return = -92.49620211351488
Episode 186 finish

Episode 292 finished after 111 timesteps. Return = -126.32071731526247
Episode 293 finished after 85 timesteps. Return = -100.58837684888442
Episode 294 finished after 91 timesteps. Return = -261.3094283406451
Episode 295 finished after 97 timesteps. Return = -135.13281402977873
Episode 296 finished after 86 timesteps. Return = -379.1967893121435
Episode 297 finished after 69 timesteps. Return = -120.52670116117591
Episode 298 finished after 62 timesteps. Return = -190.9174249327037
Episode 299 finished after 68 timesteps. Return = -124.66941784183113
Episode 300 finished after 113 timesteps. Return = -287.7922242532412
Episode 301 finished after 80 timesteps. Return = -100.14087702791856
Episode 302 finished after 111 timesteps. Return = -382.18940290301555
Episode 303 finished after 64 timesteps. Return = -83.45341351745108
Episode 304 finished after 94 timesteps. Return = -343.7252533112063
Episode 305 finished after 110 timesteps. Return = -95.97144402483487
Episode 306 finished af

Episode 411 finished after 96 timesteps. Return = -388.4224017352654
Episode 412 finished after 70 timesteps. Return = -170.33692369038317
Episode 413 finished after 93 timesteps. Return = -218.18312194140128
Episode 414 finished after 60 timesteps. Return = -86.63869345572974
Episode 415 finished after 69 timesteps. Return = -74.63651832062507
Episode 416 finished after 106 timesteps. Return = -303.02784212869176
Episode 417 finished after 89 timesteps. Return = -336.42346793662546
Episode 418 finished after 123 timesteps. Return = 23.69126911100456
Episode 419 finished after 96 timesteps. Return = -134.54944416230302
Episode 420 finished after 100 timesteps. Return = -103.81710818149179
Episode 421 finished after 133 timesteps. Return = -254.08618664276435
Episode 422 finished after 86 timesteps. Return = -256.1551969842101
Episode 423 finished after 79 timesteps. Return = -130.95942600584038
Episode 424 finished after 104 timesteps. Return = -108.20016312111946
Episode 425 finished 

Episode 529 finished after 73 timesteps. Return = -147.55385246703577
Episode 530 finished after 109 timesteps. Return = -148.50754285216442
Episode 531 finished after 83 timesteps. Return = -90.0490068681225
Episode 532 finished after 91 timesteps. Return = -289.59163769283987
Episode 533 finished after 79 timesteps. Return = -95.92941329404762
Episode 534 finished after 90 timesteps. Return = -458.1163461186668
Episode 535 finished after 99 timesteps. Return = -189.32684263408095
Episode 536 finished after 118 timesteps. Return = -121.07873068602868
Episode 537 finished after 73 timesteps. Return = -86.31554033381504
Episode 538 finished after 102 timesteps. Return = -199.1509080334594
Episode 539 finished after 113 timesteps. Return = -289.41207500669344
Episode 540 finished after 76 timesteps. Return = -255.95561947403672
Episode 541 finished after 63 timesteps. Return = -47.3669986873502
Episode 542 finished after 110 timesteps. Return = -264.48934872996176
Episode 543 finished af

Episode 648 finished after 62 timesteps. Return = -195.76869644406372
Episode 649 finished after 109 timesteps. Return = -91.17221625370593
Episode 650 finished after 92 timesteps. Return = -189.3675611437164
Episode 651 finished after 65 timesteps. Return = -165.65991471647607
Episode 652 finished after 88 timesteps. Return = -430.99397937409344
Episode 653 finished after 61 timesteps. Return = -88.53092158182677
Episode 654 finished after 70 timesteps. Return = -250.32610474494703
Episode 655 finished after 60 timesteps. Return = -90.2022805524973
Episode 656 finished after 112 timesteps. Return = -434.82588945912767
Episode 657 finished after 92 timesteps. Return = -191.80032296784248
Episode 658 finished after 142 timesteps. Return = -236.6726445461989
Episode 659 finished after 99 timesteps. Return = -182.3376863444085
Episode 660 finished after 81 timesteps. Return = -99.63973997018407
Episode 661 finished after 87 timesteps. Return = -121.38067430043381
Episode 662 finished afte

Episode 770 finished after 117 timesteps. Return = -110.7123425181127
Episode 771 finished after 98 timesteps. Return = -73.50143328057712
Episode 772 finished after 88 timesteps. Return = -135.02461665392096
Episode 773 finished after 98 timesteps. Return = -292.937576305772
Episode 774 finished after 64 timesteps. Return = -54.44305805294716
Episode 775 finished after 63 timesteps. Return = -136.13043679648413
Episode 776 finished after 92 timesteps. Return = -79.73000533822739
Episode 777 finished after 106 timesteps. Return = -223.5091627063893
Episode 778 finished after 100 timesteps. Return = -239.96781857559563
Episode 779 finished after 64 timesteps. Return = -95.92837635847195
Episode 780 finished after 133 timesteps. Return = 21.898261464414006
Episode 781 finished after 124 timesteps. Return = -169.3294276658839
Episode 782 finished after 105 timesteps. Return = -195.25144968374997
Episode 783 finished after 62 timesteps. Return = -86.32347793096096
Episode 784 finished afte

Episode 890 finished after 93 timesteps. Return = -117.73729312835815
Episode 891 finished after 96 timesteps. Return = -149.24271248144902
Episode 892 finished after 57 timesteps. Return = -115.77540638060148
Episode 893 finished after 64 timesteps. Return = -94.9537317402134
Episode 894 finished after 87 timesteps. Return = -116.8128985783112
Episode 895 finished after 110 timesteps. Return = -181.52365530242275
Episode 896 finished after 70 timesteps. Return = -70.78821629711635
Episode 897 finished after 83 timesteps. Return = -177.30963052599296
Episode 898 finished after 89 timesteps. Return = -135.31932371845605
Episode 899 finished after 97 timesteps. Return = -149.44269259915183
Episode 900 finished after 58 timesteps. Return = -96.32640458308082
Episode 901 finished after 64 timesteps. Return = -78.99437445772008
Episode 902 finished after 83 timesteps. Return = -248.61377326958083
Episode 903 finished after 97 timesteps. Return = -115.6556158301784
Episode 904 finished after

Episode 1013 finished after 75 timesteps. Return = -128.41498459046795
Episode 1014 finished after 76 timesteps. Return = -163.2731760634328
Episode 1015 finished after 114 timesteps. Return = -129.70700808174652
Episode 1016 finished after 74 timesteps. Return = -104.82820475170863
Episode 1017 finished after 68 timesteps. Return = -245.3478066366927
Episode 1018 finished after 76 timesteps. Return = -90.74948985826575
Episode 1019 finished after 69 timesteps. Return = -116.64085768204353
Episode 1020 finished after 80 timesteps. Return = -285.6993399196049
Episode 1021 finished after 102 timesteps. Return = -239.95938492945055
Episode 1022 finished after 106 timesteps. Return = -431.01162429583803
Episode 1023 finished after 95 timesteps. Return = -107.3606327597898
Episode 1024 finished after 103 timesteps. Return = -135.50872506796387
Episode 1025 finished after 81 timesteps. Return = -107.12187556358508
Episode 1026 finished after 102 timesteps. Return = -256.8405716463019
Episode

Episode 1134 finished after 133 timesteps. Return = -39.14539069233885
Episode 1135 finished after 106 timesteps. Return = -183.94848010832703
Episode 1136 finished after 105 timesteps. Return = -176.115250750332
Episode 1137 finished after 84 timesteps. Return = -105.93353392630955
Episode 1138 finished after 100 timesteps. Return = -101.03621381080417
Episode 1139 finished after 102 timesteps. Return = -129.52607863102648
Episode 1140 finished after 141 timesteps. Return = -277.60398087774706
Episode 1141 finished after 103 timesteps. Return = -105.72129069940758
Episode 1142 finished after 86 timesteps. Return = -100.5631946345327
Episode 1143 finished after 81 timesteps. Return = -107.90668386155127
Episode 1144 finished after 107 timesteps. Return = -124.72230603766248
Episode 1145 finished after 115 timesteps. Return = -6.749275134712633
Episode 1146 finished after 98 timesteps. Return = -106.82149195407233
Episode 1147 finished after 88 timesteps. Return = -134.4688579327879
Epi

Episode 1250 finished after 93 timesteps. Return = -419.4196968803318
Episode 1251 finished after 69 timesteps. Return = -76.71684921132389
Episode 1252 finished after 84 timesteps. Return = -94.64602121348955
Episode 1253 finished after 70 timesteps. Return = -114.77857128731677
Episode 1254 finished after 79 timesteps. Return = -110.16389398148527
Episode 1255 finished after 83 timesteps. Return = -189.6108861184747
Episode 1256 finished after 78 timesteps. Return = -159.7908169942339
Episode 1257 finished after 98 timesteps. Return = -211.1270054312307
Episode 1258 finished after 99 timesteps. Return = -42.86732050173828
Episode 1259 finished after 113 timesteps. Return = -128.67046921707913
Episode 1260 finished after 108 timesteps. Return = -116.93646388933954
Episode 1261 finished after 89 timesteps. Return = -148.3579672306247
Episode 1262 finished after 113 timesteps. Return = -74.02515909661201
Episode 1263 finished after 105 timesteps. Return = -266.45172675333924
Episode 126

Episode 1370 finished after 109 timesteps. Return = -107.51054199595714
Episode 1371 finished after 119 timesteps. Return = -253.2261113121033
Episode 1372 finished after 74 timesteps. Return = -155.1612072069012
Episode 1373 finished after 68 timesteps. Return = -74.16220427171712
Episode 1374 finished after 112 timesteps. Return = -126.88487593784879
Episode 1375 finished after 101 timesteps. Return = -229.75253514033403
Episode 1376 finished after 82 timesteps. Return = -155.1384647765693
Episode 1377 finished after 87 timesteps. Return = -89.01164499478983
Episode 1378 finished after 113 timesteps. Return = -166.67855912091446
Episode 1379 finished after 108 timesteps. Return = -36.40218340541128
Episode 1380 finished after 75 timesteps. Return = -101.46069505891596
Episode 1381 finished after 86 timesteps. Return = -64.43218003006177
Episode 1382 finished after 76 timesteps. Return = -37.01967816293718
Episode 1383 finished after 90 timesteps. Return = -133.95195415173626
Episode 

Episode 1489 finished after 112 timesteps. Return = 3.58843764816892
Episode 1490 finished after 62 timesteps. Return = -108.41116543710129
Episode 1491 finished after 116 timesteps. Return = -0.17202291431581784
Episode 1492 finished after 99 timesteps. Return = -113.15125260884174
Episode 1493 finished after 54 timesteps. Return = -96.22466078887516
Episode 1494 finished after 62 timesteps. Return = -112.81913022118695
Episode 1495 finished after 74 timesteps. Return = -115.31545985415764
Episode 1496 finished after 99 timesteps. Return = -240.59875785232774
Episode 1497 finished after 56 timesteps. Return = -83.90581122910854
Episode 1498 finished after 104 timesteps. Return = -47.91905928954276
Episode 1499 finished after 117 timesteps. Return = -251.3624864489276
Episode 1500 finished after 67 timesteps. Return = -101.5969272260341
Episode 1501 finished after 135 timesteps. Return = -102.91905471831447
Episode 1502 finished after 92 timesteps. Return = -112.16309076065372
Episode 

Episode 1606 finished after 113 timesteps. Return = -158.63915405908472
Episode 1607 finished after 67 timesteps. Return = -220.304667463229
Episode 1608 finished after 101 timesteps. Return = -145.49353657975382
Episode 1609 finished after 102 timesteps. Return = -361.68421209575956
Episode 1610 finished after 89 timesteps. Return = -386.4064905864084
Episode 1611 finished after 136 timesteps. Return = -226.61080082685694
Episode 1612 finished after 70 timesteps. Return = -76.95129321141906
Episode 1613 finished after 58 timesteps. Return = -194.34818027665864
Episode 1614 finished after 112 timesteps. Return = -199.79297514750877
Episode 1615 finished after 96 timesteps. Return = -123.00106735936306
Episode 1616 finished after 95 timesteps. Return = -148.10108562205227
Episode 1617 finished after 67 timesteps. Return = -160.99610246735782
Episode 1618 finished after 83 timesteps. Return = -111.1955787115454
Episode 1619 finished after 116 timesteps. Return = -193.40753373147766
Episo

Episode 1724 finished after 109 timesteps. Return = -33.765565191895064
Episode 1725 finished after 58 timesteps. Return = -85.90452086209386
Episode 1726 finished after 103 timesteps. Return = -159.6930054626365
Episode 1727 finished after 62 timesteps. Return = -107.9983132929049
Episode 1728 finished after 90 timesteps. Return = -147.43368713820547
Episode 1729 finished after 101 timesteps. Return = -117.562340452097
Episode 1730 finished after 105 timesteps. Return = -392.5664620036179
Episode 1731 finished after 80 timesteps. Return = -109.78096295418703
Episode 1732 finished after 71 timesteps. Return = -112.55690993627695
Episode 1733 finished after 74 timesteps. Return = -135.13741602533747
Episode 1734 finished after 93 timesteps. Return = -139.9967843274861
Episode 1735 finished after 70 timesteps. Return = -117.66696137914866
Episode 1736 finished after 91 timesteps. Return = -118.25556499829275
Episode 1737 finished after 102 timesteps. Return = -53.60288781011069
Episode 1

Episode 1840 finished after 72 timesteps. Return = -77.34091458571255
Episode 1841 finished after 109 timesteps. Return = -162.3632698174028
Episode 1842 finished after 70 timesteps. Return = -119.24021935629696
Episode 1843 finished after 105 timesteps. Return = -67.89158759192024
Episode 1844 finished after 65 timesteps. Return = -58.945215207319336
Episode 1845 finished after 74 timesteps. Return = -87.71787200297437
Episode 1846 finished after 69 timesteps. Return = -111.94579344187308
Episode 1847 finished after 80 timesteps. Return = -92.45719083313107
Episode 1848 finished after 71 timesteps. Return = -72.15407890724796
Episode 1849 finished after 105 timesteps. Return = -245.0268242489247
Episode 1850 finished after 88 timesteps. Return = -237.97054841573845
Episode 1851 finished after 60 timesteps. Return = -112.6445189237038
Episode 1852 finished after 73 timesteps. Return = -80.44502252725692
Episode 1853 finished after 113 timesteps. Return = -135.4532071449203
Episode 1854

Episode 1957 finished after 81 timesteps. Return = -103.42899155685
Episode 1958 finished after 120 timesteps. Return = -176.942971030467
Episode 1959 finished after 82 timesteps. Return = -68.2235011203301
Episode 1960 finished after 105 timesteps. Return = -150.40384388675983
Episode 1961 finished after 132 timesteps. Return = -177.0837479633949
Episode 1962 finished after 110 timesteps. Return = -91.61057368786734
Episode 1963 finished after 62 timesteps. Return = -92.96401473651233
Episode 1964 finished after 60 timesteps. Return = -93.73076594550423
Episode 1965 finished after 92 timesteps. Return = -107.81628983597925
Episode 1966 finished after 90 timesteps. Return = -213.0443139960543
Episode 1967 finished after 76 timesteps. Return = -55.65735474618772
Episode 1968 finished after 83 timesteps. Return = -114.18441636343613
Episode 1969 finished after 66 timesteps. Return = -145.95846485378414
Episode 1970 finished after 99 timesteps. Return = -97.65643089903716
Episode 1971 fin

Episode 2075 finished after 70 timesteps. Return = -79.7712112782869
Episode 2076 finished after 83 timesteps. Return = -255.98327176509216
Episode 2077 finished after 109 timesteps. Return = -106.65704983111793
Episode 2078 finished after 78 timesteps. Return = -157.0952192981169
Episode 2079 finished after 80 timesteps. Return = -109.62465365933365
Episode 2080 finished after 82 timesteps. Return = -132.7354527238612
Episode 2081 finished after 73 timesteps. Return = -107.65295531392255
Episode 2082 finished after 134 timesteps. Return = -98.54150358611305
Episode 2083 finished after 61 timesteps. Return = -151.70284429657625
Episode 2084 finished after 108 timesteps. Return = -103.11710731001145
Episode 2085 finished after 125 timesteps. Return = -115.62528284361323
Episode 2086 finished after 98 timesteps. Return = -182.72261810317275
Episode 2087 finished after 107 timesteps. Return = -159.31467790441638
Episode 2088 finished after 117 timesteps. Return = -157.59834295931552
Episo

Episode 2194 finished after 79 timesteps. Return = -135.26500231121418
Episode 2195 finished after 90 timesteps. Return = -103.5886690125527
Episode 2196 finished after 56 timesteps. Return = -100.57414405692113
Episode 2197 finished after 59 timesteps. Return = -59.61067548907222
Episode 2198 finished after 63 timesteps. Return = -86.3264155793565
Episode 2199 finished after 65 timesteps. Return = -69.64095410079618
Episode 2200 finished after 108 timesteps. Return = -215.37043019149917
Episode 2201 finished after 82 timesteps. Return = -136.95560513445193
Episode 2202 finished after 89 timesteps. Return = -161.7834485720585
Episode 2203 finished after 77 timesteps. Return = -113.93481968656815
Episode 2204 finished after 113 timesteps. Return = -166.64584057460485
Episode 2205 finished after 80 timesteps. Return = -105.30827767379607
Episode 2206 finished after 90 timesteps. Return = -123.08403345517279
Episode 2207 finished after 103 timesteps. Return = -341.5836029828031
Episode 22

Episode 2311 finished after 94 timesteps. Return = -252.29870926381886
Episode 2312 finished after 91 timesteps. Return = -93.04810241787087
Episode 2313 finished after 72 timesteps. Return = -230.42880202012253
Episode 2314 finished after 134 timesteps. Return = -125.82337493841088
Episode 2315 finished after 69 timesteps. Return = -83.41691108764843
Episode 2316 finished after 80 timesteps. Return = -109.66979170005123
Episode 2317 finished after 94 timesteps. Return = -119.41510407000831
Episode 2318 finished after 104 timesteps. Return = -116.00972356859414
Episode 2319 finished after 60 timesteps. Return = -106.98968667831289
Episode 2320 finished after 84 timesteps. Return = -108.29413069454353
Episode 2321 finished after 94 timesteps. Return = -128.14641881154145
Episode 2322 finished after 68 timesteps. Return = -118.75204064286393
Episode 2323 finished after 123 timesteps. Return = -198.31557366149508
Episode 2324 finished after 68 timesteps. Return = -117.78862364916874
Episo

Episode 2428 finished after 89 timesteps. Return = -225.77240118805142
Episode 2429 finished after 69 timesteps. Return = -113.84833582707986
Episode 2430 finished after 67 timesteps. Return = -94.76193653240125
Episode 2431 finished after 86 timesteps. Return = -88.5321355523491
Episode 2432 finished after 63 timesteps. Return = -85.26393855893868
Episode 2433 finished after 70 timesteps. Return = -191.49486048363383
Episode 2434 finished after 91 timesteps. Return = -77.0344134478886
Episode 2435 finished after 79 timesteps. Return = -96.62474013117846
Episode 2436 finished after 58 timesteps. Return = -100.51677553798099
Episode 2437 finished after 94 timesteps. Return = -433.2785479709646
Episode 2438 finished after 117 timesteps. Return = -65.3828876533969
Episode 2439 finished after 70 timesteps. Return = -76.45044829901366
Episode 2440 finished after 68 timesteps. Return = -86.51707703173851
Episode 2441 finished after 89 timesteps. Return = -110.50075892518001
Episode 2442 fini

Episode 2546 finished after 90 timesteps. Return = -130.69700215304013
Episode 2547 finished after 144 timesteps. Return = -32.818134732552636
Episode 2548 finished after 90 timesteps. Return = -81.90898099673907
Episode 2549 finished after 106 timesteps. Return = -152.51046867542675
Episode 2550 finished after 93 timesteps. Return = -139.20148790054438
Episode 2551 finished after 98 timesteps. Return = -133.45504923497361
Episode 2552 finished after 109 timesteps. Return = -309.7737300474049
Episode 2553 finished after 83 timesteps. Return = -101.56665478326441
Episode 2554 finished after 73 timesteps. Return = -85.62731229971237
Episode 2555 finished after 80 timesteps. Return = -129.4855734973019
Episode 2556 finished after 58 timesteps. Return = -139.39230793542816
Episode 2557 finished after 95 timesteps. Return = -335.6354542438865
Episode 2558 finished after 71 timesteps. Return = -136.12929796336573
Episode 2559 finished after 119 timesteps. Return = -83.40341822642314
Episode 

Episode 2665 finished after 106 timesteps. Return = -159.689571837754
Episode 2666 finished after 76 timesteps. Return = -93.84253047483324
Episode 2667 finished after 101 timesteps. Return = -362.77570898602227
Episode 2668 finished after 64 timesteps. Return = -46.444059541466544
Episode 2669 finished after 90 timesteps. Return = -95.16989544614827
Episode 2670 finished after 74 timesteps. Return = -103.96276676573378
Episode 2671 finished after 105 timesteps. Return = -250.44802718577708
Episode 2672 finished after 85 timesteps. Return = -124.76753603610024
Episode 2673 finished after 83 timesteps. Return = -121.33012337766633
Episode 2674 finished after 103 timesteps. Return = -95.37747676895974
Episode 2675 finished after 97 timesteps. Return = -246.06687331323795
Episode 2676 finished after 81 timesteps. Return = -107.71862659379629
Episode 2677 finished after 95 timesteps. Return = -91.06174136836938
Episode 2678 finished after 79 timesteps. Return = -122.94099491492352
Episode 

Episode 2782 finished after 89 timesteps. Return = -107.19561695220824
Episode 2783 finished after 68 timesteps. Return = -92.28088688470633
Episode 2784 finished after 124 timesteps. Return = -172.95686635731562
Episode 2785 finished after 61 timesteps. Return = -128.94163951297233
Episode 2786 finished after 123 timesteps. Return = -78.68609651710071
Episode 2787 finished after 108 timesteps. Return = -138.5353940880836
Episode 2788 finished after 113 timesteps. Return = -159.3182072451961
Episode 2789 finished after 96 timesteps. Return = -116.23789035494826
Episode 2790 finished after 112 timesteps. Return = -207.476059946539
Episode 2791 finished after 64 timesteps. Return = -47.7289743073878
Episode 2792 finished after 136 timesteps. Return = -229.89945755887527
Episode 2793 finished after 99 timesteps. Return = -76.618321020149
Episode 2794 finished after 85 timesteps. Return = -155.998202899499
Episode 2795 finished after 90 timesteps. Return = -300.0287902438252
Episode 2796 f

Episode 2901 finished after 106 timesteps. Return = -166.53484394349107
Episode 2902 finished after 59 timesteps. Return = -77.7816247071907
Episode 2903 finished after 114 timesteps. Return = -200.53746758123782
Episode 2904 finished after 67 timesteps. Return = -119.52535880878348
Episode 2905 finished after 59 timesteps. Return = -114.25348227660447
Episode 2906 finished after 77 timesteps. Return = -110.76498464530434
Episode 2907 finished after 116 timesteps. Return = -153.71924755173012
Episode 2908 finished after 101 timesteps. Return = -99.58447281810268
Episode 2909 finished after 95 timesteps. Return = -304.4086605034823
Episode 2910 finished after 75 timesteps. Return = -260.86622064802793
Episode 2911 finished after 123 timesteps. Return = -182.060772387309
Episode 2912 finished after 60 timesteps. Return = -131.33780841089816
Episode 2913 finished after 67 timesteps. Return = -137.8537911743325
Episode 2914 finished after 103 timesteps. Return = -314.7038656317725
Episode 

Episode 3019 finished after 115 timesteps. Return = -123.64127602489211
Episode 3020 finished after 78 timesteps. Return = -138.04581895060772
Episode 3021 finished after 67 timesteps. Return = -130.76634069414195
Episode 3022 finished after 104 timesteps. Return = -22.066050758479918
Episode 3023 finished after 95 timesteps. Return = -207.58573688715137
Episode 3024 finished after 119 timesteps. Return = -111.18452408895399
Episode 3025 finished after 67 timesteps. Return = -82.98877726337457
Episode 3026 finished after 84 timesteps. Return = -207.38238793314957
Episode 3027 finished after 106 timesteps. Return = -150.5194348382562
Episode 3028 finished after 72 timesteps. Return = -146.38208243445095
Episode 3029 finished after 80 timesteps. Return = -99.64756647635889
Episode 3030 finished after 100 timesteps. Return = -117.46392728569361
Episode 3031 finished after 113 timesteps. Return = -206.28365912090277
Episode 3032 finished after 70 timesteps. Return = -67.42044036817305
Epis

Episode 3138 finished after 84 timesteps. Return = -98.85082684232026
Episode 3139 finished after 61 timesteps. Return = -91.19055310548866
Episode 3140 finished after 103 timesteps. Return = -95.59464040732537
Episode 3141 finished after 57 timesteps. Return = -100.09762584708686
Episode 3142 finished after 84 timesteps. Return = -241.34831142321818
Episode 3143 finished after 74 timesteps. Return = -105.97671748665196
Episode 3144 finished after 93 timesteps. Return = -114.33524817435146
Episode 3145 finished after 103 timesteps. Return = -187.71909419554993
Episode 3146 finished after 75 timesteps. Return = 22.223549634574454
Episode 3147 finished after 105 timesteps. Return = -166.55527494103586
Episode 3148 finished after 99 timesteps. Return = -192.9421673559586
Episode 3149 finished after 132 timesteps. Return = -426.77107192237776
Episode 3150 finished after 140 timesteps. Return = -31.51197853767526
Episode 3151 finished after 89 timesteps. Return = -148.5692779220942
Episode 

Episode 3257 finished after 95 timesteps. Return = -350.1704314984009
Episode 3258 finished after 96 timesteps. Return = -138.16967754528127
Episode 3259 finished after 86 timesteps. Return = -139.9424321631762
Episode 3260 finished after 73 timesteps. Return = -101.33809734623256
Episode 3261 finished after 105 timesteps. Return = -116.54773987668935
Episode 3262 finished after 116 timesteps. Return = -119.2131528917223
Episode 3263 finished after 64 timesteps. Return = -76.52499801506892
Episode 3264 finished after 65 timesteps. Return = -95.89004215344771
Episode 3265 finished after 80 timesteps. Return = -106.45835257193221
Episode 3266 finished after 75 timesteps. Return = -136.1878220141217
Episode 3267 finished after 120 timesteps. Return = -125.34970983377282
Episode 3268 finished after 107 timesteps. Return = -134.8578429306422
Episode 3269 finished after 66 timesteps. Return = -249.67820547955657
Episode 3270 finished after 70 timesteps. Return = -78.42906349620486
Episode 32

Episode 3375 finished after 102 timesteps. Return = -153.14287295227422
Episode 3376 finished after 99 timesteps. Return = -352.0934169662182
Episode 3377 finished after 78 timesteps. Return = -146.98266660801056
Episode 3378 finished after 107 timesteps. Return = -221.74813631232354
Episode 3379 finished after 120 timesteps. Return = -204.90647038460736
Episode 3380 finished after 75 timesteps. Return = -126.83944181597002
Episode 3381 finished after 65 timesteps. Return = -137.06429947711376
Episode 3382 finished after 90 timesteps. Return = -128.5690717451851
Episode 3383 finished after 66 timesteps. Return = -45.63207703305545
Episode 3384 finished after 72 timesteps. Return = -80.55622863179292
Episode 3385 finished after 74 timesteps. Return = -165.57850454745284
Episode 3386 finished after 63 timesteps. Return = -93.87709877300627
Episode 3387 finished after 97 timesteps. Return = -118.66555707291351
Episode 3388 finished after 114 timesteps. Return = -143.35215457478418
Episode

Episode 3491 finished after 112 timesteps. Return = -174.4617521985275
Episode 3492 finished after 90 timesteps. Return = -158.5278027783304
Episode 3493 finished after 63 timesteps. Return = -112.5485858880134
Episode 3494 finished after 73 timesteps. Return = -221.40075802151847
Episode 3495 finished after 95 timesteps. Return = -131.5975948036092
Episode 3496 finished after 65 timesteps. Return = -86.53292217811617
Episode 3497 finished after 91 timesteps. Return = -117.54196042875475
Episode 3498 finished after 125 timesteps. Return = -62.580082015157195
Episode 3499 finished after 110 timesteps. Return = -84.21928539359786
Episode 3500 finished after 74 timesteps. Return = -144.08029090280252
Episode 3501 finished after 111 timesteps. Return = -112.89459902737207
Episode 3502 finished after 87 timesteps. Return = -128.7332371867914
Episode 3503 finished after 81 timesteps. Return = -333.3197310907011
Episode 3504 finished after 134 timesteps. Return = -124.43049558051023
Episode 3

Episode 3607 finished after 89 timesteps. Return = -117.34801682757343
Episode 3608 finished after 92 timesteps. Return = -335.4332462138931
Episode 3609 finished after 94 timesteps. Return = -159.81904978622163
Episode 3610 finished after 71 timesteps. Return = -99.46225834105317
Episode 3611 finished after 79 timesteps. Return = -131.10583097123384
Episode 3612 finished after 87 timesteps. Return = -139.0352836875697
Episode 3613 finished after 81 timesteps. Return = -388.9114357623702
Episode 3614 finished after 66 timesteps. Return = -282.14806494296545
Episode 3615 finished after 136 timesteps. Return = -223.23918950983395
Episode 3616 finished after 89 timesteps. Return = -356.17159585014133
Episode 3617 finished after 100 timesteps. Return = -193.18801438709434
Episode 3618 finished after 86 timesteps. Return = -181.57249582686697
Episode 3619 finished after 82 timesteps. Return = -133.73333693926764
Episode 3620 finished after 61 timesteps. Return = -197.10920074980248
Episode 

Episode 3724 finished after 91 timesteps. Return = -170.17803622858537
Episode 3725 finished after 103 timesteps. Return = -200.5967902382181
Episode 3726 finished after 87 timesteps. Return = -86.22166146586557
Episode 3727 finished after 72 timesteps. Return = -82.89786549570191
Episode 3728 finished after 109 timesteps. Return = -54.60377534559793
Episode 3729 finished after 76 timesteps. Return = -97.36915943117033
Episode 3730 finished after 92 timesteps. Return = -156.11626817657222
Episode 3731 finished after 130 timesteps. Return = -68.98857101531993
Episode 3732 finished after 126 timesteps. Return = -61.568863423678266
Episode 3733 finished after 71 timesteps. Return = -55.38543621300657
Episode 3734 finished after 100 timesteps. Return = -122.97677154978766
Episode 3735 finished after 62 timesteps. Return = -90.62301956833569
Episode 3736 finished after 101 timesteps. Return = -198.92940823252263
Episode 3737 finished after 70 timesteps. Return = -79.35246773369786
Episode 3

Episode 3844 finished after 100 timesteps. Return = -119.04110206031827
Episode 3845 finished after 78 timesteps. Return = -143.51588284762013
Episode 3846 finished after 93 timesteps. Return = -209.51725568808283
Episode 3847 finished after 139 timesteps. Return = -91.39480554979727
Episode 3848 finished after 111 timesteps. Return = -177.58017955368433
Episode 3849 finished after 83 timesteps. Return = -137.89413457576984
Episode 3850 finished after 93 timesteps. Return = -106.22436755639407
Episode 3851 finished after 122 timesteps. Return = -79.26208898589464
Episode 3852 finished after 76 timesteps. Return = -22.01576410560466
Episode 3853 finished after 86 timesteps. Return = -120.05419270269206
Episode 3854 finished after 105 timesteps. Return = -102.78765370509319
Episode 3855 finished after 77 timesteps. Return = -111.66033146943334
Episode 3856 finished after 98 timesteps. Return = -36.041590403177295
Episode 3857 finished after 106 timesteps. Return = -111.44447032254882
Epi

Episode 3961 finished after 64 timesteps. Return = -103.03286488086039
Episode 3962 finished after 112 timesteps. Return = -188.40255993155563
Episode 3963 finished after 72 timesteps. Return = 18.75940953436009
Episode 3964 finished after 78 timesteps. Return = -87.62105338689267
Episode 3965 finished after 72 timesteps. Return = -119.23891012721701
Episode 3966 finished after 132 timesteps. Return = -224.3423666160348
Episode 3967 finished after 75 timesteps. Return = -174.1821498704248
Episode 3968 finished after 133 timesteps. Return = -289.6476119002351
Episode 3969 finished after 100 timesteps. Return = -101.06018483761022
Episode 3970 finished after 81 timesteps. Return = -105.02433905024837
Episode 3971 finished after 70 timesteps. Return = -98.04582747457626
Episode 3972 finished after 75 timesteps. Return = -150.61515436266149
Episode 3973 finished after 75 timesteps. Return = -73.54579218171052
Episode 3974 finished after 102 timesteps. Return = -320.30713751131697
Episode 3

Episode 4081 finished after 91 timesteps. Return = -140.82144659278987
Episode 4082 finished after 73 timesteps. Return = -90.59460948712575
Episode 4083 finished after 72 timesteps. Return = -131.80478057734734
Episode 4084 finished after 113 timesteps. Return = -156.3165308326371
Episode 4085 finished after 68 timesteps. Return = -50.78747799660213
Episode 4086 finished after 121 timesteps. Return = -56.60510000512319
Episode 4087 finished after 60 timesteps. Return = -127.08445438118892
Episode 4088 finished after 93 timesteps. Return = -174.67098420442926
Episode 4089 finished after 68 timesteps. Return = -103.3103225412878
Episode 4090 finished after 102 timesteps. Return = -108.57059294767794
Episode 4091 finished after 110 timesteps. Return = -83.89269184243014
Episode 4092 finished after 89 timesteps. Return = -187.3537408785048
Episode 4093 finished after 97 timesteps. Return = -161.28270534727847
Episode 4094 finished after 65 timesteps. Return = -110.45594124025854
Episode 4

Episode 4199 finished after 95 timesteps. Return = -147.2975372351999
Episode 4200 finished after 83 timesteps. Return = -133.9186632435708
Episode 4201 finished after 93 timesteps. Return = -155.29849530147132
Episode 4202 finished after 102 timesteps. Return = -131.59074629594448
Episode 4203 finished after 117 timesteps. Return = -128.8399548504575
Episode 4204 finished after 63 timesteps. Return = -167.13681379971183
Episode 4205 finished after 73 timesteps. Return = -231.57193308917766
Episode 4206 finished after 77 timesteps. Return = -126.29814710192738
Episode 4207 finished after 89 timesteps. Return = -167.98893961153027
Episode 4208 finished after 103 timesteps. Return = -150.66307333154822
Episode 4209 finished after 70 timesteps. Return = -82.32499007608317
Episode 4210 finished after 72 timesteps. Return = -134.76180600118488
Episode 4211 finished after 93 timesteps. Return = -132.09296259863714
Episode 4212 finished after 78 timesteps. Return = -101.37916967486228
Episode

Episode 4315 finished after 91 timesteps. Return = -156.30174052863066
Episode 4316 finished after 110 timesteps. Return = -43.258883992252414
Episode 4317 finished after 75 timesteps. Return = -195.14609177502453
Episode 4318 finished after 72 timesteps. Return = -141.8210273862777
Episode 4319 finished after 59 timesteps. Return = -85.88876273629917
Episode 4320 finished after 75 timesteps. Return = -87.267685777439
Episode 4321 finished after 101 timesteps. Return = -271.68917884197185
Episode 4322 finished after 87 timesteps. Return = -68.2559504528824
Episode 4323 finished after 107 timesteps. Return = -91.39245768496696
Episode 4324 finished after 90 timesteps. Return = -245.4140591776781
Episode 4325 finished after 74 timesteps. Return = -84.41389653149807
Episode 4326 finished after 108 timesteps. Return = -137.12761919618393
Episode 4327 finished after 115 timesteps. Return = -226.75416581693173
Episode 4328 finished after 65 timesteps. Return = -60.728527130345384
Episode 432

Episode 4433 finished after 86 timesteps. Return = -113.8874282301619
Episode 4434 finished after 110 timesteps. Return = -100.01934728520405
Episode 4435 finished after 56 timesteps. Return = -104.59401509519029
Episode 4436 finished after 114 timesteps. Return = -50.20766307056991
Episode 4437 finished after 108 timesteps. Return = -108.57418280927178
Episode 4438 finished after 106 timesteps. Return = -169.85175149249156
Episode 4439 finished after 105 timesteps. Return = -133.16829199399953
Episode 4440 finished after 110 timesteps. Return = -116.62735309750451
Episode 4441 finished after 86 timesteps. Return = -91.12080358963354
Episode 4442 finished after 116 timesteps. Return = -50.65381291149044
Episode 4443 finished after 134 timesteps. Return = -269.15989630906313
Episode 4444 finished after 58 timesteps. Return = -107.62184195449703
Episode 4445 finished after 98 timesteps. Return = -130.33650184388333
Episode 4446 finished after 87 timesteps. Return = -157.53222570840362
Ep

Episode 4551 finished after 98 timesteps. Return = -93.23805819822707
Episode 4552 finished after 92 timesteps. Return = -271.71355933954294
Episode 4553 finished after 71 timesteps. Return = -84.51201410766703
Episode 4554 finished after 106 timesteps. Return = -147.4144620364542
Episode 4555 finished after 67 timesteps. Return = -46.165210279387935
Episode 4556 finished after 106 timesteps. Return = -124.57327199704272
Episode 4557 finished after 71 timesteps. Return = -162.9611445121588
Episode 4558 finished after 65 timesteps. Return = -182.19727793277548
Episode 4559 finished after 95 timesteps. Return = -143.28183568990997
Episode 4560 finished after 76 timesteps. Return = -95.3427293811024
Episode 4561 finished after 66 timesteps. Return = -89.90976366969258
Episode 4562 finished after 68 timesteps. Return = -56.11136091209374
Episode 4563 finished after 124 timesteps. Return = -177.68384169408478
Episode 4564 finished after 113 timesteps. Return = -205.6520068308019
Episode 456

Episode 4668 finished after 93 timesteps. Return = -150.25569395337885
Episode 4669 finished after 66 timesteps. Return = -95.97196444553153
Episode 4670 finished after 63 timesteps. Return = -94.41481731892385
Episode 4671 finished after 85 timesteps. Return = -134.6027219272974
Episode 4672 finished after 104 timesteps. Return = -227.19013646194873
Episode 4673 finished after 66 timesteps. Return = -106.56083893801312
Episode 4674 finished after 127 timesteps. Return = -105.18080117113408
Episode 4675 finished after 90 timesteps. Return = -103.56709344562917
Episode 4676 finished after 90 timesteps. Return = -111.38615356077246
Episode 4677 finished after 63 timesteps. Return = -96.64239224265071
Episode 4678 finished after 83 timesteps. Return = -203.70022755015862
Episode 4679 finished after 74 timesteps. Return = -141.7650689003056
Episode 4680 finished after 67 timesteps. Return = -115.53490979577694
Episode 4681 finished after 115 timesteps. Return = -309.9984590020764
Episode 4

Episode 4784 finished after 99 timesteps. Return = -133.36889342511296
Episode 4785 finished after 113 timesteps. Return = -93.16800195653964
Episode 4786 finished after 101 timesteps. Return = -180.6277590121123
Episode 4787 finished after 74 timesteps. Return = -81.1853624585619
Episode 4788 finished after 83 timesteps. Return = -169.56630821103494
Episode 4789 finished after 84 timesteps. Return = -119.01591069499969
Episode 4790 finished after 111 timesteps. Return = -287.716396678989
Episode 4791 finished after 80 timesteps. Return = -199.45591704277564
Episode 4792 finished after 113 timesteps. Return = -337.56779371471936
Episode 4793 finished after 112 timesteps. Return = -115.97584714720003
Episode 4794 finished after 73 timesteps. Return = -96.02134465903723
Episode 4795 finished after 112 timesteps. Return = -41.0517909998763
Episode 4796 finished after 104 timesteps. Return = -226.06578018124117
Episode 4797 finished after 99 timesteps. Return = -34.384700277766186
Episode 

Episode 4900 finished after 59 timesteps. Return = -109.91952658277538
Episode 4901 finished after 102 timesteps. Return = -101.8353156240716
Episode 4902 finished after 103 timesteps. Return = -319.59201992845306
Episode 4903 finished after 63 timesteps. Return = -72.12571706872885
Episode 4904 finished after 96 timesteps. Return = -137.0349898308512
Episode 4905 finished after 100 timesteps. Return = -214.7050090403951
Episode 4906 finished after 59 timesteps. Return = -123.13985235629141
Episode 4907 finished after 88 timesteps. Return = -36.73830267400756
Episode 4908 finished after 109 timesteps. Return = -132.2642270526361
Episode 4909 finished after 78 timesteps. Return = -107.5675289897352
Episode 4910 finished after 93 timesteps. Return = -215.14136096285944
Episode 4911 finished after 105 timesteps. Return = -169.55471202971123
Episode 4912 finished after 104 timesteps. Return = -218.00966107502634
Episode 4913 finished after 73 timesteps. Return = -161.97990128592096
Episode

Episode 5017 finished after 103 timesteps. Return = -181.39989601422928
Episode 5018 finished after 79 timesteps. Return = -103.69552616896752
Episode 5019 finished after 70 timesteps. Return = -101.19868050067704
Episode 5020 finished after 82 timesteps. Return = -93.64676823647042
Episode 5021 finished after 69 timesteps. Return = -86.10127327683148
Episode 5022 finished after 98 timesteps. Return = -136.17770449837252
Episode 5023 finished after 61 timesteps. Return = -219.63796410844427
Episode 5024 finished after 86 timesteps. Return = -53.675807090492086
Episode 5025 finished after 101 timesteps. Return = -107.68293154113154
Episode 5026 finished after 100 timesteps. Return = -137.14180043697925
Episode 5027 finished after 124 timesteps. Return = -176.0948747524455
Episode 5028 finished after 108 timesteps. Return = -131.48386713557778
Episode 5029 finished after 61 timesteps. Return = -158.52181684195546
Episode 5030 finished after 79 timesteps. Return = -127.8455996982081
Episo

Episode 5134 finished after 74 timesteps. Return = -142.64035615892317
Episode 5135 finished after 70 timesteps. Return = -93.23430486946597
Episode 5136 finished after 68 timesteps. Return = -100.46261367340848
Episode 5137 finished after 118 timesteps. Return = -145.90103717854646
Episode 5138 finished after 74 timesteps. Return = -99.99882950201406
Episode 5139 finished after 106 timesteps. Return = -115.87266224384197
Episode 5140 finished after 99 timesteps. Return = -28.65194902687334
Episode 5141 finished after 73 timesteps. Return = -72.26696599001143
Episode 5142 finished after 81 timesteps. Return = -156.03249711134924
Episode 5143 finished after 121 timesteps. Return = -8.89235185306326
Episode 5144 finished after 59 timesteps. Return = -166.94150769157525
Episode 5145 finished after 119 timesteps. Return = -424.4039912305958
Episode 5146 finished after 111 timesteps. Return = -197.78378062267936
Episode 5147 finished after 134 timesteps. Return = -339.09063608859117
Episode

Episode 5251 finished after 89 timesteps. Return = -154.3016687852966
Episode 5252 finished after 91 timesteps. Return = -145.40413108768462
Episode 5253 finished after 118 timesteps. Return = -221.81411672938265
Episode 5254 finished after 110 timesteps. Return = -191.2577818313248
Episode 5255 finished after 68 timesteps. Return = -122.87758456307462
Episode 5256 finished after 74 timesteps. Return = -95.34261714333944
Episode 5257 finished after 97 timesteps. Return = -124.65150206668174
Episode 5258 finished after 70 timesteps. Return = -122.21413238921875
Episode 5259 finished after 69 timesteps. Return = -228.1533075905587
Episode 5260 finished after 77 timesteps. Return = -115.51330142530617
Episode 5261 finished after 83 timesteps. Return = -171.09489364001558
Episode 5262 finished after 85 timesteps. Return = -117.75148120294473
Episode 5263 finished after 85 timesteps. Return = -146.05599002322788
Episode 5264 finished after 106 timesteps. Return = -141.01888501969592
Episode

Episode 5368 finished after 104 timesteps. Return = -208.78311792343936
Episode 5369 finished after 95 timesteps. Return = -108.50480112163704
Episode 5370 finished after 66 timesteps. Return = -39.99121533194315
Episode 5371 finished after 87 timesteps. Return = -80.42935913967968
Episode 5372 finished after 114 timesteps. Return = -167.82048416054027
Episode 5373 finished after 85 timesteps. Return = -115.68597040664721
Episode 5374 finished after 73 timesteps. Return = -125.20180885027726
Episode 5375 finished after 139 timesteps. Return = -185.47291485624905
Episode 5376 finished after 104 timesteps. Return = -166.45759129964668
Episode 5377 finished after 96 timesteps. Return = -319.7701573339094
Episode 5378 finished after 105 timesteps. Return = -134.72189909682493
Episode 5379 finished after 58 timesteps. Return = -119.83338658302762
Episode 5380 finished after 57 timesteps. Return = -82.17363481761367
Episode 5381 finished after 85 timesteps. Return = -85.67250059754151
Episod

Episode 5488 finished after 113 timesteps. Return = 29.46080706543526
Episode 5489 finished after 112 timesteps. Return = -154.4589985310834
Episode 5490 finished after 104 timesteps. Return = -137.550460710757
Episode 5491 finished after 65 timesteps. Return = -127.71338663842835
Episode 5492 finished after 78 timesteps. Return = -110.81848795844921
Episode 5493 finished after 101 timesteps. Return = -128.20220944850936
Episode 5494 finished after 94 timesteps. Return = -188.12252917122544
Episode 5495 finished after 83 timesteps. Return = -116.0494762822603
Episode 5496 finished after 109 timesteps. Return = -102.90589647275239
Episode 5497 finished after 83 timesteps. Return = -37.84242138913379
Episode 5498 finished after 80 timesteps. Return = -187.7314200061639
Episode 5499 finished after 98 timesteps. Return = -184.60184644946366
Episode 5500 finished after 82 timesteps. Return = -123.66019548538713
Episode 5501 finished after 89 timesteps. Return = -95.09463445119412
Episode 55

Episode 5607 finished after 69 timesteps. Return = -177.1951130795627
Episode 5608 finished after 58 timesteps. Return = -103.23886062193958
Episode 5609 finished after 100 timesteps. Return = -119.5819021089983
Episode 5610 finished after 99 timesteps. Return = -250.40821436870002
Episode 5611 finished after 101 timesteps. Return = -168.21279225134987
Episode 5612 finished after 89 timesteps. Return = -167.93731158017457
Episode 5613 finished after 111 timesteps. Return = -184.8209281221933
Episode 5614 finished after 78 timesteps. Return = -124.1077572394294
Episode 5615 finished after 95 timesteps. Return = -174.67983416127475
Episode 5616 finished after 89 timesteps. Return = -95.05075100321561
Episode 5617 finished after 108 timesteps. Return = -189.6552842872453
Episode 5618 finished after 62 timesteps. Return = -103.93830251400747
Episode 5619 finished after 118 timesteps. Return = -135.49185909788676
Episode 5620 finished after 74 timesteps. Return = -119.30945639695696
Episode

Episode 5725 finished after 58 timesteps. Return = -161.33372082681882
Episode 5726 finished after 64 timesteps. Return = -207.990966867903
Episode 5727 finished after 104 timesteps. Return = -107.09241240665916
Episode 5728 finished after 58 timesteps. Return = -138.96240943759136
Episode 5729 finished after 125 timesteps. Return = -79.99456340674968
Episode 5730 finished after 81 timesteps. Return = -150.75318717425623
Episode 5731 finished after 81 timesteps. Return = -117.1183760252626
Episode 5732 finished after 118 timesteps. Return = -136.23978155143158
Episode 5733 finished after 130 timesteps. Return = -317.96746875178326
Episode 5734 finished after 82 timesteps. Return = -114.32200113223641
Episode 5735 finished after 115 timesteps. Return = -239.5068219584134
Episode 5736 finished after 75 timesteps. Return = -108.30944067051146
Episode 5737 finished after 105 timesteps. Return = -148.81212610905806
Episode 5738 finished after 145 timesteps. Return = 0.8915646848772809
Episo

Episode 5842 finished after 73 timesteps. Return = -118.47422129351051
Episode 5843 finished after 88 timesteps. Return = -230.3657783814327
Episode 5844 finished after 73 timesteps. Return = -150.0563993747486
Episode 5845 finished after 65 timesteps. Return = -91.60443488307982
Episode 5846 finished after 70 timesteps. Return = -83.08342100632969
Episode 5847 finished after 104 timesteps. Return = -198.78744476318823
Episode 5848 finished after 105 timesteps. Return = -353.81417532551507
Episode 5849 finished after 108 timesteps. Return = -80.21723399449583
Episode 5850 finished after 71 timesteps. Return = -134.0514573265266
Episode 5851 finished after 89 timesteps. Return = -145.38516242214814
Episode 5852 finished after 71 timesteps. Return = -199.731207681271
Episode 5853 finished after 66 timesteps. Return = -99.60234372879124
Episode 5854 finished after 106 timesteps. Return = -191.69496793168133
Episode 5855 finished after 101 timesteps. Return = -42.190794198746175
Episode 58

Episode 5962 finished after 91 timesteps. Return = 3.3754814236684894
Episode 5963 finished after 81 timesteps. Return = -199.30947538749575
Episode 5964 finished after 115 timesteps. Return = -369.1448477046417
Episode 5965 finished after 69 timesteps. Return = -129.59607085821878
Episode 5966 finished after 73 timesteps. Return = -138.6152761088547
Episode 5967 finished after 66 timesteps. Return = -125.4104538297293
Episode 5968 finished after 92 timesteps. Return = -110.04015967349781
Episode 5969 finished after 108 timesteps. Return = -192.9502231705008
Episode 5970 finished after 74 timesteps. Return = -147.6444874152475
Episode 5971 finished after 79 timesteps. Return = -213.62715989679418
Episode 5972 finished after 124 timesteps. Return = -97.37353823889168
Episode 5973 finished after 99 timesteps. Return = 1.0659699816863508
Episode 5974 finished after 64 timesteps. Return = -95.80706253355254
Episode 5975 finished after 115 timesteps. Return = -186.02361754819032
Episode 597

Episode 6080 finished after 118 timesteps. Return = -160.16804027951088
Episode 6081 finished after 95 timesteps. Return = -115.3865711505751
Episode 6082 finished after 107 timesteps. Return = -13.381198639605543
Episode 6083 finished after 92 timesteps. Return = -95.64003193622452
Episode 6084 finished after 67 timesteps. Return = -139.10639106213551
Episode 6085 finished after 82 timesteps. Return = -98.53838680149727
Episode 6086 finished after 57 timesteps. Return = -90.4520279216247
Episode 6087 finished after 87 timesteps. Return = -110.35578395433866
Episode 6088 finished after 102 timesteps. Return = -131.28686191191775
Episode 6089 finished after 66 timesteps. Return = -104.53420665727373
Episode 6090 finished after 106 timesteps. Return = -118.20483299832301
Episode 6091 finished after 129 timesteps. Return = -174.09151004308876
Episode 6092 finished after 68 timesteps. Return = -85.7915891095648
Episode 6093 finished after 66 timesteps. Return = -130.87509550686104
Episode 

Episode 6197 finished after 78 timesteps. Return = -121.02386815423895
Episode 6198 finished after 81 timesteps. Return = -105.93679172580255
Episode 6199 finished after 79 timesteps. Return = -93.25998632843643
Episode 6200 finished after 85 timesteps. Return = -29.731860347590754
Episode 6201 finished after 69 timesteps. Return = -112.90780756836365
Episode 6202 finished after 79 timesteps. Return = -158.90373350218016
Episode 6203 finished after 73 timesteps. Return = -121.75821605574538
Episode 6204 finished after 112 timesteps. Return = -133.9713829061647
Episode 6205 finished after 60 timesteps. Return = -78.87162061541038
Episode 6206 finished after 77 timesteps. Return = -101.24934298861142
Episode 6207 finished after 80 timesteps. Return = -130.40222174875666
Episode 6208 finished after 111 timesteps. Return = -164.01045120344588
Episode 6209 finished after 75 timesteps. Return = -112.92738428050941
Episode 6210 finished after 139 timesteps. Return = -146.96724532078224
Episod

Episode 6313 finished after 76 timesteps. Return = -132.38807887259915
Episode 6314 finished after 89 timesteps. Return = -134.04480975284594
Episode 6315 finished after 67 timesteps. Return = -94.24989882064304
Episode 6316 finished after 71 timesteps. Return = -102.79380594925517
Episode 6317 finished after 77 timesteps. Return = -120.68172406582728
Episode 6318 finished after 65 timesteps. Return = -108.46653613283868
Episode 6319 finished after 65 timesteps. Return = -94.11385852709182
Episode 6320 finished after 88 timesteps. Return = -171.59187328689507
Episode 6321 finished after 65 timesteps. Return = -109.0320801024676
Episode 6322 finished after 92 timesteps. Return = -155.89117133550926
Episode 6323 finished after 85 timesteps. Return = -227.53689401818875
Episode 6324 finished after 81 timesteps. Return = -81.8901067588694
Episode 6325 finished after 86 timesteps. Return = -135.06950480701252
Episode 6326 finished after 57 timesteps. Return = -125.93468771134593
Episode 632

Episode 6431 finished after 84 timesteps. Return = -309.53434254727836
Episode 6432 finished after 123 timesteps. Return = -345.1074282397765
Episode 6433 finished after 70 timesteps. Return = -134.42789203222824
Episode 6434 finished after 82 timesteps. Return = -84.90520336085723
Episode 6435 finished after 68 timesteps. Return = -153.68315456591762
Episode 6436 finished after 64 timesteps. Return = -106.31273690426626
Episode 6437 finished after 62 timesteps. Return = -59.965887495436576
Episode 6438 finished after 62 timesteps. Return = -74.54814093554076
Episode 6439 finished after 73 timesteps. Return = -141.95670795820465
Episode 6440 finished after 74 timesteps. Return = -106.1312100039421
Episode 6441 finished after 80 timesteps. Return = -147.13287397833028
Episode 6442 finished after 68 timesteps. Return = -110.26346244536447
Episode 6443 finished after 105 timesteps. Return = -181.44490814734678
Episode 6444 finished after 99 timesteps. Return = -139.8470511703065
Episode 6

Episode 6551 finished after 87 timesteps. Return = -262.153112667001
Episode 6552 finished after 94 timesteps. Return = 5.8286386314892695
Episode 6553 finished after 109 timesteps. Return = -266.90438456140964
Episode 6554 finished after 93 timesteps. Return = -122.87788150908037
Episode 6555 finished after 89 timesteps. Return = -135.84816588849188
Episode 6556 finished after 115 timesteps. Return = -140.3125881785442
Episode 6557 finished after 73 timesteps. Return = -98.73707848328125
Episode 6558 finished after 106 timesteps. Return = -109.63886687940473
Episode 6559 finished after 91 timesteps. Return = -120.59168073801891
Episode 6560 finished after 66 timesteps. Return = -101.63443329751665
Episode 6561 finished after 85 timesteps. Return = -169.82546751983423
Episode 6562 finished after 107 timesteps. Return = -201.82454139441566
Episode 6563 finished after 79 timesteps. Return = -96.7942451812514
Episode 6564 finished after 100 timesteps. Return = -143.93089408411885
Episode 

Episode 6668 finished after 107 timesteps. Return = -89.47189637284085
Episode 6669 finished after 108 timesteps. Return = 31.527539308542714
Episode 6670 finished after 98 timesteps. Return = -220.51552665005406
Episode 6671 finished after 106 timesteps. Return = -132.00249946140048
Episode 6672 finished after 65 timesteps. Return = -108.97054907720604
Episode 6673 finished after 111 timesteps. Return = -242.97942891749128
Episode 6674 finished after 87 timesteps. Return = -152.42218546114174
Episode 6675 finished after 82 timesteps. Return = -130.10364570938063
Episode 6676 finished after 106 timesteps. Return = -25.52330876698717
Episode 6677 finished after 101 timesteps. Return = -154.55540392870267
Episode 6678 finished after 137 timesteps. Return = -75.76291149743025
Episode 6679 finished after 74 timesteps. Return = -138.1111246816559
Episode 6680 finished after 121 timesteps. Return = -217.60491361926984
Episode 6681 finished after 95 timesteps. Return = -117.31337468155589
Epi

Episode 6787 finished after 76 timesteps. Return = -84.99712099217216
Episode 6788 finished after 70 timesteps. Return = -107.07017182042615
Episode 6789 finished after 87 timesteps. Return = -183.09110807992238
Episode 6790 finished after 75 timesteps. Return = -130.7286950657027
Episode 6791 finished after 105 timesteps. Return = -22.12780719674528
Episode 6792 finished after 78 timesteps. Return = -124.82911723640252
Episode 6793 finished after 75 timesteps. Return = -91.11424415468457
Episode 6794 finished after 138 timesteps. Return = -153.8006793278425
Episode 6795 finished after 74 timesteps. Return = -116.44023572507314
Episode 6796 finished after 62 timesteps. Return = -88.51803157393927
Episode 6797 finished after 64 timesteps. Return = -102.2516447856865
Episode 6798 finished after 77 timesteps. Return = -68.0488523758929
Episode 6799 finished after 98 timesteps. Return = -274.80634893989446
Episode 6800 finished after 122 timesteps. Return = -175.454655723776
Episode 6801 f

Episode 6906 finished after 82 timesteps. Return = -159.92895822367313
Episode 6907 finished after 69 timesteps. Return = -81.20858798809249
Episode 6908 finished after 65 timesteps. Return = -62.90249818107372
Episode 6909 finished after 88 timesteps. Return = -106.37806798139398
Episode 6910 finished after 82 timesteps. Return = -124.12308557790124
Episode 6911 finished after 67 timesteps. Return = -98.88321917241784
Episode 6912 finished after 131 timesteps. Return = -112.80721664653171
Episode 6913 finished after 74 timesteps. Return = -105.3320569434713
Episode 6914 finished after 76 timesteps. Return = -114.67923993750185
Episode 6915 finished after 103 timesteps. Return = -134.444908846476
Episode 6916 finished after 110 timesteps. Return = -116.0025173052131
Episode 6917 finished after 93 timesteps. Return = -118.94791288214718
Episode 6918 finished after 120 timesteps. Return = -152.94216461269198
Episode 6919 finished after 93 timesteps. Return = -113.32087958706452
Episode 6

Episode 7025 finished after 109 timesteps. Return = -155.40590527080735
Episode 7026 finished after 103 timesteps. Return = -124.2836857737885
Episode 7027 finished after 60 timesteps. Return = -86.13210734491742
Episode 7028 finished after 104 timesteps. Return = -296.5977633243121
Episode 7029 finished after 98 timesteps. Return = -212.7180467898317
Episode 7030 finished after 65 timesteps. Return = -162.18179671433862
Episode 7031 finished after 72 timesteps. Return = -104.42223300093407
Episode 7032 finished after 93 timesteps. Return = -138.1465944545322
Episode 7033 finished after 78 timesteps. Return = -103.88865851738488
Episode 7034 finished after 65 timesteps. Return = -95.11241914751226
Episode 7035 finished after 106 timesteps. Return = -88.45791815580401
Episode 7036 finished after 77 timesteps. Return = -95.41663466481499
Episode 7037 finished after 75 timesteps. Return = -145.13438067457983
Episode 7038 finished after 112 timesteps. Return = -144.66427969505915
Episode 7

Episode 7142 finished after 110 timesteps. Return = -149.01286730873386
Episode 7143 finished after 98 timesteps. Return = -55.99151588536316
Episode 7144 finished after 94 timesteps. Return = -126.37346576314386
Episode 7145 finished after 70 timesteps. Return = -75.72988908360996
Episode 7146 finished after 75 timesteps. Return = -78.26876388087128
Episode 7147 finished after 93 timesteps. Return = -116.66265998588867
Episode 7148 finished after 73 timesteps. Return = -204.45118138178736
Episode 7149 finished after 85 timesteps. Return = -110.83386867638502
Episode 7150 finished after 101 timesteps. Return = -129.78453431938468
Episode 7151 finished after 75 timesteps. Return = -89.65352742863267
Episode 7152 finished after 89 timesteps. Return = -177.13792944819068
Episode 7153 finished after 88 timesteps. Return = -140.2548690062805
Episode 7154 finished after 68 timesteps. Return = -78.6749278474793
Episode 7155 finished after 90 timesteps. Return = -146.43657864929133
Episode 715

Episode 7261 finished after 75 timesteps. Return = -138.7183436459611
Episode 7262 finished after 93 timesteps. Return = -167.42649879974408
Episode 7263 finished after 124 timesteps. Return = 5.008747382136747
Episode 7264 finished after 60 timesteps. Return = -110.26135228624791
Episode 7265 finished after 64 timesteps. Return = -52.01928007773819
Episode 7266 finished after 104 timesteps. Return = -85.23715836765355
Episode 7267 finished after 136 timesteps. Return = -94.19222459497627
Episode 7268 finished after 88 timesteps. Return = -158.59297399277105
Episode 7269 finished after 110 timesteps. Return = -137.15014931587945
Episode 7270 finished after 70 timesteps. Return = -75.27971523234349
Episode 7271 finished after 92 timesteps. Return = -132.28128064003454
Episode 7272 finished after 77 timesteps. Return = -229.32295726785742
Episode 7273 finished after 78 timesteps. Return = -151.38362496025587
Episode 7274 finished after 81 timesteps. Return = -127.0429843732388
Episode 72

Episode 7382 finished after 119 timesteps. Return = -115.36385620540173
Episode 7383 finished after 108 timesteps. Return = -157.99940985126165
Episode 7384 finished after 83 timesteps. Return = -145.61959718438737
Episode 7385 finished after 73 timesteps. Return = -129.8717890138032
Episode 7386 finished after 69 timesteps. Return = -87.57773295654574
Episode 7387 finished after 73 timesteps. Return = -114.54574716453766
Episode 7388 finished after 75 timesteps. Return = -81.67393028135308
Episode 7389 finished after 60 timesteps. Return = -116.09026743951063
Episode 7390 finished after 101 timesteps. Return = -288.4048455350119
Episode 7391 finished after 99 timesteps. Return = -116.78121264890609
Episode 7392 finished after 82 timesteps. Return = -409.30381012437385
Episode 7393 finished after 105 timesteps. Return = -244.78401329455926
Episode 7394 finished after 133 timesteps. Return = -146.4503895275413
Episode 7395 finished after 122 timesteps. Return = -14.035913070037708
Episo

Episode 7498 finished after 109 timesteps. Return = -191.2696604057034
Episode 7499 finished after 120 timesteps. Return = -95.75916109129682
Episode 7500 finished after 117 timesteps. Return = -149.5184692735699
Episode 7501 finished after 91 timesteps. Return = -90.89638145407011
Episode 7502 finished after 58 timesteps. Return = -136.75188642957835
Episode 7503 finished after 106 timesteps. Return = -135.30448437156016
Episode 7504 finished after 78 timesteps. Return = -124.11381848463543
Episode 7505 finished after 64 timesteps. Return = -19.351751563097892
Episode 7506 finished after 60 timesteps. Return = -154.62239850022894
Episode 7507 finished after 87 timesteps. Return = -97.10644070307758
Episode 7508 finished after 125 timesteps. Return = -166.16070903740763
Episode 7509 finished after 108 timesteps. Return = -180.33241325814205
Episode 7510 finished after 95 timesteps. Return = -44.918688969453555
Episode 7511 finished after 93 timesteps. Return = -173.83613492522102
Episo

Episode 7617 finished after 86 timesteps. Return = -114.09622499801388
Episode 7618 finished after 81 timesteps. Return = -131.74575194471612
Episode 7619 finished after 65 timesteps. Return = -102.17663878761753
Episode 7620 finished after 97 timesteps. Return = -85.80711878712066
Episode 7621 finished after 108 timesteps. Return = -187.36098983713129
Episode 7622 finished after 111 timesteps. Return = -144.21206021922035
Episode 7623 finished after 106 timesteps. Return = -149.46404746827784
Episode 7624 finished after 85 timesteps. Return = -152.88170485955624
Episode 7625 finished after 109 timesteps. Return = -140.3960614353337
Episode 7626 finished after 101 timesteps. Return = -130.19450077419285
Episode 7627 finished after 125 timesteps. Return = -194.06626522855152
Episode 7628 finished after 104 timesteps. Return = -133.9003449416157
Episode 7629 finished after 96 timesteps. Return = -70.87463370234715
Episode 7630 finished after 94 timesteps. Return = -172.66988177201466
Epi

Episode 7735 finished after 78 timesteps. Return = -179.58924291593422
Episode 7736 finished after 80 timesteps. Return = -152.68547273219895
Episode 7737 finished after 115 timesteps. Return = -107.39802934785297
Episode 7738 finished after 99 timesteps. Return = -0.4436307954956362
Episode 7739 finished after 106 timesteps. Return = -148.81003458547073
Episode 7740 finished after 107 timesteps. Return = -132.44719417365962
Episode 7741 finished after 99 timesteps. Return = -160.64481990490526
Episode 7742 finished after 78 timesteps. Return = -113.16402073647758
Episode 7743 finished after 97 timesteps. Return = -172.12091504026898
Episode 7744 finished after 68 timesteps. Return = -93.3581971965572
Episode 7745 finished after 64 timesteps. Return = -108.3131717532769
Episode 7746 finished after 75 timesteps. Return = -267.61432998997674
Episode 7747 finished after 101 timesteps. Return = -98.04064095914413
Episode 7748 finished after 114 timesteps. Return = -233.14448525640037
Episo

Episode 7851 finished after 100 timesteps. Return = -106.7403516758198
Episode 7852 finished after 120 timesteps. Return = -162.17012597264772
Episode 7853 finished after 75 timesteps. Return = -142.14410287426813
Episode 7854 finished after 100 timesteps. Return = -156.9354406064262
Episode 7855 finished after 125 timesteps. Return = -149.2038266272303
Episode 7856 finished after 92 timesteps. Return = -144.53829108820293
Episode 7857 finished after 63 timesteps. Return = -117.65698606513703
Episode 7858 finished after 95 timesteps. Return = -97.84156312627506
Episode 7859 finished after 84 timesteps. Return = -99.60838264699129
Episode 7860 finished after 72 timesteps. Return = -117.24880458903925
Episode 7861 finished after 71 timesteps. Return = -85.93810432063248
Episode 7862 finished after 109 timesteps. Return = -378.4810926228581
Episode 7863 finished after 74 timesteps. Return = -108.08882195857916
Episode 7864 finished after 91 timesteps. Return = -251.36837034702305
Episode 

Episode 7971 finished after 91 timesteps. Return = -109.87988187719361
Episode 7972 finished after 1000 timesteps. Return = 65.1053869408569
Episode 7973 finished after 102 timesteps. Return = -170.833439202266
Episode 7974 finished after 85 timesteps. Return = -87.27979396136931
Episode 7975 finished after 120 timesteps. Return = -152.712334851247
Episode 7976 finished after 89 timesteps. Return = -98.5895141959462
Episode 7977 finished after 131 timesteps. Return = -133.35461652069472
Episode 7978 finished after 56 timesteps. Return = -144.97714539309874
Episode 7979 finished after 136 timesteps. Return = -143.60894852059346
Episode 7980 finished after 145 timesteps. Return = 3.5918311236920886
Episode 7981 finished after 63 timesteps. Return = -147.86175739812916
Episode 7982 finished after 86 timesteps. Return = -193.6374931986074
Episode 7983 finished after 94 timesteps. Return = -269.2230523943311
Episode 7984 finished after 94 timesteps. Return = -110.38209830499963
Episode 7985

Episode 8090 finished after 100 timesteps. Return = -117.06813266828284
Episode 8091 finished after 66 timesteps. Return = -147.91055712529214
Episode 8092 finished after 66 timesteps. Return = -127.15395606560185
Episode 8093 finished after 96 timesteps. Return = -99.48987689324125
Episode 8094 finished after 99 timesteps. Return = -114.71521667154373
Episode 8095 finished after 87 timesteps. Return = -177.39882264287633
Episode 8096 finished after 110 timesteps. Return = -132.03337331418288
Episode 8097 finished after 89 timesteps. Return = -108.3652146811356
Episode 8098 finished after 112 timesteps. Return = -20.917313733333117
Episode 8099 finished after 87 timesteps. Return = -127.22507461048387
Episode 8100 finished after 62 timesteps. Return = -88.9301622465097
Episode 8101 finished after 74 timesteps. Return = -91.89019300895605
Episode 8102 finished after 98 timesteps. Return = -222.90254250160353
Episode 8103 finished after 69 timesteps. Return = -95.63081474262133
Episode 8

Episode 8207 finished after 61 timesteps. Return = -83.91620127887442
Episode 8208 finished after 79 timesteps. Return = -137.73560073188935
Episode 8209 finished after 102 timesteps. Return = -142.5099198941572
Episode 8210 finished after 107 timesteps. Return = -38.1334582418132
Episode 8211 finished after 94 timesteps. Return = -64.64886337916283
Episode 8212 finished after 68 timesteps. Return = -182.6021824142437
Episode 8213 finished after 61 timesteps. Return = -83.47647534205495
Episode 8214 finished after 64 timesteps. Return = -65.98799797051933
Episode 8215 finished after 110 timesteps. Return = -134.0691897738885
Episode 8216 finished after 129 timesteps. Return = -124.86548241020799
Episode 8217 finished after 66 timesteps. Return = -125.88898344806887
Episode 8218 finished after 106 timesteps. Return = -110.67001762076445
Episode 8219 finished after 92 timesteps. Return = -496.6196707084294
Episode 8220 finished after 137 timesteps. Return = -148.3794815439257
Episode 822

Episode 8325 finished after 100 timesteps. Return = -201.11052973353802
Episode 8326 finished after 73 timesteps. Return = -120.56660527466606
Episode 8327 finished after 63 timesteps. Return = -138.21334935649008
Episode 8328 finished after 98 timesteps. Return = -187.27347235241996
Episode 8329 finished after 88 timesteps. Return = -208.10633226614698
Episode 8330 finished after 122 timesteps. Return = -127.93773546931764
Episode 8331 finished after 78 timesteps. Return = -141.40665364939338
Episode 8332 finished after 102 timesteps. Return = -192.45462112420034
Episode 8333 finished after 88 timesteps. Return = -110.8700980298494
Episode 8334 finished after 76 timesteps. Return = -97.86367989362516
Episode 8335 finished after 71 timesteps. Return = 40.14572538294661
Episode 8336 finished after 125 timesteps. Return = -283.16727713771184
Episode 8337 finished after 120 timesteps. Return = -165.95695049006605
Episode 8338 finished after 113 timesteps. Return = -17.02049345815604
Episo

Episode 8444 finished after 78 timesteps. Return = -122.47970416572778
Episode 8445 finished after 108 timesteps. Return = -97.013343112004
Episode 8446 finished after 69 timesteps. Return = -84.05993553106107
Episode 8447 finished after 66 timesteps. Return = -66.34297574438483
Episode 8448 finished after 78 timesteps. Return = -76.72638100015386
Episode 8449 finished after 119 timesteps. Return = -209.2832815498416
Episode 8450 finished after 99 timesteps. Return = -187.13136137673803
Episode 8451 finished after 83 timesteps. Return = -112.63872692328015
Episode 8452 finished after 91 timesteps. Return = -203.02653170514708
Episode 8453 finished after 94 timesteps. Return = -96.44911749280736
Episode 8454 finished after 93 timesteps. Return = -192.6706560406473
Episode 8455 finished after 88 timesteps. Return = 15.953783037639852
Episode 8456 finished after 106 timesteps. Return = -159.78818988577757
Episode 8457 finished after 106 timesteps. Return = -165.3998863186947
Episode 8458 

Episode 8562 finished after 72 timesteps. Return = -144.55849288143168
Episode 8563 finished after 115 timesteps. Return = 2.413468635616482
Episode 8564 finished after 78 timesteps. Return = -129.97493611468383
Episode 8565 finished after 62 timesteps. Return = -134.13987583406316
Episode 8566 finished after 83 timesteps. Return = -227.20309152799967
Episode 8567 finished after 56 timesteps. Return = -77.58598698206487
Episode 8568 finished after 71 timesteps. Return = -124.42639278714266
Episode 8569 finished after 71 timesteps. Return = -123.33351838202525
Episode 8570 finished after 77 timesteps. Return = -108.68324028403552
Episode 8571 finished after 88 timesteps. Return = -154.3570526425217
Episode 8572 finished after 66 timesteps. Return = -145.91260020348207
Episode 8573 finished after 92 timesteps. Return = -159.3733277654253
Episode 8574 finished after 86 timesteps. Return = -171.61167906369994
Episode 8575 finished after 72 timesteps. Return = -174.72132729128685
Episode 85

Episode 8680 finished after 86 timesteps. Return = -180.23265595827985
Episode 8681 finished after 87 timesteps. Return = -93.6135963689882
Episode 8682 finished after 93 timesteps. Return = -112.29278993065931
Episode 8683 finished after 113 timesteps. Return = -214.5778186266275
Episode 8684 finished after 120 timesteps. Return = -190.96080972096416
Episode 8685 finished after 111 timesteps. Return = -140.96039528956305
Episode 8686 finished after 99 timesteps. Return = -181.0821091003467
Episode 8687 finished after 66 timesteps. Return = -50.24437038302635
Episode 8688 finished after 127 timesteps. Return = -407.5398752327621
Episode 8689 finished after 65 timesteps. Return = -196.99485734912196
Episode 8690 finished after 69 timesteps. Return = -153.93730334003567
Episode 8691 finished after 80 timesteps. Return = -212.18338851682364
Episode 8692 finished after 75 timesteps. Return = -65.18961540071125
Episode 8693 finished after 120 timesteps. Return = -117.09167891678186
Episode 

Episode 8798 finished after 138 timesteps. Return = -11.554024083790338
Episode 8799 finished after 94 timesteps. Return = -130.87680650711968
Episode 8800 finished after 126 timesteps. Return = -145.59977249802392
Episode 8801 finished after 104 timesteps. Return = -125.37193152677085
Episode 8802 finished after 78 timesteps. Return = -222.46600242520273
Episode 8803 finished after 133 timesteps. Return = -14.62761311831494
Episode 8804 finished after 101 timesteps. Return = -244.05990622941746
Episode 8805 finished after 117 timesteps. Return = -72.47975820224792
Episode 8806 finished after 61 timesteps. Return = -118.43422759500206
Episode 8807 finished after 119 timesteps. Return = -357.4351805883569
Episode 8808 finished after 128 timesteps. Return = -267.6965167440395
Episode 8809 finished after 75 timesteps. Return = -172.13264648687561
Episode 8810 finished after 99 timesteps. Return = -348.6857210931289
Episode 8811 finished after 104 timesteps. Return = -154.8717811453371
Epi

Episode 8918 finished after 127 timesteps. Return = -177.38463358815378
Episode 8919 finished after 101 timesteps. Return = -71.01486369296951
Episode 8920 finished after 90 timesteps. Return = -200.2597054007323
Episode 8921 finished after 66 timesteps. Return = -38.842124662170235
Episode 8922 finished after 72 timesteps. Return = -112.69672493117022
Episode 8923 finished after 122 timesteps. Return = -111.66227146282031
Episode 8924 finished after 86 timesteps. Return = -113.08588844138755
Episode 8925 finished after 68 timesteps. Return = -71.77803848757505
Episode 8926 finished after 68 timesteps. Return = -154.24267066154826
Episode 8927 finished after 71 timesteps. Return = -76.63096964316397
Episode 8928 finished after 102 timesteps. Return = -153.88297733338771
Episode 8929 finished after 67 timesteps. Return = -164.6349998918601
Episode 8930 finished after 112 timesteps. Return = -22.524156439329815
Episode 8931 finished after 74 timesteps. Return = -238.86204678893387
Episod

Episode 9037 finished after 84 timesteps. Return = -131.47793570149264
Episode 9038 finished after 69 timesteps. Return = -113.19241160553663
Episode 9039 finished after 91 timesteps. Return = -232.43340260463546
Episode 9040 finished after 67 timesteps. Return = -132.37091025148055
Episode 9041 finished after 102 timesteps. Return = -265.7243789848559
Episode 9042 finished after 70 timesteps. Return = -167.94981593502334
Episode 9043 finished after 96 timesteps. Return = -202.22805679234125
Episode 9044 finished after 100 timesteps. Return = -132.57007660239495
Episode 9045 finished after 66 timesteps. Return = -78.80621815767364
Episode 9046 finished after 64 timesteps. Return = -33.589021036052344
Episode 9047 finished after 81 timesteps. Return = -119.42390634391406
Episode 9048 finished after 79 timesteps. Return = -153.785275592189
Episode 9049 finished after 86 timesteps. Return = -168.1133893247631
Episode 9050 finished after 58 timesteps. Return = -127.83154306293724
Episode 9

Episode 9157 finished after 95 timesteps. Return = -167.13632664663956
Episode 9158 finished after 85 timesteps. Return = -232.07767163033037
Episode 9159 finished after 89 timesteps. Return = -106.29417046585252
Episode 9160 finished after 80 timesteps. Return = -222.30742476159813
Episode 9161 finished after 120 timesteps. Return = -321.83760005171735
Episode 9162 finished after 85 timesteps. Return = -206.48189439291065
Episode 9163 finished after 139 timesteps. Return = -313.2876680839526
Episode 9164 finished after 111 timesteps. Return = -214.95196958185966
Episode 9165 finished after 97 timesteps. Return = -141.84489261032596
Episode 9166 finished after 70 timesteps. Return = -105.79859601704078
Episode 9167 finished after 99 timesteps. Return = -151.56471269420373
Episode 9168 finished after 100 timesteps. Return = -169.66431988381555
Episode 9169 finished after 110 timesteps. Return = -274.10468595894974
Episode 9170 finished after 94 timesteps. Return = -213.68926944561616
Ep

Episode 9274 finished after 109 timesteps. Return = -149.09032014551545
Episode 9275 finished after 66 timesteps. Return = -186.64488442203876
Episode 9276 finished after 60 timesteps. Return = -129.17369599540163
Episode 9277 finished after 71 timesteps. Return = -185.4297997447108
Episode 9278 finished after 68 timesteps. Return = -85.41547721970045
Episode 9279 finished after 93 timesteps. Return = -136.7982167285329
Episode 9280 finished after 91 timesteps. Return = -228.0090271347295
Episode 9281 finished after 69 timesteps. Return = -168.65513592752205
Episode 9282 finished after 64 timesteps. Return = -113.05101552471046
Episode 9283 finished after 95 timesteps. Return = -220.181725896889
Episode 9284 finished after 72 timesteps. Return = -118.92306850423651
Episode 9285 finished after 115 timesteps. Return = -93.73962829901777
Episode 9286 finished after 92 timesteps. Return = -224.91245146666785
Episode 9287 finished after 129 timesteps. Return = -364.2059246887064
Episode 928

Episode 9391 finished after 119 timesteps. Return = -97.30966541499282
Episode 9392 finished after 73 timesteps. Return = -66.31180633230399
Episode 9393 finished after 69 timesteps. Return = -153.8346242683705
Episode 9394 finished after 114 timesteps. Return = -165.44374445403452
Episode 9395 finished after 90 timesteps. Return = -197.5835016060396
Episode 9396 finished after 130 timesteps. Return = -167.19840476834332
Episode 9397 finished after 99 timesteps. Return = -109.24061343245279
Episode 9398 finished after 88 timesteps. Return = -146.44855073358823
Episode 9399 finished after 90 timesteps. Return = -117.32313696507882
Episode 9400 finished after 86 timesteps. Return = -152.50174549974167
Episode 9401 finished after 102 timesteps. Return = -263.25335451058675
Episode 9402 finished after 62 timesteps. Return = -63.46666797716923
Episode 9403 finished after 106 timesteps. Return = -327.05238995046744
Episode 9404 finished after 82 timesteps. Return = -102.14232962069232
Episod

Episode 9512 finished after 85 timesteps. Return = -148.91393923736317
Episode 9513 finished after 119 timesteps. Return = -117.79206788452743
Episode 9514 finished after 77 timesteps. Return = -121.31818549189957
Episode 9515 finished after 112 timesteps. Return = -84.34912499972914
Episode 9516 finished after 67 timesteps. Return = -109.18083826928103
Episode 9517 finished after 90 timesteps. Return = -157.63521420982727
Episode 9518 finished after 110 timesteps. Return = -128.32904309928824
Episode 9519 finished after 95 timesteps. Return = -142.1989286837835
Episode 9520 finished after 85 timesteps. Return = -196.7864320015575
Episode 9521 finished after 61 timesteps. Return = -64.36469277866482
Episode 9522 finished after 94 timesteps. Return = -334.60194681101717
Episode 9523 finished after 131 timesteps. Return = -71.05047024862087
Episode 9524 finished after 111 timesteps. Return = -218.04437849838385
Episode 9525 finished after 95 timesteps. Return = -161.71646121448504
Episod

Episode 9632 finished after 88 timesteps. Return = -213.04821778963498
Episode 9633 finished after 115 timesteps. Return = -190.83430985559772
Episode 9634 finished after 90 timesteps. Return = -195.44112398896277
Episode 9635 finished after 113 timesteps. Return = -175.10209836438727
Episode 9636 finished after 113 timesteps. Return = -185.8049937394686
Episode 9637 finished after 88 timesteps. Return = -332.2500598236587
Episode 9638 finished after 102 timesteps. Return = -9.782703445182449
Episode 9639 finished after 107 timesteps. Return = -201.1960694070782
Episode 9640 finished after 66 timesteps. Return = -153.25866184650513
Episode 9641 finished after 92 timesteps. Return = 17.324825187848504
Episode 9642 finished after 61 timesteps. Return = -117.8210395281594
Episode 9643 finished after 119 timesteps. Return = -176.95696081107644
Episode 9644 finished after 67 timesteps. Return = -126.0529675430253
Episode 9645 finished after 112 timesteps. Return = -175.98536018125623
Episod

Episode 9748 finished after 72 timesteps. Return = -139.8644801913055
Episode 9749 finished after 95 timesteps. Return = -195.25036763946613
Episode 9750 finished after 101 timesteps. Return = -144.77633763799088
Episode 9751 finished after 72 timesteps. Return = -220.34824333915617
Episode 9752 finished after 83 timesteps. Return = -148.1142552801556
Episode 9753 finished after 83 timesteps. Return = -168.55534636447112
Episode 9754 finished after 83 timesteps. Return = -172.38538094377276
Episode 9755 finished after 80 timesteps. Return = -143.0414874244433
Episode 9756 finished after 73 timesteps. Return = 51.41815271525718
Episode 9757 finished after 88 timesteps. Return = -135.47008820287772
Episode 9758 finished after 64 timesteps. Return = -67.58278321516563
Episode 9759 finished after 66 timesteps. Return = -68.1670380290781
Episode 9760 finished after 118 timesteps. Return = -141.7995507845572
Episode 9761 finished after 136 timesteps. Return = -164.0363519847886
Episode 9762 

Episode 9867 finished after 101 timesteps. Return = -120.64459554878415
Episode 9868 finished after 100 timesteps. Return = -211.7426138934752
Episode 9869 finished after 105 timesteps. Return = -174.16256030917418
Episode 9870 finished after 64 timesteps. Return = -88.27603378595825
Episode 9871 finished after 99 timesteps. Return = -137.08181097997354
Episode 9872 finished after 116 timesteps. Return = -206.60519817088021
Episode 9873 finished after 61 timesteps. Return = -165.62238713779033
Episode 9874 finished after 98 timesteps. Return = -1.1048115494802317
Episode 9875 finished after 82 timesteps. Return = -83.80154521543025
Episode 9876 finished after 103 timesteps. Return = -178.33891960029084
Episode 9877 finished after 102 timesteps. Return = -251.13714950734283
Episode 9878 finished after 97 timesteps. Return = -146.67841806585292
Episode 9879 finished after 102 timesteps. Return = 24.988136239027753
Episode 9880 finished after 61 timesteps. Return = -139.10826242889556
Epi

Episode 9985 finished after 112 timesteps. Return = -179.21974832060818
Episode 9986 finished after 79 timesteps. Return = -126.70474500246699
Episode 9987 finished after 57 timesteps. Return = -99.62971734869575
Episode 9988 finished after 96 timesteps. Return = -226.26106830391063
Episode 9989 finished after 56 timesteps. Return = -168.42772877638345
Episode 9990 finished after 95 timesteps. Return = -123.73101087642385
Episode 9991 finished after 98 timesteps. Return = -110.33135886656822
Episode 9992 finished after 92 timesteps. Return = -94.62830051067462
Episode 9993 finished after 82 timesteps. Return = -191.30177289121292
Episode 9994 finished after 85 timesteps. Return = -210.05290352174396
Episode 9995 finished after 97 timesteps. Return = -198.93152036184685
Episode 9996 finished after 58 timesteps. Return = -98.59392733063406
Episode 9997 finished after 80 timesteps. Return = -267.24238372825255
Episode 9998 finished after 89 timesteps. Return = -96.35600219396338
Episode 9

Episode 10101 finished after 75 timesteps. Return = -173.7944165996088
Episode 10102 finished after 90 timesteps. Return = -203.42675814774287
Episode 10103 finished after 72 timesteps. Return = -89.22996049864783
Episode 10104 finished after 60 timesteps. Return = -103.73487006412466
Episode 10105 finished after 93 timesteps. Return = -219.74429642349878
Episode 10106 finished after 111 timesteps. Return = -154.96845765157508
Episode 10107 finished after 64 timesteps. Return = -80.33917614916362
Episode 10108 finished after 105 timesteps. Return = -123.7673651509929
Episode 10109 finished after 88 timesteps. Return = -78.15103815330437
Episode 10110 finished after 113 timesteps. Return = -122.33521761143683
Episode 10111 finished after 94 timesteps. Return = -210.87398474411964
Episode 10112 finished after 71 timesteps. Return = -89.13474671519285
Episode 10113 finished after 75 timesteps. Return = -104.86884494597045
Episode 10114 finished after 1000 timesteps. Return = 78.6164690788

Episode 10217 finished after 137 timesteps. Return = -74.91775127274214
Episode 10218 finished after 135 timesteps. Return = -137.94322692246126
Episode 10219 finished after 76 timesteps. Return = -141.16707136368666
Episode 10220 finished after 65 timesteps. Return = -157.23910852646736
Episode 10221 finished after 97 timesteps. Return = -356.8912490134093
Episode 10222 finished after 83 timesteps. Return = -139.75626000836846
Episode 10223 finished after 134 timesteps. Return = -99.13843052066126
Episode 10224 finished after 60 timesteps. Return = -74.95289153651778
Episode 10225 finished after 70 timesteps. Return = -54.6380432490927
Episode 10226 finished after 107 timesteps. Return = -168.7275962250818
Episode 10227 finished after 93 timesteps. Return = -206.66794072084417
Episode 10228 finished after 69 timesteps. Return = -157.92329114853968
Episode 10229 finished after 87 timesteps. Return = -117.39228517453722
Episode 10230 finished after 75 timesteps. Return = -126.6536484053

Episode 10335 finished after 91 timesteps. Return = -133.6984518102682
Episode 10336 finished after 76 timesteps. Return = -187.04656483096863
Episode 10337 finished after 99 timesteps. Return = -139.09215463897488
Episode 10338 finished after 91 timesteps. Return = -128.03538596566113
Episode 10339 finished after 109 timesteps. Return = -152.2759852078618
Episode 10340 finished after 136 timesteps. Return = -139.85803706120913
Episode 10341 finished after 84 timesteps. Return = -114.06539806732721
Episode 10342 finished after 91 timesteps. Return = -94.03924620316417
Episode 10343 finished after 89 timesteps. Return = -364.1016364928397
Episode 10344 finished after 93 timesteps. Return = -108.55390371197291
Episode 10345 finished after 120 timesteps. Return = -15.51748860342623
Episode 10346 finished after 116 timesteps. Return = -91.25417197571825
Episode 10347 finished after 89 timesteps. Return = -72.58248289123325
Episode 10348 finished after 117 timesteps. Return = -106.091698329

Episode 10451 finished after 95 timesteps. Return = -72.89037947384372
Episode 10452 finished after 96 timesteps. Return = -106.20382840366148
Episode 10453 finished after 83 timesteps. Return = -130.5022126783968
Episode 10454 finished after 64 timesteps. Return = -52.733305429099026
Episode 10455 finished after 75 timesteps. Return = -160.56200226776105
Episode 10456 finished after 63 timesteps. Return = -76.59292012289556
Episode 10457 finished after 89 timesteps. Return = -52.90350410190884
Episode 10458 finished after 77 timesteps. Return = -25.982442789115936
Episode 10459 finished after 160 timesteps. Return = -321.75097123986075
Episode 10460 finished after 75 timesteps. Return = -111.9980420850781
Episode 10461 finished after 128 timesteps. Return = -106.18793214063218
Episode 10462 finished after 59 timesteps. Return = -106.24404784394585
Episode 10463 finished after 109 timesteps. Return = -210.73670518995795
Episode 10464 finished after 124 timesteps. Return = -109.46511615

Episode 10566 finished after 103 timesteps. Return = -191.75175638621238
Episode 10567 finished after 70 timesteps. Return = -100.84956472172809
Episode 10568 finished after 63 timesteps. Return = -76.93051115097873
Episode 10569 finished after 127 timesteps. Return = -412.4230369363221
Episode 10570 finished after 118 timesteps. Return = -199.7230064144939
Episode 10571 finished after 105 timesteps. Return = -36.65447083306162
Episode 10572 finished after 114 timesteps. Return = -163.99721818087164
Episode 10573 finished after 89 timesteps. Return = -121.75360548165662
Episode 10574 finished after 83 timesteps. Return = -119.55390453272355
Episode 10575 finished after 108 timesteps. Return = -103.09291311162826
Episode 10576 finished after 103 timesteps. Return = -396.5320452606406
Episode 10577 finished after 80 timesteps. Return = -136.43940093650997
Episode 10578 finished after 98 timesteps. Return = -1.4646517715682421
Episode 10579 finished after 69 timesteps. Return = -133.00972

Episode 10683 finished after 132 timesteps. Return = -23.70748788064705
Episode 10684 finished after 60 timesteps. Return = -120.45439676052317
Episode 10685 finished after 103 timesteps. Return = -339.1791863731536
Episode 10686 finished after 99 timesteps. Return = -146.00867780506528
Episode 10687 finished after 115 timesteps. Return = -192.4092906761295
Episode 10688 finished after 77 timesteps. Return = -200.56674532008537
Episode 10689 finished after 87 timesteps. Return = -95.47300077223905
Episode 10690 finished after 66 timesteps. Return = -86.13390540497856
Episode 10691 finished after 117 timesteps. Return = -127.50758856639541
Episode 10692 finished after 94 timesteps. Return = -315.46799262046886
Episode 10693 finished after 107 timesteps. Return = -266.5249537707314
Episode 10694 finished after 105 timesteps. Return = -183.08344391036422
Episode 10695 finished after 109 timesteps. Return = -197.4073990747738
Episode 10696 finished after 75 timesteps. Return = -91.07889718

Episode 10798 finished after 104 timesteps. Return = -110.21655710635994
Episode 10799 finished after 119 timesteps. Return = -167.7732499526113
Episode 10800 finished after 68 timesteps. Return = -69.88311804435676
Episode 10801 finished after 107 timesteps. Return = -197.56635214316552
Episode 10802 finished after 85 timesteps. Return = -136.06229439522895
Episode 10803 finished after 115 timesteps. Return = -187.0378888913733
Episode 10804 finished after 112 timesteps. Return = -242.95827222829698
Episode 10805 finished after 53 timesteps. Return = -186.74485000722012
Episode 10806 finished after 64 timesteps. Return = -69.76330607663326
Episode 10807 finished after 89 timesteps. Return = -236.36967882498038
Episode 10808 finished after 99 timesteps. Return = -186.35475421671157
Episode 10809 finished after 90 timesteps. Return = -203.6842450374614
Episode 10810 finished after 86 timesteps. Return = -142.63978637472258
Episode 10811 finished after 122 timesteps. Return = -154.911461

Episode 10915 finished after 1000 timesteps. Return = 124.67457924572061
Episode 10916 finished after 81 timesteps. Return = -121.74924479790491
Episode 10917 finished after 76 timesteps. Return = -96.08012616740936
Episode 10918 finished after 131 timesteps. Return = -155.87432681211232
Episode 10919 finished after 128 timesteps. Return = -271.2786552837614
Episode 10920 finished after 80 timesteps. Return = -226.65803042604037
Episode 10921 finished after 75 timesteps. Return = -197.3924843795141
Episode 10922 finished after 107 timesteps. Return = -189.62217631895044
Episode 10923 finished after 63 timesteps. Return = -97.77362467011572
Episode 10924 finished after 65 timesteps. Return = -161.658101477362
Episode 10925 finished after 73 timesteps. Return = -69.8976707038286
Episode 10926 finished after 63 timesteps. Return = -134.42069464573405
Episode 10927 finished after 66 timesteps. Return = -203.24416584616063
Episode 10928 finished after 102 timesteps. Return = -201.2950742447

Episode 11032 finished after 92 timesteps. Return = -222.9443495990596
Episode 11033 finished after 88 timesteps. Return = -147.27464136452204
Episode 11034 finished after 68 timesteps. Return = -92.10380801854788
Episode 11035 finished after 87 timesteps. Return = -131.43898608016755
Episode 11036 finished after 81 timesteps. Return = -262.415888940673
Episode 11037 finished after 126 timesteps. Return = -215.8959805176819
Episode 11038 finished after 65 timesteps. Return = -71.16293843558984
Episode 11039 finished after 77 timesteps. Return = -94.2243999990598
Episode 11040 finished after 1000 timesteps. Return = 74.06886792373405
Episode 11041 finished after 60 timesteps. Return = -62.70893748465851
Episode 11042 finished after 97 timesteps. Return = -29.40538691814544
Episode 11043 finished after 127 timesteps. Return = -266.6469386929552
Episode 11044 finished after 90 timesteps. Return = -138.11058008374215
Episode 11045 finished after 92 timesteps. Return = -66.23794527431103
Ep

Episode 11148 finished after 69 timesteps. Return = -162.20032150334993
Episode 11149 finished after 84 timesteps. Return = -132.05162062054475
Episode 11150 finished after 114 timesteps. Return = -175.85166418984787
Episode 11151 finished after 93 timesteps. Return = -147.32576593757244
Episode 11152 finished after 83 timesteps. Return = -146.72836728720716
Episode 11153 finished after 68 timesteps. Return = -180.090158767844
Episode 11154 finished after 83 timesteps. Return = -187.07469590018758
Episode 11155 finished after 83 timesteps. Return = -156.24016581080164
Episode 11156 finished after 101 timesteps. Return = -192.43685344196302
Episode 11157 finished after 136 timesteps. Return = -178.6069620185392
Episode 11158 finished after 89 timesteps. Return = -256.258114176469
Episode 11159 finished after 76 timesteps. Return = -74.56725664533175
Episode 11160 finished after 101 timesteps. Return = -191.32485009512965
Episode 11161 finished after 88 timesteps. Return = -157.054631731

Episode 11266 finished after 66 timesteps. Return = -162.03818685843265
Episode 11267 finished after 88 timesteps. Return = -186.57083260168218
Episode 11268 finished after 117 timesteps. Return = -165.9959471971153
Episode 11269 finished after 94 timesteps. Return = -128.19856820855392
Episode 11270 finished after 71 timesteps. Return = -222.777974731838
Episode 11271 finished after 80 timesteps. Return = -145.132450190642
Episode 11272 finished after 90 timesteps. Return = -88.486373287167
Episode 11273 finished after 83 timesteps. Return = -88.36915886903486
Episode 11274 finished after 74 timesteps. Return = -97.99894720993466
Episode 11275 finished after 110 timesteps. Return = -235.28675543659588
Episode 11276 finished after 70 timesteps. Return = -47.33183943935366
Episode 11277 finished after 72 timesteps. Return = -25.096399789434088
Episode 11278 finished after 90 timesteps. Return = -146.12636302659433
Episode 11279 finished after 94 timesteps. Return = -143.94621340046342
E

Episode 11385 finished after 77 timesteps. Return = -143.52030799188353
Episode 11386 finished after 59 timesteps. Return = -133.78815896451368
Episode 11387 finished after 128 timesteps. Return = -117.55999143077916
Episode 11388 finished after 123 timesteps. Return = -215.0189896278205
Episode 11389 finished after 94 timesteps. Return = -193.71099433691185
Episode 11390 finished after 127 timesteps. Return = -11.603434450599025
Episode 11391 finished after 103 timesteps. Return = 17.384726699147734
Episode 11392 finished after 73 timesteps. Return = -199.95660655758513
Episode 11393 finished after 94 timesteps. Return = -117.58784384286626
Episode 11394 finished after 79 timesteps. Return = -67.78937713011314
Episode 11395 finished after 87 timesteps. Return = -101.61990684463369
Episode 11396 finished after 62 timesteps. Return = -163.25623328382449
Episode 11397 finished after 87 timesteps. Return = -182.11774161320645
Episode 11398 finished after 110 timesteps. Return = -203.71649

Episode 11501 finished after 86 timesteps. Return = -93.65014413501729
Episode 11502 finished after 76 timesteps. Return = -115.63296308501766
Episode 11503 finished after 122 timesteps. Return = -47.45167658337044
Episode 11504 finished after 89 timesteps. Return = -181.39920916371742
Episode 11505 finished after 87 timesteps. Return = -226.14719397385366
Episode 11506 finished after 108 timesteps. Return = -244.39874307994955
Episode 11507 finished after 83 timesteps. Return = -227.03220815656402
Episode 11508 finished after 118 timesteps. Return = -144.68920938236135
Episode 11509 finished after 83 timesteps. Return = -86.63957053229176
Episode 11510 finished after 129 timesteps. Return = -238.0518183626929
Episode 11511 finished after 116 timesteps. Return = -320.12634434933227
Episode 11512 finished after 111 timesteps. Return = -204.50287640997595
Episode 11513 finished after 91 timesteps. Return = -112.83716754314878
Episode 11514 finished after 100 timesteps. Return = -193.6882

Episode 11618 finished after 71 timesteps. Return = -14.125832366301168
Episode 11619 finished after 140 timesteps. Return = -173.83384844298905
Episode 11620 finished after 80 timesteps. Return = -178.0720347315156
Episode 11621 finished after 78 timesteps. Return = -180.6718473739441
Episode 11622 finished after 70 timesteps. Return = -189.60337562815158
Episode 11623 finished after 96 timesteps. Return = -188.49909473192773
Episode 11624 finished after 65 timesteps. Return = -95.52840013442542
Episode 11625 finished after 95 timesteps. Return = -218.3681111939465
Episode 11626 finished after 120 timesteps. Return = -91.03049890507809
Episode 11627 finished after 69 timesteps. Return = -83.64138186735826
Episode 11628 finished after 79 timesteps. Return = -147.60474128709023
Episode 11629 finished after 96 timesteps. Return = -143.3442927552682
Episode 11630 finished after 105 timesteps. Return = -135.73295484502245
Episode 11631 finished after 134 timesteps. Return = -167.3050899683

Episode 11735 finished after 105 timesteps. Return = -200.37850732086665
Episode 11736 finished after 75 timesteps. Return = -147.67928388124514
Episode 11737 finished after 99 timesteps. Return = -183.4363525142583
Episode 11738 finished after 74 timesteps. Return = -80.79012876077431
Episode 11739 finished after 94 timesteps. Return = -183.90158144518165
Episode 11740 finished after 113 timesteps. Return = -379.6045571635829
Episode 11741 finished after 78 timesteps. Return = -120.91229133304543
Episode 11742 finished after 105 timesteps. Return = 16.58689897830952
Episode 11743 finished after 67 timesteps. Return = -80.87965529203295
Episode 11744 finished after 104 timesteps. Return = -286.474220982548
Episode 11745 finished after 67 timesteps. Return = -114.26380182030853
Episode 11746 finished after 75 timesteps. Return = -220.6171763772664
Episode 11747 finished after 62 timesteps. Return = -187.19818766979637
Episode 11748 finished after 75 timesteps. Return = -182.101846419589

Episode 11851 finished after 110 timesteps. Return = -253.2005293455789
Episode 11852 finished after 83 timesteps. Return = -135.11916807872365
Episode 11853 finished after 96 timesteps. Return = -394.60000526784506
Episode 11854 finished after 111 timesteps. Return = -200.357509203255
Episode 11855 finished after 67 timesteps. Return = -120.32242797192842
Episode 11856 finished after 102 timesteps. Return = -185.16881742681386
Episode 11857 finished after 90 timesteps. Return = -131.38503042815475
Episode 11858 finished after 75 timesteps. Return = -122.61504312522682
Episode 11859 finished after 78 timesteps. Return = -204.7818047979419
Episode 11860 finished after 108 timesteps. Return = -178.32241819738886
Episode 11861 finished after 85 timesteps. Return = -104.9574967850101
Episode 11862 finished after 120 timesteps. Return = -158.30444598972161
Episode 11863 finished after 83 timesteps. Return = -331.3138350368902
Episode 11864 finished after 97 timesteps. Return = -189.78247609

Episode 11968 finished after 108 timesteps. Return = -116.77076311300465
Episode 11969 finished after 115 timesteps. Return = -136.00822487930589
Episode 11970 finished after 75 timesteps. Return = -141.27188935816278
Episode 11971 finished after 91 timesteps. Return = -255.85084705477212
Episode 11972 finished after 92 timesteps. Return = -290.0253572606965
Episode 11973 finished after 67 timesteps. Return = 6.875567514215092
Episode 11974 finished after 97 timesteps. Return = -230.0202680009086
Episode 11975 finished after 73 timesteps. Return = -207.24699272135655
Episode 11976 finished after 131 timesteps. Return = -260.4460775321324
Episode 11977 finished after 157 timesteps. Return = -326.32608699037246
Episode 11978 finished after 81 timesteps. Return = -258.7839311543012
Episode 11979 finished after 110 timesteps. Return = -145.86821183402427
Episode 11980 finished after 70 timesteps. Return = -83.66919020933287
Episode 11981 finished after 67 timesteps. Return = -106.399893151

Episode 12086 finished after 74 timesteps. Return = -89.18176805774344
Episode 12087 finished after 140 timesteps. Return = -6.614297123042121
Episode 12088 finished after 78 timesteps. Return = -93.46791878729447
Episode 12089 finished after 118 timesteps. Return = -294.1483455735047
Episode 12090 finished after 73 timesteps. Return = -110.0352675027127
Episode 12091 finished after 127 timesteps. Return = -313.22234455081707
Episode 12092 finished after 90 timesteps. Return = -127.04232708887719
Episode 12093 finished after 59 timesteps. Return = -158.089699058024
Episode 12094 finished after 102 timesteps. Return = -259.5906047180646
Episode 12095 finished after 131 timesteps. Return = -162.47677377067416
Episode 12096 finished after 68 timesteps. Return = -65.2545997801194
Episode 12097 finished after 83 timesteps. Return = -125.1713082198543
Episode 12098 finished after 99 timesteps. Return = -113.71494560894492
Episode 12099 finished after 86 timesteps. Return = -135.3324580630649

Episode 12200 finished after 94 timesteps. Return = -175.79338953911065
Episode 12201 finished after 118 timesteps. Return = -200.79095425412197
Episode 12202 finished after 102 timesteps. Return = -254.9875748144368
Episode 12203 finished after 73 timesteps. Return = -120.63191282475827
Episode 12204 finished after 96 timesteps. Return = -273.5703966141019
Episode 12205 finished after 85 timesteps. Return = -64.26898104095338
Episode 12206 finished after 66 timesteps. Return = -105.76286202945842
Episode 12207 finished after 83 timesteps. Return = -257.34265272243545
Episode 12208 finished after 118 timesteps. Return = -127.48755667432545
Episode 12209 finished after 107 timesteps. Return = -0.34300446528635575
Episode 12210 finished after 54 timesteps. Return = -199.11040912254097
Episode 12211 finished after 111 timesteps. Return = -296.5130923034838
Episode 12212 finished after 102 timesteps. Return = -258.9763173670764
Episode 12213 finished after 73 timesteps. Return = -116.81418

Episode 12316 finished after 89 timesteps. Return = -158.91043603225233
Episode 12317 finished after 116 timesteps. Return = -155.03653360090345
Episode 12318 finished after 65 timesteps. Return = -108.62015981719014
Episode 12319 finished after 93 timesteps. Return = -169.12257179443958
Episode 12320 finished after 74 timesteps. Return = -218.95514111582924
Episode 12321 finished after 116 timesteps. Return = -189.13105780281103
Episode 12322 finished after 104 timesteps. Return = -123.04174429255904
Episode 12323 finished after 107 timesteps. Return = -325.14111285801107
Episode 12324 finished after 124 timesteps. Return = -126.76161895525883
Episode 12325 finished after 100 timesteps. Return = -237.52303418660605
Episode 12326 finished after 117 timesteps. Return = -292.97777633466046
Episode 12327 finished after 85 timesteps. Return = -137.29355361330045
Episode 12328 finished after 72 timesteps. Return = -91.70801689424547
Episode 12329 finished after 107 timesteps. Return = -2.48

Episode 12430 finished after 114 timesteps. Return = -162.7091504317678
Episode 12431 finished after 97 timesteps. Return = -481.8449620221818
Episode 12432 finished after 100 timesteps. Return = -295.6581917231098
Episode 12433 finished after 121 timesteps. Return = -101.4930311430473
Episode 12434 finished after 66 timesteps. Return = -113.962493484131
Episode 12435 finished after 79 timesteps. Return = -67.07326051443438
Episode 12436 finished after 92 timesteps. Return = -88.61242212291697
Episode 12437 finished after 104 timesteps. Return = -137.24801960094047
Episode 12438 finished after 66 timesteps. Return = -53.98822947028093
Episode 12439 finished after 127 timesteps. Return = -211.1742241082117
Episode 12440 finished after 69 timesteps. Return = -88.20593397192941
Episode 12441 finished after 99 timesteps. Return = -181.82581904784405
Episode 12442 finished after 91 timesteps. Return = -160.05003423450793
Episode 12443 finished after 121 timesteps. Return = -210.546339410306

Episode 12545 finished after 120 timesteps. Return = -264.2763903194874
Episode 12546 finished after 92 timesteps. Return = -322.552915438701
Episode 12547 finished after 71 timesteps. Return = -103.45443348689615
Episode 12548 finished after 105 timesteps. Return = -196.69538839745428
Episode 12549 finished after 67 timesteps. Return = -140.32861077224842
Episode 12550 finished after 67 timesteps. Return = -185.5560549473605
Episode 12551 finished after 69 timesteps. Return = -185.17849088345872
Episode 12552 finished after 72 timesteps. Return = -158.2953836794635
Episode 12553 finished after 104 timesteps. Return = -92.88451679834682
Episode 12554 finished after 121 timesteps. Return = -321.81182393151914
Episode 12555 finished after 99 timesteps. Return = -137.7405498146118
Episode 12556 finished after 83 timesteps. Return = -341.1235745458612
Episode 12557 finished after 68 timesteps. Return = -72.45742889988654
Episode 12558 finished after 95 timesteps. Return = -276.432628734522

Episode 12662 finished after 149 timesteps. Return = -170.1264081119619
Episode 12663 finished after 121 timesteps. Return = -173.3373375668097
Episode 12664 finished after 77 timesteps. Return = -75.32965470462118
Episode 12665 finished after 138 timesteps. Return = -309.982083581987
Episode 12666 finished after 115 timesteps. Return = -211.77802033056378
Episode 12667 finished after 133 timesteps. Return = -186.59253205191797
Episode 12668 finished after 81 timesteps. Return = -60.194708620312866
Episode 12669 finished after 114 timesteps. Return = -94.0692322131775
Episode 12670 finished after 123 timesteps. Return = -243.6835309395051
Episode 12671 finished after 84 timesteps. Return = -144.01342180456865
Episode 12672 finished after 97 timesteps. Return = -216.6727060061573
Episode 12673 finished after 95 timesteps. Return = -170.4168473407222
Episode 12674 finished after 68 timesteps. Return = -15.022430392436831
Episode 12675 finished after 155 timesteps. Return = -347.487267732

Episode 12779 finished after 123 timesteps. Return = -150.0702176248144
Episode 12780 finished after 66 timesteps. Return = -58.31613583365824
Episode 12781 finished after 109 timesteps. Return = -221.50409390767737
Episode 12782 finished after 115 timesteps. Return = -148.00553155590876
Episode 12783 finished after 118 timesteps. Return = -152.25971446881445
Episode 12784 finished after 133 timesteps. Return = -154.05531620595116
Episode 12785 finished after 108 timesteps. Return = -321.1622421086819
Episode 12786 finished after 136 timesteps. Return = -271.9739034084315
Episode 12787 finished after 114 timesteps. Return = -101.14060225235623
Episode 12788 finished after 125 timesteps. Return = -220.33574470451208
Episode 12789 finished after 126 timesteps. Return = -154.8094549031018
Episode 12790 finished after 152 timesteps. Return = -475.9529692268886
Episode 12791 finished after 102 timesteps. Return = -245.31888057026723
Episode 12792 finished after 92 timesteps. Return = -253.1

Episode 12893 finished after 106 timesteps. Return = -263.8814774464797
Episode 12894 finished after 109 timesteps. Return = -171.501959832791
Episode 12895 finished after 106 timesteps. Return = -267.45687387038697
Episode 12896 finished after 89 timesteps. Return = -167.50535862080457
Episode 12897 finished after 89 timesteps. Return = -221.46083401983068
Episode 12898 finished after 1000 timesteps. Return = 79.30259447434523
Episode 12899 finished after 1000 timesteps. Return = 52.89399750986894
Episode 12900 finished after 82 timesteps. Return = -102.88965165573858
Episode 12901 finished after 114 timesteps. Return = -286.5595203105447
Episode 12902 finished after 116 timesteps. Return = -191.74827674157723
Episode 12903 finished after 96 timesteps. Return = -174.1421773367225
Episode 12904 finished after 71 timesteps. Return = -88.68730035079845
Episode 12905 finished after 68 timesteps. Return = -139.31738917762274
Episode 12906 finished after 81 timesteps. Return = -204.39000973

Episode 13008 finished after 95 timesteps. Return = -204.82379387815817
Episode 13009 finished after 96 timesteps. Return = 59.477017378314514
Episode 13010 finished after 98 timesteps. Return = -111.33571723689938
Episode 13011 finished after 85 timesteps. Return = -102.16723717208262
Episode 13012 finished after 97 timesteps. Return = -133.09820173834328
Episode 13013 finished after 70 timesteps. Return = -101.40967018287397
Episode 13014 finished after 98 timesteps. Return = -133.24998045801394
Episode 13015 finished after 67 timesteps. Return = -31.92509540482017
Episode 13016 finished after 103 timesteps. Return = -200.38343601720499
Episode 13017 finished after 82 timesteps. Return = -71.29852559054173
Episode 13018 finished after 146 timesteps. Return = -189.5937668159986
Episode 13019 finished after 77 timesteps. Return = -76.2084537692422
Episode 13020 finished after 76 timesteps. Return = -133.5916387360724
Episode 13021 finished after 71 timesteps. Return = -150.526788293490

Episode 13126 finished after 67 timesteps. Return = -41.11921086003155
Episode 13127 finished after 112 timesteps. Return = -261.0091350666779
Episode 13128 finished after 127 timesteps. Return = -262.56364007673665
Episode 13129 finished after 79 timesteps. Return = -88.81618052052836
Episode 13130 finished after 92 timesteps. Return = -111.18944478930929
Episode 13131 finished after 114 timesteps. Return = -298.7932931816576
Episode 13132 finished after 86 timesteps. Return = 7.28774167552686
Episode 13133 finished after 104 timesteps. Return = -220.94612116612205
Episode 13134 finished after 115 timesteps. Return = -263.3441360333659
Episode 13135 finished after 126 timesteps. Return = -161.09182594029943
Episode 13136 finished after 69 timesteps. Return = -54.41181731429664
Episode 13137 finished after 82 timesteps. Return = -145.8920551327855
Episode 13138 finished after 143 timesteps. Return = -191.29450370704575
Episode 13139 finished after 118 timesteps. Return = -170.958528860

Episode 13241 finished after 140 timesteps. Return = -111.8173380207115
Episode 13242 finished after 85 timesteps. Return = -244.31456057445197
Episode 13243 finished after 104 timesteps. Return = -232.53759684521557
Episode 13244 finished after 80 timesteps. Return = -181.85109506568074
Episode 13245 finished after 111 timesteps. Return = -230.51294047048424
Episode 13246 finished after 76 timesteps. Return = -124.34774187225588
Episode 13247 finished after 83 timesteps. Return = -244.62632482683824
Episode 13248 finished after 89 timesteps. Return = -205.63009763473167
Episode 13249 finished after 81 timesteps. Return = -312.34444059031296
Episode 13250 finished after 98 timesteps. Return = -222.62781498979098
Episode 13251 finished after 70 timesteps. Return = -155.82955829444518
Episode 13252 finished after 66 timesteps. Return = -240.32986405134548
Episode 13253 finished after 96 timesteps. Return = -183.36270071129096
Episode 13254 finished after 103 timesteps. Return = -188.7502

Episode 13356 finished after 112 timesteps. Return = 12.109572463355406
Episode 13357 finished after 120 timesteps. Return = -123.74555301097864
Episode 13358 finished after 77 timesteps. Return = -235.8464254153018
Episode 13359 finished after 129 timesteps. Return = -219.7395726348456
Episode 13360 finished after 99 timesteps. Return = -219.53181358602492
Episode 13361 finished after 65 timesteps. Return = -89.40060787845512
Episode 13362 finished after 111 timesteps. Return = -197.28536344333392
Episode 13363 finished after 118 timesteps. Return = -245.07421400425523
Episode 13364 finished after 57 timesteps. Return = -110.96308996008821
Episode 13365 finished after 65 timesteps. Return = -95.79235844105378
Episode 13366 finished after 111 timesteps. Return = -64.96918308699378
Episode 13367 finished after 63 timesteps. Return = -143.65329877690027
Episode 13368 finished after 76 timesteps. Return = -117.4813465015655
Episode 13369 finished after 117 timesteps. Return = -9.858137464

Episode 13472 finished after 61 timesteps. Return = -89.05072358728182
Episode 13473 finished after 99 timesteps. Return = -204.76106677219093
Episode 13474 finished after 77 timesteps. Return = -136.02576044995232
Episode 13475 finished after 96 timesteps. Return = -121.04565287960388
Episode 13476 finished after 101 timesteps. Return = -109.37915555014554
Episode 13477 finished after 120 timesteps. Return = -185.24805391559966
Episode 13478 finished after 62 timesteps. Return = -185.00978671977964
Episode 13479 finished after 138 timesteps. Return = -42.80505336950249
Episode 13480 finished after 137 timesteps. Return = -161.5182001350994
Episode 13481 finished after 88 timesteps. Return = -103.96191675833968
Episode 13482 finished after 111 timesteps. Return = -118.32152840941838
Episode 13483 finished after 147 timesteps. Return = -41.53840043825895
Episode 13484 finished after 80 timesteps. Return = -116.79500551267374
Episode 13485 finished after 73 timesteps. Return = -189.58223

Episode 13590 finished after 75 timesteps. Return = -102.46489469813577
Episode 13591 finished after 74 timesteps. Return = -138.15499462110083
Episode 13592 finished after 75 timesteps. Return = -133.0059772095109
Episode 13593 finished after 98 timesteps. Return = -200.16799131798408
Episode 13594 finished after 105 timesteps. Return = -262.6346699529564
Episode 13595 finished after 115 timesteps. Return = -41.132887819742564
Episode 13596 finished after 102 timesteps. Return = -173.81673194201986
Episode 13597 finished after 95 timesteps. Return = -172.63972686731748
Episode 13598 finished after 111 timesteps. Return = 27.025365644400438
Episode 13599 finished after 75 timesteps. Return = -124.87312644132162
Episode 13600 finished after 139 timesteps. Return = -200.35182996483366
Episode 13601 finished after 84 timesteps. Return = -257.40972908323425
Episode 13602 finished after 71 timesteps. Return = -99.92757165727794
Episode 13603 finished after 129 timesteps. Return = -191.26571

Episode 13706 finished after 110 timesteps. Return = -121.22668759835253
Episode 13707 finished after 117 timesteps. Return = -306.7364497999793
Episode 13708 finished after 88 timesteps. Return = -113.82890880138632
Episode 13709 finished after 78 timesteps. Return = -97.3920638989081
Episode 13710 finished after 100 timesteps. Return = -235.52073126229462
Episode 13711 finished after 137 timesteps. Return = -276.88372100745437
Episode 13712 finished after 84 timesteps. Return = -259.7748212096827
Episode 13713 finished after 100 timesteps. Return = -203.3995857224694
Episode 13714 finished after 104 timesteps. Return = 31.305059246780928
Episode 13715 finished after 167 timesteps. Return = -72.1183667188511
Episode 13716 finished after 81 timesteps. Return = -180.92483509304412
Episode 13717 finished after 75 timesteps. Return = -124.86823183895615
Episode 13718 finished after 67 timesteps. Return = -97.10363526264283
Episode 13719 finished after 129 timesteps. Return = -270.47934203

Episode 13823 finished after 93 timesteps. Return = -170.3650439180984
Episode 13824 finished after 87 timesteps. Return = -90.17060414520463
Episode 13825 finished after 121 timesteps. Return = -367.52222999033734
Episode 13826 finished after 78 timesteps. Return = -108.53720841005581
Episode 13827 finished after 94 timesteps. Return = -199.32879864626403
Episode 13828 finished after 76 timesteps. Return = -92.98025197861033
Episode 13829 finished after 117 timesteps. Return = -294.4403867107427
Episode 13830 finished after 74 timesteps. Return = -212.74876563000888
Episode 13831 finished after 125 timesteps. Return = -240.08470653999723
Episode 13832 finished after 123 timesteps. Return = -156.4162362624695
Episode 13833 finished after 87 timesteps. Return = -235.14586138971325
Episode 13834 finished after 103 timesteps. Return = -125.032949049353
Episode 13835 finished after 97 timesteps. Return = -230.97431653241395
Episode 13836 finished after 85 timesteps. Return = -136.780230070

Episode 13938 finished after 135 timesteps. Return = -158.17953252273043
Episode 13939 finished after 105 timesteps. Return = -196.73632793832385
Episode 13940 finished after 73 timesteps. Return = -143.28939623853438
Episode 13941 finished after 94 timesteps. Return = -96.43270744206437
Episode 13942 finished after 67 timesteps. Return = -84.3878172383999
Episode 13943 finished after 81 timesteps. Return = -135.78682010953824
Episode 13944 finished after 1000 timesteps. Return = 73.19556998242116
Episode 13945 finished after 131 timesteps. Return = -225.18761488741742
Episode 13946 finished after 70 timesteps. Return = -98.30773648818904
Episode 13947 finished after 118 timesteps. Return = -167.04635631922974
Episode 13948 finished after 106 timesteps. Return = -133.89944178306624
Episode 13949 finished after 129 timesteps. Return = -424.6636785980067
Episode 13950 finished after 88 timesteps. Return = -119.9209939007607
Episode 13951 finished after 138 timesteps. Return = -166.546813

Episode 14056 finished after 80 timesteps. Return = -209.63894621513813
Episode 14057 finished after 67 timesteps. Return = -122.2143120779467
Episode 14058 finished after 67 timesteps. Return = -125.80521976759564
Episode 14059 finished after 84 timesteps. Return = -91.57107252556767
Episode 14060 finished after 65 timesteps. Return = -114.24640678521565
Episode 14061 finished after 63 timesteps. Return = -91.30351699287861
Episode 14062 finished after 65 timesteps. Return = -222.90009268082372
Episode 14063 finished after 88 timesteps. Return = -213.34745431956713
Episode 14064 finished after 82 timesteps. Return = -151.0313827149257
Episode 14065 finished after 94 timesteps. Return = -269.07983951832455
Episode 14066 finished after 63 timesteps. Return = -90.37273144226083
Episode 14067 finished after 74 timesteps. Return = -214.03042575388548
Episode 14068 finished after 106 timesteps. Return = -177.2981771062332
Episode 14069 finished after 88 timesteps. Return = -222.661173013607

Episode 14173 finished after 126 timesteps. Return = -106.95816850511036
Episode 14174 finished after 64 timesteps. Return = -105.36011653431841
Episode 14175 finished after 85 timesteps. Return = -163.3649793595899
Episode 14176 finished after 100 timesteps. Return = -75.90108875797338
Episode 14177 finished after 84 timesteps. Return = -131.28184415214474
Episode 14178 finished after 125 timesteps. Return = -104.03275668200936
Episode 14179 finished after 109 timesteps. Return = -285.2536180784491
Episode 14180 finished after 91 timesteps. Return = -191.756074288179
Episode 14181 finished after 121 timesteps. Return = -219.71948352534554
Episode 14182 finished after 119 timesteps. Return = -183.0024792191358
Episode 14183 finished after 110 timesteps. Return = -266.9286845956856
Episode 14184 finished after 96 timesteps. Return = -180.90704316303942
Episode 14185 finished after 74 timesteps. Return = -189.7358755711744
Episode 14186 finished after 98 timesteps. Return = -182.37565504

Episode 14290 finished after 122 timesteps. Return = -204.24824318877154
Episode 14291 finished after 90 timesteps. Return = -147.48417267833895
Episode 14292 finished after 62 timesteps. Return = -112.24886376581958
Episode 14293 finished after 104 timesteps. Return = 10.044899731653956
Episode 14294 finished after 73 timesteps. Return = -217.16840542822916
Episode 14295 finished after 104 timesteps. Return = -228.59984751624123
Episode 14296 finished after 79 timesteps. Return = -187.44225146769276
Episode 14297 finished after 84 timesteps. Return = -83.15774668248557
Episode 14298 finished after 116 timesteps. Return = -229.00992215414914
Episode 14299 finished after 111 timesteps. Return = -215.9040406401021
Episode 14300 finished after 95 timesteps. Return = -216.39274988764944
Episode 14301 finished after 127 timesteps. Return = -189.57124747635052
Episode 14302 finished after 133 timesteps. Return = -17.401813215753975
Episode 14303 finished after 111 timesteps. Return = -182.50

Episode 14406 finished after 62 timesteps. Return = -63.14566064070573
Episode 14407 finished after 118 timesteps. Return = -275.09023990918365
Episode 14408 finished after 98 timesteps. Return = -294.2832462540119
Episode 14409 finished after 89 timesteps. Return = -221.31686919988826
Episode 14410 finished after 132 timesteps. Return = -204.5546811563982
Episode 14411 finished after 96 timesteps. Return = -205.2155311283332
Episode 14412 finished after 111 timesteps. Return = -239.2980751503517
Episode 14413 finished after 65 timesteps. Return = -111.72082351541718
Episode 14414 finished after 98 timesteps. Return = -197.97608972525381
Episode 14415 finished after 69 timesteps. Return = -62.48438992822609
Episode 14416 finished after 104 timesteps. Return = -202.62656924603738
Episode 14417 finished after 96 timesteps. Return = -116.80335161089279
Episode 14418 finished after 86 timesteps. Return = -194.40485225882756
Episode 14419 finished after 80 timesteps. Return = -159.169109812

Episode 14524 finished after 98 timesteps. Return = -249.3617836781557
Episode 14525 finished after 103 timesteps. Return = -280.178555668739
Episode 14526 finished after 96 timesteps. Return = -130.75775194848114
Episode 14527 finished after 106 timesteps. Return = -229.85423210552986
Episode 14528 finished after 92 timesteps. Return = -181.14911902970448
Episode 14529 finished after 106 timesteps. Return = -433.39042147681
Episode 14530 finished after 133 timesteps. Return = -245.850155800177
Episode 14531 finished after 65 timesteps. Return = -87.35892461644944
Episode 14532 finished after 137 timesteps. Return = -301.44894209884274
Episode 14533 finished after 109 timesteps. Return = -203.4701935500218
Episode 14534 finished after 87 timesteps. Return = -153.26671865471468
Episode 14535 finished after 91 timesteps. Return = -194.0668577850518
Episode 14536 finished after 71 timesteps. Return = -190.17135741642738
Episode 14537 finished after 85 timesteps. Return = -335.626484115068

Episode 14642 finished after 81 timesteps. Return = -125.01641200980174
Episode 14643 finished after 64 timesteps. Return = -148.33617689983458
Episode 14644 finished after 75 timesteps. Return = -147.1834829432131
Episode 14645 finished after 110 timesteps. Return = -201.08928377162044
Episode 14646 finished after 89 timesteps. Return = -105.07651503011654
Episode 14647 finished after 111 timesteps. Return = -169.7313623595532
Episode 14648 finished after 94 timesteps. Return = -228.27959359854984
Episode 14649 finished after 114 timesteps. Return = -107.58901691476741
Episode 14650 finished after 74 timesteps. Return = -127.81994722960752
Episode 14651 finished after 137 timesteps. Return = -134.95800589172984
Episode 14652 finished after 70 timesteps. Return = -169.71858274015122
Episode 14653 finished after 123 timesteps. Return = -224.57615508038015
Episode 14654 finished after 73 timesteps. Return = -64.67714756447914
Episode 14655 finished after 102 timesteps. Return = -211.1534

Episode 14757 finished after 89 timesteps. Return = -58.6956416126381
Episode 14758 finished after 111 timesteps. Return = -151.6431248464562
Episode 14759 finished after 83 timesteps. Return = -150.32743941476036
Episode 14760 finished after 99 timesteps. Return = -260.4069755658115
Episode 14761 finished after 87 timesteps. Return = -122.29486668481135
Episode 14762 finished after 83 timesteps. Return = -226.29346525239467
Episode 14763 finished after 61 timesteps. Return = -166.97502332578716
Episode 14764 finished after 71 timesteps. Return = -127.66759292157667
Episode 14765 finished after 128 timesteps. Return = -147.2294673793487
Episode 14766 finished after 85 timesteps. Return = -127.3316146762966
Episode 14767 finished after 90 timesteps. Return = -198.41246228537688
Episode 14768 finished after 100 timesteps. Return = -245.6065344152806
Episode 14769 finished after 86 timesteps. Return = -186.83402695057322
Episode 14770 finished after 62 timesteps. Return = -96.991160264401

Episode 14874 finished after 112 timesteps. Return = -162.41852623484522
Episode 14875 finished after 67 timesteps. Return = -99.43265691287405
Episode 14876 finished after 80 timesteps. Return = -157.99622384069107
Episode 14877 finished after 137 timesteps. Return = -200.46889678970453
Episode 14878 finished after 78 timesteps. Return = -132.4352163048719
Episode 14879 finished after 99 timesteps. Return = -207.06318385337232
Episode 14880 finished after 126 timesteps. Return = -198.67064393355435
Episode 14881 finished after 97 timesteps. Return = -257.62353088063867
Episode 14882 finished after 67 timesteps. Return = -83.7243854898116
Episode 14883 finished after 86 timesteps. Return = -8.618296560132379
Episode 14884 finished after 126 timesteps. Return = -246.9784950142642
Episode 14885 finished after 138 timesteps. Return = -11.78293026524824
Episode 14886 finished after 136 timesteps. Return = -388.4804460554692
Episode 14887 finished after 91 timesteps. Return = -266.774270871

Episode 14989 finished after 114 timesteps. Return = -171.2912529891957
Episode 14990 finished after 75 timesteps. Return = -174.3628091459666
Episode 14991 finished after 99 timesteps. Return = -104.8787191870298
Episode 14992 finished after 109 timesteps. Return = -194.4360818189129
Episode 14993 finished after 65 timesteps. Return = -164.48001085418645
Episode 14994 finished after 120 timesteps. Return = -290.699981765681
Episode 14995 finished after 74 timesteps. Return = -124.80931948798765
Episode 14996 finished after 76 timesteps. Return = -115.12540807991567
Episode 14997 finished after 96 timesteps. Return = -263.28018637603896
Episode 14998 finished after 131 timesteps. Return = -191.2359425403465
Episode 14999 finished after 90 timesteps. Return = -149.8020631477541
Episode 15000 finished after 119 timesteps. Return = -205.28944057909246
Episode 15001 finished after 106 timesteps. Return = -173.49461001664272
Episode 15002 finished after 107 timesteps. Return = 39.7644314698

Episode 15104 finished after 1000 timesteps. Return = 110.919388458012
Episode 15105 finished after 70 timesteps. Return = -92.68811775197811
Episode 15106 finished after 72 timesteps. Return = -117.3549098604025
Episode 15107 finished after 80 timesteps. Return = -112.36064316924434
Episode 15108 finished after 73 timesteps. Return = -205.08432186808545
Episode 15109 finished after 101 timesteps. Return = -178.86738195905917
Episode 15110 finished after 98 timesteps. Return = -266.07744152046257
Episode 15111 finished after 109 timesteps. Return = -255.2484505860772
Episode 15112 finished after 69 timesteps. Return = -96.54916642263899
Episode 15113 finished after 67 timesteps. Return = -207.07454053962823
Episode 15114 finished after 134 timesteps. Return = -153.94076537183594
Episode 15115 finished after 93 timesteps. Return = -222.8670440603109
Episode 15116 finished after 121 timesteps. Return = -127.69004749389254
Episode 15117 finished after 88 timesteps. Return = -204.516430641

Episode 15221 finished after 109 timesteps. Return = -159.59197045413862
Episode 15222 finished after 90 timesteps. Return = -193.47920069673674
Episode 15223 finished after 68 timesteps. Return = -81.33524524671878
Episode 15224 finished after 99 timesteps. Return = -96.03120285247945
Episode 15225 finished after 88 timesteps. Return = -136.3203360736147
Episode 15226 finished after 108 timesteps. Return = -278.311024600588
Episode 15227 finished after 87 timesteps. Return = -281.6446560711676
Episode 15228 finished after 80 timesteps. Return = -153.25115263240576
Episode 15229 finished after 93 timesteps. Return = -177.54088029147283
Episode 15230 finished after 87 timesteps. Return = -111.18924435735725
Episode 15231 finished after 108 timesteps. Return = -124.19501979074771
Episode 15232 finished after 57 timesteps. Return = -144.44930812405596
Episode 15233 finished after 85 timesteps. Return = -167.2302717241365
Episode 15234 finished after 71 timesteps. Return = -41.236676521553

Episode 15336 finished after 78 timesteps. Return = -314.6165725628053
Episode 15337 finished after 64 timesteps. Return = -46.51077113843808
Episode 15338 finished after 76 timesteps. Return = -96.91253489115931
Episode 15339 finished after 108 timesteps. Return = -59.574243425596244
Episode 15340 finished after 86 timesteps. Return = -144.25309093305003
Episode 15341 finished after 114 timesteps. Return = -87.10597246399465
Episode 15342 finished after 68 timesteps. Return = -242.17287499872538
Episode 15343 finished after 67 timesteps. Return = -114.07182735730112
Episode 15344 finished after 108 timesteps. Return = -271.1191489430479
Episode 15345 finished after 126 timesteps. Return = -159.4480202152145
Episode 15346 finished after 120 timesteps. Return = -32.00307126190661
Episode 15347 finished after 61 timesteps. Return = -209.05055915573107
Episode 15348 finished after 110 timesteps. Return = -230.8699719867883
Episode 15349 finished after 95 timesteps. Return = -107.717842358

Episode 15450 finished after 133 timesteps. Return = -305.21240715835074
Episode 15451 finished after 114 timesteps. Return = -488.7576272875104
Episode 15452 finished after 76 timesteps. Return = -128.30221737575238
Episode 15453 finished after 78 timesteps. Return = -109.55594452117121
Episode 15454 finished after 114 timesteps. Return = -209.50125784298956
Episode 15455 finished after 104 timesteps. Return = -106.12217768884307
Episode 15456 finished after 110 timesteps. Return = -125.42624787837762
Episode 15457 finished after 150 timesteps. Return = -201.31112783987248
Episode 15458 finished after 124 timesteps. Return = -114.96618624282401
Episode 15459 finished after 112 timesteps. Return = -237.27776424290948
Episode 15460 finished after 112 timesteps. Return = -148.31219082958683
Episode 15461 finished after 138 timesteps. Return = -177.8531127909174
Episode 15462 finished after 104 timesteps. Return = -261.4453372045474
Episode 15463 finished after 67 timesteps. Return = -185

Episode 15565 finished after 72 timesteps. Return = -208.31546578588282
Episode 15566 finished after 111 timesteps. Return = -20.337449236903538
Episode 15567 finished after 96 timesteps. Return = -237.9713984497155
Episode 15568 finished after 78 timesteps. Return = -148.87561539520095
Episode 15569 finished after 115 timesteps. Return = -209.7029369142486
Episode 15570 finished after 173 timesteps. Return = -217.03319586238632
Episode 15571 finished after 125 timesteps. Return = -71.3589224310006
Episode 15572 finished after 109 timesteps. Return = -84.17482168353294
Episode 15573 finished after 98 timesteps. Return = -206.55788821137574
Episode 15574 finished after 103 timesteps. Return = -175.66739677536538
Episode 15575 finished after 69 timesteps. Return = -136.45077770607548
Episode 15576 finished after 95 timesteps. Return = -152.97284128598346
Episode 15577 finished after 99 timesteps. Return = -173.77736810782338
Episode 15578 finished after 102 timesteps. Return = -239.11452

Episode 15681 finished after 137 timesteps. Return = -217.85177154634195
Episode 15682 finished after 86 timesteps. Return = -158.8177827841398
Episode 15683 finished after 87 timesteps. Return = -110.23906194574886
Episode 15684 finished after 100 timesteps. Return = -190.48115003429882
Episode 15685 finished after 103 timesteps. Return = -125.40251078253871
Episode 15686 finished after 77 timesteps. Return = -157.87481779255683
Episode 15687 finished after 70 timesteps. Return = -8.565614318360844
Episode 15688 finished after 68 timesteps. Return = -186.36740540216516
Episode 15689 finished after 126 timesteps. Return = -173.1573724447832
Episode 15690 finished after 97 timesteps. Return = -268.9442443981023
Episode 15691 finished after 107 timesteps. Return = -196.82700146961264
Episode 15692 finished after 101 timesteps. Return = -88.72289164586083
Episode 15693 finished after 94 timesteps. Return = -202.59143236032827
Episode 15694 finished after 79 timesteps. Return = -149.982365

Episode 15796 finished after 104 timesteps. Return = -222.59927105823712
Episode 15797 finished after 103 timesteps. Return = -171.80443803797073
Episode 15798 finished after 122 timesteps. Return = -241.85064987883345
Episode 15799 finished after 83 timesteps. Return = -95.23127977617659
Episode 15800 finished after 114 timesteps. Return = -203.53153407944146
Episode 15801 finished after 80 timesteps. Return = -114.89430318273128
Episode 15802 finished after 139 timesteps. Return = -48.77659508794156
Episode 15803 finished after 64 timesteps. Return = -83.0781944884216
Episode 15804 finished after 70 timesteps. Return = -87.12545556939965
Episode 15805 finished after 92 timesteps. Return = -218.91924729919273
Episode 15806 finished after 71 timesteps. Return = -81.51599338089798
Episode 15807 finished after 116 timesteps. Return = -181.69657007186203
Episode 15808 finished after 61 timesteps. Return = -94.86339925562184
Episode 15809 finished after 107 timesteps. Return = -94.87694726

Episode 15911 finished after 1000 timesteps. Return = 49.3872081163128
Episode 15912 finished after 108 timesteps. Return = -170.59899819046865
Episode 15913 finished after 59 timesteps. Return = -119.8089983277727
Episode 15914 finished after 131 timesteps. Return = -265.1149941489772
Episode 15915 finished after 108 timesteps. Return = -160.06844461490795
Episode 15916 finished after 66 timesteps. Return = -76.54250158447758
Episode 15917 finished after 129 timesteps. Return = 38.61444783991263
Episode 15918 finished after 118 timesteps. Return = -233.13228893759057
Episode 15919 finished after 102 timesteps. Return = -231.1481848515426
Episode 15920 finished after 106 timesteps. Return = -171.74344785149412
Episode 15921 finished after 87 timesteps. Return = -177.23033103044554
Episode 15922 finished after 105 timesteps. Return = -63.125583447242725
Episode 15923 finished after 97 timesteps. Return = -250.61658915424056
Episode 15924 finished after 76 timesteps. Return = -216.431408

Episode 16027 finished after 93 timesteps. Return = -292.50192100066727
Episode 16028 finished after 66 timesteps. Return = -146.93200795555762
Episode 16029 finished after 168 timesteps. Return = -205.66404063265972
Episode 16030 finished after 72 timesteps. Return = 1.3446582344359115
Episode 16031 finished after 89 timesteps. Return = -179.991936010072
Episode 16032 finished after 122 timesteps. Return = -280.4759753611936
Episode 16033 finished after 109 timesteps. Return = -195.08846736212968
Episode 16034 finished after 130 timesteps. Return = -154.54619433551065
Episode 16035 finished after 79 timesteps. Return = -162.6112314869605
Episode 16036 finished after 96 timesteps. Return = -149.59601817406252
Episode 16037 finished after 74 timesteps. Return = -175.53260317393062
Episode 16038 finished after 120 timesteps. Return = -489.02092534659937
Episode 16039 finished after 114 timesteps. Return = -230.86189382225382
Episode 16040 finished after 101 timesteps. Return = -128.69018

Episode 16145 finished after 104 timesteps. Return = -215.18911691479497
Episode 16146 finished after 119 timesteps. Return = -216.01653588591765
Episode 16147 finished after 81 timesteps. Return = -16.445252029730213
Episode 16148 finished after 61 timesteps. Return = -93.90459840550004
Episode 16149 finished after 82 timesteps. Return = -231.54292413437068
Episode 16150 finished after 74 timesteps. Return = -140.75837865077565
Episode 16151 finished after 80 timesteps. Return = -54.45160885615206
Episode 16152 finished after 96 timesteps. Return = -170.76348149491116
Episode 16153 finished after 89 timesteps. Return = -303.97809741265894
Episode 16154 finished after 110 timesteps. Return = -33.0742344572254
Episode 16155 finished after 106 timesteps. Return = -163.02714962667181
Episode 16156 finished after 122 timesteps. Return = -161.42327127670924
Episode 16157 finished after 71 timesteps. Return = -94.94549346143145
Episode 16158 finished after 114 timesteps. Return = -275.708944

Episode 16261 finished after 116 timesteps. Return = -181.29752620716525
Episode 16262 finished after 79 timesteps. Return = -209.97880489502805
Episode 16263 finished after 69 timesteps. Return = -103.4313792179901
Episode 16264 finished after 97 timesteps. Return = -309.6985847606447
Episode 16265 finished after 82 timesteps. Return = -71.64894659741975
Episode 16266 finished after 73 timesteps. Return = -235.81616952263585
Episode 16267 finished after 107 timesteps. Return = -159.42300801053
Episode 16268 finished after 68 timesteps. Return = -91.29268272614135
Episode 16269 finished after 104 timesteps. Return = -269.72081838974975
Episode 16270 finished after 137 timesteps. Return = -101.42129351748487
Episode 16271 finished after 84 timesteps. Return = -122.87354760635216
Episode 16272 finished after 112 timesteps. Return = -222.71492354339512
Episode 16273 finished after 86 timesteps. Return = -118.9398051602715
Episode 16274 finished after 99 timesteps. Return = -153.8877952199

Episode 16377 finished after 150 timesteps. Return = -190.6818406371956
Episode 16378 finished after 130 timesteps. Return = -137.87690496542467
Episode 16379 finished after 108 timesteps. Return = -126.21636326099215
Episode 16380 finished after 87 timesteps. Return = -397.06837509505044
Episode 16381 finished after 79 timesteps. Return = -194.72740747633782
Episode 16382 finished after 126 timesteps. Return = -316.62682644286406
Episode 16383 finished after 104 timesteps. Return = -21.517938444562347
Episode 16384 finished after 77 timesteps. Return = -119.06353375765202
Episode 16385 finished after 95 timesteps. Return = -241.56327515611
Episode 16386 finished after 78 timesteps. Return = -253.48915420179702
Episode 16387 finished after 107 timesteps. Return = -215.90520814161545
Episode 16388 finished after 145 timesteps. Return = -161.5690678547789
Episode 16389 finished after 95 timesteps. Return = -87.45646121187535
Episode 16390 finished after 64 timesteps. Return = -97.1696280

Episode 16491 finished after 147 timesteps. Return = -222.03883234233874
Episode 16492 finished after 161 timesteps. Return = 13.2537188429896
Episode 16493 finished after 89 timesteps. Return = -258.6762082923806
Episode 16494 finished after 90 timesteps. Return = -169.46554228987196
Episode 16495 finished after 111 timesteps. Return = -327.78695225738505
Episode 16496 finished after 59 timesteps. Return = -89.37017892229021
Episode 16497 finished after 61 timesteps. Return = -91.79083731812983
Episode 16498 finished after 73 timesteps. Return = -76.71398133933467
Episode 16499 finished after 116 timesteps. Return = -186.39394824834787
Episode 16500 finished after 113 timesteps. Return = -310.3557752672535
Episode 16501 finished after 75 timesteps. Return = -178.10141762394483
Episode 16502 finished after 126 timesteps. Return = -219.42731692172737
Episode 16503 finished after 94 timesteps. Return = -239.49454718276152
Episode 16504 finished after 71 timesteps. Return = -290.432943694

Episode 16606 finished after 126 timesteps. Return = -105.7306263940572
Episode 16607 finished after 100 timesteps. Return = 26.159097708571323
Episode 16608 finished after 65 timesteps. Return = -145.1056722739164
Episode 16609 finished after 85 timesteps. Return = -94.81957719624897
Episode 16610 finished after 63 timesteps. Return = -92.13976795542732
Episode 16611 finished after 119 timesteps. Return = -176.18043577587278
Episode 16612 finished after 148 timesteps. Return = -372.68542571581986
Episode 16613 finished after 112 timesteps. Return = -164.03544445143052
Episode 16614 finished after 106 timesteps. Return = -201.41366759614726
Episode 16615 finished after 66 timesteps. Return = -96.26703453224938
Episode 16616 finished after 84 timesteps. Return = -90.94667123818004
Episode 16617 finished after 83 timesteps. Return = -101.95027795155197
Episode 16618 finished after 113 timesteps. Return = -22.39544376237579
Episode 16619 finished after 95 timesteps. Return = -186.34420661

Episode 16722 finished after 106 timesteps. Return = -197.7796972886814
Episode 16723 finished after 135 timesteps. Return = -76.35644423437526
Episode 16724 finished after 79 timesteps. Return = -50.31615283294515
Episode 16725 finished after 155 timesteps. Return = -352.438023251234
Episode 16726 finished after 90 timesteps. Return = -268.23539549837733
Episode 16727 finished after 72 timesteps. Return = -104.42014169369082
Episode 16728 finished after 107 timesteps. Return = -126.80052243894157
Episode 16729 finished after 176 timesteps. Return = -29.221834023616836
Episode 16730 finished after 121 timesteps. Return = -139.6918854106594
Episode 16731 finished after 107 timesteps. Return = -249.2511304175405
Episode 16732 finished after 127 timesteps. Return = -55.1848073628026
Episode 16733 finished after 86 timesteps. Return = -93.35084464556296
Episode 16734 finished after 106 timesteps. Return = -100.15143811527952
Episode 16735 finished after 68 timesteps. Return = -59.558223454

Episode 16836 finished after 101 timesteps. Return = -143.15368039573133
Episode 16837 finished after 69 timesteps. Return = -143.4119749391376
Episode 16838 finished after 110 timesteps. Return = 22.177363572490364
Episode 16839 finished after 103 timesteps. Return = -173.74668569197058
Episode 16840 finished after 99 timesteps. Return = -131.7507802698614
Episode 16841 finished after 149 timesteps. Return = -126.58753330239533
Episode 16842 finished after 102 timesteps. Return = -132.11549916989702
Episode 16843 finished after 103 timesteps. Return = -401.30870626945784
Episode 16844 finished after 125 timesteps. Return = -364.2159787643192
Episode 16845 finished after 124 timesteps. Return = -115.76001804022911
Episode 16846 finished after 85 timesteps. Return = -131.844738371428
Episode 16847 finished after 69 timesteps. Return = -169.89096485988
Episode 16848 finished after 97 timesteps. Return = -116.802746545103
Episode 16849 finished after 107 timesteps. Return = -130.785457508

Episode 16951 finished after 64 timesteps. Return = -200.7584355423811
Episode 16952 finished after 85 timesteps. Return = -125.53674878727688
Episode 16953 finished after 129 timesteps. Return = -422.6164654383713
Episode 16954 finished after 91 timesteps. Return = -229.8390651749295
Episode 16955 finished after 72 timesteps. Return = -136.52075653469248
Episode 16956 finished after 91 timesteps. Return = -151.39888631017436
Episode 16957 finished after 94 timesteps. Return = -88.61240396581408
Episode 16958 finished after 123 timesteps. Return = -208.37236077958252
Episode 16959 finished after 127 timesteps. Return = -163.90065712651892
Episode 16960 finished after 122 timesteps. Return = -252.7573963124594
Episode 16961 finished after 89 timesteps. Return = -95.69893754323802
Episode 16962 finished after 83 timesteps. Return = -90.72573557183428
Episode 16963 finished after 115 timesteps. Return = -157.37497472839175
Episode 16964 finished after 70 timesteps. Return = -57.5616466313

Episode 17066 finished after 109 timesteps. Return = -250.05233007239787
Episode 17067 finished after 70 timesteps. Return = -121.08919701897366
Episode 17068 finished after 95 timesteps. Return = -71.76670071037529
Episode 17069 finished after 108 timesteps. Return = -160.4182915819689
Episode 17070 finished after 111 timesteps. Return = -253.16278250374836
Episode 17071 finished after 69 timesteps. Return = -76.50916581326531
Episode 17072 finished after 87 timesteps. Return = -114.3051165312284
Episode 17073 finished after 70 timesteps. Return = 56.86853802311859
Episode 17074 finished after 110 timesteps. Return = -53.334058852076936
Episode 17075 finished after 70 timesteps. Return = -93.03952393208873
Episode 17076 finished after 98 timesteps. Return = -311.5456920903748
Episode 17077 finished after 78 timesteps. Return = -228.40422883416193
Episode 17078 finished after 94 timesteps. Return = -223.71305940683615
Episode 17079 finished after 64 timesteps. Return = -119.34498727294

Episode 17180 finished after 74 timesteps. Return = -99.88707033073415
Episode 17181 finished after 130 timesteps. Return = -120.22529876339473
Episode 17182 finished after 141 timesteps. Return = -288.90329069339873
Episode 17183 finished after 114 timesteps. Return = -201.04758799189415
Episode 17184 finished after 1000 timesteps. Return = 80.47073601264684
Episode 17185 finished after 61 timesteps. Return = -115.90756589475718
Episode 17186 finished after 126 timesteps. Return = -285.04623874329525
Episode 17187 finished after 96 timesteps. Return = -221.58646915503647
Episode 17188 finished after 105 timesteps. Return = -122.04848833927713
Episode 17189 finished after 109 timesteps. Return = -204.52849299237982
Episode 17190 finished after 1000 timesteps. Return = 82.23257670578886
Episode 17191 finished after 104 timesteps. Return = -12.785761973298193
Episode 17192 finished after 65 timesteps. Return = -82.948042500108
Episode 17193 finished after 89 timesteps. Return = -128.1919

Episode 17297 finished after 143 timesteps. Return = -308.90219098501825
Episode 17298 finished after 145 timesteps. Return = -245.42164319950444
Episode 17299 finished after 129 timesteps. Return = -130.26094950216964
Episode 17300 finished after 93 timesteps. Return = -136.27092486095773
Episode 17301 finished after 75 timesteps. Return = -206.6783669119345
Episode 17302 finished after 86 timesteps. Return = -301.904728436503
Episode 17303 finished after 75 timesteps. Return = -85.40363985681111
Episode 17304 finished after 111 timesteps. Return = -167.98581606860137
Episode 17305 finished after 76 timesteps. Return = -148.25247060502545
Episode 17306 finished after 116 timesteps. Return = -155.32136872255404
Episode 17307 finished after 128 timesteps. Return = -302.93229232070587
Episode 17308 finished after 133 timesteps. Return = -371.4808669652253
Episode 17309 finished after 102 timesteps. Return = -219.28543006846348
Episode 17310 finished after 70 timesteps. Return = -75.37904

Episode 17411 finished after 102 timesteps. Return = -138.6549017957766
Episode 17412 finished after 106 timesteps. Return = -318.5333810331946
Episode 17413 finished after 98 timesteps. Return = -154.72208849759312
Episode 17414 finished after 60 timesteps. Return = -100.8609090043001
Episode 17415 finished after 101 timesteps. Return = -33.26299052471194
Episode 17416 finished after 113 timesteps. Return = -172.44544852830262
Episode 17417 finished after 122 timesteps. Return = -63.66901788394267
Episode 17418 finished after 93 timesteps. Return = -254.85766442360176
Episode 17419 finished after 67 timesteps. Return = -113.67916599888824
Episode 17420 finished after 95 timesteps. Return = -291.59537686274825
Episode 17421 finished after 102 timesteps. Return = -190.06674090037876
Episode 17422 finished after 119 timesteps. Return = -215.23464000695267
Episode 17423 finished after 101 timesteps. Return = -196.52721486198354
Episode 17424 finished after 64 timesteps. Return = -109.6119

Episode 17529 finished after 63 timesteps. Return = -105.39897789610411
Episode 17530 finished after 69 timesteps. Return = -151.84881018658774
Episode 17531 finished after 83 timesteps. Return = -154.99332002487336
Episode 17532 finished after 96 timesteps. Return = -292.1332682219047
Episode 17533 finished after 67 timesteps. Return = -114.15696940645739
Episode 17534 finished after 106 timesteps. Return = -307.164624111352
Episode 17535 finished after 93 timesteps. Return = -124.73379923530474
Episode 17536 finished after 119 timesteps. Return = -325.4363318607237
Episode 17537 finished after 79 timesteps. Return = -110.47053737118006
Episode 17538 finished after 92 timesteps. Return = -218.50820387492706
Episode 17539 finished after 85 timesteps. Return = -114.93284415675708
Episode 17540 finished after 67 timesteps. Return = -148.44381039902055
Episode 17541 finished after 127 timesteps. Return = -278.48652793035944
Episode 17542 finished after 97 timesteps. Return = -171.28310390

Episode 17646 finished after 82 timesteps. Return = -131.14153961847617
Episode 17647 finished after 114 timesteps. Return = -179.30518765645195
Episode 17648 finished after 66 timesteps. Return = -102.71001296430906
Episode 17649 finished after 80 timesteps. Return = -151.3439159413884
Episode 17650 finished after 111 timesteps. Return = -266.17471898970814
Episode 17651 finished after 123 timesteps. Return = -119.25054825081013
Episode 17652 finished after 86 timesteps. Return = -144.0039736293711
Episode 17653 finished after 87 timesteps. Return = -127.95216851700816
Episode 17654 finished after 86 timesteps. Return = -210.7416688822334
Episode 17655 finished after 75 timesteps. Return = -167.43809936153428
Episode 17656 finished after 66 timesteps. Return = -209.00560893161818
Episode 17657 finished after 78 timesteps. Return = -142.57137898123779
Episode 17658 finished after 96 timesteps. Return = -358.11111068412094
Episode 17659 finished after 105 timesteps. Return = -380.212630

Episode 17763 finished after 85 timesteps. Return = -107.75426300234203
Episode 17764 finished after 110 timesteps. Return = -258.1475068698843
Episode 17765 finished after 1000 timesteps. Return = 68.13742803560709
Episode 17766 finished after 84 timesteps. Return = -184.67576264680673
Episode 17767 finished after 66 timesteps. Return = -97.16933678958036
Episode 17768 finished after 117 timesteps. Return = -199.05983151664338
Episode 17769 finished after 82 timesteps. Return = -127.54625892128806
Episode 17770 finished after 139 timesteps. Return = -192.9708489328862
Episode 17771 finished after 116 timesteps. Return = -231.92688525661768
Episode 17772 finished after 66 timesteps. Return = -87.58220609992786
Episode 17773 finished after 100 timesteps. Return = -37.814689404462754
Episode 17774 finished after 67 timesteps. Return = -93.94707946248947
Episode 17775 finished after 90 timesteps. Return = -97.01082145110419
Episode 17776 finished after 71 timesteps. Return = -91.094469378

Episode 17877 finished after 140 timesteps. Return = -200.4846363247052
Episode 17878 finished after 117 timesteps. Return = -246.7631840089166
Episode 17879 finished after 104 timesteps. Return = -105.07307135872757
Episode 17880 finished after 101 timesteps. Return = -125.57769978402058
Episode 17881 finished after 99 timesteps. Return = -205.00813670389397
Episode 17882 finished after 110 timesteps. Return = -343.9907570674093
Episode 17883 finished after 115 timesteps. Return = -111.93513657394719
Episode 17884 finished after 82 timesteps. Return = -208.72307144271986
Episode 17885 finished after 65 timesteps. Return = -95.20010568139435
Episode 17886 finished after 128 timesteps. Return = -209.45966623115095
Episode 17887 finished after 76 timesteps. Return = -143.5246370988189
Episode 17888 finished after 73 timesteps. Return = -105.67399781079877
Episode 17889 finished after 84 timesteps. Return = -144.5829707767457
Episode 17890 finished after 95 timesteps. Return = -258.242486

Episode 17992 finished after 79 timesteps. Return = -58.048892530603915
Episode 17993 finished after 70 timesteps. Return = -113.34679902496168
Episode 17994 finished after 74 timesteps. Return = -121.43314143676858
Episode 17995 finished after 113 timesteps. Return = -198.43533653552717
Episode 17996 finished after 103 timesteps. Return = -137.29832931155363
Episode 17997 finished after 92 timesteps. Return = -77.83140751243585
Episode 17998 finished after 60 timesteps. Return = -85.7307999446825
Episode 17999 finished after 64 timesteps. Return = -164.35663175268712
Episode 18000 finished after 99 timesteps. Return = -185.2843206961506
Episode 18001 finished after 123 timesteps. Return = -243.5253921206399
Episode 18002 finished after 94 timesteps. Return = -207.2045403758685
Episode 18003 finished after 86 timesteps. Return = -34.29052454697826
Episode 18004 finished after 106 timesteps. Return = -213.1332485562835
Episode 18005 finished after 113 timesteps. Return = -312.9106174266

Episode 18106 finished after 132 timesteps. Return = -197.9903392164943
Episode 18107 finished after 149 timesteps. Return = -125.35450033949817
Episode 18108 finished after 134 timesteps. Return = -295.21957454461864
Episode 18109 finished after 87 timesteps. Return = -193.65766936134423
Episode 18110 finished after 63 timesteps. Return = -82.73978149643399
Episode 18111 finished after 96 timesteps. Return = -172.61490651047018
Episode 18112 finished after 90 timesteps. Return = -119.367427870252
Episode 18113 finished after 128 timesteps. Return = -181.285275784975
Episode 18114 finished after 100 timesteps. Return = -113.83199151525788
Episode 18115 finished after 88 timesteps. Return = -147.89491734330608
Episode 18116 finished after 114 timesteps. Return = -155.4804442018384
Episode 18117 finished after 62 timesteps. Return = -98.1876699347537
Episode 18118 finished after 93 timesteps. Return = -339.5636968549238
Episode 18119 finished after 88 timesteps. Return = -318.43331120093

Episode 18220 finished after 112 timesteps. Return = -211.31148115928357
Episode 18221 finished after 90 timesteps. Return = -75.30372452670606
Episode 18222 finished after 68 timesteps. Return = -118.92717164204004
Episode 18223 finished after 76 timesteps. Return = -211.4156234964331
Episode 18224 finished after 96 timesteps. Return = -147.76855174367876
Episode 18225 finished after 72 timesteps. Return = -111.21864392263069
Episode 18226 finished after 118 timesteps. Return = -234.25878957789286
Episode 18227 finished after 65 timesteps. Return = -67.54515277530709
Episode 18228 finished after 86 timesteps. Return = -196.75221438353006
Episode 18229 finished after 66 timesteps. Return = -190.76120663439906
Episode 18230 finished after 90 timesteps. Return = -67.80525395737538
Episode 18231 finished after 114 timesteps. Return = -332.3879752967638
Episode 18232 finished after 131 timesteps. Return = -161.33571835175167
Episode 18233 finished after 123 timesteps. Return = -172.1387969

Episode 18337 finished after 59 timesteps. Return = -105.3567965698712
Episode 18338 finished after 95 timesteps. Return = -221.7686751601786
Episode 18339 finished after 90 timesteps. Return = -38.22592756016224
Episode 18340 finished after 93 timesteps. Return = -146.2392653431885
Episode 18341 finished after 99 timesteps. Return = -122.51010681982883
Episode 18342 finished after 107 timesteps. Return = -107.83454512848816
Episode 18343 finished after 96 timesteps. Return = -124.97289045839747
Episode 18344 finished after 85 timesteps. Return = -99.22664899907362
Episode 18345 finished after 116 timesteps. Return = -181.82416059503672
Episode 18346 finished after 65 timesteps. Return = -149.31489860487176
Episode 18347 finished after 53 timesteps. Return = -118.69518394804052
Episode 18348 finished after 106 timesteps. Return = -297.64100010604653
Episode 18349 finished after 148 timesteps. Return = -2.495614202891531
Episode 18350 finished after 95 timesteps. Return = -64.5494491048

Episode 18451 finished after 88 timesteps. Return = -95.65404460759467
Episode 18452 finished after 69 timesteps. Return = -125.88986493568177
Episode 18453 finished after 98 timesteps. Return = -231.48798989359682
Episode 18454 finished after 110 timesteps. Return = -83.96260413073391
Episode 18455 finished after 65 timesteps. Return = -78.88215959928539
Episode 18456 finished after 78 timesteps. Return = -159.2080434140123
Episode 18457 finished after 122 timesteps. Return = -192.6575726704093
Episode 18458 finished after 106 timesteps. Return = -189.39022775138082
Episode 18459 finished after 76 timesteps. Return = -162.17435973110167
Episode 18460 finished after 95 timesteps. Return = -135.92895959849636
Episode 18461 finished after 86 timesteps. Return = 46.83373361063488
Episode 18462 finished after 137 timesteps. Return = -374.53328616827577
Episode 18463 finished after 120 timesteps. Return = -104.60815068616445
Episode 18464 finished after 67 timesteps. Return = -407.513374657

Episode 18565 finished after 105 timesteps. Return = -173.77481651046514
Episode 18566 finished after 117 timesteps. Return = -243.72056167451876
Episode 18567 finished after 70 timesteps. Return = -110.97034848234567
Episode 18568 finished after 106 timesteps. Return = -165.34214348779074
Episode 18569 finished after 74 timesteps. Return = -132.07818223566744
Episode 18570 finished after 77 timesteps. Return = -99.31720072729198
Episode 18571 finished after 149 timesteps. Return = -163.69110548523406
Episode 18572 finished after 129 timesteps. Return = -130.58968607202482
Episode 18573 finished after 90 timesteps. Return = -234.45124969322654
Episode 18574 finished after 109 timesteps. Return = -269.0014316969566
Episode 18575 finished after 69 timesteps. Return = -144.49911080430007
Episode 18576 finished after 71 timesteps. Return = -158.9599440484659
Episode 18577 finished after 71 timesteps. Return = -88.34221879587481
Episode 18578 finished after 104 timesteps. Return = -171.1824

Episode 18682 finished after 139 timesteps. Return = -307.3439771017827
Episode 18683 finished after 78 timesteps. Return = -110.81033013139614
Episode 18684 finished after 125 timesteps. Return = -267.2916471817815
Episode 18685 finished after 130 timesteps. Return = -181.6235423051823
Episode 18686 finished after 113 timesteps. Return = -294.5991258283451
Episode 18687 finished after 112 timesteps. Return = -464.936339693719
Episode 18688 finished after 97 timesteps. Return = -124.59034257820387
Episode 18689 finished after 129 timesteps. Return = -152.53220082938543
Episode 18690 finished after 70 timesteps. Return = -7.584803427656183
Episode 18691 finished after 67 timesteps. Return = -133.77197963809263
Episode 18692 finished after 110 timesteps. Return = -239.78113153327632
Episode 18693 finished after 119 timesteps. Return = -148.9796190022792
Episode 18694 finished after 138 timesteps. Return = -62.33521480978794
Episode 18695 finished after 92 timesteps. Return = -287.4990895

Episode 18797 finished after 130 timesteps. Return = -312.29026963326044
Episode 18798 finished after 115 timesteps. Return = -10.092926391461887
Episode 18799 finished after 71 timesteps. Return = -124.09633329994674
Episode 18800 finished after 99 timesteps. Return = -83.38570241723643
Episode 18801 finished after 83 timesteps. Return = -84.89892717008189
Episode 18802 finished after 118 timesteps. Return = -39.81401841342546
Episode 18803 finished after 105 timesteps. Return = -305.7918953565274
Episode 18804 finished after 93 timesteps. Return = -158.74616171591205
Episode 18805 finished after 102 timesteps. Return = -208.51128150974645
Episode 18806 finished after 75 timesteps. Return = -110.98708863330067
Episode 18807 finished after 96 timesteps. Return = -190.80316472059695
Episode 18808 finished after 98 timesteps. Return = -197.29334798459058
Episode 18809 finished after 101 timesteps. Return = -120.70912868406968
Episode 18810 finished after 140 timesteps. Return = 7.0802698

Episode 18913 finished after 67 timesteps. Return = -27.614944948364084
Episode 18914 finished after 103 timesteps. Return = -120.52840466657797
Episode 18915 finished after 94 timesteps. Return = -253.7201785873394
Episode 18916 finished after 74 timesteps. Return = -114.8862802437034
Episode 18917 finished after 126 timesteps. Return = -231.85311418828007
Episode 18918 finished after 58 timesteps. Return = -92.7988051313443
Episode 18919 finished after 76 timesteps. Return = -136.82523330007788
Episode 18920 finished after 112 timesteps. Return = -317.39715409720594
Episode 18921 finished after 103 timesteps. Return = -176.890593406678
Episode 18922 finished after 65 timesteps. Return = -166.56862098805186
Episode 18923 finished after 92 timesteps. Return = -151.6013014891757
Episode 18924 finished after 119 timesteps. Return = -139.0078033885606
Episode 18925 finished after 95 timesteps. Return = -172.4944786151307
Episode 18926 finished after 109 timesteps. Return = -246.7546420245

Episode 19029 finished after 84 timesteps. Return = -120.01803370878679
Episode 19030 finished after 63 timesteps. Return = -127.39578133385629
Episode 19031 finished after 75 timesteps. Return = -74.75105572096929
Episode 19032 finished after 146 timesteps. Return = -102.30841320735072
Episode 19033 finished after 99 timesteps. Return = -297.1251654075993
Episode 19034 finished after 96 timesteps. Return = -148.1926015157138
Episode 19035 finished after 71 timesteps. Return = -142.35689797012105
Episode 19036 finished after 86 timesteps. Return = -189.91602786849143
Episode 19037 finished after 109 timesteps. Return = -37.43290850080831
Episode 19038 finished after 92 timesteps. Return = -19.10763284038353
Episode 19039 finished after 91 timesteps. Return = -197.3442596435484
Episode 19040 finished after 103 timesteps. Return = -370.370871302416
Episode 19041 finished after 129 timesteps. Return = -159.59344234611794
Episode 19042 finished after 96 timesteps. Return = -184.90740295205

Episode 19146 finished after 77 timesteps. Return = -111.11058666942517
Episode 19147 finished after 57 timesteps. Return = -66.99066285547556
Episode 19148 finished after 77 timesteps. Return = -75.43068359262527
Episode 19149 finished after 120 timesteps. Return = -279.4829093459736
Episode 19150 finished after 144 timesteps. Return = -149.67611444541706
Episode 19151 finished after 80 timesteps. Return = -170.77515227642547
Episode 19152 finished after 83 timesteps. Return = -18.41259891932509
Episode 19153 finished after 84 timesteps. Return = -202.93329568210623
Episode 19154 finished after 78 timesteps. Return = -152.37560111622412
Episode 19155 finished after 94 timesteps. Return = -246.96853808801407
Episode 19156 finished after 116 timesteps. Return = -131.95639136734943
Episode 19157 finished after 91 timesteps. Return = -259.8834417256228
Episode 19158 finished after 99 timesteps. Return = -128.6752681583398
Episode 19159 finished after 70 timesteps. Return = -208.7572531306

Episode 19262 finished after 100 timesteps. Return = -7.009697988541674
Episode 19263 finished after 126 timesteps. Return = -168.06948160972695
Episode 19264 finished after 65 timesteps. Return = -155.68741945472323
Episode 19265 finished after 131 timesteps. Return = -234.45698616353758
Episode 19266 finished after 67 timesteps. Return = -113.24986579864844
Episode 19267 finished after 141 timesteps. Return = -243.7926916249914
Episode 19268 finished after 101 timesteps. Return = -190.53273665559294
Episode 19269 finished after 67 timesteps. Return = -149.98924396931977
Episode 19270 finished after 147 timesteps. Return = -141.83082998307918
Episode 19271 finished after 110 timesteps. Return = -246.80724146366325
Episode 19272 finished after 93 timesteps. Return = -167.99434121331203
Episode 19273 finished after 71 timesteps. Return = -114.57397064214423
Episode 19274 finished after 105 timesteps. Return = -312.2253295446377
Episode 19275 finished after 72 timesteps. Return = -106.18

Episode 19378 finished after 114 timesteps. Return = -76.49190622859767
Episode 19379 finished after 91 timesteps. Return = -75.40896336728252
Episode 19380 finished after 77 timesteps. Return = -106.5193363421272
Episode 19381 finished after 106 timesteps. Return = -148.4615620049981
Episode 19382 finished after 59 timesteps. Return = -108.5882163632068
Episode 19383 finished after 116 timesteps. Return = -242.77306411028056
Episode 19384 finished after 90 timesteps. Return = -138.40968470598312
Episode 19385 finished after 141 timesteps. Return = -196.32255426114472
Episode 19386 finished after 121 timesteps. Return = -199.56571186930816
Episode 19387 finished after 119 timesteps. Return = -241.33637345984283
Episode 19388 finished after 72 timesteps. Return = -141.77159529568604
Episode 19389 finished after 98 timesteps. Return = -371.97745047535415
Episode 19390 finished after 118 timesteps. Return = -227.43203696956803
Episode 19391 finished after 117 timesteps. Return = -156.8488

Episode 19496 finished after 76 timesteps. Return = -173.19734670556073
Episode 19497 finished after 110 timesteps. Return = 18.87863970012414
Episode 19498 finished after 86 timesteps. Return = -115.80155874275519
Episode 19499 finished after 101 timesteps. Return = -153.96087907303814
Episode 19500 finished after 78 timesteps. Return = -105.62210678834482
Episode 19501 finished after 89 timesteps. Return = -104.40685842098998
Episode 19502 finished after 147 timesteps. Return = -251.71197207661237
Episode 19503 finished after 97 timesteps. Return = -190.667243022474
Episode 19504 finished after 137 timesteps. Return = -183.01292014404788
Episode 19505 finished after 131 timesteps. Return = -167.21022737254196
Episode 19506 finished after 102 timesteps. Return = 34.58871752379511
Episode 19507 finished after 113 timesteps. Return = -103.48563150786573
Episode 19508 finished after 71 timesteps. Return = -94.89921048280759
Episode 19509 finished after 110 timesteps. Return = 29.53521138

Episode 19610 finished after 116 timesteps. Return = -182.10198276142546
Episode 19611 finished after 120 timesteps. Return = -205.05224947700214
Episode 19612 finished after 122 timesteps. Return = -153.18407670144893
Episode 19613 finished after 97 timesteps. Return = -76.89045924319291
Episode 19614 finished after 85 timesteps. Return = -135.3401389099899
Episode 19615 finished after 73 timesteps. Return = -85.03104258442471
Episode 19616 finished after 101 timesteps. Return = -161.48879936696534
Episode 19617 finished after 63 timesteps. Return = -129.14680003941692
Episode 19618 finished after 70 timesteps. Return = -154.42657718144008
Episode 19619 finished after 78 timesteps. Return = -182.66754065075565
Episode 19620 finished after 94 timesteps. Return = -105.46771609337604
Episode 19621 finished after 136 timesteps. Return = -152.14193729378349
Episode 19622 finished after 78 timesteps. Return = -292.86174148488385
Episode 19623 finished after 91 timesteps. Return = -136.56330

Episode 19729 finished after 105 timesteps. Return = -89.1096588393563
Episode 19730 finished after 150 timesteps. Return = 10.336737065049064
Episode 19731 finished after 98 timesteps. Return = -206.72839748536177
Episode 19732 finished after 160 timesteps. Return = -380.51820828607305
Episode 19733 finished after 72 timesteps. Return = -132.2114497561644
Episode 19734 finished after 113 timesteps. Return = -234.90681786014588
Episode 19735 finished after 90 timesteps. Return = -211.58826185878374
Episode 19736 finished after 103 timesteps. Return = -140.9533950282526
Episode 19737 finished after 69 timesteps. Return = -94.20587411037333
Episode 19738 finished after 145 timesteps. Return = -2.5927372320187203
Episode 19739 finished after 112 timesteps. Return = -104.02707130639973
Episode 19740 finished after 106 timesteps. Return = -210.1149048608113
Episode 19741 finished after 63 timesteps. Return = -141.63559088079657
Episode 19742 finished after 100 timesteps. Return = -196.43997

Episode 19844 finished after 106 timesteps. Return = -136.45093435048722
Episode 19845 finished after 127 timesteps. Return = -93.82265396518922
Episode 19846 finished after 143 timesteps. Return = -184.63814680046036
Episode 19847 finished after 92 timesteps. Return = -136.64358453055303
Episode 19848 finished after 107 timesteps. Return = -156.42393208907112
Episode 19849 finished after 86 timesteps. Return = -135.03125088248422
Episode 19850 finished after 104 timesteps. Return = -176.9936675979011
Episode 19851 finished after 102 timesteps. Return = -167.22891678733092
Episode 19852 finished after 75 timesteps. Return = -207.31292860063758
Episode 19853 finished after 122 timesteps. Return = -178.0426421063177
Episode 19854 finished after 90 timesteps. Return = -203.16682665386213
Episode 19855 finished after 131 timesteps. Return = -113.85614573386528
Episode 19856 finished after 84 timesteps. Return = -200.2902911365333
Episode 19857 finished after 133 timesteps. Return = 17.4232

Episode 19959 finished after 100 timesteps. Return = -198.4175686832624
Episode 19960 finished after 89 timesteps. Return = -286.7089783638617
Episode 19961 finished after 64 timesteps. Return = -108.10214987650316
Episode 19962 finished after 88 timesteps. Return = -207.86370470667168
Episode 19963 finished after 94 timesteps. Return = -216.3291774128948
Episode 19964 finished after 66 timesteps. Return = -56.16003277053686
Episode 19965 finished after 78 timesteps. Return = -106.92857520699748
Episode 19966 finished after 112 timesteps. Return = -187.91576630606892
Episode 19967 finished after 96 timesteps. Return = -231.4326548926556
Episode 19968 finished after 75 timesteps. Return = -113.8843573900582
Episode 19969 finished after 121 timesteps. Return = 35.845732952070705
Episode 19970 finished after 92 timesteps. Return = -295.24555112269104
Episode 19971 finished after 70 timesteps. Return = -55.93247799156319
Episode 19972 finished after 98 timesteps. Return = -215.128874534900

Episode 20075 finished after 101 timesteps. Return = -207.63686477914698
Episode 20076 finished after 113 timesteps. Return = -121.90833439591925
Episode 20077 finished after 115 timesteps. Return = -222.55211245274614
Episode 20078 finished after 1000 timesteps. Return = 95.48991272802036
Episode 20079 finished after 120 timesteps. Return = -357.7722321162064
Episode 20080 finished after 85 timesteps. Return = -206.73868079634377
Episode 20081 finished after 81 timesteps. Return = -178.17237698554993
Episode 20082 finished after 101 timesteps. Return = -276.4916637465022
Episode 20083 finished after 81 timesteps. Return = -122.84708825476592
Episode 20084 finished after 120 timesteps. Return = -117.10741015483173
Episode 20085 finished after 100 timesteps. Return = -175.42950493208997
Episode 20086 finished after 68 timesteps. Return = -107.80923629614264
Episode 20087 finished after 104 timesteps. Return = -207.37240435279247
Episode 20088 finished after 71 timesteps. Return = -98.06

Episode 20189 finished after 141 timesteps. Return = -185.0270065546501
Episode 20190 finished after 86 timesteps. Return = -162.2938058940578
Episode 20191 finished after 149 timesteps. Return = -443.5023584070839
Episode 20192 finished after 153 timesteps. Return = -79.47750838247738
Episode 20193 finished after 81 timesteps. Return = -128.24062736127215
Episode 20194 finished after 1000 timesteps. Return = 75.12048957355535
Episode 20195 finished after 66 timesteps. Return = -142.65363556008785
Episode 20196 finished after 87 timesteps. Return = 32.361344471964344
Episode 20197 finished after 79 timesteps. Return = -154.13368732451772
Episode 20198 finished after 85 timesteps. Return = 29.718968318812443
Episode 20199 finished after 78 timesteps. Return = -103.76906638805852
Episode 20200 finished after 126 timesteps. Return = -230.5194412083452
Episode 20201 finished after 69 timesteps. Return = -68.61232036471262
Episode 20202 finished after 106 timesteps. Return = -91.25512677654

Episode 20305 finished after 61 timesteps. Return = -146.84593941913596
Episode 20306 finished after 63 timesteps. Return = -126.57567666411079
Episode 20307 finished after 73 timesteps. Return = -116.42350850330251
Episode 20308 finished after 96 timesteps. Return = -196.07451835870847
Episode 20309 finished after 122 timesteps. Return = -206.12514628223283
Episode 20310 finished after 105 timesteps. Return = -305.27642156041674
Episode 20311 finished after 109 timesteps. Return = -174.2724259924282
Episode 20312 finished after 148 timesteps. Return = -188.29365516073196
Episode 20313 finished after 107 timesteps. Return = -231.32366500139707
Episode 20314 finished after 89 timesteps. Return = -107.09612044274786
Episode 20315 finished after 94 timesteps. Return = -115.95231895730694
Episode 20316 finished after 62 timesteps. Return = -92.01362809814776
Episode 20317 finished after 86 timesteps. Return = -98.10423989543636
Episode 20318 finished after 77 timesteps. Return = -113.14744

Episode 20420 finished after 81 timesteps. Return = -148.08970440099444
Episode 20421 finished after 68 timesteps. Return = -113.81576330472808
Episode 20422 finished after 96 timesteps. Return = -155.32863119339214
Episode 20423 finished after 108 timesteps. Return = -231.13918770351654
Episode 20424 finished after 100 timesteps. Return = -135.5136670772922
Episode 20425 finished after 110 timesteps. Return = -125.15146971446784
Episode 20426 finished after 83 timesteps. Return = 0.0657761557963994
Episode 20427 finished after 78 timesteps. Return = -208.54957353796027
Episode 20428 finished after 132 timesteps. Return = -200.13708278440996
Episode 20429 finished after 74 timesteps. Return = -74.92777901102326
Episode 20430 finished after 90 timesteps. Return = -142.88813443407344
Episode 20431 finished after 109 timesteps. Return = -226.71749330129478
Episode 20432 finished after 83 timesteps. Return = -114.55840475573657
Episode 20433 finished after 104 timesteps. Return = -167.0825

Episode 20537 finished after 126 timesteps. Return = -132.1221668705574
Episode 20538 finished after 73 timesteps. Return = -128.48872369330275
Episode 20539 finished after 94 timesteps. Return = -286.54078202225105
Episode 20540 finished after 119 timesteps. Return = -148.78721674654656
Episode 20541 finished after 122 timesteps. Return = -245.3929514736875
Episode 20542 finished after 128 timesteps. Return = -215.91544828332314
Episode 20543 finished after 81 timesteps. Return = -125.18741257091578
Episode 20544 finished after 84 timesteps. Return = -115.4325556379373
Episode 20545 finished after 108 timesteps. Return = -101.79480017579745
Episode 20546 finished after 146 timesteps. Return = -197.87499699216784
Episode 20547 finished after 102 timesteps. Return = -96.21863285304589
Episode 20548 finished after 69 timesteps. Return = -81.9344348900423
Episode 20549 finished after 60 timesteps. Return = -89.94603750354933
Episode 20550 finished after 97 timesteps. Return = -139.9890346

Episode 20651 finished after 77 timesteps. Return = -85.83602523713135
Episode 20652 finished after 84 timesteps. Return = -81.01925880576118
Episode 20653 finished after 126 timesteps. Return = -171.80298592728798
Episode 20654 finished after 117 timesteps. Return = -183.61637260480518
Episode 20655 finished after 79 timesteps. Return = -202.05592511128287
Episode 20656 finished after 78 timesteps. Return = -82.9409500240682
Episode 20657 finished after 105 timesteps. Return = -303.47229655735316
Episode 20658 finished after 126 timesteps. Return = 21.61753949453238
Episode 20659 finished after 99 timesteps. Return = -140.93458687386146
Episode 20660 finished after 78 timesteps. Return = -111.39582929873188
Episode 20661 finished after 105 timesteps. Return = -173.46940170038732
Episode 20662 finished after 150 timesteps. Return = -125.07805392641893
Episode 20663 finished after 130 timesteps. Return = -296.9022340815056
Episode 20664 finished after 89 timesteps. Return = -127.6962545

Episode 20765 finished after 75 timesteps. Return = -95.53280670649119
Episode 20766 finished after 78 timesteps. Return = -77.5055234293103
Episode 20767 finished after 69 timesteps. Return = -147.2586765678571
Episode 20768 finished after 67 timesteps. Return = -180.77093436186112
Episode 20769 finished after 67 timesteps. Return = -89.96782740055451
Episode 20770 finished after 74 timesteps. Return = -130.6464845524165
Episode 20771 finished after 67 timesteps. Return = -139.8642597850093
Episode 20772 finished after 127 timesteps. Return = -30.373350124356094
Episode 20773 finished after 66 timesteps. Return = -106.90310477714735
Episode 20774 finished after 71 timesteps. Return = -100.425988179429
Episode 20775 finished after 133 timesteps. Return = -129.7541978100516
Episode 20776 finished after 81 timesteps. Return = -110.29751916198319
Episode 20777 finished after 96 timesteps. Return = -113.41414435793817
Episode 20778 finished after 120 timesteps. Return = -189.0056987309959


Episode 20882 finished after 74 timesteps. Return = -82.93337787200721
Episode 20883 finished after 97 timesteps. Return = -93.98733366305045
Episode 20884 finished after 101 timesteps. Return = -47.407726540661
Episode 20885 finished after 125 timesteps. Return = -183.31084112554572
Episode 20886 finished after 108 timesteps. Return = -107.37921829934153
Episode 20887 finished after 105 timesteps. Return = -2.793173364120861
Episode 20888 finished after 70 timesteps. Return = -96.64018067401676
Episode 20889 finished after 75 timesteps. Return = -44.4527462571508
Episode 20890 finished after 91 timesteps. Return = -133.40792502729315
Episode 20891 finished after 135 timesteps. Return = -228.31764455159004
Episode 20892 finished after 122 timesteps. Return = -133.2886595236156
Episode 20893 finished after 93 timesteps. Return = -174.08391792576802
Episode 20894 finished after 85 timesteps. Return = -144.4656855185992
Episode 20895 finished after 85 timesteps. Return = -106.333498186772

Episode 20999 finished after 119 timesteps. Return = -170.3412532239521
Episode 21000 finished after 90 timesteps. Return = -135.7537386349422
Episode 21001 finished after 76 timesteps. Return = -144.22104308977163
Episode 21002 finished after 105 timesteps. Return = -180.89185293210593
Episode 21003 finished after 105 timesteps. Return = -196.2794401102855
Episode 21004 finished after 132 timesteps. Return = -65.14578776968445
Episode 21005 finished after 81 timesteps. Return = -113.8267119469482
Episode 21006 finished after 85 timesteps. Return = -112.75227349896636
Episode 21007 finished after 92 timesteps. Return = -175.67438545899535
Episode 21008 finished after 106 timesteps. Return = -170.14111617910478
Episode 21009 finished after 133 timesteps. Return = -129.46074641587214
Episode 21010 finished after 102 timesteps. Return = -126.65309908548313
Episode 21011 finished after 129 timesteps. Return = -148.61718816986487
Episode 21012 finished after 144 timesteps. Return = -161.833

Episode 21117 finished after 89 timesteps. Return = -127.69498029110869
Episode 21118 finished after 111 timesteps. Return = -109.23249677147787
Episode 21119 finished after 118 timesteps. Return = -119.7566496822514
Episode 21120 finished after 107 timesteps. Return = -234.22745809296103
Episode 21121 finished after 93 timesteps. Return = -269.2420278287445
Episode 21122 finished after 107 timesteps. Return = -156.8217375738409
Episode 21123 finished after 152 timesteps. Return = -12.853244985734094
Episode 21124 finished after 77 timesteps. Return = -223.2207160348528
Episode 21125 finished after 114 timesteps. Return = -149.5707703086279
Episode 21126 finished after 81 timesteps. Return = -159.1193144114337
Episode 21127 finished after 68 timesteps. Return = -110.49666403405197
Episode 21128 finished after 148 timesteps. Return = -13.547474999124418
Episode 21129 finished after 86 timesteps. Return = -122.89407152543038
Episode 21130 finished after 153 timesteps. Return = -224.47372

Episode 21233 finished after 85 timesteps. Return = -87.65808410236777
Episode 21234 finished after 84 timesteps. Return = -170.72877195966885
Episode 21235 finished after 121 timesteps. Return = -99.29416726466495
Episode 21236 finished after 147 timesteps. Return = -0.4638401427022103
Episode 21237 finished after 81 timesteps. Return = -179.73822351561452
Episode 21238 finished after 115 timesteps. Return = -242.65238442107523
Episode 21239 finished after 126 timesteps. Return = -131.10586721750087
Episode 21240 finished after 105 timesteps. Return = -187.0450784381364
Episode 21241 finished after 74 timesteps. Return = -186.92031704473158
Episode 21242 finished after 81 timesteps. Return = -148.7053245215192
Episode 21243 finished after 119 timesteps. Return = -107.46817759592635
Episode 21244 finished after 62 timesteps. Return = -133.80031414736345
Episode 21245 finished after 122 timesteps. Return = -188.64825987741125
Episode 21246 finished after 98 timesteps. Return = -437.5910

Episode 21347 finished after 77 timesteps. Return = -215.26215681986181
Episode 21348 finished after 72 timesteps. Return = -177.1590034627452
Episode 21349 finished after 104 timesteps. Return = -179.05352991949292
Episode 21350 finished after 102 timesteps. Return = -196.85007687885604
Episode 21351 finished after 101 timesteps. Return = -230.44283788688182
Episode 21352 finished after 77 timesteps. Return = -104.00563888703077
Episode 21353 finished after 120 timesteps. Return = -107.50542889661669
Episode 21354 finished after 105 timesteps. Return = -126.6701650661968
Episode 21355 finished after 76 timesteps. Return = -147.47687257922658
Episode 21356 finished after 61 timesteps. Return = -138.85702384937017
Episode 21357 finished after 96 timesteps. Return = -210.04055631868573
Episode 21358 finished after 92 timesteps. Return = -132.5164262632167
Episode 21359 finished after 105 timesteps. Return = -151.5695005221354
Episode 21360 finished after 128 timesteps. Return = -147.6130

Episode 21462 finished after 88 timesteps. Return = -169.63147089415077
Episode 21463 finished after 84 timesteps. Return = -138.32901967167123
Episode 21464 finished after 63 timesteps. Return = -154.49751836035625
Episode 21465 finished after 71 timesteps. Return = -183.36243628582974
Episode 21466 finished after 119 timesteps. Return = -201.59332663113918
Episode 21467 finished after 94 timesteps. Return = -219.05814546914263
Episode 21468 finished after 74 timesteps. Return = -137.5114699268666
Episode 21469 finished after 143 timesteps. Return = -248.4055718192327
Episode 21470 finished after 75 timesteps. Return = -103.55251169885952
Episode 21471 finished after 83 timesteps. Return = -126.82692792477279
Episode 21472 finished after 77 timesteps. Return = -101.53007773457021
Episode 21473 finished after 68 timesteps. Return = -123.69191746551226
Episode 21474 finished after 97 timesteps. Return = -122.85890066433625
Episode 21475 finished after 114 timesteps. Return = -135.097662

Episode 21578 finished after 114 timesteps. Return = -331.40944768575264
Episode 21579 finished after 87 timesteps. Return = -216.01013362623232
Episode 21580 finished after 140 timesteps. Return = -130.01176203379254
Episode 21581 finished after 103 timesteps. Return = -209.8848876007761
Episode 21582 finished after 96 timesteps. Return = -221.83961022107087
Episode 21583 finished after 135 timesteps. Return = -179.4807628400409
Episode 21584 finished after 100 timesteps. Return = -235.53840463875457
Episode 21585 finished after 135 timesteps. Return = -204.70078183944315
Episode 21586 finished after 151 timesteps. Return = -22.574158312403284
Episode 21587 finished after 123 timesteps. Return = -194.54125740073897
Episode 21588 finished after 128 timesteps. Return = -261.4245579577074
Episode 21589 finished after 82 timesteps. Return = -116.65983364770673
Episode 21590 finished after 173 timesteps. Return = -15.663360542248498
Episode 21591 finished after 102 timesteps. Return = -114

Episode 21693 finished after 84 timesteps. Return = -113.79020896582068
Episode 21694 finished after 92 timesteps. Return = -76.54201152176438
Episode 21695 finished after 118 timesteps. Return = -200.65316291522322
Episode 21696 finished after 112 timesteps. Return = -194.27393959604973
Episode 21697 finished after 69 timesteps. Return = -116.01051440461907
Episode 21698 finished after 92 timesteps. Return = -113.16206534706325
Episode 21699 finished after 97 timesteps. Return = -195.02095463464167
Episode 21700 finished after 84 timesteps. Return = -145.88942861234122
Episode 21701 finished after 127 timesteps. Return = -88.54265588790862
Episode 21702 finished after 147 timesteps. Return = -42.63609540103198
Episode 21703 finished after 59 timesteps. Return = -152.42297748628198
Episode 21704 finished after 1000 timesteps. Return = 42.15231474914427
Episode 21705 finished after 1000 timesteps. Return = 103.17609701402321
Episode 21706 finished after 63 timesteps. Return = -142.63542

Episode 21810 finished after 132 timesteps. Return = -162.06067715878984
Episode 21811 finished after 114 timesteps. Return = -95.91164004099524
Episode 21812 finished after 129 timesteps. Return = -192.12558853000291
Episode 21813 finished after 70 timesteps. Return = -113.24135183355145
Episode 21814 finished after 89 timesteps. Return = -121.6361215864266
Episode 21815 finished after 109 timesteps. Return = -98.88734794154865
Episode 21816 finished after 75 timesteps. Return = -203.32539597518644
Episode 21817 finished after 60 timesteps. Return = -130.6162584690629
Episode 21818 finished after 79 timesteps. Return = -142.6035157972251
Episode 21819 finished after 102 timesteps. Return = -190.75474969710442
Episode 21820 finished after 88 timesteps. Return = 15.296662930218261
Episode 21821 finished after 120 timesteps. Return = -153.8920553124641
Episode 21822 finished after 83 timesteps. Return = -110.13249784975577
Episode 21823 finished after 82 timesteps. Return = -90.361518349

Episode 21927 finished after 112 timesteps. Return = -138.13252465905012
Episode 21928 finished after 78 timesteps. Return = -165.60285516413091
Episode 21929 finished after 101 timesteps. Return = -191.05370269271828
Episode 21930 finished after 93 timesteps. Return = -156.2791889277852
Episode 21931 finished after 78 timesteps. Return = -391.0808710369614
Episode 21932 finished after 78 timesteps. Return = -102.36727576408704
Episode 21933 finished after 113 timesteps. Return = -206.06438185596187
Episode 21934 finished after 64 timesteps. Return = -85.6264293936176
Episode 21935 finished after 89 timesteps. Return = -221.3337444740304
Episode 21936 finished after 78 timesteps. Return = -97.64141909175773
Episode 21937 finished after 84 timesteps. Return = -89.50643946833662
Episode 21938 finished after 123 timesteps. Return = -171.84049068943528
Episode 21939 finished after 92 timesteps. Return = -109.94894796147607
Episode 21940 finished after 133 timesteps. Return = -131.413002501

Episode 22042 finished after 1000 timesteps. Return = 32.9495885875028
Episode 22043 finished after 112 timesteps. Return = -235.724639734083
Episode 22044 finished after 124 timesteps. Return = -219.88035288299866
Episode 22045 finished after 123 timesteps. Return = -320.693551260151
Episode 22046 finished after 70 timesteps. Return = -73.60325325068595
Episode 22047 finished after 90 timesteps. Return = -117.52839001979838
Episode 22048 finished after 116 timesteps. Return = -108.42543677952884
Episode 22049 finished after 82 timesteps. Return = -101.40045438426806
Episode 22050 finished after 120 timesteps. Return = -153.13895811422404
Episode 22051 finished after 136 timesteps. Return = -125.84361176683342
Episode 22052 finished after 101 timesteps. Return = -204.58239199731463
Episode 22053 finished after 146 timesteps. Return = -58.70560896304001
Episode 22054 finished after 98 timesteps. Return = -102.07488198149981
Episode 22055 finished after 80 timesteps. Return = -120.278334

Episode 22156 finished after 103 timesteps. Return = -54.66320460409075
Episode 22157 finished after 115 timesteps. Return = 1.0007110824564762
Episode 22158 finished after 94 timesteps. Return = -303.2365714190569
Episode 22159 finished after 112 timesteps. Return = -286.0082710684155
Episode 22160 finished after 130 timesteps. Return = -204.13311049862577
Episode 22161 finished after 77 timesteps. Return = -232.56953060405908
Episode 22162 finished after 114 timesteps. Return = -237.8562702979462
Episode 22163 finished after 102 timesteps. Return = -268.8954288482739
Episode 22164 finished after 71 timesteps. Return = -139.3022420555552
Episode 22165 finished after 70 timesteps. Return = -233.8053229345577
Episode 22166 finished after 119 timesteps. Return = -221.5433558043282
Episode 22167 finished after 112 timesteps. Return = -293.19696952583104
Episode 22168 finished after 131 timesteps. Return = -106.8014543445391
Episode 22169 finished after 81 timesteps. Return = -157.20806279

Episode 22273 finished after 127 timesteps. Return = -232.08939234237712
Episode 22274 finished after 125 timesteps. Return = -103.87797075009237
Episode 22275 finished after 124 timesteps. Return = -224.42195175961498
Episode 22276 finished after 92 timesteps. Return = -355.4038914623701
Episode 22277 finished after 68 timesteps. Return = -61.827763486332756
Episode 22278 finished after 65 timesteps. Return = -104.94289587481292
Episode 22279 finished after 76 timesteps. Return = -96.03670226252595
Episode 22280 finished after 60 timesteps. Return = -125.3012975846798
Episode 22281 finished after 82 timesteps. Return = -97.9942217241936
Episode 22282 finished after 99 timesteps. Return = -180.93614562351377
Episode 22283 finished after 108 timesteps. Return = -9.266007340218579
Episode 22284 finished after 195 timesteps. Return = -32.30568452435696
Episode 22285 finished after 67 timesteps. Return = -95.84443716053939
Episode 22286 finished after 71 timesteps. Return = -139.1413560008

Episode 22390 finished after 105 timesteps. Return = -152.54561714090795
Episode 22391 finished after 145 timesteps. Return = -143.11731633623256
Episode 22392 finished after 67 timesteps. Return = -96.76781952226521
Episode 22393 finished after 129 timesteps. Return = -105.4197352493613
Episode 22394 finished after 146 timesteps. Return = -141.12118162689205
Episode 22395 finished after 98 timesteps. Return = -136.37070380582412
Episode 22396 finished after 100 timesteps. Return = -156.68827016327376
Episode 22397 finished after 99 timesteps. Return = -22.968314737672102
Episode 22398 finished after 110 timesteps. Return = -94.44765503196133
Episode 22399 finished after 91 timesteps. Return = -258.78058725850246
Episode 22400 finished after 62 timesteps. Return = -103.41072140875731
Episode 22401 finished after 141 timesteps. Return = -208.23845679916556
Episode 22402 finished after 70 timesteps. Return = -115.26711366658931
Episode 22403 finished after 63 timesteps. Return = -103.451

Episode 22506 finished after 82 timesteps. Return = -134.7078606542633
Episode 22507 finished after 95 timesteps. Return = -171.64794732549996
Episode 22508 finished after 94 timesteps. Return = -138.52363854366214
Episode 22509 finished after 107 timesteps. Return = -98.2825174095066
Episode 22510 finished after 106 timesteps. Return = -114.39060585102747
Episode 22511 finished after 77 timesteps. Return = -39.335534138581735
Episode 22512 finished after 70 timesteps. Return = -89.88590802426802
Episode 22513 finished after 107 timesteps. Return = -164.7962817534202
Episode 22514 finished after 165 timesteps. Return = -110.70113499136625
Episode 22515 finished after 112 timesteps. Return = -197.83569830836697
Episode 22516 finished after 94 timesteps. Return = -276.22910597128623
Episode 22517 finished after 140 timesteps. Return = -151.93301981534972
Episode 22518 finished after 76 timesteps. Return = -98.01083835074482
Episode 22519 finished after 102 timesteps. Return = -154.138150

Episode 22623 finished after 105 timesteps. Return = -154.0967229529029
Episode 22624 finished after 79 timesteps. Return = -226.93282941714
Episode 22625 finished after 141 timesteps. Return = -136.53958100924277
Episode 22626 finished after 71 timesteps. Return = -130.97895750788058
Episode 22627 finished after 85 timesteps. Return = -134.52995195684633
Episode 22628 finished after 107 timesteps. Return = -194.19540826763392
Episode 22629 finished after 95 timesteps. Return = -226.14953658422195
Episode 22630 finished after 107 timesteps. Return = -85.68345077812124
Episode 22631 finished after 74 timesteps. Return = -105.91441309694622
Episode 22632 finished after 160 timesteps. Return = -247.91418871079458
Episode 22633 finished after 1000 timesteps. Return = 90.58047172135488
Episode 22634 finished after 110 timesteps. Return = -29.690924787923763
Episode 22635 finished after 69 timesteps. Return = -107.25067466192519
Episode 22636 finished after 65 timesteps. Return = -69.8013974

Episode 22737 finished after 138 timesteps. Return = 5.343763492723696
Episode 22738 finished after 116 timesteps. Return = -99.59588296944868
Episode 22739 finished after 63 timesteps. Return = -96.78026808732336
Episode 22740 finished after 68 timesteps. Return = -146.8495760791942
Episode 22741 finished after 111 timesteps. Return = -229.48416649132523
Episode 22742 finished after 120 timesteps. Return = -167.16856646435525
Episode 22743 finished after 106 timesteps. Return = -257.5760982245249
Episode 22744 finished after 110 timesteps. Return = -207.7651232430419
Episode 22745 finished after 76 timesteps. Return = -319.7795380910899
Episode 22746 finished after 108 timesteps. Return = -144.26229751750427
Episode 22747 finished after 100 timesteps. Return = -263.2136702940551
Episode 22748 finished after 96 timesteps. Return = -94.71991058462612
Episode 22749 finished after 76 timesteps. Return = -177.8128411898395
Episode 22750 finished after 67 timesteps. Return = -70.96274871057

Episode 22851 finished after 76 timesteps. Return = -131.42233960031442
Episode 22852 finished after 76 timesteps. Return = -118.03265142386425
Episode 22853 finished after 93 timesteps. Return = -166.9592164473097
Episode 22854 finished after 104 timesteps. Return = -157.31347925326435
Episode 22855 finished after 110 timesteps. Return = -268.9624297097011
Episode 22856 finished after 161 timesteps. Return = -4.4861123649110795
Episode 22857 finished after 116 timesteps. Return = -293.8319499616901
Episode 22858 finished after 69 timesteps. Return = -112.76144908707087
Episode 22859 finished after 130 timesteps. Return = 14.171942895281617
Episode 22860 finished after 109 timesteps. Return = -134.2382096554255
Episode 22861 finished after 117 timesteps. Return = -109.7520623340214
Episode 22862 finished after 109 timesteps. Return = -78.8859773049401
Episode 22863 finished after 90 timesteps. Return = -124.09523296408739
Episode 22864 finished after 136 timesteps. Return = -91.9793692

Episode 22967 finished after 87 timesteps. Return = -202.74105607792916
Episode 22968 finished after 89 timesteps. Return = -206.98256577241028
Episode 22969 finished after 144 timesteps. Return = -192.07821588764799
Episode 22970 finished after 81 timesteps. Return = -141.99897454749302
Episode 22971 finished after 99 timesteps. Return = -165.54439348774508
Episode 22972 finished after 65 timesteps. Return = -81.23520513609117
Episode 22973 finished after 72 timesteps. Return = -147.55348366091548
Episode 22974 finished after 119 timesteps. Return = -169.49748583858738
Episode 22975 finished after 83 timesteps. Return = -159.5049661165121
Episode 22976 finished after 163 timesteps. Return = 12.2867597109597
Episode 22977 finished after 87 timesteps. Return = -171.49082424809959
Episode 22978 finished after 66 timesteps. Return = -139.9660865744067
Episode 22979 finished after 61 timesteps. Return = -123.47855813490963
Episode 22980 finished after 144 timesteps. Return = -118.876189580

Episode 23084 finished after 93 timesteps. Return = -246.85501816300751
Episode 23085 finished after 92 timesteps. Return = -135.94789427080116
Episode 23086 finished after 73 timesteps. Return = -79.71756782069029
Episode 23087 finished after 99 timesteps. Return = -116.04680633228764
Episode 23088 finished after 62 timesteps. Return = -124.2419992246951
Episode 23089 finished after 137 timesteps. Return = -213.06530468472482
Episode 23090 finished after 115 timesteps. Return = 32.22030606620433
Episode 23091 finished after 61 timesteps. Return = -84.45766822420752
Episode 23092 finished after 98 timesteps. Return = -140.7760888132832
Episode 23093 finished after 89 timesteps. Return = -133.42585685330468
Episode 23094 finished after 89 timesteps. Return = -175.84967508790282
Episode 23095 finished after 71 timesteps. Return = -130.9458442337608
Episode 23096 finished after 96 timesteps. Return = -115.2256300126238
Episode 23097 finished after 67 timesteps. Return = -101.8071872586255

Episode 23199 finished after 89 timesteps. Return = -113.40309845397728
Episode 23200 finished after 79 timesteps. Return = -119.41989065741605
Episode 23201 finished after 155 timesteps. Return = -203.76586386826452
Episode 23202 finished after 112 timesteps. Return = -186.21333956526945
Episode 23203 finished after 69 timesteps. Return = -110.3682933381467
Episode 23204 finished after 99 timesteps. Return = -204.17132788569063
Episode 23205 finished after 113 timesteps. Return = -197.41139051074077
Episode 23206 finished after 100 timesteps. Return = -227.6572796540948
Episode 23207 finished after 66 timesteps. Return = -104.13726378909912
Episode 23208 finished after 78 timesteps. Return = -169.14285968142138
Episode 23209 finished after 118 timesteps. Return = -179.57316958270542
Episode 23210 finished after 101 timesteps. Return = -287.8202971388572
Episode 23211 finished after 120 timesteps. Return = -228.39181715967692
Episode 23212 finished after 96 timesteps. Return = -134.178

Episode 23315 finished after 116 timesteps. Return = -163.73829354591652
Episode 23316 finished after 106 timesteps. Return = -210.46678846162996
Episode 23317 finished after 87 timesteps. Return = -113.26577224594567
Episode 23318 finished after 91 timesteps. Return = -287.6311634001185
Episode 23319 finished after 89 timesteps. Return = -147.09118344649147
Episode 23320 finished after 96 timesteps. Return = -159.85462737071452
Episode 23321 finished after 79 timesteps. Return = -115.0672011929843
Episode 23322 finished after 94 timesteps. Return = -196.1282131686815
Episode 23323 finished after 115 timesteps. Return = -149.36550191899173
Episode 23324 finished after 141 timesteps. Return = -176.73202507270673
Episode 23325 finished after 81 timesteps. Return = -106.15033920414487
Episode 23326 finished after 110 timesteps. Return = -95.39707756420292
Episode 23327 finished after 111 timesteps. Return = -187.28748681897576
Episode 23328 finished after 135 timesteps. Return = -304.1481

Episode 23429 finished after 65 timesteps. Return = -87.58110605773432
Episode 23430 finished after 79 timesteps. Return = -204.2226433860772
Episode 23431 finished after 138 timesteps. Return = -155.54595830415383
Episode 23432 finished after 75 timesteps. Return = -148.63119993231277
Episode 23433 finished after 67 timesteps. Return = -139.63368259951497
Episode 23434 finished after 96 timesteps. Return = -137.4931955469006
Episode 23435 finished after 99 timesteps. Return = -156.90659151651863
Episode 23436 finished after 154 timesteps. Return = -139.82610893333694
Episode 23437 finished after 82 timesteps. Return = -49.479884507721714
Episode 23438 finished after 93 timesteps. Return = 13.914042921440867
Episode 23439 finished after 97 timesteps. Return = 24.656655699879153
Episode 23440 finished after 131 timesteps. Return = -375.4884295573595
Episode 23441 finished after 1000 timesteps. Return = 75.68298097486701
Episode 23442 finished after 96 timesteps. Return = -271.8549449925

Episode 23546 finished after 128 timesteps. Return = -155.89171154287894
Episode 23547 finished after 90 timesteps. Return = -176.00462199238729
Episode 23548 finished after 63 timesteps. Return = -138.78913582217464
Episode 23549 finished after 89 timesteps. Return = -227.96122825681795
Episode 23550 finished after 77 timesteps. Return = -176.88234006506408
Episode 23551 finished after 71 timesteps. Return = 53.90190343319696
Episode 23552 finished after 121 timesteps. Return = -152.12494069357717
Episode 23553 finished after 91 timesteps. Return = -176.23725661510156
Episode 23554 finished after 78 timesteps. Return = -134.3900898945819
Episode 23555 finished after 99 timesteps. Return = -202.440151802944
Episode 23556 finished after 122 timesteps. Return = -223.79873588974576
Episode 23557 finished after 100 timesteps. Return = 38.545259822056465
Episode 23558 finished after 98 timesteps. Return = -90.04004330848377
Episode 23559 finished after 81 timesteps. Return = -207.9918344070

Episode 23661 finished after 139 timesteps. Return = -62.066957250225045
Episode 23662 finished after 144 timesteps. Return = -103.47640783163833
Episode 23663 finished after 106 timesteps. Return = -126.4945026558512
Episode 23664 finished after 61 timesteps. Return = -121.21746618656707
Episode 23665 finished after 75 timesteps. Return = -152.93969830929873
Episode 23666 finished after 77 timesteps. Return = -190.89451517925997
Episode 23667 finished after 99 timesteps. Return = -94.29528189416331
Episode 23668 finished after 112 timesteps. Return = -11.465418201574863
Episode 23669 finished after 97 timesteps. Return = -125.4856266904998
Episode 23670 finished after 79 timesteps. Return = -256.5926662534934
Episode 23671 finished after 120 timesteps. Return = -202.46518958524553
Episode 23672 finished after 72 timesteps. Return = -133.44442288156097
Episode 23673 finished after 133 timesteps. Return = -102.79253088180431
Episode 23674 finished after 85 timesteps. Return = -103.21911

Episode 23776 finished after 143 timesteps. Return = -447.50040509373844
Episode 23777 finished after 122 timesteps. Return = -174.32365606889107
Episode 23778 finished after 82 timesteps. Return = -213.02370507234673
Episode 23779 finished after 113 timesteps. Return = -239.64919512845623
Episode 23780 finished after 113 timesteps. Return = -254.54557242905625
Episode 23781 finished after 96 timesteps. Return = -180.58637456571887
Episode 23782 finished after 171 timesteps. Return = -76.64108029456709
Episode 23783 finished after 77 timesteps. Return = -166.51444278337647
Episode 23784 finished after 129 timesteps. Return = -78.695966634577
Episode 23785 finished after 72 timesteps. Return = -194.98942344217318
Episode 23786 finished after 78 timesteps. Return = -129.4058180647462
Episode 23787 finished after 97 timesteps. Return = -154.34602733816413
Episode 23788 finished after 101 timesteps. Return = -157.71892346763633
Episode 23789 finished after 91 timesteps. Return = -155.59686

Episode 23893 finished after 93 timesteps. Return = -124.77875374833249
Episode 23894 finished after 102 timesteps. Return = -127.88842894988433
Episode 23895 finished after 68 timesteps. Return = -117.54764338774405
Episode 23896 finished after 74 timesteps. Return = -106.4398274923461
Episode 23897 finished after 74 timesteps. Return = -115.0764704866112
Episode 23898 finished after 122 timesteps. Return = -145.51366588611344
Episode 23899 finished after 116 timesteps. Return = -103.4475820428831
Episode 23900 finished after 157 timesteps. Return = -164.12742633955196
Episode 23901 finished after 80 timesteps. Return = -141.03251625936474
Episode 23902 finished after 104 timesteps. Return = -132.06668752898304
Episode 23903 finished after 84 timesteps. Return = -102.79768123928352
Episode 23904 finished after 69 timesteps. Return = -101.01939634103357
Episode 23905 finished after 101 timesteps. Return = -184.9909436171321
Episode 23906 finished after 84 timesteps. Return = -153.56142

Episode 24010 finished after 127 timesteps. Return = -100.8028519709899
Episode 24011 finished after 135 timesteps. Return = -179.696399249332
Episode 24012 finished after 66 timesteps. Return = -113.63541610523764
Episode 24013 finished after 132 timesteps. Return = -143.8655481784349
Episode 24014 finished after 90 timesteps. Return = -159.68014945366113
Episode 24015 finished after 92 timesteps. Return = -161.89769549386506
Episode 24016 finished after 146 timesteps. Return = -225.5582674840366
Episode 24017 finished after 144 timesteps. Return = -110.68159708835016
Episode 24018 finished after 109 timesteps. Return = -119.23654301703691
Episode 24019 finished after 99 timesteps. Return = -114.57399188145331
Episode 24020 finished after 139 timesteps. Return = -240.38038957242955
Episode 24021 finished after 67 timesteps. Return = -147.35577344143817
Episode 24022 finished after 113 timesteps. Return = -218.31904353152004
Episode 24023 finished after 86 timesteps. Return = -295.7386

Episode 24125 finished after 123 timesteps. Return = -137.48396538688718
Episode 24126 finished after 115 timesteps. Return = -284.8169368886141
Episode 24127 finished after 66 timesteps. Return = -79.42534408761625
Episode 24128 finished after 74 timesteps. Return = -79.37902125773572
Episode 24129 finished after 75 timesteps. Return = -74.79037187653654
Episode 24130 finished after 115 timesteps. Return = -236.46954062618178
Episode 24131 finished after 90 timesteps. Return = -129.8188521239465
Episode 24132 finished after 72 timesteps. Return = -125.29331076370921
Episode 24133 finished after 78 timesteps. Return = -90.90749759914632
Episode 24134 finished after 130 timesteps. Return = -134.08967824020857
Episode 24135 finished after 108 timesteps. Return = -229.9086834265292
Episode 24136 finished after 124 timesteps. Return = -97.27088338644819
Episode 24137 finished after 136 timesteps. Return = -221.07293306996493
Episode 24138 finished after 144 timesteps. Return = -258.0453514

Episode 24242 finished after 72 timesteps. Return = -94.31341203609821
Episode 24243 finished after 90 timesteps. Return = -78.77590788757715
Episode 24244 finished after 107 timesteps. Return = -127.970053453269
Episode 24245 finished after 102 timesteps. Return = -168.72838419041403
Episode 24246 finished after 98 timesteps. Return = -150.60268289148144
Episode 24247 finished after 82 timesteps. Return = -155.19835795501507
Episode 24248 finished after 85 timesteps. Return = -111.4409563771056
Episode 24249 finished after 130 timesteps. Return = -247.69142891905528
Episode 24250 finished after 91 timesteps. Return = -142.9159795684019
Episode 24251 finished after 71 timesteps. Return = -91.60265216247363
Episode 24252 finished after 106 timesteps. Return = -136.309578378039
Episode 24253 finished after 127 timesteps. Return = -178.90734570345685
Episode 24254 finished after 158 timesteps. Return = 5.185329260146062
Episode 24255 finished after 126 timesteps. Return = -140.90097508080

Episode 24356 finished after 93 timesteps. Return = -145.69669522630986
Episode 24357 finished after 91 timesteps. Return = -86.98252313805585
Episode 24358 finished after 112 timesteps. Return = -185.49629311999493
Episode 24359 finished after 124 timesteps. Return = -163.42548902886892
Episode 24360 finished after 88 timesteps. Return = -100.79123988787535
Episode 24361 finished after 149 timesteps. Return = -136.69272761476105
Episode 24362 finished after 107 timesteps. Return = -173.82345129850384
Episode 24363 finished after 97 timesteps. Return = -205.44140654507675
Episode 24364 finished after 79 timesteps. Return = -80.11085839858902
Episode 24365 finished after 73 timesteps. Return = -127.66061257557686
Episode 24366 finished after 156 timesteps. Return = -233.51862838719506
Episode 24367 finished after 134 timesteps. Return = -165.64042284384828
Episode 24368 finished after 139 timesteps. Return = -96.25791407193434
Episode 24369 finished after 117 timesteps. Return = -238.77

Episode 24473 finished after 128 timesteps. Return = -225.63155127245852
Episode 24474 finished after 146 timesteps. Return = -146.33942460600136
Episode 24475 finished after 130 timesteps. Return = -284.28375842880985
Episode 24476 finished after 66 timesteps. Return = -79.88597890902858
Episode 24477 finished after 100 timesteps. Return = 11.09743882306266
Episode 24478 finished after 167 timesteps. Return = -172.32272749390916
Episode 24479 finished after 87 timesteps. Return = -105.90146214262947
Episode 24480 finished after 160 timesteps. Return = -239.28485657404653
Episode 24481 finished after 73 timesteps. Return = -175.59337143665277
Episode 24482 finished after 116 timesteps. Return = -193.4944667615648
Episode 24483 finished after 90 timesteps. Return = -56.97420811697872
Episode 24484 finished after 62 timesteps. Return = -80.9484264556223
Episode 24485 finished after 88 timesteps. Return = -61.26881458694463
Episode 24486 finished after 95 timesteps. Return = -60.537361729

Episode 24587 finished after 142 timesteps. Return = -100.79615192861152
Episode 24588 finished after 157 timesteps. Return = -151.65173209536658
Episode 24589 finished after 73 timesteps. Return = -98.51761026209726
Episode 24590 finished after 83 timesteps. Return = -195.7599008590986
Episode 24591 finished after 140 timesteps. Return = -128.62890449963376
Episode 24592 finished after 126 timesteps. Return = -55.40591553968147
Episode 24593 finished after 151 timesteps. Return = -137.2342140597923
Episode 24594 finished after 142 timesteps. Return = -364.48208941848395
Episode 24595 finished after 90 timesteps. Return = -449.97328770001286
Episode 24596 finished after 167 timesteps. Return = -10.951242778094098
Episode 24597 finished after 97 timesteps. Return = -85.03564972091645
Episode 24598 finished after 112 timesteps. Return = -193.06856458845684
Episode 24599 finished after 79 timesteps. Return = -99.81128356630146
Episode 24600 finished after 90 timesteps. Return = -50.366499

Episode 24701 finished after 84 timesteps. Return = -170.85919270860205
Episode 24702 finished after 147 timesteps. Return = -260.23927440377497
Episode 24703 finished after 119 timesteps. Return = -202.7896965486384
Episode 24704 finished after 80 timesteps. Return = -112.77812007641238
Episode 24705 finished after 150 timesteps. Return = -257.6340523355283
Episode 24706 finished after 122 timesteps. Return = -279.7739685414659
Episode 24707 finished after 71 timesteps. Return = -107.17318419978356
Episode 24708 finished after 92 timesteps. Return = -143.49335720912353
Episode 24709 finished after 97 timesteps. Return = -133.10673409730154
Episode 24710 finished after 90 timesteps. Return = -124.52050688180292
Episode 24711 finished after 63 timesteps. Return = -149.58946973558636
Episode 24712 finished after 138 timesteps. Return = -133.31332525345925
Episode 24713 finished after 79 timesteps. Return = -76.88381228549272
Episode 24714 finished after 63 timesteps. Return = -163.295076

Episode 24817 finished after 117 timesteps. Return = -208.6275420497764
Episode 24818 finished after 102 timesteps. Return = -201.3642601191608
Episode 24819 finished after 152 timesteps. Return = -25.917502539086257
Episode 24820 finished after 119 timesteps. Return = -126.88992955018682
Episode 24821 finished after 58 timesteps. Return = -104.9022381785543
Episode 24822 finished after 140 timesteps. Return = -83.55951026824702
Episode 24823 finished after 64 timesteps. Return = -213.4009519722864
Episode 24824 finished after 105 timesteps. Return = -29.274780257489297
Episode 24825 finished after 105 timesteps. Return = -252.8914813087092
Episode 24826 finished after 67 timesteps. Return = -59.78020982384633
Episode 24827 finished after 69 timesteps. Return = -162.56579419974602
Episode 24828 finished after 117 timesteps. Return = -206.1361334387757
Episode 24829 finished after 85 timesteps. Return = -119.49276483827435
Episode 24830 finished after 119 timesteps. Return = -1.53446929

Episode 24932 finished after 138 timesteps. Return = -183.9966372842579
Episode 24933 finished after 63 timesteps. Return = -112.18668580432565
Episode 24934 finished after 111 timesteps. Return = -174.02582971842236
Episode 24935 finished after 118 timesteps. Return = -194.1753943190725
Episode 24936 finished after 127 timesteps. Return = -185.96492038727604
Episode 24937 finished after 72 timesteps. Return = -161.86496156134643
Episode 24938 finished after 69 timesteps. Return = -102.85999824641064
Episode 24939 finished after 108 timesteps. Return = -328.94651050224445
Episode 24940 finished after 108 timesteps. Return = -230.56827924231675
Episode 24941 finished after 85 timesteps. Return = -120.354234916951
Episode 24942 finished after 108 timesteps. Return = -417.67196851369073
Episode 24943 finished after 212 timesteps. Return = -151.1406420659208
Episode 24944 finished after 74 timesteps. Return = -90.71161139906124
Episode 24945 finished after 129 timesteps. Return = -156.0861

Episode 25048 finished after 121 timesteps. Return = -225.86071580151935
Episode 25049 finished after 81 timesteps. Return = -214.93618811999045
Episode 25050 finished after 95 timesteps. Return = -60.343845718464564
Episode 25051 finished after 103 timesteps. Return = -122.10020260348352
Episode 25052 finished after 114 timesteps. Return = -150.54138842170175
Episode 25053 finished after 90 timesteps. Return = -112.82797301117262
Episode 25054 finished after 152 timesteps. Return = -132.54407053866353
Episode 25055 finished after 61 timesteps. Return = -116.91651151413583
Episode 25056 finished after 103 timesteps. Return = -201.6319795707936
Episode 25057 finished after 71 timesteps. Return = -135.37230548104003
Episode 25058 finished after 83 timesteps. Return = -127.189664572581
Episode 25059 finished after 72 timesteps. Return = -88.16450450061637
Episode 25060 finished after 88 timesteps. Return = -101.80057043965002
Episode 25061 finished after 75 timesteps. Return = -156.566808

KeyboardInterrupt: 

In [165]:
#torch.save(agent.state_dict(), 'checkpoints/lunar_lander_64x64_checkpoint_0.pt')
#agent.load_state_dict(torch.load('checkpoints/lunar_lander_32x32_checkpoint_0.pt'))

In [74]:
# Render 5 episodes with the greedy policy, commanding a return of 150 over a horizon of 400 steps.
visualise_agent(greedy_policy, command=[150, 400], n=5)

Episode 0 finished after 156 timesteps. Return = -241.49785131822478
Episode 1 finished after 297 timesteps. Return = -358.99471816394623
Episode 2 finished after 211 timesteps. Return = -314.8731776425535
Episode 3 finished after 102 timesteps. Return = -224.18053479219859
Episode 4 finished after 412 timesteps. Return = -387.7649308825705


In [73]:
# Render 5 episodes with the stochastic policy, commanding a return of 150 over a horizon of 400 steps.
visualise_agent(stochastic_policy, command=[150, 400], n=5)

Episode 0 finished after 101 timesteps. Return = -131.8027246567254


In [25]:
# Inspect the total return of every episode currently held in the replay buffer.
print([mem['return'] for mem in replay_buffer])

[-63.70254888830934, -63.638939025122625, -63.5994898557033, -63.49465027645706, -63.46673824051337, -63.36871712910339, -63.3170788909209, -63.268455814456246, -63.26158837333688, -63.22068659038845, -63.21192519962132, -63.159962894358316, -63.035593454982134, -63.035425263206434, -62.97096940697155, -62.943593599875086, -62.92596251667162, -62.91281822394709, -62.90096774806254, -62.89021254680336, -62.88432112394105, -62.833528924538854, -62.77959557612017, -62.667310065127865, -62.66679486512233, -62.629278971083366, -62.61750379938199, -62.575461411676095, -62.47476364144245, -62.2956868138306, -62.28361089060018, -62.21614293189815, -62.211379210167564, -62.19752032429969, -62.17571584274293, -62.118940774335755, -62.08795198808494, -61.997331174933244, -61.92102486534531, -61.8701775567973, -61.79702597554188, -61.78349969009108, -61.761231874595126, -61.75852742540508, -61.71683109542317, -61.6446895161059, -61.506866501602374, -61.489387372573006, -61.48727633614841, -61.4803

# Previous Code

In [None]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64):
    """Run supervised updates of the behaviour function on replayed segments.

    Each training example is a random segment of a random stored episode:
    the input is the observation at the segment start concatenated with
    [reward summed over the segment, segment length], and the label is the
    action that was taken at the segment start.

    Args:
        policy_net: network mapping a batch of inputs to action logits; must
            expose an ``optimizer`` attribute.
        replay_buffer: sequence of episode dicts with 'observation',
            'action' and 'reward' lists.
        n_updates: number of gradient steps to take.
        batch_size: number of segments per gradient step.

    Returns:
        Mean cross-entropy cost over all updates.
    """
    losses = []
    for _ in range(n_updates):
        features = np.zeros((batch_size, np.prod(env.observation_space.shape) + 2))
        targets = np.zeros((batch_size))
        for row in range(batch_size):
            # Draw order (episode, horizon, start) matters for reproducibility.
            episode = replay_buffer[np.random.randint(0, len(replay_buffer))]
            n_steps = len(episode['observation'])
            horizon = np.random.randint(1, n_steps + 1)
            start = np.random.randint(0, n_steps + 1 - horizon)
            segment_reward = sum(episode['reward'][start:start + horizon])
            features[row] = np.append(episode['observation'][start], [segment_reward, horizon])
            targets[row] = episode['action'][start]
        logits = policy_net(torch.tensor(features).double())
        loss = F.cross_entropy(logits, torch.tensor(targets).long())
        losses.append(loss.item())
        loss.backward()
        policy_net.optimizer.step()
        # Clear gradients so the next step starts from zero.
        policy_net.optimizer.zero_grad()
    return np.mean(losses)

In [8]:
def train_net(policy_net, episode_mem, n_samples = 5):
    """Take single-sample gradient steps on random segments of one episode.

    For each sample a random horizon and start index are drawn; the network
    input is the observation at the start index concatenated with
    [reward summed over the segment, horizon], and the target is the action
    taken at the start index, scored with binary cross-entropy.

    Args:
        policy_net: network returning a probability for the given input;
            must expose an ``optimizer`` attribute.
        episode_mem: dict with 'observation', 'action' and 'reward' lists.
        n_samples: number of single-sample updates to perform.

    Returns:
        Mean binary cross-entropy cost over the sampled updates.
    """
    losses = []
    for _ in range(n_samples):
        n_steps = len(episode_mem['observation'])
        # Horizon first, then start index, so the segment always fits.
        horizon = np.random.randint(1, n_steps + 1)
        start = np.random.randint(0, n_steps + 1 - horizon)
        segment_reward = sum(episode_mem['reward'][start:start + horizon])
        features = np.append(episode_mem['observation'][start], [segment_reward, horizon])
        target = torch.tensor([episode_mem['action'][start]]).double()

        prediction = policy_net(torch.tensor(features).double())
        loss = F.binary_cross_entropy(prediction, target)
        losses.append(loss.item())
        loss.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(losses)
    

In [33]:
def train(policy_net, n_episodes=100):
    """Roll out episodes with an epsilon-noised policy and train after each one.

    Reads the module-level ``env``, ``desired_reward`` and ``command_horizon``
    and updates the module-level counters ``i_episode`` and ``epsilon``.
    A KeyboardInterrupt stops training quietly; the environment is closed on
    every exit path, including unexpected exceptions.

    Args:
        policy_net: network returning a single action probability for an
            input of observation + [desired_reward, command_horizon].
        n_episodes: number of episodes to run.
    """
    global i_episode
    global epsilon
    try:
        for _ in range(n_episodes):
            observation = env.reset()
            episode_mem = {'observation':[],
                            'action':[],
                            'reward':[],
                            'done':[]}
            done=False
            while not done:
                network_input = torch.tensor(np.append(observation, [desired_reward, command_horizon])).double()
                action_prob = policy_net(network_input)
                # Sample a binary action using the network output as P(action == 1).
                action = np.random.binomial(1, action_prob.item())
                # With probability epsilon, override with a uniformly random binary action.
                if np.random.rand()<epsilon: action = np.random.randint(0, 2)
                new_observation, reward, done, info = env.step(action)

                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                episode_mem['done'].append(done)

                observation=new_observation
                # Exploration rate decays once per environment step, not per episode.
                epsilon*=0.999
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            mean_cost = train_net(policy_net, episode_mem)

            i_episode+=1
            print("Episode {} finished after {} timesteps. Epsilon={} Mean Cost={}".format(i_episode, len(episode_mem['observation']), epsilon, mean_cost))
    except KeyboardInterrupt:
        # Manual interruption is the expected way to stop a long run.
        pass
    finally:
        # Always release the environment, whatever ended the loop.
        env.close()

## Previous version

In [55]:
import gym
import time
import torch
import numpy as np
from copy import deepcopy
import torch.nn.functional as F

def random_policy(obs):
    """Return a uniformly random action index; the observation is ignored."""
    n_actions = env.action_space.n
    return np.random.randint(n_actions)

#Visualise agent function
def visualise_agent(policy, command, command_scale, n=5):
    """Render ``n`` episodes of ``policy`` acting under a given command.

    The command is copied per episode and updated after every step: the
    reward received is subtracted from the desired reward and the horizon is
    decremented (never below 1). The environment is closed on every exit
    path; a KeyboardInterrupt stops the rendering quietly.

    Args:
        policy: callable mapping the network input tensor to an action.
        command: [desired reward, desired horizon]; not modified.
        command_scale: factor applied to the command before it is appended
            to the observation.
        n: number of episodes to render.
    """
    try:
        for trial_i in range(n):
            # Work on a copy so the caller's command survives across episodes.
            current_command = deepcopy(command)
            observation = env.reset()
            done=False
            t=0
            episode_return=0
            while not done:
                env.render()
                network_input = torch.tensor(np.append(observation, np.array(current_command)*command_scale)).double()
                policy_action = policy(network_input)
                observation, reward, done, info = env.step(policy_action)
                episode_return+=reward
                # Remaining reward to achieve and remaining time to achieve it.
                current_command[0]-= reward
                current_command[1] = max(1, current_command[1]-1)

                t+=1
            env.render()
            # Pause on the final frame before the next episode starts.
            time.sleep(1.5)
            print("Episode {} finished after {} timesteps. Return = {}".format(trial_i, t, episode_return))
    except KeyboardInterrupt:
        # Manual interruption is the expected way to stop watching.
        pass
    finally:
        # Always release the environment (and its render window).
        env.close()
        
#Behaviour function - Neural Network
class FCNN_AGENT(torch.nn.Module):
    """Fully connected behaviour function.

    Input is the flattened observation plus two command scalars; output is
    one logit per action of the module-level ``env``.
    """

    def __init__(self):
        super().__init__()
        n_inputs = np.prod(env.observation_space.shape) + 2  # observation + 2 command values
        n_actions = env.action_space.n
        hidden_units = 10
        layers = [
            torch.nn.Linear(n_inputs, hidden_units),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_units, hidden_units),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_units, n_actions),
        ]
        self.fc = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return the action logits for input ``x``."""
        return self.fc(x)

    def create_optimizer(self, lr):
        """Attach an Adam optimizer over this network's parameters as ``self.optimizer``."""
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

#Fill the replay buffer with more experience
def collect_experience(policy, replay_buffer, replay_size, last_few, n_episodes=100, command_scale=0.01, log_to_tensorboard=True):
    """Run episodes with ``policy`` and add them to the replay buffer.

    A fresh command is sampled per episode from a snapshot of the buffer
    taken at call time, so episodes collected during this call do not
    influence the commands. The command is updated after every step.
    A KeyboardInterrupt stops collection early (the unfinished episode is
    dropped); the environment is closed on every exit path.

    Uses the module-level ``env``, ``writer`` (only when logging) and
    ``sample_command``, and increments the global ``i_episode``.

    Args:
        policy: callable mapping the network input tensor to an action.
        replay_buffer: list of episode dicts; new episodes are appended to
            it in place.
        replay_size: maximum number of episodes kept in the returned buffer.
        last_few: number of trailing buffer entries used to sample commands.
        n_episodes: number of episodes to collect.
        command_scale: factor applied to the command before it is appended
            to the observation.
        log_to_tensorboard: whether to log commands and returns to ``writer``.

    Returns:
        A new list holding the ``replay_size`` highest-return episodes,
        sorted by ascending return.
    """
    global i_episode
    # A shallow snapshot is enough: the stored episodes are only read here,
    # and appends to replay_buffer below must not affect command sampling.
    init_replay_buffer = list(replay_buffer)
    try:
        for _ in range(n_episodes):
            command = sample_command(init_replay_buffer, last_few)
            if log_to_tensorboard:
                # Log the exploratory command chosen for this episode.
                writer.add_scalar('Command desired reward/Episode', command[0], i_episode)
                writer.add_scalar('Command horizon/Episode', command[1], i_episode)
            observation = env.reset()
            episode_mem = {'observation':[],
                            'action':[],
                            'reward':[]}
            done=False
            while not done:
                network_input = torch.tensor(np.append(observation, np.array(command)*command_scale)).double()
                action = policy(network_input)
                new_observation, reward, done, info = env.step(action)

                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)

                observation=new_observation
                # NOTE(review): the remaining desired reward is clamped to a
                # minimum of 1 here, whereas train_net builds its targets from
                # unclamped reward sums - confirm this is intended.
                command[0] = max(1, command[0]-reward)
                command[1] = max(1, command[1]-1)
            episode_return = sum(episode_mem['reward'])
            episode_len = len(episode_mem['observation'])
            episode_mem['return']=episode_return
            episode_mem['episode_len']=episode_len
            replay_buffer.append(episode_mem)
            i_episode+=1
            if log_to_tensorboard:
                # Log the return achieved in this episode.
                writer.add_scalar('Return/Episode', episode_return, i_episode)
            print("Episode {} finished after {} timesteps. Return = {}".format(i_episode, episode_len, episode_return))
    except KeyboardInterrupt:
        # Manual interruption is the expected way to stop collection early.
        pass
    finally:
        # Always release the environment, whatever ended the loop.
        env.close()
    # Keep only the best episodes; ascending order puts the best ones last,
    # which is where sample_command looks.
    replay_buffer = sorted(replay_buffer, key=lambda x:x['return'])[-replay_size:]
    return replay_buffer

#Sample exploratory command
def sample_command(replay_buffer, last_few):
    """Draw an exploratory command from the tail of the replay buffer.

    Args:
        replay_buffer: list of episode dicts with 'return' and 'episode_len'.
        last_few: number of trailing episodes to base the command on.

    Returns:
        [desired_reward, command_horizon]: the horizon is the mean length of
        the selected episodes and the desired reward is drawn uniformly
        between their mean return and mean + one standard deviation.
        An empty buffer yields [1, 1].
    """
    if not replay_buffer:
        return [1, 1]

    recent = replay_buffer[-last_few:]
    episode_lengths = [episode['episode_len'] for episode in recent]
    episode_returns = [episode['return'] for episode in recent]

    lower = np.mean(episode_returns)
    upper = lower + np.std(episode_returns)
    horizon = np.mean(episode_lengths)
    return [np.random.uniform(lower, upper), horizon]

#Improve behaviour function by training on replay buffer
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64, command_scale=0.01):
    """Train the behaviour function on episode tails from the replay buffer.

    Each example picks a random episode and a random start step; the segment
    always runs to the end of the episode. The input is the observation at
    the start step concatenated with the scaled [remaining reward, remaining
    steps], and the label is the action taken at the start step.

    Reads the module-level ``env``, ``log_to_tensorboard`` and ``writer``
    and increments the global ``i_updates`` once per gradient step.

    Args:
        policy_net: network mapping a batch of inputs to action logits; must
            expose an ``optimizer`` attribute.
        replay_buffer: sequence of episode dicts with 'observation',
            'action' and 'reward' lists.
        n_updates: number of gradient steps to take.
        batch_size: number of examples per gradient step.
        command_scale: factor applied to both command values.

    Returns:
        Mean cross-entropy cost over all updates.
    """
    global i_updates
    losses = []
    for _ in range(n_updates):
        features = np.zeros((batch_size, np.prod(env.observation_space.shape) + 2))
        targets = np.zeros((batch_size))
        for row in range(batch_size):
            # Draw order (episode, then start step) matters for reproducibility.
            episode = replay_buffer[np.random.randint(0, len(replay_buffer))]
            start = np.random.randint(0, len(episode['observation']))
            end = len(episode['observation'])
            scaled_horizon = (end - start) * command_scale
            scaled_reward = sum(episode['reward'][start:end]) * command_scale
            features[row] = np.append(episode['observation'][start], [scaled_reward, scaled_horizon])
            targets[row] = episode['action'][start]
        logits = policy_net(torch.tensor(features).double())
        loss = F.cross_entropy(logits, torch.tensor(targets).long())
        if log_to_tensorboard:
            # Plot the training cost against the running update counter.
            writer.add_scalar('Cost/NN update', loss.item() , i_updates)
        losses.append(loss.item())
        loss.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
        i_updates += 1
    return np.mean(losses)

#Return a greedy policy from a given network
def create_greedy_policy(policy_network):
    """Build a policy that always picks the highest-logit action.

    Args:
        policy_network: callable mapping an input tensor to action logits.

    Returns:
        A function ``policy(obs)`` returning the index of the largest logit.
    """
    def policy(obs):
        # Acting never needs gradients; skip building the autograd graph.
        with torch.no_grad():
            action_logits = policy_network(obs)
        action = np.argmax(action_logits.detach().numpy())
        return action
    return policy

#Return a stochastic policy from a given network
def create_stochastic_policy(policy_network):
    """Build a policy that samples actions from the softmax of the logits.

    Args:
        policy_network: callable mapping an input tensor to action logits.

    Returns:
        A function ``policy(obs)`` returning a sampled action index as a
        Python int.
    """
    def policy(obs):
        # Acting never needs gradients; skip building the autograd graph.
        with torch.no_grad():
            action_logits = policy_network(obs)
            action_probs = F.softmax(action_logits, dim=-1)
        action = torch.distributions.Categorical(action_probs).sample().item()
        return action
    return policy

In [56]:
#Define hyperparameters, initialize behaviour function
env = gym.make('LunarLander-v2')
i_episode=0  # global episode counter, incremented by collect_experience
i_updates=0  # global gradient-step counter, incremented by train_net
replay_buffer = []
log_to_tensorboard = True  # read as a global by train_net and passed explicitly to collect_experience

replay_size = 600  # number of highest-return episodes kept after each collection
last_few = 75  # number of trailing buffer episodes used to sample commands
batch_size = 32  # examples per gradient step
n_warm_up_episodes = 50  # random-policy episodes collected before the first training pass
n_episodes_per_iter = 50
n_updates_per_iter = 300
command_scale = 0.01  # multiplies the command before it is fed to the network
lr = 0.001  # Adam learning rate

# The network runs in double precision because all inputs are built with .double().
agent = FCNN_AGENT().double()
agent.create_optimizer(lr)

# Both policies wrap the same network, so training `agent` updates them too.
stochastic_policy = create_stochastic_policy(agent)
greedy_policy = create_greedy_policy(agent)

# SET UP TRAINING VISUALISATION
if log_to_tensorboard: from torch.utils.tensorboard import SummaryWriter
if log_to_tensorboard: writer = SummaryWriter() # used to plot the model's performance in tensorboard

#Collect warm up episodes
replay_buffer = collect_experience(random_policy, replay_buffer, replay_size, last_few, n_warm_up_episodes, command_scale, log_to_tensorboard)
# First training pass on the warm-up data; in a notebook the returned mean cost is displayed.
train_net(agent, replay_buffer, n_updates_per_iter, batch_size, command_scale)

Episode 1 finished after 78 timesteps. Return = -128.4480682875527
Episode 2 finished after 89 timesteps. Return = -387.88715583374216
Episode 3 finished after 77 timesteps. Return = -95.10879230530807
Episode 4 finished after 72 timesteps. Return = -102.74438279096503
Episode 5 finished after 116 timesteps. Return = -253.90123618001869
Episode 6 finished after 107 timesteps. Return = -178.8621093404991
Episode 7 finished after 110 timesteps. Return = -424.2903105389424
Episode 8 finished after 69 timesteps. Return = -90.45928541886127
Episode 9 finished after 85 timesteps. Return = -90.34062619706825
Episode 10 finished after 110 timesteps. Return = -257.37052024828347
Episode 11 finished after 95 timesteps. Return = -171.70578091801366
Episode 12 finished after 65 timesteps. Return = -264.4237434177501
Episode 13 finished after 128 timesteps. Return = -146.38984817020642
Episode 14 finished after 73 timesteps. Return = -95.87199679628412
Episode 15 finished after 67 timesteps. Return

1.3918637607680855

In [57]:
#Collect experience and train behaviour function for given number of iterations
n_iters = 1000
for i in range(n_iters):
    # Gather new episodes with the current stochastic policy; the returned buffer is sorted and truncated.
    replay_buffer = collect_experience(stochastic_policy, replay_buffer, replay_size, last_few, n_episodes_per_iter, command_scale, log_to_tensorboard)
    # Then fit the behaviour function to the updated buffer.
    train_net(agent, replay_buffer, n_updates_per_iter, batch_size, command_scale)

Episode 51 finished after 137 timesteps. Return = -446.3506995863737
Episode 52 finished after 77 timesteps. Return = -140.5196864731071
Episode 53 finished after 70 timesteps. Return = -85.56728471718694
Episode 54 finished after 71 timesteps. Return = -94.78256978743447
Episode 55 finished after 114 timesteps. Return = -65.1801625049948
Episode 56 finished after 71 timesteps. Return = -315.67881711784116
Episode 57 finished after 120 timesteps. Return = -67.65283374333278
Episode 58 finished after 92 timesteps. Return = -413.1242067143709
Episode 59 finished after 96 timesteps. Return = -134.2708548742756
Episode 60 finished after 96 timesteps. Return = -115.75873117970687
Episode 61 finished after 88 timesteps. Return = -127.17007197006436
Episode 62 finished after 112 timesteps. Return = -301.4526746821167
Episode 63 finished after 113 timesteps. Return = -211.53988109680003
Episode 64 finished after 68 timesteps. Return = -119.3757190542396
Episode 65 finished after 93 timesteps. 

Episode 174 finished after 69 timesteps. Return = -104.16899601506384
Episode 175 finished after 84 timesteps. Return = -96.07286939569624
Episode 176 finished after 131 timesteps. Return = -139.4199800131397
Episode 177 finished after 74 timesteps. Return = -91.46291264096092
Episode 178 finished after 115 timesteps. Return = -90.51635088079755
Episode 179 finished after 98 timesteps. Return = -292.12224853522514
Episode 180 finished after 79 timesteps. Return = -143.27721094359757
Episode 181 finished after 63 timesteps. Return = -124.11629519864661
Episode 182 finished after 84 timesteps. Return = -133.22669053051544
Episode 183 finished after 95 timesteps. Return = -171.2783776465923
Episode 184 finished after 96 timesteps. Return = -93.28849817463086
Episode 185 finished after 92 timesteps. Return = -114.1557484370562
Episode 186 finished after 62 timesteps. Return = -102.55848158584956
Episode 187 finished after 121 timesteps. Return = -151.4838767167584
Episode 188 finished afte

Episode 293 finished after 98 timesteps. Return = -126.0177373450193
Episode 294 finished after 85 timesteps. Return = -41.33706913169587
Episode 295 finished after 110 timesteps. Return = -104.89408300251816
Episode 296 finished after 122 timesteps. Return = -122.06720661409226
Episode 297 finished after 67 timesteps. Return = -93.27479221165001
Episode 298 finished after 92 timesteps. Return = -111.51403266195848
Episode 299 finished after 96 timesteps. Return = -278.2307061737931
Episode 300 finished after 59 timesteps. Return = -68.63417569381657
Episode 301 finished after 113 timesteps. Return = -174.5064029495523
Episode 302 finished after 108 timesteps. Return = -138.43405525667737
Episode 303 finished after 87 timesteps. Return = -364.50838279260915
Episode 304 finished after 122 timesteps. Return = -90.0417980348177
Episode 305 finished after 63 timesteps. Return = -74.82944791207059
Episode 306 finished after 78 timesteps. Return = 12.349499040470633
Episode 307 finished afte

Episode 412 finished after 107 timesteps. Return = -190.50868161911495
Episode 413 finished after 89 timesteps. Return = -91.3439183969028
Episode 414 finished after 65 timesteps. Return = -95.84527173026802
Episode 415 finished after 106 timesteps. Return = -191.69772948261556
Episode 416 finished after 64 timesteps. Return = -120.5916127759939
Episode 417 finished after 69 timesteps. Return = -99.47554883556316
Episode 418 finished after 69 timesteps. Return = -138.91836274515907
Episode 419 finished after 64 timesteps. Return = -117.55656377805636
Episode 420 finished after 112 timesteps. Return = -100.63873253717723
Episode 421 finished after 73 timesteps. Return = -171.74789328967256
Episode 422 finished after 88 timesteps. Return = -80.93309778150977
Episode 423 finished after 95 timesteps. Return = -227.75923425296233
Episode 424 finished after 72 timesteps. Return = -122.62037874926172
Episode 425 finished after 61 timesteps. Return = -145.92015232939673
Episode 426 finished af

Episode 531 finished after 61 timesteps. Return = -246.37178230884416
Episode 532 finished after 69 timesteps. Return = -184.35148436013293
Episode 533 finished after 88 timesteps. Return = -162.70473431205681
Episode 534 finished after 71 timesteps. Return = -110.32276938173662
Episode 535 finished after 89 timesteps. Return = -191.55526324541177
Episode 536 finished after 88 timesteps. Return = -137.48470208927156
Episode 537 finished after 104 timesteps. Return = -347.35429462174017
Episode 538 finished after 83 timesteps. Return = -128.69897913425598
Episode 539 finished after 101 timesteps. Return = -321.2059091532176
Episode 540 finished after 112 timesteps. Return = -138.7140622015092
Episode 541 finished after 100 timesteps. Return = -186.51693224204348
Episode 542 finished after 60 timesteps. Return = -83.74087998108158
Episode 543 finished after 91 timesteps. Return = -271.50812190237775
Episode 544 finished after 92 timesteps. Return = -82.63524718879307
Episode 545 finished

Episode 651 finished after 89 timesteps. Return = -228.12220614249142
Episode 652 finished after 100 timesteps. Return = -185.45600648118116
Episode 653 finished after 76 timesteps. Return = -143.29087653051056
Episode 654 finished after 118 timesteps. Return = -102.72088375033732
Episode 655 finished after 147 timesteps. Return = -174.84816168079723
Episode 656 finished after 87 timesteps. Return = -210.20080940540765
Episode 657 finished after 103 timesteps. Return = -247.27492503958965
Episode 658 finished after 65 timesteps. Return = -173.65910815190708
Episode 659 finished after 69 timesteps. Return = -136.58032105500303
Episode 660 finished after 61 timesteps. Return = -48.947477507433305
Episode 661 finished after 97 timesteps. Return = -320.6362398486302
Episode 662 finished after 82 timesteps. Return = -94.23389179871761
Episode 663 finished after 115 timesteps. Return = -143.3302872722158
Episode 664 finished after 63 timesteps. Return = -110.19392325927913
Episode 665 finish

Episode 770 finished after 93 timesteps. Return = -361.8565803384485
Episode 771 finished after 108 timesteps. Return = -135.48566836589796
Episode 772 finished after 88 timesteps. Return = -250.3636633763952
Episode 773 finished after 91 timesteps. Return = -135.93981507724035
Episode 774 finished after 83 timesteps. Return = -114.945171888071
Episode 775 finished after 97 timesteps. Return = -268.6064181853361
Episode 776 finished after 86 timesteps. Return = -112.61470449103413
Episode 777 finished after 76 timesteps. Return = -134.16225054043275
Episode 778 finished after 73 timesteps. Return = -159.8578804635983
Episode 779 finished after 115 timesteps. Return = -270.3042330619008
Episode 780 finished after 75 timesteps. Return = -90.15903085804347
Episode 781 finished after 110 timesteps. Return = -89.53135145783126
Episode 782 finished after 106 timesteps. Return = -93.1788889643171
Episode 783 finished after 63 timesteps. Return = -87.07968244575486
Episode 784 finished after 7

Episode 891 finished after 65 timesteps. Return = -96.93459357956606
Episode 892 finished after 70 timesteps. Return = -129.0138524831833
Episode 893 finished after 110 timesteps. Return = -129.28457841173622
Episode 894 finished after 82 timesteps. Return = -111.65611749548567
Episode 895 finished after 68 timesteps. Return = -102.59439201002591
Episode 896 finished after 100 timesteps. Return = -244.13858840666458
Episode 897 finished after 93 timesteps. Return = -162.34595983228257
Episode 898 finished after 117 timesteps. Return = -69.1203000088697
Episode 899 finished after 106 timesteps. Return = -134.01951849012602
Episode 900 finished after 68 timesteps. Return = -109.43949350388209
Episode 901 finished after 104 timesteps. Return = -89.9339252191532
Episode 902 finished after 66 timesteps. Return = -137.93576225471622
Episode 903 finished after 71 timesteps. Return = -130.13976292711345
Episode 904 finished after 103 timesteps. Return = -439.42475809934984
Episode 905 finished

Episode 1013 finished after 65 timesteps. Return = -88.45958021041145
Episode 1014 finished after 83 timesteps. Return = -107.9186525051391
Episode 1015 finished after 102 timesteps. Return = -180.21820573365346
Episode 1016 finished after 111 timesteps. Return = -116.64505797449951
Episode 1017 finished after 78 timesteps. Return = -122.6802303980975
Episode 1018 finished after 83 timesteps. Return = -127.47463020559329
Episode 1019 finished after 95 timesteps. Return = -101.23331010897266
Episode 1020 finished after 89 timesteps. Return = -164.24845534550627
Episode 1021 finished after 86 timesteps. Return = -59.48356706333652
Episode 1022 finished after 69 timesteps. Return = -77.76455948547365
Episode 1023 finished after 75 timesteps. Return = -172.05703557012384
Episode 1024 finished after 100 timesteps. Return = -198.3139196278279
Episode 1025 finished after 149 timesteps. Return = -226.21538281793028
Episode 1026 finished after 87 timesteps. Return = -132.49614493738437
Episode 

Episode 1132 finished after 57 timesteps. Return = -138.2695971758805
Episode 1133 finished after 72 timesteps. Return = -94.57885358579016
Episode 1134 finished after 106 timesteps. Return = -116.87686692018109
Episode 1135 finished after 71 timesteps. Return = -81.25965558927656
Episode 1136 finished after 103 timesteps. Return = -108.20637245317961
Episode 1137 finished after 82 timesteps. Return = -90.40593497598263
Episode 1138 finished after 98 timesteps. Return = -113.90729454444282
Episode 1139 finished after 87 timesteps. Return = 20.631578925663945
Episode 1140 finished after 79 timesteps. Return = -144.5357465799871
Episode 1141 finished after 72 timesteps. Return = -93.82775662617273
Episode 1142 finished after 70 timesteps. Return = -70.05193652521507
Episode 1143 finished after 77 timesteps. Return = -107.7858710485022
Episode 1144 finished after 94 timesteps. Return = -74.91320019861327
Episode 1145 finished after 78 timesteps. Return = -18.900678187387825
Episode 1146 f

Episode 1250 finished after 119 timesteps. Return = -332.7259759746458
Episode 1251 finished after 63 timesteps. Return = -55.008850081145184
Episode 1252 finished after 84 timesteps. Return = -116.15128190431903
Episode 1253 finished after 79 timesteps. Return = -51.88700284512791
Episode 1254 finished after 124 timesteps. Return = -110.45449933406938
Episode 1255 finished after 69 timesteps. Return = -79.40153731695843
Episode 1256 finished after 82 timesteps. Return = -144.77769289054464
Episode 1257 finished after 110 timesteps. Return = -248.85262202541747
Episode 1258 finished after 65 timesteps. Return = -58.45343312852228
Episode 1259 finished after 77 timesteps. Return = -79.42801623948986
Episode 1260 finished after 66 timesteps. Return = -107.333429522547
Episode 1261 finished after 118 timesteps. Return = -118.93839788915923
Episode 1262 finished after 84 timesteps. Return = -301.2421651623846
Episode 1263 finished after 71 timesteps. Return = -111.77003096074526
Episode 12

Episode 1367 finished after 68 timesteps. Return = -228.59114677115133
Episode 1368 finished after 112 timesteps. Return = -225.9833413445663
Episode 1369 finished after 89 timesteps. Return = -111.22829384477166
Episode 1370 finished after 66 timesteps. Return = -202.83345031308107
Episode 1371 finished after 104 timesteps. Return = -206.62670664032416
Episode 1372 finished after 64 timesteps. Return = -81.69461601776517
Episode 1373 finished after 88 timesteps. Return = -94.79198794266736
Episode 1374 finished after 109 timesteps. Return = -121.33048407252141
Episode 1375 finished after 69 timesteps. Return = -92.21788729586392
Episode 1376 finished after 116 timesteps. Return = -106.46027654045564
Episode 1377 finished after 95 timesteps. Return = -134.69080027575575
Episode 1378 finished after 119 timesteps. Return = -143.029825659174
Episode 1379 finished after 92 timesteps. Return = -87.75259186155537
Episode 1380 finished after 128 timesteps. Return = -100.72369530154172
Episode

Episode 1485 finished after 77 timesteps. Return = -85.5287289705638
Episode 1486 finished after 138 timesteps. Return = -440.05310023843776
Episode 1487 finished after 72 timesteps. Return = -106.567931202934
Episode 1488 finished after 114 timesteps. Return = -141.559200734021
Episode 1489 finished after 110 timesteps. Return = -76.68487029571808
Episode 1490 finished after 64 timesteps. Return = -100.49294848265485
Episode 1491 finished after 73 timesteps. Return = -53.08938616301481
Episode 1492 finished after 78 timesteps. Return = -17.280749534844404
Episode 1493 finished after 73 timesteps. Return = -102.49149544186264
Episode 1494 finished after 117 timesteps. Return = -115.1074106993381
Episode 1495 finished after 64 timesteps. Return = -61.83487951417389
Episode 1496 finished after 94 timesteps. Return = 7.799987629348806
Episode 1497 finished after 86 timesteps. Return = -110.4661972047806
Episode 1498 finished after 85 timesteps. Return = -97.41588788008077
Episode 1499 fin

Episode 1601 finished after 134 timesteps. Return = -142.1334206736833
Episode 1602 finished after 68 timesteps. Return = -130.75143700750368
Episode 1603 finished after 63 timesteps. Return = -53.66932285815433
Episode 1604 finished after 76 timesteps. Return = -99.97990443997702
Episode 1605 finished after 72 timesteps. Return = -32.57833504396217
Episode 1606 finished after 84 timesteps. Return = -105.72344778558535
Episode 1607 finished after 73 timesteps. Return = -215.0983914240749
Episode 1608 finished after 97 timesteps. Return = -377.1627189922241
Episode 1609 finished after 73 timesteps. Return = -35.55362707662505
Episode 1610 finished after 125 timesteps. Return = -81.69479198014348
Episode 1611 finished after 78 timesteps. Return = -112.66204672545173
Episode 1612 finished after 64 timesteps. Return = -106.59084455505602
Episode 1613 finished after 71 timesteps. Return = -80.66830964709045
Episode 1614 finished after 67 timesteps. Return = -61.80959586947712
Episode 1615 f

Episode 1717 finished after 85 timesteps. Return = -82.19684003420853
Episode 1718 finished after 62 timesteps. Return = -70.20765583579905
Episode 1719 finished after 127 timesteps. Return = 4.338780153617876
Episode 1720 finished after 80 timesteps. Return = -74.21137253974706
Episode 1721 finished after 78 timesteps. Return = -141.24644911124622
Episode 1722 finished after 91 timesteps. Return = -113.18724116034336
Episode 1723 finished after 82 timesteps. Return = -83.972712819404
Episode 1724 finished after 72 timesteps. Return = -71.98836960338319
Episode 1725 finished after 78 timesteps. Return = -83.49384484152405
Episode 1726 finished after 117 timesteps. Return = -125.25007667017735
Episode 1727 finished after 61 timesteps. Return = -103.25962770967716
Episode 1728 finished after 126 timesteps. Return = -97.37290130833023
Episode 1729 finished after 106 timesteps. Return = -181.81687421544075
Episode 1730 finished after 101 timesteps. Return = -159.8248397189971
Episode 1731 

Episode 1834 finished after 71 timesteps. Return = -112.25741489237134
Episode 1835 finished after 63 timesteps. Return = -136.09656951977905
Episode 1836 finished after 95 timesteps. Return = -162.75557306133058
Episode 1837 finished after 95 timesteps. Return = -181.90418877929258
Episode 1838 finished after 94 timesteps. Return = -413.07851840650966
Episode 1839 finished after 74 timesteps. Return = -105.08792799184098
Episode 1840 finished after 104 timesteps. Return = -87.75397378069886
Episode 1841 finished after 83 timesteps. Return = -94.31464412141919
Episode 1842 finished after 96 timesteps. Return = -155.6913369744877
Episode 1843 finished after 97 timesteps. Return = -97.76308792020765
Episode 1844 finished after 80 timesteps. Return = -162.1249520602369
Episode 1845 finished after 98 timesteps. Return = -143.7528332481138
Episode 1846 finished after 115 timesteps. Return = 28.20827963790981
Episode 1847 finished after 64 timesteps. Return = -154.00362440124013
Episode 1848

Episode 1950 finished after 115 timesteps. Return = -157.56413146012832
Episode 1951 finished after 83 timesteps. Return = -110.56307953073737
Episode 1952 finished after 78 timesteps. Return = -141.49167196979374
Episode 1953 finished after 63 timesteps. Return = -51.74599226092759
Episode 1954 finished after 61 timesteps. Return = -90.68530085055511
Episode 1955 finished after 73 timesteps. Return = -100.95880080710675
Episode 1956 finished after 92 timesteps. Return = -136.88718719659025
Episode 1957 finished after 98 timesteps. Return = -118.73927215394502
Episode 1958 finished after 131 timesteps. Return = -118.76528982385999
Episode 1959 finished after 104 timesteps. Return = -115.06219600565912
Episode 1960 finished after 67 timesteps. Return = -106.20284922890303
Episode 1961 finished after 67 timesteps. Return = -105.01447626068112
Episode 1962 finished after 83 timesteps. Return = -114.10818972158943
Episode 1963 finished after 64 timesteps. Return = -81.05240180713687
Episod

Episode 2068 finished after 107 timesteps. Return = -229.22140602820878
Episode 2069 finished after 101 timesteps. Return = -150.59283404835452
Episode 2070 finished after 116 timesteps. Return = -203.55625892988837
Episode 2071 finished after 92 timesteps. Return = -86.9218320968182
Episode 2072 finished after 99 timesteps. Return = -163.57159271160472
Episode 2073 finished after 113 timesteps. Return = -78.34035196329782
Episode 2074 finished after 82 timesteps. Return = -91.43933650171526
Episode 2075 finished after 67 timesteps. Return = -97.60198068826814
Episode 2076 finished after 62 timesteps. Return = -62.14959783415489
Episode 2077 finished after 92 timesteps. Return = -220.33706732217803
Episode 2078 finished after 116 timesteps. Return = -320.61004062710754
Episode 2079 finished after 74 timesteps. Return = -99.94522750641605
Episode 2080 finished after 81 timesteps. Return = -145.52828324717413
Episode 2081 finished after 102 timesteps. Return = -122.65855014532693
Episode

Episode 2187 finished after 75 timesteps. Return = -127.27695285396557
Episode 2188 finished after 107 timesteps. Return = -80.0329675819642
Episode 2189 finished after 89 timesteps. Return = -189.63965927194891
Episode 2190 finished after 84 timesteps. Return = -105.9654536388988
Episode 2191 finished after 78 timesteps. Return = -47.17951438326127
Episode 2192 finished after 96 timesteps. Return = -145.17428937647085
Episode 2193 finished after 98 timesteps. Return = -135.29439704417462
Episode 2194 finished after 66 timesteps. Return = -131.93859844571313
Episode 2195 finished after 62 timesteps. Return = -104.17156664223673
Episode 2196 finished after 75 timesteps. Return = -75.26607258676214
Episode 2197 finished after 95 timesteps. Return = -185.5066970187989
Episode 2198 finished after 80 timesteps. Return = -110.366410160591
Episode 2199 finished after 96 timesteps. Return = -102.88492502584032
Episode 2200 finished after 101 timesteps. Return = -124.82783169929827
Episode 2201

Episode 2306 finished after 77 timesteps. Return = -90.29164662929972
Episode 2307 finished after 97 timesteps. Return = -149.02170711134272
Episode 2308 finished after 114 timesteps. Return = -113.32506815734101
Episode 2309 finished after 92 timesteps. Return = -110.31626310677323
Episode 2310 finished after 62 timesteps. Return = -87.51360622921275
Episode 2311 finished after 141 timesteps. Return = -12.708356319145508
Episode 2312 finished after 107 timesteps. Return = -66.33480560622948
Episode 2313 finished after 70 timesteps. Return = -76.47514166851366
Episode 2314 finished after 76 timesteps. Return = -188.82441161507387
Episode 2315 finished after 66 timesteps. Return = -143.4827052465926
Episode 2316 finished after 103 timesteps. Return = -115.17987906801436
Episode 2317 finished after 62 timesteps. Return = -89.9469884935432
Episode 2318 finished after 107 timesteps. Return = -108.62470894407755
Episode 2319 finished after 68 timesteps. Return = -158.32761234422162
Episode 

Episode 2422 finished after 116 timesteps. Return = -71.9519216471641
Episode 2423 finished after 61 timesteps. Return = -134.21395502558335
Episode 2424 finished after 135 timesteps. Return = -303.0777319821201
Episode 2425 finished after 128 timesteps. Return = -111.08378340593278
Episode 2426 finished after 103 timesteps. Return = -120.96419715596919
Episode 2427 finished after 76 timesteps. Return = -128.14289298486472
Episode 2428 finished after 108 timesteps. Return = -146.62268299471364
Episode 2429 finished after 110 timesteps. Return = -125.80935438356143
Episode 2430 finished after 62 timesteps. Return = -96.04086674453632
Episode 2431 finished after 70 timesteps. Return = -94.96210471397275
Episode 2432 finished after 103 timesteps. Return = -278.20927119726866
Episode 2433 finished after 106 timesteps. Return = -184.55217848194985
Episode 2434 finished after 123 timesteps. Return = -141.88160201097133
Episode 2435 finished after 90 timesteps. Return = -107.13856648735232
Ep

Episode 2538 finished after 97 timesteps. Return = -224.36052913575776
Episode 2539 finished after 95 timesteps. Return = -108.02833422559983
Episode 2540 finished after 96 timesteps. Return = -134.82963793558804
Episode 2541 finished after 69 timesteps. Return = -91.69984254283854
Episode 2542 finished after 82 timesteps. Return = -61.01604984176633
Episode 2543 finished after 115 timesteps. Return = -144.07663897851492
Episode 2544 finished after 58 timesteps. Return = -91.3370154420889
Episode 2545 finished after 91 timesteps. Return = -142.41482625924237
Episode 2546 finished after 83 timesteps. Return = -125.77033570187804
Episode 2547 finished after 103 timesteps. Return = -110.28013999242052
Episode 2548 finished after 71 timesteps. Return = -74.191418221381
Episode 2549 finished after 96 timesteps. Return = -124.32498098619942
Episode 2550 finished after 74 timesteps. Return = -82.20661271698434
Episode 2551 finished after 63 timesteps. Return = -87.26098092554776
Episode 2552 

Episode 2657 finished after 73 timesteps. Return = -179.4971353945062
Episode 2658 finished after 59 timesteps. Return = -138.58080938727505
Episode 2659 finished after 118 timesteps. Return = -125.16845317702207
Episode 2660 finished after 64 timesteps. Return = -118.22541305705752
Episode 2661 finished after 108 timesteps. Return = -116.4126575188893
Episode 2662 finished after 70 timesteps. Return = -106.19837636866741
Episode 2663 finished after 82 timesteps. Return = -168.4693659313284
Episode 2664 finished after 116 timesteps. Return = -4.2295779018631094
Episode 2665 finished after 94 timesteps. Return = -117.57461018595916
Episode 2666 finished after 94 timesteps. Return = -184.00741955185816
Episode 2667 finished after 92 timesteps. Return = -328.5747288171206
Episode 2668 finished after 63 timesteps. Return = -80.40177482118241
Episode 2669 finished after 72 timesteps. Return = -83.05354736422733
Episode 2670 finished after 86 timesteps. Return = -161.49665327761312
Episode 2

Episode 2777 finished after 89 timesteps. Return = -121.86017211509669
Episode 2778 finished after 92 timesteps. Return = -90.14669159409603
Episode 2779 finished after 60 timesteps. Return = -70.82182825082069
Episode 2780 finished after 67 timesteps. Return = -67.2535716439236
Episode 2781 finished after 85 timesteps. Return = -140.1370957489271
Episode 2782 finished after 80 timesteps. Return = -68.83820016411902
Episode 2783 finished after 112 timesteps. Return = -108.40585331260453
Episode 2784 finished after 93 timesteps. Return = -127.73518670638795
Episode 2785 finished after 76 timesteps. Return = -371.2762282809266
Episode 2786 finished after 71 timesteps. Return = -117.708207677774
Episode 2787 finished after 99 timesteps. Return = -91.73901221061936
Episode 2788 finished after 70 timesteps. Return = -180.31759344005047
Episode 2789 finished after 108 timesteps. Return = -152.64296873876367
Episode 2790 finished after 116 timesteps. Return = -78.60232139390557
Episode 2791 f

Episode 2896 finished after 112 timesteps. Return = -115.07687314641323
Episode 2897 finished after 114 timesteps. Return = -207.25477176315297
Episode 2898 finished after 72 timesteps. Return = -46.11702172716179
Episode 2899 finished after 75 timesteps. Return = -99.29300026414307
Episode 2900 finished after 89 timesteps. Return = -107.32957744373869
Episode 2901 finished after 83 timesteps. Return = -79.03473801824282
Episode 2902 finished after 82 timesteps. Return = -134.18275569595215
Episode 2903 finished after 88 timesteps. Return = -90.92844508561903
Episode 2904 finished after 66 timesteps. Return = -77.20424509693358
Episode 2905 finished after 84 timesteps. Return = -109.27379021500273
Episode 2906 finished after 66 timesteps. Return = -59.65039567206397
Episode 2907 finished after 83 timesteps. Return = -108.11512973587932
Episode 2908 finished after 98 timesteps. Return = -121.1389577370764
Episode 2909 finished after 84 timesteps. Return = -121.95421752230783
Episode 291

Episode 3016 finished after 64 timesteps. Return = -109.76638205442693
Episode 3017 finished after 63 timesteps. Return = -79.66055799030023
Episode 3018 finished after 128 timesteps. Return = -158.62755758765212
Episode 3019 finished after 82 timesteps. Return = -117.34806054297508
Episode 3020 finished after 112 timesteps. Return = -255.31477722272513
Episode 3021 finished after 84 timesteps. Return = -164.88235104406363
Episode 3022 finished after 70 timesteps. Return = -55.19212769404812
Episode 3023 finished after 73 timesteps. Return = -168.59029589595303
Episode 3024 finished after 80 timesteps. Return = -132.0191289479098
Episode 3025 finished after 67 timesteps. Return = -89.94404998522567
Episode 3026 finished after 77 timesteps. Return = -285.03682503500306
Episode 3027 finished after 86 timesteps. Return = -177.96203940319958
Episode 3028 finished after 93 timesteps. Return = -125.76471238642227
Episode 3029 finished after 113 timesteps. Return = -184.6805897743543
Episode 

Episode 3132 finished after 86 timesteps. Return = -87.61765315223617
Episode 3133 finished after 120 timesteps. Return = -75.22537865619333
Episode 3134 finished after 100 timesteps. Return = -239.44426393677293
Episode 3135 finished after 67 timesteps. Return = -65.48376077366719
Episode 3136 finished after 81 timesteps. Return = -82.62412412515114
Episode 3137 finished after 110 timesteps. Return = -112.40083043169693
Episode 3138 finished after 63 timesteps. Return = -95.24644218449643
Episode 3139 finished after 134 timesteps. Return = -28.665487045831696
Episode 3140 finished after 93 timesteps. Return = -127.36704914362255
Episode 3141 finished after 140 timesteps. Return = -7.824161148022839
Episode 3142 finished after 101 timesteps. Return = -165.67157115445337
Episode 3143 finished after 115 timesteps. Return = -174.93841263607476
Episode 3144 finished after 91 timesteps. Return = -56.420654024629826
Episode 3145 finished after 89 timesteps. Return = -63.78824531450823
Episod

Episode 3251 finished after 104 timesteps. Return = -109.86001516912017
Episode 3252 finished after 109 timesteps. Return = -226.1781374806227
Episode 3253 finished after 86 timesteps. Return = -151.737521201921
Episode 3254 finished after 89 timesteps. Return = -108.89990579149224
Episode 3255 finished after 72 timesteps. Return = -37.66616379446221
Episode 3256 finished after 127 timesteps. Return = -341.43778772915357
Episode 3257 finished after 68 timesteps. Return = -95.7671193626859
Episode 3258 finished after 88 timesteps. Return = -121.64633045062868
Episode 3259 finished after 109 timesteps. Return = -130.6233583092965
Episode 3260 finished after 81 timesteps. Return = -57.7889151571631
Episode 3261 finished after 114 timesteps. Return = -192.5290784010817
Episode 3262 finished after 66 timesteps. Return = -82.8399659260509
Episode 3263 finished after 83 timesteps. Return = -26.45036265821298
Episode 3264 finished after 81 timesteps. Return = -129.06581996224335
Episode 3265 f

Episode 3368 finished after 60 timesteps. Return = -159.25667201823285
Episode 3369 finished after 70 timesteps. Return = -118.74418725629769
Episode 3370 finished after 87 timesteps. Return = -196.616725021536
Episode 3371 finished after 89 timesteps. Return = -138.33967160655487
Episode 3372 finished after 71 timesteps. Return = -154.29761125367185
Episode 3373 finished after 105 timesteps. Return = -212.3391213033425
Episode 3374 finished after 94 timesteps. Return = -127.47396195588036
Episode 3375 finished after 66 timesteps. Return = -115.95760503264893
Episode 3376 finished after 109 timesteps. Return = 66.90791919956717
Episode 3377 finished after 97 timesteps. Return = -158.07918970936475
Episode 3378 finished after 105 timesteps. Return = -150.44565299310162
Episode 3379 finished after 88 timesteps. Return = -199.99029593502559
Episode 3380 finished after 77 timesteps. Return = -78.03326118076444
Episode 3381 finished after 84 timesteps. Return = -139.97691700622016
Episode 3

Episode 3487 finished after 61 timesteps. Return = -211.9553797088694
Episode 3488 finished after 92 timesteps. Return = -258.6628288279677
Episode 3489 finished after 76 timesteps. Return = -231.4808073756695
Episode 3490 finished after 70 timesteps. Return = -89.65980717400026
Episode 3491 finished after 79 timesteps. Return = -136.35545110942456
Episode 3492 finished after 80 timesteps. Return = -83.99038893294059
Episode 3493 finished after 108 timesteps. Return = -205.1180002404171
Episode 3494 finished after 78 timesteps. Return = -203.1892041553272
Episode 3495 finished after 83 timesteps. Return = -111.22611927703765
Episode 3496 finished after 71 timesteps. Return = -56.21036728005019
Episode 3497 finished after 70 timesteps. Return = -83.63423260642884
Episode 3498 finished after 72 timesteps. Return = -252.27624682354298
Episode 3499 finished after 68 timesteps. Return = -78.02160227758986
Episode 3500 finished after 124 timesteps. Return = -82.1940567328942
Episode 3501 fin

Episode 3607 finished after 74 timesteps. Return = -22.27019177815572
Episode 3608 finished after 78 timesteps. Return = -73.41817595382551
Episode 3609 finished after 89 timesteps. Return = -433.6822208980352
Episode 3610 finished after 117 timesteps. Return = -183.0865349294996
Episode 3611 finished after 70 timesteps. Return = -99.08053720297944
Episode 3612 finished after 134 timesteps. Return = -96.11604649620995
Episode 3613 finished after 99 timesteps. Return = -199.69068197739426
Episode 3614 finished after 78 timesteps. Return = -102.03584574962935
Episode 3615 finished after 66 timesteps. Return = -79.90452833427949
Episode 3616 finished after 81 timesteps. Return = -80.871922251248
Episode 3617 finished after 126 timesteps. Return = -71.77205631384555
Episode 3618 finished after 108 timesteps. Return = -60.7423039682411
Episode 3619 finished after 71 timesteps. Return = -79.51959305194524
Episode 3620 finished after 69 timesteps. Return = -120.07775404990556
Episode 3621 fin

Episode 3724 finished after 82 timesteps. Return = -100.70521756279616
Episode 3725 finished after 89 timesteps. Return = -137.0639331438652
Episode 3726 finished after 76 timesteps. Return = -140.80255013229015
Episode 3727 finished after 87 timesteps. Return = -248.6995683250136
Episode 3728 finished after 100 timesteps. Return = -192.01171086284205
Episode 3729 finished after 69 timesteps. Return = -70.37999372938766
Episode 3730 finished after 97 timesteps. Return = -194.19382282592045
Episode 3731 finished after 71 timesteps. Return = -121.99367964201772
Episode 3732 finished after 83 timesteps. Return = -110.12137107967048
Episode 3733 finished after 91 timesteps. Return = -244.4943383629803
Episode 3734 finished after 66 timesteps. Return = -144.54613982743115
Episode 3735 finished after 66 timesteps. Return = -125.13949818663559
Episode 3736 finished after 96 timesteps. Return = -125.20112636739007
Episode 3737 finished after 94 timesteps. Return = -96.93775305017724
Episode 37

Episode 3841 finished after 84 timesteps. Return = -130.44530579362424
Episode 3842 finished after 85 timesteps. Return = -75.03593321112818
Episode 3843 finished after 104 timesteps. Return = -117.3188188173526
Episode 3844 finished after 83 timesteps. Return = -111.6134579687905
Episode 3845 finished after 93 timesteps. Return = -87.70668567973263
Episode 3846 finished after 71 timesteps. Return = -105.02953812192557
Episode 3847 finished after 75 timesteps. Return = -93.76671366550688
Episode 3848 finished after 104 timesteps. Return = -194.70444826677783
Episode 3849 finished after 72 timesteps. Return = -100.33975826389916
Episode 3850 finished after 66 timesteps. Return = -77.29784367603693
Episode 3851 finished after 90 timesteps. Return = -153.65702052121927
Episode 3852 finished after 69 timesteps. Return = -97.74912764205918
Episode 3853 finished after 73 timesteps. Return = -291.30290831687194
Episode 3854 finished after 69 timesteps. Return = -73.73028882876599
Episode 3855

Episode 3957 finished after 106 timesteps. Return = -217.0333505897993
Episode 3958 finished after 104 timesteps. Return = -263.8035159528848
Episode 3959 finished after 59 timesteps. Return = -60.29206501674836
Episode 3960 finished after 99 timesteps. Return = -6.750245104156463
Episode 3961 finished after 74 timesteps. Return = -91.86242017158825
Episode 3962 finished after 64 timesteps. Return = -136.55102076057327
Episode 3963 finished after 79 timesteps. Return = -134.85887493425298
Episode 3964 finished after 78 timesteps. Return = -77.01499534552583
Episode 3965 finished after 125 timesteps. Return = -475.50670976793975
Episode 3966 finished after 126 timesteps. Return = -124.32706350739188
Episode 3967 finished after 94 timesteps. Return = -133.63205487907558
Episode 3968 finished after 94 timesteps. Return = -184.32424295672362
Episode 3969 finished after 86 timesteps. Return = -358.94611463417255
Episode 3970 finished after 110 timesteps. Return = -289.4425059471291
Episode 

Episode 4075 finished after 119 timesteps. Return = -167.84675679151417
Episode 4076 finished after 110 timesteps. Return = -282.3738791189304
Episode 4077 finished after 67 timesteps. Return = -84.54402393195966
Episode 4078 finished after 78 timesteps. Return = -177.23452245877547
Episode 4079 finished after 89 timesteps. Return = -104.33771756080642
Episode 4080 finished after 106 timesteps. Return = -180.8349396935304
Episode 4081 finished after 70 timesteps. Return = -105.38385638282257
Episode 4082 finished after 112 timesteps. Return = -52.68114498239113
Episode 4083 finished after 83 timesteps. Return = -192.46370767679616
Episode 4084 finished after 94 timesteps. Return = -178.411022456327
Episode 4085 finished after 75 timesteps. Return = -152.45051122939972
Episode 4086 finished after 127 timesteps. Return = -241.79439867320713
Episode 4087 finished after 108 timesteps. Return = -188.31788575347122
Episode 4088 finished after 71 timesteps. Return = -117.67010381426843
Episod

Episode 4192 finished after 93 timesteps. Return = -130.29847004599878
Episode 4193 finished after 111 timesteps. Return = -117.0451824999583
Episode 4194 finished after 117 timesteps. Return = -343.4368158428256
Episode 4195 finished after 91 timesteps. Return = -167.7196173603396
Episode 4196 finished after 65 timesteps. Return = -75.77530404612979
Episode 4197 finished after 96 timesteps. Return = -113.89135269982536
Episode 4198 finished after 88 timesteps. Return = -83.43523465189304
Episode 4199 finished after 105 timesteps. Return = -86.18459564179328
Episode 4200 finished after 120 timesteps. Return = -60.42702160923794
Episode 4201 finished after 120 timesteps. Return = -349.6369689228179
Episode 4202 finished after 59 timesteps. Return = -146.42654205740456
Episode 4203 finished after 73 timesteps. Return = -63.643164275518785
Episode 4204 finished after 74 timesteps. Return = -140.47482079569318
Episode 4205 finished after 66 timesteps. Return = -84.70129253623648
Episode 42

Episode 4312 finished after 119 timesteps. Return = -292.412300536342
Episode 4313 finished after 71 timesteps. Return = -181.28303746473654
Episode 4314 finished after 84 timesteps. Return = -140.7672096862241
Episode 4315 finished after 109 timesteps. Return = -118.90900356361668
Episode 4316 finished after 87 timesteps. Return = -136.23616903554998
Episode 4317 finished after 92 timesteps. Return = -152.073803229698
Episode 4318 finished after 102 timesteps. Return = -205.98063336498802
Episode 4319 finished after 57 timesteps. Return = -180.3996620965681
Episode 4320 finished after 73 timesteps. Return = -108.41912035548256
Episode 4321 finished after 97 timesteps. Return = -190.73154433254103
Episode 4322 finished after 82 timesteps. Return = -94.02493035486329
Episode 4323 finished after 102 timesteps. Return = -156.2885542918428
Episode 4324 finished after 69 timesteps. Return = -63.565119991449905
Episode 4325 finished after 62 timesteps. Return = -73.46866746793566
Episode 432

Episode 4431 finished after 98 timesteps. Return = -89.74321180855297
Episode 4432 finished after 79 timesteps. Return = -106.94489061040626
Episode 4433 finished after 70 timesteps. Return = -77.46409949765389
Episode 4434 finished after 69 timesteps. Return = -88.60075670950069
Episode 4435 finished after 121 timesteps. Return = -135.25547163964006
Episode 4436 finished after 97 timesteps. Return = -164.5634743058227
Episode 4437 finished after 75 timesteps. Return = -120.8424104601613
Episode 4438 finished after 73 timesteps. Return = -75.52501616103835
Episode 4439 finished after 75 timesteps. Return = -98.65922765411099
Episode 4440 finished after 79 timesteps. Return = -96.07656019652191
Episode 4441 finished after 75 timesteps. Return = -144.41360614203523
Episode 4442 finished after 99 timesteps. Return = -126.70289885350387
Episode 4443 finished after 71 timesteps. Return = -174.48217647816813
Episode 4444 finished after 90 timesteps. Return = -229.74409439209353
Episode 4445 

Episode 4551 finished after 98 timesteps. Return = -118.0058598333478
Episode 4552 finished after 84 timesteps. Return = -307.6052980532925
Episode 4553 finished after 116 timesteps. Return = -21.818017161574318
Episode 4554 finished after 70 timesteps. Return = -65.74274132828538
Episode 4555 finished after 66 timesteps. Return = -125.24048032468593
Episode 4556 finished after 74 timesteps. Return = -157.5188825968003
Episode 4557 finished after 68 timesteps. Return = -128.4704986298416
Episode 4558 finished after 109 timesteps. Return = -137.30884662662422
Episode 4559 finished after 55 timesteps. Return = -82.17292436823803
Episode 4560 finished after 56 timesteps. Return = -214.01737823135466
Episode 4561 finished after 169 timesteps. Return = -601.8355325458633
Episode 4562 finished after 94 timesteps. Return = -189.63515374012866
Episode 4563 finished after 58 timesteps. Return = -188.74239623868218
Episode 4564 finished after 88 timesteps. Return = -173.55330136228247
Episode 45

Episode 4670 finished after 104 timesteps. Return = -124.28829027579428
Episode 4671 finished after 92 timesteps. Return = -378.9272571439518
Episode 4672 finished after 112 timesteps. Return = -113.94534872298274
Episode 4673 finished after 114 timesteps. Return = -88.07295083905642
Episode 4674 finished after 101 timesteps. Return = -191.27491850455118
Episode 4675 finished after 74 timesteps. Return = -135.62865688644126
Episode 4676 finished after 94 timesteps. Return = -364.68579099526517
Episode 4677 finished after 84 timesteps. Return = -96.33822181210289
Episode 4678 finished after 81 timesteps. Return = -76.80956404010519
Episode 4679 finished after 68 timesteps. Return = -101.68064664966693
Episode 4680 finished after 100 timesteps. Return = -108.1660361532173
Episode 4681 finished after 99 timesteps. Return = -148.03521569074078
Episode 4682 finished after 64 timesteps. Return = -131.02457950597505
Episode 4683 finished after 100 timesteps. Return = -118.73892783308894
Episo

Episode 4788 finished after 97 timesteps. Return = -150.8615838377658
Episode 4789 finished after 87 timesteps. Return = -171.45823098582048
Episode 4790 finished after 75 timesteps. Return = -107.62302178985492
Episode 4791 finished after 77 timesteps. Return = -103.48373267739228
Episode 4792 finished after 95 timesteps. Return = -393.5794808122323
Episode 4793 finished after 95 timesteps. Return = -101.2243789193131
Episode 4794 finished after 72 timesteps. Return = -178.63011317316688
Episode 4795 finished after 62 timesteps. Return = -57.803465623301804
Episode 4796 finished after 74 timesteps. Return = -181.0845964572585
Episode 4797 finished after 80 timesteps. Return = -56.75833896790967
Episode 4798 finished after 75 timesteps. Return = -56.65225455509849
Episode 4799 finished after 78 timesteps. Return = 24.455190071404388
Episode 4800 finished after 98 timesteps. Return = -151.84383181979348
Episode 4801 finished after 67 timesteps. Return = -175.758157705896
Episode 4802 fi

Episode 4907 finished after 86 timesteps. Return = -330.3778233202946
Episode 4908 finished after 78 timesteps. Return = -140.7911020298737
Episode 4909 finished after 88 timesteps. Return = -111.44558321370657
Episode 4910 finished after 64 timesteps. Return = -116.82551256230815
Episode 4911 finished after 81 timesteps. Return = -118.20328195786136
Episode 4912 finished after 78 timesteps. Return = -113.33917858452469
Episode 4913 finished after 70 timesteps. Return = -85.98324131799572
Episode 4914 finished after 129 timesteps. Return = -102.34945796647308
Episode 4915 finished after 87 timesteps. Return = -41.505130863908306
Episode 4916 finished after 91 timesteps. Return = -131.1276320710717
Episode 4917 finished after 64 timesteps. Return = -181.9913605258584
Episode 4918 finished after 96 timesteps. Return = -124.6877109051655
Episode 4919 finished after 96 timesteps. Return = -214.44151850010687
Episode 4920 finished after 63 timesteps. Return = -105.09213343470685
Episode 492

Episode 5026 finished after 61 timesteps. Return = -95.13466549064563
Episode 5027 finished after 78 timesteps. Return = -85.51257966521473
Episode 5028 finished after 63 timesteps. Return = -151.9891415921013
Episode 5029 finished after 103 timesteps. Return = -111.68479038058945
Episode 5030 finished after 60 timesteps. Return = -72.9891757150116
Episode 5031 finished after 76 timesteps. Return = -104.52498918982815
Episode 5032 finished after 94 timesteps. Return = -118.06809376460312
Episode 5033 finished after 80 timesteps. Return = -122.85098402376536
Episode 5034 finished after 67 timesteps. Return = -167.0497746241677
Episode 5035 finished after 114 timesteps. Return = -82.59484914732033
Episode 5036 finished after 93 timesteps. Return = -287.97523576138053
Episode 5037 finished after 79 timesteps. Return = -96.18725201234318
Episode 5038 finished after 115 timesteps. Return = -115.34152880991877
Episode 5039 finished after 94 timesteps. Return = -110.92706886084986
Episode 504

Episode 5144 finished after 106 timesteps. Return = -102.64966534214084
Episode 5145 finished after 86 timesteps. Return = -45.61482820856135
Episode 5146 finished after 109 timesteps. Return = -161.28525566509126
Episode 5147 finished after 57 timesteps. Return = -145.53730962370594
Episode 5148 finished after 75 timesteps. Return = -95.50258702171757
Episode 5149 finished after 104 timesteps. Return = -249.5256725488996
Episode 5150 finished after 61 timesteps. Return = -82.48397708635889
Episode 5151 finished after 91 timesteps. Return = -7.904696418687337
Episode 5152 finished after 78 timesteps. Return = -110.54282472917723
Episode 5153 finished after 82 timesteps. Return = -239.77483653215407
Episode 5154 finished after 103 timesteps. Return = 8.915441949145318
Episode 5155 finished after 79 timesteps. Return = -119.00039517544295
Episode 5156 finished after 74 timesteps. Return = -84.92624668392313
Episode 5157 finished after 73 timesteps. Return = -119.30956444480238
Episode 51

Episode 5264 finished after 119 timesteps. Return = -301.3263808520542
Episode 5265 finished after 80 timesteps. Return = -89.34346240201768
Episode 5266 finished after 117 timesteps. Return = -53.636872821343495
Episode 5267 finished after 72 timesteps. Return = -135.55932895364813
Episode 5268 finished after 93 timesteps. Return = -313.42962785310795
Episode 5269 finished after 71 timesteps. Return = -230.90047157722125
Episode 5270 finished after 97 timesteps. Return = -201.75645846963138
Episode 5271 finished after 64 timesteps. Return = -86.74167428594116
Episode 5272 finished after 122 timesteps. Return = -224.6241572400856
Episode 5273 finished after 68 timesteps. Return = -57.97008163922408
Episode 5274 finished after 85 timesteps. Return = -123.2181173406348
Episode 5275 finished after 80 timesteps. Return = -146.47127339228285
Episode 5276 finished after 60 timesteps. Return = -161.76816063878584
Episode 5277 finished after 80 timesteps. Return = -105.88774905017888
Episode 5

Episode 5382 finished after 103 timesteps. Return = -83.0051252542339
Episode 5383 finished after 85 timesteps. Return = -152.64571412467114
Episode 5384 finished after 87 timesteps. Return = -135.3660909232213
Episode 5385 finished after 76 timesteps. Return = -122.11674234930979
Episode 5386 finished after 69 timesteps. Return = -65.98368702267767
Episode 5387 finished after 68 timesteps. Return = -59.31123724767694
Episode 5388 finished after 65 timesteps. Return = -108.26802716216017
Episode 5389 finished after 66 timesteps. Return = -19.315165768096335
Episode 5390 finished after 84 timesteps. Return = -54.527873837984316
Episode 5391 finished after 100 timesteps. Return = -203.94034257316326
Episode 5392 finished after 99 timesteps. Return = -446.67623179236165
Episode 5393 finished after 112 timesteps. Return = -167.17334741650143
Episode 5394 finished after 104 timesteps. Return = -199.25923659295384
Episode 5395 finished after 90 timesteps. Return = -151.13900215026754
Episode

Episode 5500 finished after 129 timesteps. Return = -195.86356026281027
Episode 5501 finished after 97 timesteps. Return = -213.52306078372214
Episode 5502 finished after 64 timesteps. Return = -104.75272349023363
Episode 5503 finished after 116 timesteps. Return = -122.12226811234784
Episode 5504 finished after 55 timesteps. Return = -172.41662370600153
Episode 5505 finished after 87 timesteps. Return = -172.27313827761185
Episode 5506 finished after 73 timesteps. Return = -257.6862654881854
Episode 5507 finished after 78 timesteps. Return = -129.99437827729403
Episode 5508 finished after 121 timesteps. Return = -155.28883070209318
Episode 5509 finished after 72 timesteps. Return = -131.71610889335312
Episode 5510 finished after 93 timesteps. Return = -226.9231762195477
Episode 5511 finished after 107 timesteps. Return = -250.4302058867489
Episode 5512 finished after 65 timesteps. Return = -32.46835074664848
Episode 5513 finished after 68 timesteps. Return = -113.9024808599425
Episode

Episode 5619 finished after 65 timesteps. Return = -61.5283447816501
Episode 5620 finished after 94 timesteps. Return = -236.0736473600889
Episode 5621 finished after 92 timesteps. Return = -225.6653009325271
Episode 5622 finished after 93 timesteps. Return = 38.73787725852148
Episode 5623 finished after 75 timesteps. Return = -78.61579513804375
Episode 5624 finished after 77 timesteps. Return = -189.59257389517344
Episode 5625 finished after 90 timesteps. Return = -205.7718968400066
Episode 5626 finished after 118 timesteps. Return = -139.7304421505689
Episode 5627 finished after 77 timesteps. Return = -103.28406261337938
Episode 5628 finished after 78 timesteps. Return = -43.07030208867022
Episode 5629 finished after 82 timesteps. Return = -249.40716254492077
Episode 5630 finished after 77 timesteps. Return = -68.5177290076243
Episode 5631 finished after 1000 timesteps. Return = 89.0298176834688
Episode 5632 finished after 61 timesteps. Return = -89.28191869697545
Episode 5633 finish

Episode 5741 finished after 105 timesteps. Return = -8.726691795320875
Episode 5742 finished after 78 timesteps. Return = -73.41634618321183
Episode 5743 finished after 74 timesteps. Return = -185.30956348358686
Episode 5744 finished after 65 timesteps. Return = -130.19471381305254
Episode 5745 finished after 76 timesteps. Return = -230.5437781729135
Episode 5746 finished after 74 timesteps. Return = -80.6174441767332
Episode 5747 finished after 75 timesteps. Return = -67.54236343850096
Episode 5748 finished after 92 timesteps. Return = -127.41615714144645
Episode 5749 finished after 91 timesteps. Return = -192.5861283419926
Episode 5750 finished after 69 timesteps. Return = -187.61667454052224
Episode 5751 finished after 93 timesteps. Return = -178.02464440100923
Episode 5752 finished after 61 timesteps. Return = -92.39646130077931
Episode 5753 finished after 112 timesteps. Return = -177.30511643402247
Episode 5754 finished after 71 timesteps. Return = -124.28508788464552
Episode 5755

Episode 5857 finished after 107 timesteps. Return = -150.7171226228597
Episode 5858 finished after 83 timesteps. Return = -70.37855144221199
Episode 5859 finished after 74 timesteps. Return = -154.7816009289554
Episode 5860 finished after 107 timesteps. Return = -211.66591496285713
Episode 5861 finished after 81 timesteps. Return = -143.84034299861793
Episode 5862 finished after 81 timesteps. Return = -118.81043747299158
Episode 5863 finished after 139 timesteps. Return = -66.18703392413991
Episode 5864 finished after 64 timesteps. Return = -159.67951160870552
Episode 5865 finished after 76 timesteps. Return = -171.53163816061777
Episode 5866 finished after 105 timesteps. Return = -128.90098695474168
Episode 5867 finished after 107 timesteps. Return = -132.48752488321472
Episode 5868 finished after 80 timesteps. Return = -193.0822576003586
Episode 5869 finished after 71 timesteps. Return = -77.77902818965089
Episode 5870 finished after 93 timesteps. Return = -112.14052365696406
Episode

Episode 5973 finished after 112 timesteps. Return = -276.74323660916605
Episode 5974 finished after 106 timesteps. Return = -376.6848987894204
Episode 5975 finished after 90 timesteps. Return = -62.82804462459515
Episode 5976 finished after 62 timesteps. Return = -109.65237883962648
Episode 5977 finished after 90 timesteps. Return = -100.11571589465134
Episode 5978 finished after 79 timesteps. Return = -54.61377907984564
Episode 5979 finished after 93 timesteps. Return = 26.063720409502295
Episode 5980 finished after 94 timesteps. Return = -231.56069177184904
Episode 5981 finished after 69 timesteps. Return = -137.2358290774315
Episode 5982 finished after 92 timesteps. Return = -216.8061494501365
Episode 5983 finished after 94 timesteps. Return = -208.14686686851383
Episode 5984 finished after 73 timesteps. Return = -97.9585935755684
Episode 5985 finished after 87 timesteps. Return = -144.6295834125666
Episode 5986 finished after 121 timesteps. Return = -185.77269100107043
Episode 5987

Episode 6089 finished after 78 timesteps. Return = -57.151655122658354
Episode 6090 finished after 74 timesteps. Return = -123.0583170746163
Episode 6091 finished after 75 timesteps. Return = -243.67883484403427
Episode 6092 finished after 70 timesteps. Return = -134.58076698931535
Episode 6093 finished after 67 timesteps. Return = -89.41188488046721
Episode 6094 finished after 81 timesteps. Return = -229.1546460406721
Episode 6095 finished after 128 timesteps. Return = -83.32552066139849
Episode 6096 finished after 103 timesteps. Return = -222.3233419222591
Episode 6097 finished after 89 timesteps. Return = -141.40404284835046
Episode 6098 finished after 64 timesteps. Return = -114.32929040169329
Episode 6099 finished after 80 timesteps. Return = -107.10682779977327
Episode 6100 finished after 66 timesteps. Return = -34.66761369051446
Episode 6101 finished after 90 timesteps. Return = -216.0935187665959
Episode 6102 finished after 90 timesteps. Return = -147.7158919895688
Episode 6103

Episode 6207 finished after 65 timesteps. Return = -89.25121110356213
Episode 6208 finished after 85 timesteps. Return = -73.25146526270322
Episode 6209 finished after 111 timesteps. Return = -349.84204295325026
Episode 6210 finished after 79 timesteps. Return = -137.16514387429675
Episode 6211 finished after 78 timesteps. Return = -135.7845885320382
Episode 6212 finished after 82 timesteps. Return = -184.9331633893167
Episode 6213 finished after 94 timesteps. Return = -236.275469234001
Episode 6214 finished after 62 timesteps. Return = -95.13241795285741
Episode 6215 finished after 92 timesteps. Return = -129.86592868962606
Episode 6216 finished after 59 timesteps. Return = 7.694760110272753
Episode 6217 finished after 65 timesteps. Return = -93.78316016679831
Episode 6218 finished after 99 timesteps. Return = -155.41796371686718
Episode 6219 finished after 71 timesteps. Return = -207.82209867673683
Episode 6220 finished after 103 timesteps. Return = -244.90721747151463
Episode 6221 f

Episode 6328 finished after 89 timesteps. Return = -149.76858532124282
Episode 6329 finished after 96 timesteps. Return = -352.30464062089925
Episode 6330 finished after 79 timesteps. Return = -129.52493130771217
Episode 6331 finished after 84 timesteps. Return = -291.56652593449815
Episode 6332 finished after 104 timesteps. Return = -238.76729539041958
Episode 6333 finished after 86 timesteps. Return = -312.7126631549148
Episode 6334 finished after 78 timesteps. Return = -112.30723211198759
Episode 6335 finished after 79 timesteps. Return = -183.10431242647516
Episode 6336 finished after 72 timesteps. Return = -106.51254022790178
Episode 6337 finished after 100 timesteps. Return = -216.38367906858676
Episode 6338 finished after 82 timesteps. Return = -168.98768374129332
Episode 6339 finished after 93 timesteps. Return = -163.1369088210398
Episode 6340 finished after 54 timesteps. Return = -141.61135179642906
Episode 6341 finished after 73 timesteps. Return = -121.21148654274137
Episod

Episode 6445 finished after 76 timesteps. Return = -113.99614073483619
Episode 6446 finished after 83 timesteps. Return = -148.6341253602491
Episode 6447 finished after 69 timesteps. Return = -77.06311138379927
Episode 6448 finished after 77 timesteps. Return = -91.84149797774032
Episode 6449 finished after 74 timesteps. Return = -139.71027993277875
Episode 6450 finished after 103 timesteps. Return = -188.23643011046084
Episode 6451 finished after 84 timesteps. Return = -138.92347587588182
Episode 6452 finished after 154 timesteps. Return = -90.73604131776665
Episode 6453 finished after 99 timesteps. Return = -120.92691773662087
Episode 6454 finished after 72 timesteps. Return = -108.92645807798209
Episode 6455 finished after 79 timesteps. Return = -216.16359272667174
Episode 6456 finished after 92 timesteps. Return = -233.93422455103695
Episode 6457 finished after 93 timesteps. Return = -14.371095080982698
Episode 6458 finished after 91 timesteps. Return = -210.75715136525326
Episode 

Episode 6565 finished after 79 timesteps. Return = -95.66770820572047
Episode 6566 finished after 99 timesteps. Return = -136.8354253569844
Episode 6567 finished after 104 timesteps. Return = -211.52693922684813
Episode 6568 finished after 90 timesteps. Return = -98.21819851895324
Episode 6569 finished after 116 timesteps. Return = -123.65859397995068
Episode 6570 finished after 96 timesteps. Return = -289.4318987139721
Episode 6571 finished after 106 timesteps. Return = -265.83123589521665
Episode 6572 finished after 70 timesteps. Return = -120.70869365311367
Episode 6573 finished after 82 timesteps. Return = -126.89711013284706
Episode 6574 finished after 97 timesteps. Return = -188.2232973685274
Episode 6575 finished after 69 timesteps. Return = -140.27332013798
Episode 6576 finished after 105 timesteps. Return = -285.44093911803543
Episode 6577 finished after 106 timesteps. Return = -203.46190041948074
Episode 6578 finished after 115 timesteps. Return = -333.6986800027821
Episode 6

Episode 6685 finished after 102 timesteps. Return = -72.6152438246027
Episode 6686 finished after 84 timesteps. Return = -147.38162154134744
Episode 6687 finished after 78 timesteps. Return = -118.84023277996833
Episode 6688 finished after 89 timesteps. Return = -258.6875125972321
Episode 6689 finished after 66 timesteps. Return = -135.59890797447747
Episode 6690 finished after 90 timesteps. Return = -60.991894705629704
Episode 6691 finished after 88 timesteps. Return = -276.3318243919126
Episode 6692 finished after 67 timesteps. Return = -191.44776836078668
Episode 6693 finished after 83 timesteps. Return = -255.49140749097583
Episode 6694 finished after 98 timesteps. Return = -52.78443284092991
Episode 6695 finished after 93 timesteps. Return = -100.33355824241006
Episode 6696 finished after 61 timesteps. Return = -110.64740588160996
Episode 6697 finished after 84 timesteps. Return = -187.56266400300845
Episode 6698 finished after 104 timesteps. Return = -155.25245693653773
Episode 6

Episode 6801 finished after 79 timesteps. Return = -13.298964887362246
Episode 6802 finished after 95 timesteps. Return = -123.47694008902792
Episode 6803 finished after 99 timesteps. Return = -356.4720373247308
Episode 6804 finished after 109 timesteps. Return = -266.4518091804186
Episode 6805 finished after 67 timesteps. Return = -132.14649866122423
Episode 6806 finished after 108 timesteps. Return = -370.79228446142423
Episode 6807 finished after 82 timesteps. Return = -177.39270076664855
Episode 6808 finished after 73 timesteps. Return = -60.21495909029163
Episode 6809 finished after 84 timesteps. Return = -159.3357381694874
Episode 6810 finished after 63 timesteps. Return = -95.2924852713851
Episode 6811 finished after 65 timesteps. Return = -79.58763134056213
Episode 6812 finished after 71 timesteps. Return = -90.56873063624035
Episode 6813 finished after 84 timesteps. Return = -121.7735649399428
Episode 6814 finished after 64 timesteps. Return = -144.16361895065467
Episode 6815 

Episode 6917 finished after 103 timesteps. Return = -114.26825488231792
Episode 6918 finished after 88 timesteps. Return = -266.4820716332828
Episode 6919 finished after 93 timesteps. Return = -269.480863412825
Episode 6920 finished after 66 timesteps. Return = -146.74661173327644
Episode 6921 finished after 82 timesteps. Return = -261.1703938610073
Episode 6922 finished after 60 timesteps. Return = -65.57074697492189
Episode 6923 finished after 76 timesteps. Return = -170.69520705350894
Episode 6924 finished after 99 timesteps. Return = -53.89797926182764
Episode 6925 finished after 95 timesteps. Return = -219.30706655539683
Episode 6926 finished after 102 timesteps. Return = -354.4348653016862
Episode 6927 finished after 109 timesteps. Return = -232.02363472721095
Episode 6928 finished after 83 timesteps. Return = -310.13871256488324
Episode 6929 finished after 65 timesteps. Return = -111.38104383312532
Episode 6930 finished after 86 timesteps. Return = -278.7742159252331
Episode 693

Episode 7035 finished after 65 timesteps. Return = -90.82883456820399
Episode 7036 finished after 110 timesteps. Return = -252.71790106745402
Episode 7037 finished after 112 timesteps. Return = -195.29246683355655
Episode 7038 finished after 88 timesteps. Return = -281.2188273516132
Episode 7039 finished after 76 timesteps. Return = -137.58894825235603
Episode 7040 finished after 68 timesteps. Return = -224.40377747887476
Episode 7041 finished after 66 timesteps. Return = -122.87016906147628
Episode 7042 finished after 56 timesteps. Return = -233.5296291867452
Episode 7043 finished after 86 timesteps. Return = -134.05900718228202
Episode 7044 finished after 100 timesteps. Return = -93.35605237462161
Episode 7045 finished after 91 timesteps. Return = -125.04950883984469
Episode 7046 finished after 75 timesteps. Return = -251.78991148515107
Episode 7047 finished after 77 timesteps. Return = -119.1788620635399
Episode 7048 finished after 89 timesteps. Return = -127.51994191072099
Episode 

Episode 7151 finished after 70 timesteps. Return = -109.62938665856088
Episode 7152 finished after 108 timesteps. Return = -519.3692716601724
Episode 7153 finished after 89 timesteps. Return = -130.64055473066134
Episode 7154 finished after 102 timesteps. Return = -119.86970904483337
Episode 7155 finished after 104 timesteps. Return = -249.1585780028101
Episode 7156 finished after 63 timesteps. Return = -203.88612591966518
Episode 7157 finished after 72 timesteps. Return = -210.93150940963008
Episode 7158 finished after 71 timesteps. Return = -78.83345166617335
Episode 7159 finished after 87 timesteps. Return = -108.33239907493933
Episode 7160 finished after 76 timesteps. Return = -259.79989326357435
Episode 7161 finished after 62 timesteps. Return = -190.59142170725056
Episode 7162 finished after 81 timesteps. Return = -66.0288203816859
Episode 7163 finished after 76 timesteps. Return = -61.05836949782628
Episode 7164 finished after 99 timesteps. Return = -434.7501767647521
Episode 71

Episode 7270 finished after 109 timesteps. Return = -114.48022765607658
Episode 7271 finished after 67 timesteps. Return = -181.9379972910636
Episode 7272 finished after 75 timesteps. Return = -46.200252062320274
Episode 7273 finished after 75 timesteps. Return = 13.065932924523878
Episode 7274 finished after 79 timesteps. Return = -321.5791122170428
Episode 7275 finished after 70 timesteps. Return = -91.50543675966776
Episode 7276 finished after 98 timesteps. Return = -189.4481497743713
Episode 7277 finished after 90 timesteps. Return = -281.0748504520974
Episode 7278 finished after 127 timesteps. Return = -50.15067687948294
Episode 7279 finished after 71 timesteps. Return = -103.71893153864993
Episode 7280 finished after 98 timesteps. Return = -0.42392752611273465
Episode 7281 finished after 108 timesteps. Return = -224.95887454539093
Episode 7282 finished after 103 timesteps. Return = -49.24986133477849
Episode 7283 finished after 112 timesteps. Return = -245.1897380212862
Episode 7

Episode 7391 finished after 70 timesteps. Return = -195.82850154275525
Episode 7392 finished after 76 timesteps. Return = -257.25521201928575
Episode 7393 finished after 81 timesteps. Return = -102.22419832086942
Episode 7394 finished after 103 timesteps. Return = -233.32628919481826
Episode 7395 finished after 105 timesteps. Return = -202.71230834939934
Episode 7396 finished after 96 timesteps. Return = -152.77587617083077
Episode 7397 finished after 104 timesteps. Return = -110.1657671818782
Episode 7398 finished after 67 timesteps. Return = -86.3984109959543
Episode 7399 finished after 92 timesteps. Return = -165.60607502661227
Episode 7400 finished after 77 timesteps. Return = -71.74535476645923
Episode 7401 finished after 81 timesteps. Return = -136.3147483408082
Episode 7402 finished after 116 timesteps. Return = -302.45074268677047
Episode 7403 finished after 104 timesteps. Return = -173.37108664077022
Episode 7404 finished after 80 timesteps. Return = 23.493647413719657
Episode

Episode 7507 finished after 95 timesteps. Return = -106.78358624080207
Episode 7508 finished after 107 timesteps. Return = -156.10916741911478
Episode 7509 finished after 107 timesteps. Return = -54.77711544692631
Episode 7510 finished after 118 timesteps. Return = -204.24115400773437
Episode 7511 finished after 77 timesteps. Return = -197.92238264292718
Episode 7512 finished after 95 timesteps. Return = -224.30862662826735
Episode 7513 finished after 63 timesteps. Return = -155.0401026167352
Episode 7514 finished after 67 timesteps. Return = -124.61057658166823
Episode 7515 finished after 77 timesteps. Return = -85.71796909174721
Episode 7516 finished after 86 timesteps. Return = -63.505956060182456
Episode 7517 finished after 84 timesteps. Return = -129.74800009778755
Episode 7518 finished after 91 timesteps. Return = -332.08207678392137
Episode 7519 finished after 86 timesteps. Return = -128.60927921284986
Episode 7520 finished after 76 timesteps. Return = -103.956695854061
Episode 

Episode 7625 finished after 62 timesteps. Return = -56.914354981227014
Episode 7626 finished after 83 timesteps. Return = -288.5110662847205
Episode 7627 finished after 102 timesteps. Return = -120.16575192024771
Episode 7628 finished after 135 timesteps. Return = -339.068232914149
Episode 7629 finished after 1000 timesteps. Return = 85.71547066698545
Episode 7630 finished after 147 timesteps. Return = -220.83167910700172
Episode 7631 finished after 61 timesteps. Return = -81.07777695360049
Episode 7632 finished after 80 timesteps. Return = -91.1310897924687
Episode 7633 finished after 102 timesteps. Return = -121.85856698529551
Episode 7634 finished after 86 timesteps. Return = -57.556546719251514
Episode 7635 finished after 99 timesteps. Return = -44.36585358272285
Episode 7636 finished after 62 timesteps. Return = -83.92062988650636
Episode 7637 finished after 85 timesteps. Return = -45.699530051854
Episode 7638 finished after 106 timesteps. Return = -141.6699128420679
Episode 7639 

Episode 7746 finished after 81 timesteps. Return = -10.167288469821798
Episode 7747 finished after 92 timesteps. Return = -221.21334380208918
Episode 7748 finished after 100 timesteps. Return = -218.7185396277879
Episode 7749 finished after 88 timesteps. Return = -288.220671104529
Episode 7750 finished after 112 timesteps. Return = -135.03186821729224
Episode 7751 finished after 85 timesteps. Return = -329.2360039163326
Episode 7752 finished after 82 timesteps. Return = -175.4858903744052
Episode 7753 finished after 73 timesteps. Return = 31.77205170295838
Episode 7754 finished after 106 timesteps. Return = -305.0183411811089
Episode 7755 finished after 82 timesteps. Return = -182.55605957652423
Episode 7756 finished after 67 timesteps. Return = -106.97560974460946
Episode 7757 finished after 91 timesteps. Return = -75.36280157421818
Episode 7758 finished after 64 timesteps. Return = -103.45895430413672
Episode 7759 finished after 67 timesteps. Return = -189.36935568753606
Episode 7760

Episode 7864 finished after 81 timesteps. Return = -129.94598658360067
Episode 7865 finished after 93 timesteps. Return = -142.38836349892517
Episode 7866 finished after 78 timesteps. Return = -107.14513339448101
Episode 7867 finished after 66 timesteps. Return = -75.77710294051482
Episode 7868 finished after 86 timesteps. Return = -297.5726871265225
Episode 7869 finished after 86 timesteps. Return = -167.2688324747018
Episode 7870 finished after 87 timesteps. Return = -274.3813198265285
Episode 7871 finished after 110 timesteps. Return = -105.7700103368636
Episode 7872 finished after 93 timesteps. Return = -173.86902113822708
Episode 7873 finished after 85 timesteps. Return = -232.59437160091426
Episode 7874 finished after 99 timesteps. Return = -211.74987311726807
Episode 7875 finished after 67 timesteps. Return = -122.86475978906338
Episode 7876 finished after 73 timesteps. Return = -177.08367096798952
Episode 7877 finished after 108 timesteps. Return = 5.020546829607298
Episode 787

Episode 7984 finished after 108 timesteps. Return = -165.85045383364553
Episode 7985 finished after 86 timesteps. Return = -113.70188069294645
Episode 7986 finished after 101 timesteps. Return = -327.77276234950784
Episode 7987 finished after 71 timesteps. Return = -218.16652553309166
Episode 7988 finished after 86 timesteps. Return = -89.39472034649586
Episode 7989 finished after 115 timesteps. Return = -349.3605362481923
Episode 7990 finished after 52 timesteps. Return = -164.24945604794505
Episode 7991 finished after 66 timesteps. Return = -118.57202958408416
Episode 7992 finished after 86 timesteps. Return = -312.63414746888
Episode 7993 finished after 98 timesteps. Return = -195.63548159471213
Episode 7994 finished after 108 timesteps. Return = -354.1497473533867
Episode 7995 finished after 68 timesteps. Return = -90.21186938360049
Episode 7996 finished after 72 timesteps. Return = -40.54747870984829
Episode 7997 finished after 76 timesteps. Return = -299.4564515376719
Episode 799

Episode 8100 finished after 68 timesteps. Return = -130.51780323061075
Episode 8101 finished after 111 timesteps. Return = -105.51391514611805
Episode 8102 finished after 71 timesteps. Return = -140.1280090361555
Episode 8103 finished after 96 timesteps. Return = -152.81653356899432
Episode 8104 finished after 81 timesteps. Return = 33.879370769094464
Episode 8105 finished after 116 timesteps. Return = -268.1383339472542
Episode 8106 finished after 82 timesteps. Return = -143.3065617268661
Episode 8107 finished after 105 timesteps. Return = -339.58542212663133
Episode 8108 finished after 88 timesteps. Return = -46.86156366539129
Episode 8109 finished after 116 timesteps. Return = -460.71063240469095
Episode 8110 finished after 90 timesteps. Return = -313.34068172373463
Episode 8111 finished after 82 timesteps. Return = -175.87474614796406
Episode 8112 finished after 96 timesteps. Return = -273.9965756229726
Episode 8113 finished after 109 timesteps. Return = -91.40001217951831
Episode 

Episode 8219 finished after 111 timesteps. Return = -220.17739695429947
Episode 8220 finished after 99 timesteps. Return = -60.635919144892895
Episode 8221 finished after 68 timesteps. Return = -135.15084513003848
Episode 8222 finished after 76 timesteps. Return = -259.47933968012165
Episode 8223 finished after 94 timesteps. Return = -85.01844273459704
Episode 8224 finished after 113 timesteps. Return = -226.2031438413032
Episode 8225 finished after 110 timesteps. Return = -214.98977216416884
Episode 8226 finished after 114 timesteps. Return = -248.67552195761004
Episode 8227 finished after 111 timesteps. Return = -320.51403425877015
Episode 8228 finished after 116 timesteps. Return = -160.7105108365243
Episode 8229 finished after 127 timesteps. Return = -155.9028643400734
Episode 8230 finished after 88 timesteps. Return = -290.56020984583154
Episode 8231 finished after 89 timesteps. Return = -233.1322519008971
Episode 8232 finished after 81 timesteps. Return = -121.94238660317387
Epis

Episode 8336 finished after 86 timesteps. Return = -7.204838295403846
Episode 8337 finished after 115 timesteps. Return = -162.88561711804826
Episode 8338 finished after 88 timesteps. Return = -142.62672407406222
Episode 8339 finished after 96 timesteps. Return = -137.81697642714923
Episode 8340 finished after 103 timesteps. Return = -266.704908593823
Episode 8341 finished after 98 timesteps. Return = -328.53279534945716
Episode 8342 finished after 93 timesteps. Return = -135.84372915631803
Episode 8343 finished after 84 timesteps. Return = -105.96545400977016
Episode 8344 finished after 118 timesteps. Return = -209.79952163639564
Episode 8345 finished after 112 timesteps. Return = -189.4788866183386
Episode 8346 finished after 78 timesteps. Return = -59.452403806477754
Episode 8347 finished after 108 timesteps. Return = -159.4304188306587
Episode 8348 finished after 100 timesteps. Return = -90.53918610993436
Episode 8349 finished after 114 timesteps. Return = -236.0312419151927
Episod

Episode 8456 finished after 88 timesteps. Return = -119.07438453019154
Episode 8457 finished after 96 timesteps. Return = -360.09684132189966
Episode 8458 finished after 106 timesteps. Return = -79.87491056408308
Episode 8459 finished after 66 timesteps. Return = -119.89952183105504
Episode 8460 finished after 85 timesteps. Return = -103.12469917643202
Episode 8461 finished after 68 timesteps. Return = -107.02372656958245
Episode 8462 finished after 85 timesteps. Return = -110.9669436879623
Episode 8463 finished after 113 timesteps. Return = -260.8265586851291
Episode 8464 finished after 86 timesteps. Return = -105.91547861894688
Episode 8465 finished after 103 timesteps. Return = -134.27630861957005
Episode 8466 finished after 71 timesteps. Return = -45.693048768243685
Episode 8467 finished after 119 timesteps. Return = -0.9981762948736446
Episode 8468 finished after 95 timesteps. Return = -308.6260696678245
Episode 8469 finished after 103 timesteps. Return = -115.19211477513677
Episo

Episode 8576 finished after 78 timesteps. Return = -144.74454415458175
Episode 8577 finished after 107 timesteps. Return = -115.7957392246855
Episode 8578 finished after 109 timesteps. Return = -320.97670941323616
Episode 8579 finished after 75 timesteps. Return = -90.52394261271468
Episode 8580 finished after 112 timesteps. Return = -153.76958977074318
Episode 8581 finished after 76 timesteps. Return = -301.4134322608382
Episode 8582 finished after 71 timesteps. Return = -98.9234905599405
Episode 8583 finished after 81 timesteps. Return = -80.99308065129568
Episode 8584 finished after 92 timesteps. Return = -78.92904591147081
Episode 8585 finished after 71 timesteps. Return = -71.58634359091108
Episode 8586 finished after 81 timesteps. Return = -291.78058331143154
Episode 8587 finished after 120 timesteps. Return = -222.60670318215517
Episode 8588 finished after 82 timesteps. Return = -46.569637587909234
Episode 8589 finished after 87 timesteps. Return = -142.09994384567221
Episode 85

Episode 8693 finished after 75 timesteps. Return = -149.9383221517701
Episode 8694 finished after 86 timesteps. Return = -198.82405917096264
Episode 8695 finished after 73 timesteps. Return = -93.94369421116066
Episode 8696 finished after 129 timesteps. Return = -26.065650900002794
Episode 8697 finished after 84 timesteps. Return = -272.3654118738863
Episode 8698 finished after 91 timesteps. Return = -247.79897612465493
Episode 8699 finished after 115 timesteps. Return = -204.9933403233618
Episode 8700 finished after 91 timesteps. Return = -255.1854641641384
Episode 8701 finished after 100 timesteps. Return = -304.81181593117043
Episode 8702 finished after 64 timesteps. Return = -67.54859991247113
Episode 8703 finished after 71 timesteps. Return = -60.8093725676073
Episode 8704 finished after 58 timesteps. Return = -150.11811278617108
Episode 8705 finished after 65 timesteps. Return = -167.59294243825224
Episode 8706 finished after 64 timesteps. Return = -134.6046670896147
Episode 8707

Episode 8813 finished after 94 timesteps. Return = -372.56115628775206
Episode 8814 finished after 74 timesteps. Return = -172.60208511159772
Episode 8815 finished after 99 timesteps. Return = -115.80261372847407
Episode 8816 finished after 83 timesteps. Return = -371.71873037046214
Episode 8817 finished after 116 timesteps. Return = -151.83960371300995
Episode 8818 finished after 82 timesteps. Return = -89.4951963508708
Episode 8819 finished after 82 timesteps. Return = -103.85674590624006
Episode 8820 finished after 84 timesteps. Return = -408.12215174069996
Episode 8821 finished after 76 timesteps. Return = -172.6389425044049
Episode 8822 finished after 89 timesteps. Return = -300.6194361277378
Episode 8823 finished after 99 timesteps. Return = -224.54003061398603
Episode 8824 finished after 77 timesteps. Return = -127.12691976427014
Episode 8825 finished after 82 timesteps. Return = -108.35134048298809
Episode 8826 finished after 114 timesteps. Return = -187.41537131579167
Episode 

Episode 8929 finished after 114 timesteps. Return = -303.69517384949535
Episode 8930 finished after 108 timesteps. Return = -106.7201468698327
Episode 8931 finished after 76 timesteps. Return = -153.49427146652133
Episode 8932 finished after 65 timesteps. Return = -238.59425791885232
Episode 8933 finished after 79 timesteps. Return = -411.05349804789995
Episode 8934 finished after 65 timesteps. Return = -151.6623086080145
Episode 8935 finished after 86 timesteps. Return = -149.17289878788552
Episode 8936 finished after 76 timesteps. Return = -266.7494451624764
Episode 8937 finished after 71 timesteps. Return = -95.66774842845689
Episode 8938 finished after 78 timesteps. Return = -167.29950654961488
Episode 8939 finished after 96 timesteps. Return = -237.24196530655237
Episode 8940 finished after 92 timesteps. Return = -88.05405621973458
Episode 8941 finished after 108 timesteps. Return = -117.15053622097781
Episode 8942 finished after 60 timesteps. Return = -95.28488868299416
Episode 8

Episode 9051 finished after 69 timesteps. Return = -149.0514054223267
Episode 9052 finished after 72 timesteps. Return = -120.38511853215589
Episode 9053 finished after 93 timesteps. Return = -126.46984273973888
Episode 9054 finished after 81 timesteps. Return = -208.11592636209156
Episode 9055 finished after 102 timesteps. Return = -255.8089346633006
Episode 9056 finished after 143 timesteps. Return = 62.38308178507492
Episode 9057 finished after 100 timesteps. Return = -179.35330030294458
Episode 9058 finished after 77 timesteps. Return = -319.1317681191654
Episode 9059 finished after 80 timesteps. Return = -244.49575369803676
Episode 9060 finished after 77 timesteps. Return = -131.84153702286915
Episode 9061 finished after 82 timesteps. Return = -271.8914436764681
Episode 9062 finished after 88 timesteps. Return = -119.59823023938752
Episode 9063 finished after 103 timesteps. Return = -232.63176669914606
Episode 9064 finished after 119 timesteps. Return = -342.3216207599368
Episode 

Episode 9168 finished after 88 timesteps. Return = -345.7105020006893
Episode 9169 finished after 72 timesteps. Return = -270.7954157015381
Episode 9170 finished after 63 timesteps. Return = -142.8543372085508
Episode 9171 finished after 91 timesteps. Return = -203.5404249604198
Episode 9172 finished after 75 timesteps. Return = -184.31924902769487
Episode 9173 finished after 102 timesteps. Return = -379.7888050195163
Episode 9174 finished after 109 timesteps. Return = -324.7159563880894
Episode 9175 finished after 100 timesteps. Return = -167.05637145847976
Episode 9176 finished after 84 timesteps. Return = -167.01981610320348
Episode 9177 finished after 101 timesteps. Return = -393.2209986356503
Episode 9178 finished after 91 timesteps. Return = -200.11210439301937
Episode 9179 finished after 108 timesteps. Return = -249.67261991568094
Episode 9180 finished after 121 timesteps. Return = -335.66865734305384
Episode 9181 finished after 86 timesteps. Return = -186.11785983818976
Episode

Episode 9284 finished after 72 timesteps. Return = -144.792015983257
Episode 9285 finished after 104 timesteps. Return = -247.4480451606168
Episode 9286 finished after 117 timesteps. Return = -201.32458015031412
Episode 9287 finished after 67 timesteps. Return = -63.35009821086292
Episode 9288 finished after 85 timesteps. Return = -380.60103070778354
Episode 9289 finished after 66 timesteps. Return = -222.03613230683385
Episode 9290 finished after 106 timesteps. Return = -294.4250582185165
Episode 9291 finished after 110 timesteps. Return = -106.78809583953782
Episode 9292 finished after 66 timesteps. Return = -75.83815825340363
Episode 9293 finished after 101 timesteps. Return = -63.321684931651745
Episode 9294 finished after 66 timesteps. Return = -91.72300889614272
Episode 9295 finished after 64 timesteps. Return = -289.8092089388805
Episode 9296 finished after 107 timesteps. Return = -331.14369257051953
Episode 9297 finished after 77 timesteps. Return = -411.87347389650745
Episode 

Episode 9401 finished after 89 timesteps. Return = -199.6259930089319
Episode 9402 finished after 101 timesteps. Return = -24.558527052783987
Episode 9403 finished after 72 timesteps. Return = -79.02540795273163
Episode 9404 finished after 127 timesteps. Return = -94.33967186200333
Episode 9405 finished after 91 timesteps. Return = -300.8583004492606
Episode 9406 finished after 63 timesteps. Return = -300.92796102671065
Episode 9407 finished after 78 timesteps. Return = 20.776425036497002
Episode 9408 finished after 71 timesteps. Return = -53.293463631889196
Episode 9409 finished after 89 timesteps. Return = -316.1723540158431
Episode 9410 finished after 97 timesteps. Return = -113.4353111496131
Episode 9411 finished after 92 timesteps. Return = -223.6988731917566
Episode 9412 finished after 70 timesteps. Return = -131.2966115886166
Episode 9413 finished after 85 timesteps. Return = -197.54657794191587
Episode 9414 finished after 122 timesteps. Return = -220.94542921811905
Episode 9415

Episode 9518 finished after 105 timesteps. Return = -297.3044002628866
Episode 9519 finished after 105 timesteps. Return = -221.62018486062877
Episode 9520 finished after 71 timesteps. Return = -21.98032167093926
Episode 9521 finished after 80 timesteps. Return = -180.84705668706906
Episode 9522 finished after 109 timesteps. Return = -228.68865129185505
Episode 9523 finished after 122 timesteps. Return = -43.580069854602876
Episode 9524 finished after 94 timesteps. Return = -96.54484436974177
Episode 9525 finished after 73 timesteps. Return = -97.61487534064207
Episode 9526 finished after 75 timesteps. Return = -204.37299806773626
Episode 9527 finished after 75 timesteps. Return = -218.9321828058094
Episode 9528 finished after 116 timesteps. Return = -238.60377020104082
Episode 9529 finished after 106 timesteps. Return = -312.3938904368941
Episode 9530 finished after 94 timesteps. Return = -328.7235551279108
Episode 9531 finished after 90 timesteps. Return = -248.81463864459474
Episode

Episode 9635 finished after 142 timesteps. Return = -241.91231385808405
Episode 9636 finished after 85 timesteps. Return = -123.47426431710505
Episode 9637 finished after 95 timesteps. Return = -50.82806359199394
Episode 9638 finished after 105 timesteps. Return = -139.6229518702737
Episode 9639 finished after 91 timesteps. Return = -297.0264624971229
Episode 9640 finished after 62 timesteps. Return = -77.62290582129717
Episode 9641 finished after 64 timesteps. Return = -17.066318800313056
Episode 9642 finished after 94 timesteps. Return = -110.19838582125792
Episode 9643 finished after 112 timesteps. Return = -128.43817506491666
Episode 9644 finished after 84 timesteps. Return = -149.0600231040704
Episode 9645 finished after 96 timesteps. Return = -172.77130701662566
Episode 9646 finished after 116 timesteps. Return = -138.50068477879609
Episode 9647 finished after 96 timesteps. Return = -226.52658206543487
Episode 9648 finished after 89 timesteps. Return = -109.33551575240304
Episode

Episode 9751 finished after 142 timesteps. Return = -360.44930964313625
Episode 9752 finished after 111 timesteps. Return = -195.75311942828208
Episode 9753 finished after 114 timesteps. Return = -310.71797779666844
Episode 9754 finished after 73 timesteps. Return = -96.58392988919073
Episode 9755 finished after 109 timesteps. Return = -402.4058521715597
Episode 9756 finished after 70 timesteps. Return = -128.03885848629437
Episode 9757 finished after 100 timesteps. Return = -198.51776296837443
Episode 9758 finished after 113 timesteps. Return = -273.5842298508689
Episode 9759 finished after 106 timesteps. Return = -445.6645253639239
Episode 9760 finished after 105 timesteps. Return = -284.8840547994311
Episode 9761 finished after 96 timesteps. Return = -254.6653615364259
Episode 9762 finished after 123 timesteps. Return = -80.44274903633713
Episode 9763 finished after 118 timesteps. Return = -108.89432966596368
Episode 9764 finished after 75 timesteps. Return = -116.07924280403756
Epi

Episode 9868 finished after 131 timesteps. Return = -143.07278581323303
Episode 9869 finished after 74 timesteps. Return = -339.4610543398355
Episode 9870 finished after 106 timesteps. Return = -225.5375282340899
Episode 9871 finished after 97 timesteps. Return = -226.76840468081497
Episode 9872 finished after 85 timesteps. Return = -137.257558096656
Episode 9873 finished after 109 timesteps. Return = -249.26557669510598
Episode 9874 finished after 122 timesteps. Return = -197.6179832976929
Episode 9875 finished after 101 timesteps. Return = 13.3980836308004
Episode 9876 finished after 141 timesteps. Return = -143.9067484106221
Episode 9877 finished after 102 timesteps. Return = -238.30662587811267
Episode 9878 finished after 69 timesteps. Return = -215.27562357094166
Episode 9879 finished after 80 timesteps. Return = -213.8965717769538
Episode 9880 finished after 81 timesteps. Return = -115.3131881237374
Episode 9881 finished after 96 timesteps. Return = -156.24183311188773
Episode 98

Episode 9985 finished after 89 timesteps. Return = -269.6874420131308
Episode 9986 finished after 103 timesteps. Return = -258.87468592452706
Episode 9987 finished after 99 timesteps. Return = -267.45285728571906
Episode 9988 finished after 116 timesteps. Return = -200.00267450500897
Episode 9989 finished after 68 timesteps. Return = -162.96908164544655
Episode 9990 finished after 96 timesteps. Return = -213.10849139579992
Episode 9991 finished after 58 timesteps. Return = -105.2295669827138
Episode 9992 finished after 67 timesteps. Return = -74.66782077195978
Episode 9993 finished after 77 timesteps. Return = -60.799820107629
Episode 9994 finished after 104 timesteps. Return = -103.97948885818066
Episode 9995 finished after 113 timesteps. Return = -122.14557350126921
Episode 9996 finished after 115 timesteps. Return = -200.33782037722818
Episode 9997 finished after 77 timesteps. Return = -72.74117889930467
Episode 9998 finished after 98 timesteps. Return = 28.336244772735967
Episode 9

Episode 10101 finished after 78 timesteps. Return = -253.9123297690235
Episode 10102 finished after 98 timesteps. Return = -108.68588536881725
Episode 10103 finished after 109 timesteps. Return = -276.3130128492782
Episode 10104 finished after 73 timesteps. Return = -137.2722507679424
Episode 10105 finished after 74 timesteps. Return = -209.2157125644
Episode 10106 finished after 86 timesteps. Return = -88.7444603670404
Episode 10107 finished after 77 timesteps. Return = -356.52679714941866
Episode 10108 finished after 138 timesteps. Return = -220.45139805186864
Episode 10109 finished after 107 timesteps. Return = -79.68661487775253
Episode 10110 finished after 121 timesteps. Return = -276.6277944031742
Episode 10111 finished after 90 timesteps. Return = -130.78561267113594
Episode 10112 finished after 67 timesteps. Return = -98.1422718651761
Episode 10113 finished after 64 timesteps. Return = -158.65167193325232
Episode 10114 finished after 72 timesteps. Return = -217.62920000370764
E

Episode 10217 finished after 103 timesteps. Return = -225.2060862938422
Episode 10218 finished after 116 timesteps. Return = -223.32965744676562
Episode 10219 finished after 73 timesteps. Return = -178.30485527539557
Episode 10220 finished after 68 timesteps. Return = -173.2401215020153
Episode 10221 finished after 102 timesteps. Return = -193.70920019630262
Episode 10222 finished after 96 timesteps. Return = -250.47192846547958
Episode 10223 finished after 85 timesteps. Return = -274.8602768315085
Episode 10224 finished after 64 timesteps. Return = -210.3944776394335
Episode 10225 finished after 93 timesteps. Return = -432.00716798769065
Episode 10226 finished after 106 timesteps. Return = -261.9569655213727
Episode 10227 finished after 120 timesteps. Return = -204.1786505505013
Episode 10228 finished after 123 timesteps. Return = -222.61933816956827
Episode 10229 finished after 102 timesteps. Return = -196.49482821373385
Episode 10230 finished after 78 timesteps. Return = -267.295428

Episode 10333 finished after 113 timesteps. Return = -260.2838687874207
Episode 10334 finished after 110 timesteps. Return = -235.0067838682809
Episode 10335 finished after 77 timesteps. Return = -96.25937692869358
Episode 10336 finished after 102 timesteps. Return = -238.38710736346502
Episode 10337 finished after 74 timesteps. Return = -223.8691308182398
Episode 10338 finished after 75 timesteps. Return = -226.56699931855235
Episode 10339 finished after 147 timesteps. Return = -243.79549509325182
Episode 10340 finished after 110 timesteps. Return = 30.37227483745673
Episode 10341 finished after 105 timesteps. Return = -119.33070727404453
Episode 10342 finished after 73 timesteps. Return = -253.1118530820678
Episode 10343 finished after 112 timesteps. Return = -173.84977284027468
Episode 10344 finished after 107 timesteps. Return = -149.31891424307548
Episode 10345 finished after 82 timesteps. Return = -231.3517291713811
Episode 10346 finished after 90 timesteps. Return = -287.0110114

Episode 10448 finished after 115 timesteps. Return = -289.59448611566364
Episode 10449 finished after 70 timesteps. Return = -85.0903009184932
Episode 10450 finished after 64 timesteps. Return = -115.96613238408956
Episode 10451 finished after 108 timesteps. Return = -176.49511329948885
Episode 10452 finished after 123 timesteps. Return = -171.17851497414946
Episode 10453 finished after 143 timesteps. Return = -123.82929111347866
Episode 10454 finished after 86 timesteps. Return = -205.61433903770478
Episode 10455 finished after 82 timesteps. Return = -193.132453877897
Episode 10456 finished after 76 timesteps. Return = -140.6871474685144
Episode 10457 finished after 93 timesteps. Return = -150.9912237001328
Episode 10458 finished after 127 timesteps. Return = -37.23500982957983
Episode 10459 finished after 75 timesteps. Return = -85.85557685041854
Episode 10460 finished after 73 timesteps. Return = -221.7324207450483
Episode 10461 finished after 108 timesteps. Return = -107.3159184794

KeyboardInterrupt: 

In [51]:
# Visualise the final trained agent acting under the greedy policy.
# NOTE(review): the command is presumably [desired reward, desired horizon],
# following the convention stated near the top of the notebook, and n is
# presumably the number of episodes to show -- confirm against visualise_agent.
visualise_agent(
    greedy_policy,
    command=[400, 400],
    command_scale=command_scale,
    n=3,
)

Episode 0 finished after 125 timesteps. Return = 125.0
