In [115]:
import gym
env = gym.make('CartPole-v1')

In [116]:
import numpy as np
def random_policy(obs):
    return np.random.randint(env.action_space.n)

In [117]:
import time
from copy import deepcopy
#Visualise agent function
def visualise_agent(policy, command, n=5):
    try:
        for trial_i in range(n):
            current_command = deepcopy(command)
            observation = env.reset()
            done=False
            t=0
            episode_return=0
            while not done:
                env.render()
                network_input = torch.tensor(np.append(observation, current_command)).double()
                policy_action = policy(network_input)
                observation, reward, done, info = env.step(policy_action)
                episode_return+=reward
                #time.sleep(0.1)
                current_command[0]-=reward
                current_command[1] = max(1, current_command[1]-1)
                
                t+=1
            env.render()
            time.sleep(1.5)
            print("Episode {} finished after {} timesteps. Return = {}".format(trial_i, t, episode_return))
        env.close()
    except KeyboardInterrupt:
        env.close()

In [118]:
#visualise_agent(random_policy, command=[500, 500], 1)

In [174]:
import torch
import torch.nn.functional as F

class FCNN_AGENT(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(np.prod(env.observation_space.shape)+2, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, env.action_space.n)
        )
    
    def forward(self, x):
        x = self.fc(x)
        return F.softmax(x, dim=-1)
    
    def create_optimizer(self, lr):
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

In [175]:
from copy import deepcopy
def collect_experience(policy, replay_buffer, replay_size, last_few, n_episodes=100):
    global i_episode
    init_replay_buffer = deepcopy(replay_buffer)
    try:
        for _ in range(n_episodes):
            command = sample_command(init_replay_buffer, last_few)
            writer.add_scalar('Command desired reward', command[0], i_episode)    # write loss to a graph
            writer.add_scalar('Command horizon', command[1], i_episode)    # write loss to a graph
            observation = env.reset()
            episode_mem = {'observation':[],
                            'action':[],
                            'reward':[]}
            done=False
            while not done:
                network_input = torch.tensor(np.append(observation, command)).double()
                action = policy(network_input)
                new_observation, reward, done, info = env.step(action)
                
                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                
                observation=new_observation
                #command[0]-= reward
                command[0] = max(1, command[0]-reward)
                command[1] = max(1, command[1]-1)
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            replay_buffer.append(episode_mem)
            i_episode+=1
            writer.add_scalar('Return', sum(episode_mem['reward']), i_episode)    # write loss to a graph
            print("Episode {} finished after {} timesteps. Return = {}".format(i_episode, len(episode_mem['observation']), sum(episode_mem['reward'])))
        env.close()
    except KeyboardInterrupt:
        env.close()
    replay_buffer = sorted(replay_buffer, key=lambda x:x['return'])[-replay_size:]
    return replay_buffer

def sample_command(replay_buffer, last_few):
    if len(replay_buffer)==0:
        return [1, 1]
    else:
        command_samples = replay_buffer[-last_few:]
        lengths = [mem['episode_len'] for mem in command_samples]
        returns = [mem['return'] for mem in command_samples]
        mean_return, std_return = np.mean(returns), np.std(returns)
        command_horizon = np.mean(lengths)
        desired_reward = np.random.uniform(mean_return, mean_return+std_return)
        return [desired_reward, command_horizon]

In [176]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64):
    all_costs = []
    for i in range(n_updates):
        batch_input = np.zeros((batch_size, np.prod(env.observation_space.shape)+2))
        batch_label = np.zeros((batch_size))
        for b in range(batch_size):
            sample_episode = np.random.randint(0, len(replay_buffer))
            sample_t1 = np.random.randint(0, len(replay_buffer[sample_episode]['observation']))
            sample_t2 = len(replay_buffer[sample_episode]['observation'])
            sample_horizon = sample_t2-sample_t1
            sample_mem = replay_buffer[sample_episode]['observation'][sample_t1]
            sample_desired_reward = sum(replay_buffer[sample_episode]['reward'][sample_t1:sample_t2])
            network_input = np.append(sample_mem, [sample_desired_reward, sample_horizon])
            label = replay_buffer[sample_episode]['action'][sample_t1]
            batch_input[b] = network_input
            batch_label[b] = label
        batch_input = torch.tensor(batch_input).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_input)
        cost = F.cross_entropy(pred, batch_label)
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(all_costs)

In [177]:
def create_greedy_policy(policy_network):
    def policy(obs):
        action_prob = policy_network(obs)
        action = np.argmax(action_prob.detach().numpy())
        return action
    return policy

def create_stochastic_policy(policy_network):
    def policy(obs):
        action_prob = policy_network(obs)
        action_sample = np.random.multinomial(1, action_prob.detach().numpy())
        action = np.argmax(action_sample)
        return action
    return policy

In [178]:
i_episode=0
replay_buffer = []
replay_size = 600
last_few = 50

batch_size = 32
n_warm_up_episodes = 50
n_episodes_per_iter = 50
n_updates_per_iter = 200

lr = 0.001
agent = FCNN_AGENT().double()
agent.create_optimizer(lr)

stochastic_policy = create_stochastic_policy(agent)
greedy_policy = create_greedy_policy(agent)

In [179]:
# SET UP TRAINING VISUALISATION
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter() # we will use this to show our models performance on a graph

In [180]:
#Collect warm up episodes
replay_buffer = collect_experience(random_policy, replay_buffer, replay_size, last_few, n_warm_up_episodes)
train_net(agent, replay_buffer, n_updates=n_updates_per_iter, batch_size=batch_size)

Episode 1 finished after 28 timesteps. Return = 28.0
Episode 2 finished after 14 timesteps. Return = 14.0
Episode 3 finished after 14 timesteps. Return = 14.0
Episode 4 finished after 15 timesteps. Return = 15.0
Episode 5 finished after 16 timesteps. Return = 16.0
Episode 6 finished after 15 timesteps. Return = 15.0
Episode 7 finished after 10 timesteps. Return = 10.0
Episode 8 finished after 10 timesteps. Return = 10.0
Episode 9 finished after 51 timesteps. Return = 51.0
Episode 10 finished after 25 timesteps. Return = 25.0
Episode 11 finished after 25 timesteps. Return = 25.0
Episode 12 finished after 24 timesteps. Return = 24.0
Episode 13 finished after 17 timesteps. Return = 17.0
Episode 14 finished after 13 timesteps. Return = 13.0
Episode 15 finished after 11 timesteps. Return = 11.0
Episode 16 finished after 18 timesteps. Return = 18.0
Episode 17 finished after 16 timesteps. Return = 16.0
Episode 18 finished after 18 timesteps. Return = 18.0
Episode 19 finished after 14 timestep

0.6908077066139745

In [181]:
#agent.load_state_dict(torch.load('checkpoints/lunar_lander_32x32_checkpoint_0.pt'))

In [182]:
n_iters = 10000
for i in range(n_iters):
    replay_buffer = collect_experience(stochastic_policy, replay_buffer, replay_size, last_few, n_episodes_per_iter)
    train_net(agent, replay_buffer, n_updates=n_updates_per_iter, batch_size=batch_size)

Episode 51 finished after 18 timesteps. Return = 18.0
Episode 52 finished after 20 timesteps. Return = 20.0
Episode 53 finished after 17 timesteps. Return = 17.0
Episode 54 finished after 15 timesteps. Return = 15.0
Episode 55 finished after 14 timesteps. Return = 14.0
Episode 56 finished after 12 timesteps. Return = 12.0
Episode 57 finished after 14 timesteps. Return = 14.0
Episode 58 finished after 29 timesteps. Return = 29.0
Episode 59 finished after 11 timesteps. Return = 11.0
Episode 60 finished after 23 timesteps. Return = 23.0
Episode 61 finished after 21 timesteps. Return = 21.0
Episode 62 finished after 13 timesteps. Return = 13.0
Episode 63 finished after 13 timesteps. Return = 13.0
Episode 64 finished after 19 timesteps. Return = 19.0
Episode 65 finished after 15 timesteps. Return = 15.0
Episode 66 finished after 15 timesteps. Return = 15.0
Episode 67 finished after 10 timesteps. Return = 10.0
Episode 68 finished after 20 timesteps. Return = 20.0
Episode 69 finished after 11

Episode 251 finished after 9 timesteps. Return = 9.0
Episode 252 finished after 8 timesteps. Return = 8.0
Episode 253 finished after 13 timesteps. Return = 13.0
Episode 254 finished after 12 timesteps. Return = 12.0
Episode 255 finished after 9 timesteps. Return = 9.0
Episode 256 finished after 13 timesteps. Return = 13.0
Episode 257 finished after 9 timesteps. Return = 9.0
Episode 258 finished after 11 timesteps. Return = 11.0
Episode 259 finished after 9 timesteps. Return = 9.0
Episode 260 finished after 9 timesteps. Return = 9.0
Episode 261 finished after 10 timesteps. Return = 10.0
Episode 262 finished after 10 timesteps. Return = 10.0
Episode 263 finished after 8 timesteps. Return = 8.0
Episode 264 finished after 11 timesteps. Return = 11.0
Episode 265 finished after 12 timesteps. Return = 12.0
Episode 266 finished after 9 timesteps. Return = 9.0
Episode 267 finished after 9 timesteps. Return = 9.0
Episode 268 finished after 14 timesteps. Return = 14.0
Episode 269 finished after 1

Episode 449 finished after 10 timesteps. Return = 10.0
Episode 450 finished after 8 timesteps. Return = 8.0
Episode 451 finished after 8 timesteps. Return = 8.0
Episode 452 finished after 10 timesteps. Return = 10.0
Episode 453 finished after 9 timesteps. Return = 9.0
Episode 454 finished after 11 timesteps. Return = 11.0
Episode 455 finished after 9 timesteps. Return = 9.0
Episode 456 finished after 10 timesteps. Return = 10.0
Episode 457 finished after 8 timesteps. Return = 8.0
Episode 458 finished after 9 timesteps. Return = 9.0
Episode 459 finished after 11 timesteps. Return = 11.0
Episode 460 finished after 9 timesteps. Return = 9.0
Episode 461 finished after 9 timesteps. Return = 9.0
Episode 462 finished after 10 timesteps. Return = 10.0
Episode 463 finished after 13 timesteps. Return = 13.0
Episode 464 finished after 9 timesteps. Return = 9.0
Episode 465 finished after 8 timesteps. Return = 8.0
Episode 466 finished after 10 timesteps. Return = 10.0
Episode 467 finished after 12 

Episode 601 finished after 16 timesteps. Return = 16.0
Episode 602 finished after 8 timesteps. Return = 8.0
Episode 603 finished after 15 timesteps. Return = 15.0
Episode 604 finished after 15 timesteps. Return = 15.0
Episode 605 finished after 10 timesteps. Return = 10.0
Episode 606 finished after 9 timesteps. Return = 9.0
Episode 607 finished after 18 timesteps. Return = 18.0
Episode 608 finished after 24 timesteps. Return = 24.0
Episode 609 finished after 10 timesteps. Return = 10.0
Episode 610 finished after 10 timesteps. Return = 10.0
Episode 611 finished after 12 timesteps. Return = 12.0
Episode 612 finished after 8 timesteps. Return = 8.0
Episode 613 finished after 15 timesteps. Return = 15.0
Episode 614 finished after 10 timesteps. Return = 10.0
Episode 615 finished after 9 timesteps. Return = 9.0
Episode 616 finished after 10 timesteps. Return = 10.0
Episode 617 finished after 18 timesteps. Return = 18.0
Episode 618 finished after 16 timesteps. Return = 16.0
Episode 619 finish

Episode 801 finished after 15 timesteps. Return = 15.0
Episode 802 finished after 9 timesteps. Return = 9.0
Episode 803 finished after 10 timesteps. Return = 10.0
Episode 804 finished after 17 timesteps. Return = 17.0
Episode 805 finished after 10 timesteps. Return = 10.0
Episode 806 finished after 30 timesteps. Return = 30.0
Episode 807 finished after 12 timesteps. Return = 12.0
Episode 808 finished after 14 timesteps. Return = 14.0
Episode 809 finished after 16 timesteps. Return = 16.0
Episode 810 finished after 32 timesteps. Return = 32.0
Episode 811 finished after 10 timesteps. Return = 10.0
Episode 812 finished after 22 timesteps. Return = 22.0
Episode 813 finished after 22 timesteps. Return = 22.0
Episode 814 finished after 11 timesteps. Return = 11.0
Episode 815 finished after 11 timesteps. Return = 11.0
Episode 816 finished after 22 timesteps. Return = 22.0
Episode 817 finished after 12 timesteps. Return = 12.0
Episode 818 finished after 10 timesteps. Return = 10.0
Episode 819 

Episode 951 finished after 31 timesteps. Return = 31.0
Episode 952 finished after 17 timesteps. Return = 17.0
Episode 953 finished after 18 timesteps. Return = 18.0
Episode 954 finished after 29 timesteps. Return = 29.0
Episode 955 finished after 11 timesteps. Return = 11.0
Episode 956 finished after 18 timesteps. Return = 18.0
Episode 957 finished after 17 timesteps. Return = 17.0
Episode 958 finished after 17 timesteps. Return = 17.0
Episode 959 finished after 19 timesteps. Return = 19.0
Episode 960 finished after 10 timesteps. Return = 10.0
Episode 961 finished after 16 timesteps. Return = 16.0
Episode 962 finished after 17 timesteps. Return = 17.0
Episode 963 finished after 27 timesteps. Return = 27.0
Episode 964 finished after 16 timesteps. Return = 16.0
Episode 965 finished after 15 timesteps. Return = 15.0
Episode 966 finished after 33 timesteps. Return = 33.0
Episode 967 finished after 17 timesteps. Return = 17.0
Episode 968 finished after 22 timesteps. Return = 22.0
Episode 96

Episode 1101 finished after 21 timesteps. Return = 21.0
Episode 1102 finished after 13 timesteps. Return = 13.0
Episode 1103 finished after 17 timesteps. Return = 17.0
Episode 1104 finished after 18 timesteps. Return = 18.0
Episode 1105 finished after 12 timesteps. Return = 12.0
Episode 1106 finished after 16 timesteps. Return = 16.0
Episode 1107 finished after 19 timesteps. Return = 19.0
Episode 1108 finished after 34 timesteps. Return = 34.0
Episode 1109 finished after 12 timesteps. Return = 12.0
Episode 1110 finished after 31 timesteps. Return = 31.0
Episode 1111 finished after 20 timesteps. Return = 20.0
Episode 1112 finished after 17 timesteps. Return = 17.0
Episode 1113 finished after 45 timesteps. Return = 45.0
Episode 1114 finished after 12 timesteps. Return = 12.0
Episode 1115 finished after 18 timesteps. Return = 18.0
Episode 1116 finished after 23 timesteps. Return = 23.0
Episode 1117 finished after 24 timesteps. Return = 24.0
Episode 1118 finished after 23 timesteps. Return

Episode 1251 finished after 14 timesteps. Return = 14.0
Episode 1252 finished after 9 timesteps. Return = 9.0
Episode 1253 finished after 12 timesteps. Return = 12.0
Episode 1254 finished after 13 timesteps. Return = 13.0
Episode 1255 finished after 11 timesteps. Return = 11.0
Episode 1256 finished after 14 timesteps. Return = 14.0
Episode 1257 finished after 13 timesteps. Return = 13.0
Episode 1258 finished after 12 timesteps. Return = 12.0
Episode 1259 finished after 16 timesteps. Return = 16.0
Episode 1260 finished after 14 timesteps. Return = 14.0
Episode 1261 finished after 15 timesteps. Return = 15.0
Episode 1262 finished after 10 timesteps. Return = 10.0
Episode 1263 finished after 11 timesteps. Return = 11.0
Episode 1264 finished after 16 timesteps. Return = 16.0
Episode 1265 finished after 16 timesteps. Return = 16.0
Episode 1266 finished after 9 timesteps. Return = 9.0
Episode 1267 finished after 13 timesteps. Return = 13.0
Episode 1268 finished after 17 timesteps. Return = 1

Episode 1401 finished after 16 timesteps. Return = 16.0
Episode 1402 finished after 24 timesteps. Return = 24.0
Episode 1403 finished after 27 timesteps. Return = 27.0
Episode 1404 finished after 21 timesteps. Return = 21.0
Episode 1405 finished after 42 timesteps. Return = 42.0
Episode 1406 finished after 25 timesteps. Return = 25.0
Episode 1407 finished after 15 timesteps. Return = 15.0
Episode 1408 finished after 21 timesteps. Return = 21.0
Episode 1409 finished after 25 timesteps. Return = 25.0
Episode 1410 finished after 19 timesteps. Return = 19.0
Episode 1411 finished after 50 timesteps. Return = 50.0
Episode 1412 finished after 13 timesteps. Return = 13.0
Episode 1413 finished after 21 timesteps. Return = 21.0
Episode 1414 finished after 23 timesteps. Return = 23.0
Episode 1415 finished after 35 timesteps. Return = 35.0
Episode 1416 finished after 19 timesteps. Return = 19.0
Episode 1417 finished after 19 timesteps. Return = 19.0
Episode 1418 finished after 17 timesteps. Return

Episode 1548 finished after 13 timesteps. Return = 13.0
Episode 1549 finished after 16 timesteps. Return = 16.0
Episode 1550 finished after 16 timesteps. Return = 16.0
Episode 1551 finished after 17 timesteps. Return = 17.0
Episode 1552 finished after 20 timesteps. Return = 20.0
Episode 1553 finished after 17 timesteps. Return = 17.0
Episode 1554 finished after 17 timesteps. Return = 17.0
Episode 1555 finished after 16 timesteps. Return = 16.0
Episode 1556 finished after 47 timesteps. Return = 47.0
Episode 1557 finished after 29 timesteps. Return = 29.0
Episode 1558 finished after 17 timesteps. Return = 17.0
Episode 1559 finished after 15 timesteps. Return = 15.0
Episode 1560 finished after 18 timesteps. Return = 18.0
Episode 1561 finished after 15 timesteps. Return = 15.0
Episode 1562 finished after 41 timesteps. Return = 41.0
Episode 1563 finished after 15 timesteps. Return = 15.0
Episode 1564 finished after 32 timesteps. Return = 32.0
Episode 1565 finished after 21 timesteps. Return

Episode 1701 finished after 21 timesteps. Return = 21.0
Episode 1702 finished after 10 timesteps. Return = 10.0
Episode 1703 finished after 31 timesteps. Return = 31.0
Episode 1704 finished after 15 timesteps. Return = 15.0
Episode 1705 finished after 19 timesteps. Return = 19.0
Episode 1706 finished after 27 timesteps. Return = 27.0
Episode 1707 finished after 15 timesteps. Return = 15.0
Episode 1708 finished after 21 timesteps. Return = 21.0
Episode 1709 finished after 29 timesteps. Return = 29.0
Episode 1710 finished after 21 timesteps. Return = 21.0
Episode 1711 finished after 19 timesteps. Return = 19.0
Episode 1712 finished after 15 timesteps. Return = 15.0
Episode 1713 finished after 17 timesteps. Return = 17.0
Episode 1714 finished after 20 timesteps. Return = 20.0
Episode 1715 finished after 27 timesteps. Return = 27.0
Episode 1716 finished after 19 timesteps. Return = 19.0
Episode 1717 finished after 18 timesteps. Return = 18.0
Episode 1718 finished after 15 timesteps. Return

Episode 1851 finished after 15 timesteps. Return = 15.0
Episode 1852 finished after 19 timesteps. Return = 19.0
Episode 1853 finished after 16 timesteps. Return = 16.0
Episode 1854 finished after 24 timesteps. Return = 24.0
Episode 1855 finished after 15 timesteps. Return = 15.0
Episode 1856 finished after 15 timesteps. Return = 15.0
Episode 1857 finished after 19 timesteps. Return = 19.0
Episode 1858 finished after 25 timesteps. Return = 25.0
Episode 1859 finished after 16 timesteps. Return = 16.0
Episode 1860 finished after 20 timesteps. Return = 20.0
Episode 1861 finished after 17 timesteps. Return = 17.0
Episode 1862 finished after 12 timesteps. Return = 12.0
Episode 1863 finished after 20 timesteps. Return = 20.0
Episode 1864 finished after 18 timesteps. Return = 18.0
Episode 1865 finished after 17 timesteps. Return = 17.0
Episode 1866 finished after 14 timesteps. Return = 14.0
Episode 1867 finished after 14 timesteps. Return = 14.0
Episode 1868 finished after 19 timesteps. Return

Episode 2001 finished after 34 timesteps. Return = 34.0
Episode 2002 finished after 45 timesteps. Return = 45.0
Episode 2003 finished after 46 timesteps. Return = 46.0
Episode 2004 finished after 54 timesteps. Return = 54.0
Episode 2005 finished after 47 timesteps. Return = 47.0
Episode 2006 finished after 44 timesteps. Return = 44.0
Episode 2007 finished after 46 timesteps. Return = 46.0
Episode 2008 finished after 34 timesteps. Return = 34.0
Episode 2009 finished after 40 timesteps. Return = 40.0
Episode 2010 finished after 32 timesteps. Return = 32.0
Episode 2011 finished after 32 timesteps. Return = 32.0
Episode 2012 finished after 35 timesteps. Return = 35.0
Episode 2013 finished after 42 timesteps. Return = 42.0
Episode 2014 finished after 56 timesteps. Return = 56.0
Episode 2015 finished after 50 timesteps. Return = 50.0
Episode 2016 finished after 44 timesteps. Return = 44.0
Episode 2017 finished after 41 timesteps. Return = 41.0
Episode 2018 finished after 39 timesteps. Return

Episode 2151 finished after 55 timesteps. Return = 55.0
Episode 2152 finished after 37 timesteps. Return = 37.0
Episode 2153 finished after 37 timesteps. Return = 37.0
Episode 2154 finished after 41 timesteps. Return = 41.0
Episode 2155 finished after 32 timesteps. Return = 32.0
Episode 2156 finished after 46 timesteps. Return = 46.0
Episode 2157 finished after 33 timesteps. Return = 33.0
Episode 2158 finished after 42 timesteps. Return = 42.0
Episode 2159 finished after 37 timesteps. Return = 37.0
Episode 2160 finished after 39 timesteps. Return = 39.0
Episode 2161 finished after 33 timesteps. Return = 33.0
Episode 2162 finished after 49 timesteps. Return = 49.0
Episode 2163 finished after 26 timesteps. Return = 26.0
Episode 2164 finished after 42 timesteps. Return = 42.0
Episode 2165 finished after 36 timesteps. Return = 36.0
Episode 2166 finished after 36 timesteps. Return = 36.0
Episode 2167 finished after 32 timesteps. Return = 32.0
Episode 2168 finished after 25 timesteps. Return

Episode 2301 finished after 61 timesteps. Return = 61.0
Episode 2302 finished after 58 timesteps. Return = 58.0
Episode 2303 finished after 56 timesteps. Return = 56.0
Episode 2304 finished after 56 timesteps. Return = 56.0
Episode 2305 finished after 59 timesteps. Return = 59.0
Episode 2306 finished after 56 timesteps. Return = 56.0
Episode 2307 finished after 59 timesteps. Return = 59.0
Episode 2308 finished after 69 timesteps. Return = 69.0
Episode 2309 finished after 59 timesteps. Return = 59.0
Episode 2310 finished after 70 timesteps. Return = 70.0
Episode 2311 finished after 60 timesteps. Return = 60.0
Episode 2312 finished after 60 timesteps. Return = 60.0
Episode 2313 finished after 69 timesteps. Return = 69.0
Episode 2314 finished after 63 timesteps. Return = 63.0
Episode 2315 finished after 59 timesteps. Return = 59.0
Episode 2316 finished after 60 timesteps. Return = 60.0
Episode 2317 finished after 62 timesteps. Return = 62.0
Episode 2318 finished after 84 timesteps. Return

Episode 2451 finished after 73 timesteps. Return = 73.0
Episode 2452 finished after 84 timesteps. Return = 84.0
Episode 2453 finished after 93 timesteps. Return = 93.0
Episode 2454 finished after 88 timesteps. Return = 88.0
Episode 2455 finished after 90 timesteps. Return = 90.0
Episode 2456 finished after 83 timesteps. Return = 83.0
Episode 2457 finished after 85 timesteps. Return = 85.0
Episode 2458 finished after 81 timesteps. Return = 81.0
Episode 2459 finished after 87 timesteps. Return = 87.0
Episode 2460 finished after 73 timesteps. Return = 73.0
Episode 2461 finished after 90 timesteps. Return = 90.0
Episode 2462 finished after 83 timesteps. Return = 83.0
Episode 2463 finished after 84 timesteps. Return = 84.0
Episode 2464 finished after 90 timesteps. Return = 90.0
Episode 2465 finished after 75 timesteps. Return = 75.0
Episode 2466 finished after 73 timesteps. Return = 73.0
Episode 2467 finished after 74 timesteps. Return = 74.0
Episode 2468 finished after 73 timesteps. Return

Episode 2601 finished after 102 timesteps. Return = 102.0
Episode 2602 finished after 20 timesteps. Return = 20.0
Episode 2603 finished after 107 timesteps. Return = 107.0
Episode 2604 finished after 104 timesteps. Return = 104.0
Episode 2605 finished after 57 timesteps. Return = 57.0
Episode 2606 finished after 26 timesteps. Return = 26.0
Episode 2607 finished after 29 timesteps. Return = 29.0
Episode 2608 finished after 96 timesteps. Return = 96.0
Episode 2609 finished after 104 timesteps. Return = 104.0
Episode 2610 finished after 36 timesteps. Return = 36.0
Episode 2611 finished after 69 timesteps. Return = 69.0
Episode 2612 finished after 37 timesteps. Return = 37.0
Episode 2613 finished after 103 timesteps. Return = 103.0
Episode 2614 finished after 59 timesteps. Return = 59.0
Episode 2615 finished after 100 timesteps. Return = 100.0
Episode 2616 finished after 28 timesteps. Return = 28.0
Episode 2617 finished after 104 timesteps. Return = 104.0
Episode 2618 finished after 103 ti

Episode 2751 finished after 23 timesteps. Return = 23.0
Episode 2752 finished after 21 timesteps. Return = 21.0
Episode 2753 finished after 14 timesteps. Return = 14.0
Episode 2754 finished after 25 timesteps. Return = 25.0
Episode 2755 finished after 38 timesteps. Return = 38.0
Episode 2756 finished after 31 timesteps. Return = 31.0
Episode 2757 finished after 24 timesteps. Return = 24.0
Episode 2758 finished after 25 timesteps. Return = 25.0
Episode 2759 finished after 21 timesteps. Return = 21.0
Episode 2760 finished after 30 timesteps. Return = 30.0
Episode 2761 finished after 30 timesteps. Return = 30.0
Episode 2762 finished after 22 timesteps. Return = 22.0
Episode 2763 finished after 23 timesteps. Return = 23.0
Episode 2764 finished after 16 timesteps. Return = 16.0
Episode 2765 finished after 14 timesteps. Return = 14.0
Episode 2766 finished after 20 timesteps. Return = 20.0
Episode 2767 finished after 25 timesteps. Return = 25.0
Episode 2768 finished after 30 timesteps. Return

Episode 2901 finished after 91 timesteps. Return = 91.0
Episode 2902 finished after 82 timesteps. Return = 82.0
Episode 2903 finished after 125 timesteps. Return = 125.0
Episode 2904 finished after 122 timesteps. Return = 122.0
Episode 2905 finished after 47 timesteps. Return = 47.0
Episode 2906 finished after 103 timesteps. Return = 103.0
Episode 2907 finished after 123 timesteps. Return = 123.0
Episode 2908 finished after 90 timesteps. Return = 90.0
Episode 2909 finished after 101 timesteps. Return = 101.0
Episode 2910 finished after 88 timesteps. Return = 88.0
Episode 2911 finished after 131 timesteps. Return = 131.0
Episode 2912 finished after 51 timesteps. Return = 51.0
Episode 2913 finished after 98 timesteps. Return = 98.0
Episode 2914 finished after 122 timesteps. Return = 122.0
Episode 2915 finished after 120 timesteps. Return = 120.0
Episode 2916 finished after 68 timesteps. Return = 68.0
Episode 2917 finished after 56 timesteps. Return = 56.0
Episode 2918 finished after 51 t

Episode 3049 finished after 29 timesteps. Return = 29.0
Episode 3050 finished after 20 timesteps. Return = 20.0
Episode 3051 finished after 30 timesteps. Return = 30.0
Episode 3052 finished after 59 timesteps. Return = 59.0
Episode 3053 finished after 57 timesteps. Return = 57.0
Episode 3054 finished after 32 timesteps. Return = 32.0
Episode 3055 finished after 42 timesteps. Return = 42.0
Episode 3056 finished after 57 timesteps. Return = 57.0
Episode 3057 finished after 31 timesteps. Return = 31.0
Episode 3058 finished after 39 timesteps. Return = 39.0
Episode 3059 finished after 58 timesteps. Return = 58.0
Episode 3060 finished after 35 timesteps. Return = 35.0
Episode 3061 finished after 37 timesteps. Return = 37.0
Episode 3062 finished after 33 timesteps. Return = 33.0
Episode 3063 finished after 29 timesteps. Return = 29.0
Episode 3064 finished after 43 timesteps. Return = 43.0
Episode 3065 finished after 40 timesteps. Return = 40.0
Episode 3066 finished after 37 timesteps. Return

Episode 3201 finished after 43 timesteps. Return = 43.0
Episode 3202 finished after 34 timesteps. Return = 34.0
Episode 3203 finished after 28 timesteps. Return = 28.0
Episode 3204 finished after 31 timesteps. Return = 31.0
Episode 3205 finished after 183 timesteps. Return = 183.0
Episode 3206 finished after 68 timesteps. Return = 68.0
Episode 3207 finished after 56 timesteps. Return = 56.0
Episode 3208 finished after 43 timesteps. Return = 43.0
Episode 3209 finished after 51 timesteps. Return = 51.0
Episode 3210 finished after 92 timesteps. Return = 92.0
Episode 3211 finished after 64 timesteps. Return = 64.0
Episode 3212 finished after 81 timesteps. Return = 81.0
Episode 3213 finished after 42 timesteps. Return = 42.0
Episode 3214 finished after 77 timesteps. Return = 77.0
Episode 3215 finished after 43 timesteps. Return = 43.0
Episode 3216 finished after 49 timesteps. Return = 49.0
Episode 3217 finished after 49 timesteps. Return = 49.0
Episode 3218 finished after 34 timesteps. Retu

Episode 3351 finished after 41 timesteps. Return = 41.0
Episode 3352 finished after 48 timesteps. Return = 48.0
Episode 3353 finished after 25 timesteps. Return = 25.0
Episode 3354 finished after 33 timesteps. Return = 33.0
Episode 3355 finished after 126 timesteps. Return = 126.0
Episode 3356 finished after 20 timesteps. Return = 20.0
Episode 3357 finished after 34 timesteps. Return = 34.0
Episode 3358 finished after 61 timesteps. Return = 61.0
Episode 3359 finished after 141 timesteps. Return = 141.0
Episode 3360 finished after 87 timesteps. Return = 87.0
Episode 3361 finished after 101 timesteps. Return = 101.0
Episode 3362 finished after 51 timesteps. Return = 51.0
Episode 3363 finished after 31 timesteps. Return = 31.0
Episode 3364 finished after 19 timesteps. Return = 19.0
Episode 3365 finished after 45 timesteps. Return = 45.0
Episode 3366 finished after 24 timesteps. Return = 24.0
Episode 3367 finished after 39 timesteps. Return = 39.0
Episode 3368 finished after 36 timesteps. 

Episode 3501 finished after 36 timesteps. Return = 36.0
Episode 3502 finished after 29 timesteps. Return = 29.0
Episode 3503 finished after 22 timesteps. Return = 22.0
Episode 3504 finished after 21 timesteps. Return = 21.0
Episode 3505 finished after 65 timesteps. Return = 65.0
Episode 3506 finished after 30 timesteps. Return = 30.0
Episode 3507 finished after 67 timesteps. Return = 67.0
Episode 3508 finished after 30 timesteps. Return = 30.0
Episode 3509 finished after 27 timesteps. Return = 27.0
Episode 3510 finished after 18 timesteps. Return = 18.0
Episode 3511 finished after 62 timesteps. Return = 62.0
Episode 3512 finished after 43 timesteps. Return = 43.0
Episode 3513 finished after 72 timesteps. Return = 72.0
Episode 3514 finished after 47 timesteps. Return = 47.0
Episode 3515 finished after 23 timesteps. Return = 23.0
Episode 3516 finished after 30 timesteps. Return = 30.0
Episode 3517 finished after 22 timesteps. Return = 22.0
Episode 3518 finished after 54 timesteps. Return

Episode 3651 finished after 24 timesteps. Return = 24.0
Episode 3652 finished after 20 timesteps. Return = 20.0
Episode 3653 finished after 19 timesteps. Return = 19.0
Episode 3654 finished after 42 timesteps. Return = 42.0
Episode 3655 finished after 21 timesteps. Return = 21.0
Episode 3656 finished after 40 timesteps. Return = 40.0
Episode 3657 finished after 16 timesteps. Return = 16.0
Episode 3658 finished after 24 timesteps. Return = 24.0
Episode 3659 finished after 25 timesteps. Return = 25.0
Episode 3660 finished after 47 timesteps. Return = 47.0
Episode 3661 finished after 22 timesteps. Return = 22.0
Episode 3662 finished after 44 timesteps. Return = 44.0
Episode 3663 finished after 28 timesteps. Return = 28.0
Episode 3664 finished after 24 timesteps. Return = 24.0
Episode 3665 finished after 17 timesteps. Return = 17.0
Episode 3666 finished after 21 timesteps. Return = 21.0
Episode 3667 finished after 20 timesteps. Return = 20.0
Episode 3668 finished after 23 timesteps. Return

Episode 3801 finished after 92 timesteps. Return = 92.0
Episode 3802 finished after 45 timesteps. Return = 45.0
Episode 3803 finished after 58 timesteps. Return = 58.0
Episode 3804 finished after 22 timesteps. Return = 22.0
Episode 3805 finished after 22 timesteps. Return = 22.0
Episode 3806 finished after 46 timesteps. Return = 46.0
Episode 3807 finished after 20 timesteps. Return = 20.0
Episode 3808 finished after 38 timesteps. Return = 38.0
Episode 3809 finished after 79 timesteps. Return = 79.0
Episode 3810 finished after 19 timesteps. Return = 19.0
Episode 3811 finished after 36 timesteps. Return = 36.0
Episode 3812 finished after 33 timesteps. Return = 33.0
Episode 3813 finished after 42 timesteps. Return = 42.0
Episode 3814 finished after 22 timesteps. Return = 22.0
Episode 3815 finished after 38 timesteps. Return = 38.0
Episode 3816 finished after 35 timesteps. Return = 35.0
Episode 3817 finished after 43 timesteps. Return = 43.0
Episode 3818 finished after 25 timesteps. Return

Episode 3951 finished after 55 timesteps. Return = 55.0
Episode 3952 finished after 36 timesteps. Return = 36.0
Episode 3953 finished after 30 timesteps. Return = 30.0
Episode 3954 finished after 22 timesteps. Return = 22.0
Episode 3955 finished after 83 timesteps. Return = 83.0
Episode 3956 finished after 41 timesteps. Return = 41.0
Episode 3957 finished after 68 timesteps. Return = 68.0
Episode 3958 finished after 58 timesteps. Return = 58.0
Episode 3959 finished after 34 timesteps. Return = 34.0
Episode 3960 finished after 148 timesteps. Return = 148.0
Episode 3961 finished after 149 timesteps. Return = 149.0
Episode 3962 finished after 64 timesteps. Return = 64.0
Episode 3963 finished after 25 timesteps. Return = 25.0
Episode 3964 finished after 36 timesteps. Return = 36.0
Episode 3965 finished after 19 timesteps. Return = 19.0
Episode 3966 finished after 79 timesteps. Return = 79.0
Episode 3967 finished after 56 timesteps. Return = 56.0
Episode 3968 finished after 46 timesteps. Re

Episode 4101 finished after 76 timesteps. Return = 76.0
Episode 4102 finished after 28 timesteps. Return = 28.0
Episode 4103 finished after 26 timesteps. Return = 26.0
Episode 4104 finished after 21 timesteps. Return = 21.0
Episode 4105 finished after 21 timesteps. Return = 21.0
Episode 4106 finished after 19 timesteps. Return = 19.0
Episode 4107 finished after 31 timesteps. Return = 31.0
Episode 4108 finished after 20 timesteps. Return = 20.0
Episode 4109 finished after 24 timesteps. Return = 24.0
Episode 4110 finished after 69 timesteps. Return = 69.0
Episode 4111 finished after 17 timesteps. Return = 17.0
Episode 4112 finished after 35 timesteps. Return = 35.0
Episode 4113 finished after 38 timesteps. Return = 38.0
Episode 4114 finished after 23 timesteps. Return = 23.0
Episode 4115 finished after 40 timesteps. Return = 40.0
Episode 4116 finished after 27 timesteps. Return = 27.0
Episode 4117 finished after 51 timesteps. Return = 51.0
Episode 4118 finished after 30 timesteps. Return

Episode 4251 finished after 141 timesteps. Return = 141.0
Episode 4252 finished after 133 timesteps. Return = 133.0
Episode 4253 finished after 145 timesteps. Return = 145.0
Episode 4254 finished after 128 timesteps. Return = 128.0
Episode 4255 finished after 44 timesteps. Return = 44.0
Episode 4256 finished after 20 timesteps. Return = 20.0
Episode 4257 finished after 42 timesteps. Return = 42.0
Episode 4258 finished after 30 timesteps. Return = 30.0
Episode 4259 finished after 96 timesteps. Return = 96.0
Episode 4260 finished after 135 timesteps. Return = 135.0
Episode 4261 finished after 102 timesteps. Return = 102.0
Episode 4262 finished after 89 timesteps. Return = 89.0
Episode 4263 finished after 49 timesteps. Return = 49.0
Episode 4264 finished after 111 timesteps. Return = 111.0
Episode 4265 finished after 71 timesteps. Return = 71.0
Episode 4266 finished after 128 timesteps. Return = 128.0
Episode 4267 finished after 61 timesteps. Return = 61.0
Episode 4268 finished after 26 t

Episode 4396 finished after 56 timesteps. Return = 56.0
Episode 4397 finished after 104 timesteps. Return = 104.0
Episode 4398 finished after 84 timesteps. Return = 84.0
Episode 4399 finished after 73 timesteps. Return = 73.0
Episode 4400 finished after 124 timesteps. Return = 124.0
Episode 4401 finished after 70 timesteps. Return = 70.0
Episode 4402 finished after 64 timesteps. Return = 64.0
Episode 4403 finished after 37 timesteps. Return = 37.0
Episode 4404 finished after 35 timesteps. Return = 35.0
Episode 4405 finished after 54 timesteps. Return = 54.0
Episode 4406 finished after 52 timesteps. Return = 52.0
Episode 4407 finished after 61 timesteps. Return = 61.0
Episode 4408 finished after 37 timesteps. Return = 37.0
Episode 4409 finished after 37 timesteps. Return = 37.0
Episode 4410 finished after 24 timesteps. Return = 24.0
Episode 4411 finished after 26 timesteps. Return = 26.0
Episode 4412 finished after 66 timesteps. Return = 66.0
Episode 4413 finished after 25 timesteps. Re

Episode 4547 finished after 56 timesteps. Return = 56.0
Episode 4548 finished after 40 timesteps. Return = 40.0
Episode 4549 finished after 28 timesteps. Return = 28.0
Episode 4550 finished after 38 timesteps. Return = 38.0
Episode 4551 finished after 127 timesteps. Return = 127.0
Episode 4552 finished after 65 timesteps. Return = 65.0
Episode 4553 finished after 68 timesteps. Return = 68.0
Episode 4554 finished after 71 timesteps. Return = 71.0
Episode 4555 finished after 52 timesteps. Return = 52.0
Episode 4556 finished after 165 timesteps. Return = 165.0
Episode 4557 finished after 108 timesteps. Return = 108.0
Episode 4558 finished after 167 timesteps. Return = 167.0
Episode 4559 finished after 160 timesteps. Return = 160.0
Episode 4560 finished after 122 timesteps. Return = 122.0
Episode 4561 finished after 80 timesteps. Return = 80.0
Episode 4562 finished after 125 timesteps. Return = 125.0
Episode 4563 finished after 144 timesteps. Return = 144.0
Episode 4564 finished after 42 t

Episode 4701 finished after 30 timesteps. Return = 30.0
Episode 4702 finished after 33 timesteps. Return = 33.0
Episode 4703 finished after 21 timesteps. Return = 21.0
Episode 4704 finished after 56 timesteps. Return = 56.0
Episode 4705 finished after 18 timesteps. Return = 18.0
Episode 4706 finished after 28 timesteps. Return = 28.0
Episode 4707 finished after 48 timesteps. Return = 48.0
Episode 4708 finished after 25 timesteps. Return = 25.0
Episode 4709 finished after 59 timesteps. Return = 59.0
Episode 4710 finished after 34 timesteps. Return = 34.0
Episode 4711 finished after 44 timesteps. Return = 44.0
Episode 4712 finished after 45 timesteps. Return = 45.0
Episode 4713 finished after 17 timesteps. Return = 17.0
Episode 4714 finished after 40 timesteps. Return = 40.0
Episode 4715 finished after 95 timesteps. Return = 95.0
Episode 4716 finished after 21 timesteps. Return = 21.0
Episode 4717 finished after 68 timesteps. Return = 68.0
Episode 4718 finished after 19 timesteps. Return

Episode 4848 finished after 97 timesteps. Return = 97.0
Episode 4849 finished after 55 timesteps. Return = 55.0
Episode 4850 finished after 34 timesteps. Return = 34.0
Episode 4851 finished after 89 timesteps. Return = 89.0
Episode 4852 finished after 28 timesteps. Return = 28.0
Episode 4853 finished after 78 timesteps. Return = 78.0
Episode 4854 finished after 79 timesteps. Return = 79.0
Episode 4855 finished after 30 timesteps. Return = 30.0
Episode 4856 finished after 29 timesteps. Return = 29.0
Episode 4857 finished after 34 timesteps. Return = 34.0
Episode 4858 finished after 43 timesteps. Return = 43.0
Episode 4859 finished after 141 timesteps. Return = 141.0
Episode 4860 finished after 144 timesteps. Return = 144.0
Episode 4861 finished after 177 timesteps. Return = 177.0
Episode 4862 finished after 72 timesteps. Return = 72.0
Episode 4863 finished after 33 timesteps. Return = 33.0
Episode 4864 finished after 46 timesteps. Return = 46.0
Episode 4865 finished after 152 timesteps.

Episode 5001 finished after 37 timesteps. Return = 37.0
Episode 5002 finished after 45 timesteps. Return = 45.0
Episode 5003 finished after 31 timesteps. Return = 31.0
Episode 5004 finished after 30 timesteps. Return = 30.0
Episode 5005 finished after 42 timesteps. Return = 42.0
Episode 5006 finished after 33 timesteps. Return = 33.0
Episode 5007 finished after 42 timesteps. Return = 42.0
Episode 5008 finished after 41 timesteps. Return = 41.0
Episode 5009 finished after 45 timesteps. Return = 45.0
Episode 5010 finished after 57 timesteps. Return = 57.0
Episode 5011 finished after 50 timesteps. Return = 50.0
Episode 5012 finished after 35 timesteps. Return = 35.0
Episode 5013 finished after 23 timesteps. Return = 23.0
Episode 5014 finished after 38 timesteps. Return = 38.0
Episode 5015 finished after 26 timesteps. Return = 26.0
Episode 5016 finished after 45 timesteps. Return = 45.0
Episode 5017 finished after 15 timesteps. Return = 15.0
Episode 5018 finished after 22 timesteps. Return

Episode 5147 finished after 56 timesteps. Return = 56.0
Episode 5148 finished after 37 timesteps. Return = 37.0
Episode 5149 finished after 28 timesteps. Return = 28.0
Episode 5150 finished after 39 timesteps. Return = 39.0
Episode 5151 finished after 111 timesteps. Return = 111.0
Episode 5152 finished after 126 timesteps. Return = 126.0
Episode 5153 finished after 125 timesteps. Return = 125.0
Episode 5154 finished after 121 timesteps. Return = 121.0
Episode 5155 finished after 59 timesteps. Return = 59.0
Episode 5156 finished after 114 timesteps. Return = 114.0
Episode 5157 finished after 153 timesteps. Return = 153.0
Episode 5158 finished after 104 timesteps. Return = 104.0
Episode 5159 finished after 113 timesteps. Return = 113.0
Episode 5160 finished after 60 timesteps. Return = 60.0
Episode 5161 finished after 45 timesteps. Return = 45.0
Episode 5162 finished after 154 timesteps. Return = 154.0
Episode 5163 finished after 117 timesteps. Return = 117.0
Episode 5164 finished after 

Episode 5301 finished after 42 timesteps. Return = 42.0
Episode 5302 finished after 44 timesteps. Return = 44.0
Episode 5303 finished after 54 timesteps. Return = 54.0
Episode 5304 finished after 41 timesteps. Return = 41.0
Episode 5305 finished after 45 timesteps. Return = 45.0
Episode 5306 finished after 77 timesteps. Return = 77.0
Episode 5307 finished after 44 timesteps. Return = 44.0
Episode 5308 finished after 81 timesteps. Return = 81.0
Episode 5309 finished after 145 timesteps. Return = 145.0
Episode 5310 finished after 60 timesteps. Return = 60.0
Episode 5311 finished after 126 timesteps. Return = 126.0
Episode 5312 finished after 26 timesteps. Return = 26.0
Episode 5313 finished after 41 timesteps. Return = 41.0
Episode 5314 finished after 50 timesteps. Return = 50.0
Episode 5315 finished after 38 timesteps. Return = 38.0
Episode 5316 finished after 72 timesteps. Return = 72.0
Episode 5317 finished after 68 timesteps. Return = 68.0
Episode 5318 finished after 40 timesteps. Re

Episode 5451 finished after 175 timesteps. Return = 175.0
Episode 5452 finished after 118 timesteps. Return = 118.0
Episode 5453 finished after 131 timesteps. Return = 131.0
Episode 5454 finished after 123 timesteps. Return = 123.0
Episode 5455 finished after 40 timesteps. Return = 40.0
Episode 5456 finished after 31 timesteps. Return = 31.0
Episode 5457 finished after 40 timesteps. Return = 40.0
Episode 5458 finished after 54 timesteps. Return = 54.0
Episode 5459 finished after 50 timesteps. Return = 50.0
Episode 5460 finished after 139 timesteps. Return = 139.0
Episode 5461 finished after 68 timesteps. Return = 68.0
Episode 5462 finished after 185 timesteps. Return = 185.0
Episode 5463 finished after 47 timesteps. Return = 47.0
Episode 5464 finished after 148 timesteps. Return = 148.0
Episode 5465 finished after 74 timesteps. Return = 74.0
Episode 5466 finished after 52 timesteps. Return = 52.0
Episode 5467 finished after 29 timesteps. Return = 29.0
Episode 5468 finished after 89 tim

Episode 5601 finished after 57 timesteps. Return = 57.0
Episode 5602 finished after 48 timesteps. Return = 48.0
Episode 5603 finished after 70 timesteps. Return = 70.0
Episode 5604 finished after 59 timesteps. Return = 59.0
Episode 5605 finished after 56 timesteps. Return = 56.0
Episode 5606 finished after 139 timesteps. Return = 139.0
Episode 5607 finished after 28 timesteps. Return = 28.0
Episode 5608 finished after 35 timesteps. Return = 35.0
Episode 5609 finished after 120 timesteps. Return = 120.0
Episode 5610 finished after 46 timesteps. Return = 46.0
Episode 5611 finished after 33 timesteps. Return = 33.0
Episode 5612 finished after 77 timesteps. Return = 77.0
Episode 5613 finished after 35 timesteps. Return = 35.0
Episode 5614 finished after 70 timesteps. Return = 70.0
Episode 5615 finished after 71 timesteps. Return = 71.0
Episode 5616 finished after 65 timesteps. Return = 65.0
Episode 5617 finished after 34 timesteps. Return = 34.0
Episode 5618 finished after 126 timesteps. R

Episode 5747 finished after 75 timesteps. Return = 75.0
Episode 5748 finished after 74 timesteps. Return = 74.0
Episode 5749 finished after 128 timesteps. Return = 128.0
Episode 5750 finished after 42 timesteps. Return = 42.0
Episode 5751 finished after 56 timesteps. Return = 56.0
Episode 5752 finished after 63 timesteps. Return = 63.0
Episode 5753 finished after 84 timesteps. Return = 84.0
Episode 5754 finished after 43 timesteps. Return = 43.0
Episode 5755 finished after 62 timesteps. Return = 62.0
Episode 5756 finished after 136 timesteps. Return = 136.0
Episode 5757 finished after 75 timesteps. Return = 75.0
Episode 5758 finished after 51 timesteps. Return = 51.0
Episode 5759 finished after 79 timesteps. Return = 79.0
Episode 5760 finished after 56 timesteps. Return = 56.0
Episode 5761 finished after 56 timesteps. Return = 56.0
Episode 5762 finished after 49 timesteps. Return = 49.0
Episode 5763 finished after 134 timesteps. Return = 134.0
Episode 5764 finished after 106 timesteps.

Episode 5901 finished after 37 timesteps. Return = 37.0
Episode 5902 finished after 69 timesteps. Return = 69.0
Episode 5903 finished after 68 timesteps. Return = 68.0
Episode 5904 finished after 118 timesteps. Return = 118.0
Episode 5905 finished after 105 timesteps. Return = 105.0
Episode 5906 finished after 69 timesteps. Return = 69.0
Episode 5907 finished after 42 timesteps. Return = 42.0
Episode 5908 finished after 94 timesteps. Return = 94.0
Episode 5909 finished after 186 timesteps. Return = 186.0
Episode 5910 finished after 129 timesteps. Return = 129.0
Episode 5911 finished after 144 timesteps. Return = 144.0
Episode 5912 finished after 125 timesteps. Return = 125.0
Episode 5913 finished after 101 timesteps. Return = 101.0
Episode 5914 finished after 128 timesteps. Return = 128.0
Episode 5915 finished after 103 timesteps. Return = 103.0
Episode 5916 finished after 67 timesteps. Return = 67.0
Episode 5917 finished after 62 timesteps. Return = 62.0
Episode 5918 finished after 44

Episode 6047 finished after 33 timesteps. Return = 33.0
Episode 6048 finished after 28 timesteps. Return = 28.0
Episode 6049 finished after 31 timesteps. Return = 31.0
Episode 6050 finished after 30 timesteps. Return = 30.0
Episode 6051 finished after 50 timesteps. Return = 50.0
Episode 6052 finished after 28 timesteps. Return = 28.0
Episode 6053 finished after 34 timesteps. Return = 34.0
Episode 6054 finished after 27 timesteps. Return = 27.0
Episode 6055 finished after 46 timesteps. Return = 46.0
Episode 6056 finished after 34 timesteps. Return = 34.0
Episode 6057 finished after 36 timesteps. Return = 36.0
Episode 6058 finished after 38 timesteps. Return = 38.0
Episode 6059 finished after 74 timesteps. Return = 74.0
Episode 6060 finished after 31 timesteps. Return = 31.0
Episode 6061 finished after 51 timesteps. Return = 51.0
Episode 6062 finished after 31 timesteps. Return = 31.0
Episode 6063 finished after 23 timesteps. Return = 23.0
Episode 6064 finished after 43 timesteps. Return

Episode 6194 finished after 73 timesteps. Return = 73.0
Episode 6195 finished after 142 timesteps. Return = 142.0
Episode 6196 finished after 107 timesteps. Return = 107.0
Episode 6197 finished after 71 timesteps. Return = 71.0
Episode 6198 finished after 153 timesteps. Return = 153.0
Episode 6199 finished after 147 timesteps. Return = 147.0
Episode 6200 finished after 152 timesteps. Return = 152.0
Episode 6201 finished after 54 timesteps. Return = 54.0
Episode 6202 finished after 139 timesteps. Return = 139.0
Episode 6203 finished after 147 timesteps. Return = 147.0
Episode 6204 finished after 47 timesteps. Return = 47.0
Episode 6205 finished after 108 timesteps. Return = 108.0
Episode 6206 finished after 61 timesteps. Return = 61.0
Episode 6207 finished after 168 timesteps. Return = 168.0
Episode 6208 finished after 119 timesteps. Return = 119.0
Episode 6209 finished after 103 timesteps. Return = 103.0
Episode 6210 finished after 67 timesteps. Return = 67.0
Episode 6211 finished afte

Episode 6351 finished after 65 timesteps. Return = 65.0
Episode 6352 finished after 64 timesteps. Return = 64.0
Episode 6353 finished after 43 timesteps. Return = 43.0
Episode 6354 finished after 44 timesteps. Return = 44.0
Episode 6355 finished after 60 timesteps. Return = 60.0
Episode 6356 finished after 68 timesteps. Return = 68.0
Episode 6357 finished after 74 timesteps. Return = 74.0
Episode 6358 finished after 52 timesteps. Return = 52.0
Episode 6359 finished after 72 timesteps. Return = 72.0
Episode 6360 finished after 50 timesteps. Return = 50.0
Episode 6361 finished after 83 timesteps. Return = 83.0
Episode 6362 finished after 36 timesteps. Return = 36.0
Episode 6363 finished after 53 timesteps. Return = 53.0
Episode 6364 finished after 53 timesteps. Return = 53.0
Episode 6365 finished after 57 timesteps. Return = 57.0
Episode 6366 finished after 58 timesteps. Return = 58.0
Episode 6367 finished after 40 timesteps. Return = 40.0
Episode 6368 finished after 46 timesteps. Return

Episode 6499 finished after 144 timesteps. Return = 144.0
Episode 6500 finished after 53 timesteps. Return = 53.0
Episode 6501 finished after 129 timesteps. Return = 129.0
Episode 6502 finished after 126 timesteps. Return = 126.0
Episode 6503 finished after 147 timesteps. Return = 147.0
Episode 6504 finished after 97 timesteps. Return = 97.0
Episode 6505 finished after 61 timesteps. Return = 61.0
Episode 6506 finished after 153 timesteps. Return = 153.0
Episode 6507 finished after 135 timesteps. Return = 135.0
Episode 6508 finished after 130 timesteps. Return = 130.0
Episode 6509 finished after 111 timesteps. Return = 111.0
Episode 6510 finished after 112 timesteps. Return = 112.0
Episode 6511 finished after 76 timesteps. Return = 76.0
Episode 6512 finished after 126 timesteps. Return = 126.0
Episode 6513 finished after 66 timesteps. Return = 66.0
Episode 6514 finished after 56 timesteps. Return = 56.0
Episode 6515 finished after 135 timesteps. Return = 135.0
Episode 6516 finished afte

Episode 6651 finished after 70 timesteps. Return = 70.0
Episode 6652 finished after 87 timesteps. Return = 87.0
Episode 6653 finished after 70 timesteps. Return = 70.0
Episode 6654 finished after 101 timesteps. Return = 101.0
Episode 6655 finished after 102 timesteps. Return = 102.0
Episode 6656 finished after 63 timesteps. Return = 63.0
Episode 6657 finished after 122 timesteps. Return = 122.0
Episode 6658 finished after 70 timesteps. Return = 70.0
Episode 6659 finished after 82 timesteps. Return = 82.0
Episode 6660 finished after 56 timesteps. Return = 56.0
Episode 6661 finished after 104 timesteps. Return = 104.0
Episode 6662 finished after 82 timesteps. Return = 82.0
Episode 6663 finished after 42 timesteps. Return = 42.0
Episode 6664 finished after 143 timesteps. Return = 143.0
Episode 6665 finished after 75 timesteps. Return = 75.0
Episode 6666 finished after 77 timesteps. Return = 77.0
Episode 6667 finished after 57 timesteps. Return = 57.0
Episode 6668 finished after 78 timeste

Episode 6801 finished after 47 timesteps. Return = 47.0
Episode 6802 finished after 42 timesteps. Return = 42.0
Episode 6803 finished after 70 timesteps. Return = 70.0
Episode 6804 finished after 71 timesteps. Return = 71.0
Episode 6805 finished after 93 timesteps. Return = 93.0
Episode 6806 finished after 50 timesteps. Return = 50.0
Episode 6807 finished after 44 timesteps. Return = 44.0
Episode 6808 finished after 39 timesteps. Return = 39.0
Episode 6809 finished after 43 timesteps. Return = 43.0
Episode 6810 finished after 56 timesteps. Return = 56.0
Episode 6811 finished after 95 timesteps. Return = 95.0
Episode 6812 finished after 66 timesteps. Return = 66.0
Episode 6813 finished after 60 timesteps. Return = 60.0
Episode 6814 finished after 72 timesteps. Return = 72.0
Episode 6815 finished after 72 timesteps. Return = 72.0
Episode 6816 finished after 48 timesteps. Return = 48.0
Episode 6817 finished after 59 timesteps. Return = 59.0
Episode 6818 finished after 61 timesteps. Return

Episode 6951 finished after 78 timesteps. Return = 78.0
Episode 6952 finished after 59 timesteps. Return = 59.0
Episode 6953 finished after 126 timesteps. Return = 126.0
Episode 6954 finished after 107 timesteps. Return = 107.0
Episode 6955 finished after 109 timesteps. Return = 109.0
Episode 6956 finished after 141 timesteps. Return = 141.0
Episode 6957 finished after 162 timesteps. Return = 162.0
Episode 6958 finished after 118 timesteps. Return = 118.0
Episode 6959 finished after 148 timesteps. Return = 148.0
Episode 6960 finished after 106 timesteps. Return = 106.0
Episode 6961 finished after 111 timesteps. Return = 111.0
Episode 6962 finished after 115 timesteps. Return = 115.0
Episode 6963 finished after 129 timesteps. Return = 129.0
Episode 6964 finished after 129 timesteps. Return = 129.0
Episode 6965 finished after 108 timesteps. Return = 108.0
Episode 6966 finished after 137 timesteps. Return = 137.0
Episode 6967 finished after 134 timesteps. Return = 134.0
Episode 6968 finis

Episode 7101 finished after 94 timesteps. Return = 94.0
Episode 7102 finished after 43 timesteps. Return = 43.0
Episode 7103 finished after 113 timesteps. Return = 113.0
Episode 7104 finished after 143 timesteps. Return = 143.0
Episode 7105 finished after 76 timesteps. Return = 76.0
Episode 7106 finished after 105 timesteps. Return = 105.0
Episode 7107 finished after 55 timesteps. Return = 55.0
Episode 7108 finished after 66 timesteps. Return = 66.0
Episode 7109 finished after 120 timesteps. Return = 120.0
Episode 7110 finished after 127 timesteps. Return = 127.0
Episode 7111 finished after 136 timesteps. Return = 136.0
Episode 7112 finished after 138 timesteps. Return = 138.0
Episode 7113 finished after 136 timesteps. Return = 136.0
Episode 7114 finished after 132 timesteps. Return = 132.0
Episode 7115 finished after 58 timesteps. Return = 58.0
Episode 7116 finished after 127 timesteps. Return = 127.0
Episode 7117 finished after 111 timesteps. Return = 111.0
Episode 7118 finished afte

Episode 7245 finished after 56 timesteps. Return = 56.0
Episode 7246 finished after 97 timesteps. Return = 97.0
Episode 7247 finished after 76 timesteps. Return = 76.0
Episode 7248 finished after 109 timesteps. Return = 109.0
Episode 7249 finished after 82 timesteps. Return = 82.0
Episode 7250 finished after 41 timesteps. Return = 41.0
Episode 7251 finished after 163 timesteps. Return = 163.0
Episode 7252 finished after 138 timesteps. Return = 138.0
Episode 7253 finished after 73 timesteps. Return = 73.0
Episode 7254 finished after 71 timesteps. Return = 71.0
Episode 7255 finished after 43 timesteps. Return = 43.0
Episode 7256 finished after 50 timesteps. Return = 50.0
Episode 7257 finished after 61 timesteps. Return = 61.0
Episode 7258 finished after 126 timesteps. Return = 126.0
Episode 7259 finished after 75 timesteps. Return = 75.0
Episode 7260 finished after 95 timesteps. Return = 95.0
Episode 7261 finished after 91 timesteps. Return = 91.0
Episode 7262 finished after 40 timesteps

Episode 7392 finished after 70 timesteps. Return = 70.0
Episode 7393 finished after 75 timesteps. Return = 75.0
Episode 7394 finished after 82 timesteps. Return = 82.0
Episode 7395 finished after 84 timesteps. Return = 84.0
Episode 7396 finished after 124 timesteps. Return = 124.0
Episode 7397 finished after 117 timesteps. Return = 117.0
Episode 7398 finished after 102 timesteps. Return = 102.0
Episode 7399 finished after 81 timesteps. Return = 81.0
Episode 7400 finished after 122 timesteps. Return = 122.0
Episode 7401 finished after 156 timesteps. Return = 156.0
Episode 7402 finished after 89 timesteps. Return = 89.0
Episode 7403 finished after 92 timesteps. Return = 92.0
Episode 7404 finished after 125 timesteps. Return = 125.0
Episode 7405 finished after 102 timesteps. Return = 102.0
Episode 7406 finished after 97 timesteps. Return = 97.0
Episode 7407 finished after 126 timesteps. Return = 126.0
Episode 7408 finished after 145 timesteps. Return = 145.0
Episode 7409 finished after 10

Episode 7541 finished after 189 timesteps. Return = 189.0
Episode 7542 finished after 179 timesteps. Return = 179.0
Episode 7543 finished after 44 timesteps. Return = 44.0
Episode 7544 finished after 122 timesteps. Return = 122.0
Episode 7545 finished after 161 timesteps. Return = 161.0
Episode 7546 finished after 70 timesteps. Return = 70.0
Episode 7547 finished after 78 timesteps. Return = 78.0
Episode 7548 finished after 66 timesteps. Return = 66.0
Episode 7549 finished after 61 timesteps. Return = 61.0
Episode 7550 finished after 220 timesteps. Return = 220.0
Episode 7551 finished after 176 timesteps. Return = 176.0
Episode 7552 finished after 138 timesteps. Return = 138.0
Episode 7553 finished after 99 timesteps. Return = 99.0
Episode 7554 finished after 66 timesteps. Return = 66.0
Episode 7555 finished after 78 timesteps. Return = 78.0
Episode 7556 finished after 186 timesteps. Return = 186.0
Episode 7557 finished after 155 timesteps. Return = 155.0
Episode 7558 finished after 82

Episode 7692 finished after 68 timesteps. Return = 68.0
Episode 7693 finished after 87 timesteps. Return = 87.0
Episode 7694 finished after 47 timesteps. Return = 47.0
Episode 7695 finished after 69 timesteps. Return = 69.0
Episode 7696 finished after 72 timesteps. Return = 72.0
Episode 7697 finished after 49 timesteps. Return = 49.0
Episode 7698 finished after 69 timesteps. Return = 69.0
Episode 7699 finished after 60 timesteps. Return = 60.0
Episode 7700 finished after 151 timesteps. Return = 151.0
Episode 7701 finished after 141 timesteps. Return = 141.0
Episode 7702 finished after 120 timesteps. Return = 120.0
Episode 7703 finished after 43 timesteps. Return = 43.0
Episode 7704 finished after 103 timesteps. Return = 103.0
Episode 7705 finished after 136 timesteps. Return = 136.0
Episode 7706 finished after 62 timesteps. Return = 62.0
Episode 7707 finished after 108 timesteps. Return = 108.0
Episode 7708 finished after 152 timesteps. Return = 152.0
Episode 7709 finished after 66 tim

Episode 7837 finished after 123 timesteps. Return = 123.0
Episode 7838 finished after 72 timesteps. Return = 72.0
Episode 7839 finished after 63 timesteps. Return = 63.0
Episode 7840 finished after 88 timesteps. Return = 88.0
Episode 7841 finished after 118 timesteps. Return = 118.0
Episode 7842 finished after 135 timesteps. Return = 135.0
Episode 7843 finished after 96 timesteps. Return = 96.0
Episode 7844 finished after 135 timesteps. Return = 135.0
Episode 7845 finished after 110 timesteps. Return = 110.0
Episode 7846 finished after 75 timesteps. Return = 75.0
Episode 7847 finished after 129 timesteps. Return = 129.0
Episode 7848 finished after 75 timesteps. Return = 75.0
Episode 7849 finished after 141 timesteps. Return = 141.0
Episode 7850 finished after 88 timesteps. Return = 88.0
Episode 7851 finished after 151 timesteps. Return = 151.0
Episode 7852 finished after 162 timesteps. Return = 162.0
Episode 7853 finished after 155 timesteps. Return = 155.0
Episode 7854 finished after 

Episode 7993 finished after 72 timesteps. Return = 72.0
Episode 7994 finished after 60 timesteps. Return = 60.0
Episode 7995 finished after 128 timesteps. Return = 128.0
Episode 7996 finished after 92 timesteps. Return = 92.0
Episode 7997 finished after 57 timesteps. Return = 57.0
Episode 7998 finished after 70 timesteps. Return = 70.0
Episode 7999 finished after 54 timesteps. Return = 54.0
Episode 8000 finished after 139 timesteps. Return = 139.0
Episode 8001 finished after 130 timesteps. Return = 130.0
Episode 8002 finished after 123 timesteps. Return = 123.0
Episode 8003 finished after 116 timesteps. Return = 116.0
Episode 8004 finished after 109 timesteps. Return = 109.0
Episode 8005 finished after 165 timesteps. Return = 165.0
Episode 8006 finished after 111 timesteps. Return = 111.0
Episode 8007 finished after 44 timesteps. Return = 44.0
Episode 8008 finished after 130 timesteps. Return = 130.0
Episode 8009 finished after 118 timesteps. Return = 118.0
Episode 8010 finished after 

Episode 8148 finished after 123 timesteps. Return = 123.0
Episode 8149 finished after 63 timesteps. Return = 63.0
Episode 8150 finished after 38 timesteps. Return = 38.0
Episode 8151 finished after 56 timesteps. Return = 56.0
Episode 8152 finished after 109 timesteps. Return = 109.0
Episode 8153 finished after 47 timesteps. Return = 47.0
Episode 8154 finished after 54 timesteps. Return = 54.0
Episode 8155 finished after 113 timesteps. Return = 113.0
Episode 8156 finished after 87 timesteps. Return = 87.0
Episode 8157 finished after 107 timesteps. Return = 107.0
Episode 8158 finished after 55 timesteps. Return = 55.0
Episode 8159 finished after 152 timesteps. Return = 152.0
Episode 8160 finished after 56 timesteps. Return = 56.0
Episode 8161 finished after 46 timesteps. Return = 46.0
Episode 8162 finished after 62 timesteps. Return = 62.0
Episode 8163 finished after 54 timesteps. Return = 54.0
Episode 8164 finished after 108 timesteps. Return = 108.0
Episode 8165 finished after 46 times

Episode 8296 finished after 82 timesteps. Return = 82.0
Episode 8297 finished after 75 timesteps. Return = 75.0
Episode 8298 finished after 59 timesteps. Return = 59.0
Episode 8299 finished after 136 timesteps. Return = 136.0
Episode 8300 finished after 46 timesteps. Return = 46.0
Episode 8301 finished after 118 timesteps. Return = 118.0
Episode 8302 finished after 157 timesteps. Return = 157.0
Episode 8303 finished after 53 timesteps. Return = 53.0
Episode 8304 finished after 130 timesteps. Return = 130.0
Episode 8305 finished after 132 timesteps. Return = 132.0
Episode 8306 finished after 45 timesteps. Return = 45.0
Episode 8307 finished after 164 timesteps. Return = 164.0
Episode 8308 finished after 93 timesteps. Return = 93.0
Episode 8309 finished after 103 timesteps. Return = 103.0
Episode 8310 finished after 90 timesteps. Return = 90.0
Episode 8311 finished after 63 timesteps. Return = 63.0
Episode 8312 finished after 186 timesteps. Return = 186.0
Episode 8313 finished after 124 

Episode 8448 finished after 48 timesteps. Return = 48.0
Episode 8449 finished after 51 timesteps. Return = 51.0
Episode 8450 finished after 149 timesteps. Return = 149.0
Episode 8451 finished after 69 timesteps. Return = 69.0
Episode 8452 finished after 141 timesteps. Return = 141.0
Episode 8453 finished after 160 timesteps. Return = 160.0
Episode 8454 finished after 55 timesteps. Return = 55.0
Episode 8455 finished after 147 timesteps. Return = 147.0
Episode 8456 finished after 107 timesteps. Return = 107.0
Episode 8457 finished after 151 timesteps. Return = 151.0
Episode 8458 finished after 61 timesteps. Return = 61.0
Episode 8459 finished after 106 timesteps. Return = 106.0
Episode 8460 finished after 129 timesteps. Return = 129.0
Episode 8461 finished after 121 timesteps. Return = 121.0
Episode 8462 finished after 144 timesteps. Return = 144.0
Episode 8463 finished after 149 timesteps. Return = 149.0
Episode 8464 finished after 68 timesteps. Return = 68.0
Episode 8465 finished afte

Episode 8595 finished after 88 timesteps. Return = 88.0
Episode 8596 finished after 110 timesteps. Return = 110.0
Episode 8597 finished after 176 timesteps. Return = 176.0
Episode 8598 finished after 160 timesteps. Return = 160.0
Episode 8599 finished after 108 timesteps. Return = 108.0
Episode 8600 finished after 111 timesteps. Return = 111.0
Episode 8601 finished after 49 timesteps. Return = 49.0
Episode 8602 finished after 36 timesteps. Return = 36.0
Episode 8603 finished after 87 timesteps. Return = 87.0
Episode 8604 finished after 54 timesteps. Return = 54.0
Episode 8605 finished after 49 timesteps. Return = 49.0
Episode 8606 finished after 73 timesteps. Return = 73.0
Episode 8607 finished after 84 timesteps. Return = 84.0
Episode 8608 finished after 156 timesteps. Return = 156.0
Episode 8609 finished after 91 timesteps. Return = 91.0
Episode 8610 finished after 87 timesteps. Return = 87.0
Episode 8611 finished after 107 timesteps. Return = 107.0
Episode 8612 finished after 81 tim

Episode 8749 finished after 107 timesteps. Return = 107.0
Episode 8750 finished after 53 timesteps. Return = 53.0
Episode 8751 finished after 63 timesteps. Return = 63.0
Episode 8752 finished after 62 timesteps. Return = 62.0
Episode 8753 finished after 157 timesteps. Return = 157.0
Episode 8754 finished after 75 timesteps. Return = 75.0
Episode 8755 finished after 72 timesteps. Return = 72.0
Episode 8756 finished after 98 timesteps. Return = 98.0
Episode 8757 finished after 91 timesteps. Return = 91.0
Episode 8758 finished after 76 timesteps. Return = 76.0
Episode 8759 finished after 123 timesteps. Return = 123.0
Episode 8760 finished after 181 timesteps. Return = 181.0
Episode 8761 finished after 156 timesteps. Return = 156.0
Episode 8762 finished after 152 timesteps. Return = 152.0
Episode 8763 finished after 79 timesteps. Return = 79.0
Episode 8764 finished after 106 timesteps. Return = 106.0
Episode 8765 finished after 123 timesteps. Return = 123.0
Episode 8766 finished after 138 

Episode 8896 finished after 154 timesteps. Return = 154.0
Episode 8897 finished after 126 timesteps. Return = 126.0
Episode 8898 finished after 142 timesteps. Return = 142.0
Episode 8899 finished after 245 timesteps. Return = 245.0
Episode 8900 finished after 101 timesteps. Return = 101.0
Episode 8901 finished after 70 timesteps. Return = 70.0
Episode 8902 finished after 62 timesteps. Return = 62.0
Episode 8903 finished after 142 timesteps. Return = 142.0
Episode 8904 finished after 137 timesteps. Return = 137.0
Episode 8905 finished after 79 timesteps. Return = 79.0
Episode 8906 finished after 107 timesteps. Return = 107.0
Episode 8907 finished after 61 timesteps. Return = 61.0
Episode 8908 finished after 74 timesteps. Return = 74.0
Episode 8909 finished after 77 timesteps. Return = 77.0
Episode 8910 finished after 127 timesteps. Return = 127.0
Episode 8911 finished after 158 timesteps. Return = 158.0
Episode 8912 finished after 128 timesteps. Return = 128.0
Episode 8913 finished afte

Episode 9047 finished after 141 timesteps. Return = 141.0
Episode 9048 finished after 138 timesteps. Return = 138.0
Episode 9049 finished after 79 timesteps. Return = 79.0
Episode 9050 finished after 51 timesteps. Return = 51.0
Episode 9051 finished after 86 timesteps. Return = 86.0
Episode 9052 finished after 130 timesteps. Return = 130.0
Episode 9053 finished after 137 timesteps. Return = 137.0
Episode 9054 finished after 160 timesteps. Return = 160.0
Episode 9055 finished after 139 timesteps. Return = 139.0
Episode 9056 finished after 59 timesteps. Return = 59.0
Episode 9057 finished after 147 timesteps. Return = 147.0
Episode 9058 finished after 173 timesteps. Return = 173.0
Episode 9059 finished after 56 timesteps. Return = 56.0
Episode 9060 finished after 130 timesteps. Return = 130.0
Episode 9061 finished after 57 timesteps. Return = 57.0
Episode 9062 finished after 115 timesteps. Return = 115.0
Episode 9063 finished after 128 timesteps. Return = 128.0
Episode 9064 finished afte

Episode 9193 finished after 48 timesteps. Return = 48.0
Episode 9194 finished after 50 timesteps. Return = 50.0
Episode 9195 finished after 122 timesteps. Return = 122.0
Episode 9196 finished after 149 timesteps. Return = 149.0
Episode 9197 finished after 130 timesteps. Return = 130.0
Episode 9198 finished after 94 timesteps. Return = 94.0
Episode 9199 finished after 71 timesteps. Return = 71.0
Episode 9200 finished after 112 timesteps. Return = 112.0
Episode 9201 finished after 119 timesteps. Return = 119.0
Episode 9202 finished after 80 timesteps. Return = 80.0
Episode 9203 finished after 100 timesteps. Return = 100.0
Episode 9204 finished after 171 timesteps. Return = 171.0
Episode 9205 finished after 177 timesteps. Return = 177.0
Episode 9206 finished after 132 timesteps. Return = 132.0
Episode 9207 finished after 132 timesteps. Return = 132.0
Episode 9208 finished after 144 timesteps. Return = 144.0
Episode 9209 finished after 68 timesteps. Return = 68.0
Episode 9210 finished afte

Episode 9344 finished after 117 timesteps. Return = 117.0
Episode 9345 finished after 130 timesteps. Return = 130.0
Episode 9346 finished after 110 timesteps. Return = 110.0
Episode 9347 finished after 53 timesteps. Return = 53.0
Episode 9348 finished after 62 timesteps. Return = 62.0
Episode 9349 finished after 118 timesteps. Return = 118.0
Episode 9350 finished after 120 timesteps. Return = 120.0
Episode 9351 finished after 71 timesteps. Return = 71.0
Episode 9352 finished after 106 timesteps. Return = 106.0
Episode 9353 finished after 137 timesteps. Return = 137.0
Episode 9354 finished after 108 timesteps. Return = 108.0
Episode 9355 finished after 178 timesteps. Return = 178.0
Episode 9356 finished after 146 timesteps. Return = 146.0
Episode 9357 finished after 79 timesteps. Return = 79.0
Episode 9358 finished after 48 timesteps. Return = 48.0
Episode 9359 finished after 89 timesteps. Return = 89.0
Episode 9360 finished after 118 timesteps. Return = 118.0
Episode 9361 finished afte

Episode 9496 finished after 130 timesteps. Return = 130.0
Episode 9497 finished after 135 timesteps. Return = 135.0
Episode 9498 finished after 55 timesteps. Return = 55.0
Episode 9499 finished after 174 timesteps. Return = 174.0
Episode 9500 finished after 94 timesteps. Return = 94.0
Episode 9501 finished after 130 timesteps. Return = 130.0
Episode 9502 finished after 134 timesteps. Return = 134.0
Episode 9503 finished after 118 timesteps. Return = 118.0
Episode 9504 finished after 125 timesteps. Return = 125.0
Episode 9505 finished after 58 timesteps. Return = 58.0
Episode 9506 finished after 152 timesteps. Return = 152.0
Episode 9507 finished after 66 timesteps. Return = 66.0
Episode 9508 finished after 119 timesteps. Return = 119.0
Episode 9509 finished after 159 timesteps. Return = 159.0
Episode 9510 finished after 88 timesteps. Return = 88.0
Episode 9511 finished after 83 timesteps. Return = 83.0
Episode 9512 finished after 71 timesteps. Return = 71.0
Episode 9513 finished after 

Episode 9643 finished after 73 timesteps. Return = 73.0
Episode 9644 finished after 146 timesteps. Return = 146.0
Episode 9645 finished after 50 timesteps. Return = 50.0
Episode 9646 finished after 143 timesteps. Return = 143.0
Episode 9647 finished after 116 timesteps. Return = 116.0
Episode 9648 finished after 123 timesteps. Return = 123.0
Episode 9649 finished after 127 timesteps. Return = 127.0
Episode 9650 finished after 110 timesteps. Return = 110.0
Episode 9651 finished after 57 timesteps. Return = 57.0
Episode 9652 finished after 77 timesteps. Return = 77.0
Episode 9653 finished after 40 timesteps. Return = 40.0
Episode 9654 finished after 50 timesteps. Return = 50.0
Episode 9655 finished after 42 timesteps. Return = 42.0
Episode 9656 finished after 48 timesteps. Return = 48.0
Episode 9657 finished after 76 timesteps. Return = 76.0
Episode 9658 finished after 80 timesteps. Return = 80.0
Episode 9659 finished after 49 timesteps. Return = 49.0
Episode 9660 finished after 52 times

Episode 9794 finished after 118 timesteps. Return = 118.0
Episode 9795 finished after 88 timesteps. Return = 88.0
Episode 9796 finished after 75 timesteps. Return = 75.0
Episode 9797 finished after 103 timesteps. Return = 103.0
Episode 9798 finished after 128 timesteps. Return = 128.0
Episode 9799 finished after 69 timesteps. Return = 69.0
Episode 9800 finished after 132 timesteps. Return = 132.0
Episode 9801 finished after 84 timesteps. Return = 84.0
Episode 9802 finished after 116 timesteps. Return = 116.0
Episode 9803 finished after 76 timesteps. Return = 76.0
Episode 9804 finished after 99 timesteps. Return = 99.0
Episode 9805 finished after 80 timesteps. Return = 80.0
Episode 9806 finished after 50 timesteps. Return = 50.0
Episode 9807 finished after 125 timesteps. Return = 125.0
Episode 9808 finished after 139 timesteps. Return = 139.0
Episode 9809 finished after 92 timesteps. Return = 92.0
Episode 9810 finished after 71 timesteps. Return = 71.0
Episode 9811 finished after 58 tim

Episode 9945 finished after 128 timesteps. Return = 128.0
Episode 9946 finished after 72 timesteps. Return = 72.0
Episode 9947 finished after 134 timesteps. Return = 134.0
Episode 9948 finished after 69 timesteps. Return = 69.0
Episode 9949 finished after 189 timesteps. Return = 189.0
Episode 9950 finished after 137 timesteps. Return = 137.0
Episode 9951 finished after 74 timesteps. Return = 74.0
Episode 9952 finished after 80 timesteps. Return = 80.0
Episode 9953 finished after 62 timesteps. Return = 62.0
Episode 9954 finished after 47 timesteps. Return = 47.0
Episode 9955 finished after 50 timesteps. Return = 50.0
Episode 9956 finished after 49 timesteps. Return = 49.0
Episode 9957 finished after 64 timesteps. Return = 64.0
Episode 9958 finished after 105 timesteps. Return = 105.0
Episode 9959 finished after 49 timesteps. Return = 49.0
Episode 9960 finished after 102 timesteps. Return = 102.0
Episode 9961 finished after 35 timesteps. Return = 35.0
Episode 9962 finished after 49 times

Episode 10093 finished after 138 timesteps. Return = 138.0
Episode 10094 finished after 66 timesteps. Return = 66.0
Episode 10095 finished after 116 timesteps. Return = 116.0
Episode 10096 finished after 88 timesteps. Return = 88.0
Episode 10097 finished after 112 timesteps. Return = 112.0
Episode 10098 finished after 154 timesteps. Return = 154.0
Episode 10099 finished after 96 timesteps. Return = 96.0
Episode 10100 finished after 129 timesteps. Return = 129.0
Episode 10101 finished after 116 timesteps. Return = 116.0
Episode 10102 finished after 116 timesteps. Return = 116.0
Episode 10103 finished after 152 timesteps. Return = 152.0
Episode 10104 finished after 125 timesteps. Return = 125.0
Episode 10105 finished after 49 timesteps. Return = 49.0
Episode 10106 finished after 137 timesteps. Return = 137.0
Episode 10107 finished after 99 timesteps. Return = 99.0
Episode 10108 finished after 45 timesteps. Return = 45.0
Episode 10109 finished after 121 timesteps. Return = 121.0
Episode 1

Episode 10246 finished after 133 timesteps. Return = 133.0
Episode 10247 finished after 65 timesteps. Return = 65.0
Episode 10248 finished after 67 timesteps. Return = 67.0
Episode 10249 finished after 113 timesteps. Return = 113.0
Episode 10250 finished after 43 timesteps. Return = 43.0
Episode 10251 finished after 76 timesteps. Return = 76.0
Episode 10252 finished after 96 timesteps. Return = 96.0
Episode 10253 finished after 114 timesteps. Return = 114.0
Episode 10254 finished after 120 timesteps. Return = 120.0
Episode 10255 finished after 54 timesteps. Return = 54.0
Episode 10256 finished after 96 timesteps. Return = 96.0
Episode 10257 finished after 177 timesteps. Return = 177.0
Episode 10258 finished after 65 timesteps. Return = 65.0
Episode 10259 finished after 127 timesteps. Return = 127.0
Episode 10260 finished after 106 timesteps. Return = 106.0
Episode 10261 finished after 80 timesteps. Return = 80.0
Episode 10262 finished after 123 timesteps. Return = 123.0
Episode 10263 f

Episode 10391 finished after 131 timesteps. Return = 131.0
Episode 10392 finished after 70 timesteps. Return = 70.0
Episode 10393 finished after 138 timesteps. Return = 138.0
Episode 10394 finished after 101 timesteps. Return = 101.0
Episode 10395 finished after 60 timesteps. Return = 60.0
Episode 10396 finished after 104 timesteps. Return = 104.0
Episode 10397 finished after 165 timesteps. Return = 165.0
Episode 10398 finished after 134 timesteps. Return = 134.0
Episode 10399 finished after 111 timesteps. Return = 111.0
Episode 10400 finished after 153 timesteps. Return = 153.0
Episode 10401 finished after 115 timesteps. Return = 115.0
Episode 10402 finished after 76 timesteps. Return = 76.0
Episode 10403 finished after 52 timesteps. Return = 52.0
Episode 10404 finished after 68 timesteps. Return = 68.0
Episode 10405 finished after 64 timesteps. Return = 64.0
Episode 10406 finished after 90 timesteps. Return = 90.0
Episode 10407 finished after 48 timesteps. Return = 48.0
Episode 10408

Episode 10542 finished after 146 timesteps. Return = 146.0
Episode 10543 finished after 64 timesteps. Return = 64.0
Episode 10544 finished after 106 timesteps. Return = 106.0
Episode 10545 finished after 63 timesteps. Return = 63.0
Episode 10546 finished after 63 timesteps. Return = 63.0
Episode 10547 finished after 161 timesteps. Return = 161.0
Episode 10548 finished after 123 timesteps. Return = 123.0
Episode 10549 finished after 73 timesteps. Return = 73.0
Episode 10550 finished after 104 timesteps. Return = 104.0
Episode 10551 finished after 102 timesteps. Return = 102.0
Episode 10552 finished after 145 timesteps. Return = 145.0
Episode 10553 finished after 162 timesteps. Return = 162.0
Episode 10554 finished after 108 timesteps. Return = 108.0
Episode 10555 finished after 118 timesteps. Return = 118.0
Episode 10556 finished after 83 timesteps. Return = 83.0
Episode 10557 finished after 127 timesteps. Return = 127.0
Episode 10558 finished after 166 timesteps. Return = 166.0
Episode

Episode 10683 finished after 207 timesteps. Return = 207.0
Episode 10684 finished after 144 timesteps. Return = 144.0
Episode 10685 finished after 144 timesteps. Return = 144.0
Episode 10686 finished after 144 timesteps. Return = 144.0
Episode 10687 finished after 133 timesteps. Return = 133.0
Episode 10688 finished after 126 timesteps. Return = 126.0
Episode 10689 finished after 136 timesteps. Return = 136.0
Episode 10690 finished after 66 timesteps. Return = 66.0
Episode 10691 finished after 120 timesteps. Return = 120.0
Episode 10692 finished after 146 timesteps. Return = 146.0
Episode 10693 finished after 134 timesteps. Return = 134.0
Episode 10694 finished after 143 timesteps. Return = 143.0
Episode 10695 finished after 134 timesteps. Return = 134.0
Episode 10696 finished after 125 timesteps. Return = 125.0
Episode 10697 finished after 144 timesteps. Return = 144.0
Episode 10698 finished after 102 timesteps. Return = 102.0
Episode 10699 finished after 101 timesteps. Return = 101.0

Episode 10832 finished after 178 timesteps. Return = 178.0
Episode 10833 finished after 152 timesteps. Return = 152.0
Episode 10834 finished after 57 timesteps. Return = 57.0
Episode 10835 finished after 93 timesteps. Return = 93.0
Episode 10836 finished after 152 timesteps. Return = 152.0
Episode 10837 finished after 152 timesteps. Return = 152.0
Episode 10838 finished after 67 timesteps. Return = 67.0
Episode 10839 finished after 151 timesteps. Return = 151.0
Episode 10840 finished after 48 timesteps. Return = 48.0
Episode 10841 finished after 149 timesteps. Return = 149.0
Episode 10842 finished after 130 timesteps. Return = 130.0
Episode 10843 finished after 72 timesteps. Return = 72.0
Episode 10844 finished after 138 timesteps. Return = 138.0
Episode 10845 finished after 59 timesteps. Return = 59.0
Episode 10846 finished after 124 timesteps. Return = 124.0
Episode 10847 finished after 89 timesteps. Return = 89.0
Episode 10848 finished after 107 timesteps. Return = 107.0
Episode 108

Episode 10981 finished after 37 timesteps. Return = 37.0
Episode 10982 finished after 51 timesteps. Return = 51.0
Episode 10983 finished after 42 timesteps. Return = 42.0
Episode 10984 finished after 56 timesteps. Return = 56.0
Episode 10985 finished after 70 timesteps. Return = 70.0
Episode 10986 finished after 42 timesteps. Return = 42.0
Episode 10987 finished after 63 timesteps. Return = 63.0
Episode 10988 finished after 45 timesteps. Return = 45.0
Episode 10989 finished after 36 timesteps. Return = 36.0
Episode 10990 finished after 84 timesteps. Return = 84.0
Episode 10991 finished after 46 timesteps. Return = 46.0
Episode 10992 finished after 54 timesteps. Return = 54.0
Episode 10993 finished after 44 timesteps. Return = 44.0
Episode 10994 finished after 70 timesteps. Return = 70.0
Episode 10995 finished after 47 timesteps. Return = 47.0
Episode 10996 finished after 38 timesteps. Return = 38.0
Episode 10997 finished after 42 timesteps. Return = 42.0
Episode 10998 finished after 33

Episode 11129 finished after 94 timesteps. Return = 94.0
Episode 11130 finished after 66 timesteps. Return = 66.0
Episode 11131 finished after 95 timesteps. Return = 95.0
Episode 11132 finished after 51 timesteps. Return = 51.0
Episode 11133 finished after 50 timesteps. Return = 50.0
Episode 11134 finished after 56 timesteps. Return = 56.0
Episode 11135 finished after 43 timesteps. Return = 43.0
Episode 11136 finished after 185 timesteps. Return = 185.0
Episode 11137 finished after 40 timesteps. Return = 40.0
Episode 11138 finished after 130 timesteps. Return = 130.0
Episode 11139 finished after 38 timesteps. Return = 38.0
Episode 11140 finished after 77 timesteps. Return = 77.0
Episode 11141 finished after 44 timesteps. Return = 44.0
Episode 11142 finished after 85 timesteps. Return = 85.0
Episode 11143 finished after 148 timesteps. Return = 148.0
Episode 11144 finished after 53 timesteps. Return = 53.0
Episode 11145 finished after 43 timesteps. Return = 43.0
Episode 11146 finished af

Episode 11278 finished after 133 timesteps. Return = 133.0
Episode 11279 finished after 148 timesteps. Return = 148.0
Episode 11280 finished after 83 timesteps. Return = 83.0
Episode 11281 finished after 121 timesteps. Return = 121.0
Episode 11282 finished after 119 timesteps. Return = 119.0
Episode 11283 finished after 152 timesteps. Return = 152.0
Episode 11284 finished after 156 timesteps. Return = 156.0
Episode 11285 finished after 128 timesteps. Return = 128.0
Episode 11286 finished after 107 timesteps. Return = 107.0
Episode 11287 finished after 80 timesteps. Return = 80.0
Episode 11288 finished after 70 timesteps. Return = 70.0
Episode 11289 finished after 145 timesteps. Return = 145.0
Episode 11290 finished after 160 timesteps. Return = 160.0
Episode 11291 finished after 106 timesteps. Return = 106.0
Episode 11292 finished after 143 timesteps. Return = 143.0
Episode 11293 finished after 123 timesteps. Return = 123.0
Episode 11294 finished after 85 timesteps. Return = 85.0
Episo

Episode 11424 finished after 94 timesteps. Return = 94.0
Episode 11425 finished after 79 timesteps. Return = 79.0
Episode 11426 finished after 62 timesteps. Return = 62.0
Episode 11427 finished after 108 timesteps. Return = 108.0
Episode 11428 finished after 63 timesteps. Return = 63.0
Episode 11429 finished after 99 timesteps. Return = 99.0
Episode 11430 finished after 74 timesteps. Return = 74.0
Episode 11431 finished after 112 timesteps. Return = 112.0
Episode 11432 finished after 116 timesteps. Return = 116.0
Episode 11433 finished after 86 timesteps. Return = 86.0
Episode 11434 finished after 48 timesteps. Return = 48.0
Episode 11435 finished after 83 timesteps. Return = 83.0
Episode 11436 finished after 71 timesteps. Return = 71.0
Episode 11437 finished after 57 timesteps. Return = 57.0
Episode 11438 finished after 101 timesteps. Return = 101.0
Episode 11439 finished after 69 timesteps. Return = 69.0
Episode 11440 finished after 81 timesteps. Return = 81.0
Episode 11441 finished 

Episode 11572 finished after 125 timesteps. Return = 125.0
Episode 11573 finished after 45 timesteps. Return = 45.0
Episode 11574 finished after 57 timesteps. Return = 57.0
Episode 11575 finished after 86 timesteps. Return = 86.0
Episode 11576 finished after 123 timesteps. Return = 123.0
Episode 11577 finished after 129 timesteps. Return = 129.0
Episode 11578 finished after 63 timesteps. Return = 63.0
Episode 11579 finished after 81 timesteps. Return = 81.0
Episode 11580 finished after 113 timesteps. Return = 113.0
Episode 11581 finished after 68 timesteps. Return = 68.0
Episode 11582 finished after 131 timesteps. Return = 131.0
Episode 11583 finished after 96 timesteps. Return = 96.0
Episode 11584 finished after 68 timesteps. Return = 68.0
Episode 11585 finished after 64 timesteps. Return = 64.0
Episode 11586 finished after 74 timesteps. Return = 74.0
Episode 11587 finished after 139 timesteps. Return = 139.0
Episode 11588 finished after 94 timesteps. Return = 94.0
Episode 11589 finis

Episode 11717 finished after 58 timesteps. Return = 58.0
Episode 11718 finished after 54 timesteps. Return = 54.0
Episode 11719 finished after 149 timesteps. Return = 149.0
Episode 11720 finished after 126 timesteps. Return = 126.0
Episode 11721 finished after 205 timesteps. Return = 205.0
Episode 11722 finished after 127 timesteps. Return = 127.0
Episode 11723 finished after 68 timesteps. Return = 68.0
Episode 11724 finished after 112 timesteps. Return = 112.0
Episode 11725 finished after 66 timesteps. Return = 66.0
Episode 11726 finished after 75 timesteps. Return = 75.0
Episode 11727 finished after 73 timesteps. Return = 73.0
Episode 11728 finished after 158 timesteps. Return = 158.0
Episode 11729 finished after 126 timesteps. Return = 126.0
Episode 11730 finished after 144 timesteps. Return = 144.0
Episode 11731 finished after 140 timesteps. Return = 140.0
Episode 11732 finished after 121 timesteps. Return = 121.0
Episode 11733 finished after 64 timesteps. Return = 64.0
Episode 117

Episode 11860 finished after 87 timesteps. Return = 87.0
Episode 11861 finished after 45 timesteps. Return = 45.0
Episode 11862 finished after 124 timesteps. Return = 124.0
Episode 11863 finished after 67 timesteps. Return = 67.0
Episode 11864 finished after 123 timesteps. Return = 123.0
Episode 11865 finished after 51 timesteps. Return = 51.0
Episode 11866 finished after 57 timesteps. Return = 57.0
Episode 11867 finished after 69 timesteps. Return = 69.0
Episode 11868 finished after 123 timesteps. Return = 123.0
Episode 11869 finished after 60 timesteps. Return = 60.0
Episode 11870 finished after 71 timesteps. Return = 71.0
Episode 11871 finished after 146 timesteps. Return = 146.0
Episode 11872 finished after 87 timesteps. Return = 87.0
Episode 11873 finished after 118 timesteps. Return = 118.0
Episode 11874 finished after 102 timesteps. Return = 102.0
Episode 11875 finished after 108 timesteps. Return = 108.0
Episode 11876 finished after 129 timesteps. Return = 129.0
Episode 11877 f

Episode 12015 finished after 120 timesteps. Return = 120.0
Episode 12016 finished after 89 timesteps. Return = 89.0
Episode 12017 finished after 89 timesteps. Return = 89.0
Episode 12018 finished after 90 timesteps. Return = 90.0
Episode 12019 finished after 68 timesteps. Return = 68.0
Episode 12020 finished after 94 timesteps. Return = 94.0
Episode 12021 finished after 71 timesteps. Return = 71.0
Episode 12022 finished after 89 timesteps. Return = 89.0
Episode 12023 finished after 68 timesteps. Return = 68.0
Episode 12024 finished after 104 timesteps. Return = 104.0
Episode 12025 finished after 112 timesteps. Return = 112.0
Episode 12026 finished after 86 timesteps. Return = 86.0
Episode 12027 finished after 49 timesteps. Return = 49.0
Episode 12028 finished after 74 timesteps. Return = 74.0
Episode 12029 finished after 126 timesteps. Return = 126.0
Episode 12030 finished after 80 timesteps. Return = 80.0
Episode 12031 finished after 40 timesteps. Return = 40.0
Episode 12032 finished 

Episode 12159 finished after 131 timesteps. Return = 131.0
Episode 12160 finished after 59 timesteps. Return = 59.0
Episode 12161 finished after 135 timesteps. Return = 135.0
Episode 12162 finished after 128 timesteps. Return = 128.0
Episode 12163 finished after 70 timesteps. Return = 70.0
Episode 12164 finished after 71 timesteps. Return = 71.0
Episode 12165 finished after 133 timesteps. Return = 133.0
Episode 12166 finished after 77 timesteps. Return = 77.0
Episode 12167 finished after 84 timesteps. Return = 84.0
Episode 12168 finished after 74 timesteps. Return = 74.0
Episode 12169 finished after 159 timesteps. Return = 159.0
Episode 12170 finished after 80 timesteps. Return = 80.0
Episode 12171 finished after 115 timesteps. Return = 115.0
Episode 12172 finished after 154 timesteps. Return = 154.0
Episode 12173 finished after 141 timesteps. Return = 141.0
Episode 12174 finished after 145 timesteps. Return = 145.0
Episode 12175 finished after 150 timesteps. Return = 150.0
Episode 121

Episode 12301 finished after 43 timesteps. Return = 43.0
Episode 12302 finished after 61 timesteps. Return = 61.0
Episode 12303 finished after 114 timesteps. Return = 114.0
Episode 12304 finished after 67 timesteps. Return = 67.0
Episode 12305 finished after 119 timesteps. Return = 119.0
Episode 12306 finished after 117 timesteps. Return = 117.0
Episode 12307 finished after 137 timesteps. Return = 137.0
Episode 12308 finished after 137 timesteps. Return = 137.0
Episode 12309 finished after 135 timesteps. Return = 135.0
Episode 12310 finished after 69 timesteps. Return = 69.0
Episode 12311 finished after 161 timesteps. Return = 161.0
Episode 12312 finished after 153 timesteps. Return = 153.0
Episode 12313 finished after 59 timesteps. Return = 59.0
Episode 12314 finished after 67 timesteps. Return = 67.0
Episode 12315 finished after 64 timesteps. Return = 64.0
Episode 12316 finished after 65 timesteps. Return = 65.0
Episode 12317 finished after 79 timesteps. Return = 79.0
Episode 12318 f

Episode 12448 finished after 145 timesteps. Return = 145.0
Episode 12449 finished after 163 timesteps. Return = 163.0
Episode 12450 finished after 127 timesteps. Return = 127.0
Episode 12451 finished after 105 timesteps. Return = 105.0
Episode 12452 finished after 75 timesteps. Return = 75.0
Episode 12453 finished after 63 timesteps. Return = 63.0
Episode 12454 finished after 123 timesteps. Return = 123.0
Episode 12455 finished after 68 timesteps. Return = 68.0
Episode 12456 finished after 48 timesteps. Return = 48.0
Episode 12457 finished after 153 timesteps. Return = 153.0
Episode 12458 finished after 92 timesteps. Return = 92.0
Episode 12459 finished after 164 timesteps. Return = 164.0
Episode 12460 finished after 104 timesteps. Return = 104.0
Episode 12461 finished after 125 timesteps. Return = 125.0
Episode 12462 finished after 135 timesteps. Return = 135.0
Episode 12463 finished after 128 timesteps. Return = 128.0
Episode 12464 finished after 96 timesteps. Return = 96.0
Episode 1

Episode 12589 finished after 142 timesteps. Return = 142.0
Episode 12590 finished after 60 timesteps. Return = 60.0
Episode 12591 finished after 115 timesteps. Return = 115.0
Episode 12592 finished after 107 timesteps. Return = 107.0
Episode 12593 finished after 91 timesteps. Return = 91.0
Episode 12594 finished after 98 timesteps. Return = 98.0
Episode 12595 finished after 59 timesteps. Return = 59.0
Episode 12596 finished after 99 timesteps. Return = 99.0
Episode 12597 finished after 89 timesteps. Return = 89.0
Episode 12598 finished after 106 timesteps. Return = 106.0
Episode 12599 finished after 77 timesteps. Return = 77.0
Episode 12600 finished after 57 timesteps. Return = 57.0
Episode 12601 finished after 80 timesteps. Return = 80.0
Episode 12602 finished after 139 timesteps. Return = 139.0
Episode 12603 finished after 152 timesteps. Return = 152.0
Episode 12604 finished after 135 timesteps. Return = 135.0
Episode 12605 finished after 94 timesteps. Return = 94.0
Episode 12606 fin

Episode 12731 finished after 116 timesteps. Return = 116.0
Episode 12732 finished after 138 timesteps. Return = 138.0
Episode 12733 finished after 52 timesteps. Return = 52.0
Episode 12734 finished after 112 timesteps. Return = 112.0
Episode 12735 finished after 106 timesteps. Return = 106.0
Episode 12736 finished after 135 timesteps. Return = 135.0
Episode 12737 finished after 81 timesteps. Return = 81.0
Episode 12738 finished after 139 timesteps. Return = 139.0
Episode 12739 finished after 68 timesteps. Return = 68.0
Episode 12740 finished after 134 timesteps. Return = 134.0
Episode 12741 finished after 166 timesteps. Return = 166.0
Episode 12742 finished after 131 timesteps. Return = 131.0
Episode 12743 finished after 121 timesteps. Return = 121.0
Episode 12744 finished after 125 timesteps. Return = 125.0
Episode 12745 finished after 142 timesteps. Return = 142.0
Episode 12746 finished after 134 timesteps. Return = 134.0
Episode 12747 finished after 133 timesteps. Return = 133.0
Epi

Episode 12876 finished after 113 timesteps. Return = 113.0
Episode 12877 finished after 75 timesteps. Return = 75.0
Episode 12878 finished after 170 timesteps. Return = 170.0
Episode 12879 finished after 107 timesteps. Return = 107.0
Episode 12880 finished after 54 timesteps. Return = 54.0
Episode 12881 finished after 169 timesteps. Return = 169.0
Episode 12882 finished after 88 timesteps. Return = 88.0
Episode 12883 finished after 118 timesteps. Return = 118.0
Episode 12884 finished after 102 timesteps. Return = 102.0
Episode 12885 finished after 118 timesteps. Return = 118.0
Episode 12886 finished after 82 timesteps. Return = 82.0
Episode 12887 finished after 139 timesteps. Return = 139.0
Episode 12888 finished after 185 timesteps. Return = 185.0
Episode 12889 finished after 169 timesteps. Return = 169.0
Episode 12890 finished after 111 timesteps. Return = 111.0
Episode 12891 finished after 110 timesteps. Return = 110.0
Episode 12892 finished after 191 timesteps. Return = 191.0
Episo

Episode 13016 finished after 105 timesteps. Return = 105.0
Episode 13017 finished after 107 timesteps. Return = 107.0
Episode 13018 finished after 90 timesteps. Return = 90.0
Episode 13019 finished after 124 timesteps. Return = 124.0
Episode 13020 finished after 162 timesteps. Return = 162.0
Episode 13021 finished after 135 timesteps. Return = 135.0
Episode 13022 finished after 114 timesteps. Return = 114.0
Episode 13023 finished after 144 timesteps. Return = 144.0
Episode 13024 finished after 90 timesteps. Return = 90.0
Episode 13025 finished after 114 timesteps. Return = 114.0
Episode 13026 finished after 131 timesteps. Return = 131.0
Episode 13027 finished after 97 timesteps. Return = 97.0
Episode 13028 finished after 140 timesteps. Return = 140.0
Episode 13029 finished after 171 timesteps. Return = 171.0
Episode 13030 finished after 122 timesteps. Return = 122.0
Episode 13031 finished after 135 timesteps. Return = 135.0
Episode 13032 finished after 96 timesteps. Return = 96.0
Episo

KeyboardInterrupt: 

In [72]:
visualise_agent(greedy_policy, command=[250, 200], n=5)

Episode 0 finished after 84 timesteps. Return = -134.1428887684655
Episode 1 finished after 62 timesteps. Return = -143.282848576283
Episode 2 finished after 72 timesteps. Return = -0.15610989654310004
Episode 3 finished after 106 timesteps. Return = -72.03684838919166
Episode 4 finished after 105 timesteps. Return = -37.44217835732849


In [73]:
visualise_agent(stochastic_policy, command=[250, 200], n=5)

Episode 0 finished after 97 timesteps. Return = -89.47575725612703


In [220]:
#torch.save(agent.state_dict(), 'checkpoints/lunar_lander_32x32_checkpoint_0.pt')

In [158]:
print([mem['return'] for mem in replay_buffer])

[79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 81.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.0, 82.

In [None]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64):
    all_costs = []
    for i in range(n_updates):
        batch_input = np.zeros((batch_size, np.prod(env.observation_space.shape)+2))
        batch_label = np.zeros((batch_size))
        for b in range(batch_size):
            sample_episode = np.random.randint(0, len(replay_buffer))
            sample_horizon = np.random.randint(1, len(replay_buffer[sample_episode]['observation'])+1)
            sample_mem_idx = np.random.randint(0, len(replay_buffer[sample_episode]['observation'])+1-sample_horizon)
            sample_mem = replay_buffer[sample_episode]['observation'][sample_mem_idx]
            sample_desired_reward = sum(replay_buffer[sample_episode]['reward'][sample_mem_idx:sample_mem_idx+sample_horizon])
            network_input = np.append(sample_mem, [sample_desired_reward, sample_horizon])
            label = replay_buffer[sample_episode]['action'][sample_mem_idx]
            batch_input[b] = network_input
            batch_label[b] = label
        batch_input = torch.tensor(batch_input).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_input)
        cost = F.cross_entropy(pred, batch_label)
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(all_costs)

In [8]:
def train_net(policy_net, episode_mem, n_samples = 5): #stochastic gradient descent
    all_costs = []
    for i in range(n_samples):
        sample_horizon = np.random.randint(1, len(episode_mem['observation'])+1)
        sample_mem_idx = np.random.randint(0, len(episode_mem['observation'])+1-sample_horizon)
        sample_mem = episode_mem['observation'][sample_mem_idx]
        sample_desired_reward = sum(episode_mem['reward'][sample_mem_idx:sample_mem_idx+sample_horizon])
        network_input = torch.tensor(np.append(sample_mem, [sample_desired_reward, sample_horizon])).double()
        label = torch.tensor([episode_mem['action'][sample_mem_idx]]).double()
        
        pred = policy_net(network_input)
        cost = F.binary_cross_entropy(pred, label)
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(all_costs)
    

In [33]:
def train(policy_net, n_episodes=100):
    global i_episode
    global epsilon
    try:
        for _ in range(n_episodes):
            observation = env.reset()
            episode_mem = {'observation':[],
                            'action':[],
                            'reward':[],
                            'done':[]}
            done=False
            while not done:
                network_input = torch.tensor(np.append(observation, [desired_reward, command_horizon])).double()
                action_prob = policy_net(network_input)
                action = np.random.binomial(1, action_prob.item())
                #action = int(action_prob.item()>0.5)
                if np.random.rand()<epsilon: action = np.random.randint(0, 2)
                new_observation, reward, done, info = env.step(action)
                
                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                episode_mem['done'].append(done)
                
                observation=new_observation
                epsilon*=0.999
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            mean_cost = train_net(policy_net, episode_mem)
            
            i_episode+=1
            print("Episode {} finished after {} timesteps. Epsilon={} Mean Cost={}".format(i_episode, len(episode_mem['observation']), epsilon, mean_cost))
        env.close()
    except KeyboardInterrupt:
        env.close()