# Actor Critic for Cartpole

In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.distributions import Categorical

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

import gym

### Cartpole environment

In [2]:
env = gym.make('CartPole-v1')

### Actor Critic agent

Common model for actor and critic

In [None]:
class ActorCritic(nn.Module):
    
    def __init__(self):
        super(ActorCritic, self).__init__()
        
        # Common network
        self.fc1 = nn.Linear(in_features=4, out_features=10)
        self.fc2 = nn.Linear(in_features=10, out_features=2)
        
        # Actor head
        self.actor_head = nn.Linear(in_features=2, out_features=2)
        
        # Critic head
        self.critic_head = nn.Linear(in_features=2, out_features=2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        
        policy = F.softmax(self.actor_head(x))
        value = self.critic_head(x)
        return policy, value

class AC_Agent():
    
    def __init__(self, env, lr=0.001, dr=0.95):
        # Cartpole environment
        self.cartpole_env = env
        
        # Learning rates
        self.lr = lr
        # Discount rate
        self.dr = dr
        
        # Actor Critic model
        self.model = ActorCritic()
        self.optim = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        
        # Saving training curves
        self.rewards = []
        self.value_losses = []
        
    def choose_action(self, state):
        # Translate state to fit model input
        state = torch.from_numpy(state).float().unsqueeze(0)
        # Get actions probability distribution
        probs, _ = self.model(state)
        distrib = Categorical(probs)
        # Sample action
        action = distrib.sample()
        return action.item(), distrib.log_prob(action)
    
    def compute_loss(self, state, action, log_prob, reward, next_state, done, G=None):
        # Get value of state-action pair
        state = torch.from_numpy(state).float().unsqueeze(0)
        _, q_values = self.model(state)
        q_value = q_values[0, action]
        # Compute Actor's Loss
        L_actor = -log_prob * G
        if not done:
            if G is None:
                # Sample next action
                next_action, _ = self.choose_action(next_state)
                # Get value of next_state-next_action pair
                next_state = torch.from_numpy(next_state).float().unsqueeze(0)
                _, next_q_values = self.model(next_state)
                next_q_value = next_q_values[0, action]
                # Compute q_target
                q_target = torch.Tensor([reward]) + self.dr * next_q_value
            else:
                q_target = G
        else:
            q_target = torch.Tensor([reward])
        # Compute Critic's loss
        L_critic = (q_target - q_value) ** 2
        self.value_losses.append(L_critic)
        # Return complete loss
        return L_actor + L_critic
        
    def train(self, nb_episodes=1000, complete_return=False):
        for i in range(nb_episodes):
            state = env.reset()
            experiences = []
            for t in range(500):
                env.render()
                # Actor chooses action
                action, log_prob = self.choose_action(state)
                # Perform action
                next_state, reward, done, info = env.step(action)
                
                experiences.append((state, action, log_prob, reward, next_state, done))
                
                # Check for end state
                if done:
                    print("Episode #{} finished after {} timesteps.".format(i + 1, t + 1))
                    self.rewards.append(t + 1)
                    break
                
                state = next_state
                
            # Compute Loss
            loss = []
            for t in range(len(experiences)):
                if complete_return:
                    # Compute return
                    G = 0.0
                    for k in range(t, len(experiences)):
                        G += experiences[k][3] * self.dr ** k
                else:
                    G = None
                state, action, log_prob, reward, next_state, done = experiences[t]
                loss.append(self.compute_loss(state, action, log_prob, reward, next_state, done, G))
                
            # Learning step
            self.optim.zero_grad()
            loss = torch.cat(loss).sum()
            loss.backward()
            self.optim.step()
                
        self.cartpole_env.close()

In [None]:
ac_agent = AC_Agent(env)
ac_agent.train(nb_episodes=10000, complete_return=True)

In [3]:
def compute_mov_avg(data: list, window=10):
    ids = [window * x for x in range(0, int(len(data) / window) + 1)]
    if ids[-1] < len(data) - 1:
        ids.append(len(data) - 1)
    mov_avg = []
    for i in range(1, len(ids)):
        mov_avg.append(sum(data[ids[i - 1]:ids[i]]) / len(data[ids[i - 1]:ids[i]]))
    return ids[1:], mov_avg

def display_metrics(data: list, legend: str, mov_avg=50):
    plt.figure(figsize=(15,5))
    # Plot data
    plt.plot(np.arange(len(data)), data, label=legend)
    # Plot moving average
    if len(data) > 5 * mov_avg:
        ids, avg = compute_mov_avg(data, mov_avg)
        plt.plot(ids, avg, label=legend + ' average')
    plt.legend()
    plt.show()

In [None]:
display_metrics(ac_agent.rewards, "Reward")
display_metrics(ac_agent.value_losses, "Value Loss")

### Advantage Actor Critic agent

In [9]:
class AdvantageActorCritic(nn.Module):
    
    def __init__(self):
        super(AdvantageActorCritic, self).__init__()
        
        # Common network
        self.fc1 = nn.Linear(in_features=4, out_features=10)
        self.fc2 = nn.Linear(in_features=10, out_features=2)
        
        # Actor head
        self.actor_head = nn.Linear(in_features=2, out_features=2)
        
        # Critic head
        self.critic_head = nn.Linear(in_features=2, out_features=1)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        
        policy = F.softmax(self.actor_head(x))
        value = self.critic_head(x)
        return policy, value

class A2C_Agent():
    
    def __init__(self, env, lr=0.001, dr=0.95):
        # Cartpole environment
        self.cartpole_env = env
        
        # Learning rates
        self.lr = lr
        # Discount rate
        self.dr = dr
        
        # Actor Critic model
        self.model = AdvantageActorCritic()
        self.optim = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        
        # Saving training curves
        self.rewards = []
        self.value_losses = []
        
    def choose_action(self, state):
        # Translate state to fit model input
        state = torch.from_numpy(state).float().unsqueeze(0)
        # Get actions probability distribution
        probs, _ = self.model(state)
        distrib = Categorical(probs)
        # Sample action
        action = distrib.sample()
        return action.item(), distrib.log_prob(action)
    
    def compute_loss(self, state, action, log_prob, reward, next_state, done, G=None):
        # Get value of state
        state = torch.from_numpy(state).float().unsqueeze(0)
        _, value = self.model(state)
        # Compute Q-target
        if not done:
            if G is None:
                # Get value of next_state
                next_state = torch.from_numpy(next_state).float().unsqueeze(0)
                _, next_value = self.model(next_state)
                # Compute q_target
                q_target = torch.Tensor([reward]) + self.dr * next_value
            else:
                q_target = G
        else:
            q_target = torch.Tensor([reward])
        # Compute Advantage
        advantage = q_target - value
        # Compute Actor's Loss
        L_actor = -log_prob * advantage
        # Compute Critic's loss
        L_critic = (q_target - value) ** 2
        self.value_losses.append(L_critic)
        # Return complete loss
        return L_actor + L_critic
        
    def train(self, nb_episodes=1000, complete_return=False):
        for i in range(nb_episodes):
            state = env.reset()
            experiences = []
            for t in range(500):
                env.render()
                # Actor chooses action
                action, log_prob = self.choose_action(state)
                # Perform action
                next_state, reward, done, info = env.step(action)
                
                experiences.append((state, action, log_prob, reward, next_state, done))
                
                # Check for end state
                if done:
                    print("Episode #{} finished after {} timesteps.".format(i + 1, t + 1))
                    self.rewards.append(t + 1)
                    break
                
                state = next_state
                
            # Compute Loss
            loss = []
            for t in range(len(experiences)):
                if complete_return:
                    # Compute return
                    G = 0.0
                    for k in range(t, len(experiences)):
                        G += experiences[k][3] * self.dr ** k
                else:
                    G = None
                state, action, log_prob, reward, next_state, done = experiences[t]
                loss.append(self.compute_loss(state, action, log_prob, reward, next_state, done, G))
                
            # Learning step
            self.optim.zero_grad()
            loss = torch.cat(loss).sum()
            loss.backward()
            self.optim.step()
                
        self.cartpole_env.close()

In [None]:
a2c_agent = A2C_Agent(env)
a2c_agent.train(nb_episodes=15000, complete_return=True)

  policy = F.softmax(self.actor_head(x))


Episode #1 finished after 10 timesteps.
Episode #2 finished after 10 timesteps.
Episode #3 finished after 12 timesteps.
Episode #4 finished after 14 timesteps.
Episode #5 finished after 12 timesteps.
Episode #6 finished after 9 timesteps.
Episode #7 finished after 11 timesteps.
Episode #8 finished after 15 timesteps.
Episode #9 finished after 51 timesteps.
Episode #10 finished after 12 timesteps.
Episode #11 finished after 21 timesteps.
Episode #12 finished after 18 timesteps.
Episode #13 finished after 17 timesteps.
Episode #14 finished after 19 timesteps.
Episode #15 finished after 10 timesteps.
Episode #16 finished after 12 timesteps.
Episode #17 finished after 24 timesteps.
Episode #18 finished after 15 timesteps.
Episode #19 finished after 12 timesteps.
Episode #20 finished after 17 timesteps.
Episode #21 finished after 26 timesteps.
Episode #22 finished after 9 timesteps.
Episode #23 finished after 11 timesteps.
Episode #24 finished after 16 timesteps.
Episode #25 finished after 

Episode #199 finished after 20 timesteps.
Episode #200 finished after 52 timesteps.
Episode #201 finished after 31 timesteps.
Episode #202 finished after 23 timesteps.
Episode #203 finished after 15 timesteps.
Episode #204 finished after 11 timesteps.
Episode #205 finished after 9 timesteps.
Episode #206 finished after 18 timesteps.
Episode #207 finished after 16 timesteps.
Episode #208 finished after 13 timesteps.
Episode #209 finished after 11 timesteps.
Episode #210 finished after 10 timesteps.
Episode #211 finished after 17 timesteps.
Episode #212 finished after 20 timesteps.
Episode #213 finished after 26 timesteps.
Episode #214 finished after 29 timesteps.
Episode #215 finished after 49 timesteps.
Episode #216 finished after 19 timesteps.
Episode #217 finished after 15 timesteps.
Episode #218 finished after 21 timesteps.
Episode #219 finished after 12 timesteps.
Episode #220 finished after 13 timesteps.
Episode #221 finished after 11 timesteps.
Episode #222 finished after 22 time

Episode #395 finished after 37 timesteps.
Episode #396 finished after 23 timesteps.
Episode #397 finished after 15 timesteps.
Episode #398 finished after 28 timesteps.
Episode #399 finished after 16 timesteps.
Episode #400 finished after 32 timesteps.
Episode #401 finished after 21 timesteps.
Episode #402 finished after 26 timesteps.
Episode #403 finished after 13 timesteps.
Episode #404 finished after 11 timesteps.
Episode #405 finished after 14 timesteps.
Episode #406 finished after 22 timesteps.
Episode #407 finished after 15 timesteps.
Episode #408 finished after 16 timesteps.
Episode #409 finished after 26 timesteps.
Episode #410 finished after 33 timesteps.
Episode #411 finished after 12 timesteps.
Episode #412 finished after 12 timesteps.
Episode #413 finished after 14 timesteps.
Episode #414 finished after 42 timesteps.
Episode #415 finished after 14 timesteps.
Episode #416 finished after 27 timesteps.
Episode #417 finished after 40 timesteps.
Episode #418 finished after 33 tim

Episode #591 finished after 28 timesteps.
Episode #592 finished after 26 timesteps.
Episode #593 finished after 18 timesteps.
Episode #594 finished after 20 timesteps.
Episode #595 finished after 25 timesteps.
Episode #596 finished after 35 timesteps.
Episode #597 finished after 13 timesteps.
Episode #598 finished after 35 timesteps.
Episode #599 finished after 19 timesteps.
Episode #600 finished after 10 timesteps.
Episode #601 finished after 67 timesteps.
Episode #602 finished after 33 timesteps.
Episode #603 finished after 33 timesteps.
Episode #604 finished after 50 timesteps.
Episode #605 finished after 12 timesteps.
Episode #606 finished after 21 timesteps.
Episode #607 finished after 26 timesteps.
Episode #608 finished after 13 timesteps.
Episode #609 finished after 16 timesteps.
Episode #610 finished after 39 timesteps.
Episode #611 finished after 19 timesteps.
Episode #612 finished after 18 timesteps.
Episode #613 finished after 29 timesteps.
Episode #614 finished after 15 tim

Episode #787 finished after 26 timesteps.
Episode #788 finished after 9 timesteps.
Episode #789 finished after 21 timesteps.
Episode #790 finished after 34 timesteps.
Episode #791 finished after 16 timesteps.
Episode #792 finished after 11 timesteps.
Episode #793 finished after 12 timesteps.
Episode #794 finished after 11 timesteps.
Episode #795 finished after 21 timesteps.
Episode #796 finished after 20 timesteps.
Episode #797 finished after 33 timesteps.
Episode #798 finished after 22 timesteps.
Episode #799 finished after 19 timesteps.
Episode #800 finished after 19 timesteps.
Episode #801 finished after 31 timesteps.
Episode #802 finished after 29 timesteps.
Episode #803 finished after 46 timesteps.
Episode #804 finished after 24 timesteps.
Episode #805 finished after 18 timesteps.
Episode #806 finished after 67 timesteps.
Episode #807 finished after 52 timesteps.
Episode #808 finished after 21 timesteps.
Episode #809 finished after 21 timesteps.
Episode #810 finished after 31 time

Episode #983 finished after 15 timesteps.
Episode #984 finished after 21 timesteps.
Episode #985 finished after 46 timesteps.
Episode #986 finished after 15 timesteps.
Episode #987 finished after 19 timesteps.
Episode #988 finished after 13 timesteps.
Episode #989 finished after 16 timesteps.
Episode #990 finished after 18 timesteps.
Episode #991 finished after 19 timesteps.
Episode #992 finished after 20 timesteps.
Episode #993 finished after 21 timesteps.
Episode #994 finished after 23 timesteps.
Episode #995 finished after 49 timesteps.
Episode #996 finished after 22 timesteps.
Episode #997 finished after 56 timesteps.
Episode #998 finished after 22 timesteps.
Episode #999 finished after 12 timesteps.
Episode #1000 finished after 27 timesteps.
Episode #1001 finished after 12 timesteps.
Episode #1002 finished after 27 timesteps.
Episode #1003 finished after 20 timesteps.
Episode #1004 finished after 9 timesteps.
Episode #1005 finished after 31 timesteps.
Episode #1006 finished after 

Episode #1175 finished after 15 timesteps.
Episode #1176 finished after 15 timesteps.
Episode #1177 finished after 21 timesteps.
Episode #1178 finished after 20 timesteps.
Episode #1179 finished after 25 timesteps.
Episode #1180 finished after 32 timesteps.
Episode #1181 finished after 16 timesteps.
Episode #1182 finished after 55 timesteps.
Episode #1183 finished after 11 timesteps.
Episode #1184 finished after 24 timesteps.
Episode #1185 finished after 30 timesteps.
Episode #1186 finished after 14 timesteps.
Episode #1187 finished after 20 timesteps.
Episode #1188 finished after 28 timesteps.
Episode #1189 finished after 20 timesteps.
Episode #1190 finished after 12 timesteps.
Episode #1191 finished after 26 timesteps.
Episode #1192 finished after 11 timesteps.
Episode #1193 finished after 33 timesteps.
Episode #1194 finished after 11 timesteps.
Episode #1195 finished after 15 timesteps.
Episode #1196 finished after 27 timesteps.
Episode #1197 finished after 14 timesteps.
Episode #11

Episode #1366 finished after 29 timesteps.
Episode #1367 finished after 29 timesteps.
Episode #1368 finished after 22 timesteps.
Episode #1369 finished after 27 timesteps.
Episode #1370 finished after 19 timesteps.
Episode #1371 finished after 30 timesteps.
Episode #1372 finished after 17 timesteps.
Episode #1373 finished after 49 timesteps.
Episode #1374 finished after 72 timesteps.
Episode #1375 finished after 24 timesteps.
Episode #1376 finished after 21 timesteps.
Episode #1377 finished after 11 timesteps.
Episode #1378 finished after 20 timesteps.
Episode #1379 finished after 74 timesteps.
Episode #1380 finished after 19 timesteps.
Episode #1381 finished after 18 timesteps.
Episode #1382 finished after 15 timesteps.
Episode #1383 finished after 42 timesteps.
Episode #1384 finished after 25 timesteps.
Episode #1385 finished after 25 timesteps.
Episode #1386 finished after 24 timesteps.
Episode #1387 finished after 9 timesteps.
Episode #1388 finished after 31 timesteps.
Episode #138

Episode #1557 finished after 43 timesteps.
Episode #1558 finished after 22 timesteps.
Episode #1559 finished after 34 timesteps.
Episode #1560 finished after 24 timesteps.
Episode #1561 finished after 37 timesteps.
Episode #1562 finished after 26 timesteps.
Episode #1563 finished after 47 timesteps.
Episode #1564 finished after 35 timesteps.
Episode #1565 finished after 24 timesteps.
Episode #1566 finished after 22 timesteps.
Episode #1567 finished after 11 timesteps.
Episode #1568 finished after 53 timesteps.
Episode #1569 finished after 20 timesteps.
Episode #1570 finished after 50 timesteps.
Episode #1571 finished after 69 timesteps.
Episode #1572 finished after 14 timesteps.
Episode #1573 finished after 37 timesteps.
Episode #1574 finished after 54 timesteps.
Episode #1575 finished after 29 timesteps.
Episode #1576 finished after 19 timesteps.
Episode #1577 finished after 37 timesteps.
Episode #1578 finished after 40 timesteps.
Episode #1579 finished after 20 timesteps.
Episode #15

Episode #1748 finished after 46 timesteps.
Episode #1749 finished after 68 timesteps.
Episode #1750 finished after 44 timesteps.
Episode #1751 finished after 60 timesteps.
Episode #1752 finished after 78 timesteps.
Episode #1753 finished after 48 timesteps.
Episode #1754 finished after 43 timesteps.
Episode #1755 finished after 25 timesteps.
Episode #1756 finished after 17 timesteps.
Episode #1757 finished after 42 timesteps.
Episode #1758 finished after 25 timesteps.
Episode #1759 finished after 26 timesteps.
Episode #1760 finished after 23 timesteps.
Episode #1761 finished after 41 timesteps.
Episode #1762 finished after 66 timesteps.
Episode #1763 finished after 28 timesteps.
Episode #1764 finished after 47 timesteps.
Episode #1765 finished after 34 timesteps.
Episode #1766 finished after 29 timesteps.
Episode #1767 finished after 16 timesteps.
Episode #1768 finished after 131 timesteps.
Episode #1769 finished after 63 timesteps.
Episode #1770 finished after 64 timesteps.
Episode #1

Episode #1939 finished after 17 timesteps.
Episode #1940 finished after 23 timesteps.
Episode #1941 finished after 43 timesteps.
Episode #1942 finished after 21 timesteps.
Episode #1943 finished after 123 timesteps.
Episode #1944 finished after 38 timesteps.
Episode #1945 finished after 13 timesteps.
Episode #1946 finished after 31 timesteps.
Episode #1947 finished after 11 timesteps.
Episode #1948 finished after 18 timesteps.
Episode #1949 finished after 14 timesteps.
Episode #1950 finished after 27 timesteps.
Episode #1951 finished after 29 timesteps.
Episode #1952 finished after 38 timesteps.
Episode #1953 finished after 37 timesteps.
Episode #1954 finished after 10 timesteps.
Episode #1955 finished after 10 timesteps.
Episode #1956 finished after 51 timesteps.
Episode #1957 finished after 34 timesteps.
Episode #1958 finished after 24 timesteps.
Episode #1959 finished after 22 timesteps.
Episode #1960 finished after 45 timesteps.
Episode #1961 finished after 78 timesteps.
Episode #1

Episode #2130 finished after 25 timesteps.
Episode #2131 finished after 128 timesteps.
Episode #2132 finished after 59 timesteps.
Episode #2133 finished after 28 timesteps.
Episode #2134 finished after 26 timesteps.
Episode #2135 finished after 22 timesteps.
Episode #2136 finished after 62 timesteps.
Episode #2137 finished after 68 timesteps.
Episode #2138 finished after 27 timesteps.
Episode #2139 finished after 67 timesteps.
Episode #2140 finished after 19 timesteps.
Episode #2141 finished after 52 timesteps.
Episode #2142 finished after 12 timesteps.
Episode #2143 finished after 25 timesteps.
Episode #2144 finished after 71 timesteps.
Episode #2145 finished after 41 timesteps.
Episode #2146 finished after 33 timesteps.
Episode #2147 finished after 133 timesteps.
Episode #2148 finished after 35 timesteps.
Episode #2149 finished after 58 timesteps.
Episode #2150 finished after 15 timesteps.
Episode #2151 finished after 45 timesteps.
Episode #2152 finished after 38 timesteps.
Episode #

Episode #2321 finished after 68 timesteps.
Episode #2322 finished after 20 timesteps.
Episode #2323 finished after 23 timesteps.
Episode #2324 finished after 65 timesteps.
Episode #2325 finished after 128 timesteps.
Episode #2326 finished after 46 timesteps.
Episode #2327 finished after 42 timesteps.
Episode #2328 finished after 50 timesteps.
Episode #2329 finished after 33 timesteps.
Episode #2330 finished after 29 timesteps.
Episode #2331 finished after 14 timesteps.
Episode #2332 finished after 52 timesteps.
Episode #2333 finished after 44 timesteps.
Episode #2334 finished after 51 timesteps.
Episode #2335 finished after 61 timesteps.
Episode #2336 finished after 41 timesteps.
Episode #2337 finished after 55 timesteps.
Episode #2338 finished after 16 timesteps.
Episode #2339 finished after 46 timesteps.
Episode #2340 finished after 24 timesteps.
Episode #2341 finished after 55 timesteps.
Episode #2342 finished after 50 timesteps.
Episode #2343 finished after 55 timesteps.
Episode #2

Episode #2512 finished after 97 timesteps.
Episode #2513 finished after 36 timesteps.
Episode #2514 finished after 16 timesteps.
Episode #2515 finished after 27 timesteps.
Episode #2516 finished after 22 timesteps.
Episode #2517 finished after 63 timesteps.
Episode #2518 finished after 42 timesteps.
Episode #2519 finished after 93 timesteps.
Episode #2520 finished after 48 timesteps.
Episode #2521 finished after 98 timesteps.
Episode #2522 finished after 52 timesteps.
Episode #2523 finished after 42 timesteps.
Episode #2524 finished after 46 timesteps.
Episode #2525 finished after 41 timesteps.
Episode #2526 finished after 14 timesteps.
Episode #2527 finished after 104 timesteps.
Episode #2528 finished after 18 timesteps.
Episode #2529 finished after 49 timesteps.
Episode #2530 finished after 19 timesteps.
Episode #2531 finished after 102 timesteps.
Episode #2532 finished after 45 timesteps.
Episode #2533 finished after 44 timesteps.
Episode #2534 finished after 18 timesteps.
Episode #

Episode #2703 finished after 62 timesteps.
Episode #2704 finished after 126 timesteps.
Episode #2705 finished after 68 timesteps.
Episode #2706 finished after 40 timesteps.
Episode #2707 finished after 41 timesteps.
Episode #2708 finished after 87 timesteps.
Episode #2709 finished after 63 timesteps.
Episode #2710 finished after 49 timesteps.
Episode #2711 finished after 108 timesteps.
Episode #2712 finished after 54 timesteps.
Episode #2713 finished after 61 timesteps.
Episode #2714 finished after 15 timesteps.
Episode #2715 finished after 53 timesteps.
Episode #2716 finished after 97 timesteps.
Episode #2717 finished after 27 timesteps.
Episode #2718 finished after 89 timesteps.
Episode #2719 finished after 47 timesteps.
Episode #2720 finished after 130 timesteps.
Episode #2721 finished after 73 timesteps.
Episode #2722 finished after 46 timesteps.
Episode #2723 finished after 64 timesteps.
Episode #2724 finished after 55 timesteps.
Episode #2725 finished after 60 timesteps.
Episode 

Episode #2894 finished after 89 timesteps.
Episode #2895 finished after 139 timesteps.
Episode #2896 finished after 49 timesteps.
Episode #2897 finished after 110 timesteps.
Episode #2898 finished after 84 timesteps.
Episode #2899 finished after 64 timesteps.
Episode #2900 finished after 76 timesteps.
Episode #2901 finished after 43 timesteps.
Episode #2902 finished after 75 timesteps.
Episode #2903 finished after 28 timesteps.
Episode #2904 finished after 96 timesteps.
Episode #2905 finished after 58 timesteps.
Episode #2906 finished after 75 timesteps.
Episode #2907 finished after 87 timesteps.
Episode #2908 finished after 22 timesteps.
Episode #2909 finished after 145 timesteps.
Episode #2910 finished after 20 timesteps.
Episode #2911 finished after 48 timesteps.
Episode #2912 finished after 136 timesteps.
Episode #2913 finished after 40 timesteps.
Episode #2914 finished after 59 timesteps.
Episode #2915 finished after 36 timesteps.
Episode #2916 finished after 42 timesteps.
Episode

Episode #3084 finished after 71 timesteps.
Episode #3085 finished after 58 timesteps.
Episode #3086 finished after 66 timesteps.
Episode #3087 finished after 32 timesteps.
Episode #3088 finished after 29 timesteps.
Episode #3089 finished after 29 timesteps.
Episode #3090 finished after 85 timesteps.
Episode #3091 finished after 23 timesteps.
Episode #3092 finished after 88 timesteps.
Episode #3093 finished after 44 timesteps.
Episode #3094 finished after 47 timesteps.
Episode #3095 finished after 55 timesteps.
Episode #3096 finished after 54 timesteps.
Episode #3097 finished after 42 timesteps.
Episode #3098 finished after 43 timesteps.
Episode #3099 finished after 70 timesteps.
Episode #3100 finished after 45 timesteps.
Episode #3101 finished after 19 timesteps.
Episode #3102 finished after 87 timesteps.
Episode #3103 finished after 191 timesteps.
Episode #3104 finished after 64 timesteps.
Episode #3105 finished after 33 timesteps.
Episode #3106 finished after 78 timesteps.
Episode #3

Episode #3275 finished after 45 timesteps.
Episode #3276 finished after 52 timesteps.
Episode #3277 finished after 63 timesteps.
Episode #3278 finished after 115 timesteps.
Episode #3279 finished after 48 timesteps.
Episode #3280 finished after 86 timesteps.
Episode #3281 finished after 56 timesteps.
Episode #3282 finished after 93 timesteps.
Episode #3283 finished after 52 timesteps.
Episode #3284 finished after 54 timesteps.
Episode #3285 finished after 54 timesteps.
Episode #3286 finished after 15 timesteps.
Episode #3287 finished after 70 timesteps.
Episode #3288 finished after 29 timesteps.
Episode #3289 finished after 37 timesteps.
Episode #3290 finished after 57 timesteps.
Episode #3291 finished after 48 timesteps.
Episode #3292 finished after 39 timesteps.
Episode #3293 finished after 47 timesteps.
Episode #3294 finished after 27 timesteps.
Episode #3295 finished after 54 timesteps.
Episode #3296 finished after 90 timesteps.
Episode #3297 finished after 46 timesteps.
Episode #3

Episode #3466 finished after 45 timesteps.
Episode #3467 finished after 15 timesteps.
Episode #3468 finished after 33 timesteps.
Episode #3469 finished after 53 timesteps.
Episode #3470 finished after 106 timesteps.
Episode #3471 finished after 44 timesteps.
Episode #3472 finished after 40 timesteps.
Episode #3473 finished after 66 timesteps.
Episode #3474 finished after 31 timesteps.
Episode #3475 finished after 85 timesteps.
Episode #3476 finished after 60 timesteps.
Episode #3477 finished after 53 timesteps.
Episode #3478 finished after 54 timesteps.
Episode #3479 finished after 18 timesteps.
Episode #3480 finished after 53 timesteps.
Episode #3481 finished after 49 timesteps.
Episode #3482 finished after 37 timesteps.
Episode #3483 finished after 139 timesteps.
Episode #3484 finished after 27 timesteps.
Episode #3485 finished after 98 timesteps.
Episode #3486 finished after 37 timesteps.
Episode #3487 finished after 34 timesteps.
Episode #3488 finished after 86 timesteps.
Episode #

Episode #3656 finished after 21 timesteps.
Episode #3657 finished after 38 timesteps.
Episode #3658 finished after 101 timesteps.
Episode #3659 finished after 38 timesteps.
Episode #3660 finished after 70 timesteps.
Episode #3661 finished after 29 timesteps.
Episode #3662 finished after 23 timesteps.
Episode #3663 finished after 61 timesteps.
Episode #3664 finished after 46 timesteps.
Episode #3665 finished after 89 timesteps.
Episode #3666 finished after 42 timesteps.
Episode #3667 finished after 74 timesteps.
Episode #3668 finished after 69 timesteps.
Episode #3669 finished after 144 timesteps.
Episode #3670 finished after 52 timesteps.
Episode #3671 finished after 56 timesteps.
Episode #3672 finished after 31 timesteps.
Episode #3673 finished after 41 timesteps.
Episode #3674 finished after 64 timesteps.
Episode #3675 finished after 135 timesteps.
Episode #3676 finished after 51 timesteps.
Episode #3677 finished after 102 timesteps.
Episode #3678 finished after 66 timesteps.
Episode

Episode #3846 finished after 59 timesteps.
Episode #3847 finished after 54 timesteps.
Episode #3848 finished after 75 timesteps.
Episode #3849 finished after 45 timesteps.
Episode #3850 finished after 44 timesteps.
Episode #3851 finished after 51 timesteps.
Episode #3852 finished after 22 timesteps.
Episode #3853 finished after 43 timesteps.
Episode #3854 finished after 46 timesteps.
Episode #3855 finished after 12 timesteps.
Episode #3856 finished after 21 timesteps.
Episode #3857 finished after 43 timesteps.
Episode #3858 finished after 55 timesteps.
Episode #3859 finished after 37 timesteps.
Episode #3860 finished after 44 timesteps.
Episode #3861 finished after 23 timesteps.
Episode #3862 finished after 79 timesteps.
Episode #3863 finished after 66 timesteps.
Episode #3864 finished after 30 timesteps.
Episode #3865 finished after 44 timesteps.
Episode #3866 finished after 34 timesteps.
Episode #3867 finished after 42 timesteps.
Episode #3868 finished after 46 timesteps.
Episode #38

Episode #4037 finished after 45 timesteps.
Episode #4038 finished after 73 timesteps.
Episode #4039 finished after 71 timesteps.
Episode #4040 finished after 166 timesteps.
Episode #4041 finished after 174 timesteps.
Episode #4042 finished after 42 timesteps.
Episode #4043 finished after 103 timesteps.
Episode #4044 finished after 41 timesteps.
Episode #4045 finished after 84 timesteps.
Episode #4046 finished after 83 timesteps.
Episode #4047 finished after 45 timesteps.
Episode #4048 finished after 196 timesteps.
Episode #4049 finished after 116 timesteps.
Episode #4050 finished after 35 timesteps.
Episode #4051 finished after 59 timesteps.
Episode #4052 finished after 44 timesteps.
Episode #4053 finished after 42 timesteps.
Episode #4054 finished after 45 timesteps.
Episode #4055 finished after 78 timesteps.
Episode #4056 finished after 237 timesteps.
Episode #4057 finished after 12 timesteps.
Episode #4058 finished after 29 timesteps.
Episode #4059 finished after 87 timesteps.
Episo

Episode #4227 finished after 40 timesteps.
Episode #4228 finished after 12 timesteps.
Episode #4229 finished after 37 timesteps.
Episode #4230 finished after 52 timesteps.
Episode #4231 finished after 51 timesteps.
Episode #4232 finished after 51 timesteps.
Episode #4233 finished after 50 timesteps.
Episode #4234 finished after 39 timesteps.
Episode #4235 finished after 65 timesteps.
Episode #4236 finished after 88 timesteps.
Episode #4237 finished after 126 timesteps.
Episode #4238 finished after 233 timesteps.
Episode #4239 finished after 67 timesteps.
Episode #4240 finished after 46 timesteps.
Episode #4241 finished after 61 timesteps.
Episode #4242 finished after 58 timesteps.
Episode #4243 finished after 41 timesteps.
Episode #4244 finished after 150 timesteps.
Episode #4245 finished after 59 timesteps.
Episode #4246 finished after 58 timesteps.
Episode #4247 finished after 73 timesteps.
Episode #4248 finished after 177 timesteps.
Episode #4249 finished after 51 timesteps.
Episode

Episode #4417 finished after 203 timesteps.
Episode #4418 finished after 159 timesteps.
Episode #4419 finished after 63 timesteps.
Episode #4420 finished after 32 timesteps.
Episode #4421 finished after 38 timesteps.
Episode #4422 finished after 32 timesteps.
Episode #4423 finished after 72 timesteps.
Episode #4424 finished after 123 timesteps.
Episode #4425 finished after 15 timesteps.
Episode #4426 finished after 208 timesteps.
Episode #4427 finished after 120 timesteps.
Episode #4428 finished after 139 timesteps.
Episode #4429 finished after 119 timesteps.
Episode #4430 finished after 111 timesteps.
Episode #4431 finished after 41 timesteps.
Episode #4432 finished after 36 timesteps.
Episode #4433 finished after 34 timesteps.
Episode #4434 finished after 59 timesteps.
Episode #4435 finished after 19 timesteps.
Episode #4436 finished after 80 timesteps.
Episode #4437 finished after 116 timesteps.
Episode #4438 finished after 55 timesteps.
Episode #4439 finished after 71 timesteps.
Ep

Episode #4607 finished after 39 timesteps.
Episode #4608 finished after 114 timesteps.
Episode #4609 finished after 51 timesteps.
Episode #4610 finished after 37 timesteps.
Episode #4611 finished after 75 timesteps.
Episode #4612 finished after 40 timesteps.
Episode #4613 finished after 98 timesteps.
Episode #4614 finished after 43 timesteps.
Episode #4615 finished after 74 timesteps.
Episode #4616 finished after 58 timesteps.
Episode #4617 finished after 43 timesteps.
Episode #4618 finished after 114 timesteps.
Episode #4619 finished after 30 timesteps.
Episode #4620 finished after 77 timesteps.
Episode #4621 finished after 53 timesteps.
Episode #4622 finished after 40 timesteps.
Episode #4623 finished after 47 timesteps.
Episode #4624 finished after 52 timesteps.
Episode #4625 finished after 43 timesteps.
Episode #4626 finished after 31 timesteps.
Episode #4627 finished after 80 timesteps.
Episode #4628 finished after 42 timesteps.
Episode #4629 finished after 41 timesteps.
Episode #

Episode #4798 finished after 56 timesteps.
Episode #4799 finished after 44 timesteps.
Episode #4800 finished after 45 timesteps.
Episode #4801 finished after 41 timesteps.
Episode #4802 finished after 36 timesteps.
Episode #4803 finished after 42 timesteps.
Episode #4804 finished after 92 timesteps.
Episode #4805 finished after 48 timesteps.
Episode #4806 finished after 57 timesteps.
Episode #4807 finished after 43 timesteps.
Episode #4808 finished after 50 timesteps.
Episode #4809 finished after 65 timesteps.
Episode #4810 finished after 41 timesteps.
Episode #4811 finished after 41 timesteps.
Episode #4812 finished after 37 timesteps.
Episode #4813 finished after 57 timesteps.
Episode #4814 finished after 48 timesteps.
Episode #4815 finished after 60 timesteps.
Episode #4816 finished after 41 timesteps.
Episode #4817 finished after 120 timesteps.
Episode #4818 finished after 37 timesteps.
Episode #4819 finished after 47 timesteps.
Episode #4820 finished after 41 timesteps.
Episode #4

Episode #4989 finished after 34 timesteps.
Episode #4990 finished after 136 timesteps.
Episode #4991 finished after 47 timesteps.
Episode #4992 finished after 42 timesteps.
Episode #4993 finished after 35 timesteps.
Episode #4994 finished after 177 timesteps.
Episode #4995 finished after 42 timesteps.
Episode #4996 finished after 51 timesteps.
Episode #4997 finished after 51 timesteps.
Episode #4998 finished after 39 timesteps.
Episode #4999 finished after 37 timesteps.
Episode #5000 finished after 90 timesteps.
Episode #5001 finished after 60 timesteps.
Episode #5002 finished after 190 timesteps.
Episode #5003 finished after 61 timesteps.
Episode #5004 finished after 37 timesteps.
Episode #5005 finished after 63 timesteps.
Episode #5006 finished after 46 timesteps.
Episode #5007 finished after 81 timesteps.
Episode #5008 finished after 46 timesteps.
Episode #5009 finished after 159 timesteps.
Episode #5010 finished after 154 timesteps.
Episode #5011 finished after 40 timesteps.
Episod

Episode #5179 finished after 42 timesteps.
Episode #5180 finished after 142 timesteps.
Episode #5181 finished after 57 timesteps.
Episode #5182 finished after 203 timesteps.
Episode #5183 finished after 24 timesteps.
Episode #5184 finished after 170 timesteps.
Episode #5185 finished after 42 timesteps.
Episode #5186 finished after 120 timesteps.
Episode #5187 finished after 116 timesteps.
Episode #5188 finished after 43 timesteps.
Episode #5189 finished after 87 timesteps.
Episode #5190 finished after 179 timesteps.
Episode #5191 finished after 43 timesteps.
Episode #5192 finished after 48 timesteps.
Episode #5193 finished after 172 timesteps.
Episode #5194 finished after 90 timesteps.
Episode #5195 finished after 225 timesteps.
Episode #5196 finished after 39 timesteps.
Episode #5197 finished after 51 timesteps.
Episode #5198 finished after 22 timesteps.
Episode #5199 finished after 133 timesteps.
Episode #5200 finished after 155 timesteps.
Episode #5201 finished after 43 timesteps.
E

Episode #5368 finished after 50 timesteps.
Episode #5369 finished after 63 timesteps.
Episode #5370 finished after 89 timesteps.
Episode #5371 finished after 47 timesteps.
Episode #5372 finished after 51 timesteps.
Episode #5373 finished after 37 timesteps.
Episode #5374 finished after 120 timesteps.
Episode #5375 finished after 125 timesteps.
Episode #5376 finished after 57 timesteps.
Episode #5377 finished after 88 timesteps.
Episode #5378 finished after 50 timesteps.
Episode #5379 finished after 52 timesteps.
Episode #5380 finished after 63 timesteps.
Episode #5381 finished after 37 timesteps.
Episode #5382 finished after 76 timesteps.
Episode #5383 finished after 37 timesteps.
Episode #5384 finished after 56 timesteps.
Episode #5385 finished after 47 timesteps.
Episode #5386 finished after 73 timesteps.
Episode #5387 finished after 155 timesteps.
Episode #5388 finished after 58 timesteps.
Episode #5389 finished after 84 timesteps.
Episode #5390 finished after 76 timesteps.
Episode 

Episode #5559 finished after 28 timesteps.
Episode #5560 finished after 49 timesteps.
Episode #5561 finished after 45 timesteps.
Episode #5562 finished after 55 timesteps.
Episode #5563 finished after 51 timesteps.
Episode #5564 finished after 49 timesteps.
Episode #5565 finished after 49 timesteps.
Episode #5566 finished after 50 timesteps.
Episode #5567 finished after 28 timesteps.
Episode #5568 finished after 42 timesteps.
Episode #5569 finished after 43 timesteps.
Episode #5570 finished after 46 timesteps.
Episode #5571 finished after 42 timesteps.
Episode #5572 finished after 26 timesteps.
Episode #5573 finished after 67 timesteps.
Episode #5574 finished after 51 timesteps.
Episode #5575 finished after 65 timesteps.
Episode #5576 finished after 56 timesteps.
Episode #5577 finished after 47 timesteps.
Episode #5578 finished after 26 timesteps.
Episode #5579 finished after 52 timesteps.
Episode #5580 finished after 42 timesteps.
Episode #5581 finished after 49 timesteps.
Episode #55

Episode #5750 finished after 50 timesteps.
Episode #5751 finished after 77 timesteps.
Episode #5752 finished after 78 timesteps.
Episode #5753 finished after 50 timesteps.
Episode #5754 finished after 39 timesteps.
Episode #5755 finished after 127 timesteps.
Episode #5756 finished after 72 timesteps.
Episode #5757 finished after 57 timesteps.
Episode #5758 finished after 49 timesteps.
Episode #5759 finished after 121 timesteps.
Episode #5760 finished after 48 timesteps.
Episode #5761 finished after 79 timesteps.
Episode #5762 finished after 83 timesteps.
Episode #5763 finished after 85 timesteps.
Episode #5764 finished after 69 timesteps.
Episode #5765 finished after 49 timesteps.
Episode #5766 finished after 119 timesteps.
Episode #5767 finished after 70 timesteps.
Episode #5768 finished after 83 timesteps.
Episode #5769 finished after 108 timesteps.
Episode #5770 finished after 61 timesteps.
Episode #5771 finished after 63 timesteps.
Episode #5772 finished after 54 timesteps.
Episode

Episode #5940 finished after 65 timesteps.
Episode #5941 finished after 50 timesteps.
Episode #5942 finished after 38 timesteps.
Episode #5943 finished after 39 timesteps.
Episode #5944 finished after 70 timesteps.
Episode #5945 finished after 161 timesteps.
Episode #5946 finished after 83 timesteps.
Episode #5947 finished after 60 timesteps.
Episode #5948 finished after 37 timesteps.
Episode #5949 finished after 43 timesteps.
Episode #5950 finished after 78 timesteps.
Episode #5951 finished after 84 timesteps.
Episode #5952 finished after 40 timesteps.
Episode #5953 finished after 53 timesteps.
Episode #5954 finished after 42 timesteps.
Episode #5955 finished after 98 timesteps.
Episode #5956 finished after 70 timesteps.
Episode #5957 finished after 161 timesteps.
Episode #5958 finished after 66 timesteps.
Episode #5959 finished after 54 timesteps.
Episode #5960 finished after 51 timesteps.
Episode #5961 finished after 64 timesteps.
Episode #5962 finished after 74 timesteps.
Episode #

Episode #6130 finished after 90 timesteps.
Episode #6131 finished after 151 timesteps.
Episode #6132 finished after 56 timesteps.
Episode #6133 finished after 74 timesteps.
Episode #6134 finished after 56 timesteps.
Episode #6135 finished after 38 timesteps.
Episode #6136 finished after 29 timesteps.
Episode #6137 finished after 89 timesteps.
Episode #6138 finished after 40 timesteps.
Episode #6139 finished after 26 timesteps.
Episode #6140 finished after 93 timesteps.
Episode #6141 finished after 66 timesteps.
Episode #6142 finished after 66 timesteps.
Episode #6143 finished after 162 timesteps.
Episode #6144 finished after 74 timesteps.
Episode #6145 finished after 47 timesteps.
Episode #6146 finished after 31 timesteps.
Episode #6147 finished after 227 timesteps.
Episode #6148 finished after 38 timesteps.
Episode #6149 finished after 133 timesteps.
Episode #6150 finished after 64 timesteps.
Episode #6151 finished after 56 timesteps.
Episode #6152 finished after 20 timesteps.
Episode

Episode #6320 finished after 49 timesteps.
Episode #6321 finished after 20 timesteps.
Episode #6322 finished after 41 timesteps.
Episode #6323 finished after 156 timesteps.
Episode #6324 finished after 59 timesteps.
Episode #6325 finished after 135 timesteps.
Episode #6326 finished after 61 timesteps.
Episode #6327 finished after 113 timesteps.
Episode #6328 finished after 37 timesteps.
Episode #6329 finished after 55 timesteps.
Episode #6330 finished after 101 timesteps.
Episode #6331 finished after 57 timesteps.
Episode #6332 finished after 24 timesteps.
Episode #6333 finished after 26 timesteps.
Episode #6334 finished after 48 timesteps.
Episode #6335 finished after 108 timesteps.
Episode #6336 finished after 41 timesteps.
Episode #6337 finished after 36 timesteps.
Episode #6338 finished after 239 timesteps.
Episode #6339 finished after 38 timesteps.
Episode #6340 finished after 103 timesteps.
Episode #6341 finished after 77 timesteps.
Episode #6342 finished after 39 timesteps.
Epis

Episode #6510 finished after 93 timesteps.
Episode #6511 finished after 37 timesteps.
Episode #6512 finished after 113 timesteps.
Episode #6513 finished after 35 timesteps.
Episode #6514 finished after 164 timesteps.
Episode #6515 finished after 17 timesteps.
Episode #6516 finished after 66 timesteps.
Episode #6517 finished after 37 timesteps.
Episode #6518 finished after 112 timesteps.
Episode #6519 finished after 64 timesteps.
Episode #6520 finished after 71 timesteps.
Episode #6521 finished after 180 timesteps.
Episode #6522 finished after 52 timesteps.
Episode #6523 finished after 31 timesteps.
Episode #6524 finished after 43 timesteps.
Episode #6525 finished after 67 timesteps.
Episode #6526 finished after 65 timesteps.
Episode #6527 finished after 77 timesteps.
Episode #6528 finished after 99 timesteps.
Episode #6529 finished after 115 timesteps.
Episode #6530 finished after 107 timesteps.
Episode #6531 finished after 86 timesteps.
Episode #6532 finished after 66 timesteps.
Episo

Episode #6700 finished after 41 timesteps.
Episode #6701 finished after 129 timesteps.
Episode #6702 finished after 32 timesteps.
Episode #6703 finished after 43 timesteps.
Episode #6704 finished after 37 timesteps.
Episode #6705 finished after 37 timesteps.
Episode #6706 finished after 40 timesteps.
Episode #6707 finished after 53 timesteps.
Episode #6708 finished after 66 timesteps.
Episode #6709 finished after 72 timesteps.
Episode #6710 finished after 169 timesteps.
Episode #6711 finished after 28 timesteps.
Episode #6712 finished after 47 timesteps.
Episode #6713 finished after 50 timesteps.
Episode #6714 finished after 56 timesteps.
Episode #6715 finished after 52 timesteps.
Episode #6716 finished after 30 timesteps.
Episode #6717 finished after 25 timesteps.
Episode #6718 finished after 43 timesteps.
Episode #6719 finished after 80 timesteps.
Episode #6720 finished after 86 timesteps.
Episode #6721 finished after 76 timesteps.
Episode #6722 finished after 38 timesteps.
Episode #

Episode #6891 finished after 85 timesteps.
Episode #6892 finished after 39 timesteps.
Episode #6893 finished after 56 timesteps.
Episode #6894 finished after 148 timesteps.
Episode #6895 finished after 43 timesteps.
Episode #6896 finished after 29 timesteps.
Episode #6897 finished after 84 timesteps.
Episode #6898 finished after 48 timesteps.
Episode #6899 finished after 30 timesteps.
Episode #6900 finished after 51 timesteps.
Episode #6901 finished after 58 timesteps.
Episode #6902 finished after 50 timesteps.
Episode #6903 finished after 33 timesteps.
Episode #6904 finished after 42 timesteps.
Episode #6905 finished after 45 timesteps.
Episode #6906 finished after 32 timesteps.
Episode #6907 finished after 108 timesteps.
Episode #6908 finished after 51 timesteps.
Episode #6909 finished after 46 timesteps.
Episode #6910 finished after 47 timesteps.
Episode #6911 finished after 107 timesteps.
Episode #6912 finished after 37 timesteps.
Episode #6913 finished after 39 timesteps.
Episode 

Episode #7081 finished after 44 timesteps.
Episode #7082 finished after 68 timesteps.
Episode #7083 finished after 66 timesteps.
Episode #7084 finished after 79 timesteps.
Episode #7085 finished after 30 timesteps.
Episode #7086 finished after 58 timesteps.
Episode #7087 finished after 146 timesteps.
Episode #7088 finished after 35 timesteps.
Episode #7089 finished after 95 timesteps.
Episode #7090 finished after 51 timesteps.
Episode #7091 finished after 33 timesteps.
Episode #7092 finished after 36 timesteps.
Episode #7093 finished after 29 timesteps.
Episode #7094 finished after 72 timesteps.
Episode #7095 finished after 30 timesteps.
Episode #7096 finished after 56 timesteps.
Episode #7097 finished after 48 timesteps.
Episode #7098 finished after 68 timesteps.
Episode #7099 finished after 47 timesteps.
Episode #7100 finished after 43 timesteps.
Episode #7101 finished after 33 timesteps.
Episode #7102 finished after 54 timesteps.
Episode #7103 finished after 54 timesteps.
Episode #7

Episode #7272 finished after 47 timesteps.
Episode #7273 finished after 286 timesteps.
Episode #7274 finished after 97 timesteps.
Episode #7275 finished after 76 timesteps.
Episode #7276 finished after 33 timesteps.
Episode #7277 finished after 58 timesteps.
Episode #7278 finished after 48 timesteps.
Episode #7279 finished after 31 timesteps.
Episode #7280 finished after 71 timesteps.
Episode #7281 finished after 46 timesteps.
Episode #7282 finished after 46 timesteps.
Episode #7283 finished after 33 timesteps.
Episode #7284 finished after 30 timesteps.
Episode #7285 finished after 59 timesteps.
Episode #7286 finished after 26 timesteps.
Episode #7287 finished after 107 timesteps.
Episode #7288 finished after 33 timesteps.
Episode #7289 finished after 96 timesteps.
Episode #7290 finished after 39 timesteps.
Episode #7291 finished after 40 timesteps.
Episode #7292 finished after 38 timesteps.
Episode #7293 finished after 64 timesteps.
Episode #7294 finished after 44 timesteps.
Episode #

In [None]:
display_metrics(a2c_agent.rewards, "Reward")
display_metrics(a2c_agent.value_losses, "Value Loss")