# Actor Critic for Cartpole

In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.distributions import Categorical

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

import gym

### Cartpole environment

In [2]:
# Build the CartPole-v1 environment; this module-level `env` is later handed to ActorCritic(env).
env = gym.make('CartPole-v1')

### Actor Critic agent

In [None]:
"""class ActorCritic(nn.Module):
    
    def __init__(self):
        super(ActorCritic, self).__init__()
        
        # Common network
        self.fc1 = nn.Linear(in_features=4, out_features=10)
        self.fc2 = nn.Linear(in_features=10, out_features=2)
        
        # Actor head
        self.actor_head = nn.Linear(in_features=2, out_features=2)
        
        # Critic head
        self.critic_head = nn.Linear(in_features=2, out_features=2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        
        policy = F.softmax(self.actor_head(x))
        value = self.critic_head(x)
        return policy, value

class AC_Agent():
    
    def __init__(self, env, lr=0.01, dr=0.95):
        # Cartpole environment
        self.cartpole_env = env
        
        # Learning rates
        self.lr = lr
        # Discount rate
        self.dr = dr
        
        # Actor Critic model
        self.model = ActorCritic()
        self.optim = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        
        # Saving training curves
        self.rewards = []
        self.value_losses = []"""

In [5]:
class ActorCritic():
    """Q Actor-Critic agent for the CartPole environment.

    Two independent networks are trained:

    * the actor maps a 4-dimensional state to a probability distribution over
      the two actions (push left / push right);
    * the critic maps the same state to one Q-value per action.

    Each episode is first rolled out with the current policy; the collected
    transitions are then replayed in order, performing one actor update and
    one critic (SARSA-style TD) update per transition.
    """

    def __init__(self, env, actor_lr=0.01, critic_lr=0.01, dr=0.95):
        """Create the actor/critic networks and their optimizers.

        Args:
            env: Environment exposing ``reset()``, ``step(action)``,
                ``render()`` and ``close()``; ``step`` must return the
                4-tuple ``(new_state, reward, done, info)``.
            actor_lr: Adam learning rate for the actor.
            critic_lr: Adam learning rate for the critic.
            dr: Discount rate applied to the bootstrapped next Q-value.
        """
        # Cartpole environment
        self.cartpole_env = env

        # Learning rates
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        # Discount rate
        self.dr = dr

        # Actor model: learns policy function
        #   - Inputs: 4 real values: [Cart Position, Cart Velocity, Pole Angle, Pole Angular Velocity]
        #   - 3 fully connected layers
        #   - final softmax activation to get probability distribution
        #   - Outputs: Probability of taking the two actions: [Push cart to left, Push cart to right]
        self.actor = nn.Sequential(
            nn.Linear(in_features=4, out_features=10),
            nn.ReLU(),
            nn.Linear(in_features=10, out_features=2),
            nn.ReLU(),
            nn.Linear(in_features=2, out_features=2),
            # Normalise over the action dimension explicitly; omitting `dim`
            # is deprecated and emits a warning on every forward pass.
            nn.Softmax(dim=-1)
        )
        self.actor_optim = torch.optim.Adam(self.actor.parameters(), lr=self.actor_lr)

        # Critic model: learning value function
        #   - Inputs: 4 real values: [Cart Position, Cart Velocity, Pole Angle, Pole Angular Velocity]
        #   - 3 fully connected layers
        #   - Outputs: Value of each action in current state
        self.critic = nn.Sequential(
            nn.Linear(in_features=4, out_features=10),
            nn.ReLU(),
            nn.Linear(in_features=10, out_features=2),
            nn.ReLU(),
            nn.Linear(in_features=2, out_features=2)
        )
        self.critic_optim = torch.optim.Adam(self.critic.parameters(), lr=self.critic_lr)
        self.critic_loss = nn.MSELoss()

        # Saving training curves
        self.rewards = []        # total reward collected in each episode
        self.value_losses = []   # critic squared TD error of each update

    def choose_action(self, state):
        """Sample an action from the actor's policy.

        Args:
            state: 1-D numpy array holding the environment observation.

        Returns:
            Tuple ``(action, log_prob)``: the sampled action as a Python int
            and the log-probability of that action as a tensor of shape (1,).
        """
        # Translate state to fit model input
        state = torch.from_numpy(state).float().unsqueeze(0)
        # Get actions probability distribution using Actor model
        probs = self.actor(state)
        distrib = Categorical(probs)
        # Sample action
        action = distrib.sample()
        return action.item(), distrib.log_prob(action)

    def get_q_values(self, state):
        """Return the critic's Q-values for ``state`` as a (1, 2) tensor."""
        # Translate state to fit model input
        state = torch.from_numpy(state).float().unsqueeze(0)
        return self.critic(state)

    def actor_update(self, log_policy, q_value):
        """Take one policy-gradient step on the actor.

        Args:
            log_policy: Log-probability tensor of the taken action; it must
                still be attached to the current actor graph.
            q_value: Critic estimate for the state-action pair. It is cast to
                a plain float so no gradient reaches the critic.
        """
        # Compute loss ((-1) * Loss, because we're doing gradient ascent)
        policy_loss = -log_policy * float(q_value)
        # Backpropagate
        self.actor_optim.zero_grad()
        policy_loss.backward()
        self.actor_optim.step()

    def critic_update(self, q_value, reward, new_state, next_action, done=False):
        """Take one TD step on the critic and record the loss.

        Args:
            q_value: Q(state, action) tensor, attached to the critic graph.
            reward: Reward received for the transition.
            new_state: Observation reached after the transition.
            next_action: Action index used to bootstrap from ``new_state``;
                ignored when ``done`` is true.
            done: Whether ``new_state`` is terminal. A terminal state has no
                future return, so the target is the reward alone.
        """
        # The target is treated as a constant: gradients must only flow
        # through q_value, not through the bootstrapped estimate.
        with torch.no_grad():
            if done:
                next_q_value = torch.zeros(())
            else:
                # Get Q-value of new state-next action pair
                next_q_value = self.get_q_values(new_state)[0, next_action]
        # Compute Q-target = Reward + discount * Q(new_state)
        q_target = torch.tensor([float(reward)]) + self.dr * next_q_value
        # Backpropagate
        value_loss = (q_target - q_value) ** 2
        self.critic_optim.zero_grad()
        value_loss.backward()
        self.critic_optim.step()

        self.value_losses.append(float(value_loss))

    def _log_prob(self, state, action):
        """Log-probability of ``action`` in ``state`` under the current actor."""
        state = torch.from_numpy(state).float().unsqueeze(0)
        return Categorical(self.actor(state)).log_prob(torch.tensor([action]))

    def _learn_from_episode(self, experiences):
        """Replay one episode, updating actor and critic once per transition."""
        for state, action, reward, new_state, done in experiences:
            # Get Q-value of state-action pair
            q_value = self.get_q_values(state)[0, action]
            # Update Actor's parameters. The log-probability is recomputed
            # here because the actor's weights change after every step, which
            # invalidates any graph stored during the rollout.
            self.actor_update(self._log_prob(state, action), q_value)
            # Sample next action (not needed for a terminal state)
            next_action = None
            if not done:
                next_action, _ = self.choose_action(new_state)
            # Update Critic's parameters
            self.critic_update(q_value, reward, new_state, next_action, done)

    def train(self, nb_episodes=3000, max_steps=500, render=True):
        """Run ``nb_episodes`` episodes, learning after each one.

        Args:
            nb_episodes: Number of episodes to play.
            max_steps: Upper bound on the number of steps per episode.
            render: Whether to call ``render()`` on the environment at every
                step.

        The total reward of each episode is appended to ``self.rewards``. The
        environment is closed when training ends, including when it is
        interrupted by an exception.
        """
        env = self.cartpole_env
        try:
            for i in range(nb_episodes):
                state = env.reset()
                experiences = []
                episode_reward = 0
                for _ in range(max_steps):
                    if render:
                        env.render()
                    # Actor chooses action
                    action, _ = self.choose_action(state)
                    # Perform action
                    new_state, reward, done, _ = env.step(action)
                    episode_reward += reward

                    # Store the transition before testing for the end state so
                    # that the terminal step is learned from as well.
                    experiences.append((state, action, reward, new_state, done))
                    state = new_state

                    # Check for end state
                    if done:
                        break

                print("Episode #{} finished after {} timesteps.".format(i + 1, len(experiences)))
                self.rewards.append(episode_reward)

                # Training
                self._learn_from_episode(experiences)
        finally:
            env.close()

In [4]:
# Build the agent around the module-level `env` and train it with the default number of episodes.
ac_model = ActorCritic(env)
ac_model.train()

  input = module(input)


Episode #1 finished after 13 timesteps.
Episode #2 finished after 13 timesteps.
Episode #3 finished after 12 timesteps.
Episode #4 finished after 17 timesteps.
Episode #5 finished after 10 timesteps.
Episode #6 finished after 31 timesteps.
Episode #7 finished after 14 timesteps.
Episode #8 finished after 14 timesteps.
Episode #9 finished after 15 timesteps.
Episode #10 finished after 14 timesteps.
Episode #11 finished after 10 timesteps.
Episode #12 finished after 20 timesteps.
Episode #13 finished after 15 timesteps.
Episode #14 finished after 9 timesteps.
Episode #15 finished after 31 timesteps.
Episode #16 finished after 27 timesteps.
Episode #17 finished after 10 timesteps.
Episode #18 finished after 14 timesteps.
Episode #19 finished after 10 timesteps.
Episode #20 finished after 22 timesteps.
Episode #21 finished after 16 timesteps.
Episode #22 finished after 9 timesteps.
Episode #23 finished after 17 timesteps.
Episode #24 finished after 11 timesteps.
Episode #25 finished after 

Episode #200 finished after 15 timesteps.
Episode #201 finished after 13 timesteps.
Episode #202 finished after 11 timesteps.
Episode #203 finished after 9 timesteps.
Episode #204 finished after 14 timesteps.
Episode #205 finished after 16 timesteps.
Episode #206 finished after 11 timesteps.
Episode #207 finished after 23 timesteps.
Episode #208 finished after 27 timesteps.
Episode #209 finished after 30 timesteps.
Episode #210 finished after 11 timesteps.
Episode #211 finished after 22 timesteps.
Episode #212 finished after 29 timesteps.
Episode #213 finished after 19 timesteps.
Episode #214 finished after 12 timesteps.
Episode #215 finished after 19 timesteps.
Episode #216 finished after 11 timesteps.
Episode #217 finished after 45 timesteps.
Episode #218 finished after 13 timesteps.
Episode #219 finished after 14 timesteps.
Episode #220 finished after 14 timesteps.
Episode #221 finished after 10 timesteps.
Episode #222 finished after 26 timesteps.
Episode #223 finished after 8 times

Episode #398 finished after 10 timesteps.
Episode #399 finished after 8 timesteps.
Episode #400 finished after 11 timesteps.
Episode #401 finished after 10 timesteps.
Episode #402 finished after 8 timesteps.
Episode #403 finished after 9 timesteps.
Episode #404 finished after 9 timesteps.
Episode #405 finished after 10 timesteps.
Episode #406 finished after 13 timesteps.
Episode #407 finished after 11 timesteps.
Episode #408 finished after 10 timesteps.
Episode #409 finished after 9 timesteps.
Episode #410 finished after 9 timesteps.
Episode #411 finished after 12 timesteps.
Episode #412 finished after 10 timesteps.
Episode #413 finished after 10 timesteps.
Episode #414 finished after 9 timesteps.
Episode #415 finished after 10 timesteps.
Episode #416 finished after 12 timesteps.
Episode #417 finished after 10 timesteps.
Episode #418 finished after 10 timesteps.
Episode #419 finished after 9 timesteps.
Episode #420 finished after 9 timesteps.
Episode #421 finished after 11 timesteps.
E

Episode #596 finished after 10 timesteps.
Episode #597 finished after 10 timesteps.
Episode #598 finished after 9 timesteps.
Episode #599 finished after 10 timesteps.
Episode #600 finished after 10 timesteps.
Episode #601 finished after 10 timesteps.
Episode #602 finished after 13 timesteps.
Episode #603 finished after 10 timesteps.
Episode #604 finished after 11 timesteps.
Episode #605 finished after 10 timesteps.
Episode #606 finished after 10 timesteps.
Episode #607 finished after 11 timesteps.
Episode #608 finished after 10 timesteps.
Episode #609 finished after 9 timesteps.
Episode #610 finished after 9 timesteps.
Episode #611 finished after 10 timesteps.
Episode #612 finished after 10 timesteps.
Episode #613 finished after 10 timesteps.
Episode #614 finished after 9 timesteps.
Episode #615 finished after 8 timesteps.
Episode #616 finished after 10 timesteps.
Episode #617 finished after 9 timesteps.
Episode #618 finished after 10 timesteps.
Episode #619 finished after 10 timesteps

Episode #794 finished after 9 timesteps.
Episode #795 finished after 9 timesteps.
Episode #796 finished after 12 timesteps.
Episode #797 finished after 10 timesteps.
Episode #798 finished after 9 timesteps.
Episode #799 finished after 9 timesteps.
Episode #800 finished after 10 timesteps.
Episode #801 finished after 11 timesteps.
Episode #802 finished after 10 timesteps.
Episode #803 finished after 12 timesteps.
Episode #804 finished after 10 timesteps.
Episode #805 finished after 10 timesteps.
Episode #806 finished after 10 timesteps.
Episode #807 finished after 9 timesteps.
Episode #808 finished after 10 timesteps.
Episode #809 finished after 10 timesteps.
Episode #810 finished after 10 timesteps.
Episode #811 finished after 10 timesteps.
Episode #812 finished after 10 timesteps.
Episode #813 finished after 11 timesteps.
Episode #814 finished after 9 timesteps.
Episode #815 finished after 9 timesteps.
Episode #816 finished after 10 timesteps.
Episode #817 finished after 10 timesteps.

Episode #993 finished after 8 timesteps.
Episode #994 finished after 10 timesteps.
Episode #995 finished after 8 timesteps.
Episode #996 finished after 8 timesteps.
Episode #997 finished after 11 timesteps.
Episode #998 finished after 9 timesteps.
Episode #999 finished after 10 timesteps.
Episode #1000 finished after 10 timesteps.
Episode #1001 finished after 10 timesteps.
Episode #1002 finished after 9 timesteps.
Episode #1003 finished after 8 timesteps.
Episode #1004 finished after 9 timesteps.
Episode #1005 finished after 11 timesteps.
Episode #1006 finished after 9 timesteps.
Episode #1007 finished after 8 timesteps.
Episode #1008 finished after 8 timesteps.
Episode #1009 finished after 9 timesteps.
Episode #1010 finished after 10 timesteps.
Episode #1011 finished after 8 timesteps.
Episode #1012 finished after 9 timesteps.
Episode #1013 finished after 9 timesteps.
Episode #1014 finished after 10 timesteps.
Episode #1015 finished after 9 timesteps.
Episode #1016 finished after 10 t

Episode #1186 finished after 9 timesteps.
Episode #1187 finished after 10 timesteps.
Episode #1188 finished after 10 timesteps.
Episode #1189 finished after 10 timesteps.
Episode #1190 finished after 10 timesteps.
Episode #1191 finished after 9 timesteps.
Episode #1192 finished after 9 timesteps.
Episode #1193 finished after 8 timesteps.
Episode #1194 finished after 10 timesteps.
Episode #1195 finished after 10 timesteps.
Episode #1196 finished after 9 timesteps.
Episode #1197 finished after 11 timesteps.
Episode #1198 finished after 9 timesteps.
Episode #1199 finished after 11 timesteps.
Episode #1200 finished after 9 timesteps.
Episode #1201 finished after 8 timesteps.
Episode #1202 finished after 12 timesteps.
Episode #1203 finished after 10 timesteps.
Episode #1204 finished after 11 timesteps.
Episode #1205 finished after 10 timesteps.
Episode #1206 finished after 10 timesteps.
Episode #1207 finished after 8 timesteps.
Episode #1208 finished after 10 timesteps.
Episode #1209 finish

Episode #1380 finished after 8 timesteps.
Episode #1381 finished after 10 timesteps.
Episode #1382 finished after 8 timesteps.
Episode #1383 finished after 9 timesteps.
Episode #1384 finished after 10 timesteps.
Episode #1385 finished after 10 timesteps.
Episode #1386 finished after 9 timesteps.
Episode #1387 finished after 9 timesteps.
Episode #1388 finished after 11 timesteps.
Episode #1389 finished after 13 timesteps.
Episode #1390 finished after 10 timesteps.
Episode #1391 finished after 10 timesteps.
Episode #1392 finished after 8 timesteps.
Episode #1393 finished after 10 timesteps.
Episode #1394 finished after 11 timesteps.
Episode #1395 finished after 10 timesteps.
Episode #1396 finished after 10 timesteps.
Episode #1397 finished after 12 timesteps.
Episode #1398 finished after 10 timesteps.
Episode #1399 finished after 11 timesteps.
Episode #1400 finished after 8 timesteps.
Episode #1401 finished after 11 timesteps.
Episode #1402 finished after 10 timesteps.
Episode #1403 fini

Episode #1572 finished after 9 timesteps.
Episode #1573 finished after 14 timesteps.
Episode #1574 finished after 11 timesteps.
Episode #1575 finished after 13 timesteps.
Episode #1576 finished after 9 timesteps.
Episode #1577 finished after 11 timesteps.
Episode #1578 finished after 13 timesteps.
Episode #1579 finished after 10 timesteps.
Episode #1580 finished after 10 timesteps.
Episode #1581 finished after 10 timesteps.
Episode #1582 finished after 13 timesteps.
Episode #1583 finished after 9 timesteps.
Episode #1584 finished after 10 timesteps.
Episode #1585 finished after 13 timesteps.
Episode #1586 finished after 10 timesteps.
Episode #1587 finished after 9 timesteps.
Episode #1588 finished after 9 timesteps.
Episode #1589 finished after 13 timesteps.
Episode #1590 finished after 13 timesteps.
Episode #1591 finished after 9 timesteps.
Episode #1592 finished after 9 timesteps.
Episode #1593 finished after 13 timesteps.
Episode #1594 finished after 10 timesteps.
Episode #1595 fini

Episode #1764 finished after 8 timesteps.
Episode #1765 finished after 9 timesteps.
Episode #1766 finished after 8 timesteps.
Episode #1767 finished after 9 timesteps.
Episode #1768 finished after 9 timesteps.
Episode #1769 finished after 10 timesteps.
Episode #1770 finished after 9 timesteps.
Episode #1771 finished after 9 timesteps.
Episode #1772 finished after 9 timesteps.
Episode #1773 finished after 9 timesteps.
Episode #1774 finished after 11 timesteps.
Episode #1775 finished after 10 timesteps.
Episode #1776 finished after 13 timesteps.
Episode #1777 finished after 10 timesteps.
Episode #1778 finished after 9 timesteps.
Episode #1779 finished after 9 timesteps.
Episode #1780 finished after 11 timesteps.
Episode #1781 finished after 9 timesteps.
Episode #1782 finished after 11 timesteps.
Episode #1783 finished after 9 timesteps.
Episode #1784 finished after 10 timesteps.
Episode #1785 finished after 10 timesteps.
Episode #1786 finished after 10 timesteps.
Episode #1787 finished a

Episode #1958 finished after 10 timesteps.
Episode #1959 finished after 13 timesteps.
Episode #1960 finished after 10 timesteps.
Episode #1961 finished after 9 timesteps.
Episode #1962 finished after 11 timesteps.
Episode #1963 finished after 16 timesteps.
Episode #1964 finished after 11 timesteps.
Episode #1965 finished after 13 timesteps.
Episode #1966 finished after 12 timesteps.
Episode #1967 finished after 9 timesteps.
Episode #1968 finished after 17 timesteps.
Episode #1969 finished after 10 timesteps.
Episode #1970 finished after 8 timesteps.
Episode #1971 finished after 13 timesteps.
Episode #1972 finished after 10 timesteps.
Episode #1973 finished after 9 timesteps.
Episode #1974 finished after 8 timesteps.
Episode #1975 finished after 10 timesteps.
Episode #1976 finished after 12 timesteps.
Episode #1977 finished after 9 timesteps.
Episode #1978 finished after 12 timesteps.
Episode #1979 finished after 13 timesteps.
Episode #1980 finished after 11 timesteps.
Episode #1981 fin

Episode #2150 finished after 13 timesteps.
Episode #2151 finished after 16 timesteps.
Episode #2152 finished after 45 timesteps.
Episode #2153 finished after 11 timesteps.
Episode #2154 finished after 16 timesteps.
Episode #2155 finished after 14 timesteps.
Episode #2156 finished after 33 timesteps.
Episode #2157 finished after 12 timesteps.
Episode #2158 finished after 14 timesteps.
Episode #2159 finished after 11 timesteps.
Episode #2160 finished after 9 timesteps.
Episode #2161 finished after 28 timesteps.
Episode #2162 finished after 19 timesteps.
Episode #2163 finished after 13 timesteps.
Episode #2164 finished after 12 timesteps.
Episode #2165 finished after 8 timesteps.
Episode #2166 finished after 14 timesteps.
Episode #2167 finished after 13 timesteps.
Episode #2168 finished after 11 timesteps.
Episode #2169 finished after 11 timesteps.
Episode #2170 finished after 12 timesteps.
Episode #2171 finished after 32 timesteps.
Episode #2172 finished after 11 timesteps.
Episode #2173

Episode #2341 finished after 15 timesteps.
Episode #2342 finished after 28 timesteps.
Episode #2343 finished after 40 timesteps.
Episode #2344 finished after 14 timesteps.
Episode #2345 finished after 34 timesteps.
Episode #2346 finished after 9 timesteps.
Episode #2347 finished after 42 timesteps.
Episode #2348 finished after 16 timesteps.
Episode #2349 finished after 16 timesteps.
Episode #2350 finished after 14 timesteps.
Episode #2351 finished after 21 timesteps.
Episode #2352 finished after 13 timesteps.
Episode #2353 finished after 17 timesteps.
Episode #2354 finished after 40 timesteps.
Episode #2355 finished after 24 timesteps.
Episode #2356 finished after 41 timesteps.
Episode #2357 finished after 47 timesteps.
Episode #2358 finished after 19 timesteps.
Episode #2359 finished after 18 timesteps.
Episode #2360 finished after 19 timesteps.
Episode #2361 finished after 21 timesteps.
Episode #2362 finished after 21 timesteps.
Episode #2363 finished after 16 timesteps.
Episode #236

Episode #2532 finished after 18 timesteps.
Episode #2533 finished after 22 timesteps.
Episode #2534 finished after 16 timesteps.
Episode #2535 finished after 12 timesteps.
Episode #2536 finished after 24 timesteps.
Episode #2537 finished after 14 timesteps.
Episode #2538 finished after 19 timesteps.
Episode #2539 finished after 22 timesteps.
Episode #2540 finished after 10 timesteps.
Episode #2541 finished after 18 timesteps.
Episode #2542 finished after 22 timesteps.
Episode #2543 finished after 22 timesteps.
Episode #2544 finished after 35 timesteps.
Episode #2545 finished after 12 timesteps.
Episode #2546 finished after 22 timesteps.
Episode #2547 finished after 16 timesteps.
Episode #2548 finished after 13 timesteps.
Episode #2549 finished after 18 timesteps.
Episode #2550 finished after 12 timesteps.
Episode #2551 finished after 16 timesteps.
Episode #2552 finished after 21 timesteps.
Episode #2553 finished after 27 timesteps.
Episode #2554 finished after 18 timesteps.
Episode #25

Episode #2724 finished after 21 timesteps.
Episode #2725 finished after 12 timesteps.
Episode #2726 finished after 18 timesteps.
Episode #2727 finished after 10 timesteps.
Episode #2728 finished after 35 timesteps.
Episode #2729 finished after 12 timesteps.
Episode #2730 finished after 29 timesteps.
Episode #2731 finished after 12 timesteps.
Episode #2732 finished after 19 timesteps.
Episode #2733 finished after 11 timesteps.
Episode #2734 finished after 8 timesteps.
Episode #2735 finished after 20 timesteps.
Episode #2736 finished after 16 timesteps.
Episode #2737 finished after 18 timesteps.
Episode #2738 finished after 18 timesteps.
Episode #2739 finished after 20 timesteps.
Episode #2740 finished after 37 timesteps.
Episode #2741 finished after 14 timesteps.
Episode #2742 finished after 21 timesteps.
Episode #2743 finished after 10 timesteps.
Episode #2744 finished after 23 timesteps.
Episode #2745 finished after 17 timesteps.
Episode #2746 finished after 10 timesteps.
Episode #274

Episode #2915 finished after 15 timesteps.
Episode #2916 finished after 19 timesteps.
Episode #2917 finished after 16 timesteps.
Episode #2918 finished after 14 timesteps.
Episode #2919 finished after 10 timesteps.
Episode #2920 finished after 20 timesteps.
Episode #2921 finished after 22 timesteps.
Episode #2922 finished after 26 timesteps.
Episode #2923 finished after 13 timesteps.
Episode #2924 finished after 13 timesteps.
Episode #2925 finished after 15 timesteps.
Episode #2926 finished after 10 timesteps.
Episode #2927 finished after 18 timesteps.
Episode #2928 finished after 21 timesteps.
Episode #2929 finished after 32 timesteps.
Episode #2930 finished after 12 timesteps.
Episode #2931 finished after 14 timesteps.
Episode #2932 finished after 13 timesteps.
Episode #2933 finished after 18 timesteps.
Episode #2934 finished after 17 timesteps.
Episode #2935 finished after 15 timesteps.
Episode #2936 finished after 11 timesteps.
Episode #2937 finished after 24 timesteps.
Episode #29

In [6]:
# Call train() again on the existing `ac_model` instance (default number of episodes).
ac_model.train()

Episode #1 finished after 13 timesteps.
Episode #2 finished after 17 timesteps.
Episode #3 finished after 26 timesteps.
Episode #4 finished after 15 timesteps.
Episode #5 finished after 14 timesteps.
Episode #6 finished after 12 timesteps.
Episode #7 finished after 14 timesteps.
Episode #8 finished after 16 timesteps.
Episode #9 finished after 18 timesteps.
Episode #10 finished after 27 timesteps.
Episode #11 finished after 39 timesteps.
Episode #12 finished after 9 timesteps.
Episode #13 finished after 17 timesteps.
Episode #14 finished after 11 timesteps.
Episode #15 finished after 32 timesteps.
Episode #16 finished after 19 timesteps.
Episode #17 finished after 20 timesteps.
Episode #18 finished after 23 timesteps.
Episode #19 finished after 17 timesteps.
Episode #20 finished after 10 timesteps.
Episode #21 finished after 9 timesteps.
Episode #22 finished after 14 timesteps.
Episode #23 finished after 11 timesteps.
Episode #24 finished after 12 timesteps.
Episode #25 finished after 

Episode #199 finished after 43 timesteps.
Episode #200 finished after 17 timesteps.
Episode #201 finished after 18 timesteps.
Episode #202 finished after 17 timesteps.
Episode #203 finished after 12 timesteps.
Episode #204 finished after 25 timesteps.
Episode #205 finished after 19 timesteps.
Episode #206 finished after 22 timesteps.
Episode #207 finished after 39 timesteps.
Episode #208 finished after 10 timesteps.
Episode #209 finished after 12 timesteps.
Episode #210 finished after 30 timesteps.
Episode #211 finished after 13 timesteps.
Episode #212 finished after 19 timesteps.
Episode #213 finished after 35 timesteps.
Episode #214 finished after 15 timesteps.
Episode #215 finished after 13 timesteps.
Episode #216 finished after 15 timesteps.
Episode #217 finished after 15 timesteps.
Episode #218 finished after 16 timesteps.
Episode #219 finished after 20 timesteps.
Episode #220 finished after 12 timesteps.
Episode #221 finished after 12 timesteps.
Episode #222 finished after 13 tim

Episode #396 finished after 8 timesteps.
Episode #397 finished after 13 timesteps.
Episode #398 finished after 9 timesteps.
Episode #399 finished after 19 timesteps.
Episode #400 finished after 11 timesteps.
Episode #401 finished after 9 timesteps.
Episode #402 finished after 8 timesteps.
Episode #403 finished after 13 timesteps.
Episode #404 finished after 8 timesteps.
Episode #405 finished after 14 timesteps.
Episode #406 finished after 10 timesteps.
Episode #407 finished after 11 timesteps.
Episode #408 finished after 9 timesteps.
Episode #409 finished after 14 timesteps.
Episode #410 finished after 12 timesteps.
Episode #411 finished after 13 timesteps.
Episode #412 finished after 10 timesteps.
Episode #413 finished after 11 timesteps.
Episode #414 finished after 10 timesteps.
Episode #415 finished after 16 timesteps.
Episode #416 finished after 16 timesteps.
Episode #417 finished after 11 timesteps.
Episode #418 finished after 11 timesteps.
Episode #419 finished after 9 timesteps.

Episode #594 finished after 9 timesteps.
Episode #595 finished after 9 timesteps.
Episode #596 finished after 9 timesteps.
Episode #597 finished after 9 timesteps.
Episode #598 finished after 9 timesteps.
Episode #599 finished after 11 timesteps.
Episode #600 finished after 9 timesteps.
Episode #601 finished after 11 timesteps.
Episode #602 finished after 9 timesteps.
Episode #603 finished after 10 timesteps.
Episode #604 finished after 11 timesteps.
Episode #605 finished after 10 timesteps.
Episode #606 finished after 8 timesteps.
Episode #607 finished after 8 timesteps.
Episode #608 finished after 8 timesteps.
Episode #609 finished after 9 timesteps.
Episode #610 finished after 9 timesteps.
Episode #611 finished after 10 timesteps.
Episode #612 finished after 10 timesteps.
Episode #613 finished after 10 timesteps.
Episode #614 finished after 9 timesteps.
Episode #615 finished after 10 timesteps.
Episode #616 finished after 8 timesteps.
Episode #617 finished after 10 timesteps.
Episod

Episode #792 finished after 10 timesteps.
Episode #793 finished after 10 timesteps.
Episode #794 finished after 9 timesteps.
Episode #795 finished after 10 timesteps.
Episode #796 finished after 10 timesteps.
Episode #797 finished after 10 timesteps.
Episode #798 finished after 11 timesteps.
Episode #799 finished after 9 timesteps.
Episode #800 finished after 10 timesteps.
Episode #801 finished after 9 timesteps.
Episode #802 finished after 10 timesteps.
Episode #803 finished after 9 timesteps.
Episode #804 finished after 10 timesteps.
Episode #805 finished after 8 timesteps.
Episode #806 finished after 9 timesteps.
Episode #807 finished after 11 timesteps.
Episode #808 finished after 9 timesteps.
Episode #809 finished after 8 timesteps.
Episode #810 finished after 14 timesteps.
Episode #811 finished after 10 timesteps.
Episode #812 finished after 9 timesteps.
Episode #813 finished after 10 timesteps.
Episode #814 finished after 8 timesteps.
Episode #815 finished after 10 timesteps.
Ep

Episode #989 finished after 9 timesteps.
Episode #990 finished after 10 timesteps.
Episode #991 finished after 9 timesteps.
Episode #992 finished after 9 timesteps.
Episode #993 finished after 9 timesteps.
Episode #994 finished after 10 timesteps.
Episode #995 finished after 10 timesteps.
Episode #996 finished after 9 timesteps.
Episode #997 finished after 9 timesteps.
Episode #998 finished after 10 timesteps.
Episode #999 finished after 9 timesteps.
Episode #1000 finished after 10 timesteps.
Episode #1001 finished after 10 timesteps.
Episode #1002 finished after 9 timesteps.
Episode #1003 finished after 9 timesteps.
Episode #1004 finished after 9 timesteps.
Episode #1005 finished after 10 timesteps.
Episode #1006 finished after 8 timesteps.
Episode #1007 finished after 10 timesteps.
Episode #1008 finished after 10 timesteps.
Episode #1009 finished after 9 timesteps.
Episode #1010 finished after 9 timesteps.
Episode #1011 finished after 8 timesteps.
Episode #1012 finished after 9 times

Episode #1184 finished after 9 timesteps.
Episode #1185 finished after 10 timesteps.
Episode #1186 finished after 9 timesteps.
Episode #1187 finished after 9 timesteps.
Episode #1188 finished after 10 timesteps.
Episode #1189 finished after 9 timesteps.
Episode #1190 finished after 9 timesteps.
Episode #1191 finished after 10 timesteps.
Episode #1192 finished after 8 timesteps.
Episode #1193 finished after 11 timesteps.
Episode #1194 finished after 10 timesteps.
Episode #1195 finished after 9 timesteps.
Episode #1196 finished after 10 timesteps.
Episode #1197 finished after 9 timesteps.
Episode #1198 finished after 10 timesteps.
Episode #1199 finished after 9 timesteps.
Episode #1200 finished after 10 timesteps.
Episode #1201 finished after 8 timesteps.
Episode #1202 finished after 10 timesteps.
Episode #1203 finished after 10 timesteps.
Episode #1204 finished after 9 timesteps.
Episode #1205 finished after 10 timesteps.
Episode #1206 finished after 12 timesteps.
Episode #1207 finished

Episode #1377 finished after 9 timesteps.
Episode #1378 finished after 8 timesteps.
Episode #1379 finished after 10 timesteps.
Episode #1380 finished after 9 timesteps.
Episode #1381 finished after 8 timesteps.
Episode #1382 finished after 8 timesteps.
Episode #1383 finished after 9 timesteps.
Episode #1384 finished after 9 timesteps.
Episode #1385 finished after 10 timesteps.
Episode #1386 finished after 12 timesteps.
Episode #1387 finished after 9 timesteps.
Episode #1388 finished after 10 timesteps.
Episode #1389 finished after 9 timesteps.
Episode #1390 finished after 9 timesteps.
Episode #1391 finished after 12 timesteps.
Episode #1392 finished after 9 timesteps.
Episode #1393 finished after 10 timesteps.
Episode #1394 finished after 10 timesteps.
Episode #1395 finished after 10 timesteps.
Episode #1396 finished after 9 timesteps.
Episode #1397 finished after 12 timesteps.
Episode #1398 finished after 9 timesteps.
Episode #1399 finished after 10 timesteps.
Episode #1400 finished a

Episode #1571 finished after 12 timesteps.
Episode #1572 finished after 10 timesteps.
Episode #1573 finished after 9 timesteps.
Episode #1574 finished after 9 timesteps.
Episode #1575 finished after 10 timesteps.
Episode #1576 finished after 9 timesteps.
Episode #1577 finished after 10 timesteps.
Episode #1578 finished after 12 timesteps.
Episode #1579 finished after 10 timesteps.
Episode #1580 finished after 11 timesteps.
Episode #1581 finished after 10 timesteps.
Episode #1582 finished after 8 timesteps.
Episode #1583 finished after 11 timesteps.
Episode #1584 finished after 9 timesteps.
Episode #1585 finished after 9 timesteps.
Episode #1586 finished after 10 timesteps.
Episode #1587 finished after 10 timesteps.
Episode #1588 finished after 11 timesteps.
Episode #1589 finished after 8 timesteps.
Episode #1590 finished after 9 timesteps.
Episode #1591 finished after 8 timesteps.
Episode #1592 finished after 9 timesteps.
Episode #1593 finished after 10 timesteps.
Episode #1594 finishe

Episode #1764 finished after 13 timesteps.
Episode #1765 finished after 19 timesteps.
Episode #1766 finished after 10 timesteps.
Episode #1767 finished after 18 timesteps.
Episode #1768 finished after 18 timesteps.
Episode #1769 finished after 10 timesteps.
Episode #1770 finished after 27 timesteps.
Episode #1771 finished after 17 timesteps.
Episode #1772 finished after 10 timesteps.
Episode #1773 finished after 22 timesteps.
Episode #1774 finished after 16 timesteps.
Episode #1775 finished after 23 timesteps.
Episode #1776 finished after 27 timesteps.
Episode #1777 finished after 27 timesteps.
Episode #1778 finished after 19 timesteps.
Episode #1779 finished after 23 timesteps.
Episode #1780 finished after 17 timesteps.
Episode #1781 finished after 15 timesteps.
Episode #1782 finished after 60 timesteps.
Episode #1783 finished after 59 timesteps.
Episode #1784 finished after 18 timesteps.
Episode #1785 finished after 22 timesteps.
Episode #1786 finished after 14 timesteps.
Episode #17

Episode #1955 finished after 21 timesteps.
Episode #1956 finished after 11 timesteps.
Episode #1957 finished after 14 timesteps.
Episode #1958 finished after 17 timesteps.
Episode #1959 finished after 14 timesteps.
Episode #1960 finished after 13 timesteps.
Episode #1961 finished after 10 timesteps.
Episode #1962 finished after 25 timesteps.
Episode #1963 finished after 13 timesteps.
Episode #1964 finished after 14 timesteps.
Episode #1965 finished after 14 timesteps.
Episode #1966 finished after 15 timesteps.
Episode #1967 finished after 10 timesteps.
Episode #1968 finished after 15 timesteps.
Episode #1969 finished after 12 timesteps.
Episode #1970 finished after 16 timesteps.
Episode #1971 finished after 12 timesteps.
Episode #1972 finished after 9 timesteps.
Episode #1973 finished after 8 timesteps.
Episode #1974 finished after 15 timesteps.
Episode #1975 finished after 16 timesteps.
Episode #1976 finished after 9 timesteps.
Episode #1977 finished after 10 timesteps.
Episode #1978 

Episode #2146 finished after 21 timesteps.
Episode #2147 finished after 15 timesteps.
Episode #2148 finished after 11 timesteps.
Episode #2149 finished after 24 timesteps.
Episode #2150 finished after 9 timesteps.
Episode #2151 finished after 11 timesteps.
Episode #2152 finished after 10 timesteps.
Episode #2153 finished after 10 timesteps.
Episode #2154 finished after 15 timesteps.
Episode #2155 finished after 18 timesteps.
Episode #2156 finished after 13 timesteps.
Episode #2157 finished after 23 timesteps.
Episode #2158 finished after 11 timesteps.
Episode #2159 finished after 11 timesteps.
Episode #2160 finished after 15 timesteps.
Episode #2161 finished after 14 timesteps.
Episode #2162 finished after 17 timesteps.
Episode #2163 finished after 16 timesteps.
Episode #2164 finished after 11 timesteps.
Episode #2165 finished after 14 timesteps.
Episode #2166 finished after 19 timesteps.
Episode #2167 finished after 34 timesteps.
Episode #2168 finished after 10 timesteps.
Episode #216

Episode #2338 finished after 10 timesteps.
Episode #2339 finished after 10 timesteps.
Episode #2340 finished after 11 timesteps.
Episode #2341 finished after 8 timesteps.
Episode #2342 finished after 8 timesteps.
Episode #2343 finished after 10 timesteps.
Episode #2344 finished after 10 timesteps.
Episode #2345 finished after 10 timesteps.
Episode #2346 finished after 9 timesteps.
Episode #2347 finished after 10 timesteps.
Episode #2348 finished after 22 timesteps.
Episode #2349 finished after 9 timesteps.
Episode #2350 finished after 11 timesteps.
Episode #2351 finished after 13 timesteps.
Episode #2352 finished after 8 timesteps.
Episode #2353 finished after 14 timesteps.
Episode #2354 finished after 9 timesteps.
Episode #2355 finished after 10 timesteps.
Episode #2356 finished after 21 timesteps.
Episode #2357 finished after 9 timesteps.
Episode #2358 finished after 8 timesteps.
Episode #2359 finished after 11 timesteps.
Episode #2360 finished after 9 timesteps.
Episode #2361 finish

Episode #2530 finished after 22 timesteps.
Episode #2531 finished after 9 timesteps.
Episode #2532 finished after 22 timesteps.
Episode #2533 finished after 39 timesteps.
Episode #2534 finished after 18 timesteps.
Episode #2535 finished after 19 timesteps.
Episode #2536 finished after 14 timesteps.
Episode #2537 finished after 19 timesteps.
Episode #2538 finished after 17 timesteps.
Episode #2539 finished after 13 timesteps.
Episode #2540 finished after 24 timesteps.
Episode #2541 finished after 34 timesteps.
Episode #2542 finished after 14 timesteps.
Episode #2543 finished after 19 timesteps.
Episode #2544 finished after 24 timesteps.
Episode #2545 finished after 36 timesteps.
Episode #2546 finished after 12 timesteps.
Episode #2547 finished after 67 timesteps.
Episode #2548 finished after 14 timesteps.
Episode #2549 finished after 15 timesteps.
Episode #2550 finished after 11 timesteps.
Episode #2551 finished after 19 timesteps.
Episode #2552 finished after 24 timesteps.
Episode #255

Episode #2721 finished after 17 timesteps.
Episode #2722 finished after 11 timesteps.
Episode #2723 finished after 30 timesteps.
Episode #2724 finished after 24 timesteps.
Episode #2725 finished after 9 timesteps.
Episode #2726 finished after 16 timesteps.
Episode #2727 finished after 16 timesteps.
Episode #2728 finished after 13 timesteps.
Episode #2729 finished after 15 timesteps.
Episode #2730 finished after 50 timesteps.
Episode #2731 finished after 15 timesteps.
Episode #2732 finished after 16 timesteps.
Episode #2733 finished after 20 timesteps.
Episode #2734 finished after 15 timesteps.
Episode #2735 finished after 9 timesteps.
Episode #2736 finished after 18 timesteps.
Episode #2737 finished after 21 timesteps.
Episode #2738 finished after 22 timesteps.
Episode #2739 finished after 11 timesteps.
Episode #2740 finished after 12 timesteps.
Episode #2741 finished after 24 timesteps.
Episode #2742 finished after 21 timesteps.
Episode #2743 finished after 16 timesteps.
Episode #2744

Episode #2912 finished after 14 timesteps.
Episode #2913 finished after 13 timesteps.
Episode #2914 finished after 11 timesteps.
Episode #2915 finished after 11 timesteps.
Episode #2916 finished after 10 timesteps.
Episode #2917 finished after 11 timesteps.
Episode #2918 finished after 11 timesteps.
Episode #2919 finished after 17 timesteps.
Episode #2920 finished after 18 timesteps.
Episode #2921 finished after 13 timesteps.
Episode #2922 finished after 19 timesteps.
Episode #2923 finished after 9 timesteps.
Episode #2924 finished after 8 timesteps.
Episode #2925 finished after 14 timesteps.
Episode #2926 finished after 9 timesteps.
Episode #2927 finished after 11 timesteps.
Episode #2928 finished after 9 timesteps.
Episode #2929 finished after 11 timesteps.
Episode #2930 finished after 12 timesteps.
Episode #2931 finished after 15 timesteps.
Episode #2932 finished after 10 timesteps.
Episode #2933 finished after 10 timesteps.
Episode #2934 finished after 11 timesteps.
Episode #2935 f

In [7]:
# Close the CartPole environment that was created with gym.make() near the
# top of the notebook; `env` should not be stepped again after this call.
env.close()