## TABLE LOOKUP METHOD OF Q LEARNING

In [None]:
import numpy as np
import gym
import matplotlib.pyplot as plt

# Import and initialize Mountain Car Environment
env = gym.make('MountainCar-v0')
env.reset()

# Define Q-learning function
def QLearning(env, learning, discount, epsilon, min_eps, episodes):
    # Determine size of discretized state space
    num_states = (env.observation_space.high - env.observation_space.low)*\
                    np.array([10, 100])
    num_states = np.round(num_states, 0).astype(int) + 1
    
    # Initialize Q table
    Q = np.random.uniform(low = -1, high = 1, 
                          size = (num_states[0], num_states[1], 
                                  env.action_space.n))
    
    # Initialize variables to track rewards
    reward_list = []
    ave_reward_list = []
    
    # Calculate episodic reduction in epsilon
    reduction = (epsilon - min_eps)/episodes
    
    # Run Q learning algorithm
    for i in range(episodes):
        # Initialize parameters
        done = False
        tot_reward, reward = 0,0
        state = env.reset()
        
        # Discretize state
        state_adj = (state - env.observation_space.low)*np.array([10, 100])
        state_adj = np.round(state_adj, 0).astype(int)
    
        while done != True:   
            # Render environment for last five episodes
            if i >= (episodes - 20):
                env.render()
                
            # Determine next action - epsilon greedy strategy
            if np.random.random() < 1 - epsilon:
                action = np.argmax(Q[state_adj[0], state_adj[1]]) 
            else:
                action = np.random.randint(0, env.action_space.n)
                
            # Get next state and reward
            state2, reward, done, info = env.step(action) 
            
            # Discretize state2
            state2_adj = (state2 - env.observation_space.low)*np.array([10, 100])
            state2_adj = np.round(state2_adj, 0).astype(int)
            
            #Allow for terminal states
            if done and state2[0] >= 0.5:
                Q[state_adj[0], state_adj[1], action] = reward
                
            # Adjust Q value for current state
            else:
                delta = learning*(reward + 
                                 discount*np.max(Q[state2_adj[0], 
                                                   state2_adj[1]]) - 
                                 Q[state_adj[0], state_adj[1],action])
                Q[state_adj[0], state_adj[1],action] += delta
                                     
            # Update variables
            tot_reward += reward
            state_adj = state2_adj
        
        # Decay epsilon
        if epsilon > min_eps:
            epsilon -= reduction
        
        # Track rewards
        reward_list.append(tot_reward)
        
        if (i+1) % 100 == 0:
            ave_reward = np.mean(reward_list)
            ave_reward_list.append(ave_reward)
            reward_list = []
            
        if (i+1) % 100 == 0:    
            print('Episode {} Average Reward: {}'.format(i+1, ave_reward))
            
    env.close()
    
    return ave_reward_list

# Run Q-learning algorithm
rewards = QLearning(env, 0.2, 0.9, 0.8, 0, 5000)

# Plot Rewards
plt.plot(100*(np.arange(len(rewards)) + 1), rewards)
plt.xlabel('Episodes')
plt.ylabel('Average Reward')
plt.title('Average Reward vs Episodes')
#plt.savefig('rewards.jpg')     
plt.close() 

## NN METHOD OF Q LEARNING WITH EXPERIENCE REPLAY

In [6]:
import numpy as np
import collections
from collections import deque
import tensorflow as tf
import tqdm
from tqdm import tqdm
def dense(x, weights, bias, activation=tf.identity):
    """Dense layer."""
    z = tf.matmul(x, weights) + bias
    return activation(z)


def init_weights(shape, initializer):
    """Initialize weights for tensorflow layer."""
    weights = tf.Variable(
        initializer(shape),
        trainable=True,
        dtype=tf.float32
    )

    return weights


class Network(object):
    """Q-function approximator."""

    def __init__(self,
                 input_size,
                 output_size,
                 hidden_size=[50, 50],
                 weights_initializer=tf.initializers.glorot_uniform(),
                 bias_initializer=tf.initializers.zeros(),
                 optimizer=tf.optimizers.Adam,
                 **optimizer_kwargs):
        """Initialize weights and hyperparameters."""
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size

        np.random.seed(41)

        self.initialize_weights(weights_initializer, bias_initializer)
        self.optimizer = optimizer(**optimizer_kwargs)
        self.update_count = 0 # this will tell how many times the train_step method is executed
        
    def initialize_weights(self, weights_initializer, bias_initializer):
        """Initialize and store weights."""
        wshapes = [
            [self.input_size, self.hidden_size[0]],
            [self.hidden_size[0], self.hidden_size[1]],
            [self.hidden_size[1], self.output_size]
        ]

        bshapes = [
            [1, self.hidden_size[0]],
            [1, self.hidden_size[1]],
            [1, self.output_size]
        ]

        self.weights = [init_weights(s, weights_initializer) for s in wshapes]
        self.biases = [init_weights(s, bias_initializer) for s in bshapes]

        self.trainable_variables = self.weights + self.biases

    def model(self, inputs):
        """Given a state vector, return the Q values of actions."""
        h1 = dense(inputs, self.weights[0], self.biases[0], tf.nn.relu)
        h2 = dense(h1, self.weights[1], self.biases[1], tf.nn.relu)

        out = dense(h2, self.weights[2], self.biases[2])

        return out

    def train_step(self, inputs, targets, actions_one_hot):
        """Update weights."""
        with tf.GradientTape() as tape:
            qvalues = tf.squeeze(self.model(inputs))
            preds = tf.reduce_sum(qvalues * actions_one_hot, axis=1)
            loss = tf.losses.mean_squared_error(targets, preds)

        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        self.update_count = self.update_count + 1
        #print("NN_updated : ",self.update_count + 1)

class Memory(object):
    """Memory buffer for Experience Replay."""

    def __init__(self, max_size):
        """Initialize a buffer containing max_size experiences."""
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        """Add an experience to the buffer."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Sample a batch of experiences from the buffer."""
        buffer_size = len(self.buffer)
        index = np.random.choice(
            np.arange(buffer_size),
            size=batch_size,
            replace=False
        )

        return [self.buffer[i] for i in index]

    def __len__(self):
        """Interface to access buffer length."""
        return len(self.buffer)


class Agent(object):
    """Deep Q-learning agent."""

    def __init__(self,
                 state_space_size,
                 action_space_size,
                 target_update_freq=1000,
                 discount=0.99,
                 batch_size=32,
                 max_explore=1,
                 min_explore=0.05,
                 anneal_rate=(1 / 100000),
                 replay_memory_size=100000,
                 replay_start_size=10000):
        """Set parameters, initialize network."""
        self.action_space_size = action_space_size
        # DQN has 2 networks THESE ARE STATE (V) ESTIMATORS AND NOT Q ESTIMATORS
        # hell this is a policy gradient method it is not even a V estimator
        self.online_network = Network(state_space_size, action_space_size)
        self.target_network = Network(state_space_size, action_space_size)

        self.update_target_network()

        # training parameters
        self.target_update_freq = target_update_freq
        self.discount = discount
        self.batch_size = batch_size

        # policy during learning
        self.max_explore = max_explore + (anneal_rate * replay_start_size)
        self.min_explore = min_explore
        self.anneal_rate = anneal_rate
        self.steps = 0

        # replay memory
        self.memory = Memory(replay_memory_size)
        self.replay_start_size = replay_start_size
        self.experience_replay = Memory(replay_memory_size)
        
        self.last_state = []
        self.last_action = None
        
    def handle_episode_start(self):
        self.last_state, self.last_action = [], None

    def step(self, observation, training=True):
        """Observe state and rewards, select action.

        It is assumed that `observation` will be an object with
        a `state` vector and a `reward` float or integer. The reward
        corresponds to the action taken in the previous step.
        """
        last_state, last_action = self.last_state, self.last_action
        last_reward = observation[1]
        state = observation[0]
        
        action = self.policy(state, training)

        if training:
            self.steps += 1

            if len(last_state) != 0:
                experience = {
                    "state": last_state,
                    "action": last_action,
                    "reward": last_reward,
                    "next_state": state
                }

                self.memory.add(experience)

            if self.steps > self.replay_start_size:
                self.train_network()

                if self.steps % self.target_update_freq == 0:
                    self.update_target_network()

        self.last_state = state
        self.last_action = action
        #print("Step Taken ", self.steps)
        return action
    
    def policy(self,state, training):
        """Epsilon-greedy policy for training, greedy policy otherwise."""
        explore_prob = self.max_explore - (self.steps * self.anneal_rate)
        explore = max(explore_prob, self.min_explore) > np.random.rand()

        if training and explore:
            action = np.random.randint(self.action_space_size)
        else:
            inputs = np.expand_dims(state, 0)
            qvalues = self.online_network.model(inputs)
            action = np.squeeze(np.argmax(qvalues, axis=-1))

        return action

    def update_target_network(self):
        """Update target network weights with current online network values."""
        variables = self.online_network.trainable_variables
        variables_copy = [tf.Variable(v) for v in variables]
        self.target_network.trainable_variables = variables_copy

    def train_network(self):
        """Update online network weights."""
        batch = self.memory.sample(self.batch_size)
        inputs = np.array([b["state"] for b in batch],dtype = 'float32')
        actions = np.array([b["action"] for b in batch])
        rewards = np.array([b["reward"] for b in batch],dtype = 'float32')
        next_inputs = np.array([b["next_state"] for b in batch],dtype = 'float32')
        
        actions_one_hot = np.eye(self.action_space_size)[actions]
        actions_one_hot = np.array(actions_one_hot,dtype = 'float32')
        
        next_qvalues = np.squeeze(self.target_network.model(next_inputs))
        targets = rewards + self.discount * np.amax(next_qvalues, axis=-1)
        self.online_network.train_step(inputs, targets, actions_one_hot)

In [7]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [8]:
import numpy as np
import gym
import matplotlib.pyplot as plt

# starting the environment
env = gym.make('MountainCar-v0')
observation_space = 2
actions = 3

In [9]:
# initializing the agent

agent47 = Agent(
state_space_size = observation_space,
action_space_size = actions
)

In [10]:
# training loop

for i in tqdm(range(1000)):
    print(i)
    state = env.reset()
    state = np.array(state,dtype = 'float32')
    last_rew = 0
    last_rew = np.array(last_rew,dtype = 'float32')
    done = False
    for j in range(1000):
        if not done:
            if i > 990:
                env.render()
            if j == 0:
                obs = np.array([state,last_rew])
            state,reward,done,info = env.step(agent47.step(obs,training = True))
            state = np.array(state,dtype = 'float32')
            reward = np.array(reward,dtype = 'float32')
            obs = np.array([state,reward])

        
env.close()

  2%|▏         | 20/1000 [00:00<00:10, 94.05it/s]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

  4%|▍         | 42/1000 [00:00<00:09, 97.79it/s]


29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


  5%|▌         | 51/1000 [00:01<00:51, 18.32it/s]

51
52
53
54
55
56


  6%|▌         | 57/1000 [00:09<06:41,  2.35it/s]

57
58
59
60


  6%|▌         | 61/1000 [00:14<10:48,  1.45it/s]

61
62
63


  6%|▋         | 64/1000 [00:18<13:38,  1.14it/s]

64
65


  7%|▋         | 66/1000 [00:21<15:39,  1.01s/it]

66
67


  7%|▋         | 68/1000 [00:24<17:06,  1.10s/it]

68


  7%|▋         | 69/1000 [00:25<17:57,  1.16s/it]

69


  7%|▋         | 70/1000 [00:26<18:33,  1.20s/it]

70


  7%|▋         | 71/1000 [00:27<18:58,  1.23s/it]

71


  7%|▋         | 72/1000 [00:29<21:55,  1.42s/it]

72


  7%|▋         | 73/1000 [00:31<23:30,  1.52s/it]

73


  7%|▋         | 74/1000 [00:32<22:43,  1.47s/it]

74


  8%|▊         | 75/1000 [00:34<22:04,  1.43s/it]

75


  8%|▊         | 76/1000 [00:35<22:20,  1.45s/it]

76


  8%|▊         | 77/1000 [00:37<22:28,  1.46s/it]

77


  8%|▊         | 78/1000 [00:38<22:17,  1.45s/it]

78


  8%|▊         | 79/1000 [00:40<21:58,  1.43s/it]

79


  8%|▊         | 80/1000 [00:41<21:37,  1.41s/it]

80


  8%|▊         | 81/1000 [00:42<21:30,  1.40s/it]

81


  8%|▊         | 82/1000 [00:44<21:20,  1.40s/it]

82


  8%|▊         | 83/1000 [00:45<21:20,  1.40s/it]

83


  8%|▊         | 84/1000 [00:46<21:08,  1.39s/it]

84


  8%|▊         | 85/1000 [00:48<20:56,  1.37s/it]

85


  9%|▊         | 86/1000 [00:49<21:07,  1.39s/it]

86


  9%|▊         | 87/1000 [00:51<21:05,  1.39s/it]

87


  9%|▉         | 88/1000 [00:52<21:04,  1.39s/it]

88


  9%|▉         | 89/1000 [00:53<20:53,  1.38s/it]

89


  9%|▉         | 90/1000 [00:55<20:50,  1.37s/it]

90


  9%|▉         | 91/1000 [00:56<20:40,  1.36s/it]

91


  9%|▉         | 92/1000 [00:57<20:44,  1.37s/it]

92


  9%|▉         | 93/1000 [00:59<20:34,  1.36s/it]

93


  9%|▉         | 94/1000 [01:00<20:26,  1.35s/it]

94


 10%|▉         | 95/1000 [01:01<20:44,  1.38s/it]

95


 10%|▉         | 96/1000 [01:04<26:38,  1.77s/it]

96


 10%|▉         | 97/1000 [01:07<30:25,  2.02s/it]

97


 10%|▉         | 98/1000 [01:10<33:47,  2.25s/it]

98


 10%|▉         | 99/1000 [01:12<36:14,  2.41s/it]

99


 10%|█         | 100/1000 [01:15<38:14,  2.55s/it]

100


 10%|█         | 101/1000 [01:18<39:21,  2.63s/it]

101


 10%|█         | 102/1000 [01:19<33:46,  2.26s/it]

102


 10%|█         | 103/1000 [01:21<29:53,  2.00s/it]

103


 10%|█         | 104/1000 [01:22<27:25,  1.84s/it]

104


 10%|█         | 105/1000 [01:24<25:55,  1.74s/it]

105


 11%|█         | 106/1000 [01:25<24:50,  1.67s/it]

106


 11%|█         | 107/1000 [01:27<24:01,  1.61s/it]

107


 11%|█         | 108/1000 [01:28<23:15,  1.56s/it]

108


 11%|█         | 109/1000 [01:30<22:44,  1.53s/it]

109


 11%|█         | 110/1000 [01:31<22:12,  1.50s/it]

110


 11%|█         | 111/1000 [01:33<21:57,  1.48s/it]

111


 11%|█         | 112/1000 [01:34<21:55,  1.48s/it]

112


 11%|█▏        | 113/1000 [01:36<23:32,  1.59s/it]

113


 11%|█▏        | 114/1000 [01:38<27:56,  1.89s/it]

114


 12%|█▏        | 115/1000 [01:41<32:02,  2.17s/it]

115


 12%|█▏        | 116/1000 [01:44<32:13,  2.19s/it]

116


 12%|█▏        | 117/1000 [01:46<32:22,  2.20s/it]

117


 12%|█▏        | 118/1000 [01:48<32:31,  2.21s/it]

118


 12%|█▏        | 119/1000 [01:50<30:26,  2.07s/it]

119


 12%|█▏        | 120/1000 [01:52<30:46,  2.10s/it]

120


 12%|█▏        | 121/1000 [01:54<30:55,  2.11s/it]

121


 12%|█▏        | 122/1000 [01:56<31:01,  2.12s/it]

122


 12%|█▏        | 123/1000 [01:58<30:59,  2.12s/it]

123


 12%|█▏        | 124/1000 [02:00<30:24,  2.08s/it]

124


 12%|█▎        | 125/1000 [02:02<30:30,  2.09s/it]

125


 13%|█▎        | 126/1000 [02:05<30:46,  2.11s/it]

126


 13%|█▎        | 127/1000 [02:06<29:49,  2.05s/it]

127


 13%|█▎        | 128/1000 [02:08<29:00,  2.00s/it]

128


 13%|█▎        | 129/1000 [02:10<28:23,  1.96s/it]

129


 13%|█▎        | 130/1000 [02:12<28:37,  1.97s/it]

130


 13%|█▎        | 131/1000 [02:14<28:39,  1.98s/it]

131


 13%|█▎        | 132/1000 [02:16<29:14,  2.02s/it]

132


 13%|█▎        | 133/1000 [02:18<29:36,  2.05s/it]

133


 13%|█▎        | 134/1000 [02:21<29:50,  2.07s/it]

134


 14%|█▎        | 135/1000 [02:23<29:31,  2.05s/it]

135


 14%|█▎        | 136/1000 [02:25<29:29,  2.05s/it]

136


 14%|█▎        | 137/1000 [02:27<29:50,  2.07s/it]

137


 14%|█▍        | 138/1000 [02:29<30:01,  2.09s/it]

138


 14%|█▍        | 139/1000 [02:31<30:13,  2.11s/it]

139


 14%|█▍        | 140/1000 [02:33<30:28,  2.13s/it]

140


 14%|█▍        | 141/1000 [02:35<30:37,  2.14s/it]

141


 14%|█▍        | 142/1000 [02:37<30:14,  2.12s/it]

142


 14%|█▍        | 143/1000 [02:39<29:34,  2.07s/it]

143


 14%|█▍        | 144/1000 [02:41<29:31,  2.07s/it]

144


 14%|█▍        | 145/1000 [02:44<29:42,  2.08s/it]

145


 15%|█▍        | 146/1000 [02:46<29:42,  2.09s/it]

146


 15%|█▍        | 147/1000 [02:47<27:38,  1.94s/it]

147


 15%|█▍        | 148/1000 [02:49<25:13,  1.78s/it]

148


 15%|█▍        | 149/1000 [02:50<25:20,  1.79s/it]

149


 15%|█▌        | 150/1000 [02:53<26:41,  1.88s/it]

150


 15%|█▌        | 151/1000 [02:55<27:38,  1.95s/it]

151


 15%|█▌        | 152/1000 [02:57<28:11,  1.99s/it]

152


 15%|█▌        | 153/1000 [02:59<28:46,  2.04s/it]

153


 15%|█▌        | 154/1000 [03:01<29:03,  2.06s/it]

154


 16%|█▌        | 155/1000 [03:03<29:17,  2.08s/it]

155


 16%|█▌        | 156/1000 [03:05<29:26,  2.09s/it]

156


 16%|█▌        | 157/1000 [03:07<29:23,  2.09s/it]

157


 16%|█▌        | 158/1000 [03:09<29:11,  2.08s/it]

158


 16%|█▌        | 159/1000 [03:12<29:19,  2.09s/it]

159


 16%|█▌        | 160/1000 [03:14<29:30,  2.11s/it]

160


 16%|█▌        | 161/1000 [03:16<29:41,  2.12s/it]

161


 16%|█▌        | 162/1000 [03:18<29:36,  2.12s/it]

162


 16%|█▋        | 163/1000 [03:20<29:40,  2.13s/it]

163


 16%|█▋        | 164/1000 [03:22<29:34,  2.12s/it]

164


 16%|█▋        | 165/1000 [03:24<29:14,  2.10s/it]

165


 17%|█▋        | 166/1000 [03:26<29:03,  2.09s/it]

166


 17%|█▋        | 167/1000 [03:28<28:57,  2.09s/it]

167


 17%|█▋        | 168/1000 [03:31<29:20,  2.12s/it]

168


 17%|█▋        | 169/1000 [03:33<29:12,  2.11s/it]

169


 17%|█▋        | 170/1000 [03:35<28:54,  2.09s/it]

170


 17%|█▋        | 171/1000 [03:37<28:56,  2.10s/it]

171


 17%|█▋        | 172/1000 [03:39<29:15,  2.12s/it]

172


 17%|█▋        | 173/1000 [03:41<29:41,  2.15s/it]

173


 17%|█▋        | 174/1000 [03:43<29:41,  2.16s/it]

174


 18%|█▊        | 175/1000 [03:46<29:48,  2.17s/it]

175


 18%|█▊        | 176/1000 [03:48<29:49,  2.17s/it]

176


 18%|█▊        | 177/1000 [03:50<29:36,  2.16s/it]

177


 18%|█▊        | 178/1000 [03:52<29:29,  2.15s/it]

178


 18%|█▊        | 179/1000 [03:54<29:37,  2.17s/it]

179


 18%|█▊        | 180/1000 [03:56<29:45,  2.18s/it]

180


 18%|█▊        | 181/1000 [03:59<29:55,  2.19s/it]

181


 18%|█▊        | 182/1000 [04:01<30:09,  2.21s/it]

182


 18%|█▊        | 183/1000 [04:03<30:35,  2.25s/it]

183


 18%|█▊        | 184/1000 [04:05<30:00,  2.21s/it]

184


 18%|█▊        | 185/1000 [04:08<29:59,  2.21s/it]

185


 19%|█▊        | 186/1000 [04:10<29:41,  2.19s/it]

186


 19%|█▊        | 187/1000 [04:12<29:50,  2.20s/it]

187


 19%|█▉        | 188/1000 [04:14<30:06,  2.23s/it]

188


 19%|█▉        | 189/1000 [04:16<30:04,  2.23s/it]

189


 19%|█▉        | 190/1000 [04:19<29:51,  2.21s/it]

190


 19%|█▉        | 191/1000 [04:21<29:38,  2.20s/it]

191


 19%|█▉        | 192/1000 [04:23<29:27,  2.19s/it]

192


 19%|█▉        | 193/1000 [04:25<29:02,  2.16s/it]

193


 19%|█▉        | 194/1000 [04:27<28:30,  2.12s/it]

194


 20%|█▉        | 195/1000 [04:29<28:16,  2.11s/it]

195


 20%|█▉        | 196/1000 [04:31<28:10,  2.10s/it]

196


 20%|█▉        | 197/1000 [04:33<28:18,  2.12s/it]

197


 20%|█▉        | 198/1000 [04:36<28:26,  2.13s/it]

198


 20%|█▉        | 199/1000 [04:38<28:22,  2.13s/it]

199


 20%|██        | 200/1000 [04:40<28:39,  2.15s/it]

200


 20%|██        | 201/1000 [04:42<29:05,  2.18s/it]

201


 20%|██        | 202/1000 [04:44<29:35,  2.23s/it]

202


 20%|██        | 203/1000 [04:47<29:45,  2.24s/it]

203


 20%|██        | 204/1000 [04:49<29:58,  2.26s/it]

204


 20%|██        | 205/1000 [04:51<29:55,  2.26s/it]

205


 21%|██        | 206/1000 [04:54<29:51,  2.26s/it]

206


 21%|██        | 207/1000 [04:56<29:39,  2.24s/it]

207


 21%|██        | 208/1000 [04:58<29:33,  2.24s/it]

208


 21%|██        | 209/1000 [05:00<29:15,  2.22s/it]

209


 21%|██        | 210/1000 [05:02<28:54,  2.20s/it]

210


 21%|██        | 211/1000 [05:04<28:44,  2.19s/it]

211


 21%|██        | 212/1000 [05:07<28:43,  2.19s/it]

212


 21%|██▏       | 213/1000 [05:09<28:43,  2.19s/it]

213


 21%|██▏       | 214/1000 [05:11<28:55,  2.21s/it]

214


 22%|██▏       | 215/1000 [05:13<29:03,  2.22s/it]

215


 22%|██▏       | 216/1000 [05:16<29:02,  2.22s/it]

216


 22%|██▏       | 217/1000 [05:18<28:55,  2.22s/it]

217


 22%|██▏       | 218/1000 [05:20<28:54,  2.22s/it]

218


 22%|██▏       | 219/1000 [05:22<28:55,  2.22s/it]

219


 22%|██▏       | 220/1000 [05:25<29:16,  2.25s/it]

220


 22%|██▏       | 221/1000 [05:27<29:09,  2.25s/it]

221


 22%|██▏       | 222/1000 [05:29<28:45,  2.22s/it]

222


 22%|██▏       | 223/1000 [05:31<28:33,  2.20s/it]

223


 22%|██▏       | 224/1000 [05:33<28:42,  2.22s/it]

224


 22%|██▎       | 225/1000 [05:36<28:57,  2.24s/it]

225


 23%|██▎       | 226/1000 [05:38<29:06,  2.26s/it]

226


 23%|██▎       | 227/1000 [05:40<29:11,  2.27s/it]

227


 23%|██▎       | 228/1000 [05:43<29:04,  2.26s/it]

228


 23%|██▎       | 229/1000 [05:45<29:00,  2.26s/it]

229


 23%|██▎       | 230/1000 [05:47<29:04,  2.27s/it]

230


 23%|██▎       | 231/1000 [05:49<28:52,  2.25s/it]

231


 23%|██▎       | 232/1000 [05:51<28:30,  2.23s/it]

232


 23%|██▎       | 233/1000 [05:54<28:35,  2.24s/it]

233


 23%|██▎       | 234/1000 [05:56<28:37,  2.24s/it]

234


 24%|██▎       | 235/1000 [05:58<28:22,  2.23s/it]

235


 24%|██▎       | 236/1000 [06:00<27:25,  2.15s/it]

236


 24%|██▎       | 237/1000 [06:02<26:47,  2.11s/it]

237


 24%|██▍       | 238/1000 [06:04<27:01,  2.13s/it]

238


 24%|██▍       | 239/1000 [06:06<27:08,  2.14s/it]

239


 24%|██▍       | 240/1000 [06:09<27:26,  2.17s/it]

240


 24%|██▍       | 241/1000 [06:11<27:44,  2.19s/it]

241


 24%|██▍       | 242/1000 [06:13<27:38,  2.19s/it]

242


 24%|██▍       | 243/1000 [06:15<27:43,  2.20s/it]

243


 24%|██▍       | 244/1000 [06:18<27:54,  2.22s/it]

244


 24%|██▍       | 245/1000 [06:20<28:01,  2.23s/it]

245


 25%|██▍       | 246/1000 [06:22<28:28,  2.27s/it]

246


 25%|██▍       | 247/1000 [06:24<28:29,  2.27s/it]

247


 25%|██▍       | 248/1000 [06:27<28:20,  2.26s/it]

248


 25%|██▍       | 249/1000 [06:29<28:03,  2.24s/it]

249


 25%|██▌       | 250/1000 [06:31<27:50,  2.23s/it]

250


 25%|██▌       | 251/1000 [06:33<27:30,  2.20s/it]

251


 25%|██▌       | 252/1000 [06:35<27:27,  2.20s/it]

252


 25%|██▌       | 253/1000 [06:38<27:36,  2.22s/it]

253


 25%|██▌       | 254/1000 [06:40<27:37,  2.22s/it]

254


 26%|██▌       | 255/1000 [06:42<27:56,  2.25s/it]

255


 26%|██▌       | 256/1000 [06:45<28:02,  2.26s/it]

256


 26%|██▌       | 257/1000 [06:47<28:08,  2.27s/it]

257


 26%|██▌       | 258/1000 [06:49<28:09,  2.28s/it]

258


 26%|██▌       | 259/1000 [06:51<27:56,  2.26s/it]

259


 26%|██▌       | 260/1000 [06:54<27:46,  2.25s/it]

260


 26%|██▌       | 261/1000 [06:56<27:53,  2.27s/it]

261


 26%|██▌       | 262/1000 [06:58<27:44,  2.26s/it]

262


 26%|██▋       | 263/1000 [07:00<28:04,  2.29s/it]

263


 26%|██▋       | 264/1000 [07:03<28:03,  2.29s/it]

264


 26%|██▋       | 265/1000 [07:05<28:07,  2.30s/it]

265


 27%|██▋       | 266/1000 [07:07<28:12,  2.31s/it]

266


 27%|██▋       | 267/1000 [07:10<28:09,  2.31s/it]

267


 27%|██▋       | 268/1000 [07:12<28:07,  2.31s/it]

268


 27%|██▋       | 269/1000 [07:14<27:48,  2.28s/it]

269


 27%|██▋       | 270/1000 [07:17<27:55,  2.29s/it]

270


 27%|██▋       | 271/1000 [07:19<27:43,  2.28s/it]

271


 27%|██▋       | 272/1000 [07:21<27:36,  2.28s/it]

272


 27%|██▋       | 273/1000 [07:23<27:37,  2.28s/it]

273


 27%|██▋       | 274/1000 [07:26<27:40,  2.29s/it]

274


 28%|██▊       | 275/1000 [07:28<27:27,  2.27s/it]

275


 28%|██▊       | 276/1000 [07:30<27:37,  2.29s/it]

276


 28%|██▊       | 277/1000 [07:33<27:37,  2.29s/it]

277


 28%|██▊       | 278/1000 [07:35<27:29,  2.29s/it]

278


 28%|██▊       | 279/1000 [07:37<27:30,  2.29s/it]

279


 28%|██▊       | 280/1000 [07:40<27:54,  2.33s/it]

280


 28%|██▊       | 281/1000 [07:42<27:56,  2.33s/it]

281


 28%|██▊       | 282/1000 [07:44<28:07,  2.35s/it]

282


 28%|██▊       | 283/1000 [07:47<28:28,  2.38s/it]

283


 28%|██▊       | 284/1000 [07:49<28:42,  2.41s/it]

284


 28%|██▊       | 285/1000 [07:52<28:29,  2.39s/it]

285


 29%|██▊       | 286/1000 [07:54<28:25,  2.39s/it]

286


 29%|██▊       | 287/1000 [07:56<28:16,  2.38s/it]

287


 29%|██▉       | 288/1000 [07:59<27:59,  2.36s/it]

288


 29%|██▉       | 289/1000 [08:01<27:51,  2.35s/it]

289


 29%|██▉       | 290/1000 [08:03<27:35,  2.33s/it]

290


 29%|██▉       | 291/1000 [08:05<27:06,  2.29s/it]

291


 29%|██▉       | 292/1000 [08:08<26:40,  2.26s/it]

292


 29%|██▉       | 293/1000 [08:10<26:33,  2.25s/it]

293


 29%|██▉       | 294/1000 [08:12<25:45,  2.19s/it]

294


 30%|██▉       | 295/1000 [08:14<25:21,  2.16s/it]

295


 30%|██▉       | 296/1000 [08:16<25:45,  2.20s/it]

296


 30%|██▉       | 297/1000 [08:19<26:03,  2.22s/it]

297


 30%|██▉       | 298/1000 [08:21<25:54,  2.21s/it]

298


 30%|██▉       | 299/1000 [08:23<26:15,  2.25s/it]

299


 30%|███       | 300/1000 [08:25<26:34,  2.28s/it]

300


 30%|███       | 301/1000 [08:28<27:05,  2.33s/it]

301


 30%|███       | 302/1000 [08:30<27:06,  2.33s/it]

302


 30%|███       | 303/1000 [08:33<27:05,  2.33s/it]

303


 30%|███       | 304/1000 [08:35<27:08,  2.34s/it]

304


 30%|███       | 305/1000 [08:37<27:15,  2.35s/it]

305


 31%|███       | 306/1000 [08:40<26:59,  2.33s/it]

306


 31%|███       | 307/1000 [08:42<26:55,  2.33s/it]

307


 31%|███       | 308/1000 [08:44<27:03,  2.35s/it]

308


 31%|███       | 309/1000 [08:47<27:00,  2.35s/it]

309


 31%|███       | 310/1000 [08:49<26:53,  2.34s/it]

310


 31%|███       | 311/1000 [08:51<26:44,  2.33s/it]

311


 31%|███       | 312/1000 [08:54<26:30,  2.31s/it]

312


 31%|███▏      | 313/1000 [08:56<26:19,  2.30s/it]

313


 31%|███▏      | 314/1000 [08:58<26:24,  2.31s/it]

314


 32%|███▏      | 315/1000 [09:00<26:26,  2.32s/it]

315


 32%|███▏      | 316/1000 [09:03<26:17,  2.31s/it]

316


 32%|███▏      | 317/1000 [09:05<26:31,  2.33s/it]

317


 32%|███▏      | 318/1000 [09:07<26:26,  2.33s/it]

318


 32%|███▏      | 319/1000 [09:10<26:19,  2.32s/it]

319


 32%|███▏      | 320/1000 [09:12<26:01,  2.30s/it]

320


 32%|███▏      | 321/1000 [09:14<26:06,  2.31s/it]

321


 32%|███▏      | 322/1000 [09:17<26:12,  2.32s/it]

322


 32%|███▏      | 323/1000 [09:19<26:13,  2.32s/it]

323


 32%|███▏      | 324/1000 [09:21<26:22,  2.34s/it]

324


 32%|███▎      | 325/1000 [09:24<26:44,  2.38s/it]

325


 33%|███▎      | 326/1000 [09:26<26:54,  2.40s/it]

326


 33%|███▎      | 327/1000 [09:29<27:03,  2.41s/it]

327


 33%|███▎      | 328/1000 [09:31<27:03,  2.42s/it]

328


 33%|███▎      | 329/1000 [09:34<27:08,  2.43s/it]

329


 33%|███▎      | 330/1000 [09:36<27:19,  2.45s/it]

330


 33%|███▎      | 331/1000 [09:38<26:55,  2.41s/it]

331


 33%|███▎      | 332/1000 [09:41<27:13,  2.45s/it]

332


 33%|███▎      | 333/1000 [09:43<27:12,  2.45s/it]

333


 33%|███▎      | 334/1000 [09:46<27:05,  2.44s/it]

334


 34%|███▎      | 335/1000 [09:48<27:06,  2.45s/it]

335


 34%|███▎      | 336/1000 [09:51<26:50,  2.43s/it]

336


 34%|███▎      | 337/1000 [09:53<26:31,  2.40s/it]

337


 34%|███▍      | 338/1000 [09:55<26:36,  2.41s/it]

338


 34%|███▍      | 339/1000 [09:58<26:22,  2.39s/it]

339


 34%|███▍      | 340/1000 [10:00<26:03,  2.37s/it]

340


 34%|███▍      | 341/1000 [10:02<25:57,  2.36s/it]

341


 34%|███▍      | 342/1000 [10:05<25:42,  2.34s/it]

342


 34%|███▍      | 343/1000 [10:07<25:21,  2.32s/it]

343


 34%|███▍      | 344/1000 [10:09<25:02,  2.29s/it]

344


 34%|███▍      | 345/1000 [10:12<25:04,  2.30s/it]

345


 35%|███▍      | 346/1000 [10:14<25:11,  2.31s/it]

346


 35%|███▍      | 347/1000 [10:16<25:18,  2.32s/it]

347


 35%|███▍      | 348/1000 [10:19<25:19,  2.33s/it]

348


 35%|███▍      | 349/1000 [10:21<25:11,  2.32s/it]

349


 35%|███▌      | 350/1000 [10:23<25:06,  2.32s/it]

350


 35%|███▌      | 351/1000 [10:26<25:23,  2.35s/it]

351


 35%|███▌      | 352/1000 [10:28<25:44,  2.38s/it]

352


 35%|███▌      | 353/1000 [10:31<25:51,  2.40s/it]

353


 35%|███▌      | 354/1000 [10:33<25:58,  2.41s/it]

354


 36%|███▌      | 355/1000 [10:35<26:11,  2.44s/it]

355


 36%|███▌      | 356/1000 [10:38<26:10,  2.44s/it]

356


 36%|███▌      | 357/1000 [10:40<25:56,  2.42s/it]

357


 36%|███▌      | 358/1000 [10:43<25:45,  2.41s/it]

358


 36%|███▌      | 359/1000 [10:45<25:58,  2.43s/it]

359


 36%|███▌      | 360/1000 [10:48<26:22,  2.47s/it]

360


 36%|███▌      | 361/1000 [10:50<26:04,  2.45s/it]

361


 36%|███▌      | 362/1000 [10:53<25:59,  2.44s/it]

362


 36%|███▋      | 363/1000 [10:55<25:52,  2.44s/it]

363


 36%|███▋      | 364/1000 [10:57<25:45,  2.43s/it]

364


 36%|███▋      | 365/1000 [11:00<26:02,  2.46s/it]

365


 37%|███▋      | 366/1000 [11:02<25:49,  2.44s/it]

366


 37%|███▋      | 367/1000 [11:05<25:45,  2.44s/it]

367


 37%|███▋      | 368/1000 [11:07<25:38,  2.43s/it]

368


 37%|███▋      | 369/1000 [11:10<25:38,  2.44s/it]

369


 37%|███▋      | 370/1000 [11:12<25:42,  2.45s/it]

370


 37%|███▋      | 371/1000 [11:15<25:35,  2.44s/it]

371


 37%|███▋      | 372/1000 [11:17<25:33,  2.44s/it]

372


 37%|███▋      | 373/1000 [11:19<25:37,  2.45s/it]

373


 37%|███▋      | 374/1000 [11:22<25:28,  2.44s/it]

374


 38%|███▊      | 375/1000 [11:24<25:19,  2.43s/it]

375


 38%|███▊      | 376/1000 [11:27<25:16,  2.43s/it]

376


 38%|███▊      | 377/1000 [11:29<25:10,  2.42s/it]

377


 38%|███▊      | 378/1000 [11:32<25:12,  2.43s/it]

378


 38%|███▊      | 379/1000 [11:34<25:09,  2.43s/it]

379


 38%|███▊      | 380/1000 [11:37<25:35,  2.48s/it]

380


 38%|███▊      | 381/1000 [11:39<25:42,  2.49s/it]

381


 38%|███▊      | 382/1000 [11:42<25:39,  2.49s/it]

382


 38%|███▊      | 383/1000 [11:44<25:29,  2.48s/it]

383


 38%|███▊      | 384/1000 [11:47<25:27,  2.48s/it]

384


 38%|███▊      | 385/1000 [11:49<25:37,  2.50s/it]

385


 39%|███▊      | 386/1000 [11:52<25:47,  2.52s/it]

386


 39%|███▊      | 387/1000 [11:54<25:52,  2.53s/it]

387


 39%|███▉      | 388/1000 [11:57<25:44,  2.52s/it]

388


 39%|███▉      | 389/1000 [11:59<25:45,  2.53s/it]

389


 39%|███▉      | 390/1000 [12:02<25:48,  2.54s/it]

390


 39%|███▉      | 391/1000 [12:04<25:48,  2.54s/it]

391


 39%|███▉      | 392/1000 [12:07<25:47,  2.55s/it]

392


 39%|███▉      | 393/1000 [12:09<25:21,  2.51s/it]

393


 39%|███▉      | 394/1000 [12:12<25:18,  2.51s/it]

394


 40%|███▉      | 395/1000 [12:14<25:17,  2.51s/it]

395


 40%|███▉      | 396/1000 [12:17<25:21,  2.52s/it]

396


 40%|███▉      | 397/1000 [12:19<25:26,  2.53s/it]

397


 40%|███▉      | 398/1000 [12:22<25:20,  2.53s/it]

398


 40%|███▉      | 399/1000 [12:24<25:18,  2.53s/it]

399


 40%|████      | 400/1000 [12:27<25:15,  2.53s/it]

400


 40%|████      | 401/1000 [12:30<25:14,  2.53s/it]

401


 40%|████      | 402/1000 [12:32<25:19,  2.54s/it]

402


 40%|████      | 403/1000 [12:35<24:59,  2.51s/it]

403


 40%|████      | 404/1000 [12:37<24:45,  2.49s/it]

404


 40%|████      | 405/1000 [12:39<24:43,  2.49s/it]

405


 41%|████      | 406/1000 [12:42<24:26,  2.47s/it]

406


 41%|████      | 407/1000 [12:44<23:51,  2.41s/it]

407


 41%|████      | 408/1000 [12:47<23:38,  2.40s/it]

408


 41%|████      | 409/1000 [12:49<23:47,  2.42s/it]

409


 41%|████      | 410/1000 [12:52<24:06,  2.45s/it]

410


 41%|████      | 411/1000 [12:54<24:32,  2.50s/it]

411


 41%|████      | 412/1000 [12:57<24:34,  2.51s/it]

412


 41%|████▏     | 413/1000 [12:59<24:39,  2.52s/it]

413


 41%|████▏     | 414/1000 [13:02<24:36,  2.52s/it]

414


 42%|████▏     | 415/1000 [13:04<24:36,  2.52s/it]

415


 42%|████▏     | 416/1000 [13:07<24:35,  2.53s/it]

416


 42%|████▏     | 417/1000 [13:09<24:33,  2.53s/it]

417


 42%|████▏     | 418/1000 [13:12<24:13,  2.50s/it]

418


 42%|████▏     | 419/1000 [13:14<24:04,  2.49s/it]

419


 42%|████▏     | 420/1000 [13:17<24:10,  2.50s/it]

420


 42%|████▏     | 421/1000 [13:19<23:55,  2.48s/it]

421


 42%|████▏     | 422/1000 [13:22<23:30,  2.44s/it]

422


 42%|████▏     | 423/1000 [13:24<23:24,  2.43s/it]

423


 42%|████▏     | 424/1000 [13:26<23:12,  2.42s/it]

424


 42%|████▎     | 425/1000 [13:29<23:19,  2.43s/it]

425


 43%|████▎     | 426/1000 [13:31<23:14,  2.43s/it]

426


 43%|████▎     | 427/1000 [13:34<23:12,  2.43s/it]

427


 43%|████▎     | 428/1000 [13:36<23:29,  2.46s/it]

428


 43%|████▎     | 429/1000 [13:39<23:35,  2.48s/it]

429


 43%|████▎     | 430/1000 [13:41<23:49,  2.51s/it]

430


 43%|████▎     | 431/1000 [13:44<23:40,  2.50s/it]

431


 43%|████▎     | 432/1000 [13:46<23:31,  2.49s/it]

432


 43%|████▎     | 433/1000 [13:49<23:32,  2.49s/it]

433


 43%|████▎     | 434/1000 [13:51<23:35,  2.50s/it]

434


 44%|████▎     | 435/1000 [13:54<23:40,  2.51s/it]

435


 44%|████▎     | 436/1000 [13:56<23:46,  2.53s/it]

436


 44%|████▎     | 437/1000 [13:59<23:31,  2.51s/it]

437


 44%|████▍     | 438/1000 [14:01<23:43,  2.53s/it]

438


 44%|████▍     | 439/1000 [14:04<23:36,  2.52s/it]

439


 44%|████▍     | 440/1000 [14:06<23:40,  2.54s/it]

440


 44%|████▍     | 441/1000 [14:09<23:32,  2.53s/it]

441


 44%|████▍     | 442/1000 [14:12<23:42,  2.55s/it]

442


 44%|████▍     | 443/1000 [14:14<23:45,  2.56s/it]

443


 44%|████▍     | 444/1000 [14:17<23:53,  2.58s/it]

444


 44%|████▍     | 445/1000 [14:19<23:48,  2.57s/it]

445


 45%|████▍     | 446/1000 [14:22<23:52,  2.59s/it]

446


 45%|████▍     | 447/1000 [14:25<23:53,  2.59s/it]

447


 45%|████▍     | 448/1000 [14:27<23:53,  2.60s/it]

448


 45%|████▍     | 449/1000 [14:30<23:52,  2.60s/it]

449


 45%|████▌     | 450/1000 [14:32<24:08,  2.63s/it]

450


 45%|████▌     | 451/1000 [14:35<22:40,  2.48s/it]

451


 45%|████▌     | 452/1000 [14:37<23:14,  2.54s/it]

452


 45%|████▌     | 453/1000 [14:40<23:32,  2.58s/it]

453


 45%|████▌     | 454/1000 [14:43<23:46,  2.61s/it]

454


 46%|████▌     | 455/1000 [14:45<23:56,  2.64s/it]

455


 46%|████▌     | 456/1000 [14:48<23:59,  2.65s/it]

456


 46%|████▌     | 457/1000 [14:51<23:56,  2.65s/it]

457


 46%|████▌     | 458/1000 [14:53<23:37,  2.62s/it]

458


 46%|████▌     | 459/1000 [14:56<23:45,  2.64s/it]

459


 46%|████▌     | 460/1000 [14:59<23:43,  2.64s/it]

460


 46%|████▌     | 461/1000 [15:01<23:17,  2.59s/it]

461


 46%|████▌     | 462/1000 [15:03<22:40,  2.53s/it]

462


 46%|████▋     | 463/1000 [15:06<22:37,  2.53s/it]

463


 46%|████▋     | 464/1000 [15:09<23:02,  2.58s/it]

464


 46%|████▋     | 465/1000 [15:11<23:08,  2.60s/it]

465


 47%|████▋     | 466/1000 [15:13<21:24,  2.41s/it]

466


 47%|████▋     | 467/1000 [15:16<22:12,  2.50s/it]

467


 47%|████▋     | 468/1000 [15:19<22:36,  2.55s/it]

468


 47%|████▋     | 469/1000 [15:21<22:40,  2.56s/it]

469


 47%|████▋     | 470/1000 [15:24<22:50,  2.59s/it]

470


 47%|████▋     | 471/1000 [15:26<21:56,  2.49s/it]

471


 47%|████▋     | 472/1000 [15:29<22:32,  2.56s/it]

472


 47%|████▋     | 473/1000 [15:31<22:37,  2.58s/it]

473


 47%|████▋     | 474/1000 [15:34<22:52,  2.61s/it]

474


 48%|████▊     | 475/1000 [15:37<22:58,  2.63s/it]

475


 48%|████▊     | 476/1000 [15:39<22:51,  2.62s/it]

476


 48%|████▊     | 477/1000 [15:42<22:40,  2.60s/it]

477


 48%|████▊     | 478/1000 [15:45<22:33,  2.59s/it]

478


 48%|████▊     | 479/1000 [15:47<22:28,  2.59s/it]

479


 48%|████▊     | 480/1000 [15:50<22:33,  2.60s/it]

480


 48%|████▊     | 481/1000 [15:52<22:40,  2.62s/it]

481


 48%|████▊     | 482/1000 [15:55<22:41,  2.63s/it]

482


 48%|████▊     | 483/1000 [15:58<22:41,  2.63s/it]

483


 48%|████▊     | 484/1000 [16:00<22:31,  2.62s/it]

484


 48%|████▊     | 485/1000 [16:03<22:32,  2.63s/it]

485


 49%|████▊     | 486/1000 [16:06<22:28,  2.62s/it]

486


 49%|████▊     | 487/1000 [16:08<22:21,  2.62s/it]

487


 49%|████▉     | 488/1000 [16:11<22:30,  2.64s/it]

488


 49%|████▉     | 489/1000 [16:13<22:29,  2.64s/it]

489


 49%|████▉     | 490/1000 [16:16<22:48,  2.68s/it]

490


 49%|████▉     | 491/1000 [16:19<23:04,  2.72s/it]

491


 49%|████▉     | 492/1000 [16:22<23:09,  2.73s/it]

492


 49%|████▉     | 493/1000 [16:25<23:12,  2.75s/it]

493


 49%|████▉     | 494/1000 [16:27<22:53,  2.71s/it]

494


 50%|████▉     | 495/1000 [16:30<22:33,  2.68s/it]

495


 50%|████▉     | 496/1000 [16:32<22:20,  2.66s/it]

496


 50%|████▉     | 497/1000 [16:35<21:10,  2.53s/it]

497


 50%|████▉     | 498/1000 [16:37<21:14,  2.54s/it]

498


 50%|████▉     | 499/1000 [16:40<21:12,  2.54s/it]

499


 50%|█████     | 500/1000 [16:42<21:25,  2.57s/it]

500


 50%|█████     | 501/1000 [16:45<21:32,  2.59s/it]

501


 50%|█████     | 502/1000 [16:48<21:55,  2.64s/it]

502


 50%|█████     | 503/1000 [16:51<22:18,  2.69s/it]

503


 50%|█████     | 504/1000 [16:53<22:24,  2.71s/it]

504


 50%|█████     | 505/1000 [16:56<22:32,  2.73s/it]

505


 51%|█████     | 506/1000 [16:59<22:37,  2.75s/it]

506


 51%|█████     | 507/1000 [17:02<22:34,  2.75s/it]

507


 51%|█████     | 508/1000 [17:04<22:30,  2.74s/it]

508


 51%|█████     | 509/1000 [17:07<22:25,  2.74s/it]

509


 51%|█████     | 510/1000 [17:10<22:30,  2.76s/it]

510


 51%|█████     | 511/1000 [17:13<22:36,  2.77s/it]

511


 51%|█████     | 512/1000 [17:16<22:33,  2.77s/it]

512


 51%|█████▏    | 513/1000 [17:18<22:16,  2.74s/it]

513


 51%|█████▏    | 514/1000 [17:21<22:06,  2.73s/it]

514


 52%|█████▏    | 515/1000 [17:24<21:57,  2.72s/it]

515


 52%|█████▏    | 516/1000 [17:26<21:53,  2.71s/it]

516


 52%|█████▏    | 517/1000 [17:29<21:32,  2.68s/it]

517


 52%|█████▏    | 518/1000 [17:32<21:31,  2.68s/it]

518


 52%|█████▏    | 519/1000 [17:34<21:26,  2.67s/it]

519


 52%|█████▏    | 520/1000 [17:37<21:31,  2.69s/it]

520


 52%|█████▏    | 521/1000 [17:39<19:28,  2.44s/it]

521


 52%|█████▏    | 522/1000 [17:42<20:20,  2.55s/it]

522


 52%|█████▏    | 523/1000 [17:44<20:55,  2.63s/it]

523


 52%|█████▏    | 524/1000 [17:47<21:18,  2.69s/it]

524


 52%|█████▎    | 525/1000 [17:50<21:25,  2.71s/it]

525


 53%|█████▎    | 526/1000 [17:53<21:26,  2.71s/it]

526


 53%|█████▎    | 527/1000 [17:55<21:13,  2.69s/it]

527


 53%|█████▎    | 528/1000 [17:58<20:59,  2.67s/it]

528


 53%|█████▎    | 529/1000 [18:00<19:19,  2.46s/it]

529


 53%|█████▎    | 530/1000 [18:02<18:02,  2.30s/it]

530


 53%|█████▎    | 531/1000 [18:04<17:05,  2.19s/it]

531


 53%|█████▎    | 532/1000 [18:06<16:23,  2.10s/it]

532


 53%|█████▎    | 533/1000 [18:08<15:53,  2.04s/it]

533


 53%|█████▎    | 534/1000 [18:10<15:40,  2.02s/it]

534


 54%|█████▎    | 535/1000 [18:12<15:42,  2.03s/it]

535


 54%|█████▎    | 536/1000 [18:14<15:27,  2.00s/it]

536


 54%|█████▎    | 537/1000 [18:15<15:10,  1.97s/it]

537


 54%|█████▍    | 538/1000 [18:17<14:54,  1.94s/it]

538


 54%|█████▍    | 539/1000 [18:19<14:54,  1.94s/it]

539


 54%|█████▍    | 540/1000 [18:21<15:18,  2.00s/it]

540


 54%|█████▍    | 541/1000 [18:24<15:43,  2.06s/it]

541


 54%|█████▍    | 542/1000 [18:26<16:40,  2.18s/it]

542


 54%|█████▍    | 543/1000 [18:29<17:35,  2.31s/it]

543


 54%|█████▍    | 544/1000 [18:31<18:10,  2.39s/it]

544


 55%|█████▍    | 545/1000 [18:34<18:34,  2.45s/it]

545


 55%|█████▍    | 546/1000 [18:36<18:47,  2.48s/it]

546


 55%|█████▍    | 547/1000 [18:39<19:18,  2.56s/it]

547


 55%|█████▍    | 548/1000 [18:42<19:36,  2.60s/it]

548


 55%|█████▍    | 549/1000 [18:45<19:59,  2.66s/it]

549


 55%|█████▌    | 550/1000 [18:46<17:02,  2.27s/it]

550


 55%|█████▌    | 551/1000 [18:49<18:00,  2.41s/it]

551


 55%|█████▌    | 552/1000 [18:51<18:36,  2.49s/it]

552


 55%|█████▌    | 553/1000 [18:54<18:56,  2.54s/it]

553


 55%|█████▌    | 554/1000 [18:57<19:01,  2.56s/it]

554


 56%|█████▌    | 555/1000 [18:59<19:15,  2.60s/it]

555


 56%|█████▌    | 556/1000 [19:02<19:32,  2.64s/it]

556


 56%|█████▌    | 557/1000 [19:05<19:45,  2.68s/it]

557


 56%|█████▌    | 558/1000 [19:08<19:52,  2.70s/it]

558


 56%|█████▌    | 559/1000 [19:10<20:01,  2.72s/it]

559


 56%|█████▌    | 560/1000 [19:13<19:59,  2.73s/it]

560


 56%|█████▌    | 561/1000 [19:16<19:58,  2.73s/it]

561


 56%|█████▌    | 562/1000 [19:19<20:02,  2.75s/it]

562


 56%|█████▋    | 563/1000 [19:21<19:48,  2.72s/it]

563


 56%|█████▋    | 564/1000 [19:24<19:26,  2.67s/it]

564


 56%|█████▋    | 565/1000 [19:27<19:30,  2.69s/it]

565


 57%|█████▋    | 566/1000 [19:29<19:34,  2.71s/it]

566


 57%|█████▋    | 567/1000 [19:32<19:40,  2.73s/it]

567


 57%|█████▋    | 568/1000 [19:35<19:40,  2.73s/it]

568


 57%|█████▋    | 569/1000 [19:38<19:37,  2.73s/it]

569


 57%|█████▋    | 570/1000 [19:40<19:32,  2.73s/it]

570


 57%|█████▋    | 571/1000 [19:43<19:18,  2.70s/it]

571


 57%|█████▋    | 572/1000 [19:46<19:08,  2.68s/it]

572


 57%|█████▋    | 573/1000 [19:48<19:16,  2.71s/it]

573


 57%|█████▋    | 574/1000 [19:50<17:28,  2.46s/it]

574


 57%|█████▊    | 575/1000 [19:53<17:55,  2.53s/it]

575


 58%|█████▊    | 576/1000 [19:56<18:07,  2.56s/it]

576


 58%|█████▊    | 577/1000 [19:58<18:24,  2.61s/it]

577


 58%|█████▊    | 578/1000 [20:01<18:31,  2.63s/it]

578


 58%|█████▊    | 579/1000 [20:04<18:42,  2.67s/it]

579


 58%|█████▊    | 580/1000 [20:06<18:41,  2.67s/it]

580


 58%|█████▊    | 581/1000 [20:09<18:45,  2.69s/it]

581


 58%|█████▊    | 582/1000 [20:12<18:32,  2.66s/it]

582


 58%|█████▊    | 583/1000 [20:14<17:21,  2.50s/it]

583


 58%|█████▊    | 584/1000 [20:17<17:43,  2.56s/it]

584


 58%|█████▊    | 585/1000 [20:19<18:08,  2.62s/it]

585


 59%|█████▊    | 586/1000 [20:22<18:06,  2.62s/it]

586


 59%|█████▊    | 587/1000 [20:24<17:43,  2.58s/it]

587


 59%|█████▉    | 588/1000 [20:27<17:19,  2.52s/it]

588


 59%|█████▉    | 589/1000 [20:29<16:59,  2.48s/it]

589


 59%|█████▉    | 590/1000 [20:31<14:55,  2.18s/it]

590


 59%|█████▉    | 591/1000 [20:33<15:37,  2.29s/it]

591


 59%|█████▉    | 592/1000 [20:36<16:18,  2.40s/it]

592


 59%|█████▉    | 593/1000 [20:39<17:06,  2.52s/it]

593


 59%|█████▉    | 594/1000 [20:41<17:32,  2.59s/it]

594


 60%|█████▉    | 595/1000 [20:44<17:49,  2.64s/it]

595


 60%|█████▉    | 596/1000 [20:47<17:23,  2.58s/it]

596


 60%|█████▉    | 597/1000 [20:49<17:11,  2.56s/it]

597


 60%|█████▉    | 598/1000 [20:52<17:34,  2.62s/it]

598


 60%|█████▉    | 599/1000 [20:55<17:41,  2.65s/it]

599


 60%|██████    | 600/1000 [20:57<17:47,  2.67s/it]

600


 60%|██████    | 601/1000 [21:00<17:52,  2.69s/it]

601


 60%|██████    | 602/1000 [21:03<17:55,  2.70s/it]

602


 60%|██████    | 603/1000 [21:06<17:57,  2.71s/it]

603


 60%|██████    | 604/1000 [21:08<17:51,  2.71s/it]

604


 60%|██████    | 605/1000 [21:11<17:43,  2.69s/it]

605


 61%|██████    | 606/1000 [21:14<17:33,  2.67s/it]

606


 61%|██████    | 607/1000 [21:16<17:36,  2.69s/it]

607


 61%|██████    | 608/1000 [21:19<17:31,  2.68s/it]

608


 61%|██████    | 609/1000 [21:22<17:45,  2.72s/it]

609


 61%|██████    | 610/1000 [21:24<17:18,  2.66s/it]

610


 61%|██████    | 611/1000 [21:27<17:32,  2.71s/it]

611


 61%|██████    | 612/1000 [21:30<17:32,  2.71s/it]

612


 61%|██████▏   | 613/1000 [21:33<17:41,  2.74s/it]

613


 61%|██████▏   | 614/1000 [21:35<16:29,  2.56s/it]

614


 62%|██████▏   | 615/1000 [21:37<16:40,  2.60s/it]

615


 62%|██████▏   | 616/1000 [21:40<16:52,  2.64s/it]

616


 62%|██████▏   | 617/1000 [21:43<17:03,  2.67s/it]

617


 62%|██████▏   | 618/1000 [21:46<17:04,  2.68s/it]

618


 62%|██████▏   | 619/1000 [21:48<17:05,  2.69s/it]

619


 62%|██████▏   | 620/1000 [21:51<16:57,  2.68s/it]

620


 62%|██████▏   | 621/1000 [21:54<16:39,  2.64s/it]

621


 62%|██████▏   | 622/1000 [21:56<16:41,  2.65s/it]

622


 62%|██████▏   | 623/1000 [21:59<16:32,  2.63s/it]

623


 62%|██████▏   | 624/1000 [22:02<16:36,  2.65s/it]

624


 62%|██████▎   | 625/1000 [22:04<16:10,  2.59s/it]

625


 63%|██████▎   | 626/1000 [22:06<15:40,  2.52s/it]

626


 63%|██████▎   | 627/1000 [22:09<15:20,  2.47s/it]

627


 63%|██████▎   | 628/1000 [22:11<15:25,  2.49s/it]

628


 63%|██████▎   | 629/1000 [22:14<15:35,  2.52s/it]

629


 63%|██████▎   | 630/1000 [22:17<15:52,  2.58s/it]

630


 63%|██████▎   | 631/1000 [22:19<16:01,  2.61s/it]

631


 63%|██████▎   | 632/1000 [22:22<16:10,  2.64s/it]

632


 63%|██████▎   | 633/1000 [22:25<16:22,  2.68s/it]

633


 63%|██████▎   | 634/1000 [22:27<16:31,  2.71s/it]

634


 64%|██████▎   | 635/1000 [22:30<16:28,  2.71s/it]

635


 64%|██████▎   | 636/1000 [22:33<16:28,  2.72s/it]

636


 64%|██████▎   | 637/1000 [22:36<16:21,  2.70s/it]

637


 64%|██████▍   | 638/1000 [22:38<15:51,  2.63s/it]

638


 64%|██████▍   | 639/1000 [22:40<14:51,  2.47s/it]

639


 64%|██████▍   | 640/1000 [22:42<13:11,  2.20s/it]

640


 64%|██████▍   | 641/1000 [22:44<13:56,  2.33s/it]

641


 64%|██████▍   | 642/1000 [22:47<14:35,  2.44s/it]

642


 64%|██████▍   | 643/1000 [22:50<14:51,  2.50s/it]

643


 64%|██████▍   | 644/1000 [22:52<15:03,  2.54s/it]

644


 64%|██████▍   | 645/1000 [22:55<14:44,  2.49s/it]

645


 65%|██████▍   | 646/1000 [22:57<15:09,  2.57s/it]

646


 65%|██████▍   | 647/1000 [23:00<15:30,  2.64s/it]

647


 65%|██████▍   | 648/1000 [23:03<15:26,  2.63s/it]

648


 65%|██████▍   | 649/1000 [23:05<15:19,  2.62s/it]

649


 65%|██████▌   | 650/1000 [23:08<15:09,  2.60s/it]

650


 65%|██████▌   | 651/1000 [23:11<15:05,  2.60s/it]

651


 65%|██████▌   | 652/1000 [23:13<14:50,  2.56s/it]

652


 65%|██████▌   | 653/1000 [23:16<14:48,  2.56s/it]

653


 65%|██████▌   | 654/1000 [23:18<14:52,  2.58s/it]

654


 66%|██████▌   | 655/1000 [23:21<14:56,  2.60s/it]

655


 66%|██████▌   | 656/1000 [23:23<14:47,  2.58s/it]

656


 66%|██████▌   | 657/1000 [23:25<13:41,  2.39s/it]

657


 66%|██████▌   | 658/1000 [23:28<14:13,  2.50s/it]

658


 66%|██████▌   | 659/1000 [23:31<14:31,  2.56s/it]

659


 66%|██████▌   | 660/1000 [23:33<13:15,  2.34s/it]

660


 66%|██████▌   | 661/1000 [23:35<13:52,  2.46s/it]

661


 66%|██████▌   | 662/1000 [23:37<13:07,  2.33s/it]

662


 66%|██████▋   | 663/1000 [23:39<12:28,  2.22s/it]

663


 66%|██████▋   | 664/1000 [23:42<13:15,  2.37s/it]

664


 66%|██████▋   | 665/1000 [23:45<13:48,  2.47s/it]

665


 67%|██████▋   | 666/1000 [23:48<14:15,  2.56s/it]

666


 67%|██████▋   | 667/1000 [23:50<14:14,  2.57s/it]

667


 67%|██████▋   | 668/1000 [23:53<14:21,  2.59s/it]

668


 67%|██████▋   | 669/1000 [23:55<13:06,  2.38s/it]

669


 67%|██████▋   | 670/1000 [23:57<12:54,  2.35s/it]

670


 67%|██████▋   | 671/1000 [24:00<13:27,  2.45s/it]

671


 67%|██████▋   | 672/1000 [24:02<13:46,  2.52s/it]

672


 67%|██████▋   | 673/1000 [24:05<14:09,  2.60s/it]

673


 67%|██████▋   | 674/1000 [24:08<14:25,  2.65s/it]

674


 68%|██████▊   | 675/1000 [24:11<14:37,  2.70s/it]

675


 68%|██████▊   | 676/1000 [24:13<14:42,  2.72s/it]

676


 68%|██████▊   | 677/1000 [24:16<14:49,  2.75s/it]

677


 68%|██████▊   | 678/1000 [24:19<14:53,  2.77s/it]

678


 68%|██████▊   | 679/1000 [24:22<14:58,  2.80s/it]

679


 68%|██████▊   | 680/1000 [24:25<15:08,  2.84s/it]

680


 68%|██████▊   | 681/1000 [24:28<15:10,  2.85s/it]

681


 68%|██████▊   | 682/1000 [24:31<15:01,  2.83s/it]

682


 68%|██████▊   | 683/1000 [24:33<14:52,  2.82s/it]

683


 68%|██████▊   | 684/1000 [24:36<14:43,  2.80s/it]

684


 68%|██████▊   | 685/1000 [24:39<14:32,  2.77s/it]

685


 69%|██████▊   | 686/1000 [24:42<14:24,  2.75s/it]

686


 69%|██████▊   | 687/1000 [24:44<14:11,  2.72s/it]

687


 69%|██████▉   | 688/1000 [24:47<13:48,  2.65s/it]

688


 69%|██████▉   | 689/1000 [24:49<13:40,  2.64s/it]

689


 69%|██████▉   | 690/1000 [24:52<13:49,  2.67s/it]

690


 69%|██████▉   | 691/1000 [24:55<13:53,  2.70s/it]

691


 69%|██████▉   | 692/1000 [24:57<13:48,  2.69s/it]

692


 69%|██████▉   | 693/1000 [25:00<13:50,  2.71s/it]

693


 69%|██████▉   | 694/1000 [25:03<13:53,  2.72s/it]

694


 70%|██████▉   | 695/1000 [25:06<13:58,  2.75s/it]

695


 70%|██████▉   | 696/1000 [25:09<13:56,  2.75s/it]

696


 70%|██████▉   | 697/1000 [25:11<13:55,  2.76s/it]

697


 70%|██████▉   | 698/1000 [25:14<13:56,  2.77s/it]

698


 70%|██████▉   | 699/1000 [25:17<13:54,  2.77s/it]

699


 70%|███████   | 700/1000 [25:20<13:49,  2.76s/it]

700


 70%|███████   | 701/1000 [25:22<13:49,  2.77s/it]

701


 70%|███████   | 702/1000 [25:25<13:07,  2.64s/it]

702


 70%|███████   | 703/1000 [25:27<12:17,  2.48s/it]

703


 70%|███████   | 704/1000 [25:30<12:45,  2.59s/it]

704


 70%|███████   | 705/1000 [25:32<12:55,  2.63s/it]

705


 71%|███████   | 706/1000 [25:35<13:06,  2.67s/it]

706


 71%|███████   | 707/1000 [25:38<13:05,  2.68s/it]

707


 71%|███████   | 708/1000 [25:41<12:57,  2.66s/it]

708


 71%|███████   | 709/1000 [25:43<12:44,  2.63s/it]

709


 71%|███████   | 710/1000 [25:46<12:39,  2.62s/it]

710


 71%|███████   | 711/1000 [25:48<12:31,  2.60s/it]

711


 71%|███████   | 712/1000 [25:51<12:30,  2.61s/it]

712


 71%|███████▏  | 713/1000 [25:53<12:24,  2.60s/it]

713


 71%|███████▏  | 714/1000 [25:56<12:24,  2.60s/it]

714


 72%|███████▏  | 715/1000 [25:59<12:25,  2.61s/it]

715


 72%|███████▏  | 716/1000 [26:01<12:23,  2.62s/it]

716


 72%|███████▏  | 717/1000 [26:04<12:23,  2.63s/it]

717


 72%|███████▏  | 718/1000 [26:06<11:09,  2.37s/it]

718


 72%|███████▏  | 719/1000 [26:08<11:33,  2.47s/it]

719


 72%|███████▏  | 720/1000 [26:11<11:52,  2.55s/it]

720


 72%|███████▏  | 721/1000 [26:14<12:07,  2.61s/it]

721


 72%|███████▏  | 722/1000 [26:17<12:12,  2.63s/it]

722


 72%|███████▏  | 723/1000 [26:19<12:13,  2.65s/it]

723


 72%|███████▏  | 724/1000 [26:22<12:07,  2.64s/it]

724


 72%|███████▎  | 725/1000 [26:24<11:51,  2.59s/it]

725


 73%|███████▎  | 726/1000 [26:27<11:55,  2.61s/it]

726


 73%|███████▎  | 727/1000 [26:30<12:00,  2.64s/it]

727


 73%|███████▎  | 728/1000 [26:32<12:07,  2.67s/it]

728


 73%|███████▎  | 729/1000 [26:35<12:08,  2.69s/it]

729


 73%|███████▎  | 730/1000 [26:38<12:07,  2.70s/it]

730


 73%|███████▎  | 731/1000 [26:41<12:05,  2.70s/it]

731


 73%|███████▎  | 732/1000 [26:43<12:04,  2.70s/it]

732


 73%|███████▎  | 733/1000 [26:46<12:00,  2.70s/it]

733


 73%|███████▎  | 734/1000 [26:49<11:55,  2.69s/it]

734


 74%|███████▎  | 735/1000 [26:51<11:52,  2.69s/it]

735


 74%|███████▎  | 736/1000 [26:54<11:44,  2.67s/it]

736


 74%|███████▎  | 737/1000 [26:57<11:42,  2.67s/it]

737


 74%|███████▍  | 738/1000 [26:59<11:45,  2.69s/it]

738


 74%|███████▍  | 739/1000 [27:02<11:43,  2.70s/it]

739


 74%|███████▍  | 740/1000 [27:05<11:40,  2.69s/it]

740


 74%|███████▍  | 741/1000 [27:07<11:32,  2.68s/it]

741


 74%|███████▍  | 742/1000 [27:10<11:28,  2.67s/it]

742


 74%|███████▍  | 743/1000 [27:13<11:25,  2.67s/it]

743


 74%|███████▍  | 744/1000 [27:15<11:24,  2.67s/it]

744


 74%|███████▍  | 745/1000 [27:18<11:18,  2.66s/it]

745


 75%|███████▍  | 746/1000 [27:21<11:09,  2.64s/it]

746


 75%|███████▍  | 747/1000 [27:23<10:22,  2.46s/it]

747


 75%|███████▍  | 748/1000 [27:25<10:34,  2.52s/it]

748


 75%|███████▍  | 749/1000 [27:28<10:39,  2.55s/it]

749


 75%|███████▌  | 750/1000 [27:31<10:44,  2.58s/it]

750


 75%|███████▌  | 751/1000 [27:33<10:05,  2.43s/it]

751


 75%|███████▌  | 752/1000 [27:35<10:14,  2.48s/it]

752


 75%|███████▌  | 753/1000 [27:38<10:28,  2.54s/it]

753


 75%|███████▌  | 754/1000 [27:41<10:29,  2.56s/it]

754


 76%|███████▌  | 755/1000 [27:43<10:30,  2.57s/it]

755


 76%|███████▌  | 756/1000 [27:46<10:33,  2.60s/it]

756


 76%|███████▌  | 757/1000 [27:49<10:42,  2.64s/it]

757


 76%|███████▌  | 758/1000 [27:51<10:40,  2.65s/it]

758


 76%|███████▌  | 759/1000 [27:54<10:38,  2.65s/it]

759


 76%|███████▌  | 760/1000 [27:57<10:36,  2.65s/it]

760


 76%|███████▌  | 761/1000 [27:59<10:33,  2.65s/it]

761


 76%|███████▌  | 762/1000 [28:02<10:27,  2.64s/it]

762


 76%|███████▋  | 763/1000 [28:04<10:26,  2.64s/it]

763


 76%|███████▋  | 764/1000 [28:07<10:20,  2.63s/it]

764


 76%|███████▋  | 765/1000 [28:10<10:16,  2.62s/it]

765


 77%|███████▋  | 766/1000 [28:12<10:12,  2.62s/it]

766


 77%|███████▋  | 767/1000 [28:15<09:55,  2.56s/it]

767


 77%|███████▋  | 768/1000 [28:17<09:59,  2.59s/it]

768


 77%|███████▋  | 769/1000 [28:20<10:12,  2.65s/it]

769


 77%|███████▋  | 770/1000 [28:23<10:14,  2.67s/it]

770


 77%|███████▋  | 771/1000 [28:26<10:12,  2.67s/it]

771


 77%|███████▋  | 772/1000 [28:28<10:20,  2.72s/it]

772


 77%|███████▋  | 773/1000 [28:31<10:13,  2.70s/it]

773


 77%|███████▋  | 774/1000 [28:34<10:15,  2.72s/it]

774


 78%|███████▊  | 775/1000 [28:37<10:18,  2.75s/it]

775


 78%|███████▊  | 776/1000 [28:39<10:19,  2.77s/it]

776


 78%|███████▊  | 777/1000 [28:42<10:15,  2.76s/it]

777


 78%|███████▊  | 778/1000 [28:45<10:07,  2.74s/it]

778


 78%|███████▊  | 779/1000 [28:47<09:56,  2.70s/it]

779


 78%|███████▊  | 780/1000 [28:50<09:48,  2.68s/it]

780


 78%|███████▊  | 781/1000 [28:53<09:47,  2.68s/it]

781


 78%|███████▊  | 782/1000 [28:55<09:36,  2.64s/it]

782


 78%|███████▊  | 783/1000 [28:58<09:29,  2.62s/it]

783


 78%|███████▊  | 784/1000 [29:01<09:23,  2.61s/it]

784


 78%|███████▊  | 785/1000 [29:03<09:20,  2.61s/it]

785


 79%|███████▊  | 786/1000 [29:06<09:17,  2.60s/it]

786


 79%|███████▊  | 787/1000 [29:08<09:14,  2.60s/it]

787


 79%|███████▉  | 788/1000 [29:11<09:13,  2.61s/it]

788


 79%|███████▉  | 789/1000 [29:14<09:17,  2.64s/it]

789


 79%|███████▉  | 790/1000 [29:16<09:17,  2.66s/it]

790


 79%|███████▉  | 791/1000 [29:19<09:17,  2.67s/it]

791


 79%|███████▉  | 792/1000 [29:22<09:17,  2.68s/it]

792


 79%|███████▉  | 793/1000 [29:24<09:16,  2.69s/it]

793


 79%|███████▉  | 794/1000 [29:27<09:11,  2.68s/it]

794


 80%|███████▉  | 795/1000 [29:30<09:08,  2.67s/it]

795


 80%|███████▉  | 796/1000 [29:32<09:06,  2.68s/it]

796


 80%|███████▉  | 797/1000 [29:35<08:58,  2.65s/it]

797


 80%|███████▉  | 798/1000 [29:38<08:55,  2.65s/it]

798


 80%|███████▉  | 799/1000 [29:40<08:51,  2.65s/it]

799


 80%|████████  | 800/1000 [29:43<08:53,  2.67s/it]

800


 80%|████████  | 801/1000 [29:46<08:54,  2.69s/it]

801


 80%|████████  | 802/1000 [29:49<08:54,  2.70s/it]

802


 80%|████████  | 803/1000 [29:51<08:50,  2.69s/it]

803


 80%|████████  | 804/1000 [29:54<08:53,  2.72s/it]

804


 80%|████████  | 805/1000 [29:57<08:59,  2.77s/it]

805


 81%|████████  | 806/1000 [30:00<09:04,  2.81s/it]

806


 81%|████████  | 807/1000 [30:03<09:00,  2.80s/it]

807


 81%|████████  | 808/1000 [30:05<08:35,  2.69s/it]

808


 81%|████████  | 809/1000 [30:08<08:38,  2.71s/it]

809


 81%|████████  | 810/1000 [30:11<08:38,  2.73s/it]

810


 81%|████████  | 811/1000 [30:13<08:31,  2.71s/it]

811


 81%|████████  | 812/1000 [30:15<07:57,  2.54s/it]

812


 81%|████████▏ | 813/1000 [30:18<07:59,  2.56s/it]

813


 81%|████████▏ | 814/1000 [30:21<08:00,  2.58s/it]

814


 82%|████████▏ | 815/1000 [30:23<07:50,  2.54s/it]

815


 82%|████████▏ | 816/1000 [30:26<07:52,  2.57s/it]

816


 82%|████████▏ | 817/1000 [30:28<07:55,  2.60s/it]

817


 82%|████████▏ | 818/1000 [30:30<06:47,  2.24s/it]

818


 82%|████████▏ | 819/1000 [30:32<07:08,  2.37s/it]

819


 82%|████████▏ | 820/1000 [30:35<07:19,  2.44s/it]

820


 82%|████████▏ | 821/1000 [30:38<07:33,  2.53s/it]

821


 82%|████████▏ | 822/1000 [30:41<07:46,  2.62s/it]

822


 82%|████████▏ | 823/1000 [30:43<07:54,  2.68s/it]

823


 82%|████████▏ | 824/1000 [30:46<07:54,  2.70s/it]

824


 82%|████████▎ | 825/1000 [30:49<07:56,  2.72s/it]

825


 83%|████████▎ | 826/1000 [30:52<07:53,  2.72s/it]

826


 83%|████████▎ | 827/1000 [30:54<07:50,  2.72s/it]

827


 83%|████████▎ | 828/1000 [30:57<07:45,  2.71s/it]

828


 83%|████████▎ | 829/1000 [31:00<07:43,  2.71s/it]

829


 83%|████████▎ | 830/1000 [31:02<07:39,  2.70s/it]

830


 83%|████████▎ | 831/1000 [31:05<07:36,  2.70s/it]

831


 83%|████████▎ | 832/1000 [31:08<07:33,  2.70s/it]

832


 83%|████████▎ | 833/1000 [31:11<07:31,  2.70s/it]

833


 83%|████████▎ | 834/1000 [31:13<07:30,  2.72s/it]

834


 84%|████████▎ | 835/1000 [31:16<07:31,  2.74s/it]

835


 84%|████████▎ | 836/1000 [31:19<07:28,  2.73s/it]

836


 84%|████████▎ | 837/1000 [31:21<07:24,  2.73s/it]

837


 84%|████████▍ | 838/1000 [31:24<07:22,  2.73s/it]

838


 84%|████████▍ | 839/1000 [31:27<07:17,  2.72s/it]

839


 84%|████████▍ | 840/1000 [31:30<07:18,  2.74s/it]

840


 84%|████████▍ | 841/1000 [31:32<07:18,  2.76s/it]

841


 84%|████████▍ | 842/1000 [31:35<07:13,  2.75s/it]

842


 84%|████████▍ | 843/1000 [31:38<07:12,  2.75s/it]

843


 84%|████████▍ | 844/1000 [31:41<07:10,  2.76s/it]

844


 84%|████████▍ | 845/1000 [31:44<07:07,  2.76s/it]

845


 85%|████████▍ | 846/1000 [31:46<07:01,  2.74s/it]

846


 85%|████████▍ | 847/1000 [31:49<06:59,  2.74s/it]

847


 85%|████████▍ | 848/1000 [31:52<06:58,  2.75s/it]

848


 85%|████████▍ | 849/1000 [31:55<06:57,  2.76s/it]

849


 85%|████████▌ | 850/1000 [31:57<06:51,  2.75s/it]

850


 85%|████████▌ | 851/1000 [32:00<06:45,  2.72s/it]

851


 85%|████████▌ | 852/1000 [32:03<06:40,  2.70s/it]

852


 85%|████████▌ | 853/1000 [32:05<06:37,  2.71s/it]

853


 85%|████████▌ | 854/1000 [32:08<06:30,  2.68s/it]

854


 86%|████████▌ | 855/1000 [32:11<06:30,  2.70s/it]

855


 86%|████████▌ | 856/1000 [32:13<06:33,  2.73s/it]

856


 86%|████████▌ | 857/1000 [32:16<06:27,  2.71s/it]

857


 86%|████████▌ | 858/1000 [32:19<06:19,  2.67s/it]

858


 86%|████████▌ | 859/1000 [32:21<06:09,  2.62s/it]

859


 86%|████████▌ | 860/1000 [32:24<06:01,  2.59s/it]

860


 86%|████████▌ | 861/1000 [32:26<05:55,  2.56s/it]

861


 86%|████████▌ | 862/1000 [32:29<05:57,  2.59s/it]

862


 86%|████████▋ | 863/1000 [32:31<05:45,  2.52s/it]

863


 86%|████████▋ | 864/1000 [32:34<05:47,  2.55s/it]

864


 86%|████████▋ | 865/1000 [32:36<05:48,  2.58s/it]

865


 87%|████████▋ | 866/1000 [32:39<05:45,  2.58s/it]

866


 87%|████████▋ | 867/1000 [32:42<05:41,  2.57s/it]

867


 87%|████████▋ | 868/1000 [32:44<05:41,  2.59s/it]

868


 87%|████████▋ | 869/1000 [32:47<05:40,  2.60s/it]

869


 87%|████████▋ | 870/1000 [32:49<05:38,  2.60s/it]

870


 87%|████████▋ | 871/1000 [32:52<05:36,  2.61s/it]

871


 87%|████████▋ | 872/1000 [32:55<05:32,  2.60s/it]

872


 87%|████████▋ | 873/1000 [32:57<05:27,  2.58s/it]

873


 87%|████████▋ | 874/1000 [33:00<05:23,  2.57s/it]

874


 88%|████████▊ | 875/1000 [33:02<05:22,  2.58s/it]

875


 88%|████████▊ | 876/1000 [33:05<05:23,  2.61s/it]

876


 88%|████████▊ | 877/1000 [33:08<05:23,  2.63s/it]

877


 88%|████████▊ | 878/1000 [33:10<05:20,  2.62s/it]

878


 88%|████████▊ | 879/1000 [33:13<05:20,  2.65s/it]

879


 88%|████████▊ | 880/1000 [33:16<05:15,  2.63s/it]

880


 88%|████████▊ | 881/1000 [33:18<05:11,  2.62s/it]

881


 88%|████████▊ | 882/1000 [33:21<05:07,  2.60s/it]

882


 88%|████████▊ | 883/1000 [33:23<05:04,  2.60s/it]

883


 88%|████████▊ | 884/1000 [33:26<05:00,  2.59s/it]

884


 88%|████████▊ | 885/1000 [33:28<04:55,  2.57s/it]

885


 89%|████████▊ | 886/1000 [33:31<04:57,  2.61s/it]

886


 89%|████████▊ | 887/1000 [33:34<04:56,  2.62s/it]

887


 89%|████████▉ | 888/1000 [33:37<04:57,  2.66s/it]

888


 89%|████████▉ | 889/1000 [33:39<04:57,  2.68s/it]

889


 89%|████████▉ | 890/1000 [33:42<04:52,  2.66s/it]

890


 89%|████████▉ | 891/1000 [33:45<04:54,  2.71s/it]

891


 89%|████████▉ | 892/1000 [33:47<04:53,  2.72s/it]

892


 89%|████████▉ | 893/1000 [33:50<04:49,  2.70s/it]

893


 89%|████████▉ | 894/1000 [33:53<04:45,  2.69s/it]

894


 90%|████████▉ | 895/1000 [33:55<04:43,  2.70s/it]

895


 90%|████████▉ | 896/1000 [33:58<04:38,  2.68s/it]

896


 90%|████████▉ | 897/1000 [34:01<04:34,  2.67s/it]

897


 90%|████████▉ | 898/1000 [34:03<04:30,  2.65s/it]

898


 90%|████████▉ | 899/1000 [34:06<04:26,  2.64s/it]

899


 90%|█████████ | 900/1000 [34:08<04:08,  2.48s/it]

900


 90%|█████████ | 901/1000 [34:11<04:10,  2.53s/it]

901


 90%|█████████ | 902/1000 [34:13<04:09,  2.55s/it]

902


 90%|█████████ | 903/1000 [34:16<04:08,  2.56s/it]

903


 90%|█████████ | 904/1000 [34:18<04:05,  2.56s/it]

904


 90%|█████████ | 905/1000 [34:21<04:03,  2.56s/it]

905


 91%|█████████ | 906/1000 [34:24<04:01,  2.57s/it]

906


 91%|█████████ | 907/1000 [34:26<03:58,  2.56s/it]

907


 91%|█████████ | 908/1000 [34:29<04:03,  2.65s/it]

908


 91%|█████████ | 909/1000 [34:32<04:01,  2.65s/it]

909


 91%|█████████ | 910/1000 [34:34<03:57,  2.63s/it]

910


 91%|█████████ | 911/1000 [34:36<03:37,  2.44s/it]

911


 91%|█████████ | 912/1000 [34:39<03:40,  2.51s/it]

912


 91%|█████████▏| 913/1000 [34:42<03:40,  2.53s/it]

913


 91%|█████████▏| 914/1000 [34:44<03:43,  2.60s/it]

914


 92%|█████████▏| 915/1000 [34:47<03:45,  2.65s/it]

915


 92%|█████████▏| 916/1000 [34:50<03:40,  2.62s/it]

916


 92%|█████████▏| 917/1000 [34:52<03:39,  2.65s/it]

917


 92%|█████████▏| 918/1000 [34:55<03:39,  2.68s/it]

918


 92%|█████████▏| 919/1000 [34:58<03:33,  2.63s/it]

919


 92%|█████████▏| 920/1000 [35:00<03:32,  2.66s/it]

920


 92%|█████████▏| 921/1000 [35:03<03:31,  2.67s/it]

921


 92%|█████████▏| 922/1000 [35:06<03:30,  2.70s/it]

922


 92%|█████████▏| 923/1000 [35:08<03:27,  2.70s/it]

923


 92%|█████████▏| 924/1000 [35:11<03:24,  2.68s/it]

924


 92%|█████████▎| 925/1000 [35:14<03:19,  2.67s/it]

925


 93%|█████████▎| 926/1000 [35:16<03:17,  2.67s/it]

926


 93%|█████████▎| 927/1000 [35:19<03:16,  2.70s/it]

927


 93%|█████████▎| 928/1000 [35:22<03:13,  2.69s/it]

928


 93%|█████████▎| 929/1000 [35:24<03:09,  2.67s/it]

929


 93%|█████████▎| 930/1000 [35:27<03:04,  2.64s/it]

930


 93%|█████████▎| 931/1000 [35:30<02:59,  2.61s/it]

931


 93%|█████████▎| 932/1000 [35:32<02:57,  2.62s/it]

932


 93%|█████████▎| 933/1000 [35:35<02:53,  2.59s/it]

933


 93%|█████████▎| 934/1000 [35:37<02:51,  2.60s/it]

934


 94%|█████████▎| 935/1000 [35:39<02:36,  2.41s/it]

935


 94%|█████████▎| 936/1000 [35:42<02:38,  2.47s/it]

936


 94%|█████████▎| 937/1000 [35:45<02:38,  2.51s/it]

937


 94%|█████████▍| 938/1000 [35:47<02:37,  2.54s/it]

938


 94%|█████████▍| 939/1000 [35:50<02:36,  2.56s/it]

939


 94%|█████████▍| 940/1000 [35:52<02:34,  2.58s/it]

940


 94%|█████████▍| 941/1000 [35:55<02:34,  2.62s/it]

941


 94%|█████████▍| 942/1000 [35:58<02:29,  2.57s/it]

942


 94%|█████████▍| 943/1000 [36:00<02:23,  2.51s/it]

943


 94%|█████████▍| 944/1000 [36:03<02:22,  2.54s/it]

944


 94%|█████████▍| 945/1000 [36:05<02:21,  2.57s/it]

945


 95%|█████████▍| 946/1000 [36:08<02:18,  2.57s/it]

946


 95%|█████████▍| 947/1000 [36:10<02:17,  2.59s/it]

947


 95%|█████████▍| 948/1000 [36:13<02:15,  2.60s/it]

948


 95%|█████████▍| 949/1000 [36:15<02:03,  2.42s/it]

949


 95%|█████████▌| 950/1000 [36:18<02:01,  2.44s/it]

950


 95%|█████████▌| 951/1000 [36:20<02:01,  2.48s/it]

951


 95%|█████████▌| 952/1000 [36:23<02:01,  2.53s/it]

952


 95%|█████████▌| 953/1000 [36:25<02:01,  2.59s/it]

953


 95%|█████████▌| 954/1000 [36:28<02:01,  2.64s/it]

954


 96%|█████████▌| 955/1000 [36:31<01:59,  2.65s/it]

955


 96%|█████████▌| 956/1000 [36:34<01:56,  2.65s/it]

956


 96%|█████████▌| 957/1000 [36:36<01:55,  2.68s/it]

957


 96%|█████████▌| 958/1000 [36:39<01:51,  2.65s/it]

958


 96%|█████████▌| 959/1000 [36:42<01:48,  2.64s/it]

959


 96%|█████████▌| 960/1000 [36:44<01:45,  2.65s/it]

960


 96%|█████████▌| 961/1000 [36:47<01:43,  2.66s/it]

961


 96%|█████████▌| 962/1000 [36:50<01:41,  2.66s/it]

962


 96%|█████████▋| 963/1000 [36:52<01:38,  2.67s/it]

963


 96%|█████████▋| 964/1000 [36:55<01:35,  2.65s/it]

964


 96%|█████████▋| 965/1000 [36:57<01:32,  2.63s/it]

965


 97%|█████████▋| 966/1000 [37:00<01:29,  2.62s/it]

966


 97%|█████████▋| 967/1000 [37:03<01:26,  2.63s/it]

967


 97%|█████████▋| 968/1000 [37:05<01:23,  2.62s/it]

968


 97%|█████████▋| 969/1000 [37:08<01:21,  2.62s/it]

969


 97%|█████████▋| 970/1000 [37:10<01:18,  2.60s/it]

970


 97%|█████████▋| 971/1000 [37:13<01:14,  2.58s/it]

971


 97%|█████████▋| 972/1000 [37:16<01:12,  2.59s/it]

972


 97%|█████████▋| 973/1000 [37:18<01:09,  2.58s/it]

973


 97%|█████████▋| 974/1000 [37:21<01:07,  2.61s/it]

974


 98%|█████████▊| 975/1000 [37:24<01:06,  2.65s/it]

975


 98%|█████████▊| 976/1000 [37:26<01:04,  2.67s/it]

976


 98%|█████████▊| 977/1000 [37:29<01:02,  2.70s/it]

977


 98%|█████████▊| 978/1000 [37:32<00:59,  2.70s/it]

978


 98%|█████████▊| 979/1000 [37:35<00:56,  2.71s/it]

979


 98%|█████████▊| 980/1000 [37:37<00:52,  2.61s/it]

980


 98%|█████████▊| 981/1000 [37:40<00:50,  2.65s/it]

981


 98%|█████████▊| 982/1000 [37:42<00:48,  2.69s/it]

982


 98%|█████████▊| 983/1000 [37:45<00:45,  2.70s/it]

983


 98%|█████████▊| 984/1000 [37:48<00:42,  2.66s/it]

984


 98%|█████████▊| 985/1000 [37:50<00:39,  2.63s/it]

985


 99%|█████████▊| 986/1000 [37:53<00:36,  2.62s/it]

986


 99%|█████████▊| 987/1000 [37:55<00:33,  2.59s/it]

987


 99%|█████████▉| 988/1000 [37:58<00:31,  2.63s/it]

988


 99%|█████████▉| 989/1000 [38:01<00:28,  2.63s/it]

989


 99%|█████████▉| 990/1000 [38:03<00:26,  2.67s/it]

990


 99%|█████████▉| 991/1000 [38:06<00:23,  2.64s/it]

991


 99%|█████████▉| 992/1000 [38:09<00:22,  2.76s/it]

992


 99%|█████████▉| 993/1000 [38:13<00:20,  2.98s/it]

993


 99%|█████████▉| 994/1000 [38:17<00:19,  3.31s/it]

994


100%|█████████▉| 995/1000 [38:21<00:17,  3.55s/it]

995


100%|█████████▉| 996/1000 [38:25<00:14,  3.71s/it]

996


100%|█████████▉| 997/1000 [38:28<00:10,  3.62s/it]

997


100%|█████████▉| 998/1000 [38:32<00:07,  3.72s/it]

998


100%|█████████▉| 999/1000 [38:36<00:03,  3.75s/it]

999


100%|██████████| 1000/1000 [38:40<00:00,  2.32s/it]


In [6]:
env.close()

In [10]:
#testing loop
act = []
for i in tqdm(range(10)):
    print(i)
    state = env.reset()
    state = np.array(state,dtype = 'float32')
    last_rew = 0
    last_rew = np.array(last_rew,dtype = 'float32')
    for j in range(1000):
        env.render()
        if j == 0:
            obs = np.array([state,last_rew])
        action = agent47.step(obs,training = False)
        act.append(action)
        state,reward,done,info = env.step(action)
        state = np.array(state,dtype = 'float32')
        reward = np.array(reward,dtype = 'float32')
        obs = np.array([state,reward])

        
env.close()

  0%|          | 0/10 [00:00<?, ?it/s]

0


  0%|          | 0/10 [00:06<?, ?it/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\ishtd\Anaconda3\envs\deeplearning\lib\site-packages\IPython\core\interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-0ffed353310c>", line 10, in <module>
    env.render()
  File "C:\Users\ishtd\Anaconda3\envs\deeplearning\lib\site-packages\gym\core.py", line 233, in render
    return self.env.render(mode, **kwargs)
  File "C:\Users\ishtd\Anaconda3\envs\deeplearning\lib\site-packages\gym\envs\classic_control\mountain_car.py", line 119, in render
    return self.viewer.render(return_rgb_array = mode=='rgb_array')
  File "C:\Users\ishtd\Anaconda3\envs\deeplearning\lib\site-packages\gym\envs\classic_control\rendering.py", line 124, in render
    self.window.flip()
  File "C:\Users\ishtd\Anaconda3\envs\deeplearning\lib\site-packages\pyglet\window\win32\__init__.py", line 336, in flip
    _dwmapi.DwmFlush()
KeyboardInterrupt

During handling of the above exception, ano

TypeError: object of type 'NoneType' has no len()

In [11]:
env.close()

ERROR! Session/line number was not unique in database. History logging moved to new session 753


In [12]:
act

[array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=int64),
 array(0, dtype=

In [15]:
for i in act:
    print(i)

2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2


In [16]:
len(act)

3128

## KERAS RL

In [13]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents import SARSAAgent
from rl.policy import BoltzmannQPolicy


Using TensorFlow backend.


In [19]:
ENV_NAME = 'MountainCar-v0'

# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 2)                 0         
_________________________________________________________________
dense_5 (Dense)              (None, 16)                48        
_________________________________________________________________
activation_5 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_6 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_7 (Activation)    (None, 16)               

In [20]:
# SARSA does not require a memory.
policy = BoltzmannQPolicy()
sarsa = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

In [22]:
sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)

Training for 50000 steps ...
   200/50000: episode: 1, duration: 1.589s, episode steps: 200, steps per second: 126, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.155 [0.000, 2.000], mean observation: -0.254 [-0.584, 0.008], loss: 22.363530, mae: 54.889876, mean_q: -81.644252
   400/50000: episode: 2, duration: 1.641s, episode steps: 200, steps per second: 122, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.285 [0.000, 2.000], mean observation: -0.245 [-0.650, 0.020], loss: 22.040751, mae: 57.202113, mean_q: -84.224867
   600/50000: episode: 3, duration: 1.653s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.360 [0.000, 2.000], mean observation: -0.235 [-0.626, 0.017], loss: 19.601438, mae: 53.201671, mean_q: -78.640333
   800/50000: episode: 4, duration: 1.652s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.00

  6200/50000: episode: 31, duration: 1.651s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.395 [0.000, 2.000], mean observation: -0.233 [-0.567, 0.008], loss: 12.536173, mae: 43.032049, mean_q: -63.197145
  6400/50000: episode: 32, duration: 1.637s, episode steps: 200, steps per second: 122, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.685 [0.000, 2.000], mean observation: -0.289 [-0.901, 0.027], loss: 17.831770, mae: 46.627816, mean_q: -68.536005
  6600/50000: episode: 33, duration: 1.627s, episode steps: 200, steps per second: 123, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.085 [0.000, 2.000], mean observation: -0.254 [-0.565, 0.008], loss: 9.281157, mae: 43.990830, mean_q: -64.109612
  6800/50000: episode: 34, duration: 1.680s, episode steps: 200, steps per second: 119, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean a

 12200/50000: episode: 61, duration: 1.700s, episode steps: 200, steps per second: 118, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.840 [0.000, 2.000], mean observation: -0.279 [-0.842, 0.025], loss: 16.090722, mae: 45.989771, mean_q: -67.775025
 12400/50000: episode: 62, duration: 1.682s, episode steps: 200, steps per second: 119, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.675 [0.000, 2.000], mean observation: -0.290 [-0.787, 0.015], loss: 13.983935, mae: 46.208318, mean_q: -67.724652
 12600/50000: episode: 63, duration: 1.688s, episode steps: 200, steps per second: 119, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.100 [0.000, 2.000], mean observation: -0.256 [-0.612, 0.008], loss: 10.133814, mae: 43.539270, mean_q: -64.196401
 12800/50000: episode: 64, duration: 1.646s, episode steps: 200, steps per second: 122, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean 

 18200/50000: episode: 91, duration: 1.698s, episode steps: 200, steps per second: 118, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.405 [0.000, 2.000], mean observation: -0.233 [-0.587, 0.008], loss: 10.855528, mae: 43.390440, mean_q: -63.658402
 18400/50000: episode: 92, duration: 1.745s, episode steps: 200, steps per second: 115, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.860 [0.000, 2.000], mean observation: -0.278 [-0.817, 0.024], loss: 14.494032, mae: 46.160267, mean_q: -68.018954
 18600/50000: episode: 93, duration: 1.797s, episode steps: 200, steps per second: 111, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.030 [0.000, 2.000], mean observation: -0.260 [-0.615, 0.009], loss: 11.467219, mae: 44.753378, mean_q: -65.762755
 18800/50000: episode: 94, duration: 1.825s, episode steps: 200, steps per second: 110, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean 

 24200/50000: episode: 121, duration: 1.622s, episode steps: 200, steps per second: 123, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.990 [0.000, 2.000], mean observation: -0.258 [-0.602, 0.010], loss: 12.803362, mae: 45.066047, mean_q: -66.347507
 24400/50000: episode: 122, duration: 1.675s, episode steps: 200, steps per second: 119, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.900 [0.000, 2.000], mean observation: -0.271 [-0.727, 0.015], loss: 14.123809, mae: 45.752264, mean_q: -67.359591
 24600/50000: episode: 123, duration: 1.663s, episode steps: 200, steps per second: 120, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.310 [0.000, 2.000], mean observation: -0.236 [-0.654, 0.017], loss: 10.172039, mae: 43.363581, mean_q: -63.748072
 24800/50000: episode: 124, duration: 1.660s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], m

 30200/50000: episode: 151, duration: 1.649s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.450 [0.000, 2.000], mean observation: -0.226 [-0.588, 0.013], loss: 12.341633, mae: 43.064786, mean_q: -63.402677
 30400/50000: episode: 152, duration: 1.685s, episode steps: 200, steps per second: 119, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.420 [0.000, 2.000], mean observation: -0.229 [-0.623, 0.011], loss: 10.673128, mae: 43.434006, mean_q: -63.690645
 30600/50000: episode: 153, duration: 1.686s, episode steps: 200, steps per second: 119, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.320 [0.000, 2.000], mean observation: -0.234 [-0.733, 0.021], loss: 8.928185, mae: 43.599091, mean_q: -64.281562
 30800/50000: episode: 154, duration: 1.661s, episode steps: 200, steps per second: 120, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], me

 36200/50000: episode: 181, duration: 1.661s, episode steps: 200, steps per second: 120, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.950 [0.000, 2.000], mean observation: -0.272 [-0.751, 0.019], loss: 14.044078, mae: 45.256840, mean_q: -66.912602
 36400/50000: episode: 182, duration: 1.652s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.485 [0.000, 2.000], mean observation: -0.225 [-0.592, 0.011], loss: 11.729991, mae: 42.676716, mean_q: -62.369840
 36600/50000: episode: 183, duration: 1.700s, episode steps: 200, steps per second: 118, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.040 [0.000, 2.000], mean observation: -0.264 [-0.701, 0.012], loss: 13.060466, mae: 44.952912, mean_q: -66.405325
 36800/50000: episode: 184, duration: 1.753s, episode steps: 200, steps per second: 114, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], m

 42200/50000: episode: 211, duration: 1.652s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.885 [0.000, 2.000], mean observation: -0.276 [-0.776, 0.021], loss: 12.032395, mae: 45.565283, mean_q: -67.164056
 42400/50000: episode: 212, duration: 1.650s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.190 [0.000, 2.000], mean observation: -0.250 [-0.614, 0.005], loss: 12.948334, mae: 44.056827, mean_q: -65.036085
 42600/50000: episode: 213, duration: 1.641s, episode steps: 200, steps per second: 122, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.355 [0.000, 2.000], mean observation: -0.236 [-0.571, 0.006], loss: 10.862045, mae: 43.367203, mean_q: -63.645797
 42800/50000: episode: 214, duration: 1.650s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], m

 48200/50000: episode: 241, duration: 1.642s, episode steps: 200, steps per second: 122, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 1.410 [0.000, 2.000], mean observation: -0.234 [-0.661, 0.014], loss: 9.280254, mae: 43.534326, mean_q: -63.963901
 48400/50000: episode: 242, duration: 1.652s, episode steps: 200, steps per second: 121, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.890 [0.000, 2.000], mean observation: -0.271 [-0.692, 0.017], loss: 13.916360, mae: 45.471722, mean_q: -67.316790
 48600/50000: episode: 243, duration: 1.670s, episode steps: 200, steps per second: 120, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], mean action: 0.810 [0.000, 2.000], mean observation: -0.274 [-0.699, 0.014], loss: 14.443666, mae: 45.227689, mean_q: -66.780255
 48800/50000: episode: 244, duration: 1.678s, episode steps: 200, steps per second: 119, episode reward: -200.000, mean reward: -1.000 [-1.000, -1.000], me

<keras.callbacks.callbacks.History at 0x25c66593cf8>

In [23]:
# Finally, evaluate our algorithm for 5 episodes.
sarsa.test(env, nb_episodes=5, visualize=True)

Testing for 5 episodes ...
Episode 1: reward: -200.000, steps: 200
Episode 2: reward: -200.000, steps: 200
Episode 3: reward: -200.000, steps: 200
Episode 4: reward: -200.000, steps: 200
Episode 5: reward: -200.000, steps: 200


<keras.callbacks.callbacks.History at 0x25c6811b080>

In [24]:
env.close()