In [1]:
import gym
import tensorflow as tf
import numpy as np
from tqdm import tqdm
from collections import deque

In [2]:
# keras model approach
from tensorflow.keras import Model,Sequential
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Flatten,BatchNormalization,Dense, Input
from tensorflow.keras.activations import relu

In [3]:
# Keep TensorFlow from reserving the whole GPU memory up front.
# Every detected GPU is configured (rather than indexing [0]) so that this cell
# does not raise IndexError on a CPU-only machine and so that all GPUs on a
# multi-GPU host share the same memory-growth setting.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)
# Use float32 as the default Keras float type.
tf.keras.backend.set_floatx('float32')

In [4]:
# CartPole-v1 environment instance shared by the rest of the notebook.
env = gym.make('CartPole-v1')
# Bare last expression so the notebook displays the action space (see the cell output below).
env.action_space

Discrete(2)

In [5]:
# Single module-level Adam optimizer: model_keras() hands it to compile(), and
# train_step() calls adam.apply_gradients() on it directly.
adam = tf.keras.optimizers.Adam(learning_rate = 0.001)

In [6]:
def model_keras(n_inputs=4, n_actions=2, hidden_units=100, n_hidden_layers=4):
    """Build and compile the fully-connected Q-value network.

    The network is a stack of ``n_hidden_layers`` blocks, each a ReLU ``Dense``
    layer followed by ``BatchNormalization``, topped with a linear ``Dense``
    head that emits one value per action. With the default arguments the
    architecture is identical to the original hard-coded one (4 inputs,
    4 x 100 hidden units, 2 outputs).

    Args:
        n_inputs: Size of the state vector fed to the network.
        n_actions: Number of output units (one Q-value per action).
        hidden_units: Width of every hidden Dense layer.
        n_hidden_layers: Number of Dense + BatchNormalization blocks.

    Returns:
        The compiled Keras ``Model`` named ``"RL_Value_Function"``. It is
        compiled with the module-level ``adam`` optimizer and a
        mean-squared-error loss/metric.
    """
    inputs = Input(shape=(n_inputs,))

    x = inputs
    for _ in range(n_hidden_layers):
        x = Dense(hidden_units, activation='relu', kernel_initializer="glorot_uniform")(x)
        x = BatchNormalization()(x)
    output = Dense(n_actions, activation='linear', kernel_initializer="glorot_uniform")(x)
    model = Model(inputs=inputs, outputs=output, name="RL_Value_Function")

    # summary() prints the table itself and returns None; wrapping it in
    # print() would add a stray "None" line to the cell output.
    model.summary()

    model.compile(optimizer=adam, loss='mean_squared_error', metrics=['mean_squared_error'])

    return model
sample_model = model_keras()


Model: "RL_Value_Function"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
dense (Dense)                (None, 100)               500       
_________________________________________________________________
batch_normalization (BatchNo (None, 100)               400       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
batch_normalization_1 (Batch (None, 100)               400       
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
batch_normalization_2 (Batch (None, 100)         

In [7]:
def custom_loss(y_true,y_pred):
    """Mean-squared-error loss between ``y_true`` and ``y_pred``.

    Thin wrapper: both arguments are forwarded, in the same order and
    unchanged, to ``tf.keras.losses.mean_squared_error`` and its result is
    returned as-is.
    """
    mse = tf.keras.losses.mean_squared_error
    return mse(y_true, y_pred)

In [8]:
# Experience-replay bookkeeping, used to perform decorrelated batch updates
# (this is TD learning).
#
# Apparently the network must only be adjusted for the action it actually took:
# if action 2 was taken, only the value predicted for action 2 should change.
# The target for that action is what we thought the next state was worth plus
# the reward received; the other action values are left as predicted.

# Per-field replay lists:
#   replay_state_q      - input to the NN
#   replay_action       - what we did (the action for that state)
#   replay_reward       - reward we got for choosing that action
#   replay_next_state_q - the value (plus reward) the network is adjusted towards
replay_state_q, replay_action = [], []
replay_reward, replay_next_state_q = [], []

# Bounded transition buffer: holds at most 3000 entries.
replay_batch = deque([], 3000)

# Intended for dropping earlier episode data.
episode = list()

# Training starts only after this many episodes have passed.
warmup = 10
# Counter meant to be updated each time batch training is done, so that old
# data at the start of the lists can be removed after a number of batches.
training_count = 0

# Exploration schedule (epsilon-greedy): start value, multiplicative decay, floor.
epsilon = dict(
    epsilon=1.0,
    epsilon_decay=0.999,
    epsilon_min=0.01,
)

In [9]:
import random

# Seed every RNG the exploration policy draws from: policy() uses
# np.random.rand() for the epsilon test and random.randrange() for the random
# action, so seeding only `random` leaves runs non-reproducible.
SEED = 2020
random.seed(SEED)
np.random.seed(SEED)

@tf.function
def train_step(model,new_state,new_rew,old_q,lamb): 
    """Run one manual gradient step on ``model`` with the module-level ``adam``.

    A forward pass on ``new_state`` (with ``training=True``) produces
    ``logits``; the loss is ``custom_loss(create_y_true(logits, new_rew, lamb),
    old_q)``; its gradients w.r.t. ``model.trainable_weights`` are applied
    through ``adam.apply_gradients``. Nothing is returned.

    NOTE(review): ``create_y_true`` is not defined in the visible part of this
    notebook and this function is not called by the visible cells - confirm
    the helper exists before using it.
    NOTE(review): presumably ``lamb`` is a discount factor and ``new_rew`` the
    reward, judging by how they are passed to ``create_y_true`` - verify.
    """
    
    with tf.GradientTape() as tape:
        # logits is the forward pass
        logits = model(new_state, training=True)
        
        # custom_loss takes (y_true, y_pred): the value built from the fresh
        # forward pass is passed as y_true and old_q as y_pred.
        loss_value = custom_loss(create_y_true(logits,new_rew,lamb),old_q)
    
    #we retrieve the gradients
    grads = tape.gradient(loss_value, model.trainable_weights)
    
    #THIS IS ONE STEP OF GRAD DESCENT (Minimizes the loss)
    adam.apply_gradients(zip(grads, model.trainable_weights))
    #model.fit(x = inputs,y = outputs,batch_size = 1,epochs = 1,verbose = 0)

#@tf.function
def batch_train(model,gamma,batch_size,epsilon):
    """Run one Q-learning update on a random mini-batch from ``replay_batch``.

    Each replay entry is a tuple ``(state, action, reward, next_state)`` with
    an optional fifth element ``done``. For every sampled transition the
    target is ``reward + gamma * max_a Q(next_state, a)``; when ``done`` is
    present and truthy the bootstrap term is dropped. Only the Q-value of the
    action that was actually taken is replaced by the target; the other
    outputs keep the network's current prediction, so they contribute no
    error to the fit.

    Args:
        model: Network to evaluate and train. It is called directly for the
            forward passes and trained through ``model.fit``.
        gamma: Discount factor applied to the next-state value.
        batch_size: Number of transitions to sample.
        epsilon: Dict with keys ``'epsilon'``, ``'epsilon_decay'`` and
            ``'epsilon_min'``; ``'epsilon'`` is decayed in place once per
            training call while it is above ``'epsilon_min'``.

    Returns:
        None. If the replay buffer holds fewer than ``batch_size`` entries the
        call is a no-op (no decay, no training).
    """
    # random.sample would raise ValueError on an under-filled buffer.
    if len(replay_batch) < batch_size:
        return

    # decaying the exploration
    if epsilon['epsilon'] > epsilon['epsilon_min']:
        epsilon['epsilon'] = epsilon['epsilon'] * epsilon['epsilon_decay']

    batch = random.sample(replay_batch, batch_size)

    # States are stored with a leading batch axis of 1; flatten to (batch, features).
    batch_current_state = np.asarray(
        [transition[0] for transition in batch], dtype='float64'
    ).reshape(batch_size, -1)
    batch_next_state = np.asarray(
        [transition[3] for transition in batch], dtype='float64'
    ).reshape(batch_size, -1)
    batch_action = np.asarray([transition[1] for transition in batch], dtype='int64')
    # Use the rewards of the SAMPLED transitions, not whatever `reward`
    # happens to be bound to in the notebook's global scope.
    batch_reward = np.asarray([transition[2] for transition in batch], dtype='float32')
    # Optional terminal flag (5th tuple element); older 4-tuples count as non-terminal.
    batch_done = np.asarray(
        [len(transition) > 4 and bool(transition[4]) for transition in batch],
        dtype='float32',
    )

    # Value of the best action in the next state (inference mode). A direct
    # call avoids the per-call overhead of model.predict on tiny batches.
    next_q = np.asarray(model(batch_next_state, training=False))
    max_q = next_q.max(axis=1).astype('float32')
    target = batch_reward + gamma * max_q * (1.0 - batch_done)  # this is the Q learning Target

    # Start from the current predictions and overwrite only the entry of the
    # action that was taken, as only that one has to be updated.
    q_target = np.array(model(batch_current_state, training=False))
    q_target[np.arange(batch_size), batch_action] = target

    model.fit(batch_current_state, q_target, batch_size=batch_size,
              epochs=1, verbose=0)

def policy(q_vals,eps):
    """Epsilon-greedy action selection.

    Args:
        q_vals: Q-values with a leading batch axis; only row ``q_vals[0]`` is
            used for the greedy choice. The size of the last axis is taken as
            the number of available actions.
        eps: Exploration probability. A uniform draw from ``np.random.rand()``
            that is ``<= eps`` triggers a random action.

    Returns:
        A random action index from ``random.randrange(n_actions)`` when
        exploring, otherwise ``np.argmax(q_vals[0])``.
    """
    if np.random.rand() <= eps:
        # Derive the action count from the Q-values instead of hard-coding 2.
        n_actions = int(np.shape(q_vals)[-1])
        return random.randrange(n_actions)
    return np.argmax(q_vals[0])

In [12]:
# Run settings for the training loop.
n_episodes = 500              # number of episodes to play
max_steps_per_episode = 200   # hard cap on steps within one episode
# Render the last N episodes. The previous threshold (i > 4990) could never be
# reached with 500 episodes, so the default of 0 keeps rendering off; raise it
# to watch the final episodes (needs a display).
render_last_n = 0

for i in tqdm(range(n_episodes)):
    # NOTE(review): assumes the gym API where reset() returns only the
    # observation and step() returns a 4-tuple - confirm for the installed gym.
    observation = env.reset()
    observation = np.expand_dims(observation, axis=0)  # add the batch axis

    total_reward = 0
    for j in range(max_steps_per_episode):
        episode.append(i)

        # storing the current state
        state_1 = observation

        # current Q-values and the epsilon-greedy action chosen from them
        q_state = sample_model(observation)
        action = policy(q_state, epsilon["epsilon"])
        state_action = action
        observation, reward, done, info = env.step(action)

        # calculating the total reward
        total_reward = total_reward + reward

        observation = np.expand_dims(observation, axis=0)
        state_2 = observation
        state_reward = reward

        # The terminal flag is stored as a 5th element so that the learner can
        # stop bootstrapping from states that ended the episode; consumers
        # that only read indices 0-3 are unaffected.
        replay_batch.append((state_1, state_action, state_reward, state_2, done))

        # Train on a replayed mini-batch after every step once warm-up is over.
        if i > warmup:
            batch_train(sample_model, 0.99, 64, epsilon)

        if done:
            break
        if i >= n_episodes - render_last_n:
            env.render()
    print(total_reward)

  0%|          | 2/500 [00:00<00:28, 17.69it/s]

10.0
10.0
8.0
9.0
8.0

  1%|▏         | 7/500 [00:00<00:26, 18.84it/s]


10.0
10.0
9.0


  2%|▏         | 11/500 [00:00<00:26, 18.76it/s]

10.0
10.0
8.0
10.0


  3%|▎         | 13/500 [00:01<01:53,  4.28it/s]

10.0
10.0


  3%|▎         | 15/500 [00:03<02:53,  2.80it/s]

10.0


  3%|▎         | 16/500 [00:03<03:33,  2.27it/s]

10.0


  3%|▎         | 17/500 [00:04<03:55,  2.05it/s]

9.0


  4%|▎         | 18/500 [00:05<04:26,  1.81it/s]

11.0


  4%|▍         | 19/500 [00:05<04:27,  1.80it/s]

9.0


  4%|▍         | 20/500 [00:06<04:43,  1.69it/s]

10.0


  4%|▍         | 21/500 [00:06<04:52,  1.64it/s]

10.0


  4%|▍         | 22/500 [00:07<04:51,  1.64it/s]

9.0


  5%|▍         | 23/500 [00:08<04:57,  1.60it/s]

10.0


  5%|▍         | 24/500 [00:08<04:58,  1.60it/s]

9.0


  5%|▌         | 25/500 [00:09<04:53,  1.62it/s]

9.0


  5%|▌         | 26/500 [00:10<04:40,  1.69it/s]

8.0


  5%|▌         | 27/500 [00:10<04:40,  1.69it/s]

9.0


  6%|▌         | 28/500 [00:11<04:49,  1.63it/s]

10.0


  6%|▌         | 29/500 [00:11<05:03,  1.55it/s]

10.0


  6%|▌         | 30/500 [00:12<05:11,  1.51it/s]

10.0


  6%|▌         | 31/500 [00:13<05:03,  1.54it/s]

9.0


  6%|▋         | 32/500 [00:13<04:45,  1.64it/s]

8.0


  7%|▋         | 33/500 [00:14<04:58,  1.56it/s]

11.0


  7%|▋         | 34/500 [00:15<04:56,  1.57it/s]

9.0


  7%|▋         | 35/500 [00:15<04:59,  1.55it/s]

10.0


  7%|▋         | 36/500 [00:16<05:10,  1.50it/s]

11.0


  7%|▋         | 37/500 [00:17<04:55,  1.56it/s]

9.0


  8%|▊         | 38/500 [00:17<05:00,  1.54it/s]

10.0


  8%|▊         | 39/500 [00:18<05:06,  1.50it/s]

10.0


  8%|▊         | 40/500 [00:19<05:16,  1.46it/s]

11.0


  8%|▊         | 41/500 [00:19<05:07,  1.49it/s]

9.0


  8%|▊         | 42/500 [00:20<05:09,  1.48it/s]

10.0


  9%|▊         | 43/500 [00:21<04:56,  1.54it/s]

9.0


  9%|▉         | 44/500 [00:21<04:47,  1.59it/s]

9.0


  9%|▉         | 45/500 [00:22<04:38,  1.63it/s]

9.0


  9%|▉         | 46/500 [00:22<04:34,  1.65it/s]

9.0


  9%|▉         | 47/500 [00:23<04:28,  1.68it/s]

9.0


 10%|▉         | 48/500 [00:24<04:34,  1.65it/s]

10.0


 10%|▉         | 49/500 [00:24<04:34,  1.64it/s]

9.0


 10%|█         | 50/500 [00:25<04:40,  1.60it/s]

9.0


 10%|█         | 51/500 [00:26<04:45,  1.57it/s]

10.0


 10%|█         | 52/500 [00:26<04:43,  1.58it/s]

10.0


 11%|█         | 53/500 [00:27<04:34,  1.63it/s]

9.0


 11%|█         | 54/500 [00:27<04:39,  1.59it/s]

10.0


 11%|█         | 55/500 [00:28<04:23,  1.69it/s]

8.0


 11%|█         | 56/500 [00:29<04:27,  1.66it/s]

10.0


 11%|█▏        | 57/500 [00:29<04:31,  1.63it/s]

10.0


 12%|█▏        | 58/500 [00:30<04:35,  1.60it/s]

10.0


 12%|█▏        | 59/500 [00:30<04:29,  1.64it/s]

9.0


 12%|█▏        | 60/500 [00:31<04:17,  1.71it/s]

8.0


 12%|█▏        | 61/500 [00:32<04:25,  1.65it/s]

10.0


 12%|█▏        | 62/500 [00:32<04:33,  1.60it/s]

10.0


 13%|█▎        | 63/500 [00:33<04:37,  1.57it/s]

10.0


 13%|█▎        | 64/500 [00:33<04:27,  1.63it/s]

9.0


 13%|█▎        | 65/500 [00:34<04:20,  1.67it/s]

9.0


 13%|█▎        | 66/500 [00:35<04:16,  1.69it/s]

9.0


 13%|█▎        | 67/500 [00:35<04:21,  1.65it/s]

10.0


 14%|█▎        | 68/500 [00:36<04:33,  1.58it/s]

11.0


 14%|█▍        | 69/500 [00:37<04:33,  1.58it/s]

10.0


 14%|█▍        | 70/500 [00:37<04:32,  1.58it/s]

10.0


 14%|█▍        | 71/500 [00:38<04:24,  1.62it/s]

9.0


 14%|█▍        | 72/500 [00:38<04:22,  1.63it/s]

9.0


 15%|█▍        | 73/500 [00:39<04:17,  1.66it/s]

9.0


 15%|█▍        | 74/500 [00:40<04:21,  1.63it/s]

10.0


 15%|█▌        | 75/500 [00:40<04:27,  1.59it/s]

10.0


 15%|█▌        | 76/500 [00:41<04:29,  1.57it/s]

10.0


 15%|█▌        | 77/500 [00:42<04:37,  1.52it/s]

11.0


 16%|█▌        | 78/500 [00:42<04:27,  1.58it/s]

9.0


 16%|█▌        | 79/500 [00:43<04:26,  1.58it/s]

9.0


 16%|█▌        | 80/500 [00:44<04:28,  1.56it/s]

9.0


 16%|█▌        | 81/500 [00:44<04:56,  1.41it/s]

11.0


 16%|█▋        | 82/500 [00:46<07:21,  1.06s/it]

26.0


 17%|█▋        | 83/500 [00:47<06:29,  1.07it/s]

10.0


 17%|█▋        | 84/500 [00:48<06:14,  1.11it/s]

11.0


 17%|█▋        | 85/500 [00:48<05:54,  1.17it/s]

11.0


 17%|█▋        | 86/500 [00:49<05:25,  1.27it/s]

9.0


 17%|█▋        | 87/500 [00:50<05:11,  1.33it/s]

9.0


 18%|█▊        | 88/500 [00:50<05:02,  1.36it/s]

9.0


 18%|█▊        | 89/500 [00:51<05:05,  1.35it/s]

10.0


 18%|█▊        | 90/500 [00:52<05:03,  1.35it/s]

10.0


 18%|█▊        | 91/500 [00:53<05:02,  1.35it/s]

10.0


 18%|█▊        | 92/500 [00:53<05:08,  1.32it/s]

11.0


 19%|█▊        | 93/500 [00:54<04:53,  1.39it/s]

9.0


 19%|█▉        | 94/500 [00:55<04:37,  1.46it/s]

8.0


 19%|█▉        | 95/500 [00:55<04:35,  1.47it/s]

9.0


 19%|█▉        | 96/500 [00:56<04:25,  1.52it/s]

9.0


 19%|█▉        | 97/500 [01:13<38:18,  5.70s/it]

10.0


 20%|█▉        | 98/500 [01:14<28:06,  4.20s/it]

9.0


 20%|█▉        | 99/500 [01:15<21:10,  3.17s/it]

10.0


 20%|██        | 100/500 [01:16<16:15,  2.44s/it]

9.0


 20%|██        | 101/500 [01:16<12:43,  1.91s/it]

9.0


 20%|██        | 102/500 [01:17<10:20,  1.56s/it]

10.0


 21%|██        | 103/500 [01:18<08:39,  1.31s/it]

10.0


 21%|██        | 104/500 [01:18<07:24,  1.12s/it]

9.0


 21%|██        | 105/500 [01:19<06:35,  1.00s/it]

10.0


 21%|██        | 106/500 [01:20<05:47,  1.13it/s]

9.0


 21%|██▏       | 107/500 [01:20<05:12,  1.26it/s]

9.0


 22%|██▏       | 108/500 [01:21<04:56,  1.32it/s]

10.0


 22%|██▏       | 109/500 [01:22<04:35,  1.42it/s]

9.0


 22%|██▏       | 110/500 [01:22<04:29,  1.45it/s]

9.0


 22%|██▏       | 111/500 [01:23<04:22,  1.48it/s]

9.0


 22%|██▏       | 112/500 [01:24<04:18,  1.50it/s]

9.0


 23%|██▎       | 113/500 [01:24<04:31,  1.43it/s]

11.0


 23%|██▎       | 114/500 [01:25<04:23,  1.47it/s]

9.0


 23%|██▎       | 115/500 [01:26<04:20,  1.48it/s]

10.0


 23%|██▎       | 116/500 [01:26<04:22,  1.46it/s]

11.0


 23%|██▎       | 117/500 [01:27<04:26,  1.44it/s]

10.0


 24%|██▎       | 118/500 [01:28<04:18,  1.48it/s]

9.0


 24%|██▍       | 119/500 [01:28<04:17,  1.48it/s]

9.0


 24%|██▍       | 120/500 [01:29<04:20,  1.46it/s]

10.0


 24%|██▍       | 121/500 [01:30<04:12,  1.50it/s]

9.0


 24%|██▍       | 122/500 [01:30<04:05,  1.54it/s]

9.0


 25%|██▍       | 123/500 [01:31<03:57,  1.59it/s]

9.0


 25%|██▍       | 124/500 [01:32<03:57,  1.58it/s]

10.0


 25%|██▌       | 125/500 [01:32<03:42,  1.69it/s]

8.0


 25%|██▌       | 126/500 [01:33<03:31,  1.77it/s]

8.0


 25%|██▌       | 127/500 [01:33<03:44,  1.66it/s]

10.0


 26%|██▌       | 128/500 [01:34<03:38,  1.70it/s]

8.0


 26%|██▌       | 129/500 [01:35<03:50,  1.61it/s]

10.0


 26%|██▌       | 130/500 [01:35<04:06,  1.50it/s]

11.0


 26%|██▌       | 131/500 [01:36<04:24,  1.39it/s]

12.0


 26%|██▋       | 132/500 [01:37<04:38,  1.32it/s]

13.0


 27%|██▋       | 133/500 [01:38<04:38,  1.32it/s]

12.0


 27%|██▋       | 134/500 [01:38<04:10,  1.46it/s]

8.0


 27%|██▋       | 135/500 [01:39<03:58,  1.53it/s]

9.0


 27%|██▋       | 136/500 [01:39<03:50,  1.58it/s]

9.0


 27%|██▋       | 137/500 [01:40<03:50,  1.58it/s]

10.0


 28%|██▊       | 138/500 [01:41<03:58,  1.52it/s]

11.0


 28%|██▊       | 139/500 [01:41<03:53,  1.54it/s]

10.0


 28%|██▊       | 140/500 [01:42<03:46,  1.59it/s]

9.0


 28%|██▊       | 141/500 [01:43<03:40,  1.63it/s]

9.0


 28%|██▊       | 142/500 [01:43<03:51,  1.55it/s]

10.0


 29%|██▊       | 143/500 [01:44<03:50,  1.55it/s]

9.0


 29%|██▉       | 144/500 [01:45<04:00,  1.48it/s]

10.0


 29%|██▉       | 145/500 [01:45<03:47,  1.56it/s]

8.0


 29%|██▉       | 146/500 [01:46<03:57,  1.49it/s]

10.0


 29%|██▉       | 147/500 [01:47<03:55,  1.50it/s]

9.0


 30%|██▉       | 148/500 [01:47<03:46,  1.56it/s]

8.0


 30%|██▉       | 149/500 [01:48<03:53,  1.51it/s]

10.0


 30%|███       | 150/500 [01:49<03:59,  1.46it/s]

10.0


 30%|███       | 151/500 [01:49<03:54,  1.49it/s]

9.0


 30%|███       | 152/500 [01:50<03:51,  1.50it/s]

9.0


 31%|███       | 153/500 [01:51<03:48,  1.52it/s]

9.0


 31%|███       | 154/500 [01:51<03:54,  1.48it/s]

10.0


 31%|███       | 155/500 [01:52<03:57,  1.45it/s]

10.0


 31%|███       | 156/500 [01:53<04:00,  1.43it/s]

10.0


 31%|███▏      | 157/500 [01:53<04:01,  1.42it/s]

10.0


 32%|███▏      | 158/500 [01:54<03:56,  1.44it/s]

9.0


 32%|███▏      | 159/500 [01:55<04:00,  1.42it/s]

10.0


 32%|███▏      | 160/500 [01:56<04:04,  1.39it/s]

10.0


 32%|███▏      | 161/500 [01:56<04:02,  1.40it/s]

10.0


 32%|███▏      | 162/500 [01:57<03:53,  1.45it/s]

9.0


 33%|███▎      | 163/500 [01:58<04:01,  1.40it/s]

11.0


 33%|███▎      | 164/500 [01:58<04:01,  1.39it/s]

10.0


 33%|███▎      | 165/500 [01:59<04:04,  1.37it/s]

10.0


 33%|███▎      | 166/500 [02:00<03:54,  1.42it/s]

9.0


 33%|███▎      | 167/500 [02:01<03:51,  1.44it/s]

9.0


 34%|███▎      | 168/500 [02:01<03:44,  1.48it/s]

9.0


 34%|███▍      | 169/500 [02:02<03:51,  1.43it/s]

10.0


 34%|███▍      | 170/500 [02:03<03:54,  1.41it/s]

10.0


 34%|███▍      | 171/500 [02:03<03:56,  1.39it/s]

10.0


 34%|███▍      | 172/500 [02:04<03:54,  1.40it/s]

10.0


 35%|███▍      | 173/500 [02:05<03:45,  1.45it/s]

9.0


 35%|███▍      | 174/500 [02:05<03:52,  1.40it/s]

11.0


 35%|███▌      | 175/500 [02:06<03:51,  1.41it/s]

10.0


 35%|███▌      | 176/500 [02:07<03:52,  1.39it/s]

10.0


 35%|███▌      | 177/500 [02:08<03:55,  1.37it/s]

10.0


 36%|███▌      | 178/500 [02:08<03:38,  1.47it/s]

8.0


 36%|███▌      | 179/500 [02:09<03:42,  1.45it/s]

10.0


 36%|███▌      | 180/500 [02:10<03:39,  1.45it/s]

9.0


 36%|███▌      | 181/500 [02:10<03:52,  1.37it/s]

11.0


 36%|███▋      | 182/500 [02:11<03:56,  1.35it/s]

10.0


 37%|███▋      | 183/500 [02:12<03:51,  1.37it/s]

9.0


 37%|███▋      | 184/500 [02:13<03:52,  1.36it/s]

10.0


 37%|███▋      | 185/500 [02:13<03:51,  1.36it/s]

10.0


 37%|███▋      | 186/500 [02:14<03:49,  1.37it/s]

10.0


 37%|███▋      | 187/500 [02:15<03:40,  1.42it/s]

10.0


 38%|███▊      | 188/500 [02:15<03:30,  1.48it/s]

9.0


 38%|███▊      | 189/500 [02:16<03:32,  1.46it/s]

10.0


 38%|███▊      | 190/500 [02:17<03:37,  1.42it/s]

10.0


 38%|███▊      | 191/500 [02:17<03:26,  1.49it/s]

9.0


 38%|███▊      | 192/500 [02:18<03:23,  1.52it/s]

9.0


 39%|███▊      | 193/500 [02:19<03:21,  1.53it/s]

10.0


 39%|███▉      | 194/500 [02:19<03:20,  1.53it/s]

10.0


 39%|███▉      | 195/500 [02:20<03:17,  1.54it/s]

10.0


 39%|███▉      | 196/500 [02:21<03:09,  1.60it/s]

9.0


 39%|███▉      | 197/500 [02:21<03:13,  1.57it/s]

10.0


 40%|███▉      | 198/500 [02:22<03:12,  1.57it/s]

10.0


 40%|███▉      | 199/500 [02:23<03:12,  1.57it/s]

10.0


 40%|████      | 200/500 [02:23<03:09,  1.59it/s]

9.0


 40%|████      | 201/500 [02:24<03:09,  1.57it/s]

10.0


 40%|████      | 202/500 [02:24<03:05,  1.61it/s]

9.0


 41%|████      | 203/500 [02:25<02:59,  1.65it/s]

9.0


 41%|████      | 204/500 [02:26<03:08,  1.57it/s]

10.0


 41%|████      | 205/500 [02:26<03:08,  1.56it/s]

10.0


 41%|████      | 206/500 [02:27<03:08,  1.56it/s]

10.0


 41%|████▏     | 207/500 [02:28<03:16,  1.49it/s]

10.0


 42%|████▏     | 208/500 [02:28<03:22,  1.44it/s]

10.0


 42%|████▏     | 209/500 [02:29<03:24,  1.42it/s]

9.0


 42%|████▏     | 210/500 [02:30<03:21,  1.44it/s]

9.0


 42%|████▏     | 211/500 [02:30<03:09,  1.52it/s]

9.0


 42%|████▏     | 212/500 [02:31<03:12,  1.50it/s]

11.0


 43%|████▎     | 213/500 [02:32<03:16,  1.46it/s]

11.0


 43%|████▎     | 214/500 [02:32<03:04,  1.55it/s]

8.0


 43%|████▎     | 215/500 [02:33<02:59,  1.58it/s]

9.0


 43%|████▎     | 216/500 [02:34<03:03,  1.55it/s]

10.0


 43%|████▎     | 217/500 [02:34<02:59,  1.58it/s]

9.0


 44%|████▎     | 218/500 [02:35<03:04,  1.53it/s]

11.0


 44%|████▍     | 219/500 [02:35<02:52,  1.63it/s]

8.0


 44%|████▍     | 220/500 [02:36<02:48,  1.66it/s]

9.0


 44%|████▍     | 221/500 [02:37<02:45,  1.68it/s]

9.0


 44%|████▍     | 222/500 [02:37<02:54,  1.60it/s]

10.0


 45%|████▍     | 223/500 [02:38<02:58,  1.55it/s]

10.0


 45%|████▍     | 224/500 [02:39<02:59,  1.54it/s]

10.0


 45%|████▌     | 225/500 [02:39<02:53,  1.59it/s]

9.0


 45%|████▌     | 226/500 [02:40<02:56,  1.55it/s]

10.0


 45%|████▌     | 227/500 [02:41<02:51,  1.60it/s]

9.0


 46%|████▌     | 228/500 [02:41<02:52,  1.57it/s]

10.0


 46%|████▌     | 229/500 [02:42<02:47,  1.62it/s]

9.0


 46%|████▌     | 230/500 [02:42<02:43,  1.65it/s]

9.0


 46%|████▌     | 231/500 [02:43<02:40,  1.68it/s]

9.0


 46%|████▋     | 232/500 [02:43<02:36,  1.71it/s]

8.0


 47%|████▋     | 233/500 [02:44<02:39,  1.67it/s]

10.0


 47%|████▋     | 234/500 [02:45<02:39,  1.67it/s]

9.0


 47%|████▋     | 235/500 [02:45<02:52,  1.53it/s]

11.0


 47%|████▋     | 236/500 [02:46<02:43,  1.61it/s]

8.0


 47%|████▋     | 237/500 [02:47<02:35,  1.69it/s]

8.0


 48%|████▊     | 238/500 [02:47<02:40,  1.63it/s]

10.0


 48%|████▊     | 239/500 [02:48<02:46,  1.57it/s]

10.0


 48%|████▊     | 240/500 [02:49<02:54,  1.49it/s]

10.0


 48%|████▊     | 241/500 [02:49<02:56,  1.47it/s]

11.0


 48%|████▊     | 242/500 [02:50<02:47,  1.54it/s]

9.0


 49%|████▊     | 243/500 [02:51<02:46,  1.54it/s]

10.0


 49%|████▉     | 244/500 [02:51<02:37,  1.63it/s]

8.0


 49%|████▉     | 245/500 [02:52<02:39,  1.60it/s]

10.0


 49%|████▉     | 246/500 [02:52<02:34,  1.65it/s]

9.0


 49%|████▉     | 247/500 [02:53<02:35,  1.63it/s]

10.0


 50%|████▉     | 248/500 [02:54<02:31,  1.67it/s]

9.0


 50%|████▉     | 249/500 [02:54<02:32,  1.65it/s]

10.0


 50%|█████     | 250/500 [02:55<02:33,  1.62it/s]

10.0


 50%|█████     | 251/500 [02:55<02:30,  1.66it/s]

9.0


 50%|█████     | 252/500 [02:56<02:38,  1.57it/s]

11.0


 51%|█████     | 253/500 [02:57<02:32,  1.62it/s]

9.0


 51%|█████     | 254/500 [02:57<02:23,  1.71it/s]

8.0


 51%|█████     | 255/500 [02:58<02:21,  1.73it/s]

9.0


 51%|█████     | 256/500 [02:58<02:24,  1.69it/s]

10.0


 51%|█████▏    | 257/500 [02:59<02:26,  1.66it/s]

10.0


 52%|█████▏    | 258/500 [03:00<02:24,  1.67it/s]

9.0


 52%|█████▏    | 259/500 [03:00<02:30,  1.60it/s]

11.0


 52%|█████▏    | 260/500 [03:01<02:30,  1.59it/s]

10.0


 52%|█████▏    | 261/500 [03:01<02:26,  1.64it/s]

9.0


 52%|█████▏    | 262/500 [03:02<02:22,  1.67it/s]

9.0


 53%|█████▎    | 263/500 [03:03<02:19,  1.70it/s]

9.0


 53%|█████▎    | 264/500 [03:03<02:23,  1.64it/s]

10.0


 53%|█████▎    | 265/500 [03:04<02:25,  1.62it/s]

9.0


 53%|█████▎    | 266/500 [03:05<02:33,  1.53it/s]

10.0


 53%|█████▎    | 267/500 [03:05<02:32,  1.53it/s]

10.0


 54%|█████▎    | 268/500 [03:06<02:29,  1.55it/s]

10.0


 54%|█████▍    | 269/500 [03:06<02:24,  1.60it/s]

9.0


 54%|█████▍    | 270/500 [03:07<02:24,  1.59it/s]

10.0


 54%|█████▍    | 271/500 [03:08<02:16,  1.67it/s]

8.0


 54%|█████▍    | 272/500 [03:08<02:09,  1.76it/s]

8.0


 55%|█████▍    | 273/500 [03:09<02:04,  1.83it/s]

8.0


 55%|█████▍    | 274/500 [03:09<02:00,  1.87it/s]

8.0


 55%|█████▌    | 275/500 [03:10<02:06,  1.78it/s]

10.0


 55%|█████▌    | 276/500 [03:10<02:07,  1.76it/s]

9.0


 55%|█████▌    | 277/500 [03:11<02:09,  1.73it/s]

9.0


 56%|█████▌    | 278/500 [03:12<02:14,  1.65it/s]

10.0


 56%|█████▌    | 279/500 [03:12<02:23,  1.54it/s]

10.0


 56%|█████▌    | 280/500 [03:13<02:26,  1.50it/s]

10.0


 56%|█████▌    | 281/500 [03:14<02:26,  1.50it/s]

10.0


 56%|█████▋    | 282/500 [03:14<02:20,  1.55it/s]

9.0


 57%|█████▋    | 283/500 [03:15<02:14,  1.62it/s]

8.0


 57%|█████▋    | 284/500 [03:16<02:17,  1.57it/s]

10.0


 57%|█████▋    | 285/500 [03:16<02:09,  1.66it/s]

8.0


 57%|█████▋    | 286/500 [03:17<02:03,  1.74it/s]

8.0


 57%|█████▋    | 287/500 [03:17<02:02,  1.74it/s]

9.0


 58%|█████▊    | 288/500 [03:18<02:01,  1.75it/s]

9.0


 58%|█████▊    | 289/500 [03:18<02:01,  1.74it/s]

9.0


 58%|█████▊    | 290/500 [03:19<02:03,  1.69it/s]

10.0


 58%|█████▊    | 291/500 [03:20<02:06,  1.65it/s]

10.0


 58%|█████▊    | 292/500 [03:20<02:08,  1.62it/s]

9.0


 59%|█████▊    | 293/500 [03:21<02:11,  1.57it/s]

10.0


 59%|█████▉    | 294/500 [03:22<02:18,  1.48it/s]

12.0


 59%|█████▉    | 295/500 [03:22<02:19,  1.47it/s]

11.0


 59%|█████▉    | 296/500 [03:23<02:12,  1.54it/s]

9.0


 59%|█████▉    | 297/500 [03:24<02:08,  1.57it/s]

9.0


 60%|█████▉    | 298/500 [03:24<02:08,  1.57it/s]

9.0


 60%|█████▉    | 299/500 [03:25<02:15,  1.48it/s]

12.0


 60%|██████    | 300/500 [03:25<02:05,  1.60it/s]

8.0


 60%|██████    | 301/500 [03:26<02:05,  1.58it/s]

10.0


 60%|██████    | 302/500 [03:27<02:07,  1.55it/s]

10.0


 61%|██████    | 303/500 [03:27<02:04,  1.58it/s]

9.0


 61%|██████    | 304/500 [03:28<02:13,  1.46it/s]

10.0


 61%|██████    | 305/500 [03:29<02:13,  1.46it/s]

10.0


 61%|██████    | 306/500 [03:30<02:10,  1.49it/s]

9.0


 61%|██████▏   | 307/500 [03:30<02:09,  1.49it/s]

10.0


 62%|██████▏   | 308/500 [03:31<02:08,  1.50it/s]

10.0


 62%|██████▏   | 309/500 [03:31<02:02,  1.55it/s]

9.0


 62%|██████▏   | 310/500 [03:32<02:01,  1.56it/s]

10.0


 62%|██████▏   | 311/500 [03:33<02:01,  1.56it/s]

10.0


 62%|██████▏   | 312/500 [03:33<01:56,  1.61it/s]

9.0


 63%|██████▎   | 313/500 [03:34<01:53,  1.65it/s]

9.0


 63%|██████▎   | 314/500 [03:35<01:54,  1.62it/s]

10.0


 63%|██████▎   | 315/500 [03:35<01:55,  1.61it/s]

10.0


 63%|██████▎   | 316/500 [03:36<01:53,  1.62it/s]

9.0


 63%|██████▎   | 317/500 [03:36<01:52,  1.62it/s]

9.0


 64%|██████▎   | 318/500 [03:37<01:55,  1.57it/s]

10.0


 64%|██████▍   | 319/500 [03:38<01:55,  1.57it/s]

10.0


 64%|██████▍   | 320/500 [03:38<01:47,  1.67it/s]

8.0


 64%|██████▍   | 321/500 [03:39<01:49,  1.64it/s]

10.0


 64%|██████▍   | 322/500 [03:39<01:47,  1.66it/s]

9.0


 65%|██████▍   | 323/500 [03:40<01:48,  1.64it/s]

10.0


 65%|██████▍   | 324/500 [03:41<01:49,  1.61it/s]

10.0


 65%|██████▌   | 325/500 [03:41<01:53,  1.55it/s]

11.0


 65%|██████▌   | 326/500 [03:42<01:48,  1.60it/s]

9.0


 65%|██████▌   | 327/500 [03:43<01:49,  1.58it/s]

10.0


 66%|██████▌   | 328/500 [03:43<01:45,  1.62it/s]

9.0


 66%|██████▌   | 329/500 [03:44<01:44,  1.63it/s]

9.0


 66%|██████▌   | 330/500 [03:44<01:42,  1.66it/s]

9.0


 66%|██████▌   | 331/500 [03:45<01:37,  1.73it/s]

8.0


 66%|██████▋   | 332/500 [03:46<01:40,  1.67it/s]

10.0


 67%|██████▋   | 333/500 [03:46<01:39,  1.68it/s]

9.0


 67%|██████▋   | 334/500 [03:47<01:41,  1.64it/s]

10.0


 67%|██████▋   | 335/500 [03:47<01:40,  1.65it/s]

9.0


 67%|██████▋   | 336/500 [03:48<01:38,  1.66it/s]

9.0


 67%|██████▋   | 337/500 [03:49<01:39,  1.63it/s]

10.0


 68%|██████▊   | 338/500 [03:49<01:40,  1.61it/s]

10.0


 68%|██████▊   | 339/500 [03:50<01:44,  1.54it/s]

11.0


 68%|██████▊   | 340/500 [03:51<01:40,  1.59it/s]

9.0


 68%|██████▊   | 341/500 [03:51<01:37,  1.63it/s]

9.0


 68%|██████▊   | 342/500 [03:52<01:41,  1.55it/s]

11.0


 69%|██████▊   | 343/500 [03:52<01:41,  1.54it/s]

9.0


 69%|██████▉   | 344/500 [03:53<01:39,  1.56it/s]

9.0


 69%|██████▉   | 345/500 [03:54<01:39,  1.55it/s]

10.0


 69%|██████▉   | 346/500 [03:54<01:42,  1.51it/s]

10.0


 69%|██████▉   | 347/500 [03:55<01:41,  1.51it/s]

10.0


 70%|██████▉   | 348/500 [03:56<02:11,  1.16it/s]

19.0


 70%|██████▉   | 349/500 [03:58<02:32,  1.01s/it]

21.0


 70%|███████   | 350/500 [03:58<02:08,  1.16it/s]

8.0


 70%|███████   | 351/500 [03:59<01:52,  1.32it/s]

8.0


 70%|███████   | 352/500 [03:59<01:47,  1.38it/s]

10.0


 71%|███████   | 353/500 [04:00<01:42,  1.44it/s]

10.0


 71%|███████   | 354/500 [04:01<01:38,  1.48it/s]

10.0


 71%|███████   | 355/500 [04:01<01:36,  1.50it/s]

10.0


 71%|███████   | 356/500 [04:02<01:31,  1.57it/s]

9.0


 71%|███████▏  | 357/500 [04:03<01:31,  1.57it/s]

10.0


 72%|███████▏  | 358/500 [04:03<01:25,  1.67it/s]

8.0


 72%|███████▏  | 359/500 [04:04<01:26,  1.63it/s]

10.0


 72%|███████▏  | 360/500 [04:04<01:21,  1.71it/s]

8.0


 72%|███████▏  | 361/500 [04:05<01:20,  1.72it/s]

9.0


 72%|███████▏  | 362/500 [04:06<01:23,  1.66it/s]

10.0


 73%|███████▎  | 363/500 [04:06<01:24,  1.63it/s]

10.0


 73%|███████▎  | 364/500 [04:07<01:24,  1.61it/s]

10.0


 73%|███████▎  | 365/500 [04:07<01:25,  1.58it/s]

10.0


 73%|███████▎  | 366/500 [04:08<01:28,  1.51it/s]

11.0


 73%|███████▎  | 367/500 [04:09<01:30,  1.46it/s]

11.0


 74%|███████▎  | 368/500 [04:10<01:28,  1.49it/s]

10.0


 74%|███████▍  | 369/500 [04:10<01:23,  1.57it/s]

9.0


 74%|███████▍  | 370/500 [04:11<01:25,  1.52it/s]

11.0


 74%|███████▍  | 371/500 [04:12<01:27,  1.47it/s]

11.0


 74%|███████▍  | 372/500 [04:12<01:22,  1.55it/s]

9.0


 75%|███████▍  | 373/500 [04:13<01:22,  1.54it/s]

10.0


 75%|███████▍  | 374/500 [04:13<01:21,  1.55it/s]

10.0


 75%|███████▌  | 375/500 [04:14<01:15,  1.66it/s]

8.0


 75%|███████▌  | 376/500 [04:15<01:16,  1.63it/s]

10.0


 75%|███████▌  | 377/500 [04:15<01:13,  1.67it/s]

9.0


 76%|███████▌  | 378/500 [04:16<01:13,  1.67it/s]

9.0


 76%|███████▌  | 379/500 [04:16<01:14,  1.62it/s]

10.0


 76%|███████▌  | 380/500 [04:17<01:14,  1.60it/s]

10.0


 76%|███████▌  | 381/500 [04:18<01:10,  1.68it/s]

8.0


 76%|███████▋  | 382/500 [04:18<01:11,  1.65it/s]

10.0


 77%|███████▋  | 383/500 [04:19<01:10,  1.67it/s]

9.0


 77%|███████▋  | 384/500 [04:19<01:11,  1.62it/s]

10.0


 77%|███████▋  | 385/500 [04:20<01:12,  1.59it/s]

10.0


 77%|███████▋  | 386/500 [04:21<01:11,  1.60it/s]

9.0


 77%|███████▋  | 387/500 [04:21<01:11,  1.59it/s]

10.0


 78%|███████▊  | 388/500 [04:22<01:06,  1.68it/s]

8.0


 78%|███████▊  | 389/500 [04:22<01:07,  1.65it/s]

10.0


 78%|███████▊  | 390/500 [04:23<01:07,  1.62it/s]

10.0


 78%|███████▊  | 391/500 [04:24<01:08,  1.59it/s]

10.0


 78%|███████▊  | 392/500 [04:24<01:09,  1.55it/s]

10.0


 79%|███████▊  | 393/500 [04:25<01:04,  1.66it/s]

8.0


 79%|███████▉  | 394/500 [04:26<01:04,  1.64it/s]

10.0


 79%|███████▉  | 395/500 [04:26<01:00,  1.72it/s]

8.0


 79%|███████▉  | 396/500 [04:27<01:04,  1.61it/s]

11.0


 79%|███████▉  | 397/500 [04:27<01:04,  1.59it/s]

10.0


 80%|███████▉  | 398/500 [04:28<01:04,  1.58it/s]

10.0


 80%|███████▉  | 399/500 [04:29<01:02,  1.63it/s]

9.0


 80%|████████  | 400/500 [04:29<01:02,  1.60it/s]

10.0


 80%|████████  | 401/500 [04:30<01:02,  1.59it/s]

10.0


 80%|████████  | 402/500 [04:31<01:02,  1.58it/s]

10.0


 81%|████████  | 403/500 [04:31<00:59,  1.62it/s]

9.0


 81%|████████  | 404/500 [04:32<00:58,  1.64it/s]

9.0


 81%|████████  | 405/500 [04:32<00:58,  1.62it/s]

10.0


 81%|████████  | 406/500 [04:33<00:56,  1.66it/s]

9.0


 81%|████████▏ | 407/500 [04:33<00:53,  1.74it/s]

8.0


 82%|████████▏ | 408/500 [04:34<00:54,  1.69it/s]

10.0


 82%|████████▏ | 409/500 [04:35<00:53,  1.71it/s]

9.0


 82%|████████▏ | 410/500 [04:35<00:52,  1.73it/s]

9.0


 82%|████████▏ | 411/500 [04:36<00:53,  1.66it/s]

10.0


 82%|████████▏ | 412/500 [04:37<00:55,  1.58it/s]

11.0


 83%|████████▎ | 413/500 [04:37<00:55,  1.58it/s]

10.0


 83%|████████▎ | 414/500 [04:38<00:53,  1.60it/s]

9.0


 83%|████████▎ | 415/500 [04:39<01:14,  1.14it/s]

22.0


 83%|████████▎ | 416/500 [04:41<01:21,  1.03it/s]

18.0


 83%|████████▎ | 417/500 [04:41<01:14,  1.12it/s]

11.0


 84%|████████▎ | 418/500 [04:42<01:19,  1.03it/s]

18.0


 84%|████████▍ | 419/500 [04:43<01:16,  1.05it/s]

14.0


 84%|████████▍ | 420/500 [05:04<09:13,  6.92s/it]

14.0


 84%|████████▍ | 421/500 [05:05<06:41,  5.09s/it]

12.0


 84%|████████▍ | 422/500 [05:06<04:51,  3.73s/it]

9.0


 85%|████████▍ | 423/500 [05:06<03:36,  2.81s/it]

10.0


 85%|████████▍ | 424/500 [05:08<03:15,  2.58s/it]

31.0


 85%|████████▌ | 425/500 [05:09<02:28,  1.98s/it]

9.0


 85%|████████▌ | 426/500 [05:10<02:08,  1.74s/it]

17.0


 85%|████████▌ | 427/500 [05:11<01:48,  1.49s/it]

14.0


 86%|████████▌ | 428/500 [05:11<01:28,  1.23s/it]

8.0


 86%|████████▌ | 429/500 [05:12<01:13,  1.03s/it]

8.0


 86%|████████▌ | 430/500 [05:13<01:14,  1.06s/it]

15.0


 86%|████████▌ | 431/500 [05:15<01:24,  1.23s/it]

22.0


 86%|████████▋ | 432/500 [05:15<01:10,  1.03s/it]

9.0


 87%|████████▋ | 433/500 [05:16<01:02,  1.07it/s]

10.0


 87%|████████▋ | 434/500 [05:17<00:54,  1.20it/s]

9.0


 87%|████████▋ | 435/500 [05:17<00:49,  1.32it/s]

8.0


 87%|████████▋ | 436/500 [05:18<00:46,  1.38it/s]

9.0


 87%|████████▋ | 437/500 [05:19<00:46,  1.37it/s]

11.0


 88%|████████▊ | 438/500 [05:19<00:43,  1.42it/s]

10.0


 88%|████████▊ | 439/500 [05:20<00:42,  1.45it/s]

10.0


 88%|████████▊ | 440/500 [05:21<00:39,  1.52it/s]

9.0


 88%|████████▊ | 441/500 [05:21<00:38,  1.54it/s]

10.0


 88%|████████▊ | 442/500 [05:22<00:37,  1.55it/s]

10.0


 89%|████████▊ | 443/500 [05:22<00:34,  1.65it/s]

8.0


 89%|████████▉ | 444/500 [05:23<00:39,  1.41it/s]

15.0


 89%|████████▉ | 445/500 [05:25<00:54,  1.02it/s]

24.0


 89%|████████▉ | 446/500 [05:26<00:47,  1.13it/s]

10.0


 89%|████████▉ | 447/500 [05:26<00:43,  1.21it/s]

10.0


 90%|████████▉ | 448/500 [05:27<00:41,  1.25it/s]

9.0


 90%|████████▉ | 449/500 [05:28<00:41,  1.23it/s]

12.0


 90%|█████████ | 450/500 [05:34<01:55,  2.30s/it]

88.0


 90%|█████████ | 451/500 [05:34<01:27,  1.79s/it]

9.0


 90%|█████████ | 452/500 [05:35<01:10,  1.46s/it]

11.0


 91%|█████████ | 453/500 [05:35<00:56,  1.20s/it]

9.0


 91%|█████████ | 454/500 [05:36<00:47,  1.03s/it]

10.0


 91%|█████████ | 455/500 [05:37<00:41,  1.10it/s]

10.0


 91%|█████████ | 456/500 [05:37<00:35,  1.23it/s]

9.0


 91%|█████████▏| 457/500 [05:38<00:33,  1.28it/s]

11.0


 92%|█████████▏| 458/500 [05:39<00:30,  1.38it/s]

9.0


 92%|█████████▏| 459/500 [05:39<00:29,  1.41it/s]

10.0


 92%|█████████▏| 460/500 [05:40<00:27,  1.44it/s]

9.0


 92%|█████████▏| 461/500 [05:41<00:26,  1.46it/s]

10.0


 92%|█████████▏| 462/500 [05:41<00:24,  1.54it/s]

9.0


 93%|█████████▎| 463/500 [05:42<00:23,  1.55it/s]

10.0


 93%|█████████▎| 464/500 [05:43<00:23,  1.51it/s]

11.0


 93%|█████████▎| 465/500 [05:43<00:22,  1.57it/s]

9.0


 93%|█████████▎| 466/500 [05:44<00:21,  1.55it/s]

10.0


 93%|█████████▎| 467/500 [05:44<00:20,  1.60it/s]

8.0


 94%|█████████▎| 468/500 [05:45<00:20,  1.55it/s]

10.0


 94%|█████████▍| 469/500 [05:46<00:20,  1.55it/s]

10.0


 94%|█████████▍| 470/500 [05:46<00:19,  1.56it/s]

10.0


 94%|█████████▍| 471/500 [05:47<00:18,  1.57it/s]

10.0


 94%|█████████▍| 472/500 [05:48<00:17,  1.61it/s]

9.0


 95%|█████████▍| 473/500 [05:48<00:16,  1.59it/s]

10.0


 95%|█████████▍| 474/500 [05:49<00:15,  1.64it/s]

9.0


 95%|█████████▌| 475/500 [05:49<00:14,  1.67it/s]

9.0


 95%|█████████▌| 476/500 [05:50<00:13,  1.74it/s]

8.0


 95%|█████████▌| 477/500 [05:50<00:12,  1.77it/s]

8.0


 96%|█████████▌| 478/500 [05:51<00:12,  1.70it/s]

10.0


 96%|█████████▌| 479/500 [05:52<00:13,  1.59it/s]

10.0


 96%|█████████▌| 480/500 [05:52<00:12,  1.62it/s]

9.0


 96%|█████████▌| 481/500 [05:53<00:11,  1.62it/s]

9.0


 96%|█████████▋| 482/500 [05:54<00:10,  1.64it/s]

9.0


 97%|█████████▋| 483/500 [05:54<00:10,  1.60it/s]

10.0


 97%|█████████▋| 484/500 [05:55<00:10,  1.56it/s]

9.0


 97%|█████████▋| 485/500 [05:56<00:09,  1.54it/s]

10.0


 97%|█████████▋| 486/500 [05:56<00:08,  1.58it/s]

9.0


 97%|█████████▋| 487/500 [05:57<00:08,  1.59it/s]

9.0


 98%|█████████▊| 488/500 [05:57<00:07,  1.56it/s]

10.0


 98%|█████████▊| 489/500 [05:58<00:07,  1.54it/s]

10.0


 98%|█████████▊| 490/500 [05:59<00:06,  1.54it/s]

10.0


 98%|█████████▊| 491/500 [05:59<00:05,  1.58it/s]

9.0


 98%|█████████▊| 492/500 [06:00<00:05,  1.58it/s]

9.0


 99%|█████████▊| 493/500 [06:01<00:04,  1.53it/s]

10.0


 99%|█████████▉| 494/500 [06:01<00:03,  1.53it/s]

10.0


 99%|█████████▉| 495/500 [06:02<00:03,  1.54it/s]

10.0


 99%|█████████▉| 496/500 [06:02<00:02,  1.65it/s]

8.0


 99%|█████████▉| 497/500 [06:03<00:01,  1.62it/s]

10.0


100%|█████████▉| 498/500 [06:04<00:01,  1.59it/s]

10.0


100%|█████████▉| 499/500 [06:04<00:00,  1.59it/s]

10.0


100%|██████████| 500/500 [06:05<00:00,  1.37it/s]

10.0





In [13]:
# Release the environment now that the 500-iteration run above has completed.
env.close()

In [None]:
# Evaluate the network: play up to 500 rendered episodes, choosing each action
# from the model's output, and report the return collected in every episode.
# NOTE(review): `policy` is defined elsewhere in the notebook; its second
# argument (0 here) is presumably the exploration rate, i.e. act greedily — confirm.
try:
    for episode in tqdm(range(500)):
        observation = env.reset()
        # The model's Input layer is shape (4,), so add a leading batch axis.
        observation = np.expand_dims(observation, axis=0)
        total_reward = 0
        for step in range(1000):
            env.render()
            nn_out = sample_model.predict(observation)
            action = policy(nn_out,0)
            print(nn_out[0])
            print(action)
            observation,reward,done,info = env.step(action)
            observation = np.expand_dims(observation, axis=0)
            total_reward = total_reward + reward
            if done:
                print("episode ended")
                break
        # Previously the return was accumulated but never shown; report it so the
        # evaluation can be compared with the per-episode rewards printed in training.
        print("total reward:", total_reward)
finally:
    # Always release the environment, even if the loop is interrupted or a step
    # raises, so a render window is not left dangling.
    env.close()

In [None]:
# Standalone cleanup cell — presumably kept so the environment can be closed
# on demand (e.g. after manually stopping a rendering loop).
env.close()

In [None]:
# Random-action baseline: play up to 50 rendered episodes, sampling every action
# uniformly from the action space, and print the (reward, done) pair of each step.
try:
    for episode in tqdm(range(50)):
        # The initial observation is not needed: actions are sampled at random,
        # so the former batch-axis expansion was dead code and has been removed.
        env.reset()
        for step in range(1000):
            env.render()
            observation,reward,done,info = env.step(env.action_space.sample())
            print(reward,done)
            if done:
                break
finally:
    # Match the evaluation cell: always release the environment when done,
    # including when the loop is interrupted.
    env.close()