In [4]:
import tensorflow as tf
import numpy as np
import gym

tf.logging.set_verbosity(tf.logging.ERROR)

In [2]:
env = gym.make('CartPole-v0')

### ***Hyper parameters***

In [21]:
# H: number of hidden layer neurons
# lr: learning rate
# gamma: discount factor
# D: input dimensionality(observation)

H = 10
batch_size = 5
lr = 5e-3
gamma = 0.99
D = 4

### ***Training***

In [22]:
tf.reset_default_graph()

observations = tf.placeholder(shape=[None,D], dtype=tf.float32, name='input_x')

W1 = tf.get_variable('W1', shape=[D,H], initializer=tf.contrib.layers.xavier_initializer())
layer1 = tf.nn.relu(tf.matmul(observations, W1))

W2 = tf.get_variable('W2', shape=[H,1], initializer=tf.contrib.layers.xavier_initializer())
score = tf.nn.relu(tf.matmul(layer1, W2))

probability = tf.nn.sigmoid(score)

tvars = tf.trainable_variables()
input_y = tf.placeholder(shape=[None,1], dtype=tf.float32, name='input_y')
advantages = tf.placeholder(dtype=tf.float32, name='reward_signal')

loglik = tf.log(input_y*(input_y-probability)+(1-input_y)*(input_y+probability))
loss = -tf.reduce_mean(loglik*advantages)
newGrads = tf.gradients(loss, tvars)

adam = tf.train.AdamOptimizer(learning_rate=lr)
W1Grad = tf.placeholder(dtype=tf.float32, name='batch_grad1')
W2Grad = tf.placeholder(dtype=tf.float32, name='batch_grad2')
batchGrad = [W1Grad, W2Grad]
updateGrad = adam.apply_gradients(zip(batchGrad, tvars))

### ***Advantage function***

In [23]:
def discount_rewards(r) :
    discounted = np.zeros_like(r)
    running_add = 0
    
    for t in reversed(range(0, r.size)) :
        running_add = running_add*gamma+r[t]
        discounted[t] = running_add
        
    return discounted

### ***Running the agent and Env***

In [24]:
xs, drs, ys = [], [], []
running_reward = None
reward_sum = 0
count = 1
EPOCHS = 10000
init = tf.global_variables_initializer()

with tf.Session() as sess :
    rendering = False
    sess.run(init)
    observation = env.reset()
    
    gradBuffer = sess.run(tvars)
    for idx, grad in enumerate(gradBuffer) :
        gradBuffer[idx] = grad*0
        
    while count <= EPOCHS :
        if reward_sum/batch_size > 100 or rendering :
            env.render()
            rendering = True
            
        x = np.reshape(observation, [1,D])
        
        tfprob = sess.run(probability, feed_dict={observations: x})
        action = 1 if np.random.uniform() < tfprob else 0
        
        xs.append(x)
        y = 1 if action == 0 else 0
        ys.append(y)
        
        observation, reward, done, info = env.step(action)
        reward_sum += reward
        drs.append(reward)
        
        if done :
            count += 1
            
            epx = np.vstack(xs)
            epy = np.vstack(ys)
            epr = np.vstack(drs)
            xs, drs, ys = [], [], []
            
            discounted_epr = discount_rewards(epr)
            discounted_epr -= np.mean(discounted_epr)
            discounted_epr /= np.std(discounted_epr)
            
            tGrad = sess.run(newGrads, feed_dict={observations: epx, input_y: epy, advantages: discounted_epr})
            for idx, grad in enumerate(tGrad) :
                gradBuffer[idx] += grad
                
            if count%batch_size == 0 :
                sess.run(updateGrad, feed_dict={W1Grad: gradBuffer[0], W2Grad: gradBuffer[1]})
                for idx, grad in enumerate(gradBuffer) :
                    gradBuffer[idx] += grad*0
                    
                running_reward = reward_sum if running_reward is None else running_reward*0.99+reward_sum*0.01
                
                print('Average reward for episodes %.3f. Total average reward %.3f' % (reward_sum/batch_size, running_reward/batch_size))
                
                if reward_sum/batch_size > 200 :
                    print("Task solved in ", count, 'episodes.')
                    break
                    
                reward_sum = 0
                
            observation = env.reset()
            
print(count, " Episodes completed")

Average reward for episodes 13.000. Total average reward 13.000
Average reward for episodes 15.000. Total average reward 13.020
Average reward for episodes 31.800. Total average reward 13.208
Average reward for episodes 16.400. Total average reward 13.240
Average reward for episodes 16.200. Total average reward 13.269
Average reward for episodes 20.400. Total average reward 13.341
Average reward for episodes 21.000. Total average reward 13.417
Average reward for episodes 28.800. Total average reward 13.571
Average reward for episodes 38.600. Total average reward 13.821
Average reward for episodes 18.200. Total average reward 13.865
Average reward for episodes 38.200. Total average reward 14.108
Average reward for episodes 32.400. Total average reward 14.291
Average reward for episodes 21.800. Total average reward 14.366
Average reward for episodes 15.600. Total average reward 14.379
Average reward for episodes 21.000. Total average reward 14.445
Average reward for episodes 21.400. Tota

Average reward for episodes 16.200. Total average reward 20.395
Average reward for episodes 20.800. Total average reward 20.399
Average reward for episodes 16.000. Total average reward 20.355
Average reward for episodes 24.000. Total average reward 20.392
Average reward for episodes 33.600. Total average reward 20.524
Average reward for episodes 19.800. Total average reward 20.516
Average reward for episodes 23.600. Total average reward 20.547
Average reward for episodes 19.800. Total average reward 20.540
Average reward for episodes 17.200. Total average reward 20.506
Average reward for episodes 22.800. Total average reward 20.529
Average reward for episodes 18.400. Total average reward 20.508
Average reward for episodes 23.000. Total average reward 20.533
Average reward for episodes 25.400. Total average reward 20.582
Average reward for episodes 30.200. Total average reward 20.678
Average reward for episodes 16.800. Total average reward 20.639
Average reward for episodes 14.600. Tota

Average reward for episodes 46.200. Total average reward 23.381
Average reward for episodes 30.800. Total average reward 23.455
Average reward for episodes 33.800. Total average reward 23.559
Average reward for episodes 35.200. Total average reward 23.675
Average reward for episodes 33.400. Total average reward 23.772
Average reward for episodes 42.200. Total average reward 23.957
Average reward for episodes 39.000. Total average reward 24.107
Average reward for episodes 20.600. Total average reward 24.072
Average reward for episodes 41.600. Total average reward 24.247
Average reward for episodes 36.800. Total average reward 24.373
Average reward for episodes 17.400. Total average reward 24.303
Average reward for episodes 44.800. Total average reward 24.508
Average reward for episodes 39.400. Total average reward 24.657
Average reward for episodes 42.200. Total average reward 24.832
Average reward for episodes 53.600. Total average reward 25.120
Average reward for episodes 35.000. Tota

Average reward for episodes 18.600. Total average reward 27.579
Average reward for episodes 20.600. Total average reward 27.509
Average reward for episodes 32.200. Total average reward 27.556
Average reward for episodes 17.200. Total average reward 27.453
Average reward for episodes 26.800. Total average reward 27.446
Average reward for episodes 30.400. Total average reward 27.476
Average reward for episodes 17.800. Total average reward 27.379
Average reward for episodes 18.000. Total average reward 27.285
Average reward for episodes 24.400. Total average reward 27.256
Average reward for episodes 29.800. Total average reward 27.282
Average reward for episodes 22.200. Total average reward 27.231
Average reward for episodes 26.000. Total average reward 27.218
Average reward for episodes 18.000. Total average reward 27.126
Average reward for episodes 21.000. Total average reward 27.065
Average reward for episodes 22.400. Total average reward 27.018
Average reward for episodes 22.200. Tota

Average reward for episodes 32.000. Total average reward 27.100
Average reward for episodes 27.800. Total average reward 27.107
Average reward for episodes 21.000. Total average reward 27.046
Average reward for episodes 23.600. Total average reward 27.012
Average reward for episodes 22.800. Total average reward 26.970
Average reward for episodes 15.800. Total average reward 26.858
Average reward for episodes 31.400. Total average reward 26.903
Average reward for episodes 21.800. Total average reward 26.852
Average reward for episodes 20.200. Total average reward 26.786
Average reward for episodes 24.200. Total average reward 26.760
Average reward for episodes 16.200. Total average reward 26.654
Average reward for episodes 15.400. Total average reward 26.542
Average reward for episodes 40.000. Total average reward 26.676
Average reward for episodes 22.800. Total average reward 26.638
Average reward for episodes 27.600. Total average reward 26.647
Average reward for episodes 31.800. Tota

Average reward for episodes 25.400. Total average reward 28.141
Average reward for episodes 27.600. Total average reward 28.136
Average reward for episodes 40.400. Total average reward 28.258
Average reward for episodes 28.800. Total average reward 28.264
Average reward for episodes 25.600. Total average reward 28.237
Average reward for episodes 32.200. Total average reward 28.277
Average reward for episodes 30.200. Total average reward 28.296
Average reward for episodes 42.600. Total average reward 28.439
Average reward for episodes 19.400. Total average reward 28.349
Average reward for episodes 20.800. Total average reward 28.273
Average reward for episodes 18.200. Total average reward 28.172
Average reward for episodes 27.400. Total average reward 28.165
Average reward for episodes 19.800. Total average reward 28.081
Average reward for episodes 30.000. Total average reward 28.100
Average reward for episodes 16.600. Total average reward 27.985
Average reward for episodes 35.200. Tota

Average reward for episodes 34.800. Total average reward 29.703
Average reward for episodes 41.200. Total average reward 29.818
Average reward for episodes 26.800. Total average reward 29.787
Average reward for episodes 23.800. Total average reward 29.728
Average reward for episodes 34.000. Total average reward 29.770
Average reward for episodes 19.400. Total average reward 29.667
Average reward for episodes 50.200. Total average reward 29.872
Average reward for episodes 29.800. Total average reward 29.871
Average reward for episodes 24.800. Total average reward 29.820
Average reward for episodes 40.600. Total average reward 29.928
Average reward for episodes 21.000. Total average reward 29.839
Average reward for episodes 21.400. Total average reward 29.755
Average reward for episodes 19.600. Total average reward 29.653
Average reward for episodes 29.000. Total average reward 29.647
Average reward for episodes 31.600. Total average reward 29.666
Average reward for episodes 23.600. Tota

Average reward for episodes 26.600. Total average reward 28.733
Average reward for episodes 22.200. Total average reward 28.668
Average reward for episodes 25.200. Total average reward 28.633
Average reward for episodes 44.000. Total average reward 28.787
Average reward for episodes 29.600. Total average reward 28.795
Average reward for episodes 21.400. Total average reward 28.721
Average reward for episodes 23.800. Total average reward 28.672
Average reward for episodes 32.000. Total average reward 28.705
Average reward for episodes 23.800. Total average reward 28.656
Average reward for episodes 21.400. Total average reward 28.584
Average reward for episodes 28.600. Total average reward 28.584
Average reward for episodes 20.000. Total average reward 28.498
Average reward for episodes 21.600. Total average reward 28.429
Average reward for episodes 24.600. Total average reward 28.391
Average reward for episodes 21.200. Total average reward 28.319
Average reward for episodes 52.800. Tota

Average reward for episodes 30.600. Total average reward 26.698
Average reward for episodes 25.400. Total average reward 26.685
Average reward for episodes 17.800. Total average reward 26.596
Average reward for episodes 21.200. Total average reward 26.542
Average reward for episodes 28.800. Total average reward 26.565
Average reward for episodes 19.600. Total average reward 26.495
Average reward for episodes 30.800. Total average reward 26.538
Average reward for episodes 28.400. Total average reward 26.557
Average reward for episodes 28.000. Total average reward 26.571
Average reward for episodes 32.200. Total average reward 26.627
Average reward for episodes 34.200. Total average reward 26.703
Average reward for episodes 25.200. Total average reward 26.688
Average reward for episodes 24.400. Total average reward 26.665
Average reward for episodes 20.400. Total average reward 26.603
Average reward for episodes 34.400. Total average reward 26.681
Average reward for episodes 25.600. Tota

Average reward for episodes 20.800. Total average reward 25.862
Average reward for episodes 23.200. Total average reward 25.835
Average reward for episodes 24.800. Total average reward 25.825
Average reward for episodes 21.200. Total average reward 25.778
Average reward for episodes 20.000. Total average reward 25.721
Average reward for episodes 33.600. Total average reward 25.799
Average reward for episodes 19.400. Total average reward 25.735
Average reward for episodes 26.600. Total average reward 25.744
Average reward for episodes 24.000. Total average reward 25.727
Average reward for episodes 28.200. Total average reward 25.751
Average reward for episodes 28.200. Total average reward 25.776
Average reward for episodes 35.800. Total average reward 25.876
Average reward for episodes 31.400. Total average reward 25.931
Average reward for episodes 18.400. Total average reward 25.856
Average reward for episodes 32.200. Total average reward 25.919
Average reward for episodes 28.400. Tota

Average reward for episodes 36.800. Total average reward 25.943
Average reward for episodes 30.600. Total average reward 25.990
Average reward for episodes 29.400. Total average reward 26.024
Average reward for episodes 23.200. Total average reward 25.996
Average reward for episodes 25.800. Total average reward 25.994
Average reward for episodes 14.600. Total average reward 25.880
Average reward for episodes 26.800. Total average reward 25.889
Average reward for episodes 14.600. Total average reward 25.776
Average reward for episodes 26.200. Total average reward 25.780
Average reward for episodes 18.200. Total average reward 25.705
Average reward for episodes 26.400. Total average reward 25.712
Average reward for episodes 35.800. Total average reward 25.812
Average reward for episodes 42.200. Total average reward 25.976
Average reward for episodes 16.400. Total average reward 25.881
Average reward for episodes 17.200. Total average reward 25.794
Average reward for episodes 19.800. Tota

Average reward for episodes 27.800. Total average reward 25.999
Average reward for episodes 28.000. Total average reward 26.019
Average reward for episodes 28.200. Total average reward 26.041
Average reward for episodes 31.400. Total average reward 26.095
Average reward for episodes 32.000. Total average reward 26.154
Average reward for episodes 28.200. Total average reward 26.174
Average reward for episodes 22.600. Total average reward 26.139
Average reward for episodes 24.600. Total average reward 26.123
Average reward for episodes 34.800. Total average reward 26.210
Average reward for episodes 24.800. Total average reward 26.196
Average reward for episodes 24.400. Total average reward 26.178
Average reward for episodes 21.600. Total average reward 26.132
Average reward for episodes 34.200. Total average reward 26.213
Average reward for episodes 23.800. Total average reward 26.189
Average reward for episodes 22.800. Total average reward 26.155
Average reward for episodes 21.400. Tota

Average reward for episodes 25.000. Total average reward 27.781
Average reward for episodes 22.800. Total average reward 27.731
Average reward for episodes 28.400. Total average reward 27.738
Average reward for episodes 21.000. Total average reward 27.670
Average reward for episodes 21.000. Total average reward 27.604
Average reward for episodes 23.000. Total average reward 27.558
Average reward for episodes 24.400. Total average reward 27.526
Average reward for episodes 44.000. Total average reward 27.691
Average reward for episodes 31.000. Total average reward 27.724
Average reward for episodes 20.400. Total average reward 27.651
Average reward for episodes 37.200. Total average reward 27.746
Average reward for episodes 53.600. Total average reward 28.005
Average reward for episodes 40.800. Total average reward 28.133
Average reward for episodes 46.000. Total average reward 28.311
Average reward for episodes 29.800. Total average reward 28.326
Average reward for episodes 20.000. Tota

Average reward for episodes 19.800. Total average reward 27.823
Average reward for episodes 18.800. Total average reward 27.733
Average reward for episodes 25.400. Total average reward 27.710
Average reward for episodes 42.000. Total average reward 27.852
Average reward for episodes 20.600. Total average reward 27.780
Average reward for episodes 34.600. Total average reward 27.848
Average reward for episodes 29.800. Total average reward 27.868
Average reward for episodes 27.600. Total average reward 27.865
Average reward for episodes 19.800. Total average reward 27.784
Average reward for episodes 21.400. Total average reward 27.720
Average reward for episodes 22.800. Total average reward 27.671
Average reward for episodes 43.800. Total average reward 27.833
Average reward for episodes 24.600. Total average reward 27.800
Average reward for episodes 31.800. Total average reward 27.840
Average reward for episodes 25.400. Total average reward 27.816
Average reward for episodes 37.200. Tota

Average reward for episodes 30.800. Total average reward 28.040
Average reward for episodes 25.200. Total average reward 28.012
Average reward for episodes 31.200. Total average reward 28.044
Average reward for episodes 32.200. Total average reward 28.085
Average reward for episodes 27.000. Total average reward 28.074
Average reward for episodes 26.200. Total average reward 28.056
Average reward for episodes 33.200. Total average reward 28.107
Average reward for episodes 39.400. Total average reward 28.220
Average reward for episodes 24.400. Total average reward 28.182
Average reward for episodes 31.000. Total average reward 28.210
Average reward for episodes 32.000. Total average reward 28.248
Average reward for episodes 24.000. Total average reward 28.205
Average reward for episodes 21.800. Total average reward 28.141
Average reward for episodes 23.600. Total average reward 28.096
Average reward for episodes 35.200. Total average reward 28.167
Average reward for episodes 31.800. Tota

Average reward for episodes 36.400. Total average reward 27.920
Average reward for episodes 22.400. Total average reward 27.865
Average reward for episodes 31.600. Total average reward 27.902
Average reward for episodes 20.600. Total average reward 27.829
Average reward for episodes 23.800. Total average reward 27.789
Average reward for episodes 19.600. Total average reward 27.707
Average reward for episodes 23.200. Total average reward 27.662
Average reward for episodes 42.200. Total average reward 27.807
Average reward for episodes 31.000. Total average reward 27.839
Average reward for episodes 26.000. Total average reward 27.821
Average reward for episodes 28.000. Total average reward 27.823
Average reward for episodes 16.800. Total average reward 27.712
Average reward for episodes 24.800. Total average reward 27.683
Average reward for episodes 28.000. Total average reward 27.686
Average reward for episodes 15.400. Total average reward 27.564
Average reward for episodes 32.200. Tota