The purpose of this notebook is to experiment with TensorFlow.

In [1]:
#  goal: split the output layer of a network into the parameters (mean, stdev) of Gaussian distributions

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
import pkg_resources
#  report the version of the TensorFlow module that was actually imported;
#  looking the version up by the distribution name "tensorflow-gpu" fails
#  whenever TensorFlow is installed under another name (e.g. "tensorflow")
tf.__version__

'1.3.0'

In [4]:
#  width of a single observation fed to the network
observation_dim = 3

#  every action gets its own Gaussian, so the network must emit
#  two numbers per action: a mean and a stdev
num_actions = 2
output_dim = 2 * num_actions

In [5]:
from energy_py.agents.tensorflow_machinery import fc_layer

#  first we simulate learning
#  we create a fully connected network that maps an observation of width
#  observation_dim onto output_dim raw outputs
#
#  fc_layer is called positionally as (input, shape list, shape list, activation);
#  presumably the two lists are the weight and bias shapes - confirm against energy_py
first_width = 50
second_width = 100

observation = tf.placeholder(dtype=tf.float32,
                             shape=[None, observation_dim],
                             name='observation')

with tf.variable_scope('input_layer'):
    input_layer = fc_layer(observation,
                           [observation_dim, first_width],
                           [first_width],
                           tf.nn.relu)

with tf.variable_scope('hidden_layer_1'):
    hidden_layer_1 = fc_layer(input_layer,
                              [first_width, second_width],
                              [second_width],
                              tf.nn.relu)

with tf.variable_scope('hidden_layer_2'):
    hidden_layer_2 = fc_layer(hidden_layer_1,
                              [second_width, second_width],
                              [second_width],
                              tf.nn.relu)

#  no activation is passed for the output layer
with tf.variable_scope('output_layer'):
    output_layer = fc_layer(hidden_layer_2,
                            [second_width, output_dim],
                            [output_dim])

In [6]:
#  column indexes into the output layer:
#  even columns (0, 2, ...) are used for the means,
#  odd columns (1, 3, ...) for the stdevs
mean_idx = tf.range(0, output_dim, 2)
stdev_idx = tf.range(1, output_dim, 2)

In [7]:
#  gather ops
#  pick the mean columns and the stdev columns out of the output layer
means = tf.gather(params=output_layer, indices=mean_idx, axis=1)
raw_stdevs = tf.gather(params=output_layer, indices=stdev_idx, axis=1)

#  the raw network output can be zero or negative, which is not a valid scale,
#  so floor it at a small positive value
#  a plain floor is used instead of clip_by_value(x, 1e-10, reduce_max(x)):
#  the tensor's own maximum never clips anything as an upper bound, and when
#  every raw value is below the floor it would pull the result back down to
#  that (non-positive) maximum
#  NOTE(review): a hard floor still yields near-degenerate distributions for
#  negative raw outputs; a smooth positive mapping such as tf.nn.softplus may
#  be preferable - confirm the intended parameterisation
stdevs = tf.maximum(raw_stdevs, 1e-10)

In [8]:
#  create normal distributions
#  one Normal is parameterised by each (mean, stdev) pair gathered above
distributions = tf.contrib.distributions
norm_dist = distributions.Normal(loc=means, scale=stdevs)

#  draw a single sample from every distribution
#  presumably this adds a leading sample axis of length 1 - confirm
action = norm_dist.sample(sample_shape=1)

In [9]:
#  now for the part of the graph associated with learning
#  taken_action holds one row of num_actions values per sample,
#  discounted_return holds a single value per sample
taken_action = tf.placeholder(tf.float32, [None, num_actions], 'taken_action')
discounted_return = tf.placeholder(tf.float32, [None, 1], 'discounted_returns')

#  density of each taken action under its Gaussian
#  probs and probs_clipped are kept so they can still be fetched and inspected
probs = norm_dist.prob(taken_action)
probs_clipped = tf.clip_by_value(probs, 1e-10, 1)

#  take the log-density directly from the distribution instead of
#  tf.log(probs_clipped):
#    - a Gaussian density is not a probability and can exceed 1 when the
#      stdev is small, so clipping at 1 distorts the value
#    - the 1e-10 floor replaces underflowed densities with a constant,
#      which carries no gradient back to the network
log_probs = norm_dist.log_prob(taken_action)

In [10]:
#  a hand-written batch of three samples to push through the graph
scaled_observation = np.array([[0, 0.5, 1],
                               [1, 0.5, 1],
                               [1, 0.5, 1]]).reshape(-1, observation_dim)
taken_action_ = np.array([[1e-6, 0],
                          [0.5, 0.5],
                          [1, 1]]).reshape(-1, num_actions)
discounted_return_ = np.array([1, 2, 3]).reshape(-1, 1)

#  tensors to evaluate, and the values fed into the placeholders
fetches = [means, stdevs, taken_action, probs, probs_clipped]
feed_dict = {observation: scaled_observation,
             taken_action: taken_action_,
             discounted_return: discounted_return_}

with tf.Session() as sess:
    #  variables must be initialised before anything can be evaluated
    sess.run(tf.global_variables_initializer())
    outputs = sess.run(fetches, feed_dict)

In [11]:
#  show each fetched array in the order it was requested:
#  means, stdevs, taken_action, probs, probs_clipped
for fetched in outputs:
    print(fetched)

[[-21.9981308   24.8816967 ]
 [-24.03048325  36.97780228]
 [-24.03048325  36.97780228]]
[[  1.32588539e+01   1.00000001e-10]
 [  5.08512421e+01   1.00000001e-10]
 [  5.08512421e+01   1.00000001e-10]]
[[  9.99999997e-07   0.00000000e+00]
 [  5.00000000e-01   5.00000000e-01]
 [  1.00000000e+00   1.00000000e+00]]
[[ 0.00759735  0.        ]
 [ 0.00698356  0.        ]
 [ 0.00695018  0.        ]]
[[  7.59734632e-03   1.00000001e-10]
 [  6.98355865e-03   1.00000001e-10]
 [  6.95017865e-03   1.00000001e-10]]
