-
Notifications
You must be signed in to change notification settings - Fork 0
/
t3_ddpg_core_spinup.py
48 lines (34 loc) · 1.72 KB
/
t3_ddpg_core_spinup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import numpy as np
import tensorflow as tf
def placeholder(dim=None):
    """Return a float32 TF1 placeholder with batch-first shape.

    Shape is (None, dim) when a feature dimension is given, else (None,)
    for scalar-per-sample inputs (e.g. rewards, done flags).

    Bug fix: the original used `if dim` (truthiness), so dim=0 — or any
    falsy dim — silently collapsed to shape (None,). Test identity
    against None instead.
    """
    shape = (None, dim) if dim is not None else (None,)
    return tf.placeholder(dtype=tf.float32, shape=shape)
def placeholders(*args):
    """Create one float32 placeholder per requested dimension.

    Each positional argument is forwarded to `placeholder`; the result
    is a list in the same order as the arguments.
    """
    created = []
    for dim in args:
        created.append(placeholder(dim))
    return created
def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
    """Stack dense layers on `x` and return the output tensor.

    All layers except the last use `activation` and He (variance-scaling)
    initialization; the final layer uses `output_activation` (None means
    linear) and Xavier initialization. `hidden_sizes` gives the width of
    every layer including the output layer.
    """
    xavier_init = tf.contrib.layers.xavier_initializer()
    he_init = tf.contrib.layers.variance_scaling_initializer()
    # Split widths into hidden layers and the single output layer.
    *hidden_widths, out_width = hidden_sizes
    for width in hidden_widths:
        x = tf.layers.dense(x, units=width, activation=activation,
                            kernel_initializer=he_init)
    return tf.layers.dense(x, units=out_width, activation=output_activation,
                           kernel_initializer=xavier_init)
def get_vars(scope):
    """Return all global variables whose name contains `scope`.

    NOTE: this is a substring match, not a prefix match — a scope name
    like 'q1' also matches variables under 'q1_targ' if such exist.
    """
    matched = []
    for var in tf.global_variables():
        if scope in var.name:
            matched.append(var)
    return matched
def count_vars(scope):
    """Return the total number of scalar parameters under `scope`.

    Sums the element counts of every variable matched by `get_vars`.
    Uses a generator (no throwaway list) and casts each np.prod result
    to a plain Python int so the return value is an int, not a numpy
    scalar.
    """
    return sum(int(np.prod(var.shape.as_list())) for var in get_vars(scope))
"""
Actor-Critics
"""
def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu,
                     output_activation=tf.tanh, action_space=None):
    """Build a TD3-style actor-critic graph.

    Creates a deterministic policy `pi` (scaled by the action limit) and
    twin Q-value heads `q1`/`q2` evaluated at the given action `a`, plus
    `q1_pi`, which re-enters the 'q1' scope with reuse=True to evaluate
    the same Q1 network at the policy's action.

    Returns (pi, q1, q2, q1_pi); each Q output is squeezed to shape
    (batch,).
    """
    act_dim = a.shape.as_list()[-1]
    # NOTE(review): assumes action bounds are symmetric and identical
    # across dimensions (only high[0] is used) — confirm against the env.
    act_limit = action_space.high[0]

    def q_net(inputs):
        # Single-output MLP critic head, squeezed from (batch, 1) to (batch,).
        return tf.squeeze(mlp(inputs, list(hidden_sizes) + [1], activation, None), axis=1)

    with tf.variable_scope('pi'):
        pi = act_limit * mlp(x, list(hidden_sizes) + [act_dim], activation, output_activation)
    with tf.variable_scope('q1'):
        q1 = q_net(tf.concat([x, a], axis=-1))
    with tf.variable_scope('q2'):
        q2 = q_net(tf.concat([x, a], axis=-1))
    # Must come after 'q1' is created: reuse=True shares its weights.
    with tf.variable_scope('q1', reuse=True):
        q1_pi = q_net(tf.concat([x, pi], axis=-1))
    return pi, q1, q2, q1_pi