GaussianMLP Policy: NN is now pluggable into the Policy
Daniel authored and Daniel committed May 23, 2016
1 parent 3d66038 commit 9d623ee
Showing 1 changed file with 105 additions and 49 deletions.
rllab/policies/gaussian_mlp_policy.py
@@ -17,22 +17,60 @@
 import theano.tensor as TT
 
 
-class GaussianMLPPolicy(StochasticPolicy, LasagnePowered, Serializable):
-    def __init__(
-            self,
-            env_spec,
-            hidden_sizes=(32, 32),
-            learn_std=True,
-            init_std=1.0,
-            adaptive_std=False,
-            std_share_network=False,
-            std_hidden_sizes=(32, 32),
-            min_std=1e-6,
-            std_hidden_nonlinearity=NL.tanh,
-            hidden_nonlinearity=NL.tanh,
-            output_nonlinearity=None,
-            network_creator=MLP
-    ):
+class GaussianPolicyNeuralNet(object):
+    def __init__(self):
+        self.input = None
+        self.output_layers = None
+
+    def __call__(self, observations):
+        raise NotImplementedError
+
+
+class GaussianPolicyMLP(GaussianPolicyNeuralNet):
+    def __init__(self,
+                 env_spec,
+                 min_std=1e-6,
+                 *args,
+                 **kwargs):
+        """
+        :param env_spec:
+        :param ...: Additional arguments are passed to the create_network function
+        :return:
+        """
+
+        # create network
+        mean_network, l_log_std = self.create_network(env_spec, *args, **kwargs)
+
+        l_mean = mean_network.output_layer
+        obs_var = mean_network.input_var
+
+        self._l_mean = l_mean
+        self._l_log_std = l_log_std
+        self.output_layers = [l_mean, l_log_std]
+        self.min_std = min_std
+        mean_var, log_std_var = L.get_output([l_mean, l_log_std])
+        if self.min_std is not None:
+            log_std_var = TT.maximum(log_std_var, np.log(min_std))
+
+        self._mean_var, self._log_std_var = mean_var, log_std_var
+        self.observation_space = env_spec.observation_space
+
+        self._f_dist = ext.compile_function(
+            inputs=[obs_var],
+            outputs=[mean_var, log_std_var],
+        )
+
+    def create_network(self,
+                       env_spec,
+                       hidden_sizes=(32, 32),
+                       learn_std=True,
+                       init_std=1.0,
+                       adaptive_std=False,
+                       std_share_network=False,
+                       std_hidden_sizes=(32, 32),
+                       std_hidden_nonlinearity=NL.tanh,
+                       hidden_nonlinearity=NL.tanh,
+                       output_nonlinearity=None):
         """
         :param env_spec:
         :param hidden_sizes: list of sizes for the fully-connected hidden layers
@@ -41,30 +79,21 @@ def __init__(
         :param adaptive_std:
         :param std_share_network:
         :param std_hidden_sizes: list of sizes for the fully-connected layers for std
-        :param min_std: whether to make sure that the std is at least some threshold value, to avoid numerical issues
         :param std_hidden_nonlinearity:
         :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param output_nonlinearity: nonlinearity for the output layer
-        :return:
+        :return (mean_network, layer_log_std):
         """
-        Serializable.quick_init(self, locals())
-        assert isinstance(env_spec.action_space, Box)
 
         obs_dim = env_spec.observation_space.flat_dim
         action_dim = env_spec.action_space.flat_dim
 
-        # create network
-        mean_network = network_creator(
+        mean_network = MLP(
             input_shape=(obs_dim,),
             output_dim=action_dim,
             hidden_sizes=hidden_sizes,
             hidden_nonlinearity=hidden_nonlinearity,
             output_nonlinearity=output_nonlinearity,
         )
-        self._mean_network = mean_network
-
-        l_mean = mean_network.output_layer
-        obs_var = mean_network.input_var
-
         if adaptive_std:
             std_network = MLP(
@@ -75,55 +104,78 @@
                 hidden_nonlinearity=std_hidden_nonlinearity,
                 output_nonlinearity=None,
             )
-            l_log_std = std_network.output_layer
+            layer_log_std = std_network.output_layer
         else:
-            l_log_std = ParamLayer(
+            layer_log_std = ParamLayer(
                 mean_network.input_layer,
                 num_units=action_dim,
                 param=lasagne.init.Constant(np.log(init_std)),
                 name="output_log_std",
                 trainable=learn_std,
             )
+        return mean_network, layer_log_std
 
-        self.min_std = min_std
-
-        mean_var, log_std_var = L.get_output([l_mean, l_log_std])
+    def __call__(self, observations):
+        """
+        :param observations: List of observations
+        :return (means, log_stds):
+        """
+        return self._f_dist(self.format_network_input(observations))
+
+    def call_sym(self, observations):
+        """
+        :param observations: Neural net input - variable
+        :return (mean_var, log_std_var):
+        """
+        mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], observations)
         if self.min_std is not None:
-            log_std_var = TT.maximum(log_std_var, np.log(min_std))
+            log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
+        return mean_var, log_std_var
 
-        self._mean_var, self._log_std_var = mean_var, log_std_var
+    def format_network_input(self, observations):
+        """
+        :param observations: List of observations
+        :return observations: formatted to match NN input
+        """
+        return self.observation_space.flatten_n(observations)
 
-        self._l_mean = l_mean
-        self._l_log_std = l_log_std
 
+class GaussianMLPPolicy(StochasticPolicy, LasagnePowered, Serializable):
+    def __init__(
+            self,
+            env_spec,
+            network_creator=GaussianPolicyMLP,
+            *args,
+            **kwargs
+    ):
+        """
+        :param env_spec:
+        :param min_std: whether to make sure that the std is at least some threshold value, to avoid numerical issues
+        :param network_creator: callable that should return the policy network, a GaussianPolicyNeuralNet exposing output_layers, __call__ and call_sym
+        :return:
+        """
+        Serializable.quick_init(self, locals())
+        assert isinstance(env_spec.action_space, Box)
+
+        self._neural_net = network_creator(env_spec, *args, **kwargs)
         self._dist = DiagonalGaussian()
 
-        LasagnePowered.__init__(self, [l_mean, l_log_std])
+        LasagnePowered.__init__(self, self._neural_net.output_layers)
         super(GaussianMLPPolicy, self).__init__(env_spec)
 
-        self._f_dist = ext.compile_function(
-            inputs=[obs_var],
-            outputs=[mean_var, log_std_var],
-        )
-
     def dist_info_sym(self, obs_var, state_info_vars=None):
-        mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], obs_var)
-        if self.min_std is not None:
-            log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
+        mean_var, log_std_var = self._neural_net.call_sym(obs_var)
         return dict(mean=mean_var, log_std=log_std_var)
 
     @overrides
     def get_action(self, observation):
-        flat_obs = self.observation_space.flatten(observation)
-        mean, log_std = [x[0] for x in self._f_dist([flat_obs])]
+        mean, log_std = [x[0] for x in self._neural_net([observation])]
         rnd = np.random.normal(size=mean.shape)
         action = rnd * np.exp(log_std) + mean
         return action, dict(mean=mean, log_std=log_std)
 
     def get_actions(self, observations):
-        flat_obs = self.observation_space.flatten_n(observations)
-        means, log_stds = self._f_dist(flat_obs)
+        means, log_stds = self._neural_net(observations)
         rnd = np.random.normal(size=means.shape)
         actions = rnd * np.exp(log_stds) + means
         return actions, dict(mean=means, log_std=log_stds)
@@ -151,3 +203,7 @@ def log_diagnostics(self, paths):
     @property
     def distribution(self):
         return self._dist
+
+    @property
+    def neural_net(self):
+        return self._neural_net

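A note on usage (not part of the commit): the sketch below shows how the new network_creator hook could be exercised. It assumes rllab's CartpoleEnv and normalize wrappers (any environment whose action space is a Box would do), and WideGaussianPolicyMLP is a hypothetical subclass invented here for illustration.

import lasagne.nonlinearities as NL

from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.envs.normalized_env import normalize
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy, GaussianPolicyMLP


class WideGaussianPolicyMLP(GaussianPolicyMLP):
    # Hypothetical subclass: it reuses GaussianPolicyMLP's compiled
    # distribution function and min_std clipping, and only swaps in a
    # different mean-network architecture via create_network.

    def create_network(self, env_spec, **kwargs):
        kwargs.setdefault("hidden_sizes", (128, 128))
        kwargs.setdefault("hidden_nonlinearity", NL.rectify)
        return super(WideGaussianPolicyMLP, self).create_network(env_spec, **kwargs)


env = normalize(CartpoleEnv())

# GaussianMLPPolicy no longer builds its network directly; it delegates to
# whatever object network_creator returns.
policy = GaussianMLPPolicy(env.spec, network_creator=WideGaussianPolicyMLP)
action, dist_info = policy.get_action(env.reset())

Because the policy only touches the network through output_layers, __call__ and call_sym (via dist_info_sym), a fully custom GaussianPolicyNeuralNet subclass can be plugged in the same way, as long as it provides those three members.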