Skip to content
This repository has been archived by the owner on Dec 28, 2023. It is now read-only.

Commit

Permalink
Fix A2C
Browse files Browse the repository at this point in the history
  • Loading branch information
Omegastick committed Apr 5, 2019
1 parent e8f7652 commit 80fafb5
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 77 deletions.
113 changes: 57 additions & 56 deletions example/gym_client.cpp
Expand Up @@ -19,15 +19,15 @@ using namespace cpprl;
// Algorithm hyperparameters
const int batch_size = 5;
const float discount_factor = 0.99;
const float entropy_coef = 1e-4;
const float entropy_coef = 1e-3;
const float learning_rate = 1e-3;
const int reward_average_window_size = 100;
const int reward_average_window_size = 10;
const float value_loss_coef = 0.5;

// Environment hyperparameters
const std::string env_name = "LunarLander-v2";
const int num_envs = 8;
const float env_gamma = -1; // Set to -1 to disable
const float env_gamma = discount_factor; // Set to -1 to disable

// Model hyperparameters
const int actions = 4;
Expand Down Expand Up @@ -83,64 +83,64 @@ int main(int argc, char *argv[])

storage.set_first_observation(observation);

std::ifstream weights_file{"/home/px046/prog/pytorch-cpp-rl/build/weights.json"};
auto json = nlohmann::json::parse(weights_file);
for (const auto &parameter : json.items())
{
if (base->named_parameters().contains(parameter.key()))
{
std::vector<int64_t> tensor_size = parameter.value()[0];
std::vector<float> parameter_vec;
if (parameter.key().find("bias") == std::string::npos)
{
std::vector<std::vector<float>> parameter_2d_vec = parameter.value()[1].get<std::vector<std::vector<float>>>();
parameter_vec = flatten_2d_vector<float>(parameter_2d_vec);
}
else
{
parameter_vec = parameter.value()[1].get<std::vector<float>>();
}
NoGradGuard guard;
auto json_weights = torch::from_blob(parameter_vec.data(), tensor_size).contiguous();
base->named_parameters()[parameter.key()].copy_(json_weights);
spdlog::info("Wrote {}", parameter.key());
if (parameter.key().find("bias") == std::string::npos)
{
spdlog::info("Json: {} - Memory: {}", parameter.value()[1][0][0], base->named_parameters()[parameter.key()][0][0].item().toFloat());
}
}
else if (policy->named_modules()["output"]->named_parameters().contains(parameter.key()))
{
std::vector<int64_t> tensor_size = parameter.value()[0];
std::vector<float> parameter_vec;
if (parameter.key().find("bias") == std::string::npos)
{
std::vector<std::vector<float>> parameter_2d_vec = parameter.value()[1].get<std::vector<std::vector<float>>>();
parameter_vec = flatten_2d_vector<float>(parameter_2d_vec);
}
else
{
parameter_vec = parameter.value()[1].get<std::vector<float>>();
}
NoGradGuard guard;
auto json_weights = torch::from_blob(parameter_vec.data(), tensor_size).contiguous();
policy->named_modules()["output"]->named_parameters()[parameter.key()].copy_(json_weights);
spdlog::info("Wrote {}", parameter.key());
if (parameter.key().find("bias") == std::string::npos)
{
spdlog::info("Json: {} - Memory: {}",
parameter.value()[1][0][0],
policy->named_modules()["output"]->named_parameters()[parameter.key()][0][0].item().toFloat());
}
}
}
// std::ifstream weights_file{"/home/px046/prog/pytorch-cpp-rl/build/weights.json"};
// auto json = nlohmann::json::parse(weights_file);
// for (const auto &parameter : json.items())
// {
// if (base->named_parameters().contains(parameter.key()))
// {
// std::vector<int64_t> tensor_size = parameter.value()[0];
// std::vector<float> parameter_vec;
// if (parameter.key().find("bias") == std::string::npos)
// {
// std::vector<std::vector<float>> parameter_2d_vec = parameter.value()[1].get<std::vector<std::vector<float>>>();
// parameter_vec = flatten_2d_vector<float>(parameter_2d_vec);
// }
// else
// {
// parameter_vec = parameter.value()[1].get<std::vector<float>>();
// }
// NoGradGuard guard;
// auto json_weights = torch::from_blob(parameter_vec.data(), tensor_size).contiguous();
// base->named_parameters()[parameter.key()].copy_(json_weights);
// spdlog::info("Wrote {}", parameter.key());
// if (parameter.key().find("bias") == std::string::npos)
// {
// spdlog::info("Json: {} - Memory: {}", parameter.value()[1][0][0], base->named_parameters()[parameter.key()][0][0].item().toFloat());
// }
// }
// else if (policy->named_modules()["output"]->named_parameters().contains(parameter.key()))
// {
// std::vector<int64_t> tensor_size = parameter.value()[0];
// std::vector<float> parameter_vec;
// if (parameter.key().find("bias") == std::string::npos)
// {
// std::vector<std::vector<float>> parameter_2d_vec = parameter.value()[1].get<std::vector<std::vector<float>>>();
// parameter_vec = flatten_2d_vector<float>(parameter_2d_vec);
// }
// else
// {
// parameter_vec = parameter.value()[1].get<std::vector<float>>();
// }
// NoGradGuard guard;
// auto json_weights = torch::from_blob(parameter_vec.data(), tensor_size).contiguous();
// policy->named_modules()["output"]->named_parameters()[parameter.key()].copy_(json_weights);
// spdlog::info("Wrote {}", parameter.key());
// if (parameter.key().find("bias") == std::string::npos)
// {
// spdlog::info("Json: {} - Memory: {}",
// parameter.value()[1][0][0],
// policy->named_modules()["output"]->named_parameters()[parameter.key()][0][0].item().toFloat());
// }
// }
// }

std::vector<float> running_rewards(num_envs);
int episode_count = 0;
std::vector<float> reward_history(reward_average_window_size);

torch::manual_seed(0);
for (int update = 0; update < 1; ++update)
for (int update = 0; update < 100000; ++update)
{
for (int step = 0; step < batch_size; ++step)
{
Expand All @@ -167,9 +167,10 @@ int main(int argc, char *argv[])
observation_vec = flatten_2d_vector<float>(step_result->observation);
observation = torch::from_blob(observation_vec.data(), {num_envs, observation_size});
auto rewards = flatten_2d_vector<float>(step_result->reward);
auto real_rewards = flatten_2d_vector<float>(step_result->real_reward);
for (int i = 0; i < num_envs; ++i)
{
running_rewards[i] += rewards[i];
running_rewards[i] += real_rewards[i];
if (step_result->done[i][0])
{
reward_history[episode_count % reward_average_window_size] = running_rewards[i];
Expand Down
3 changes: 2 additions & 1 deletion example/requests.h
Expand Up @@ -54,6 +54,7 @@ struct StepResponse
std::vector<std::vector<float>> observation;
std::vector<std::vector<float>> reward;
std::vector<std::vector<bool>> done;
MSGPACK_DEFINE_MAP(observation, reward, done);
std::vector<std::vector<float>> real_reward;
MSGPACK_DEFINE_MAP(observation, reward, done, real_reward);
};
}
17 changes: 14 additions & 3 deletions gym_server/envs.py
Expand Up @@ -40,16 +40,27 @@ def _obfilt(self, obs):
obs = np.clip((obs - self.ob_rms.mean)
/ np.sqrt(self.ob_rms.var + self.epsilon),
-self.clipob, self.clipob)
return obs
else:
return obs
return obs

def train(self):
self.training = True

def eval(self):
self.training = False

    def step_wait(self):
        """Step the wrapped vectorized envs and return normalized rewards.

        Returns:
            (obs, rews, news, infos) — `obs` filtered through `_obfilt`,
            `rews` normalized/clipped (when `self.ret_rms` is set), `news`
            the done flags, and `infos` a dict whose 'reward' entry holds
            the raw, un-normalized per-env rewards.
        """
        obs, rews, news, infos = self.venv.step_wait()
        # Capture the raw rewards (with a trailing axis added) BEFORE
        # normalization. NOTE(review): this REPLACES whatever infos the
        # underlying envs returned — confirm nothing downstream needs them.
        infos = {'reward': np.expand_dims(rews, -1)}
        # Running discounted return per env; used as the normalization signal.
        self.ret = self.ret * self.gamma + rews
        obs = self._obfilt(obs)
        if self.ret_rms:
            self.ret_rms.update(self.ret)
            # Scale rewards by the std of the discounted returns, then clip.
            rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon),
                           -self.cliprew,
                           self.cliprew)
        # Zero the running return for envs that just finished an episode.
        self.ret[news] = 0.
        return obs, rews, news, infos


def make_env(env_id, seed, rank):
def _thunk():
Expand Down
7 changes: 5 additions & 2 deletions gym_server/messages.py
Expand Up @@ -53,15 +53,18 @@ class StepMessage(Message):
def __init__(self,
observation: np.ndarray,
reward: np.ndarray,
done: np.ndarray):
done: np.ndarray,
real_reward: np.ndarray):
self.observation = observation
self.reward = reward
self.done = done
self.real_reward = real_reward

def to_msg(self) -> bytes:
request = {
"observation": self.observation.tolist(),
"reward": self.reward.tolist(),
"done": self.done.tolist()
"done": self.done.tolist(),
"real_reward": self.real_reward.tolist()
}
return msgpack.packb(request)
26 changes: 14 additions & 12 deletions gym_server/server.py
Expand Up @@ -38,26 +38,28 @@ def serve(self):
def _serve(self):
while True:
request = self.zmq_client.receive()
method = request["method"]
param = request["param"]
method = request['method']
param = request['param']

if method == "make":
self.__make(param["env_name"], param["num_envs"],
param["gamma"])
if method == 'make':
self.__make(param['env_name'], param['num_envs'],
param['gamma'])
self.zmq_client.send(MakeMessage())

elif method == "reset":
elif method == 'reset':
observation = self.__reset()
self.zmq_client.send(ResetMessage(observation))

elif method == "step":
if "render" in param:
elif method == 'step':
if 'render' in param:
result = self.__step(
np.array(param["actions"]), param["render"])
np.array(param['actions']), param['render'])
else:
result = self.__step(np.array(param["actions"]))
self.zmq_client.send(StepMessage(result[0], result[1],
result[2]))
result = self.__step(np.array(param['actions']))
self.zmq_client.send(StepMessage(result[0],
result[1],
result[2],
result[3]['reward']))

def make(self, env_name, num_envs, gamma):
"""
Expand Down
3 changes: 0 additions & 3 deletions gym_server/zmq_client.py
@@ -1,12 +1,9 @@
"""
Pytorch-cpp-rl OpenAI gym server ZMQ client.
"""
import logging
import zmq
import msgpack

from gym_server.messages import Message


class ZmqClient:
"""
Expand Down
File renamed without changes.

0 comments on commit 80fafb5

Please sign in to comment.