In [1]:
from datetime import datetime
import os
import numpy as np
from CMap2D import flatten_contours, render_contours_in_lidar, CMap2D, CSimAgent, fast_2f_norm

from stable_baselines import PPO2
from stable_baselines.gail import ExpertDataset

from navrep.tools.custom_policy import CustomPolicy, ARCH, _C
from navrep.envs.e2eenv import E2ENavRepEnvPretrain
from navrep.tools.expert_policy import FastmarchORCAPolicy, alt_generate_expert_traj
from crowd_sim.envs.utils.action import ActionXYRot

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


Ros was not found, disabled.


In [2]:
class E2ENavRepEnvPretrainNoRot(E2ENavRepEnvPretrain):
    def __init__(self, *args, **kwargs):
        super(E2ENavRepEnvPretrainNoRot, self).__init__(*args, **kwargs)
    def reset(self):
        self.encoder.reset()
        
        self.steps_since_reset = 0
        self.episode_reward = 0
        _, _ = self.soadrl_sim.reset(self.scenario, compute_local_map=False)
        random_rot = ActionXYRot(0, 0, 0.*(np.random.random()-0.5))
        self.soadrl_sim.step(random_rot, compute_local_map=False, border=self.border)
        if not self.LEGACY_MODE:
            self._add_border_obstacle()
        contours = self.soadrl_sim.obstacle_vertices
        self.flat_contours = flatten_contours(contours)
        self.distances_travelled_in_base_frame = np.zeros((len(self.soadrl_sim.humans), 3))
        obs = self._convert_obs()
        if self.LEGACY_MODE:
            state, local_map, reward, done, info = self.soadrl_sim.step(
                ActionXYRot(0, 0, 0), compute_local_map=True, border=self.border)
            obs = (state, local_map)
        
        h = self.encoder._encode_obs(obs, np.array([0,0,0]))
        N = h.shape[0]
        h = h.reshape((N,))
        return h

In [9]:
env_no_rot = E2ENavRepEnvPretrainNoRot(silent=True, adaptive=False, collect_statistics=False)
env_no_rot.soadrl_sim.human_num = 2
env_no_rot.soadrl_sim.num_walls = 1
env_no_rot.soadrl_sim.num_circles = 0

env_rot = E2ENavRepEnvPretrain(silent=True, adaptive=False, collect_statistics=False)
env_rot.soadrl_sim.human_num = 2
env_rot.soadrl_sim.num_walls = 1
env_rot.soadrl_sim.num_circles = 0

env_difficult = E2ENavRepEnvPretrain(silent=True, adaptive=True, collect_statistics=False)

In [10]:
#alt_generate_expert_traj(env_no_rot,500,policy=FastmarchORCAPolicy(suicide_if_stuck=False), save_path = 'fmORCA_humans_no_rot', render=False)
#alt_generate_expert_traj(env_rot,500,policy=FastmarchORCAPolicy(suicide_if_stuck=False), save_path = 'fmORCA_humans_rot', render=False)
alt_generate_expert_traj(env_difficult,500,policy=FastmarchORCAPolicy(suicide_if_stuck=False), save_path = 'fmORCA_humans_diff', render=False)

(4101,)
actions (52791, 2)
obs (52791, 4101)
rewards (52791,)
episode_returns (500,)
episode_starts (52791,)


{'actions': array([[-0.42965959, -0.06140743],
        [-0.66069858,  0.36724222],
        [-0.93753837,  0.25530783],
        ...,
        [-0.07077036, -0.20369188],
        [-0.06519951, -0.18309845],
        [-0.06127269, -0.192726  ]]),
 'obs': array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.42965959,
         -0.06140743,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.66069858,
          0.36724222,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ..., -0.07309605,
         -0.20998105,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.07077036,
         -0.20369188,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.06519951,
         -0.18309845,  0.        ]]),
 'rewards': array([8.29162162e-03, 8.74589958e-03, 1.46165881e-02, ...,
        4.31177843e-03, 3.88535975e-03, 1.00000000e+

In [5]:
#model_no_rot = PPO2(CustomPolicy, env_no_rot, verbose=0)
#dataset = ExpertDataset(expert_path='fmORCA_humans_no_rot.npz',traj_limitation=1, batch_size=64)
#model_no_rot.pretrain(dataset, n_epochs=500)

#model_rot = PPO2(CustomPolicy, env_rot, verbose=0)
#dataset = ExpertDataset(expert_path='fmORCA_humans_rot.npz',traj_limitation=1, batch_size=64)
#model_rot.pretrain(dataset, n_epochs=500)

model_diff = PPO2(CustomPolicy, env_difficult, verbose=0)
dataset = ExpertDataset(expert_path='fmORCA_humans_diff.npz',traj_limitation=1, batch_size=64)
model_diff.pretrain(dataset, n_epochs=500)

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.
actions (48614, 2)
obs (48614, 4101)
rewards (48614,)
episode_returns (500,)
episode_starts (48614,)
Total trajectories: 1
Total transitions: 184
Average returns: 92.01811809418167
Std for returns: 21.661328875075423
actions (48793, 2)
obs (48793, 4101)
rewards (48793,)
episode_returns (500,)
episode_starts (48793,)
Total trajectories: 1
Total transitions: 185
Average returns: 91.44605342064315
Std for returns: 23.696938909705764


<stable_baselines.ppo2.ppo2.PPO2 at 0x7f97ee3c6ac8>

In [8]:
model= model_no_rot
env = model.get_env()
obs = env.reset()
reward_sum = 0.0
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, _ = env.step(action)
    reward_sum += reward
    env.render(save_to_file=True)
    if done:
        print(reward_sum)
        reward_sum = 0.0
        obs = env.reset()

[97.86052]
[-25.635164]
[101.51939]
[-25.212408]
[59.592873]
[-24.416807]
[101.190186]
[101.75147]
[90.489555]
[-33.705242]
[-24.141401]
[-23.456432]
[-27.607397]
