In [1]:
from datetime import datetime
import os
import numpy as np
from CMap2D import flatten_contours, render_contours_in_lidar, CMap2D, CSimAgent, fast_2f_norm

from stable_baselines import PPO2
from stable_baselines.gail import ExpertDataset

from navrep.tools.custom_policy import CustomPolicy, ARCH, _C
from navrep.envs.e2eenv import E2ENavRepEnvPretrain
from navrep.tools.expert_policy import FastmarchORCAPolicy, alt_generate_expert_traj
from crowd_sim.envs.utils.action import ActionXYRot

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


Ros was not found, disabled.


In [2]:
class E2ENavRepEnvPretrainNoRot(E2ENavRepEnvPretrain):
    def __init__(self, *args, **kwargs):
        super(E2ENavRepEnvPretrainNoRot, self).__init__(*args, **kwargs)
    def reset(self):
        self.encoder.reset()
        
        self.steps_since_reset = 0
        self.episode_reward = 0
        _, _ = self.soadrl_sim.reset(self.scenario, compute_local_map=False)
        random_rot = ActionXYRot(0, 0, 0.*(np.random.random()-0.5))
        self.soadrl_sim.step(random_rot, compute_local_map=False, border=self.border)
        if not self.LEGACY_MODE:
            self._add_border_obstacle()
        contours = self.soadrl_sim.obstacle_vertices
        self.flat_contours = flatten_contours(contours)
        self.distances_travelled_in_base_frame = np.zeros((len(self.soadrl_sim.humans), 3))
        obs = self._convert_obs()
        if self.LEGACY_MODE:
            state, local_map, reward, done, info = self.soadrl_sim.step(
                ActionXYRot(0, 0, 0), compute_local_map=True, border=self.border)
            obs = (state, local_map)
        
        h = self.encoder._encode_obs(obs, np.array([0,0,0]))
        N = h.shape[0]
        h = h.reshape((N,))
        return h

In [9]:
env_no_rot = E2ENavRepEnvPretrainNoRot(silent=True, adaptive=False, collect_statistics=False)
env_no_rot.soadrl_sim.human_num = 2
env_no_rot.soadrl_sim.num_walls = 1
env_no_rot.soadrl_sim.num_circles = 0

env_rot = E2ENavRepEnvPretrain(silent=True, adaptive=False, collect_statistics=False)
env_rot.soadrl_sim.human_num = 2
env_rot.soadrl_sim.num_walls = 1
env_rot.soadrl_sim.num_circles = 0

env_difficult = E2ENavRepEnvPretrain(silent=True, adaptive=True, collect_statistics=False)

In [10]:
#alt_generate_expert_traj(env_no_rot,500,policy=FastmarchORCAPolicy(suicide_if_stuck=False), save_path = 'fmORCA_humans_no_rot', render=False)
#alt_generate_expert_traj(env_rot,500,policy=FastmarchORCAPolicy(suicide_if_stuck=False), save_path = 'fmORCA_humans_rot', render=False)
alt_generate_expert_traj(env_difficult,500,policy=FastmarchORCAPolicy(suicide_if_stuck=False), save_path = 'fmORCA_humans_diff', render=False)

(4101,)
actions (52791, 2)
obs (52791, 4101)
rewards (52791,)
episode_returns (500,)
episode_starts (52791,)


{'actions': array([[-0.42965959, -0.06140743],
        [-0.66069858,  0.36724222],
        [-0.93753837,  0.25530783],
        ...,
        [-0.07077036, -0.20369188],
        [-0.06519951, -0.18309845],
        [-0.06127269, -0.192726  ]]),
 'obs': array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.42965959,
         -0.06140743,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.66069858,
          0.36724222,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ..., -0.07309605,
         -0.20998105,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.07077036,
         -0.20369188,  0.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.06519951,
         -0.18309845,  0.        ]]),
 'rewards': array([8.29162162e-03, 8.74589958e-03, 1.46165881e-02, ...,
        4.31177843e-03, 3.88535975e-03, 1.00000000e+

In [11]:
#model_no_rot = PPO2(CustomPolicy, env_no_rot, verbose=0)
#dataset = ExpertDataset(expert_path='fmORCA_humans_no_rot.npz',traj_limitation=1, batch_size=64)
#model_no_rot.pretrain(dataset, n_epochs=500)

#model_rot = PPO2(CustomPolicy, env_rot, verbose=0)
#dataset = ExpertDataset(expert_path='fmORCA_humans_rot.npz',traj_limitation=1, batch_size=64)
#model_rot.pretrain(dataset, n_epochs=500)

model_diff = PPO2(CustomPolicy, env_difficult, verbose=0)
dataset = ExpertDataset(expert_path='fmORCA_humans_diff.npz',traj_limitation=1, batch_size=64)
model_diff.pretrain(dataset, n_epochs=500)

actions (52791, 2)
obs (52791, 4101)
rewards (52791,)
episode_returns (500,)
episode_starts (52791,)
Total trajectories: 1
Total transitions: 44
Average returns: 36.716863468878664
Std for returns: 69.70170255056497


<stable_baselines.ppo2.ppo2.PPO2 at 0x7f96a67ed080>

In [8]:
model= model_long
env = model.get_env()
obs = env.reset()
reward_sum = 0.0
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, _ = env.step(action)
    reward_sum += reward
    env.render(save_to_file=False)
    if done:
        print(reward_sum)
        reward_sum = 0.0
        obs = env.reset()

[-26.700445]
[-25.159101]
[-26.866325]
[-25.072529]
[-25.075518]
[-25.717968]
[-25.247717]
[-25.166935]
[-25.056839]
[-25.012012]
[-25.814096]
[-27.810425]
[-25.055595]
[-31.854746]
[-25.166918]
[-32.203747]
[-25.157665]
[-25.023987]
[-25.76916]
[-25.050709]
[-25.078642]
[-25.065264]
[-25.035376]
[-25.0639]
[-30.182325]
[-54.22718]
[-25.058443]


## Test very long pretrain

In [7]:
env_learn = E2ENavRepEnvPretrain(silent=True, adaptive=True, collect_statistics=False)
env_pretrain = E2ENavRepEnvPretrain(silent=True, adaptive=True, collect_statistics=False)
model_long = PPO2(CustomPolicy, env_learn, verbose=0)
for i in range(100):
    alt_generate_expert_traj(env_pretrain,100,policy=FastmarchORCAPolicy(suicide_if_stuck=False), save_path = 'fmORCA_tmp', render=False)
    dataset = ExpertDataset(expert_path='fmORCA_tmp.npz',traj_limitation=1, batch_size=64)
    model_long.pretrain(dataset, n_epochs=50)

(4101,)
actions (10230, 2)
obs (10230, 4101)
rewards (10230,)
episode_returns (100,)
episode_starts (10230,)
actions (10230, 2)
obs (10230, 4101)
rewards (10230,)
episode_returns (100,)
episode_starts (10230,)
Total trajectories: 1
Total transitions: 224
Average returns: 25.535129780296806
Std for returns: 75.17856231228428
(4101,)
actions (10193, 2)
obs (10193, 4101)
rewards (10193,)
episode_returns (100,)
episode_starts (10193,)
actions (10193, 2)
obs (10193, 4101)
rewards (10193,)
episode_returns (100,)
episode_starts (10193,)
Total trajectories: 1
Total transitions: 197
Average returns: 30.14070158287741
Std for returns: 68.3747730980344
(4101,)
actions (10754, 2)
obs (10754, 4101)
rewards (10754,)
episode_returns (100,)
episode_starts (10754,)
actions (10754, 2)
obs (10754, 4101)
rewards (10754,)
episode_returns (100,)
episode_starts (10754,)
Total trajectories: 1
Total transitions: 152
Average returns: 18.750542893171517
Std for returns: 79.62851162147648
(4101,)
actions (10493, 

actions (10175, 2)
obs (10175, 4101)
rewards (10175,)
episode_returns (100,)
episode_starts (10175,)
Total trajectories: 1
Total transitions: 216
Average returns: 30.736888013938753
Std for returns: 73.13734577884635
(4101,)
actions (10330, 2)
obs (10330, 4101)
rewards (10330,)
episode_returns (100,)
episode_starts (10330,)
actions (10330, 2)
obs (10330, 4101)
rewards (10330,)
episode_returns (100,)
episode_starts (10330,)
Total trajectories: 1
Total transitions: 224
Average returns: 25.976801296230768
Std for returns: 65.73547628022942
(4101,)
actions (10642, 2)
obs (10642, 4101)
rewards (10642,)
episode_returns (100,)
episode_starts (10642,)
actions (10642, 2)
obs (10642, 4101)
rewards (10642,)
episode_returns (100,)
episode_starts (10642,)
Total trajectories: 1
Total transitions: 289
Average returns: 23.061634358964707
Std for returns: 78.02797355593637
(4101,)
actions (11034, 2)
obs (11034, 4101)
rewards (11034,)
episode_returns (100,)
episode_starts (11034,)
actions (11034, 2)
obs

Total trajectories: 1
Total transitions: 127
Average returns: 28.400553785014136
Std for returns: 67.52739943819039
(4101,)
actions (10378, 2)
obs (10378, 4101)
rewards (10378,)
episode_returns (100,)
episode_starts (10378,)
actions (10378, 2)
obs (10378, 4101)
rewards (10378,)
episode_returns (100,)
episode_starts (10378,)
Total trajectories: 1
Total transitions: 132
Average returns: 25.137817944204777
Std for returns: 69.11800775682174
(4101,)
actions (11092, 2)
obs (11092, 4101)
rewards (11092,)
episode_returns (100,)
episode_starts (11092,)
actions (11092, 2)
obs (11092, 4101)
rewards (11092,)
episode_returns (100,)
episode_starts (11092,)
Total trajectories: 1
Total transitions: 239
Average returns: 27.532997147579266
Std for returns: 77.83467116933484
(4101,)
actions (10803, 2)
obs (10803, 4101)
rewards (10803,)
episode_returns (100,)
episode_starts (10803,)
actions (10803, 2)
obs (10803, 4101)
rewards (10803,)
episode_returns (100,)
episode_starts (10803,)
Total trajectories: 1


Std for returns: 69.98797922263044
(4101,)
actions (10474, 2)
obs (10474, 4101)
rewards (10474,)
episode_returns (100,)
episode_starts (10474,)
actions (10474, 2)
obs (10474, 4101)
rewards (10474,)
episode_returns (100,)
episode_starts (10474,)
Total trajectories: 1
Total transitions: 260
Average returns: 30.077371436174857
Std for returns: 69.66888125372562
(4101,)
actions (10697, 2)
obs (10697, 4101)
rewards (10697,)
episode_returns (100,)
episode_starts (10697,)
actions (10697, 2)
obs (10697, 4101)
rewards (10697,)
episode_returns (100,)
episode_starts (10697,)
Total trajectories: 1
Total transitions: 202
Average returns: 23.952077671067755
Std for returns: 74.07223580020381
(4101,)
actions (10845, 2)
obs (10845, 4101)
rewards (10845,)
episode_returns (100,)
episode_starts (10845,)
actions (10845, 2)
obs (10845, 4101)
rewards (10845,)
episode_returns (100,)
episode_starts (10845,)
Total trajectories: 1
Total transitions: 226
Average returns: 29.91860540750038
Std for returns: 71.353