In [14]:
from datetime import datetime
import os
import numpy as np
from CMap2D import flatten_contours, render_contours_in_lidar, CMap2D, CSimAgent, fast_2f_norm

from stable_baselines import PPO2
from stable_baselines.gail import ExpertDataset

from navrep.tools.custom_policy import CustomPolicy, ARCH, _C
from navrep.envs.e2eenv import E2ENavRepEnvPretrain
from navrep.tools.expert_policy import FastmarchORCAPolicy, alt_generate_expert_traj
from crowd_sim.envs.utils.action import ActionXYRot

In [15]:
class E2ENavRepEnvPretrainNoRot(E2ENavRepEnvPretrain):
    """E2ENavRepEnvPretrain variant whose reset() applies no initial rotation.

    The only customised behaviour is in reset(): the rotation component of
    the first simulator action is a random draw multiplied by ``0.``, so its
    magnitude is always zero.
    """

    def __init__(self, *args, **kwargs):
        """Forward every argument unchanged to the parent constructor."""
        super(E2ENavRepEnvPretrainNoRot, self).__init__(*args, **kwargs)

    def reset(self):
        """Reset encoder, episode counters and simulator; return the encoded first observation.

        Returns:
            The array produced by ``self.encoder._encode_obs`` reshaped to one
            dimension of length ``shape[0]``.
        """
        # Encoder state and per-episode bookkeeping start from scratch.
        self.encoder.reset()
        self.steps_since_reset = 0
        self.episode_reward = 0

        # The simulator's reset result is discarded (it must unpack into two values).
        _, _ = self.soadrl_sim.reset(self.scenario, compute_local_map=False)

        # A random number is still drawn, but the 0. factor cancels it, so the
        # rotation passed to the simulator has zero magnitude.
        zeroed_rotation = 0. * (np.random.random() - 0.5)
        self.soadrl_sim.step(
            ActionXYRot(0, 0, zeroed_rotation),
            compute_local_map=False,
            border=self.border,
        )

        if not self.LEGACY_MODE:
            self._add_border_obstacle()

        # Cache flattened obstacle contours and zero the per-human travel accumulators.
        self.flat_contours = flatten_contours(self.soadrl_sim.obstacle_vertices)
        n_humans = len(self.soadrl_sim.humans)
        self.distances_travelled_in_base_frame = np.zeros((n_humans, 3))

        obs = self._convert_obs()
        if self.LEGACY_MODE:
            # Legacy mode replaces the converted observation with the
            # (state, local_map) pair from one extra zero-action simulator step.
            state, local_map, _reward, _done, _info = self.soadrl_sim.step(
                ActionXYRot(0, 0, 0), compute_local_map=True, border=self.border)
            obs = (state, local_map)

        # Encode with an all-zero second argument, then flatten to shape (N,).
        encoded = self.encoder._encode_obs(obs, np.array([0, 0, 0]))
        return encoded.reshape((encoded.shape[0],))

In [16]:
def _make_pretrain_env(env_cls):
    """Build one pretraining environment of class `env_cls` with the shared scenario settings.

    The environment is created silent, non-adaptive and without statistics
    collection; its simulator is then set to 2 humans, 1 wall and 0 circles.
    """
    new_env = env_cls(silent=True, adaptive=False, collect_statistics=False)
    simulator = new_env.soadrl_sim
    simulator.human_num = 2
    simulator.num_walls = 1
    simulator.num_circles = 0
    return new_env


# Same scenario for both environments; they differ only in the environment class.
env_no_rot = _make_pretrain_env(E2ENavRepEnvPretrainNoRot)
env_rot = _make_pretrain_env(E2ENavRepEnvPretrain)

In [17]:
# Record 500 expert runs per environment with a fresh FastmarchORCAPolicy each
# time (NOTE(review): 500 is presumably the episode count, matching the
# episode_returns (500,) shape printed below; confirm against
# alt_generate_expert_traj). The save paths are the ones the pretraining cell
# loads with a '.npz' suffix.
alt_generate_expert_traj(
    env_no_rot,
    500,
    policy=FastmarchORCAPolicy(suicide_if_stuck=False),
    save_path='fmORCA_humans_no_rot',
    render=False,
)
# Left as the cell's final bare expression so its return value is still displayed.
alt_generate_expert_traj(
    env_rot,
    500,
    policy=FastmarchORCAPolicy(suicide_if_stuck=False),
    save_path='fmORCA_humans_rot',
    render=False,
)

(4101,)
actions (48762, 2)
obs (48762, 4101)
rewards (48762,)
episode_returns (500,)
episode_starts (48762,)
(4101,)
actions (48703, 2)
obs (48703, 4101)
rewards (48703,)
episode_returns (500,)
episode_starts (48703,)


{'actions': array([[ 1.00861365,  0.01032194],
        [ 0.9951988 ,  0.02235663],
        [ 1.00472285,  0.00554371],
        ...,
        [ 0.22838406,  0.02005403],
        [ 0.19907745, -0.00206081],
        [ 0.20572709, -0.01337441]]),
 'obs': array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  1.00861365,
          0.01032194,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.9951988 ,
          0.02235663,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.23461736,
          0.01684431,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.22838406,
          0.02005403,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.19907745,
         -0.00206081,  0.        ]]),
 'rewards': array([2.01722611e-02, 1.99038666e-02, 2.00944104e-02, ...,
        4.56701646e-03, 3.98172204e-03, 1.00000000e+

In [None]:
# Behaviour-cloning pretraining of one PPO2 model per expert dataset.
#
# Each model is bound to the environment that generated its expert data. The
# previous version passed a bare `env`, which is only assigned in the later
# rollout cell, so a fresh "Restart & Run All" failed with NameError (and a
# stale kernel silently reused whichever env the last rollout had left behind).
#
# NOTE(review): traj_limitation=1 restricts each dataset to a single recorded
# trajectory even though 500 were generated; confirm this is intentional.
model_no_rot = PPO2(CustomPolicy, env_no_rot, verbose=0)
dataset = ExpertDataset(expert_path='fmORCA_humans_no_rot.npz', traj_limitation=1, batch_size=64)
model_no_rot.pretrain(dataset, n_epochs=500)

model_rot = PPO2(CustomPolicy, env_rot, verbose=0)
dataset = ExpertDataset(expert_path='fmORCA_humans_rot.npz', traj_limitation=1, batch_size=64)
model_rot.pretrain(dataset, n_epochs=500)

In [None]:
# Roll out the pretrained no-rotation model for 1000 rendered steps, printing
# the accumulated reward each time an episode finishes.
model = model_no_rot
env = model.get_env()  # the environment object held by the model
obs = env.reset()
reward_sum = 0.0
for _ in range(1000):
    # Deterministic action; the second value returned by predict() is unused.
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, _ = env.step(action)
    reward_sum += reward
    env.render()
    if not done:
        continue
    # Episode over: report its return, then start a new episode.
    print(reward_sum)
    reward_sum = 0.0
    obs = env.reset()