Commit

Merge pull request #44 from BerkeleyAutomation/supervisor_fix
Supervisor fix
mdlaskey committed Apr 4, 2018
2 parents 52776f0 + 0105bc2 commit ced22d1
Showing 7 changed files with 58 additions and 21 deletions.
@@ -37,7 +37,7 @@ def __init__(self, agent_num=0):



- def eval_policy(self, action,state):
+ def eval_policy(self, action,state,simplified=False):
"""
Returns action based on next state in trajectory.
@@ -53,6 +53,11 @@ def eval_policy(self, action,state):
tuple with floats (steering,acceleration)
"""



+ if simplified:
+     return SteeringAction(0.0,0.0)

if not isinstance(action,SteeringAction):
raise Exception('Action is not of type SteeringAction')

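The `simplified` flag added above gives rollout code a fast path: the agent returns a zero SteeringAction without validating its input or consulting a trajectory. A minimal sketch of the contract, assuming only the SteeringAction(steering, acceleration) constructor visible in the diff; the class is an illustrative stand-in, not the repo's agent:

    from gym_urbandriving.actions import SteeringAction

    class SteeringAgentSketch:
        # Illustrative stand-in mirroring the patched eval_policy signature.
        def eval_policy(self, action, state, simplified=False):
            if simplified:
                # Fast path from this PR: zero steering and acceleration,
                # returned before any type checks or trajectory lookups.
                return SteeringAction(0.0, 0.0)
            if not isinstance(action, SteeringAction):
                raise Exception('Action is not of type SteeringAction')
            return action  # the real agent tracks its planned trajectory here

    # Background rollouts can pass None for action and state in simplified mode.
    noop = SteeringAgentSketch().eval_policy(None, None, simplified=True)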
8 changes: 4 additions & 4 deletions gym_urbandriving/agents/hierarchical/velocity_action_agent.py
@@ -41,7 +41,7 @@ def __init__(self, agent_num=0):



- def eval_policy(self, action,state):
+ def eval_policy(self, action,state,simplified = False):
"""
Returns action based on next state in trajectory.
@@ -66,9 +66,9 @@ def eval_policy(self, action,state):
geoplanner.plan_for_agents(state,type_of_agent='controlled_cars',agent_num=self.agent_num)
self.not_initiliazed = False


- target_vel = action
- state.dynamic_objects['controlled_cars'][str(self.agent_num)].trajectory.set_vel(target_vel)
+ if not simplified:
+     target_vel = action
+     state.dynamic_objects['controlled_cars'][str(self.agent_num)].trajectory.set_vel(target_vel)

return super(VelocityActionAgent, self).eval_policy(state,type_of_agent='controlled_cars')

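Here the guard is inverted relative to the steering agent: in simplified mode the velocity agent skips writing a new target velocity but still runs the low-level follower through super().eval_policy. A usage sketch under the assumptions that VelocityActionAgent is importable from gym_urbandriving.agents and that `state` is an already-built scene state:

    from gym_urbandriving.agents import VelocityActionAgent
    from gym_urbandriving.actions import VelocityAction

    agent = VelocityActionAgent(agent_num=0)

    # Full mode: the VelocityAction is written into the controlled car's
    # trajectory before the underlying controller produces a control.
    control = agent.eval_policy(VelocityAction(4.0), state)

    # Simplified mode (this PR): the action argument is ignored and the
    # trajectory keeps its previous target velocity.
    control = agent.eval_policy(None, state, simplified=True)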
2 changes: 2 additions & 0 deletions gym_urbandriving/agents/supervisor/velocity_supervisor.py
@@ -63,6 +63,8 @@ def eval_policy(self, state,simplified = False):

if not simplified:
target_vel = VelocityMPCPlanner().plan(deepcopy(state), self.agent_num, type_of_agent = "controlled_cars")
+ else:
+     target_vel = state.dynamic_objects['background_cars'][str(self.agent_num)].trajectory.get_vel()



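The supervisor's simplified branch skips the VelocityMPCPlanner rollout entirely and reads back the velocity already stored on the car's trajectory through the new Trajectory.get_vel() (added in trajectory.py below). A usage sketch; the class name VelocitySupervisor and its import path are assumed from the file path:

    from gym_urbandriving.agents import VelocitySupervisor

    supervisor = VelocitySupervisor(agent_num=0)

    # Full supervision: forward-simulates a deep copy of the scene with
    # VelocityMPCPlanner to choose a target velocity.
    action = supervisor.eval_policy(state)

    # Simplified supervision (this PR): no planning; reuses the velocity
    # the trajectory already carries.
    action = supervisor.eval_policy(state, simplified=True)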
40 changes: 27 additions & 13 deletions gym_urbandriving/envs/urbandriving_env.py
@@ -66,7 +66,7 @@ def __init__(self,
self._reset()


- def _step(self, action, background_simplified=False):
+ def _step(self, action, background_simplified=False,supervisor=False):
"""
The step function accepts a control for the 0th agent in the scene. Then, it queries
all the background agents to determine their actions. Then, it updates the scene
@@ -97,21 +97,18 @@ def _step(self, action, background_simplified=False):
background_traffic_actions = []

### GET ALL ACTIONS ####


for agent in self.current_state.bg_agents['background_cars']:
- if background_simplified:
-     background_car_actions.append(agent.eval_policy(self.current_state,simplified=True))
- else:
-     background_car_actions.append(agent.eval_policy(self.current_state))
+ background_car_actions.append(agent.eval_policy(self.current_state,simplified=background_simplified))

for agent in self.current_state.bg_agents['traffic_lights']:
background_traffic_actions.append(agent.eval_policy(self.current_state))

- if not background_simplified:
-     for i,agent in enumerate(self.current_state.bg_agents['controlled_cars']):
-         controlled_car_actions.append(agent.eval_policy(action[i],self.current_state))
+ for i,agent in enumerate(self.current_state.bg_agents['controlled_cars']):
+     controlled_car_actions.append(agent.eval_policy(action[i],self.current_state,simplified=background_simplified))



####UPDATE ALL POLICIES#########
for index, dobj in self.current_state.dynamic_objects['background_cars'].items():
@@ -120,9 +117,9 @@ def _step(self, action, background_simplified=False):
for index, dobj in self.current_state.dynamic_objects['traffic_lights'].items():
dobj.step(background_traffic_actions[int(index)])

- if not background_simplified:
-     for i, dobj in self.current_state.dynamic_objects['controlled_cars'].items():
-         dobj.step(controlled_car_actions[int(i)].get_value())
+ for i, dobj in self.current_state.dynamic_objects['controlled_cars'].items():
+     dobj.step(controlled_car_actions[int(i)].get_value())

self.current_state.time += 1
dynamic_coll, static_coll, controlled_car_collisions = self.current_state.get_collisions()
@@ -143,9 +140,24 @@ def _step(self, action, background_simplified=False):
assert(self.visualizer)
self._render()
observations = [self.visualizer.get_bitmap()] * len(state.dynamic_objects['controlled_cars'])
+ if self.visualizer:
+     self._render()

return observations, reward, done, info_dict

+ def get_initial_observations(self):
+     if self.observation_type == 'raw':
+         observations = [self.current_state] * len(self.current_state.dynamic_objects['controlled_cars'])
+     elif self.observation_type == 'Q-LIDAR':
+         observations = []
+         for key in self.current_state.dynamic_objects['controlled_cars'].keys():
+             observations.append(self.featurizer.featurize(self.current_state, key))
+     elif self.observation_type == 'bmp':
+         assert(self.visualizer)
+         self._render()
+         observations = [self.visualizer.get_bitmap()] * len(self.current_state.dynamic_objects['controlled_cars'])
+
+     return observations

def _reset(self, new_state=None):
"""
@@ -161,7 +173,6 @@ def _reset(self, new_state=None):
self.init_state = new_state
self.statics_rendered = False
if self.randomize:

self.init_state.randomize()

self.current_state = deepcopy(self.init_state)
@@ -194,5 +205,8 @@ def _render(self, mode='human', close=False, waypoints=[],
transparent_surface = transparent_surface)
self.statics_rendered = True

+ def get_current_state(self):
+     return self.current_state
+
+ def get_state_copy(self):
+     return deepcopy(self.current_state)
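With this change _step evaluates and steps the controlled cars unconditionally, forwarding simplified=background_simplified to their agents, so even background-only rollouts must pass one action slot per controlled car. A calling-convention sketch, assuming an initialized UrbanDrivingEnv bound to `env`; the slot-building lines mirror the planner change below:

    # One action slot per controlled car. None placeholders are safe in
    # simplified rollouts because simplified agents never read the action.
    num_cars = env.current_state.agent_config['controlled_cars']
    empty_actions = [None] * num_cars

    observations, reward, done, info_dict = env._step(empty_actions,
                                                      background_simplified=True)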
9 changes: 9 additions & 0 deletions gym_urbandriving/planning/trajectory.py
@@ -1,5 +1,6 @@
import numpy as np
from skimage import transform
+ from gym_urbandriving.actions import VelocityAction

# Four corners of the intersection, hard-coded in camera space
corners = np.array([[765, 385],
@@ -114,6 +115,14 @@ def set_vel(self, target_vel):
self.stopped = True
else:
self.stopped = False

+ def get_vel(self):
+     if len(self._trajectory) == 0:
+         return None
+     v_index = self.mode.index('v')
+
+     return VelocityAction(self._trajectory[-1][v_index])




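get_vel is the read-side complement of set_vel: it pulls the velocity channel out of the last trajectory point and wraps it in a VelocityAction, or returns None for an empty trajectory. A round-trip sketch, assuming `traj` is a Trajectory whose mode string contains 'v' and that VelocityAction exposes the get_value() accessor used on action objects in _step above:

    from gym_urbandriving.actions import VelocityAction

    traj.set_vel(VelocityAction(4.0))   # write the velocity channel

    vel = traj.get_vel()                # VelocityAction from the last point,
                                        # or None if the trajectory is empty
    if vel is not None:
        print(vel.get_value())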
11 changes: 8 additions & 3 deletions gym_urbandriving/planning/vel_mpc_planner.py
@@ -12,11 +12,16 @@ def plan(self, state, agent_num,type_of_agent = "background_cars"):
state_copy = deepcopy(state)
testing_env = uds.UrbanDrivingEnv(init_state=state_copy,
randomize=False)

+ num_controlled_cars = state_copy.agent_config['controlled_cars']
+ empty_actions = [None]*num_controlled_cars

state_copy = testing_env.current_state
if state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.stopped:
- state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.set_vel(VelocityAction(4))
+ state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.set_vel(VelocityAction(4.0))
for t in range(self.lookahead):
- state_copy, reward, done, info_dict = testing_env._step(None,background_simplified = True)
+ state_copy, reward, done, info_dict = testing_env._step(empty_actions,background_simplified = True)
state_copy = state_copy[0]
done = state_copy.collides_any_dynamic(agent_num,type_of_agent = type_of_agent)
if done:
@@ -28,7 +33,7 @@ def plan(self, state, agent_num,type_of_agent = "background_cars"):

elif not state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.stopped:
for t in range(self.lookahead):
- state_copy, reward, done, info_dict = testing_env._step(None,background_simplified = True)
+ state_copy, reward, done, info_dict = testing_env._step(empty_actions,background_simplified = True)
state_copy = state_copy[0]
done = state_copy.collides_any_dynamic(agent_num,type_of_agent = type_of_agent)
if done:
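This mirrors the env change: _step now indexes action[i] for every controlled car even in simplified rollouts, so the planner passes a list of None placeholders instead of a bare None. A condensed sketch of the lookahead loop, lifted from the method body above (state_copy, testing_env, agent_num, and self.lookahead are the method's own names, not standalone variables):

    # Roll the copied scene forward `lookahead` simplified steps and stop
    # at the first predicted collision involving this agent.
    num_controlled_cars = state_copy.agent_config['controlled_cars']
    empty_actions = [None] * num_controlled_cars

    for t in range(self.lookahead):
        obs, reward, done, info_dict = testing_env._step(empty_actions,
                                                         background_simplified=True)
        state_copy = obs[0]
        if state_copy.collides_any_dynamic(agent_num, type_of_agent=type_of_agent):
            break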
2 changes: 2 additions & 0 deletions gym_urbandriving/state/state.py
@@ -75,6 +75,7 @@ def randomize(self):
self.dynamic_objects['background_cars'][str(car_index)] = car
self.dynamic_objects['background_cars'][str(car_index)].destination = self.assign_goal_states(start)
break

if self.agent_config['use_traffic_lights']:
self.dynamic_objects['traffic_lights'] = {}
for i, traffic_light in enumerate(self.state_config['traffic_lights']):
@@ -194,6 +195,7 @@ def collides_any_dynamic(self, agentnum,type_of_agent = 'background_cars'):
for coll in dynamic_collisions:
if (agentnum in coll) and (type_of_agent in coll):
return True

return False

def min_dist_to_coll(self, agentnum,type_of_agent = 'background_cars'):
