Merge pull request #41 from BerkeleyAutomation/action_spaces

Action spaces
BerkeleyAutomation · Apr 2, 2018 · 6800a18 · 6800a18
2 parents 05db02d + 81836ca
commit 6800a18
Show file tree

Hide file tree

Showing 17 changed files with 154 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -3,8 +3,8 @@
 
 [**Documentation**](urban-driving-simulator.rtfd.io)
 
-# Urban Driving Simulator
+# FLUIDS
 This repository contains a first order driving simulator for autonomous driving in urban environments. The package implements a OpenAI Gym environment describing a city scene with cars and pedestrians.
 
 
-We are currently under heavy development.
+Please see the documentation listed above for more information on how to install and use FLUIDS. 
diff --git a/docs/source/actions.rst b/docs/source/actions.rst
@@ -0,0 +1,15 @@
+Actions
+=============
+
+Velocity Action
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autoclass:: gym_urbandriving.actions.velocity_action.VelocityAction
+   :members: get_value,sample
+
+Steering Action
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autoclass:: gym_urbandriving.actions.steering_action.SteeringAction
+   :members: get_value,sample
+
+
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -13,6 +13,7 @@ Urban Driving Simulator Documentation
    environment.rst
    agents.rst
    state.rst
+   actions.rst
    observations.rst
    objects.rst
 

diff --git a/gym_urbandriving/actions/__init__.py b/gym_urbandriving/actions/__init__.py
@@ -0,0 +1,2 @@
+from gym_urbandriving.actions.steering_action import SteeringAction
+from gym_urbandriving.actions.velocity_action import VelocityAction
diff --git a/gym_urbandriving/actions/steering_action.py b/gym_urbandriving/actions/steering_action.py
@@ -0,0 +1,51 @@
+from gym.spaces import Box 
+import numpy as np
+
+
+
+class SteeringAction:
+	"""
+    This class is a wrapper for the action space at the lowest level of the heirarchy 
+    it represents the steering and acceleration applied directly to the car. 
+
+    """
+
+	def __init__(self,steering =0.0, acceleration = 0.0):
+		"""
+        Initializes the SteeringAction Class
+
+        Parameters
+        ----------
+        steering: float
+            the change in steering angle 
+        acceleration: float
+        	the acceleration applied to the car
+        """
+
+
+		self.box = Box(low=np.array([-1,-5.0]),high=np.array([3,5.0]))
+
+		self.controls = np.array([steering,acceleration])
+
+
+
+	def get_value(self):
+		"""
+		Gets the numpy array of the class
+
+		Returns
+		----------
+			steering and acceleration in numpy array shape (2,)
+		"""
+		return self.controls
+
+	def sample(self):
+		"""
+        Samples a random control in this class using the OpenAI Box class
+
+        Returns
+        ----------
+       	steering and acceleration in numpy array shape (2,)
+        """
+
+		return self.box.sample()
diff --git a/gym_urbandriving/actions/velocity_action.py b/gym_urbandriving/actions/velocity_action.py
@@ -0,0 +1,48 @@
+from gym.spaces import Box 
+import numpy as np
+
+
+
+class VelocityAction:
+	"""
+    This class is a wrapper for the velocity control in the hierarchy
+    it represents the target velocity for the car to drive at
+    """
+
+	def __init__(self,velocity=0.0):
+		"""
+        Initializes the VelocityAction Class
+
+        Parameters
+        ----------
+        velocity: float
+            the desired velocity 
+        """
+
+		self.box = Box(low=0.0,high=5.0,shape=(1,))
+
+		self.velocity = np.array([velocity])
+
+		if not self.box.contains(self.velocity):
+			raise Exception('Velocity is Out of Bounds')
+
+
+	def get_value(self):
+		"""
+		Gets the value of the current velocity
+
+		Returns
+		----------
+			float
+		"""
+		return self.velocity[0]
+
+	def sample(self):
+		"""
+        Samples a random control in this class using the OpenAI Box class
+
+        Returns
+        ----------
+       	velocity in numpy array shape (1,)
+        """
+		return self.box.sample()
diff --git a/gym_urbandriving/agents/background/pursuit_agent.py b/gym_urbandriving/agents/background/pursuit_agent.py
@@ -1,6 +1,7 @@
 import numpy as np
 from gym_urbandriving.utils.PID import PIDController
 from gym_urbandriving.agents import Agent
+from gym_urbandriving.actions import SteeringAction
 
 class PursuitAgent(Agent):
 
@@ -29,7 +30,7 @@ def eval_policy(self, state,type_of_agent = 'background_cars'):
 
         Returns
         -------
-        tuple with floats (steering,acceleration)
+        SteeringAction
         """
 
         obj = state.dynamic_objects[type_of_agent][str(self.agent_num)]
@@ -71,4 +72,4 @@ def eval_policy(self, state,type_of_agent = 'background_cars'):
         action_steer = self.PID_steer.get_control(e_angle)
 
 
-        return np.array([action_steer, action_acc])
+        return SteeringAction(steering=action_steer, acceleration=action_acc)
diff --git a/gym_urbandriving/agents/hierarchical/steering_action_agent.py b/gym_urbandriving/agents/hierarchical/steering_action_agent.py
@@ -4,6 +4,8 @@
 from gym_urbandriving.planning import VelocityMPCPlanner,GeometricPlanner
 from copy import deepcopy
 import gym_urbandriving as uds
+from gym_urbandriving.actions import SteeringAction
+import IPython
 
 class SteeringActionAgent(PursuitAgent):
     """
@@ -43,20 +45,16 @@ def eval_policy(self, action,state):
         ----------
         state : PositionState
             State of the world, unused
-        action : float
-            Target velocity for car to travel at 
+        action : SteeringAction
+            
 
         Returns
         -------
         tuple with floats (steering,acceleration)
         """
 
-        if (not type(action) == np.array):
-            if not action.shape[0] == 2:
-                raise Exception('Steering Action is not a numpy array of shape (2,)')
-
-        else:
-            raise Exception('Steering Action is not a numpy array of shape (2,)')
+        if not isinstance(action,SteeringAction):
+            raise Exception('Actions is Not of Type Steering Action')
 
         return action
 

diff --git a/gym_urbandriving/agents/hierarchical/velocity_action_agent.py b/gym_urbandriving/agents/hierarchical/velocity_action_agent.py
@@ -5,6 +5,8 @@
 from copy import deepcopy
 import gym_urbandriving as uds
 
+from gym_urbandriving.actions import VelocityAction
+
 class VelocityActionAgent(PursuitAgent):
     """
     Hierarichal agent which implements the full plannning stack except the velocity component
@@ -47,16 +49,16 @@ def eval_policy(self, action,state):
         ----------
         state : PositionState
             State of the world, unused
-        action : float or None
+        action : VelocityAction or None
             Target velocity for car to travel at 
 
         Returns
         -------
         tuple with floats (steering,acceleration)
         """
 
-        if not (type(action) == float or action == None):
-           raise Exception('Velocity Action is not of type float')
+        if not (isinstance(action,VelocityAction) or action == None):
+           raise Exception('Action is not of type VelocityAction')
 
         if self.not_initiliazed:
             geoplanner = GeometricPlanner(deepcopy(state), inter_point_d=40.0, planning_time=0.1)

diff --git a/gym_urbandriving/agents/supervisor/steering_supervisor.py b/gym_urbandriving/agents/supervisor/steering_supervisor.py
@@ -50,7 +50,7 @@ def eval_policy(self, state,simplified = False):
 
         Returns
         -------
-        tuple with floats (steering,acceleration)
+        SteeringAction
         """
 
         if self.not_initiliazed:

diff --git a/gym_urbandriving/envs/urbandriving_env.py b/gym_urbandriving/envs/urbandriving_env.py
@@ -115,14 +115,14 @@ def _step(self, action, background_simplified=False):
 
         ####UPDATE ALL POLICIES#########
         for index, dobj in self.current_state.dynamic_objects['background_cars'].items():
-            dobj.step(background_car_actions[int(index)])
+            dobj.step(background_car_actions[int(index)].get_value())
 
         for index, dobj in self.current_state.dynamic_objects['traffic_lights'].items():
             dobj.step(background_traffic_actions[int(index)])
 
         if not background_simplified:
             for i, dobj in self.current_state.dynamic_objects['controlled_cars'].items():
-                dobj.step(controlled_car_actions[int(i)])
+                dobj.step(controlled_car_actions[int(i)].get_value())
 
         self.current_state.time += 1
         dynamic_coll, static_coll, controlled_car_collisions = self.current_state.get_collisions()

diff --git a/gym_urbandriving/planning/geometric_planner.py b/gym_urbandriving/planning/geometric_planner.py
@@ -8,6 +8,8 @@
 import scipy as sc
 import scipy.interpolate
 from scipy.interpolate import UnivariateSpline
+from gym_urbandriving.actions import VelocityAction
+
 
 class ValidityChecker(ob.StateValidityChecker):
     def __init__(self, si, state, controlled_obj):
@@ -148,7 +150,7 @@ def plan_for_agents(self, state,type_of_agent='background_cars',agent_num=0):
 
             orig_obj.trajectory = traj
             orig_obj.vel = 0
-            orig_obj.trajectory.set_vel(4)
+            orig_obj.trajectory.set_vel(VelocityAction(4))
 
 
             npoints = orig_obj.trajectory.npoints()
@@ -174,7 +176,7 @@ def plan_for_agents(self, state,type_of_agent='background_cars',agent_num=0):
             newtraj = Trajectory(mode = 'xyv', fsm=0)
             for x, y in zip(xn, yn):
                 newtraj.add_point((x, y, 4))
-            newtraj.set_vel(4)
+            newtraj.set_vel(VelocityAction(4))
             orig_obj.trajectory = newtraj
 
     def plan(self, controlled_object, x1, y1, v1, a1):

diff --git a/gym_urbandriving/planning/trajectory.py b/gym_urbandriving/planning/trajectory.py
@@ -109,8 +109,8 @@ def set_vel(self, target_vel):
             return
         v_index = self.mode.index('v')
         for i in range(self.npoints()):
-            self._trajectory[i][v_index] = target_vel
-        if target_vel == 0.0:
+            self._trajectory[i][v_index] = target_vel.get_value()
+        if target_vel.get_value() == 0.0:
             self.stopped = True
         else:
             self.stopped = False

diff --git a/gym_urbandriving/planning/vel_mpc_planner.py b/gym_urbandriving/planning/vel_mpc_planner.py
@@ -1,5 +1,6 @@
 from copy import deepcopy
 import gym_urbandriving as uds
+from gym_urbandriving.actions import VelocityAction
 
 class VelocityMPCPlanner:
     def __init__(self, lookahead=10):
@@ -13,16 +14,16 @@ def plan(self, state, agent_num,type_of_agent = "background_cars"):
                                           randomize=False)
         state_copy = testing_env.current_state
         if state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.stopped:
-            state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.set_vel(4)
+            state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.set_vel(VelocityAction(4))
             for t in range(self.lookahead):
                 state_copy, reward, done, info_dict = testing_env._step(None,background_simplified = True)
                 state_copy = state_copy[0]
                 done = state_copy.collides_any_dynamic(agent_num,type_of_agent = type_of_agent)
                 if done:
                     break
             if not done:
-                return 4.0
-            return 0.0
+                return VelocityAction(4.0)
+            return VelocityAction(0.0)
 
 
         elif not state_copy.dynamic_objects[type_of_agent][str(agent_num)].trajectory.stopped:
@@ -33,6 +34,6 @@ def plan(self, state, agent_num,type_of_agent = "background_cars"):
                 if done:
                     break
             if done:
-                return 0.0
-            return 4.0
+                return VelocityAction(0.0)
+            return VelocityAction(4.0)
 
diff --git a/tests/test_q_lidar.py b/tests/test_q_lidar.py
@@ -7,13 +7,14 @@
 from gym_urbandriving.planning import Trajectory
 import numpy as np
 import IPython
+from gym_urbandriving.actions import SteeringAction
 
 with open('configs/default_config.json') as json_data_file:
     data = json.load(json_data_file)
 
 data['agents']['controlled_cars'] = 2
 
-action = [np.array([0.0,0.0])] * 2
+action = [SteeringAction(0.0,0.0)] * 2
 env = uds.UrbanDrivingEnv(data)
 
 observations,reward,done,info_dict = env.step(action)

diff --git a/tests/test_simulator_step.py b/tests/test_simulator_step.py
@@ -7,11 +7,12 @@
 from gym_urbandriving.planning import Trajectory
 import numpy as np
 import IPython
+from gym_urbandriving.actions import SteeringAction
 
 with open('configs/default_config.json') as json_data_file:
     data = json.load(json_data_file)
 
-action = [np.array([0.0,0.0])]
+action = [SteeringAction(0.0,0.0)]
 env = uds.UrbanDrivingEnv(data)
 
 observations,reward,done,info_dict = env.step(action)

diff --git a/tests/test_velocity_action.py b/tests/test_velocity_action.py
@@ -7,13 +7,14 @@
 from gym_urbandriving.planning import Trajectory
 import numpy as np
 import IPython
+from gym_urbandriving.actions import VelocityAction
 
 with open('configs/default_config.json') as json_data_file:
     data = json.load(json_data_file)
 
 
 data['agents']['action_space'] = 'velocity'
-action = [0.0]
+action = [VelocityAction(0.0)]
 
 env = uds.UrbanDrivingEnv(data)