Skip to content

Commit

Permalink
feat: tweaks to training process to enhance results
Browse files Browse the repository at this point in the history
  • Loading branch information
simojo committed Mar 17, 2024
1 parent ee43b2e commit 2b62f6b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 13 deletions.
12 changes: 10 additions & 2 deletions src/clover_train/scripts/simulation_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@
import rospkg
import rospy
import time
import subprocess


NODE_KILL_TIMEOUT = 30 # seconds

clover_simulation_launch = None


def launch_clover_simulation(gazebo_world_filepath: str = None, gui: bool = True) -> None:
"""Launch all nodes related to clover_simulation."""
global clover_simulation_launch
uuid = roslaunch.rlutil.get_or_generate_uuid(None, False)
roslaunch.configure_logging(uuid)
# find clover_simulation launch file
Expand All @@ -31,12 +35,16 @@ def launch_clover_simulation(gazebo_world_filepath: str = None, gui: bool = True
if gazebo_world_filepath is not None:
cli_args.append(f"gazebo_world_filepath:={gazebo_world_filepath}")
# construct launch parent object for starting launch file
this_launch = roslaunch.parent.ROSLaunchParent(uuid, [(cli_args[0], cli_args[1:])])
this_launch.start()
clover_simulation_launch = roslaunch.parent.ROSLaunchParent(uuid, [(cli_args[0], cli_args[1:])])
clover_simulation_launch.start()


def kill_clover_simulation() -> None:
"""Kill all nodes related to clover_simulation."""
if clover_simulation_launch is not None:
clover_simulation_launch.shutdown()
subprocess.run(["killall", "gzserver"])
subprocess.run(["killall", "gzclient"])
# do not kill roscore processes
nodes_to_not_kill = {"/rosout", "/clover_train"}
# take set difference between existing nodes and nodes to not kill
Expand Down
36 changes: 25 additions & 11 deletions src/clover_train/scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,22 +415,23 @@ def update_target(target_weights, weights, tau):

def get_actor():
# Initialize weights
relu_initializer_uniform = tf.random_uniform_initializer(minval=0, maxval=1)
relu_initializer_towards_zero = tf.random_uniform_initializer(minval=0, maxval=0.2)
relu_initializer_r = tf.random_uniform_initializer(minval=0.5, maxval=1)
relu_initializer_theta = tf.random_uniform_initializer(minval=0, maxval=0.2)
relu_initializer_phi = tf.random_uniform_initializer(minval=0, maxval=1)

inputs = layers.Input(shape=(num_states,))
out = layers.Dense(256, activation="relu")(inputs)
out = layers.Dense(256, activation="relu")(out)
# initialize relu outputs for actions with range [0, x]
# put in list (to make extensible in case of adding more actions)
outputs_list = [
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_uniform)(
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_r)(
out
),
layers.Dense(
1, activation="relu", kernel_initializer=relu_initializer_towards_zero
1, activation="relu", kernel_initializer=relu_initializer_theta
)(out),
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_uniform)(
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_phi)(
out
),
]
Expand Down Expand Up @@ -668,9 +669,17 @@ def calibrate_accelerometers() -> None:
return True


std_dev = 0.2
# choose each stdev such that
# mean + 4 * sigma = max or
# mean - 4 * sigma = min
std_dev = [
1/4 * (action_r_max - (action_r_max - action_r_min) / 2),
1/4 * (action_theta_max - (action_theta_max - action_theta_min) / 2),
1/4 * (action_phi_max - (action_phi_max - action_phi_min) / 2),
]

ou_noise = OUActionNoise(
mean=np.zeros(num_actions), std_deviation=float(std_dev) * np.ones(num_actions)
mean=np.array(num_actions), std_deviation=np.array(std_dev) * np.ones(num_actions)
)

# identifier for this program's execution
Expand Down Expand Up @@ -740,21 +749,26 @@ def calibrate_accelerometers() -> None:
num_actions_taken = 0
num_actions_per_ep = 4

while not rospy.is_shutdown() and num_actions_taken < num_actions_per_ep:
# learning has completed or episode has restarted;
# we can unpause physics engine
service_proxies.unpause_physics()
# take off to get drone off of ground for first step
navigate_wait(x=0, y=0, z=0.5, frame_id="body", auto_arm=True)

while not rospy.is_shutdown() and num_actions_taken < num_actions_per_ep:
tf_prev_state = tf.expand_dims(
tf.convert_to_tensor(util.dataclass_to_list(prev_state)), 0
)

action = policy(tf_prev_state, ou_noise)
print("[TRACE] policy calculated")

# calculations completed;
# we can unpause physics engine
service_proxies.unpause_physics()

# Receive state and reward from environment.
local_state, reward, done = episode_take_action(action)
print("[TRACE] action taken")
num_actions_taken += 1

# pause physics engine while learning is taking place
service_proxies.pause_physics()

Expand Down

0 comments on commit 2b62f6b

Please sign in to comment.