Skip to content

Commit

Permalink
feat: tweaks to training process to enhance results
Browse files Browse the repository at this point in the history
  • Loading branch information
simojo committed Mar 17, 2024
1 parent ee43b2e commit 2b62f6b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 13 deletions.
12 changes: 10 additions & 2 deletions src/clover_train/scripts/simulation_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@
import rospkg
import rospy
import time
import subprocess


NODE_KILL_TIMEOUT = 30 # seconds

clover_simulation_launch = None


def launch_clover_simulation(gazebo_world_filepath: str = None, gui: bool = True) -> None:
"""Launch all nodes related to clover_simulation."""
global clover_simulation_launch
uuid = roslaunch.rlutil.get_or_generate_uuid(None, False)
roslaunch.configure_logging(uuid)
# find clover_simulation launch file
Expand All @@ -31,12 +35,16 @@ def launch_clover_simulation(gazebo_world_filepath: str = None, gui: bool = True
if gazebo_world_filepath is not None:
cli_args.append(f"gazebo_world_filepath:={gazebo_world_filepath}")
# construct launch parent object for starting launch file
this_launch = roslaunch.parent.ROSLaunchParent(uuid, [(cli_args[0], cli_args[1:])])
this_launch.start()
clover_simulation_launch = roslaunch.parent.ROSLaunchParent(uuid, [(cli_args[0], cli_args[1:])])
clover_simulation_launch.start()


def kill_clover_simulation() -> None:
"""Kill all nodes related to clover_simulation."""
if clover_simulation_launch is not None:
clover_simulation_launch.shutdown()
subprocess.run(["killall", "gzserver"])
subprocess.run(["killall", "gzclient"])
# do not kill roscore processes
nodes_to_not_kill = {"/rosout", "/clover_train"}
# take set difference between existing nodes and nodes to not kill
Expand Down
36 changes: 25 additions & 11 deletions src/clover_train/scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,22 +415,23 @@ def update_target(target_weights, weights, tau):

def get_actor():
# Initialize weights
relu_initializer_uniform = tf.random_uniform_initializer(minval=0, maxval=1)
relu_initializer_towards_zero = tf.random_uniform_initializer(minval=0, maxval=0.2)
relu_initializer_r = tf.random_uniform_initializer(minval=0.5, maxval=1)
relu_initializer_theta = tf.random_uniform_initializer(minval=0, maxval=0.2)
relu_initializer_phi = tf.random_uniform_initializer(minval=0, maxval=1)

inputs = layers.Input(shape=(num_states,))
out = layers.Dense(256, activation="relu")(inputs)
out = layers.Dense(256, activation="relu")(out)
# initialize relu outputs for actions with range [0, x]
# put in list (to make extensible in case of adding more actions)
outputs_list = [
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_uniform)(
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_r)(
out
),
layers.Dense(
1, activation="relu", kernel_initializer=relu_initializer_towards_zero
1, activation="relu", kernel_initializer=relu_initializer_theta
)(out),
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_uniform)(
layers.Dense(1, activation="relu", kernel_initializer=relu_initializer_phi)(
out
),
]
Expand Down Expand Up @@ -668,9 +669,17 @@ def calibrate_accelerometers() -> None:
return True


std_dev = 0.2
# choose each stdev such that
# mean + 4 * sigma = max or
# mean - 4 * sigma = min
std_dev = [
1/4 * (action_r_max - (action_r_max - action_r_min) / 2),
1/4 * (action_theta_max - (action_theta_max - action_theta_min) / 2),
1/4 * (action_phi_max - (action_phi_max - action_phi_min) / 2),
]

ou_noise = OUActionNoise(
mean=np.zeros(num_actions), std_deviation=float(std_dev) * np.ones(num_actions)
mean=np.array(num_actions), std_deviation=np.array(std_dev) * np.ones(num_actions)
)

# identifier for this program's execution
Expand Down Expand Up @@ -740,21 +749,26 @@ def calibrate_accelerometers() -> None:
num_actions_taken = 0
num_actions_per_ep = 4

while not rospy.is_shutdown() and num_actions_taken < num_actions_per_ep:
# learning has completed or episode has restarted;
# we can unpause physics engine
service_proxies.unpause_physics()
# take off to get drone off of ground for first step
navigate_wait(x=0, y=0, z=0.5, frame_id="body", auto_arm=True)

while not rospy.is_shutdown() and num_actions_taken < num_actions_per_ep:
tf_prev_state = tf.expand_dims(
tf.convert_to_tensor(util.dataclass_to_list(prev_state)), 0
)

action = policy(tf_prev_state, ou_noise)
print("[TRACE] policy calculated")

# calculations completed;
# we can unpause physics engine
service_proxies.unpause_physics()

# Receive state and reward from environment.
local_state, reward, done = episode_take_action(action)
print("[TRACE] action taken")
num_actions_taken += 1

# pause physics engine while learning is taking place
service_proxies.pause_physics()

Expand Down

0 comments on commit 2b62f6b

Please sign in to comment.