# AWS Deepracer Reward Function

In [3]:
#--DeepRacer reward functions are a crucial part of the reinforcement learning training process.
#--The reward function is continuously called every 1/15th of a second during the model training process.
#--In simple terms, it takes input variables such as the heading, speed, or steering angle of the car:
#--if the model performs a logical, desired action, it is rewarded through the reward value,
#--and if it does not adhere to the wanted actions, it is penalized.
#--The reward function is vital, and there are many interesting variations of it, some of which take into account
#--more complex variables such as track waypoints, which go hand-in-hand with the action space
#--(whether it is continuous or discrete).
#--Thus, here are 4 reward function examples in increasing level of complexity.

In [2]:
#--REWARD FUNCTION 1: DEEPRACER DEFAULT
#--This reward function defines three markers at increasing distances from the center line and gives a higher reward the closer the car stays to the center
#--It penalizes the model when it drifts away from the center line, and scales the reward by the progress made since the previous call
# Bookkeeping carried between calls so that each call can be rewarded for the
# progress gained since the previous call instead of the total progress.
_progress_tracker = {'steps': None, 'progress': 0.0}


def reward_function(params):
    '''
    Reward the progress gained since the previous call, weighted by how
    close the car is to the center line.

    Reads 'track_width', 'distance_from_center', 'progress' and 'steps'
    from params. The return value is the progress delta multiplied by a
    tier weight (1.0, 0.5, 0.1 or 1e-3) chosen from the distance to the
    center line, returned as a float.

    The previous progress is kept in the module-level _progress_tracker.
    It is reset to 0.0 on the first call and whenever 'steps' does not
    increase between calls, which is treated as the start of a new episode.
    '''

    # Read input parameters
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    progress = params['progress']
    steps = params['steps']

    # A step counter that did not advance means a new episode has started,
    # so the progress baseline from the previous episode must be dropped.
    previous_steps = _progress_tracker['steps']
    if previous_steps is None or steps <= previous_steps:
        _progress_tracker['progress'] = 0.0

    progress_delta = progress - _progress_tracker['progress']
    _progress_tracker['steps'] = steps
    _progress_tracker['progress'] = progress

    # Calculate 3 markers that are increasingly further away from the center line
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width

    # Give higher weight if the car is closer to center line and vice versa
    if distance_from_center <= marker_1:
        weight = 1.0
    elif distance_from_center <= marker_2:
        weight = 0.5
    elif distance_from_center <= marker_3:
        weight = 0.1
    else:
        weight = 1e-3  # likely crashed/ close to off track

    # Always return a float value
    return float(progress_delta * weight)

In [None]:
#--REWARD FUNCTION 2: STAY INSIDE THE BORDER
#--This reward function is not complicated, and simply leaves the agent to decide the best path within the track
#--It simply aims to reward the model if it is within the track

def reward_function(params):
    '''
    Reward the agent for keeping the whole car between the track borders.

    Reads 'all_wheels_on_track', 'distance_from_center' and 'track_width'
    from params. Returns 1.0 when all wheels are on the track and the car
    is at least 0.05 inside the border (half the track width minus the
    distance from the center); otherwise returns 1e-3.
    '''

    # Pull the inputs out of the parameter dictionary
    on_track = params['all_wheels_on_track']
    offset = params['distance_from_center']
    width = params['track_width']

    # Any wheel off the track earns only the minimal reward
    if not on_track:
        return 1e-3

    # Remaining gap between the car and the track border
    border_gap = 0.5*width - offset

    # Full reward only when the car keeps a safety gap to the border
    return 1.0 if border_gap >= 0.05 else 1e-3

In [None]:
#--REWARD FUNCTION 3: PREVENT ZIG-ZAG
#--This takes into account a new variable, "abs_steering", and penalizes the model if the car is steering too much
#--This helps prevent the zig-zag behavior encouraged by the default reward function (which only looks at markers along the center line)

def reward_function(params):
    '''
    Reward staying near the center line and discount heavy steering,
    which helps mitigate zig-zag behaviors.

    Reads 'distance_from_center', 'track_width' and 'steering_angle' from
    params. The base reward is 1.0, 0.5, 0.1 or 1e-3 depending on how far
    the car is from the center line; it is multiplied by 0.8 when the
    absolute steering angle exceeds the threshold of 15. Returns a float.
    '''

    # Pull the inputs out of the parameter dictionary
    offset = params['distance_from_center']
    width = params['track_width']
    steer_magnitude = abs(params['steering_angle'])  # sign of the steering is irrelevant

    # (fraction of track width, reward) pairs, ordered from the center outwards
    tiers = ((0.1, 1.0), (0.25, 0.5), (0.5, 0.1))

    # Beyond the outermost marker the car is likely crashed/ close to off track
    score = 1e-3
    for fraction, tier_reward in tiers:
        if offset <= fraction * width:
            score = tier_reward
            break

    # Steering penalty threshold, change the number based on your action space setting
    max_steering = 15

    # Discount the reward when the car is steering too much
    if steer_magnitude > max_steering:
        score = score * 0.8

    return float(score)


In [None]:
#--REWARD FUNCTION 4: WAYPOINTS
#--The way waypoints work is that they are simply lane markers along a certain track
#--They are usually different for each track, but the advantage is that these points are already pre-established
#--This way, the navigation aspect of your model is accelerated, and enhanced - significantly increasing the speed and track completion percentage of the DeepRacer Model


import math
def reward_function(params):
    '''
    Reward steering toward a look-ahead waypoint on the track.

    Reads 'waypoints', 'closest_waypoints', 'heading', 'x', 'y',
    'track_width' and 'steering_angle' from params.

    Starting at closest_waypoints[1], the waypoint list is walked (at most
    10 steps, wrapping around the list) until a waypoint at least
    0.9 * track_width away from the car is found. The bearing from the car
    to that waypoint, minus the heading, is the ideal steering angle. The
    reward falls linearly from 1.0 (no steering error) toward 0 as the
    absolute error approaches 60 degrees, and is 0.01 for errors of
    60 degrees or more in either direction. Returns a float.
    '''

    # Read input variables
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']  # combined with degrees below, so presumably in degrees
    x = params['x']
    y = params['y']
    lookahead_sqr = (params['track_width']*0.9)**2

    count = len(waypoints)
    prev_idx = closest_waypoints[0]
    idx = closest_waypoints[1]

    # Direction of travel through the waypoint list. The index difference is
    # taken modulo the list length so that crossing the start line
    # (e.g. from index N-1 to 0) is still recognised as moving forward.
    if count and (idx - prev_idx) % count > count / 2:
        step = -1
    else:
        step = 1

    # Advance to the first waypoint that lies outside the look-ahead radius
    for _ in range(10):
        dist_sqr = (waypoints[idx][1]-y)**2 + (waypoints[idx][0]-x)**2
        if dist_sqr >= lookahead_sqr:
            break
        idx = (idx + step) % count

    target = waypoints[idx]

    # Bearing from the car to the target, atan2(dy, dx), converted to degrees
    direction = math.degrees(math.atan2(target[1] - y, target[0] - x))

    # Steering that would point the car at the target
    best_steering = direction - heading
    steering_angle = params['steering_angle']

    # Normalise the steering error into the range (-180, 180]
    error = (steering_angle-best_steering) % 360
    if error > 180:
        error -= 360

    # Compare the magnitude of the error so that large errors in either
    # direction receive the minimal reward
    if abs(error) >= 60:
        reward = 0.01
    else:
        reward = 1 - abs(error)/60.0

    return float(reward)

In [None]:
#--Note, to see or visualize all the waypoints on AWS tracks, see: https://github.com/ARCC-RACE/waypoint-visualization

#--Or any Github DIR on the internet with 'visualize waypoints AWS DeepRacer'