In [None]:
import math

def reward_function(params):
    '''
    Reward the agent for lap progress and for pointing at a "rabbit" waypoint.

    The reward is the product of two factors:

    * progress reward -- actual progress divided by the progress expected
      after ``steps`` steps if a lap is to be finished in ``EXPECTED_STEPS``.
    * rabbit reward -- 1.0 when the car's heading points straight at a
      waypoint a few indices ahead (the "rabbit"), falling linearly to 0.0
      when it points directly away from it.

    Args:
        params: dict of simulator inputs. Keys read here: 'x', 'y',
            'heading' (degrees), 'progress' (percent), 'steps',
            'waypoints' (sequence of (x, y) pairs) and 'closest_waypoints'.

    Returns:
        float: the combined reward.
    '''
    # re:Invent 2018 target: one lap in 7 seconds at 15 steps/second.
    EXPECTED_STEPS = 105
    # How many waypoints ahead of the next waypoint the rabbit sits.
    RABBIT_LOOKAHEAD = 2

    # Read only the inputs this reward actually uses.
    x = params['x']
    y = params['y']
    heading = params['heading']
    progress = params['progress']
    steps = params['steps']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']

    ###### expected progress
    expected_progress_per_step = 100 / EXPECTED_STEPS
    # Treat step 0 as step 1 so the ratio below never divides by zero.
    expected_progress = expected_progress_per_step * max(steps, 1)
    progress_reward = progress / expected_progress

    ##### tracing rabbit
    # Wrap around the end of the waypoint list so the rabbit stays on the loop.
    rabbit_waypoint = (closest_waypoints[1] + RABBIT_LOOKAHEAD) % len(waypoints)
    rabbit = [waypoints[rabbit_waypoint][0], waypoints[rabbit_waypoint][1]]

    radius = math.hypot(x - rabbit[0], y - rabbit[1])

    if radius == 0:
        # The car sits exactly on the rabbit; any heading is "on target".
        rabbit_reward = 1.0
    else:
        # heading is given in degrees, but math.cos/math.sin expect radians.
        heading_radians = math.radians(heading)

        # Point at distance `radius` from the car along its heading.
        pointing = [
            x + (radius * math.cos(heading_radians)),
            y + (radius * math.sin(heading_radians)),
        ]

        # Distance between where the car points and the rabbit: 0 when aimed
        # straight at it, 2 * radius when aimed directly away.
        vector_delta = math.hypot(pointing[0] - rabbit[0], pointing[1] - rabbit[1])
        rabbit_reward = 1 - (vector_delta / (radius * 2))

    reward = progress_reward * rabbit_reward

    print(params)
    print("=====progress reward: %f" % progress_reward)
    print("=====rabbit reward: %f" % rabbit_reward)
    print("=====reward: %f" % reward)

    return float(reward)


    

In [None]:
import math

# State carried between calls so the direction of travel can be derived from
# two consecutive positions.
has_record = False
last_x = 0
last_y = 0
last_step = -1

def reward_function(params):
    ###############################################################################
    '''
    Reward the car for actually moving along the direction of the track.

    The direction of travel is derived from the displacement since the
    previous call rather than from the reported heading. On the first call,
    or when the step counter did not increase (e.g. a new episode started),
    there is no usable previous position and the track direction is used,
    which yields the unpenalised reward.

    Args:
        params: dict of simulator inputs. Keys read here: 'waypoints',
            'closest_waypoints', 'steps', 'x' and 'y'.

    Returns:
        float: 1.0 when the travel direction is within DIRECTION_THRESHOLD
        degrees of the track direction, otherwise 0.5.
    '''

    global has_record
    global last_x
    global last_y
    global last_step

    # Read input variables
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    steps = params['steps']

    x = params['x']
    y = params['y']

    # Initialize the reward with typical value
    reward = 1.0

    # Calculate the direction of the center line based on the closest waypoints
    next_point = waypoints[closest_waypoints[1]]
    prev_point = waypoints[closest_waypoints[0]]

    # Direction in radians, atan2(dy, dx), result is in (-pi, pi]
    track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    # Convert to degrees
    track_direction = math.degrees(track_direction)

    if has_record and steps > last_step:
        # Real direction of travel from the previous position to this one
        real_direction = math.degrees(math.atan2(y - last_y, x - last_x))
    else:
        # No comparable previous position: assume the car follows the track
        real_direction = track_direction

    # Difference between the track direction and the real travel direction,
    # folded into [0, 180] degrees
    direction_diff = abs(track_direction - real_direction)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    # Penalize the reward if the difference is too large
    DIRECTION_THRESHOLD = 10.0
    if direction_diff > DIRECTION_THRESHOLD:
        reward *= 0.5

    # Remember this position and step for the next call
    last_x = x
    last_y = y
    last_step = steps
    has_record = True

    return float(reward)

In [5]:
import math

# Position and step remembered from the previous call.
has_record = False
last_x = 0
last_y = 0
last_steps = -1

def reward_function(params):
    """
    Compare the car's actual direction of travel with the track direction.

    The travel direction is taken from the displacement since the previous
    call; when no previous call is recorded, or the step counter has not
    increased, the track direction itself is used. Returns 1.0 when the two
    directions differ by at most 10 degrees and 0.5 otherwise.
    """
    global has_record, last_x, last_y, last_steps

    # Inputs (read in a fixed order; `heading` is read but not used below)
    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    heading = params['heading']
    steps = params['steps']
    x = params['x']
    y = params['y']

    # Track direction in degrees, from the waypoint behind to the one ahead
    ahead = track_points[nearest[1]]
    behind = track_points[nearest[0]]
    track_direction = math.degrees(
        math.atan2(ahead[1] - behind[1], ahead[0] - behind[0])
    )

    # Travel direction in degrees, from the last recorded position to here
    if has_record and steps > last_steps:
        real_direction = math.degrees(math.atan2(y - last_y, x - last_x))
    else:
        real_direction = track_direction

    # Angular gap folded into the 0..180 degree range
    gap = abs(track_direction - real_direction)
    gap = 360 - gap if gap > 180 else gap

    # Halve the base reward of 1.0 when the gap exceeds the threshold
    max_gap = 10.0
    reward = 0.5 if gap > max_gap else 1.0

    # Record this call for the next comparison
    last_x, last_y = x, y
    last_steps = steps
    has_record = True

    return float(reward)

In [12]:
# Build a sample `params` input for trying out reward_function in the notebook.
# NOTE(review): SampleGenerator comes from the project-local race_utils module;
# presumably random_sample() returns a dict with the keys reward_function
# reads ('waypoints', 'closest_waypoints', 'steps', 'x', 'y', ...) -- confirm.
from race_utils import SampleGenerator
generator = SampleGenerator()
params = generator.random_sample()

In [13]:
# Call whichever reward_function was defined last in this kernel on the sample.
reward_function(params)

1.0