In [None]:
import math
def reward_function(params):
    """Score a single step from centring, heading, speed, direction and steering.

    Five component rewards are computed independently and summed; an extra
    milestone bonus may be added on every 100th step.

    Args:
        params: Mapping that must provide the keys ``track_width``,
            ``distance_from_center``, ``all_wheels_on_track``, ``speed``,
            ``waypoints``, ``closest_waypoints``, ``heading``,
            ``is_reversed``, ``steps``, ``progress`` and ``steering_angle``.
            ``waypoints`` is indexed with the two entries of
            ``closest_waypoints`` and each waypoint is read as ``(x, y)``.
            ``heading`` is compared against a track direction expressed in
            degrees.

    Returns:
        float: The summed component rewards (each at most 1.0), plus 10.0
        when the milestone condition holds.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    # Read input parameters
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    all_wheels_on_track = params['all_wheels_on_track']
    speed = params['speed']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']
    # Named is_reversed (not "reversed") so the builtin is not shadowed.
    is_reversed = params['is_reversed']
    steps = params['steps']
    progress = params['progress']
    steering_angle = params['steering_angle']

    # Thresholds
    SPEED_THRESHOLD = 1.0
    DIRECTION_THRESHOLD = 10.0  # degrees between heading and track direction
    ABS_STEERING_THRESHOLD = 20.0  # presumably degrees -- TODO confirm
    # NOTE(review): the original comment calls this value "Track Length", yet
    # it is used as a step budget. With 40.20 the milestone condition below
    # needs progress > ~248 once steps >= 100, so the bonus can only fire when
    # steps == 0. Confirm the intended step count; the value is kept as-is to
    # avoid changing the reward signal.
    TOTAL_NUM_STEPS = 40.20

    # --- Centre-line reward -------------------------------------------------
    # Three markers at increasing distances from the centre line.
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width

    if distance_from_center <= marker_1:
        reward_centre = 1.0
    elif distance_from_center <= marker_2:
        reward_centre = 0.5
    elif distance_from_center <= marker_3:
        reward_centre = 0.1
    else:
        reward_centre = 1e-3  # likely crashed / close to off track

    # --- Heading reward -----------------------------------------------------
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    # atan2(dy, dx) yields radians in (-pi, pi]; convert to degrees.
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )
    # Fold the difference into [0, 180] so e.g. 175 vs -175 counts as 10.
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff
    reward_direction = 0.5 if direction_diff > DIRECTION_THRESHOLD else 1.0

    # --- Speed reward (off-track takes precedence over speed) ---------------
    if not all_wheels_on_track:
        reward_speed = 1e-3
    elif speed < SPEED_THRESHOLD:
        reward_speed = 0.5
    else:
        reward_speed = 1.0

    # --- Driving-direction reward -------------------------------------------
    reward_reversed = 1e-3 if is_reversed else 1.0

    # --- Steering reward: damp large steering magnitudes --------------------
    reward_steering = 0.8 if abs(steering_angle) > ABS_STEERING_THRESHOLD else 1.0

    reward = (
        reward_centre
        + reward_direction
        + reward_speed
        + reward_reversed
        + reward_steering
    )

    # Milestone bonus: every 100th step, if progress is ahead of the pace
    # implied by TOTAL_NUM_STEPS (see NOTE above).
    if steps % 100 == 0 and progress > (steps / TOTAL_NUM_STEPS) * 100:
        reward += 10.0

    return float(reward)
