In [None]:
import numpy as np

# Optimal racing line for the reInvent2018 track.
# Each row is an [x, y] point on the line; consecutive rows are treated as
# neighbouring points by the lookup helpers in this file.
# NOTE: the last row repeats the first row, so the list describes a closed loop
# and index 0 and index len(racing_track) - 1 refer to the same position.
racing_track = [
    [2.89403282, 0.50183759],
    [3.16466256, 0.47000187],
    [3.43313686, 0.44269012],
    [3.73804833, 0.44277521],
    [4.10748697, 0.44300213],
    [4.4112053 , 0.44312774],
    [4.70859097, 0.443148  ],
    [5.32000213, 0.44346001],
    [5.47293562, 0.44350053],
    [5.73669212, 0.45364641],
    [5.99188299, 0.45498864],
    [6.21249509, 0.45508553],
    [6.48158723, 0.50896846],
    [6.75017396, 0.61330471],
    [6.95037424, 0.81422432],
    [7.16039855, 1.08773177],
    [7.24031257, 1.30941191],
    [7.24531257, 1.45656152],
    [7.23031257, 1.61694727],
    [7.20031257, 1.78832272],
    [7.12093691, 1.96549709],
    [6.97006122, 2.13947983],
    [6.81552508, 2.29682262],
    [6.61550234, 2.43264896],
    [6.42188674, 2.54399786],
    [6.21111611, 2.63219591],
    [5.99094124, 2.70253964],
    [5.76663451, 2.76272803],
    [5.56290672, 2.81598783],
    [5.36025826, 2.87264074],
    [5.15930711, 2.93485823],
    [4.96057668, 3.00486842],
    [4.76447824, 3.08511434],
    [4.57227007, 3.18350394],
    [4.38316505, 3.29782299],
    [4.1967222 , 3.42521235],
    [4.01238138, 3.56230546],
    [3.82932472, 3.705078  ],
    [3.67962407, 3.82302593],
    [3.52950061, 3.93636792],
    [3.37857174, 4.04233739],
    [3.22633348, 4.13894456],
    [3.07209502, 4.22494661],
    [2.91481473, 4.29941335],
    [2.75293273, 4.3616582 ],
    [2.58399664, 4.41081989],
    [2.40389998, 4.44527942],
    [2.20491324, 4.46118664],
    [1.97246383, 4.44902466],
    [1.70091679, 4.38829452],
    [1.42034298, 4.26098557],
    [1.16502579, 4.06146292],
    [0.96752606, 3.78363381],
    [0.87363065, 3.43687089],
    [0.85452827, 3.09650956],
    [0.87660414, 2.81168089],
    [0.91228695, 2.57755526],
    [0.96293881, 2.31103478],
    [1.00824568, 2.10288769],
    [1.06229556, 1.90085086],
    [1.12998448, 1.70432378],
    [1.21208843, 1.52227906],
    [1.3075882 , 1.3606979 ],
    [1.38008891, 1.22064228],
    [1.53931205, 1.00094861],
    [1.68365433, 0.90024299],
    [1.85113094, 0.81238267],
    [2.04923242, 0.71832671],
    [2.2899168 , 0.64292543],
    [2.58493805, 0.56808001],
    [2.89403282, 0.50183759]]

# Step count that earns the full (un-weighted) steps bonus of 1.0: the bonus is
# (AVG_COMPLETION_STEPS - projected) / (AVG_COMPLETION_STEPS - BEST_COMPLETION_STEPS).
BEST_COMPLETION_STEPS = 120.0
# Baseline lap length in steps: projections at or above it earn no steps bonus,
# and it is also the projection used while too little of the lap has been seen.
AVG_COMPLETION_STEPS = 165.0
# The lap projection is only computed once `steps` is strictly above this value.
MIN_STEPS = 2.0
# Starting value of the reward before any component is added.
MIN_REWARD = 0.001
# The lap projection is only computed once `progress` is strictly above this value.
MIN_PROGRESS = 0.1


def dist_2_points(p1, p2):
    """Return the straight-line (Euclidean) distance between two 2-D points.

    Args:
        p1: Indexable whose first two items are the x and y of the first point.
        p2: Indexable whose first two items are the x and y of the second point.

    Returns:
        The non-negative distance between the two points.
    """
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    return (delta_x ** 2 + delta_y ** 2) ** 0.5


def closest_2_racing_points_index(racing_coords, car_coords):
    """Find the racing-line point nearest the car and its nearer neighbour.

    The racing line is treated as a loop: the neighbours of a point are the
    previous and next entries, wrapping around the ends of the list. A
    neighbour that has exactly the same coordinates as the closest point (for
    example the closing row of a track whose last point repeats its first) is
    skipped, because pairing a point with its own duplicate gives a
    zero-length segment from which no line direction can be derived.

    Args:
        racing_coords: Non-empty sequence of [x, y] points.
        car_coords: [x, y] position of the car.

    Returns:
        [closest_index, second_closest_index]; both are non-negative indexes
        into racing_coords. With a single distinct point both indexes refer to
        that same position.

    Raises:
        ValueError: If racing_coords is empty.
    """
    count = len(racing_coords)
    if count == 0:
        raise ValueError("racing_coords must contain at least one point")

    car_x, car_y = car_coords[0], car_coords[1]

    def sq_dist(index):
        # Squared distance is enough for ordering and avoids the square root.
        point = racing_coords[index]
        return (point[0] - car_x) ** 2 + (point[1] - car_y) ** 2

    # min() keeps the first index on ties, like list.index(min(...)).
    closest_index = min(range(count), key=sq_dist)
    closest_x = racing_coords[closest_index][0]
    closest_y = racing_coords[closest_index][1]

    def neighbour(step):
        # Walk around the loop in direction `step` until a point that differs
        # from the closest one is found.
        index = closest_index
        for _ in range(count - 1):
            index = (index + step) % count
            candidate = racing_coords[index]
            if candidate[0] != closest_x or candidate[1] != closest_y:
                return index
        # Every point coincides with the closest one: fall back to the
        # immediate neighbour.
        return (closest_index + step) % count

    i_plus = neighbour(1)
    i_minus = neighbour(-1)

    # Prefer the next point unless the previous one is strictly nearer.
    second_closest_index = i_plus
    if sq_dist(i_minus) < sq_dist(i_plus):
        second_closest_index = i_minus

    return [closest_index, second_closest_index]


def dist_to_racing_line(closest_coords, second_closest_coords, car_coords):
    """Distance from the car to the line through the two nearest racing points.

    The three points form a triangle; its area is obtained from the side
    lengths (Heron's formula in expanded form) and the height over the side
    joining the two racing points is the distance returned.

    Args:
        closest_coords: [x, y] of the racing point nearest the car.
        second_closest_coords: [x, y] of the neighbouring racing point.
        car_coords: [x, y] position of the car.

    Returns:
        The perpendicular distance from the car to the (infinite) line through
        the two racing points, or the distance to closest_coords when the two
        racing points coincide.
    """
    # Length of the racing-line segment between the two racing points.
    a = dist_2_points(closest_coords, second_closest_coords)

    # Distances from the car to each of the two racing points.
    b = dist_2_points(car_coords, closest_coords)
    c = dist_2_points(car_coords, second_closest_coords)

    # Coincident racing points define no line, so use the point distance.
    # An explicit check replaces the former catch-all `except`, which hid
    # unrelated errors and does not fire for NumPy scalars (they yield nan/inf
    # on division by zero instead of raising).
    if a == 0:
        return b

    a2 = a ** 2
    b2 = b ** 2
    c2 = c ** 2
    a4 = a ** 4
    b4 = b ** 4
    c4 = c ** 4
    # d equals 16 * area**2 of the triangle.
    d = (2 * a2 * b2) + (2 * b2 * c2) + (2 * c2 * a2)
    d = d - a4 - b4 - c4
    # abs() absorbs a slightly negative d caused by rounding when the three
    # points are (almost) collinear. height = 2 * area / a = sqrt(d) / (2 * a).
    return (abs(d) ** 0.5) / (2 * a)


def get_optimals(x, y):
    """Look up the two racing-line points nearest to the position (x, y).

    Args:
        x: x coordinate of the car.
        y: y coordinate of the car.

    Returns:
        A 4-tuple (closest_index, second_closest_index, closest_point,
        second_closest_point), where the indexes refer to racing_track and
        each point is the corresponding [x, y] row of racing_track.
    """
    nearest, runner_up = closest_2_racing_points_index(racing_track, [x, y])
    return nearest, runner_up, racing_track[nearest], racing_track[runner_up]


def get_projected_completion_steps(steps, progress):
    """Extrapolate how many steps a full lap will take at the current pace.

    Args:
        steps: Number of steps taken so far.
        progress: Percentage of the lap completed so far (100 means a full lap).

    Returns:
        steps * 100 / progress once both steps and progress exceed their
        minimum thresholds (MIN_STEPS, MIN_PROGRESS); otherwise
        AVG_COMPLETION_STEPS.
    """
    enough_data = progress > MIN_PROGRESS and steps > MIN_STEPS
    if not enough_data:
        # Too early in the lap for a meaningful extrapolation.
        return AVG_COMPLETION_STEPS
    return steps * 100.0 / progress


class Reward:
    """Computes the per-step reward from racing-line proximity and lap pace.

    The reward starts at MIN_REWARD, gains a racing-line term and a steps
    (pace) bonus, and receives a large bonus once the lap is complete. A
    human-readable trace of the last computation is kept in ``log_line`` and
    printed when ``verbose`` is true.
    """

    # Weight of the racing-line proximity term (the term itself is in [0, 1]).
    RACE_LINE_WEIGHT = 0.8
    # Weight of the steps (pace) bonus.
    STEPS_WEIGHT = 1.2
    # Not referenced by any method of this class; kept for compatibility.
    STEERING_WEIGHT = 1.0
    # Bonus added once progress reaches 100 %.
    COMPLETION_BONUS = 100.0

    def __init__(self, verbose=False):
        """Create a reward calculator.

        Args:
            verbose: When true, reward_function prints its log line each call.
        """
        self.verbose = verbose
        self.log_line = ""

    def normal_reward(self, dist, track_width, track_width_available=0.5, std_dev=0.20):
        """Map a distance to a bell-shaped score in (0, 1].

        The distance is normalised by the usable width
        (track_width * track_width_available), clamped to [0, 1], and passed
        through exp(-(norm_dist / std_dev) ** 2).

        Args:
            dist: Distance from the ideal position.
            track_width: Width of the track.
            track_width_available: Fraction of the track width at which the
                normalised distance saturates at 1.
            std_dev: Spread of the bell curve in normalised-distance units.

        Returns:
            1.0 at zero distance, decreasing towards exp(-(1 / std_dev) ** 2)
            at or beyond the usable width.
        """
        usable_width = track_width * track_width_available
        if usable_width <= 0:
            # No usable width: avoid dividing by zero (or flipping the sign of
            # the ratio) and score the position as maximally off the line.
            norm_dist = 1.0
        else:
            norm_dist = min(max(dist / usable_width, 0.0), 1.0)
        norm_power = -((norm_dist / std_dev) ** 2)
        return np.exp(norm_power)

    def race_line_reward(self, current_reward, closest_coords, second_closest_coords, car_coords, track_width):
        """Add the weighted racing-line proximity term to current_reward.

        Args:
            current_reward: Reward accumulated so far.
            closest_coords: [x, y] of the nearest racing-line point.
            second_closest_coords: [x, y] of the neighbouring racing-line point.
            car_coords: [x, y] position of the car.
            track_width: Width of the track.

        Returns:
            current_reward plus RACE_LINE_WEIGHT times the proximity score.
        """
        dist = dist_to_racing_line(closest_coords, second_closest_coords, car_coords)
        dist_reward = self.normal_reward(dist, track_width, 0.8)
        # Defensive floor; it also turns a NaN score into 0.
        dist_reward = max(0, dist_reward)
        self.log_line = "{}, dist_to_racing_line={}, dist_reward={}".format(self.log_line, dist, dist_reward)
        return current_reward + (dist_reward * self.RACE_LINE_WEIGHT)

    def steps_reward(self, current_reward, steps, progress):
        """Add a bonus when the projected lap is faster than the average lap.

        Args:
            current_reward: Reward accumulated so far.
            steps: Number of steps taken so far.
            progress: Percentage of the lap completed so far.

        Returns:
            current_reward unchanged when the projection is not better than
            AVG_COMPLETION_STEPS; otherwise current_reward plus STEPS_WEIGHT
            times the advantage, scaled so that a projection equal to
            BEST_COMPLETION_STEPS scores 1.0 before weighting.
        """
        projected_steps = get_projected_completion_steps(steps, progress)
        self.log_line = "{}, projected_steps={}".format(self.log_line, projected_steps)
        steps_advantage = AVG_COMPLETION_STEPS - projected_steps
        reward = current_reward
        if steps_advantage > 0:
            reward = steps_advantage / (AVG_COMPLETION_STEPS - BEST_COMPLETION_STEPS)
            reward = current_reward + (reward * self.STEPS_WEIGHT)
        return reward

    def steering_reward(self, current_reward, steering_angle):
        """Scale current_reward down for large steering angles.

        Args:
            current_reward: Reward accumulated so far.
            steering_angle: Steering angle; only its magnitude is used.

        Returns:
            0.8 * current_reward above 25, 0.9 * current_reward from 15 up to
            25, and current_reward unchanged below 15.
        """
        steering_reward = current_reward
        # prevents zig-zagging
        if abs(steering_angle) > 25.0:
            steering_reward = 0.8 * current_reward
        elif abs(steering_angle) >= 15.0:
            steering_reward = 0.9 * current_reward
        self.log_line = "{}, steering_reward={}".format(self.log_line, steering_reward)
        return steering_reward

    def reward_function(self, params):
        """Compute the reward for one step.

        Args:
            params: Mapping providing at least 'x', 'y', 'track_width',
                'steps' and 'progress'.

        Returns:
            The reward as a float: MIN_REWARD plus the racing-line term and
            the steps bonus, plus COMPLETION_BONUS once progress reaches 100.
        """
        x = params['x']
        y = params['y']
        track_width = params['track_width']
        steps = params['steps']
        progress = params['progress']

        closest_index, _, optimals, optimals_second = get_optimals(x, y)

        reward = MIN_REWARD
        self.log_line = "start_reward={}, closest_index={}".format(reward, closest_index)
        reward = self.race_line_reward(reward, optimals, optimals_second, [x, y], track_width)
        reward = self.steps_reward(reward, steps, progress)
        # steering_reward() is available but intentionally not applied here.

        if progress >= 100.0:
            reward += self.COMPLETION_BONUS

        if self.verbose:
            print(self.log_line)

        # Always return a float value
        return float(reward)


# Shared Reward instance used by the module-level reward_function.
# verbose=True prints the log line on every call (noisy output for testing);
# pass verbose=False, the default, to silence it.
reward_object = Reward(verbose=True)


def reward_function(params):
    """Module-level entry point: delegate to the shared Reward instance.

    Args:
        params: Mapping of input values for the current step; it is passed
            through unchanged to reward_object.reward_function.

    Returns:
        The float reward computed by reward_object.
    """
    reward = reward_object.reward_function(params)
    return reward

In [None]:
Gradient descent batch size	256
Entropy	0.01
Discount factor	0.999
Loss type	huber
Learning rate	3.5e-05
Number of experience episodes between each policy-updating iteration	20
Number of epochs	10

In [None]:
Gradient descent batch size	512
Entropy	0.01
Discount factor	0.999
Loss type	huber
Learning rate	2e-06
Number of experience episodes between each policy-updating iteration	20
Number of epochs	10