In [None]:
import numpy as np

def reward_function(params):
    """Compute a scalar reward from the simulator's parameter dictionary.

    The reward is built in four stages:

    1. If the car is not fully on the track, return a minimal reward
       (``1e-3``) immediately so later bonuses cannot mask the penalty.
    2. Start from a base of ``0.1`` and adjust for the side of the centre
       line: scaled by ``0.8`` on the left, increased by ``0.1`` otherwise.
    3. Multiply by a factor chosen from the first distance band (expressed
       as a fraction of ``track_width``) that contains
       ``distance_from_center``.
    4. Add a bonus chosen from the first steering band that contains the
       absolute steering angle, but only if ``speed`` does not exceed that
       band's speed limit.

    Args:
        params: Mapping that must contain the keys ``'track_width'``,
            ``'distance_from_center'``, ``'steering_angle'``, ``'speed'``,
            ``'all_wheels_on_track'`` and ``'is_left_of_center'``.
            NOTE(review): presumably the AWS DeepRacer input dictionary;
            confirm units against the simulator documentation.

    Returns:
        The reward as a built-in ``float``.

    Raises:
        KeyError: If any of the required keys is missing from ``params``.
    """
    # Read input parameters (all keys are read up front so a malformed
    # dictionary fails the same way regardless of the car's state).
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    steering = abs(params['steering_angle'])
    speed = params['speed']
    on_track = params['all_wheels_on_track']
    on_left = params['is_left_of_center']

    # Penalize heavily if the car goes off-track.  Returning here is the
    # fix: previously the tiny reward was subsequently increased by the
    # side, centre and speed terms below, so an off-track car could score
    # higher than an on-track one.
    if not on_track:
        return 1e-3

    reward = 0.1

    if on_left:
        reward *= 0.8
    else:
        reward += 0.1

    # (fraction of track width, reward multiplier), ordered from the centre
    # line outwards.  Only the first matching band is applied.
    center_markers = (
        (0.05, 2.0),
        (0.10, 1.6),
        (0.15, 1.4),
        (0.20, 1.2),
        (0.25, 0.8),
        (0.30, 0.6),
        (0.35, 0.4),
        (0.40, 0.2),
        (0.50, 0.1),
    )

    # If the car is farther from the centre than the last band, no
    # multiplier is applied and the reward is left as is.
    for fraction, multiplier in center_markers:
        if distance_from_center <= fraction * track_width:
            reward *= multiplier
            break

    # (max absolute steering, max speed, bonus), ordered by increasing
    # steering.  The final infinite bound catches every remaining angle.
    speed_boundaries = (
        (0.00, 3.75, 1.6),
        (0.01, 3.25, 1.1),
        (0.02, 3.20, 0.9),
        (0.03, 3.10, 0.8),
        (0.04, 3.00, 0.7),
        (0.05, 2.90, 0.6),
        (0.06, 2.70, 0.5),
        (0.07, 2.50, 0.6),
        (0.08, 2.30, 0.7),
        (0.09, 2.10, 0.8),
        (0.10, 1.70, 0.9),
        (0.50, 1.50, 1.1),
        (1.00, 1.40, 1.2),
        (2.00, 1.30, 1.3),
        (3.00, 1.20, 1.4),
        (4.00, 1.10, 1.5),
        (5.00, 1.00, 1.6),
        (11.0, 0.70, 1.7),
        (23.0, 0.50, 2.5),
        (float("inf"), 0.5, 3),
    )

    for max_steering, max_speed, speed_reward in speed_boundaries:
        if steering <= max_steering:
            # The bonus is granted only when the car is not exceeding the
            # speed limit of its steering band; either way, stop searching.
            if speed <= max_speed:
                reward += speed_reward
            break

    return float(reward)