# Comparison of Reward Functions

This shows how the different reward functions behave (especially when crossing a checkpoint).

This can be used as a tool to come up with a good reward function.

In [None]:
# See how the reward function changes after hitting a checkpoint.
# The goal here is to find a reward that encourages the agent to pass
# through a checkpoint while already pointing at the next one.

from pod.ai.ai_utils import gen_pods
from pod.constants import Constants
import math
import numpy as np

from pod.board import PodBoard
from pod.util import PodState
from pod.controller import SimpleController
from pod.ai.rewards import speed_reward, diff_reward, dist_reward, ang_reward, check_reward, make_reward, pgr, regood
from pod.drawer import Drawer
from pod.player import Player
from vec2 import Vec2, UNIT

TURNS = 100

board = PodBoard.grid(rows=2, cols=3).reorder([3, 1, 0, 2, 4, 5])

# Build one starting state per approach angle (0 to pi in steps of pi/4;
# the small epsilon makes arange include pi itself). Each pod is placed two
# checkpoint radii away from checkpoint 0, along the direction given by the
# angle, and is given a heading and velocity rotated by pi from that
# direction (i.e. presumably back toward the checkpoint).
pods, labels = [], []
start_angles = np.arange(0, math.pi + 0.00001, math.pi / 4)
for ang in start_angles:
    heading = ang + math.pi
    offset = UNIT.rotate(ang) * (2 * Constants.check_radius())
    start_vel = UNIT.rotate(heading) * (Constants.max_vel() * 0.1)
    state = PodState(
        pos=board.checkpoints[0] + offset,
        vel=start_vel,
        angle=heading,
    )
    pods.append(state)
    # Label each pod with its approach angle in degrees
    labels.append("%.1f°" % (ang * 180/math.pi))

# One SimpleController-driven Player per starting state
players = [Player(SimpleController(board), start) for start in pods]

drawer = Drawer(board, players=players, labels=labels)

### Initial state of game

In [None]:
# Draw a single frame from the generated starting states, before any
# player has taken a step.
drawer.draw_frame(pods)

Show the players playing through a few frames.

In [None]:
# Animate the players attached to `drawer`; no arguments are passed, so
# Drawer.animate's own defaults apply.
drawer.animate()

### Comparison of reward functions

In [None]:
# (label, reward function) pairs to compare; uncomment an entry to add it.
rewards_to_compare = [
#    ('pgr', pgr),
    ('regood', regood),
#    ('speed', speed_reward),
#    ('diff', diff_reward),
#    ('dist', dist_reward),
]

# NOTE(review): the meaning of [0, -1] is defined by Drawer.compare_rewards,
# which is not visible here — presumably it selects the first and last
# player; confirm against the Drawer implementation.
drawer.compare_rewards(rewards_to_compare, [0, -1])

# Build a good reward

Enumerate lots of combinations of the different reward functions.
Play them to see which one works best!

In [None]:
from pod.board import PodBoard
from pod.ai.tree_search_controller import TreeSearchController
from pod.player import Player
from pod.ai.rewards import speed_reward, diff_reward, dist_reward, ang_reward, check_reward, make_reward, pgr, regood

# Grid search over integer weights for (dist, ang, check) rewards.
# Each combination drives a TreeSearchController until the pod completes
# 2 laps or MAX_TURNS is reached; the number of turns taken is the score
# (lower is better).
TO_BEAT = 197    # a run must take at most this many turns to be averaged
MAX_TURNS = 210  # stop simulating a run after this many turns
best = MAX_TURNS

# Running sums of the weights of the qualifying combinations
dist_sum = 0
ang_sum = 0
check_sum = 0
n_sums = 0

board = PodBoard.grid(3, 2)

for x_dist in range(8, 11):
    for x_ang in range(1, 4):
        for x_check in range(2):
            r_func = make_reward([
                (x_dist, dist_reward),
                (x_ang, ang_reward),
                (x_check, check_reward)
            ])
            # Third argument is presumably the search depth (cf. max_depth
            # elsewhere in this notebook) — TODO confirm.
            p = Player(TreeSearchController(board, r_func, 2))

            while p.pod.laps < 2 and p.pod.turns < MAX_TURNS:
                p.step()

            # Report every run that ties or improves on the best so far.
            if p.pod.turns <= best:
                print("dist {} ang {} check {} ---> {}".format(
                    x_dist, x_ang, x_check, p.pod.turns))
                best = p.pod.turns
                # Only such runs that also reach TO_BEAT feed the averages.
                if p.pod.turns <= TO_BEAT:
                    dist_sum += x_dist
                    ang_sum += x_ang
                    check_sum += x_check
                    n_sums += 1

# Guard against dividing by zero when no combination qualified.
if n_sums:
    print("Avg dist %.5f ang %.5f check %.5f" % (dist_sum / n_sums, ang_sum / n_sums, check_sum / n_sums))
else:
    print("No combination finished in {} turns or fewer; nothing to average".format(TO_BEAT))

In [None]:
from pod.drawer import Drawer

# NOTE(review): RewardController is not imported in any cell visible in this
# notebook, so this cell raises NameError as written — add the appropriate
# import (module path not visible here) before running.

# Controller weighted on distance and angle only
precise_controller = RewardController(board, make_reward([
    (92.29268, dist_reward),
    (20.34146, ang_reward),
]))

# Controller with equal weights on speed, angle and checkpoint rewards
speed_controller = RewardController(board, make_reward([
    (1, speed_reward),
    (1, ang_reward),
    (1, check_reward),
]))

drawer = Drawer(
    board,
    controllers=[precise_controller, speed_controller],
    labels=['precise', 'speed'],
)

drawer.animate(300)

# Optimal action discretization

Here we try out different levels of precision in the discretization of the action space.

In [None]:
from pod.board import PodBoard
from pod.ai.tree_search_controller import TreeSearchController
from pod.ai.rewards import regood
from pod.drawer import Drawer
from pod.ai.action_discretizer import ActionDiscretizer

# Rebind `board` to a fresh PodBoard.grid(2, 3) for the discretization
# experiments below; this replaces the board used by the earlier cells.
board = PodBoard.grid(2,3)

In [None]:
def _regood_tree_search(*discretizer_args):
    """Build a depth-3 TreeSearchController on `board`, scored by `regood`,
    using an ActionDiscretizer constructed from the given arguments."""
    return TreeSearchController(
        board,
        regood,
        max_depth=3,
        ad=ActionDiscretizer(*discretizer_args),
    )


# Names follow tXaY == ActionDiscretizer(X, Y).
t2a3 = _regood_tree_search(2, 3)
t5a3 = _regood_tree_search(5, 3)
t2a9 = _regood_tree_search(2, 9)
t5a9 = _regood_tree_search(5, 9)

In [None]:
# Pair each label with its controller so the two lists cannot drift apart;
# dicts preserve insertion order, so the order below is the display order.
labelled_controllers = {
    't5a9': t5a9,
    't2a9': t2a9,
    't5a3': t5a3,
    't2a3': t2a3,
}

drawer = Drawer(
    board,
    controllers=list(labelled_controllers.values()),
    labels=list(labelled_controllers.keys()),
)

In [None]:
# Animate the four controllers together.
# NOTE(review): 100 is presumably the number of frames/turns and
# parallel=True presumably steps the controllers concurrently — confirm
# against Drawer.animate.
drawer.animate(100, parallel=True)

In [None]:
# Chart the `regood` reward for the controllers on this drawer
# (presumably over the run simulated above — confirm against
# Drawer.chart_rewards).
drawer.chart_rewards(regood)