# Deep Q learning

In [None]:
from pod.board import PodBoard
from pod.ai.deep_q_controller import DeepQController
from pod.ai.rewards import make_reward, speed_reward, check_reward

# Track that the controller will be trained and evaluated on.
board = PodBoard.ladder()

# Composite reward built from (number, reward function) pairs; the number is
# presumably the weight applied to that term -- confirm against make_reward.
r_func = make_reward([(1, speed_reward), (5, check_reward)])

# Deep-Q controller bound to the board and the reward defined above.
controller = DeepQController(board, r_func)

### Online training, using exploration from random starting points

In [None]:
import matplotlib.pyplot as plt

# controller.train() hands back two series, unpacked here as rewards/accuracy.
rewards, accuracy = controller.train()

# One row, two panels: rewards in panel 1 (left), accuracy in panel 2 (right).
for panel, series in enumerate((rewards, accuracy), start=1):
    plt.subplot(1, 2, panel)
    plt.plot(series)

plt.show()

### Training from a predefined set of states

In [None]:
import matplotlib.pyplot as plt
from pod.ai.ai_utils import gen_pods
from pod.constants import Constants
import math
import numpy as np

# Sampling grids handed to gen_pods. What each positional argument means is
# defined by gen_pods itself (presumably distance / angle / angle / speed --
# confirm against pod.ai.ai_utils).

# From the checkpoint radius up to (not including) 10000, in steps of 1000.
radius_steps = np.arange(Constants.check_radius(), 10000, 1000)

# -0.9*pi .. 0.9*pi in steps of 0.2*pi; the 0.91*pi stop keeps the last value
# in, because np.arange excludes its stop.
wide_steps = np.arange(math.pi * -0.9, math.pi * 0.91, math.pi * 0.2)

# Narrow band around zero. np.arange is half-open, so this runs from -0.3 up
# to about 0.25 and is not symmetric about 0.
# NOTE(review): confirm whether 0.3 was meant to be included.
narrow_steps = np.arange(-0.3, 0.3, 0.05)

# Zero up to max_vel in fifths; the +1 on the stop is presumably there so the
# top value is not dropped by the half-open range.
speed_steps = np.arange(0, Constants.max_vel() + 1, Constants.max_vel() / 5)

first_check = board.checkpoints[0]

# Broad coverage: the wide grid is used for both middle arguments.
pods_everywhere = gen_pods(first_check, radius_steps, wide_steps, wide_steps, speed_steps)

# TODO: training goes much better if I add extra pods pointing towards the check...why?
# Same grids as above, except the first of the two middle arguments is
# restricted to the narrow band.
pods_focused = gen_pods(first_check, radius_steps, narrow_steps, wide_steps, speed_steps)

# Full training set: the broad states followed by the focused ones.
pods = list(pods_everywhere)
pods.extend(pods_focused)

print("{} total states".format(len(pods)))

In [None]:
# Train the controller on the pre-generated `pods` states. The result is
# stored in `accuracy`, replacing the value bound by the online-training cell.
accuracy = controller.train_from_examples(pods)

# Plot the series returned by train_from_examples.
plt.plot(accuracy)
plt.show()

### Testing

In [None]:
from pod.controller import SimpleController
from pod.drawer import Drawer

# Turn count passed to the animation here and to the reward chart in the
# following cell.
TURNS = 100

# Put the trained controller on the board alongside a SimpleController so the
# two can be viewed together.
drawer = Drawer(
    board,
    controllers=[
        controller,
        SimpleController(board),
    ],
)

# Kept as the cell's final bare expression so the notebook shows whatever
# animate() returns.
drawer.animate(TURNS)

In [None]:
# Chart rewards using the same reward function (r_func) and turn count (TURNS)
# as the cells above. Left as the cell's final bare expression so the notebook
# shows whatever chart_rewards() returns.
drawer.chart_rewards(r_func, TURNS)

### Scratch pad...

In [None]:
from pod.ai import rewards
from pod.board import PodBoard
from pod.util import PodState
from pod.game import game_step
from pod.ai.action_discretizer import ActionDiscretizer
from pod.drawer import Drawer

# Scratch experiment: apply every discrete action to one pod and compare the
# two reward functions on the resulting states.
# NOTE: this cell rebinds the notebook-level `board` and `drawer` from earlier
# cells, and `rewards` here is the pod.ai.rewards module imported at the top
# of this cell, not the series returned by controller.train().
board = PodBoard.circle(5, 4000)
ad = ActionDiscretizer()
REPEATS = 1  # game steps applied per action

# Starting pod: aimed 1.55 (about pi/2, if angles are radians) away from the
# direction of checkpoint 0, with its velocity scaled to zero.
pod = PodState()
to_first_check = board.get_check(0) - pod.pos
pod.angle = to_first_check.angle() + 1.55
pod.vel = board.get_check(0).normalize() * 0

next_pods = []
for action in range(ad.num_actions):
    # Work on a copy so every action starts from the same state.
    next_pod = pod.clone()
    for _ in range(REPEATS):
        # Recomputed on every step because next_pod is updated in place below.
        play = ad.action_to_output(action, next_pod.angle, next_pod.pos)
        game_step(board, next_pod, play, next_pod)
    next_pods.append(next_pod)

    diff = rewards.diff_reward(pod, next_pod, board)
    dense = rewards.dense_reward(next_pod, board)
    label = str(ad.action_to_play(action))
    print("diff %.3f dense %.3f di-de %.3f  --  Action %s" % (diff, dense, diff - dense, label))

# Draw the starting pod followed by the pod produced by each action. The last
# line stays a bare expression so the notebook shows whatever it returns.
drawer = Drawer(board, [])
drawer.draw_frame([pod, *next_pods])