# Deep Q-learning

In [None]:
from pod.board import PodBoard
from pod.ai.deep_q_controller import DeepQController

# Shared notebook state: one board and one Deep-Q controller built on it.
# Every later cell (training, example generation, testing) reuses these two
# objects, so re-running this cell creates a fresh controller instance.
board = PodBoard()
controller = DeepQController(board)

### Online training, using exploration from random starting points

In [None]:
import matplotlib.pyplot as plt

# Run the controller's online training. The result unpacks into two series,
# which are plotted side by side below.
rewards, accuracy = controller.train()

# One figure, one row, two panels: rewards on the left, accuracy on the right.
fig, (rewards_ax, accuracy_ax) = plt.subplots(1, 2)
rewards_ax.plot(rewards)
accuracy_ax.plot(accuracy)

plt.show()

### Training from a predefined set of states

In [None]:
import matplotlib.pyplot as plt
from pod.ai.ai_utils import gen_pods
from pod.constants import Constants
import math
import numpy as np

# Build the grid axes once and reuse them for both gen_pods calls.
# NOTE(review): the variable names below (radii / angles / speeds) are inferred
# from the value ranges only -- confirm against gen_pods in pod.ai.ai_utils.
first_check = board.checkpoints[0]
max_speed = Constants.max_vel()

# From the checkpoint radius out to (but excluding) 10000, in steps of 1000.
radii = np.arange(Constants.check_radius(), 10000, 1000)

# -0.9*pi .. 0.9*pi in steps of 0.2*pi. The stop is 0.91*pi rather than 0.9*pi
# because np.arange excludes its stop value; the small overshoot keeps the
# final 0.9*pi sample in the grid.
wide_angles = np.arange(math.pi * -0.9, math.pi * 0.91, math.pi * 0.2)

# Narrow band around zero, in radians. np.arange excludes the stop, so this
# nominally spans -0.3 .. 0.25 and is not symmetric about 0.
# NOTE(review): confirm whether the missing +0.3 sample is intentional.
narrow_angles = np.arange(-0.3, 0.3, 0.05)

# 0 .. max_vel in five equal steps; the "+ 1" keeps max_vel itself in range.
speeds = np.arange(0, max_speed + 1, max_speed / 5)

# Coarse coverage: the wide angle grid is used for both angle arguments.
pods_everywhere = gen_pods(first_check, radii, wide_angles, wide_angles, speeds)

# TODO: training goes much better if I add extra pods pointing towards the check...why?
# Same grid, but the first angle argument is restricted to the narrow band.
pods_focused = gen_pods(first_check, radii, narrow_angles, wide_angles, speeds)

# Full training set: coarse states followed by the focused ones.
pods = [*pods_everywhere, *pods_focused]

print("{} total states".format(len(pods)))

In [None]:
# Train on the pre-generated states in `pods` (built in the previous cell) and
# plot the series that train_from_examples returns.
accuracy = controller.train_from_examples(pods)

fig, accuracy_ax = plt.subplots()
accuracy_ax.plot(accuracy)
plt.show()

### Testing

In [None]:
from pod.game import Player
from pod.drawer import Drawer
from pod.controller import SimpleController

# Number of turns passed to the drawer, both here and in the chart cell below.
TURNS = 100

# The trained Deep-Q controller competes against a SimpleController.
q_player = Player(controller)
simple_player = Player(SimpleController())
players = [q_player, simple_player]

drawer = Drawer(board, players)

# Reset every player against the board before animating.
for player in players:
    player.reset(board)

drawer.animate(TURNS)

In [None]:
# Reset both players again so the chart does not continue from the state left
# behind by the animation cell, then chart rewards over the same turn count.
for player in (q_player, simple_player):
    player.reset(board)

drawer.chart_rewards(TURNS)

In [None]:
import numpy as np

# Probe the model with a single all-ones row of six integer features.
# NOTE(review): presumably the output is one value per action -- confirm
# against DeepQController.
controller.model(np.ones((1, 6), dtype=int))

In [None]:
# Probe the model with a single all-zeros row of six integer features.
controller.model(np.zeros((1, 6), dtype=int))

In [None]:
# Probe the model with a single row alternating 0 and 1 across six features.
controller.model(np.array([[0, 1] * 3]))