# Deep Q-learning

In [None]:
from pod.board import PodBoard
from pod.ai.deep_q_controller import DeepQController
from pod.controller import SimpleController
from pod.drawer import Drawer
from pod.ai.rewards import re_dca
import matplotlib.pyplot as plt

In [None]:
# Create the board via PodBoard.trainer() and a DeepQController bound to it.
# re_dca (imported from pod.ai.rewards) is handed to the controller as its
# second argument — presumably the reward function it learns against;
# confirm in DeepQController.
board = PodBoard.trainer()
controller = DeepQController(board, re_dca)

### Online training, using exploration from random starting points

In [None]:
# controller.train() returns two plottable series, unpacked here as
# (rewards, accuracy).
rewards, accuracy = controller.train()

# Draw them side by side on a 1x2 grid: rewards on the left (panel 1),
# accuracy on the right (panel 2).
for panel, series in enumerate((rewards, accuracy), start=1):
    plt.subplot(1, 2, panel)
    plt.plot(series)

plt.show()

### Training from a predefined set of states

In [None]:
import matplotlib.pyplot as plt
from pod.ai.ai_utils import gen_pods
from pod.constants import Constants
import math
import numpy as np

def _pod_grid(angles):
    """Generate pods around the first checkpoint for one choice of ``angles``.

    ``angles`` is forwarded as the third positional argument of ``gen_pods``;
    every other argument is identical for both pod sets built below.
    NOTE(review): judging by the TODO further down, this argument presumably
    controls where the pod points relative to the checkpoint — confirm
    against gen_pods.
    """
    return gen_pods(
        board.checkpoints[0],
        # From the checkpoint radius up to (excluding) 10000, in steps of 1000.
        np.arange(Constants.check_radius(), 10000, 1000),
        angles,
        # -0.9*pi .. 0.9*pi in steps of 0.2*pi; the 0.91 stop keeps 0.9*pi in.
        np.arange(math.pi * -0.9, math.pi * 0.91, math.pi * 0.2),
        # 0 .. max_vel in fifths; the "+ 1" on the stop is intended to keep
        # max_vel itself in the grid.
        np.arange(0, Constants.max_vel() + 1, Constants.max_vel() / 5),
    )


# Coarse set: the varied angle sweeps the same -0.9*pi .. 0.9*pi grid.
pods_everywhere = _pod_grid(
    np.arange(math.pi * -0.9, math.pi * 0.91, math.pi * 0.2)
)

# TODO: training goes much better if I add extra pods pointing towards the check...why?
# Dense set: the varied angle covers a narrow band around zero,
# from -0.3 in steps of 0.05 up to (nominally excluding) 0.3.
pods_focused = _pod_grid(np.arange(-0.3, 0.3, 0.05))

# Concatenate both sets into one list of training states.
pods = list(pods_everywhere)
pods.extend(pods_focused)

print("{} total states".format(len(pods)))

In [None]:
# Train on the predefined pod states built in the previous cell.
# Note: this rebinds `accuracy`, replacing the value produced by the
# online-training cell above.
accuracy = controller.train_from_examples(pods)

# Plot the series returned by train_from_examples.
plt.plot(accuracy)
plt.show()

### Testing

In [None]:
# Run the Deep-Q controller alongside a SimpleController on the same board
# and animate the result, capped at 30 turns.
contenders = [controller, SimpleController(board)]
drawer = Drawer(board, controllers=contenders)
drawer.animate(max_turns=30)

In [None]:
# Chart rewards using the same reward function (re_dca) the controller was
# built with. Presumably this covers the run animated in the previous
# cell — confirm in Drawer.chart_rewards.
drawer.chart_rewards(re_dca)

# Sandbox

In [None]:
from random import random
import tensorflow as tf

# Probe the model with 10 random inputs and print what it returns for each.
# NOTE(review): 6 is presumably the model's input width — confirm against
# the controller's vectorizer.
for trial in range(10):
    sample = [random() for _ in range(6)]
    # Debug aid: print("Input: {}".format(sample))
    batch = tf.constant([sample])  # extra list level: a batch of one sample
    prediction = controller.model(batch)
    print("Output: {}".format(prediction))


In [None]:
from vec2 import Vec2
from pod.util import PodState
import tensorflow as tf
# Swap the controller's model for the one saved at /tmp/dq. LeakyReLU is
# supplied through custom_objects so Keras can resolve that layer on load.
controller.model = tf.keras.models.load_model(
    "/tmp/dq",
    custom_objects={"LeakyReLU": tf.keras.layers.LeakyReLU},
)

# Query the model on a 5 x 4 grid of positions
# (x: 0..16000 step 4000, y: 0..9000 step 3000). Only `pos` is set on each
# PodState; every other field keeps whatever default PodState gives it.
for x in range(0, 16001, 4000):
    for y in range(0, 9001, 3000):
        pod = PodState(pos=Vec2(x, y))
        features = controller.vectorizer.to_vector(board, pod)
        prediction = controller.model(tf.constant([features]))
        print(str(prediction))