# Deep Tree Search

The idea here:
1. Train a neural network (NN) to pick the action with the highest reward (i.e., the same as DeepRewardController)
1. Use this NN to create a special reward function **stepping_reward**. This function:
  1. Takes the action proposed by the NN to move forward one step
  1. Returns the reward at that step
1. Now, loop:
  1. Train the NN to pick what a depth-2 TreeSearchController would pick, using the **stepping_reward**
  1. The **stepping_reward** should now take 2 steps before evaluating

The end result is a NN trained to produce what a TreeSearchController would produce, with a search depth equal to the number of training iterations done here.

In [None]:
import tensorflow as tf
import numpy as np

from pod.board import PodBoard
from pod.drawer import Drawer
from pod.ai.deep_tree_controller import DeepTreeController
from pod.ai.rewards import regood

# Shared notebook state: the board every later cell plays on, and the
# controller being trained. `regood` is the reward function handed to the
# controller.
# NOTE(review): the meaning of the argument 4 to PodBoard.trainer is not
# visible here (presumably a checkpoint count or layout selector) - confirm.
board = PodBoard.trainer(4)
controller = DeepTreeController(board, regood)

In [None]:
import math

from pod.ai.ai_utils import gen_pods, play_gen_pods
from pod.ai.misc_controllers import RandomController
from pod.constants import Constants

# Step 1: get a bunch of pods spread around the board
print("Generating pods...")
pods = gen_pods(
    board.checkpoints,
    # Five values: check_radius + 100, 2100, 4100, 6100, 8100.
    # Presumably distances from a checkpoint - confirm against gen_pods.
    [Constants.check_radius() + i for i in range(100, 10000, 2000)],
    # Five angles in radians: 0, pi/5, 2*pi/5, 3*pi/5, 4*pi/5.
    [i * math.pi / 5 for i in range(5)],
    # Five angles offset by half a step: 0.1*pi, 0.3*pi, ..., 0.9*pi.
    # NOTE(review): both angle grids only span [0, pi); confirm whether
    # gen_pods mirrors them or whether full-circle coverage was intended.
    [(i + 0.5) * math.pi / 5 for i in range(5)],
    # Three values: 0, max_vel / 2, max_vel. Presumably pod speeds - confirm.
    [i * Constants.max_vel() / 2 for i in range(3)]
)

# Step 2: play them a few turns to build even more
# (the trailing 5 is presumably the number of turns - confirm against
# play_gen_pods; moves come from a RandomController on the same board)
print("Generating even more pods...")
pods = play_gen_pods(pods, RandomController(board), 5)

# Step 3: Vectorize each pod
# After this line `pods` is a list of (pod, vector) tuples, not bare pods;
# this is the form passed to controller.train below.
print("Vectorizing...")
pods = [(pod, controller.vectorizer.to_vector(board, pod)) for pod in pods]

print("Done!")

In [None]:
import matplotlib.pyplot as plt

# Run three training rounds over the (pod, vector) pairs and report the
# controller's depth after each one.
# The 20 is presumably an epoch count - confirm against
# DeepTreeController.train.
# NOTE(review): `history` is overwritten on every round and is not read in
# this cell, so only the last round's value survives.
for i in range(3):
    history = controller.train(pods, 20)
    print("Controller now at depth {}".format(controller.depth))

In [None]:
from pod.ai.reward_controller import RewardController
from pod.ai.tree_search_controller import TreeSearchController

# Put the trained controller side by side with two reference controllers
# built on the same board and reward function: a RewardController and a
# TreeSearchController constructed with 2 (presumably its search depth -
# confirm).
drawer = Drawer(board, controllers=[
    controller,
    RewardController(board, regood),
    TreeSearchController(board, regood, 2)])
# Kept as the last expression of the cell so that whatever animate() returns
# is what the notebook displays. The 200 is presumably a frame/turn count -
# confirm against Drawer.animate.
drawer.animate(200)

In [None]:
# Chart rewards using the same reward function the controllers were built
# with. Depends on `drawer` from the previous cell, so that cell must have
# been run first (presumably including the animate() call that plays the
# game - confirm).
drawer.chart_rewards(regood)

# Scratchpad

In [None]:
# Scratch cell: draw a single frame of a different trainer board (argument 5
# instead of 4) with an empty list passed to Drawer.
# NOTE(review): this rebinds the notebook-wide `board` and `drawer`. Running
# any of the earlier cells after this one will use this board/drawer rather
# than the ones the controller was built and trained on.
# NOTE(review): the list is passed positionally here, whereas the earlier
# Drawer call uses controllers=...; confirm the second positional parameter
# of Drawer is the intended one.
from pod.board import PodBoard
from pod.drawer import Drawer
board = PodBoard.trainer(5)
drawer = Drawer(board, [])
drawer.draw_frame()