# Deep Tree Search

The idea here:
1. Train a NN to pick the action with the highest reward (i.e. same as DeepRewardController)
1. Use this NN to create a special reward function **stepping_reward**. This function:
   1. Takes the action proposed by the NN to move forward one step
   1. Returns the reward at that step
1. Now, loop:
   1. Train the NN to pick what a depth-2 TreeSearchController would pick, using the **stepping_reward**
   1. The **stepping_reward** should now take 2 steps before evaluating (and one more step for each further pass through the loop)

The end result is that the NN should be trained to produce what a TreeSearchController would produce, with search depth = the number of training iterations done here.

In [1]:
import tensorflow as tf
import numpy as np

from pod.board import PodBoard
from pod.drawer import Drawer
from pod.ai.deep_tree_controller import DeepTreeController
from pod.ai.rewards import regood

# Board used for training. The pod-generation log further down reports
# "checks=4" for this board, so the argument is presumably the number of
# checkpoints — TODO confirm against PodBoard.trainer.
board = PodBoard.trainer(4)
# The controller being trained in this notebook; it is handed the `regood`
# reward function imported above.
controller = DeepTreeController(board, regood)

In [2]:
import math

from pod.ai.ai_utils import gen_pods, play_gen_pods
from pod.ai.misc_controllers import RandomController
from pod.controller import SimpleController
from pod.constants import Constants

# Step 1: get a bunch of pods spread around the board.
# The five grids below are handed to gen_pods in this order. The captured log
# reports "positions=20 angles=10 vels=12", which matches 5*4, 10 and 4*3, so
# the names reflect that reading — NOTE(review): confirm against gen_pods.
print("Generating pods...")
position_angles = [i * math.pi / 5 for i in range(5)]
position_distances = [Constants.check_radius() * (i**2) + 1 for i in range(1, 5)]
heading_angles = [
    i * math.pi
    for i in [1, 0.75, -0.75, 0.5, -0.5, 0.3, -0.3, 0.2, -0.2, 0]
]
velocity_angles = [i * math.pi / 2 for i in range(4)]
velocity_magnitudes = [i * Constants.max_vel() / 2 for i in range(3)]

seed_pods = gen_pods(
    board.checkpoints,
    position_angles,
    position_distances,
    heading_angles,
    velocity_angles,
    velocity_magnitudes,
)

# Step 2: play them a few turns to build even more
# (first under a random controller, then under the simple one; this stage
# intentionally prints no banner of its own).
randomly_played_pods = play_gen_pods(seed_pods, RandomController(board), 2)
played_pods = play_gen_pods(randomly_played_pods, SimpleController(board), 3)

# Step 3: Vectorize each pod, keeping the pod next to its vector.
print("Vectorizing...")
pods = [
    (pod, controller.vectorizer.to_vector(board, pod))
    for pod in played_pods
]

print("Done!")

Generating pods...
Generating pods: checks=4 positions=20 angles=10 vels=12
9600 pods generated
28800 pods generated
115200 pods generated
Vectorizing...
Done!


In [3]:
import matplotlib.pyplot as plt

# Two training rounds over the vectorized pods. After each round, report the
# depth the controller says it has reached. `history` keeps only the result
# of the most recent round.
for training_round in range(2):
    history = controller.train(pods, 30)
    depth_report = "Controller now at depth {}".format(controller.depth)
    print(depth_report)

Generating labels for 115200 pods...


  return array(a, dtype, copy=False, order=order)


AttributeError: Can't pickle local object 'DeepTreeController.__wrap_reward_func.<locals>.r_func'

In [None]:
from pod.ai.reward_controller import RewardController
from pod.ai.tree_search_controller import TreeSearchController

# Move the trained controller onto a grid board and animate it next to a
# plain reward controller and a depth-2 tree search, all using `regood`.
board = PodBoard.grid()
controller.board = board

contenders = [
    controller,
    RewardController(board, regood),
    TreeSearchController(board, regood, 2),
]
drawer = Drawer(board, controllers=contenders)
drawer.animate(200)

In [None]:
# Chart the `regood` reward using the drawer built in the previous cell
# (presumably over the run that animate() just produced — confirm in Drawer).
drawer.chart_rewards(regood)

# Scratchpad

In [None]:
import tensorflow as tf
import numpy as np
from pod.util import PodState
from pod.board import PodBoard
from pod.player import Player
from pod.drawer import Drawer
from pod.ai.rewards import regood
from pod.ai.tree_search_controller import TreeSearchController
from pod.ai.deep_tree_controller import DeepTreeController
from pod.ai.vectorizer import Vectorizer, V6

board = PodBoard.grid().shuffle()
tsc = TreeSearchController(board, regood, max_depth=3)
dtc = DeepTreeController(board, regood)
v6 = V6()

# Training set accumulated across plays. These must exist before the loop:
# the appends below read `states` and `labels`, and without this
# initialisation a fresh kernel fails with NameError on the first play.
# `states` starts as an empty (0, vec_len) matrix so rows can be appended
# along axis 0; `labels` is a flat integer array.
states = np.empty((0, v6.vec_len()))
labels = np.empty((0,), dtype=int)

for play in range(5):
    print("------------ Play {} --------------".format(play))
    # Record one run of the tree-search controller from a random start state.
    tsc.reset()
    player = Player(tsc, PodState.random())
    drawer = Drawer(board, [player])
    drawer.record(max_frames=2000, max_laps=100, reset=False)

    print("------------ Labelling --------------")
    # The states and actions are offset by one: the first state should produce the second action
    # (because the tree records the LAST action)
    new_states = [v6.to_vector(board, log[0]['pod']) for log in drawer.log[:-1]]
    new_labels = [log[0]['action'] for log in drawer.log[1:]]
    states = np.append(states, new_states, 0)
    # No axis given: np.append flattens, so `labels` stays one-dimensional.
    labels = np.append(labels, new_labels)
    print("States {} labels {}".format(len(states), len(labels)))

    # Retrain on everything gathered so far, not just the latest play.
    dtc.train_with_labels(states, labels, 20)


In [None]:
# Animate the trained deep-tree controller against the tree search it was
# trained to imitate, on the same board.
rivals = [dtc, tsc]
drawer = Drawer(board, controllers=rivals)
drawer.animate()

In [None]:
import math
# Experiment: cut a list into `num` consecutive chunks of (at most) equal
# size and print each chunk.
x = list(range(1, 12))
num = 2

# Chunk length, rounded up so that `num` chunks always cover the whole list.
size = math.ceil(len(x) / num)

start = 0
while start < len(x):
    # The last chunk may be shorter: clamp its end to the list length.
    stop = start + size
    if stop > len(x):
        stop = len(x)
    print(x[start:stop])
    start = stop


In [None]:
import numpy as np
# Same experiment with NumPy: split into 3 parts and show each part's length
# (array_split allows parts of unequal size).
x = list(range(1, 12))
list(map(len, np.array_split(x, 3)))