In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from alphazero.coach import Coach
from alphazero.gomoku_game import GomokuGame as Game
from alphazero.brain import GomokuBrain
from alphazero.interfaces import TrainParams

In [3]:
# Training configuration handed to Coach. Arguments are grouped by apparent
# purpose; the exact meaning of each field is defined by TrainParams
# (only the two commented fields are documented here) -- TODO confirm the rest.
params = TrainParams(
    # -- training loop sizes --
    num_iterations=1000,
    num_episodes=100,
    num_simulations=25,
    cpuct=1.0,
    temperature_threshold=15,
    # -- training examples --
    max_queue_length=200000,  # Number of game examples to train the neural networks.
    num_iters_for_train_examples_history=20,
    # -- arena evaluation --
    arena_compare=40,  # Number of games to play during arena play to determine if new net will be accepted.
    update_threshold=0.6,
    # -- checkpoint / model files --
    checkpoint_dir='./temperature/',
    load_model=False,
    load_folder_file=('/dev/models/8x100x50', 'best.pth.tar'),
)

In [4]:
# Gomoku game instance used for training; 15 is presumably the board side
# length -- TODO confirm against GomokuGame's constructor.
game = Game(15)

In [5]:
# Build the GomokuBrain for this game; it is passed to Coach together with
# `game` and `params`.
brain = GomokuBrain(game)

In [6]:
# Wire the game, the brain and the training parameters into the Coach whose
# learn() method is run in the next cell.
coach = Coach(game, brain, params)

In [7]:
# Start training. NOTE: in the recorded run this cell ended with a
# KeyboardInterrupt after about 1h08m, with the "Self Play" progress bar still
# at 0/100 -- expect this cell to be very slow with the current parameters.
coach.learn()

Self Play:   0%|          | 0/100 [1:07:49<?, ?it/s]

KeyboardInterrupt



In [11]:
from alphazero.gomoku_board import create_nxnx2_with_border
from domoku.jupyter_tools import print_bin

# Build the empty bordered board for size 9 and print it plane by plane
# (the recorded output has shape (11, 11, 2) with the border ring on plane 1).
bordered_planes = create_nxnx2_with_border(9)
print_bin(bordered_planes)

shape: (11, 11, 2)
[[0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]]

[[1 1 1 1 1 1 1 1 1 1 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 1 1 1 1 1 1 1 1 1 1]]


# NEXT:

#### Implement `execute_move` in `gomoku_board.py`. Find a decent way to keep the border stones on the 2nd plane.

In [13]:
import numpy as np
# Board side length without the border; the arrays built from it below are
# (n + 2) x (n + 2) x 2.
n = 9

In [25]:
# Correction term to add after the two stone planes have been swapped.
# The border ring lives on plane 1, so a swap moves it to plane 0; adding this
# array removes the ring from plane 0 (-1) and restores it on plane 1 (+1).
# Interior cells are 0, so real stones are left untouched.
border_ring = np.ones((n + 2, n + 2), dtype=int)
border_ring[1:-1, 1:-1] = 0  # keep only the outermost ring of the grid
# Shape (n + 2, n + 2, 2): plane 0 is -ring, plane 1 is +ring.
PUSH_BORDER_BACK = np.stack([-border_ring, border_ring], axis=-1)
print_bin(PUSH_BORDER_BACK)

shape: (11, 11, 2)
[[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1  0  0  0  0  0  0  0  0  0 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]]

[[1 1 1 1 1 1 1 1 1 1 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1]
 [1 1 1 1 1 1 1 1 1 1 1]]


In [26]:
from alphazero.gomoku_board import Board

# Size the board from `n` (instead of a second hard-coded 9) so it always
# matches the (n + 2, n + 2, 2) shape of PUSH_BORDER_BACK that is added to it
# later. With "d4e5" the printed board shows a 1 on d4 and a 2 on e5.
board = Board(n, "d4e5")

In [29]:
# Print the board state. With the second argument True the output is a single
# combined grid (values 1 and 2, border shown as 2) instead of one grid per
# plane -- inferred from the recorded output; TODO confirm in print_bin.
print_bin(board.pieces, True)

shape: (11, 11, 2)
[[2 2 2 2 2 2 2 2 2 2 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 2 0 0 0 0 2]
 [2 0 0 0 1 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 2 2 2 2 2 2 2 2 2 2]]


In [33]:
# Swap the two stone planes by indexing the last axis as [1, 0], then add
# PUSH_BORDER_BACK so the border ring ends up on plane 1 again.
sample = board.pieces
planes_swapped = sample[:, :, [1, 0]]
new_board = planes_swapped + PUSH_BORDER_BACK

print_bin(new_board, True)

shape: (11, 11, 2)
[[2 2 2 2 2 2 2 2 2 2 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 1 0 0 0 0 2]
 [2 0 0 0 2 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 0 0 0 0 0 0 0 0 0 2]
 [2 2 2 2 2 2 2 2 2 2 2]]
