## Imports

In [1]:
import os
import sys
import gc
import random

In [2]:
import numpy as np
import pandas as pd
from scipy.ndimage.interpolation import shift

In [3]:
import keras
from keras.models import Model
from keras.utils import to_categorical
from keras.layers import Dense, Conv2D, MaxPool2D, Input, Flatten, BatchNormalization, Dropout

Using TensorFlow backend.


In [4]:
# Record the interpreter version the notebook was executed with.
print(sys.version)

3.6.6 |Anaconda, Inc.| (default, Oct  9 2018, 12:34:16) 
[GCC 7.3.0]


---

## Data

In [5]:
# Directory that holds the raw game files.
data_path = "../input"
# os.listdir returns entries in arbitrary order; sort them so the order in
# which the files are loaded (and later stacked) is the same on every run.
data_path_list = sorted(os.listdir(data_path))

In [6]:
# Read every listed file into its own DataFrame. pandas opens and closes the
# file itself when given a path, so no separate open() handle is needed.
train = [
    pd.read_csv(os.path.join(data_path, file_name))
    for file_name in data_path_list
]

In [7]:
# Exactly two frames are expected; keep only their underlying NumPy arrays.
train1, train2 = (frame.values for frame in train)

In [8]:
# Stack the rows of both files into one array; `train` is rebound from the
# list of DataFrames to this array.
train = np.vstack((train1, train2))

In [9]:
# Drop the per-file arrays now that they are merged, then ask the garbage
# collector to run.
train1 = train2 = None
gc.collect()

0

## Data processing

In [10]:
def unwrap_data_row(row, with_label=False):
    """Split one raw data row into its label and its list of moves.

    The first element of ``row`` is a whitespace-separated string whose first
    token is the label and whose remaining tokens are the moves.

    Args:
        row: iterable whose first element is the raw record string.
        with_label: accepted for compatibility; it is not consulted.

    Returns:
        Tuple ``(label, moves)`` where ``moves`` is a list of tokens.
    """
    tokens = list(row)[0].split()
    label = tokens[0]
    steps = tokens[1:]

    return label, steps

def unwrap_data(data):
    """Split every raw row of ``data`` into moves and label.

    Args:
        data: iterable of rows accepted by ``unwrap_data_row``.

    Returns:
        Tuple ``(data_steps, data_label)``: two parallel lists holding, for
        each row, its list of moves and its label.
    """
    parsed = [unwrap_data_row(row, True) for row in data]

    data_steps = [steps for _, steps in parsed]
    data_label = [label for label, _ in parsed]

    return data_steps, data_label

In [11]:
# Parse every raw row into (moves, label), then convert both lists to arrays.
# NOTE(review): if the games have different numbers of moves, np.array() is
# being asked to build an array from ragged lists; how that is handled depends
# on the NumPy version - confirm against the version in use.
train_data, train_labels = unwrap_data(train)
gc.collect()
train_data, train_labels = np.array(train_data), np.array(train_labels)
gc.collect()

0

In [12]:
# From here on `train` holds the parsed move lists (no longer the raw rows)
# and `labels` the matching game results.
train, labels = train_data, train_labels
gc.collect()

0

 ---

## Architecture

In [13]:
# Policy network: a 15x15x3 board tensor in, a softmax over 15 * 15 + 1 = 226
# move classes out.
data_input = Input(shape=(15, 15, 3), name='BoardStatus')

# Convolutional trunk; 'same' padding keeps the 15x15 spatial size.
Conv1 = Conv2D(128, (3, 3), padding='same', activation='relu')(data_input)
BN1 = BatchNormalization()(Conv1)
Conv2 = Conv2D(64, (4, 4), padding='same', activation='relu')(BN1)
BN2 = BatchNormalization()(Conv2)
Conv3 = Conv2D(64, (4, 4), padding='same', activation='relu')(BN2)
BN3 = BatchNormalization()(Conv3)
# NOTE(review): Conv4/BN4 are built but nothing below consumes BN4 - Pool1 is
# applied to BN3 - so they are not on the path from data_input to FC3.
# Confirm whether Pool1 was meant to take BN4 or whether these two lines are
# leftovers.
Conv4 = Conv2D(128, (5, 5), padding='same', activation='relu')(BN3)
BN4 = BatchNormalization()(Conv4)
Pool1 = MaxPool2D()(BN3)

# Fully connected head with dropout after each hidden layer.
# NOTE(review): the input_shape arguments look redundant because each Dense
# layer is called directly on a tensor - confirm and consider removing.
FLT = Flatten()(Pool1)
FC1 = Dense(500, activation='sigmoid', input_shape=(7 * 7 * 64,))(FLT)
DO1 = Dropout(0.5)(FC1)
FC2 = Dense(500, activation='sigmoid', input_shape=(500,))(DO1)
DO2 = Dropout(0.5)(FC2)
FC3 = Dense(15 * 15 + 1, activation='softmax', input_shape=(500,))(DO2)

policyNN = Model(inputs=data_input, outputs=FC3)

In [14]:
# Show the layer table, then configure training: Adam optimizer, categorical
# cross-entropy loss, categorical accuracy as the reported metric.
policyNN.summary()
policyNN.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[keras.metrics.categorical_accuracy],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BoardStatus (InputLayer)     (None, 15, 15, 3)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 15, 15, 128)       3584      
_________________________________________________________________
batch_normalization_1 (Batch (None, 15, 15, 128)       512       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 15, 15, 64)        131136    
_________________________________________________________________
batch_normalization_2 (Batch (None, 15, 15, 64)        256       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 64)        65600     
_________________________________________________________________
batch_normalization_3 (Batch (None, 15, 15, 64)        256       
__________

## Training

In [15]:
# Install the RenjuRL package straight from the `AI` branch of its GitHub
# repository (requires network access).
!pip install git+https://github.com/ElDmitro/RenjuRL@AI

Collecting git+https://github.com/ElDmitro/RenjuRL@AI
  Cloning https://github.com/ElDmitro/RenjuRL (to revision AI) to /tmp/pip-req-build-qjjb7c1n
Branch AI set up to track remote branch AI from origin.
Switched to a new branch 'AI'
Building wheels for collected packages: renju-DK
  Building wheel for renju-DK (setup.py) ... [?25ldone
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-r28mvplc/wheels/2d/38/5b/ec3f959b938bb1750709dac1cbce01413855b16ef4c60a2614
Successfully built renju-DK
Installing collected packages: renju-DK
Successfully installed renju-DK-1.2.2


In [16]:
from renjuRL_DK.renju_game.brains.BoardManager import B_SHAPE, PLAYERS, X_MAPPING, Y_MAPPING

In [17]:
# Row labels "15" .. "1" map to array rows 0 .. 14 (label "15" is row 0).
# These assignments rebind the X_MAPPING / Y_MAPPING names imported from
# BoardManager above.
X_RANGE = [str(x) for x in range(15, 0, -1)]
X_MAPPING = {label: index for index, label in enumerate(X_RANGE)}

# Column letters "a" .. "o" map to array columns 0 .. 14.
Y_RANGE = list("abcdefghijklmno")
Y_MAPPING = {label: index for index, label in enumerate(Y_RANGE)}

In [18]:
def make_move(board, pl, move):
    """Place a stone for player ``pl`` on ``board`` if the target cell is free.

    Args:
        board: array indexed as ``board[x, y]``; a value of 0 marks a free cell.
        pl: value written into the cell.
        move: move string, a column letter followed by a row number (looked up
            in ``Y_MAPPING`` / ``X_MAPPING``), or ``'--'`` for a pass.

    Returns:
        True when the move was a pass or the stone was placed; False when the
        cell was already occupied (the board is then left untouched).
    """
    if move == '--':
        return True

    column, row = move[0], move[1:]
    x = X_MAPPING[row]
    y = Y_MAPPING[column]

    if board[x, y] == 0:
        board[x, y] = pl
        return True

    return False

In [19]:
def get_aug(board, move, border_rectangle):
    """Randomly translate a position together with its target move.

    The shift is drawn so that the bounding rectangle of the occupied cells
    stays inside the board; np.roll therefore never wraps a stone around an
    edge.

    Args:
        board: board array; axes 0 and 1 are shifted, axis 2 is left in place.
        move: flat move index ``x * 15 + y``; a value with ``move // 15 == 15``
            is the pass move and is returned unchanged.
        border_rectangle: ``(x_tl, y_tl, x_br, y_br)``, inclusive bounds of the
            occupied area. ``x_tl > x_br`` means nothing has been placed yet.

    Returns:
        Tuple ``(shifted_board, shifted_move)``. ``shifted_board`` is always a
        new array, never ``board`` itself.
    """
    x_tl, y_tl, x_br, y_br = border_rectangle

    if x_tl > x_br:
        # Empty rectangle: nothing to shift. Return a copy so the caller can
        # keep mutating its working board without altering the stored sample.
        return board.copy(), move

    # Admissible shifts keep [tl, br] inside [0, size - 1] on each axis.
    # NOTE(review): both axes use B_SHAPE[0], as the original did; this
    # presumes a square board.
    x_offset = random.choice(range(-x_tl, B_SHAPE[0] - x_br))
    y_offset = random.choice(range(-y_tl, B_SHAPE[0] - y_br))

    # Row index 15 encodes a pass, which has no coordinates to shift.
    if move // 15 != 15:
        move = move + x_offset * 15 + y_offset

    return np.roll(board, (x_offset, y_offset, 0), axis=(0, 1, 2)), move
    

In [20]:
# Maps a game-result label to the winning player's value, 0 for a draw, or
# None when the result is unknown.
label2pl = dict(
    black=PLAYERS[0],
    white=PLAYERS[1],
    draw=0,
    unknown=None,
)

def move2cord(move):
    """Translate a move string into board coordinates.

    Args:
        move: column letter followed by a row number (looked up in
            ``Y_MAPPING`` / ``X_MAPPING``), or ``'--'`` for a pass.

    Returns:
        Tuple ``(x, y)``; a pass is reported as ``(15, 0)``.
    """
    if move == '--':
        return 15, 0

    column, row = move[0], move[1:]
    x = X_MAPPING[row]
    y = Y_MAPPING[column]

    return x, y

def refresh_board():
    """Create the state for a fresh game.

    Returns:
        Tuple ``(board, step_mask, next_player, border_rectangle)``:

        * ``board`` - int8 array of shape (15, 15, 3). Channels 0 and 1 start
          at zero; channel 2 starts at one. board[x, y, 2] is set to 0 once a
          step is made at (x, y); on cells still open it holds 1/-1, the value
          telling which player is about to move.
        * ``step_mask`` - boolean array of the same shape, True only on
          channel 2.
        * ``next_player`` - ``PLAYERS[0]``.
        * ``border_rectangle`` - ``(15, 15, -1, -1)``, the empty bounding box.
    """
    board = np.zeros((15, 15, 3), dtype=np.int8)
    board[:, :, 2] = 1

    step_mask = np.zeros((15, 15, 3), dtype=bool)
    step_mask[:, :, 2] = True

    return board, step_mask, PLAYERS[0], (15, 15, -1, -1)

def refresh_rectangle(rec, x, y):
    """Grow the bounding rectangle ``rec`` so that it also covers ``(x, y)``.

    Args:
        rec: ``(x_tl, y_tl, x_br, y_br)`` with inclusive corners.
        x, y: coordinates of the cell to include.

    Returns:
        The enlarged rectangle as a 4-tuple.
    """
    x_tl = min(x, rec[0])
    y_tl = min(y, rec[1])
    x_br = max(x, rec[2])
    y_br = max(y, rec[3])

    return x_tl, y_tl, x_br, y_br

def data_generator(parties, labels, batch_size, data_mask):
    """Endlessly yield (boards, one-hot moves) batches replayed from games.

    Games are visited cyclically in the order given by ``data_mask``. Each
    game is replayed move by move; a position becomes a sample only when the
    player to move is not the loser of that game (``next_player != -result``),
    so every move of a drawn game is used. Each sample goes through
    ``get_aug``. Games whose result is unknown, or that contain no moves, are
    skipped.

    Args:
        parties: indexable collection of games, each a sequence of move
            strings understood by ``move2cord``.
        labels: per-game result labels; each must be a key of ``label2pl``.
        batch_size: number of samples per yielded batch; must be positive.
        data_mask: indices into ``parties`` / ``labels`` selecting the games.

    Yields:
        Tuple ``(np.array(batch), to_categorical(moves, 226))``. The class
        index of a move is ``x * 15 + y``; 225 is the pass move.

    Raises:
        ValueError: if no selected game has both a known result and a move.
    """
    assert batch_size > 0, "Non-positive batch size was passed"

    p_num = len(data_mask)

    def _first_playable(idx):
        # Starting at idx, walk the mask cyclically to the first game that has
        # a known result and at least one move. The walk is bounded so that a
        # mask without any such game raises instead of spinning forever.
        for _ in range(p_num):
            game = data_mask[idx]
            if label2pl[labels[game]] is not None and len(parties[game]) > 0:
                return idx
            idx = (idx + 1) % p_num
        raise ValueError("data_mask selects no game with a known result and moves")

    p_idx = _first_playable(0)
    s_idx = 0
    result = label2pl[labels[data_mask[p_idx]]]
    board, step_mask, next_player, border_rectangle = refresh_board()

    while True:
        batch = []
        moves = []

        while len(batch) < batch_size:
            if s_idx >= len(parties[data_mask[p_idx]]):
                # Current game is exhausted: reset the board, go to the next one.
                board, step_mask, next_player, border_rectangle = refresh_board()
                p_idx = _first_playable((p_idx + 1) % p_num)
                s_idx = 0
                result = label2pl[labels[data_mask[p_idx]]]

            move = parties[data_mask[p_idx]][s_idx]
            s_idx += 1
            x, y = move2cord(move)
            if x != 15:
                # Include the move about to be played, so that augmentation
                # cannot shift the target off the board.
                border_rectangle = refresh_rectangle(border_rectangle, x, y)

            if next_player != -result:
                if x != 15:
                    # Stamp the side to move onto every still-free cell of
                    # channel 2.
                    board[step_mask] = next_player
                board_c, target = get_aug(board, x * 15 + y, border_rectangle)
                if board_c is board:
                    # The working board is mutated below; never store it as-is.
                    board_c = board.copy()
                batch.append(board_c)
                moves.append(target)

            if x != 15:
                # ((3 - player) mod 3) mod 2 gives the board channel for the player
                board[x, y, ((3 - next_player) % 3) % 2] = next_player
                board[x, y, 2] = 0
                step_mask[x, y, 2] = False

            next_player = -next_player

        yield np.array(batch), to_categorical(moves, 226)

In [21]:
# Total number of moves over all games (sum of the per-game lengths).
N_samples = np.vectorize(len)(train).sum()
# 80% of the games are used for training.
train_size = int(len(train) * 0.8)

In [22]:
# Pick the training games. replace=False draws each index at most once; the
# default samples with replacement, which duplicates games in the training set
# and leaves noticeably more than 20% of the games in the validation split.
data_mask = np.random.choice(np.arange(len(train)), train_size, replace=False)

In [23]:
# Boolean mask over all games: True for every game index that was not drawn
# into data_mask.
val_mask = ~np.in1d(np.arange(len(train)), data_mask)

In [24]:
# Training batch generator: 500 samples per batch, drawn from the training games.
gen = data_generator(train, labels, 500, data_mask)

In [25]:
# exist_ok=True keeps the cell re-runnable: os.mkdir raises if the directory
# is already there.
os.makedirs('../model_checkpoints/', exist_ok=True)

In [26]:
# exist_ok=True keeps the cell re-runnable: os.mkdir raises if the directory
# is already there.
os.makedirs('../tfboard', exist_ok=True)

In [27]:
# Save the model to the relative path 'checkpoint' every 5 epochs (period=5).
# NOTE(review): the '../model_checkpoints/' directory created earlier is not
# used by this path - confirm where checkpoints are meant to go.
model_saver_callback = keras.callbacks.ModelCheckpoint('checkpoint', verbose=1, period=5)
# Write TensorBoard logs to '../tfboard'.
tboard_callback = keras.callbacks.TensorBoard('../tfboard')

In [28]:
# Take a single batch of 100000 samples from the held-out games and use it as
# a fixed validation set.
val_gen = data_generator(train, labels, 100000, np.arange(len(train))[val_mask])
val_x, val_y = next(val_gen)


In [None]:
# Train from the endless generator: 1500 batches per epoch for 100 epochs,
# checkpointing and logging through the callbacks, and validating on the fixed
# (val_x, val_y) set.
policyNN.fit_generator(gen,
                       steps_per_epoch=1500,
                       epochs=100,
                       callbacks=[
                           model_saver_callback,
                           tboard_callback
                       ],
                       validation_data=(val_x, val_y))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100

Epoch 00005: saving model to checkpoint
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
  67/1500 [>.............................] - ETA: 52s - loss: 2.8252 - categorical_accuracy: 0.3719

In [None]:
# Display the model object as the cell output.
policyNN

In [None]:
# Trigger a garbage-collection pass.
gc.collect()

In [None]:
def idx2cord(idx):
    """Turn a flat move index into ``(x, y)`` with ``idx == x * 15 + y``.

    Index 225 therefore yields ``(15, 0)``.
    """
    x, y = divmod(idx, 15)
    return x, y

In [None]:
# Interactive game: the human types moves (e.g. "h8", or "--" to pass) on even
# turns and policyNN answers on odd turns. The loop has no exit condition;
# interrupt the kernel to stop.

# refresh_board() returns four values; the bounding rectangle is not needed here.
board, step_mask, next_player, _ = refresh_board()
moves_f = [lambda x: input(), lambda x: policyNN.predict(x)[0]]
unmarch_move = [lambda x: move2cord(x), lambda x: idx2cord(x.argmax())]
i = 0
while True:
    if i % 2 == 0:
        print(board[:, :, 0] | board[:, :, 1])

    # Stamp the side to move onto the free cells of channel 2 BEFORE asking for
    # the move, so the network is given the same kind of input that
    # data_generator produces for training samples.
    board[step_mask] = next_player

    move = moves_f[i % 2](np.expand_dims(board, 0))
    x, y = unmarch_move[i % 2](move)

    # x == 15 encodes a pass: nothing is placed.
    if x != 15:
        board[x, y, ((3-next_player) % 3) % 2] = next_player
        board[x, y, 2] = 0
        step_mask[x, y, 2] = False

    next_player = -next_player
    i += 1