In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from IPython.display import clear_output
from sklearn.utils import shuffle

%matplotlib inline

In [2]:
im_size = 15

move_dict = {'a':0, 'b':1, 'c':2, 'd':3, 'e':4, 'f':5, 
             'g':6, 'h':7, 'j':8, 'k':9, 'l':10, 'm':11, 
             'n':12, 'o':13, 'p':14}
letters = ['a', 'b', 'c', 'd', 'e', 'f', 
           'g', 'h', 'j', 'k', 'l', 'm', 
           'n', 'o', 'p']
for i in range(15):
    move_dict[i] = letters[i]
    
def data_gen(left=0, right=9999999999, batch_size=None):
    X = np.zeros((0, 3, 15, 15))
    policy_Y = np.zeros((0, 15*15))
    val_Y = np.zeros(0)
    i = 0
    flag = True
    while flag == True:
        train_file = open('train-1.renju')
        for line in train_file:
            if i < left:
                i += 1
                continue
            elif i > right:
                break
            i += 1
            winner = line[0]
            if winner != 'b' and winner != 'w':
                continue
            line = line.split(' ')[1:]
            position = np.zeros((1, 3, 15, 15))
            player = 0
            for turn in line:
                if player == 0:
                    position[0][2] = 1
                else:
                    position[0][2] = -1
                x = move_dict[turn[0]]
                y = int(turn[1:])-1
                X = np.append(X, position, axis=0)
                position[0][player][x][y] = 1
                pol = np.zeros((1, 15*15))
                val = np.zeros(1)
                if ((player == 0 and winner == 'b') or
                    (player == 1 and winner == 'w')):
                    val[0] = 1
                else:
                    val[0] = -1
                pol[0][15*x+y] = 1
                policy_Y = np.append(policy_Y, pol, axis=0)
                val_Y = np.append(val_Y, val, axis=0)
                if player == 0:
                    player = 1
                else:
                    player = 0
            
            if batch_size == None:
                flag = False
            elif len(X) >= batch_size:
                yield X, {'policy_head': policy_Y, 'val_head': val_Y}
                X = np.zeros((0, 3, 15, 15))
                policy_Y = np.zeros((0, 15*15))
                val_Y = np.zeros(0)
    yield X, {'policy_head': policy_Y, 'val_head': val_Y}

In [3]:
from keras.models import Model
from keras import backend as K
from keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint
import keras
from keras.layers import (Input, concatenate, Conv2D, MaxPooling2D, 
                          UpSampling2D, Convolution2D, ZeroPadding2D, 
                          BatchNormalization, Activation, concatenate, 
                          Flatten, Add, Dense)
from keras.optimizers import Adam

keras.backend.set_image_data_format('channels_first')

def big_policy():
    inputs = Input(shape=(3, 15, 15))
    conv = Conv2D(filters=128, kernel_size=(5,5), padding='same')(inputs)
    batchnorm = BatchNormalization()(conv)
    out = Activation('relu')(batchnorm)
    
    conv = Conv2D(filters=128, kernel_size=(5,5), padding='same')(out)
    batchnorm = BatchNormalization()(conv)
    active = Activation('relu')(batchnorm)
    conv = Conv2D(filters=128, kernel_size=(5,5), padding='same')(active)
    batchnorm = BatchNormalization()(conv)
    add = Add()([batchnorm, out])
    out = Activation('relu')(add)
    
    conv = Conv2D(filters=128, kernel_size=(5,5), padding='same')(out)
    batchnorm = BatchNormalization()(conv)
    active = Activation('relu')(batchnorm)
    conv = Conv2D(filters=128, kernel_size=(5,5), padding='same')(active)
    batchnorm = BatchNormalization()(conv)
    add = Add()([batchnorm, out])
    out = Activation('relu')(add)
    
    policy_conv = Conv2D(filters=2, kernel_size=(5,5), padding='same')(out)
    batchnorm = BatchNormalization()(policy_conv)
    active = Activation('relu')(batchnorm)
    policy = Dense(15*15, activation='softmax', name='policy_head')(Flatten()(active))
    
    val_conv = Conv2D(filters=1, kernel_size=(5,5), padding='same')(out)
    batchnorm = BatchNormalization()(val_conv)
    active = Activation('relu')(batchnorm)
    dense = Dense(256)(Flatten()(active))
    active = Activation('relu')(dense)
    val = Dense(1, activation='tanh', name='val_head')(active)
    
    return Model(inputs=inputs, outputs=[policy, val])

def rollout_policy():
    inputs = Input(shape=(3, 15, 15))
    conv = Conv2D(filters=256, kernel_size=(5,5), padding='same')(inputs)
    batchnorm = BatchNormalization()(conv)
    out = Activation('relu')(batchnorm)
    
    policy_conv = Conv2D(filters=2, kernel_size=(5,5), padding='same')(out)
    batchnorm = BatchNormalization()(policy_conv)
    active = Activation('relu')(batchnorm)
    policy = Dense(15*15, activation='softmax', name='policy_head')(Flatten()(active))
    
    return Model(inputs=inputs, outputs=policy)

Using TensorFlow backend.


In [4]:
model = big_policy()
model.compile(optimizer='rmsprop', 
              loss={'val_head': 'mean_squared_error', 'policy_head': 'categorical_crossentropy'}, 
              loss_weights={'val_head': 0.5, 'policy_head': 0.5})
checkpoint = ModelCheckpoint('weights/big_policy_{epoch:d}_{val_loss:.2f}.hdf5', 
                             monitor='val_loss', 
                             verbose=1, 
                             save_best_only=False, 
                             period=1,
                             save_weights_only=True)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 3, 15, 15)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 128, 15, 15)  9728        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 128, 15, 15)  60          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 128, 15, 15)  0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [5]:
plays = 69719164
b_size = 100
save_freq = 5
epoch_n = 5
try:
    model.fit_generator(data_gen(batch_size=b_size), 
                        steps_per_epoch=int((plays/b_size)/save_freq),
                        epochs=epoch_n*save_freq, 
                        callbacks=[checkpoint],
                        validation_data=(np.zeros((1, 3, 15, 15)), 
                                         {'policy_head': np.zeros((1, 15*15)), 'val_head': np.zeros(1)}))
except KeyboardInterrupt:
    model.save_weights('weights/big_policy.hdf5')

Epoch 1/25

Epoch 00001: saving model to weights/big_policy_1_0.00.hdf5


OSError: Unable to create file (Unable to open file: name = 'weights/big_policy_1_0.00.hdf5', errno = 2, error message = 'no such file or directory', flags = 13, o_flags = 242)