In [9]:
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Conv2D, ReLU, Flatten, Dense, Softmax, BatchNormalization, Input, ZeroPadding2D, Activation, Add
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam, Nadam, SGD
from tensorflow.keras.applications.resnet50 import ResNet50

import keras
import numpy as np
import h5py

In [3]:
tf.__version__
tf.test.is_gpu_available()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

# Data Pre-Processing

Open **kyu_train.csv** file and split the games into a list.
Every row of csv: `KL0000000001,B,B[pq],W[dd],B[dp],W[pd],B[jc],...`. 

Columns are:

    1. KL0000000001: Game ID
    2. B: Player's color
    3-... : Moves
    
We cropped only the moves to game list as:

In [4]:
df = open('./Training Dataset/kyu_train.csv').read().splitlines()
games = [i.split(',',2)[-1] for i in df]
colors = [i.split(',',2)[1] for i in df]

Create a dictionary to convert the coordinates from characters to numbers

In [4]:
chars = 'abcdefghijklmnopqrs'
coordinates = {k:v for v,k in enumerate(chars)}
chartonumbers = {k:v for k,v in enumerate(chars)}
coordinates

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18}

We decided to build a DCNN model in this tutorial. We create data samples by using every move in every game, meaning that the target is to predict the next move by feeding the previous state of the table in every game for every move. Therefore, we can collect much more data samples from games.

For the simplicity, we used 4 dimensional feature map to represent the data as below:
 1. Positions of black stones: mark them as 1 and the rest of the table as 0
 2. Positions of white stones: mark them as 1 and the rest of the table as 0
 3. Empty areas of the table: mark the empty areas as 1 and occupied areas as 0
 4. The last move in the table: mark the position of the last move as 1 and the rest as 0
 
Target value is a number between 0-361(19\*19). Later this will be one-hot encoded.

In [5]:
#0:所有跟最後一步顏色相同的
#1:所有跟最後一步顏色不同的
#2:標示空地
#3~10:最後8步
#11:周圍黑棋7*7
#12:周圍白棋7*7
def prepare_input(moves, player_color):
    x = np.zeros((19,19,13))
    if len(moves) == 0:
        return x
#     player_color = moves[-1][0] #
    for move in moves:
        color = move[0]
        column = coordinates[move[2]]
        row = coordinates[move[3]]
        if color == player_color: #
            x[row,column,0] = 1
            x[row,column,2] = 1
        else: #
            x[row,column,1] = 1
            x[row,column,2] = 1
    x[:,:,2] = np.where(x[:,:,2] == 0, 1, 0)
    
    #倒數8步
    sz = len(moves)
    last1 = sz - 1
    last8 = max(sz - 9, 0)
    for i in range(last1, last8, -1):
        col = coordinates[moves[i][2]]
        row = coordinates[moves[i][3]]
        x[row, col, 11 - (sz - i)] = 1
    
    #周圍7*7
    last_col = coordinates[moves[-1][2]]
    last_row = coordinates[moves[-1][3]]
    rad = 3 #要改範圍大小的話改這個
    row1 = max(0, last_row - rad)
    row7 = min(18, last_row + rad)
    col1 = max(0, last_col - rad)
    col7 = min(18, last_col + rad)
    for i in range(row1, row7 + 1, 1):
        for j in range(col1, col7 + 1, 1):
            x[i, j, 11] = x[i, j, 1]
            x[i, j, 12] = x[i, j, 2]
            
    #列印所有棋盤
#     for i in range(0, 3, 1):
#         print("  a b c d e f g h i j k l m n o p q r s")
#         for j in range(0, 19, 1):
#             print(chars[j], end = " ")
#             for k in range(0, 19, 1):
#                 print(int(x[j, k, i]), end = " ")
#             print("")
#         print("")
    
    return x

def prepare_label(move):
    column = coordinates[move[2]]
    row = coordinates[move[3]]
    return column*19+row

In [6]:
# Check how many samples can be obtained
n_games = 0
n_moves = 0
for game in games:
    n_games += 1
    moves_list = game.split(',')
    for move in moves_list:
        n_moves += 1
print(f"Total Games: {n_games}, Total Moves: {n_moves}")

Total Games: 118500, Total Moves: 27135638


The code below is run for baseline model only by using only the first 500 games from the dataset. You might need to create a data generator to use complete dataset. Otherwise your RAM might not enough to store all (If you run the code on free version of Google Colab, it will crash above 500 game samples).

In [5]:
def dataGenerator(n_samples=0, start_index=-1, stop_index=-1,shuffle=True):
    def generator():
        if (start_index>0 and stop_index>0):
            indexes = np.arange(start_index, stop_index)
        else:
            indexes = np.arange(n_samples)
        if shuffle:
            np.random.shuffle(indexes)
        
        for i in indexes:
            x = np.array(hdf5['xs'][i])
            y = np.array(hdf5['ys'][i])
            x = np.moveaxis(x,0,-1)
            yield x, y
    return generator

hdf5 = h5py.File('./hdf5/kyu_dataset_detlef_remove.hdf5','r')

batch_size = 32
data_gen = dataGenerator(n_samples=int(hdf5['xs'].shape[0]/10)*9, shuffle=False) # 90% of the complete dataset
dataset = tf.data.Dataset.from_generator(data_gen, 
                                         output_types=(tf.dtypes.float32, tf.dtypes.int32),
                                         output_shapes=(tf.TensorShape((19,19,13)),tf.TensorShape((1))))
dataset = dataset.batch(batch_size, drop_remainder=True)
dataset = dataset.prefetch(tf.data.AUTOTUNE)

data_gen_valid = dataGenerator(start_index=int(hdf5['xs'].shape[0]/10)*9, # 10% of the complete dataset
                               stop_index=hdf5['xs'].shape[0], 
                               shuffle=False)
dataset_valid = tf.data.Dataset.from_generator(data_gen_valid, 
                                         output_types=(tf.dtypes.float32, tf.dtypes.int32),
                                         output_shapes=(tf.TensorShape((19,19,13)),tf.TensorShape((1))))
dataset_valid = dataset.batch(batch_size, drop_remainder=True)
dataset_valid = dataset.prefetch(tf.data.AUTOTUNE)

print(int(hdf5['xs'].shape[0]/10)*9)
print(hdf5['xs'].shape[0])

7159239
7954711


# Training

### Simple DCNN Model:

In [10]:
def residual_block(x, filters, kernel_size):
    y = Conv2D(kernel_size=kernel_size,
               filters=filters,
               padding='same')(x)
    y = ReLU()(y)
    y = Conv2D(kernel_size=kernel_size,
               filters=filters,
               padding='same')(y)
    output = Add()([x,y])
    output = ReLU()(output)
    return output

def go_res():
    inputs = Input(shape=(19, 19, 13))
    conv5x5 = Conv2D(kernel_size=5,
                     filters=256,
                     padding="same",
                     name='conv5x5')(inputs)
    conv1x1 = Conv2D(kernel_size=1,
                     filters=256,
                     padding="same",
                     name='conv1x1')(inputs)
    outputs = Add()([conv5x5, conv1x1])
    outputs = ReLU()(outputs)
    outputs = residual_block(x=outputs,
                             filters=256,
                             kernel_size=3)
    outputs = residual_block(x=outputs,
                             filters=256,
                             kernel_size=3)
    outputs = residual_block(x=outputs,
                             filters=256,
                             kernel_size=3)
    outputs = residual_block(x=outputs,
                             filters=256,
                             kernel_size=3)
    outputs = residual_block(x=outputs,
                             filters=256,
                             kernel_size=3)
    outputs = residual_block(x=outputs,
                             filters=256,
                             kernel_size=3)
    outputs = Conv2D(kernel_size=3,
                     filters=1,
                     padding="same")(outputs)
    outputs = ReLU()(outputs)
    outputs = Flatten()(outputs)
    outputs = Softmax()(outputs)
    model = Model(inputs, outputs)
    
    opt = Adam(learning_rate=0.0001)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


model = go_res()
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 19, 19, 13)] 0                                            
__________________________________________________________________________________________________
conv5x5 (Conv2D)                (None, 19, 19, 256)  83456       input_2[0][0]                    
__________________________________________________________________________________________________
conv1x1 (Conv2D)                (None, 19, 19, 256)  3584        input_2[0][0]                    
__________________________________________________________________________________________________
add (Add)                       (None, 19, 19, 256)  0           conv5x5[0][0]                    
                                                                 conv1x1[0][0]                

In [4]:
def identity_block(input_tensor, kernel_size, filters, stage, block):
    filters1, filters2, filters3 = filters
    bn_axis = 3
    
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # 這邊就是圖5上的1x1x64降維操作，假設input x的維度是(n, n, 256), channel last
    x = layers.Conv2D(filters1, (1, 1),
#                       kernel_initializer='he_normal',
                      name=conv_name_base + '2a')(input_tensor)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = layers.Activation('relu')(x)
    
    # 正常的3x3x64卷積操作，Feature Map長寬仍是n x n
    x = layers.Conv2D(filters2, kernel_size,
                      padding='same',
#                       kernel_initializer='he_normal',
                      name=conv_name_base + '2b')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = layers.Activation('relu')(x)
    
    # 最後升維到256，維度(n,n,256) -> 變成可以和(Indentity)input x相加的維度
    x = layers.Conv2D(filters3, (1, 1),
#                       kernel_initializer='he_normal',
                      name=conv_name_base + '2c')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
    
    # 相加後做non-linear轉換
    x = layers.add([x, input_tensor])
    x = layers.Activation('relu')(x)
    return x

In [5]:
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(1, 1)):
    filters1, filters2, filters3 = filters
    bn_axis = 3
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    
    # 因為是projection shortcut 所以input的x可能跟output維度不同
    # input維度(n,n,256) -->降維 (n,n,64)
    # 如果Strides有改，則利用Strides來改變Feature Map長寬
    x = layers.Conv2D(filters1, (1, 1), strides=strides,  
#                       kernel_initializer='he_normal',
                      name=conv_name_base + '2a')(input_tensor)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = layers.Activation('relu')(x)
    
    # (3,3)的kernel, padding都pad好pad滿，不改變Feature Map尺寸大小
    x = layers.Conv2D(filters2, kernel_size, padding='same', 
#                       kernel_initializer='he_normal',
                      name=conv_name_base + '2b')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = layers.Activation('relu')(x)
    
    # 用1x1 conv升維到假設512
    x = layers.Conv2D(filters3, (1, 1), 
#                       kernel_initializer='he_normal',
                      name=conv_name_base + '2c')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
    
    # 因input維度是256，這邊就需要做projectr將維度升到512相加
    shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
#                              kernel_initializer='he_normal',
                             name=conv_name_base + '1')(input_tensor)
    shortcut = layers.BatchNormalization(
        axis=bn_axis, name=bn_name_base + '1')(shortcut)
    # F(x) + x(升維後的x)
    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x

In [6]:
def ResNet50(include_top=True,
             input_tensor=None,
             input_shape=None,
             pooling=False):
    img_input = layers.Input(shape = input_shape)
    bn_axis = 3
    ##### optional
#     x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
#     x = layers.Conv2D(32, (7, 7),
#                       strides=(2, 2),
#                       padding='same',
#                       kernel_initializer='he_normal',
#                       name='conv1')(img_input)
#     x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
#     x = layers.Activation('relu')(x)
    #####
    
    x = conv_block(img_input, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))  # input Channel大小會跟最後最後residual output尺寸一樣
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    # 256-d to 512-d
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') # projection shortcut
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
    # 512-d to 1024-d
#     x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') # projection shortcut
#     x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
#     x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
#     x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
#     x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
#     x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
#     # 1024-d to 2048-d
#     x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
#     x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
#     x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    
#     x = conv_block(x, 3, [512, 512, 361], stage=6, block='a')
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(361, activation='softmax', name='fc1000')(x)
#     outputs = layers.Conv2D(kernel_size=3,
#                      filters=1,
#                      padding="same")(outputs)
#     outputs = layers.ReLU()(outputs)
#     outputs = layers.Flatten()(outputs)
#     outputs = layers.Softmax()(outputs)
#     if include_top:
#         x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
#         x = layers.Dense(classes, activation='softmax', name='fc1000')(x)
#     else:
#         if pooling == 'avg':
#             x = layers.GlobalAveragePooling2D()(x)
#         elif pooling == 'max':
#             x = layers.GlobalMaxPooling2D()(x)
#         else:
#             warnings.warn('The output shape of `ResNet50(include_top=False)` '
#                           'has been changed since Keras 2.2.0.')
#     x = layers.GlobalAveragePooling2D()(x)
    
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='resnet50')
    return model

In [7]:
model = ResNet50(include_top=True,
                 input_tensor=None,
                 input_shape=(19, 19, 13),
                 pooling=False)
opt = Nadam(learning_rate = 0.0005)
model.compile(optimizer = opt,
              loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 19, 19, 13)  0           []                               
                                ]                                                                 
                                                                                                  
 res2a_branch2a (Conv2D)        (None, 19, 19, 64)   896         ['input_1[0][0]']                
                                                                                                  
 bn2a_branch2a (BatchNormalizat  (None, 19, 19, 64)  256         ['res2a_branch2a[0][0]']         
 ion)                                                                                             
                                                                                           

 res3a_branch2a (Conv2D)        (None, 19, 19, 128)  32896       ['activation_8[0][0]']           
                                                                                                  
 bn3a_branch2a (BatchNormalizat  (None, 19, 19, 128)  512        ['res3a_branch2a[0][0]']         
 ion)                                                                                             
                                                                                                  
 activation_9 (Activation)      (None, 19, 19, 128)  0           ['bn3a_branch2a[0][0]']          
                                                                                                  
 res3a_branch2b (Conv2D)        (None, 19, 19, 128)  147584      ['activation_9[0][0]']           
                                                                                                  
 bn3a_branch2b (BatchNormalizat  (None, 19, 19, 128)  512        ['res3a_branch2b[0][0]']         
 ion)     

                                                                                                  
 res3d_branch2b (Conv2D)        (None, 19, 19, 128)  147584      ['activation_18[0][0]']          
                                                                                                  
 bn3d_branch2b (BatchNormalizat  (None, 19, 19, 128)  512        ['res3d_branch2b[0][0]']         
 ion)                                                                                             
                                                                                                  
 activation_19 (Activation)     (None, 19, 19, 128)  0           ['bn3d_branch2b[0][0]']          
                                                                                                  
 res3d_branch2c (Conv2D)        (None, 19, 19, 512)  66048       ['activation_19[0][0]']          
                                                                                                  
 bn3d_bran

In [9]:
# model = load_model('./models/model_kyu_resnet50.h5')

In [11]:
history = model.fit(
    dataset,
    epochs = 1,
    
    validation_data = dataset_valid
)

    804/Unknown - 171s 186ms/step - loss: 4.4464 - accuracy: 0.0892

KeyboardInterrupt: 

In [9]:
model.save('./models/model_kyu_resnet50_withpp.h5')

In [None]:
history = model.evaluate(
    dataset_valid
)

  67467/Unknown - 8889s 132ms/step - loss: 10.7980 - accuracy: 0.0062

In [None]:
model.save('./models/model_kyu_resnet50_withpp_1.h5')

In [27]:
history = model.fit(
    dataset,
    epochs = 1,
    validation_data = val_dataset
)



In [28]:
model.save('./models/model_kyu_resnet_2.h5')

In [33]:
history = model.fit(
    dataset,
    epochs = 1,
    validation_data = val_dataset
)



In [None]:
model.save('./models/model_kyu_resnet_3.h5')

In [None]:
history = model.fit(
    dataset,
    epochs = 1
)

In [None]:
model.save('./models/model_kyu_resnet_4.h5')

In [None]:
history = model.fit(
    dataset,
    epochs = 1
)

In [None]:
model.save('./models/model_kyu_10_15_f128_5.h5')

In [None]:
history = model.fit(
    dataset,
    epochs = 1
)

In [None]:
model.save('./models/model_kyu_10_15_f128_6.h5')

In [None]:
history = model.fit(
    dataset,
    epochs = 1
)

In [None]:
model.save('./models/model_kyu_10_15_f128_7.h5')

In [None]:
history = model.fit(
    dataset,
    epochs = 1
)

In [None]:
model.save('./models/model_kyu_10_15_f128_8.h5')

In [None]:
history = model.fit(
    dataset,
    epochs = 1
)

In [None]:
model.save('./models/model_kyu_10_15_f128_9.h5')

In [None]:
history = model.fit(
    dataset,
    epochs = 1
)

In [None]:
model.save('./models/model_kyu_10_14_11.h5')

## ALL DONE!

For using the model and creating a submission file, follow the notebook **Create Public Upload CSV.ipynb**

# End of Tutorial

You are free to use more modern NN architectures, a better pre-processing, feature extraction methods to achieve much better accuracy!