In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import layers
# from tensorflow.keras.layers import Input, Conv2D, ReLU, Flatten, Dense, Softmax, BatchNormalization, Dropout, Add
from tensorflow.keras.optimizers import Adam, SGD, Nadam
from tensorflow.keras import regularizers

import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Show the TensorFlow version and whether at least one GPU is visible.
# tf.test.is_gpu_available() is deprecated in favour of
# tf.config.list_physical_devices('GPU').
print(tf.__version__)
len(tf.config.list_physical_devices('GPU')) > 0

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

# Data Pre-Processing

Open **play_style_train.csv** file and split the games into a list.
Every row of csv: `PSL0000000001,1,B[pd],W[dp],B[qp],W[dc],B[nq],W[nc],B[qf],W[kd],B[ce],W[dg],B[dd],W[cc],B[fd],W[ed],B[ee],W[ec],B[ge],W[gc],B[di]`. 

Columns are:

    1. PSL0000000001: Game ID
    2. 1: Game Style
    3-... : Moves, the last move represents the play style (B[di] in this case)
    
We keep only the moves of each game in the `games` list and the style labels in `game_styles`:

In [3]:
# Read the training CSV; every row is "<game id>,<style>,<move>,<move>,...".
# The context manager guarantees the file handle is closed after reading.
with open('./Training Dataset/play_style_train.csv') as csv_file:
    df = csv_file.read().splitlines()

games = []        # comma-separated move string of every game
game_styles = []  # integer play-style label of every game
for line in df:
    if not line.strip():
        continue  # tolerate blank lines instead of failing on them
    # Split once: game id (unused), style label, and the rest (all moves).
    _game_id, style, moves = line.split(',', 2)
    games.append(moves)
    game_styles.append(int(style))

Create a dictionary to convert the coordinates from characters to numbers

In [4]:
# Board coordinate letters in order; the position of a letter is its 0-based index.
chars = 'abcdefghijklmnopqrs'
coordinates = dict(zip(chars, range(len(chars))))
coordinates

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18}

We build a DCNN model in this tutorial. Each game yields exactly one data sample: the board state after the last move of the game is the input, and the play style of the game is the target.

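The board is encoded as a 19x19 feature map with 13 planes:
 1. Planes 0-1: stones of the same colour as the last move, and stones of the opposite colour (1 where a stone is present, 0 elsewhere)
 2. Plane 2: empty points
 3. Planes 3-10: the last 8 moves, one plane per move
 4. Planes 11-12: planes 1 and 2 restricted to the 7x7 window around the last move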
 
The target is to predict the game style (1, 2 or 3) from the state of the game table. Later this will be one-hot encoded.

In [5]:
# Feature planes produced by prepare_input (channel index -> content):
#   0     : stones of the same colour as the last move
#   1     : stones of the opposite colour
#   2     : empty points
#   3-10  : the last 8 moves, one plane per move (10 = most recent, 3 = oldest)
#   11    : plane 1 restricted to the window around the last move
#   12    : plane 2 restricted to the window around the last move
# NOTE(review): the original legend labelled planes 11/12 as "nearby black
# stones" / "nearby white stones", but the code copies planes 1 and 2. The
# copied planes are left unchanged so the features stay compatible with models
# already trained on them -- confirm which was intended.
def prepare_input(moves):
    """Encode the moves of one game as a (19, 19, 13) feature tensor.

    Stones are only ever added; captured stones are not removed from the
    planes.

    Args:
        moves: non-empty sequence of move strings such as ``'B[pd]'``, where
            character 0 is the colour, character 2 the column letter and
            character 3 the row letter.

    Returns:
        ``np.ndarray`` of shape ``(19, 19, 13)`` indexed as
        ``[row, column, plane]``; see the plane legend above the function.

    Raises:
        IndexError: if ``moves`` is empty or a move has fewer than 4 characters.
        KeyError: if a coordinate letter is missing from ``coordinates``.
    """
    x = np.zeros((19, 19, 13))
    player_color = moves[-1][0]  # colour of the side that played last

    # Planes 0-1: stone ownership relative to the last mover. Plane 2 is used
    # as an occupancy mask first and inverted right after the loop.
    for move in moves:
        column = coordinates[move[2]]
        row = coordinates[move[3]]
        x[row, column, 0 if move[0] == player_color else 1] = 1
        x[row, column, 2] = 1
    x[:, :, 2] = np.where(x[:, :, 2] == 0, 1, 0)

    # Planes 3-10: the last 8 moves, most recent on plane 10. Slicing with
    # moves[-8:] also covers games with 8 or fewer moves; the previous index
    # loop stopped before move 0 in that case, so a one-move game had no
    # last-move plane set at all.
    for steps_back, move in enumerate(reversed(moves[-8:]), start=1):
        x[coordinates[move[3]], coordinates[move[2]], 11 - steps_back] = 1

    # Planes 11-12: copy planes 1 and 2 inside a square window centred on the
    # last move, clipped to the board edges.
    last_col = coordinates[moves[-1][2]]
    last_row = coordinates[moves[-1][3]]
    rad = 3  # window radius; change this to resize the (2 * rad + 1) window
    rows = slice(max(0, last_row - rad), min(18, last_row + rad) + 1)
    cols = slice(max(0, last_col - rad), min(18, last_col + rad) + 1)
    x[rows, cols, 11] = x[rows, cols, 1]
    x[rows, cols, 12] = x[rows, cols, 2]

    return x

In [6]:
# Check how many samples can be obtained: one sample per game.
n_games = len(games)
print(f"Total Games: {n_games}")

Total Games: 26615


Since the play style training set is smaller than the kyu or dan training sets, the complete dataset fits in memory. Still, it is better to create a data generator.

In [7]:
# Encode every game as one feature tensor, then shift the style labels so
# that they start at 0 instead of 1.
x = np.array([prepare_input(game.split(',')) for game in games])
y = np.array(game_styles) - 1

In [None]:
def data_generator(games, batch_size, styles=None):
    """Create a generator factory that yields (features, one-hot style) batches.

    Args:
        games: re-iterable collection of comma-separated move strings, one
            string per game.
        batch_size: maximum number of games per yielded batch.
        styles: play-style labels (1-based) aligned with ``games``. Defaults
            to the notebook-level ``game_styles`` list. Pass the matching
            slice explicitly when ``games`` is only a subset of all games,
            otherwise the labels would not line up.

    Returns:
        A zero-argument callable, as expected by
        ``tf.data.Dataset.from_generator``. Every batch is a tuple of an
        array of shape ``(n, 19, 19, 13)`` and a one-hot tensor of shape
        ``(n, 3)`` with ``n <= batch_size``.
    """
    if styles is None:
        styles = game_styles

    def to_batch(x_batch, y_batch):
        # Stack the per-game tensors and one-hot encode the 1-based labels.
        return np.array(x_batch), tf.one_hot(np.array(y_batch) - 1, depth=3)

    def generator():
        x_batch = []  # feature tensors of the batch being assembled
        y_batch = []  # style labels of the batch being assembled
        for game, style in zip(games, styles):
            x_batch.append(prepare_input(game.split(',')))
            y_batch.append(style)
            if len(x_batch) == batch_size:  # yield once the batch is full
                yield to_batch(x_batch, y_batch)
                x_batch = []
                y_batch = []
        # Yield the trailing partial batch too, so that the games left over
        # after the last full batch are not silently dropped.
        if x_batch:
            yield to_batch(x_batch, y_batch)

    return generator

batch_size = 64
generator = data_generator(games, batch_size)
# The batch dimension is None because the final batch may hold fewer than
# batch_size games. output_signature replaces the deprecated
# output_types/output_shapes arguments.
dataset = tf.data.Dataset.from_generator(
    generator,
    output_signature=(
        tf.TensorSpec(shape=(None, 19, 19, 13), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 3), dtype=tf.float32),
    ),
)
dataset = dataset.prefetch(tf.data.AUTOTUNE)

In [8]:
# Feature array shape: (number of games, 19, 19, 13).
x.shape

(26615, 19, 19, 13)

In [9]:
# Label array shape: one label per game.
y.shape

(26615,)

In [10]:
# Number of games per style class (labels are 0-based at this point).
np.bincount(y)

array([8184, 9403, 9028], dtype=int64)

Target is one-hot encoded and loss is changed to `categorical_crossentropy`

In [11]:
# One-hot encode the 0-based style labels into 3 classes.
y_hot = tf.one_hot(y, depth=3)

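Dataset splitting: the 90% training / 10% validation split is commented out below, so the model is trained on the complete dataset without a validation set.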

In [12]:
# The 90/10 train/validation split below is disabled; every sample is used
# for training instead.
# x_train, x_val, y_train, y_val = train_test_split(x, y_hot.numpy(), test_size=0.1)
x_train = x
y_train = y_hot.numpy()  # convert the one-hot tensor to a NumPy array

# Training

### ResNet50-style DCNN Model:

In [13]:
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Bottleneck residual block whose shortcut is the unchanged input.

    The main path is a 1x1 convolution, a ``kernel_size`` convolution with
    'same' padding and another 1x1 convolution, each followed by batch
    normalisation, with a ReLU after the first two. The result is added to
    ``input_tensor`` and passed through a final ReLU, so ``filters[2]`` has to
    match the channel count of ``input_tensor``.

    Args:
        input_tensor: input tensor (channels last).
        kernel_size: kernel size of the middle convolution.
        filters: three filter counts, one per convolution of the main path.
        stage: stage label, used to build the layer names.
        block: block label (string), used to build the layer names.

    Returns:
        Output tensor of the block.
    """
    reduce_ch, mid_ch, expand_ch = filters
    channel_axis = -1
    tag = str(stage) + block + '_branch'

    def conv_bn(tensor, n_filters, kernel, suffix, **conv_kwargs):
        # Convolution followed by batch normalisation, named
        # 'res<tag><suffix>' and 'bn<tag><suffix>'.
        tensor = layers.Conv2D(n_filters, kernel,
                               name='res' + tag + suffix,
                               **conv_kwargs)(tensor)
        return layers.BatchNormalization(axis=channel_axis,
                                         name='bn' + tag + suffix)(tensor)

    # 1x1 convolution that reduces the channel count.
    out = conv_bn(input_tensor, reduce_ch, (1, 1), '2a')
    out = layers.Activation('relu')(out)

    # Spatial convolution; 'same' padding keeps the feature map size.
    out = conv_bn(out, mid_ch, kernel_size, '2b', padding='same')
    out = layers.Activation('relu')(out)

    # 1x1 convolution that expands the channels back so the result can be
    # added to the identity shortcut.
    out = conv_bn(out, expand_ch, (1, 1), '2c')

    # Residual addition followed by the non-linearity.
    out = layers.add([out, input_tensor])
    return layers.Activation('relu')(out)

In [14]:
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(1, 1)):
    """Bottleneck residual block with a projection (1x1 convolution) shortcut.

    The main path is the same as in an identity block (1x1 convolution,
    ``kernel_size`` convolution with 'same' padding, 1x1 convolution, each
    with batch normalisation), but the shortcut is a 1x1 convolution plus
    batch normalisation that projects the input to ``filters[2]`` channels,
    so the input and output channel counts may differ.

    Args:
        input_tensor: input tensor (channels last).
        kernel_size: kernel size of the middle convolution.
        filters: three filter counts, one per convolution of the main path.
        stage: stage label, used to build the layer names.
        block: block label (string), used to build the layer names.
        strides: strides of the first main-path convolution and of the
            shortcut convolution.

    Returns:
        Output tensor of the block.
    """
    reduce_ch, mid_ch, expand_ch = filters
    channel_axis = -1
    tag = str(stage) + block + '_branch'

    def conv_bn(tensor, n_filters, kernel, suffix, **conv_kwargs):
        # Convolution followed by batch normalisation, named
        # 'res<tag><suffix>' and 'bn<tag><suffix>'.
        tensor = layers.Conv2D(n_filters, kernel,
                               name='res' + tag + suffix,
                               **conv_kwargs)(tensor)
        return layers.BatchNormalization(axis=channel_axis,
                                         name='bn' + tag + suffix)(tensor)

    # 1x1 convolution that reduces the channels; the strides (if changed)
    # also shrink the feature map here.
    out = conv_bn(input_tensor, reduce_ch, (1, 1), '2a', strides=strides)
    out = layers.Activation('relu')(out)

    # Spatial convolution; 'same' padding keeps the feature map size.
    out = conv_bn(out, mid_ch, kernel_size, '2b', padding='same')
    out = layers.Activation('relu')(out)

    # 1x1 convolution that expands to the output channel count.
    out = conv_bn(out, expand_ch, (1, 1), '2c')

    # Projection shortcut: bring the input to the same shape as the main path.
    shortcut = conv_bn(input_tensor, expand_ch, (1, 1), '1', strides=strides)

    # F(x) + projected x, then the non-linearity.
    out = layers.add([out, shortcut])
    return layers.Activation('relu')(out)

In [15]:
def ResNet50(include_top=True,
             input_tensor=None,
             input_shape=None,
             pooling=False,
             classes=3):
    """Build a ResNet50-style classifier without the stem convolution.

    The network consists of the four bottleneck stages of ResNet50 (3, 4, 6
    and 3 blocks) applied directly to the input, followed by global average
    pooling and a softmax layer. Every block is called with the default
    strides of (1, 1).

    Args:
        include_top: accepted for signature compatibility; currently ignored
            (the pooling + softmax head is always added).
        input_tensor: optional tensor to use as the model input. When it is
            not a Keras tensor it is wrapped in an ``Input`` layer.
        input_shape: shape of one input sample, e.g. ``(19, 19, 13)``; used
            when a new ``Input`` layer is created.
        pooling: accepted for signature compatibility; currently ignored.
        classes: number of output classes of the softmax layer.

    Returns:
        An uncompiled Keras ``Model`` named ``'resnet50'``.
    """
    # Connect the graph to the caller's tensor when one is given. Previously
    # a fresh Input was always created, and the input_tensor branch referred
    # to the undefined name `keras_utils`.
    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    elif tf.keras.backend.is_keras_tensor(input_tensor):
        img_input = input_tensor
    else:
        img_input = layers.Input(tensor=input_tensor, shape=input_shape)

    # (stage, filters of the three convolutions, block labels); the first
    # block of each stage uses a projection shortcut because the channel
    # count changes there.
    stages = (
        (2, [64, 64, 256], 'abc'),
        (3, [128, 128, 512], 'abcd'),
        (4, [256, 256, 1024], 'abcdef'),
        (5, [512, 512, 2048], 'abc'),
    )
    x = img_input
    for stage, stage_filters, block_labels in stages:
        x = conv_block(x, 3, stage_filters, stage=stage,
                       block=block_labels[0], strides=(1, 1))
        for label in block_labels[1:]:
            x = identity_block(x, 3, stage_filters, stage=stage, block=label)

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    # The layer name 'fc1000' is kept unchanged so that layer names match
    # previously saved models.
    x = layers.Dense(classes, activation='softmax', name='fc1000')(x)

    # Make sure the model takes into account any potential predecessors of
    # input_tensor.
    if input_tensor is not None:
        inputs = tf.keras.utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='resnet50')
    return model

In [16]:
# Build the network for 19x19 boards with 13 feature planes and compile it
# with a Nadam optimiser for one-hot encoded targets.
opt = Nadam(learning_rate=0.0005)
model = ResNet50(
    include_top=True,
    input_tensor=None,
    input_shape=(19, 19, 13),
    pooling=False,
)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 19, 19, 13)  0           []                               
                                ]                                                                 
                                                                                                  
 res2a_branch2a (Conv2D)        (None, 19, 19, 64)   896         ['input_1[0][0]']                
                                                                                                  
 bn2a_branch2a (BatchNormalizat  (None, 19, 19, 64)  256         ['res2a_branch2a[0][0]']         
 ion)                                                                                             
                                                                                           

 res3a_branch2a (Conv2D)        (None, 19, 19, 128)  32896       ['activation_8[0][0]']           
                                                                                                  
 bn3a_branch2a (BatchNormalizat  (None, 19, 19, 128)  512        ['res3a_branch2a[0][0]']         
 ion)                                                                                             
                                                                                                  
 activation_9 (Activation)      (None, 19, 19, 128)  0           ['bn3a_branch2a[0][0]']          
                                                                                                  
 res3a_branch2b (Conv2D)        (None, 19, 19, 128)  147584      ['activation_9[0][0]']           
                                                                                                  
 bn3a_branch2b (BatchNormalizat  (None, 19, 19, 128)  512        ['res3a_branch2b[0][0]']         
 ion)     

                                                                                                  
 res3d_branch2b (Conv2D)        (None, 19, 19, 128)  147584      ['activation_18[0][0]']          
                                                                                                  
 bn3d_branch2b (BatchNormalizat  (None, 19, 19, 128)  512        ['res3d_branch2b[0][0]']         
 ion)                                                                                             
                                                                                                  
 activation_19 (Activation)     (None, 19, 19, 128)  0           ['bn3d_branch2b[0][0]']          
                                                                                                  
 res3d_branch2c (Conv2D)        (None, 19, 19, 512)  66048       ['activation_19[0][0]']          
                                                                                                  
 bn3d_bran

 res4c_branch2b (Conv2D)        (None, 19, 19, 256)  590080      ['activation_27[0][0]']          
                                                                                                  
 bn4c_branch2b (BatchNormalizat  (None, 19, 19, 256)  1024       ['res4c_branch2b[0][0]']         
 ion)                                                                                             
                                                                                                  
 activation_28 (Activation)     (None, 19, 19, 256)  0           ['bn4c_branch2b[0][0]']          
                                                                                                  
 res4c_branch2c (Conv2D)        (None, 19, 19, 1024  263168      ['activation_28[0][0]']          
                                )                                                                 
                                                                                                  
 bn4c_bran

 activation_37 (Activation)     (None, 19, 19, 256)  0           ['bn4f_branch2b[0][0]']          
                                                                                                  
 res4f_branch2c (Conv2D)        (None, 19, 19, 1024  263168      ['activation_37[0][0]']          
                                )                                                                 
                                                                                                  
 bn4f_branch2c (BatchNormalizat  (None, 19, 19, 1024  4096       ['res4f_branch2c[0][0]']         
 ion)                           )                                                                 
                                                                                                  
 add_12 (Add)                   (None, 19, 19, 1024  0           ['bn4f_branch2c[0][0]',          
                                )                                 'activation_35[0][0]']          
          

                                                                                                  
 activation_46 (Activation)     (None, 19, 19, 512)  0           ['bn5c_branch2b[0][0]']          
                                                                                                  
 res5c_branch2c (Conv2D)        (None, 19, 19, 2048  1050624     ['activation_46[0][0]']          
                                )                                                                 
                                                                                                  
 bn5c_branch2c (BatchNormalizat  (None, 19, 19, 2048  8192       ['res5c_branch2c[0][0]']         
 ion)                           )                                                                 
                                                                                                  
 add_15 (Add)                   (None, 19, 19, 2048  0           ['bn5c_branch2c[0][0]',          
          

In [13]:
# model = load_model('./models/model_playstyle_resnet50_v2.h5')

In [17]:
# Train for 6 epochs with mini-batches of 32 samples. No validation is run
# because the validation_data argument is commented out.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 32,
    epochs = 6,
#     validation_data=(x_val, y_val),
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [18]:
# Checkpoint the model to disk after this round of training.
model.save('./models/model_playstyle_resnet50_alltrain.h5')

In [19]:
# Train for 6 more epochs with mini-batches of 32 samples. No validation is
# run because the validation_data argument is commented out.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 32,
    epochs = 6,
#     validation_data=(x_val, y_val),
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [20]:
# Checkpoint the model to disk after this round of training.
model.save('./models/model_playstyle_resnet50_alltrain_1.h5')

In [21]:
# Train for 6 more epochs with mini-batches of 32 samples. No validation is
# run because the validation_data argument is commented out.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 32,
    epochs = 6,
#     validation_data=(x_val, y_val),
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [22]:
# Checkpoint the model to disk after this round of training.
model.save('./models/model_playstyle_resnet50_alltrain_2.h5')

In [13]:
# Reload the saved checkpoint so that training can continue from it.
model = load_model('./models/model_playstyle_resnet50_alltrain_2.h5')

In [14]:
# Train the loaded model for 6 epochs with mini-batches of 32 samples. No
# validation is run because the validation_data argument is commented out.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 32,
    epochs = 6,
#     validation_data=(x_val, y_val),
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [15]:
# Checkpoint the model to disk after this round of training.
model.save('./models/model_playstyle_resnet50_alltrain_3.h5')

In [12]:
# Reload the saved checkpoint so that training can continue from it.
model = load_model('./models/model_playstyle_resnet50_alltrain_3.h5')

In [13]:
# Train the loaded model for 6 epochs with mini-batches of 32 samples. No
# validation is run because the validation_data argument is commented out.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 32,
    epochs = 6,
#     validation_data=(x_val, y_val),
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [14]:
# Checkpoint the model to disk after this round of training.
model.save('./models/model_playstyle_resnet50_alltrain_4.h5')

In [13]:
# Reload the saved checkpoint so that training can continue from it.
model = load_model('./models/model_playstyle_resnet50_alltrain_4.h5')

In [14]:
# Train the loaded model for 6 epochs with mini-batches of 32 samples. No
# validation is run because the validation_data argument is commented out.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 32,
    epochs = 6,
#     validation_data=(x_val, y_val),
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [15]:
# Checkpoint the model to disk after this round of training.
model.save('./models/model_playstyle_resnet50_alltrain_5.h5')

In [None]:
# Train for 6 more epochs with mini-batches of 32 samples. No validation is
# run because the validation_data argument is commented out.
history = model.fit(
    x = x_train, 
    y = y_train,
    batch_size = 32,
    epochs = 6,
#     validation_data=(x_val, y_val),
)

In [None]:
# Checkpoint the model to disk after this round of training.
model.save('./models/model_playstyle_resnet50_alltrain_6.h5')

## ALL DONE!

For using the model and creating a submission file, follow the notebook **Create Public Upload CSV.ipynb**

# End of Tutorial

You are free to use more modern NN architectures, better pre-processing, or better feature extraction methods to achieve much higher accuracy!