In [1]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

2023-11-23 16:23:31.602473: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-23 16:23:31.623461: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-23 16:23:31.623480: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-23 16:23:31.624124: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-23 16:23:31.627768: I tensorflow/core/platform/cpu_feature_guar

In [3]:
# Display the installed TensorFlow version so the run environment is recorded.
tf.__version__

'2.15.0'

# Data Pre-Processing

Open the **dan_train.csv** training file (the cell below reads `./CSVs/Tutorial_dan_train.csv`) and split it into a list of games.
Every row of the CSV looks like: `DL0000000001,B,B[pd],W[dp],B[pp],W[dc],B[de],...`. 

Columns are:

    1. DL0000000001: Game ID
    2. B: Player's color
    3-... : Moves
    
We keep only the moves of each game and store them in the `games` list:

In [2]:
# Read the training CSV into a list of lines. The context manager closes the
# file handle as soon as the content is in memory.
with open('./CSVs/Tutorial_dan_train.csv') as csv_file:
    df = csv_file.read().splitlines()

# Each row is "<game id>,<player colour>,<move>,<move>,...": split off the two
# leading fields and keep the remaining comma-separated moves as one string.
# Blank rows are skipped because they contain no moves to parse.
games = [line.split(',', 2)[-1] for line in df if line.strip()]

Create a dictionary to convert the coordinates from characters to numbers

In [3]:
# The 19 letters used for board columns and rows in the move strings.
chars = 'abcdefghijklmnopqrs'
# Letter -> zero-based index ('a' -> 0, ..., 's' -> 18).
coordinates = dict(zip(chars, range(len(chars))))
# Zero-based index -> letter (0 -> 'a', ...); note the name reads the other way round.
chartonumbers = dict(enumerate(chars))
coordinates

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18}

We decided to build a DCNN model in this tutorial. We create data samples from every move in every game: the input is the state of the board before a move and the target is that move, i.e. the model learns to predict the next move. This way we can collect many more data samples from the games.

For simplicity, we use a 4-channel feature map to represent the board, as below:
 1. Positions of black stones: mark them as 1 and the rest of the table as 0
 2. Positions of white stones: mark them as 1 and the rest of the table as 0
 3. Empty areas of the table: mark the empty areas as 1 and occupied areas as 0
 4. The last move in the table: mark the position of the last move as 1 and the rest as 0
 
The target value is an integer from 0 to 360 (19\*19 = 361 possible points). Later this will be one-hot encoded.

In [4]:
def prepare_input(moves,board):
    """Encode the position reached after ``moves`` as a 19x19x4 feature array.

    Channels (last axis):
        0: 1 at every point where a black ('B') move was played
        1: 1 at every point where a white ('W') move was played
        2: 1 at every point where no 'B'/'W' move was played, 0 elsewhere
        3: 1 at the point of the last move in ``moves``

    Stones are only ever added: captured stones are not removed from the
    planes.

    Args:
        moves: Sequence of move strings such as ``'B[pd]'``. Character 0 is
            the colour; characters 2 and 3 are the column and row letters,
            looked up in the module-level ``coordinates`` mapping.
        board: Unused. Kept in the signature so existing callers keep working.

    Returns:
        ``np.ndarray`` of shape ``(19, 19, 4)`` containing only 0s and 1s.

    Raises:
        KeyError: If a column/row letter is not a key of ``coordinates``.
        IndexError: If a move string has fewer than four characters.
    """
    x = np.zeros((19,19,4))
    # Every point starts out empty; points are cleared as stones are placed.
    x[:,:,2] = 1
    for move in moves:
        color = move[0]
        column = coordinates[move[2]]
        row = coordinates[move[3]]
        if color == 'B':
            x[row,column,0] = 1
            x[row,column,2] = 0
        elif color == 'W':
            x[row,column,1] = 1
            x[row,column,2] = 0
    if moves:
        # Mark the last move explicitly from moves[-1] instead of relying on
        # the loop variables still holding the final iteration's values.
        last_move = moves[-1]
        last_move_column = coordinates[last_move[2]]
        last_move_row = coordinates[last_move[3]]
        x[last_move_row,last_move_column,3] = 1
    return x

def prepare_label(move):
    """Convert a move string such as ``'B[pd]'`` into its integer class index.

    Character 2 of ``move`` is the column letter and character 3 the row
    letter; both are translated through the module-level ``coordinates``
    mapping and combined as ``column * 19 + row``.
    """
    col_idx = coordinates[move[2]]
    row_idx = coordinates[move[3]]
    return 19 * col_idx + row_idx


In [8]:
# Check how many samples can be obtained: every move of every game yields one.
n_games = len(games)
n_moves = sum(len(game.split(',')) for game in games)
print(f"Total Games: {n_games}, Total Moves: {n_moves}")

Total Games: 1280, Total Moves: 297110


The code below builds the data for the baseline model from only a subset of the dataset (the cell below uses the first 1000 games). You might need to create a data generator to use the complete dataset; otherwise your RAM might not be enough to store all of it (on the free version of Google Colab, it will crash above 500 games).

In [5]:
def initialize_board():
    """Return a new, empty 19x19 board as an all-zeros NumPy array."""
    board_shape = (19, 19)
    return np.zeros(board_shape)

# Build one (input, label) sample per move: the input encodes the position
# before the move, the label is the move that was actually played.
N_GAMES = 1000  # number of games used for the baseline; lower it if RAM is tight
x = []
y = []
for game in games[:N_GAMES]:
    board = initialize_board()
    moves_list = game.split(',')
    for count, move in enumerate(moves_list):
        # moves_list[:count] holds the moves played before `move`.
        x.append(prepare_input(moves_list[:count], board))
        y.append(prepare_label(move))
# The feature planes only contain 0s and 1s, so float32 is lossless here and
# needs half the memory of NumPy's default float64.
x = np.array(x, dtype=np.float32)
y = np.array(y)
y.shape


(228448,)

In [6]:
x.shape

(228448, 19, 19, 4)

In [7]:
y.shape

(228448,)

In [8]:
# One-hot encode the integer move labels into 19*19 = 361 classes.
y_one_hot = tf.one_hot(y, depth=19*19)

2023-11-23 16:24:00.721327: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-23 16:24:00.721472: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-23 16:24:00.734703: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required l

Dataset splitting: 90% Training, 10% validation

In [9]:
# Hold out 10% of the samples for validation. A fixed random_state makes the
# shuffle, and therefore the train/validation membership, identical on every
# run of the notebook.
x_train, x_val, y_train, y_val = train_test_split(
    x, y_one_hot.numpy(), test_size=0.10, random_state=42
)

# Training

### Simple DCNN Model:

In [13]:
def create_model():
    """Build and compile the baseline convolutional move-prediction model.

    Architecture:
        * input of shape (19, 19, 4)
        * six 3x3, 'same'-padded, ReLU Conv2D layers with 32, 32, 32, 32, 16
          and 1 filters, each L2-regularised (0.01) and followed by
          BatchNormalization; no pooling is applied
        * Flatten, then Dense(512) and Dense(256) with ReLU, each followed
          by Dropout(0.5)
        * Dense softmax output with 19*19 units

    The model is compiled with RMSprop driven by a staircase exponential
    learning-rate decay (start 0.0001, factor 0.96 every 100000 steps),
    categorical cross-entropy loss and the accuracy metric.

    Returns:
        The compiled Keras ``Model``.
    """
    weight_decay = 0.01
    board_input = Input(shape=(19, 19, 4))

    # Convolutional stack without pooling.
    features = board_input
    for n_filters in (32, 32, 32, 32, 16, 1):
        features = Conv2D(kernel_size=3, filters=n_filters, padding='same',
                          activation='relu',
                          kernel_regularizer=l2(weight_decay))(features)
        features = BatchNormalization()(features)
    features = Flatten()(features)

    # Fully connected layers, each followed by dropout.
    for n_units in (512, 256):
        features = Dense(n_units, activation='relu')(features)
        features = Dropout(0.5)(features)

    # Output layer: one probability per board point.
    move_probabilities = Dense(19*19, activation='softmax')(features)

    net = Model(board_input, move_probabilities)

    # Optimizer with an exponentially decaying learning rate.
    schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        0.0001, decay_steps=100000, decay_rate=0.96, staircase=True
    )
    optimizer = tf.keras.optimizers.RMSprop(learning_rate=schedule)
    net.compile(optimizer=optimizer,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

    return net


In [14]:
# Build the compiled network and print its layer-by-layer summary.
model = create_model()
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 19, 19, 4)]       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 19, 19, 32)        1184      
                                                                 
 batch_normalization_6 (Bat  (None, 19, 19, 32)        128       
 chNormalization)                                                
                                                                 
 conv2d_7 (Conv2D)           (None, 19, 19, 32)        9248      
                                                                 
 batch_normalization_7 (Bat  (None, 19, 19, 32)        128       
 chNormalization)                                                
                                                                 
 conv2d_8 (Conv2D)           (None, 19, 19, 32)        9248

In [15]:
# Train for 20 epochs with mini-batches of 128 samples, passing the held-out
# split as validation data; the object returned by fit() is kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=20,
    validation_data=(x_val, y_val)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Save the trained model to an HDF5 (.h5) file in the working directory.
# NOTE(review): the follow-up submission notebook presumably loads this exact
# file name - confirm before renaming or switching formats.
model.save('./model_dan_tutorial.h5')

  saving_api.save_model(


## ALL DONE!

For using the model and creating a submission file, follow the notebook **Create Public Upload CSV.ipynb**

# End of Tutorial

You are free to use more modern NN architectures, a better pre-processing, feature extraction methods to achieve much better accuracy!