In [1]:
import chess, os, chess.pgn
import numpy as np
import tensorflow as tf

from model import ResNet
import utils
# Shared project configuration object. Later cells read BOARD_SHAPE, N_PLANES,
# PLANES_DTYPE_NP and BATCH_DIM from it.
conf = utils.Config()


# Switch Keras to the 'mixed_float16' global dtype policy. This must run before
# the model is built so that its layers pick the policy up.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 2060, compute capability 7.5


2022-07-20 19:38:37.558333: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-20 19:38:37.588126: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-20 19:38:37.588436: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-20 19:38:37.589568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [2]:
def gen(path):
    """Yield one (input planes, move target) pair per move of every game in a PGN file.

    Written for `tf.data.Dataset.from_generator`, which passes string arguments
    as `bytes`; a plain `str` path is accepted as well so the generator can be
    driven by hand.

    Args:
        path: PGN file path, as UTF-8 `bytes` or as `str`. It is joined onto
            the current working directory, so a relative path is resolved
            against it while an absolute path is used unchanged.

    Yields:
        Tuple `(planes, target)` where
        - `planes` is the value returned by `utils.update_planes` for the
          position *before* the move is played;
        - `target` is a freshly allocated array of shape
          `[*conf.BOARD_SHAPE, conf.N_PLANES]` and dtype
          `conf.PLANES_DTYPE_NP`, all zeros except for a single 1 at the index
          given by `utils.mask_moves([move])[0]`.
    """
    if isinstance(path, bytes):
        path = path.decode("utf-8")

    target_shape = [*conf.BOARD_SHAPE, conf.N_PLANES]

    with open(os.path.join(os.getcwd(), path), 'r') as pgn:
        game = chess.pgn.read_game(pgn)

        # read_game returns None once the file holds no further game.
        while game is not None:
            # Start from the game's own initial position so that games carrying
            # a custom starting position are replayed correctly.
            board = game.board()
            # NOTE(review): `fen()[:-6]` drops a fixed number of characters, so
            # what is stripped varies once the move counters reach two digits or
            # an en-passant square is present. Left unchanged because the format
            # utils.update_planes expects is not visible here - confirm.
            board_history = [board.fen()[:-6]]
            # Every game starts without previous planes; otherwise the last
            # position of the preceding game would leak into this one.
            planes = None

            for move in game.mainline_moves():
                # The network input is the position BEFORE the move is played.
                # NOTE(review): if utils.update_planes updates its argument in
                # place, consumers that keep references share one buffer - confirm.
                planes = utils.update_planes(planes, board, board_history)

                # The target marks the move that was played from that position.
                # A new array per sample keeps already-yielded targets intact.
                target = np.zeros(target_shape, dtype=conf.PLANES_DTYPE_NP)
                mask = utils.mask_moves([move])[0]
                target[mask[0], mask[1], mask[2]] = 1

                yield (planes, target)

                # Play the move to prepare the position for the next sample.
                board.push(move)
                board_history.append(board.fen()[:-6])

            game = chess.pgn.read_game(pgn)

# avg = np.average([info[file, "games"] for file in info.keys()])

In [3]:
# PGN files backing the training and validation pipelines.
train_file_path = "/home/marcello/github/ChessBreaker/data/Database/lichess_elite_2014-11.pgn"  # 19547 samples
val_file_path = "/home/marcello/github/ChessBreaker/data/Database/lichess_elite_2014-09.pgn"    # 15587 samples

# Shape and dtype of one (input planes, move target) sample produced by `gen`.
sample_signature = (
    tf.TensorSpec((8,8,119), dtype=tf.dtypes.float16),
    tf.TensorSpec((8,8,73), dtype=tf.dtypes.float16),
)


def _dataset_from_pgn(pgn_path):
    """Wrap `gen` reading `pgn_path` in an unbatched tf.data.Dataset."""
    return tf.data.Dataset.from_generator(
        gen,
        args=[pgn_path],
        output_signature=sample_signature,
    )


def _shuffled_batches(source, batch_size):
    """Shuffle `source` with a 2*BATCH_DIM buffer, batch it and prefetch."""
    shuffled = source.shuffle(conf.BATCH_DIM*2)
    batched = shuffled.batch(batch_size, num_parallel_calls=tf.data.AUTOTUNE)
    return batched.prefetch(tf.data.AUTOTUNE)


dataset = _dataset_from_pgn(train_file_path)
val_dataset = _dataset_from_pgn(val_file_path)

# Validation uses batches twice as large as training.
ds = _shuffled_batches(dataset, conf.BATCH_DIM)
val_ds = _shuffled_batches(val_dataset, conf.BATCH_DIM*2)

2022-07-20 19:38:37.774700: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-20 19:38:37.775811: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-20 19:38:37.776039: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-20 19:38:37.776159: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [4]:
# Build the network and attach optimizer, loss and metric.
model = ResNet()

# The forward pass recorded in this notebook returns negative values, so the
# model output is a raw score rather than a probability. BinaryCrossentropy
# assumes probabilities unless told otherwise, hence from_logits=True.
# NOTE(review): if model.py is changed to end in a sigmoid, revert to the
# default from_logits=False.
model.compile(
    optimizer = "adam",
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True, label_smoothing=0.1),
    metrics = ["accuracy"]
)

In [7]:
# Smoke test: pull a single training batch, report the shapes of its two
# tensors, then run the batch of inputs through the model once.
sam = 0  # placeholder, replaced by the input tensor of the fetched batch
for sample in ds.take(1):
    sam, ple = sample
    for tensor in (sam, ple):
        print(np.shape(tensor))

model(sam)

(8, 8, 8, 119)
(8, 8, 8, 73)


2022-07-20 19:39:22.284765: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8302


(<tf.Tensor: shape=(8, 8, 8, 73), dtype=float16, numpy=
 array([[[[-1.1810e-02, -2.2232e-02, -1.7303e-02, ..., -3.0518e-04,
            4.5258e-02, -1.3879e-01],
          [-3.5431e-02,  7.0267e-03, -5.3802e-02, ...,  3.2837e-02,
           -4.4098e-02, -1.0583e-01],
          [-5.4413e-02, -9.2316e-03, -1.3435e-02, ...,  2.6917e-02,
            2.3926e-02, -1.1780e-01],
          ...,
          [-5.8014e-02,  9.1095e-03, -3.1769e-02, ...,  5.0812e-02,
            6.4697e-03, -1.2402e-01],
          [-6.9275e-02, -8.1848e-02, -1.6937e-02, ...,  2.5696e-02,
            2.1561e-02, -1.2097e-01],
          [-6.7444e-02,  2.0599e-03, -1.9348e-02, ...,  1.1212e-01,
            6.2256e-02, -1.6406e-01]],
 
         [[-8.1482e-02, -1.7197e-02, -3.3356e-02, ...,  4.4220e-02,
            3.7415e-02, -1.4697e-01],
          [-7.6294e-02, -2.6596e-02, -5.5176e-02, ...,  5.1651e-03,
           -7.7576e-02, -1.0016e-01],
          [-8.5022e-02, -2.8900e-02, -3.4821e-02, ...,  1.8906e-02,
          

In [8]:
# Train on the batched tf.data pipeline and validate after every epoch.
# `workers` / `use_multiprocessing` were removed: Keras documents them as
# applying to generator or keras.utils.Sequence inputs only, so they do nothing
# for a tf.data.Dataset (and newer Keras versions no longer accept them).
# NOTE(review): the run recorded below this cell failed with "tf.function only
# supports singleton tf.Variables created on the first call", raised at
# model.py line 21 where `layers.BatchNormalization()` is constructed inside
# ResNetBlock.call. Layers have to be created once, outside `call`; that fix
# belongs in model.py and cannot be made from this cell.
model.fit(
    ds,
    validation_data = val_ds,
    epochs = 10,
    steps_per_epoch = 235,
)

Epoch 1/10


ValueError: in user code:

    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_file6xi3zyeq.py", line 10, in tf__call
        x = ag__.converted_call(ag__.ld(self).block_1, (ag__.ld(inputs),), None, fscope)
    File "/tmp/__autograph_generated_filehz06e_qc.py", line 11, in tf__call
        x = ag__.converted_call(ag__.converted_call(ag__.ld(layers).BatchNormalization, (), None, fscope), (ag__.ld(x),), None, fscope)

    ValueError: Exception encountered when calling layer "res_net" (type ResNet).
    
    in user code:
    
        File "/home/marcello/github/ChessBreaker/model.py", line 54, in call  *
            x = self.block_1(inputs)
        File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "/tmp/__autograph_generated_filehz06e_qc.py", line 11, in tf__call
            x = ag__.converted_call(ag__.converted_call(ag__.ld(layers).BatchNormalization, (), None, fscope), (ag__.ld(x),), None, fscope)
    
        ValueError: Exception encountered when calling layer "ResB1" (type ResNetBlock).
        
        in user code:
        
            File "/home/marcello/github/ChessBreaker/model.py", line 21, in call  *
                x = layers.BatchNormalization()(x)
            File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
        
            ValueError: tf.function only supports singleton tf.Variables created on the first call. Make sure the tf.Variable is only created once or created outside tf.function. See https://www.tensorflow.org/guide/function#creating_tfvariables for more information.
        
        
        Call arguments received by layer "ResB1" (type ResNetBlock):
          • inputs=tf.Tensor(shape=(None, 8, 8, 119), dtype=float16)
    
    
    Call arguments received by layer "res_net" (type ResNet):
      • inputs=tf.Tensor(shape=(None, 8, 8, 119), dtype=float16)
