In [6]:
# Standard library
import os

# Third-party
import chess
import chess.pgn
import numpy as np
import tensorflow as tf
from tensorflow.keras import mixed_precision

# Project-local
from model import ResNet
import utils

# Shared configuration object; the cells below read BOARD_SHAPE, N_PLANES,
# PLANES_DTYPE_NP and BATCH_DIM from it.
conf = utils.Config()

# Set the global Keras dtype policy to mixed float16 before the model is built.
mixed_precision.set_global_policy('mixed_float16')

In [7]:
def gen(path):
    """Yield one supervised sample per move of every game in a PGN file.

    Written as the generator for ``tf.data.Dataset.from_generator``, which
    hands string ``args`` over as ``bytes``; a plain ``str`` path is accepted
    too so the generator can also be called directly.

    Args:
        path: PGN file path, as ``bytes`` (UTF-8) or ``str``. It is joined
            onto the current working directory, so an absolute path is used
            unchanged.

    Yields:
        ``(planes, (policy, result))`` where

        * ``planes`` is what ``utils.update_planes`` returns for the position
          *before* the move is played,
        * ``policy`` is a freshly allocated array of shape
          ``[*conf.BOARD_SHAPE, conf.N_PLANES]`` that is zero everywhere
          except for a 1 at the index ``utils.mask_moves`` gives for the move,
        * ``result`` is ``utils.outcome`` applied to the game's ``Result``
          header.
    """
    # tf.data passes the path as bytes; direct callers usually pass str.
    if isinstance(path, (bytes, bytearray)):
        path = path.decode("utf-8")

    policy_shape = [*conf.BOARD_SHAPE, conf.N_PLANES]
    policy_dtype = conf.PLANES_DTYPE_NP

    # NOTE(review): `planes` is threaded through every call to
    # utils.update_planes and is never reset when a new game starts, unlike
    # `board` and `board_history` -- confirm that carrying it across game
    # boundaries is intended.
    planes = None

    with open(os.path.join(os.getcwd(), path), 'r') as pgn:
        game = chess.pgn.read_game(pgn)

        # read_game returns None once the file is exhausted.
        while game is not None:
            whole_game_moves = game.game().mainline_moves()
            result = utils.outcome(game.headers["Result"])

            board = chess.Board()
            # FEN with its last 6 characters stripped, one entry per position.
            board_history = [board.fen()[:-6]]

            for move in whole_game_moves:
                # The network input is the board BEFORE the move is played.
                planes = utils.update_planes(planes, board, board_history)

                # The policy target marks the move played from that position.
                # A new array is built for every sample so that consumers which
                # keep a reference never see it modified afterwards.
                mask = utils.mask_moves([move])[0]
                policy = np.zeros(policy_shape, dtype=policy_dtype)
                policy[mask[0], mask[1], mask[2]] = 1

                # input = planes, output = (move target, game result)
                yield (planes, (policy, result))

                # Only now play the move, preparing the next turn.
                board.push(move)
                board_history.append(board.fen()[:-6])

            game = chess.pgn.read_game(pgn)

# avg = np.average([info[file, "games"] for file in info.keys()])

In [8]:
# PGN sources for the training and validation splits.
train_file_path = "/home/marcello/github/ChessBreaker/data/Database/lichess_elite_2014-11.pgn"  # 19547 samples
val_file_path = "/home/marcello/github/ChessBreaker/data/Database/lichess_elite_2014-09.pgn"    # 15587 samples

# Structure of one element yielded by `gen`: (planes, (move target, result)).
output_signature = (
    tf.TensorSpec((8, 8, 119), dtype=tf.float16),
    (
        tf.TensorSpec((8, 8, 73), dtype=tf.float16),
        tf.TensorSpec((), dtype=tf.float16),
    ),
)


def _shuffle_batch_prefetch(source, batch_size):
    """Shuffle `source` with a 2*BATCH_DIM buffer, batch it, and prefetch."""
    shuffled = source.shuffle(conf.BATCH_DIM * 2)
    batched = shuffled.batch(batch_size, num_parallel_calls=tf.data.AUTOTUNE)
    return batched.prefetch(tf.data.AUTOTUNE)


# Unbatched per-move sample streams, one per PGN file.
dataset = tf.data.Dataset.from_generator(
    gen,
    args=[train_file_path],
    output_signature=output_signature,
)

val_dataset = tf.data.Dataset.from_generator(
    gen,
    args=[val_file_path],
    output_signature=output_signature,
)

# Batched pipelines handed to Keras; validation uses twice the batch size.
ds = _shuffle_batch_prefetch(dataset, conf.BATCH_DIM)

val_ds = _shuffle_batch_prefetch(val_dataset, conf.BATCH_DIM * 2)

In [9]:
# Build the network and attach optimizer, loss and metric.
model = ResNet()

# NOTE(review): a single loss object is passed while the dataset yields two
# targets (move target, result); Keras applies a lone loss to every model
# output -- confirm that binary cross-entropy is intended for both.
smoothed_bce = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.1)

model.compile(optimizer="adam", loss=smoothed_bce, metrics=["accuracy"])

In [10]:
# Sanity check: pull a single batch and print the shape of each component.
sam = 0
for sample in ds.take(1):
    planes, (move, outcome) = sample
    for component in (planes, move, outcome):
        print(np.shape(component))


(8, 8, 8, 119)
(8, 8, 8, 73)
(8,)


In [11]:
# Train on the batched pipeline and validate on `val_ds` after every epoch.
#
# `workers` and `use_multiprocessing` were removed: Keras documents both as
# "used for generator or keras.utils.Sequence input only", so they had no
# effect on a tf.data.Dataset input (input parallelism here comes from the
# AUTOTUNE batch/prefetch stages), and newer Keras releases no longer accept
# them in `fit`.
#
# NOTE(review): the recorded run below failed with ResourceExhaustedError while
# Adam was creating its slot variables (a float [1,1,1024,512] tensor), i.e.
# while allocating optimizer state rather than a data batch. Presumably the
# model plus optimizer state does not fit on the GPU, or the GPU memory was
# already held by another process -- verify before re-running.
model.fit(
    ds,
    validation_data=val_ds,
    epochs=10,
    steps_per_epoch=235,
)

Epoch 1/10


2022-07-21 17:55:02.504755: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.00MiB (rounded to 2097152)requested by op Fill
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-07-21 17:55:02.504832: I tensorflow/core/common_runtime/bfc_allocator.cc:1027] BFCAllocator dump for GPU_0_bfc
2022-07-21 17:55:02.504849: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (256): 	Total Chunks: 39, Chunks in use: 39. 9.8KiB allocated for chunks. 9.8KiB in use in bin. 177B client-requested in use in bin.
2022-07-21 17:55:02.504864: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (512): 	Total Chunks: 19, Chunks in use: 19. 9.5KiB allocated for chunks. 9.5KiB in use in bin. 9.1KiB client-requested in use in bin.
2022-07-21 17:55:02.504878: I tensorflow/co

ResourceExhaustedError: in user code:

    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/engine/training.py", line 893, in train_step
        self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 539, in minimize
        return self.apply_gradients(grads_and_vars, name=name)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/mixed_precision/loss_scale_optimizer.py", line 770, in apply_gradients
        maybe_apply_op = tf.__internal__.smart_cond.smart_cond(should_apply_grads, apply_fn,
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/mixed_precision/loss_scale_optimizer.py", line 768, in apply_fn
        return self._apply_gradients(grads, wrapped_vars, name)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/mixed_precision/loss_scale_optimizer.py", line 802, in _apply_gradients
        return self._optimizer.apply_gradients(
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 646, in apply_gradients
        self._create_all_weights(var_list)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 860, in _create_all_weights
        self._create_slots(var_list)
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/adam.py", line 124, in _create_slots
        self.add_slot(var, 'v')
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 946, in add_slot
        weight = tf.Variable(
    File "/home/marcello/github/ChessBreaker/env/lib/python3.8/site-packages/keras/initializers/initializers_v2.py", line 152, in __call__
        return tf.zeros(shape, dtype)

    ResourceExhaustedError: OOM when allocating tensor with shape[1,1,1024,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Fill]
