In [1]:
pip install python-chess


Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Collecting chess<2,>=1 (from python-chess)
  Downloading chess-1.10.0-py3-none-any.whl.metadata (19 kB)
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Downloading chess-1.10.0-py3-none-any.whl (154 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: chess, python-chess
Successfully installed chess-1.10.0 python-chess-1.999
Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
import os
import chess
import h5py
from keras.utils import to_categorical
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, Input

2024-06-19 18:25:50.787394: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-19 18:25:50.787489: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-19 18:25:50.935599: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:


# Load the .h5 file
with h5py.File("/kaggle/input/grandmaster-datasets/magnus_datasetv1.h5", "r") as hf:
    # Load datasets into memory
    feature_board = hf["board_matrix"][:]
    game_id = hf["GameID"][:]
    move_id = hf["MoveID"][:]
    playerMove = hf["PlayerMove"][:]
    top_moves_list = hf["TopMoves"][:]
    best_move = hf["BestMove"][:]
    centipawns = hf["Centipawns"][:]
    mates = hf["Mates"][:]
    move_sequence = hf["MoveSequence"][:]
    

# Now you can work with the loaded data outside the `with` block
# Example: Print shapes of the datasets
print("Shapes of datasets:")
print(f"  game_id: {game_id.shape}")
print(f"  move_id: {move_id.shape}")
print(f"  player_move: {playerMove.shape}")
print(f"  top_moves_list: {top_moves_list.shape}")
print(f"  best_move: {best_move.shape}")
print(f"  centipawns: {centipawns.shape}")
print(f"  mates: {mates.shape}")
print(f"  move_sequence: {move_sequence.shape}")


Shapes of datasets:
  game_id: (100000,)
  move_id: (100000,)
  player_move: (100000,)
  top_moves_list: (100000, 5)
  best_move: (100000,)
  centipawns: (100000, 5)
  mates: (100000, 5)
  move_sequence: (100000, 5)


In [4]:

# Convert move to label
def move_to_label(move):
    from_square = move.from_square
    to_square = move.to_square
    return from_square * 64 + to_square
# Function to represent a move on an 8x8 grid
def move_to_grid(move):
    
    from_square = move.from_square
    to_square = move.to_square
    
    # Initialize an empty 8x8 grid
    grid = np.zeros((8, 8), dtype=np.int8)
    
    # Convert the from_square and to_square to row, col format
    from_row, from_col = divmod(from_square, 8)
    to_row, to_col = divmod(to_square, 8)
    
    # Encode the move
    grid[from_row, from_col] = 1  # Starting position
    grid[to_row, to_col] = 2      # Ending position
    return grid
def standardize(data):
    mean = np.mean(data)
    std_dev = np.std(data)
    standardized_data = (data - mean) / std_dev
    return standardized_data

best_move =[move.decode('utf-8') if isinstance(move, bytes) else move for move in best_move]
matrix_best_move = [move_to_label(chess.Move.from_uci(move)) for move in best_move]
matrix_best_move = np.array(matrix_best_move)
best_move_one_hot = np.array(tf.one_hot(matrix_best_move, depth=4096, dtype=tf.uint8))


playerMove =[move.decode('utf-8') if isinstance(move, bytes) else move for move in playerMove]
matrix_playerMove = [move_to_label(chess.Move.from_uci(move)) for move in playerMove]
matrix_playerMove = np.array(matrix_playerMove)
y_one_hot =  np.array(tf.one_hot(matrix_playerMove, depth=4096, dtype=tf.uint8))




# Convert top moves list to labels
matrix_top_moves = []
matrix_move_sequence = []

for moves_list in top_moves_list:
    moves_list_str = [move.decode('utf-8') if isinstance(move, bytes) else move for move in moves_list]
    
    # Create labels, filtering out empty moves
    labels = [move_to_label(chess.Move.from_uci(move))if move else -1 for move in moves_list_str]
    matrix_top_moves.append(labels)

for move_seq in move_sequence:
 
    move_sequence_str = [move.decode('utf-8') if isinstance(move, bytes) else move for move in move_seq]
    
    # Create labels, filtering out empty moves
    labels = [move_to_label(chess.Move.from_uci(move))if move else -1 for move in move_sequence_str]
    matrix_move_sequence.append(labels)    
    
# Convert to numpy array
matrix_top_moves = np.array(matrix_top_moves)

feature_board = np.array(feature_board)
matrix_move_sequence = np.array(matrix_move_sequence) 


standardized_centipawns = standardize(centipawns)
standardized_mates = standardize(mates)

# Print shapes of the standardized datasets and first few elements to check
print("Shapes of standardized datasets:")
print(f"standardized_centipawns: {standardized_centipawns.shape}")
print(f"standardized_mates: {standardized_mates.shape}")

print("Shape of features:", feature_board.shape)
print("Shape of top moves:", matrix_top_moves.shape)
print("Shape of move sequence:", matrix_move_sequence.shape)
print("Shape of y:", y_one_hot.shape)
print("All preprocessing done.....")


Shapes of standardized datasets:
standardized_centipawns: (100000, 5)
standardized_mates: (100000, 5)
Shape of features: (100000, 14, 8, 8)
Shape of top moves: (100000, 5)
Shape of move sequence: (100000, 5)
Shape of y: (100000, 4096)
All preprocessing done.....


In [5]:
standardized_mates[0]

array([0.01612166, 0.01612166, 0.01612166, 0.01612166, 0.01612166])

In [8]:
'''
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, concatenate, BatchNormalization, Layer

# Define the first input (14, 8, 8) - feature_board
input1 = Input(shape=(14, 8, 8))

# Convolutional layers for the first input
x1 = Conv2D(32, (3, 3), activation='relu', padding='same')(input1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D((2, 2))(x1)

x1 = Conv2D(64, (3, 3), activation='relu', padding='same')(x1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D((2, 2))(x1)

x1 = Conv2D(128, (3, 3), activation='relu', padding='same')(x1)
x1 = BatchNormalization()(x1)
x1 = Flatten()(x1)
x1 = Dense(256, activation='relu')(x1)

# Adding more dense layers to x1 branch to increase its weight
x1 = Dense(512, activation='relu')(x1)
x1 = BatchNormalization()(x1)

x1 = Dense(256, activation='relu')(x1)
x1 = BatchNormalization()(x1)

# Define the second input (5) - standardized_centipawns
input2 = Input(shape=(5,))

# Fully connected layers for the second input
x2 = Dense(512, activation='relu')(input2)
x2 = BatchNormalization()(x2)
x2 = Dropout(0.1)(x2)

# Define the third input (5) - standardized_mates
input3 = Input(shape=(5,))

# Fully connected layers for the third input
x3 = Dense(512, activation='relu')(input3)
x3 = BatchNormalization()(x3)
x3 = Dropout(0.5)(x3)


# Define the fourth input (5, 4096) - matrix_move_sequence
input4 = Input(shape=(5, 4096))

# Fully connected layers for the fourth input
x4 = Flatten()(input4)  # Flatten the input to a 1D tensor
x4 = Dense(512, activation='relu')(x4)
x4 = BatchNormalization()(x4)
x4 = Dropout(0.3)(x4)

x4 = Dense(256, activation='relu')(x4)
x4 = BatchNormalization()(x4)
x4 = Dropout(0.3)(x4)

# Define the fifth input (5, 4096) - matrix_top_moves
input5 = Input(shape=(5, 4096))

# Fully connected layers for the fifth input
x5 = Flatten()(input5)  # Flatten the input to a 1D tensor
x5 = Dense(512, activation='relu')(x5)
x5 = BatchNormalization()(x5)
x5 = Dropout(0.3)(x5)

x5 = Dense(256, activation='relu')(x5)
x5 = BatchNormalization()(x5)
x5 = Dropout(0.3)(x5)

# Define the sixth input (4096) - best_move_one_hot
input6 = Input(shape=(4096,))

# Fully connected layers for the sixth input
x6 = Dense(512, activation='relu')(input6)
x6 = BatchNormalization()(x6)



# Concatenate the outputs of all inputs
merged = concatenate([x1, x2, x3, x4, x5, x6])

# Add some fully connected layers on top of the merged output
x = Dense(512, activation='relu')(merged)
x = BatchNormalization()(x)
x = Dropout(0.1)(x)

# Additional layers
x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.1)(x)

x = Dense(2048, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(2048, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

# Output layer
x = Dense(4096, activation='softmax')(x)

# Create the model with six inputs
model = Model(inputs=[input1, input2, input3, input4, input5, input6], outputs=x)

# Display the model's architecture
model.summary()
'''

SyntaxError: incomplete input (1666783150.py, line 114)

In [11]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, concatenate, BatchNormalization

# Define the first input (14, 8, 8) - feature_board
input1 = Input(shape=(14, 8, 8))

# Convolutional layers for the first input
x1 = Conv2D(32, (3, 3), activation='relu', padding='same')(input1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D((2, 2))(x1)

x1 = Conv2D(64, (3, 3), activation='relu', padding='same')(x1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D((2, 2))(x1)

x1 = Flatten()(x1)
x1 = Dense(256, activation='relu')(x1)
x1 = BatchNormalization()(x1)
x1 = Dropout(0.3)(x1)

# Define the second input (5) - standardized_centipawns
input2 = Input(shape=(5,))

# Define the third input (5) - standardized_mates
input3 = Input(shape=(5,))

# Concatenate standardized_centipawns and standardized_mates
combined_inputs_2_3 = concatenate([input2, input3])

# Fully connected layers for the combined second and third input
x2_3 = Dense(512, activation='relu')(combined_inputs_2_3)
x2_3 = BatchNormalization()(x2_3)
x2_3 = Dropout(0.3)(x2_3)

# Define the fourth input (5, 4096) - matrix_move_sequence
input4 = Input(shape=(5, 4096))

# Define the fifth input (5, 4096) - matrix_top_moves
input5 = Input(shape=(5, 4096))

# Concatenate matrix_move_sequence and matrix_top_moves
combined_inputs_4_5 = concatenate([input4, input5])

# Fully connected layers for the combined fourth and fifth input
x4_5 = Flatten()(combined_inputs_4_5)  # Flatten the input to a 1D tensor
x4_5 = Dense(512, activation='relu')(x4_5)
x4_5 = BatchNormalization()(x4_5)
x4_5 = Dropout(0.3)(x4_5)

x4_5 = Dense(256, activation='relu')(x4_5)
x4_5 = BatchNormalization()(x4_5)
x4_5 = Dropout(0.3)(x4_5)

# Define the sixth input (4096) - best_move_one_hot
input6 = Input(shape=(4096,))

# Fully connected layers for the sixth input
x6 = Dense(512, activation='relu')(input6)
x6 = BatchNormalization()(x6)

# Concatenate the outputs of all inputs
merged = concatenate([x1, x2_3, x4_5, x6])

# Add more fully connected layers after merging all inputs
x = Dense(512, activation='relu')(merged)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(2048, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(2048, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# Output layer
x = Dense(4096, activation='softmax')(x)

# Create the model with six inputs
model = Model(inputs=[input1, input2, input3, input4, input5, input6], outputs=x)

# Display the model's architecture
model.summary()


**With data generator**

In [12]:
from tensorflow.keras.optimizers import RMSprop,SGD
# Generator function to yield batches of data
from tensorflow.keras.optimizers import RMSprop, SGD

# Generator function to yield batches of data
def data_generator(X1, X2, X3, X4, X5, X6, y, batch_size):
    num_samples = len(X1)
    
    while True:
        for i in range(0, num_samples, batch_size):
            X1_batch = X1[i:i+batch_size]
            X2_batch = X2[i:i+batch_size]
            X3_batch = X3[i:i+batch_size]
            X4_batch = X4[i:i+batch_size]
            X5_batch = X5[i:i+batch_size]
            X6_batch = X6[i:i+batch_size]
            y_batch = y[i:i+batch_size]
            
            # Initialize empty array for one-hot encoded X4_batch
            x4_batch_one_hot = np.zeros((len(X4_batch), X4_batch.shape[1], 4096), dtype=np.uint8)
            for i, moves_list in enumerate(X4_batch):
                for j, move in enumerate(moves_list):
                    if move != -1:
                        x4_batch_one_hot[i, j, move] = 1
                        
            # Initialize empty array for one-hot encoded X5_batch
            x5_batch_one_hot = np.zeros((len(X5_batch), X5_batch.shape[1], 4096), dtype=np.uint8)
            for i, moves_list in enumerate(X5_batch):
                for j, move in enumerate(moves_list):
                    if move != -1:
                        x5_batch_one_hot[i, j, move] = 1
                        
            yield (
                (tf.convert_to_tensor(X1_batch, dtype=tf.uint8), 
                 tf.convert_to_tensor(X2_batch, dtype=tf.float32),
                 tf.convert_to_tensor(X3_batch, dtype=tf.float32),
                 tf.convert_to_tensor(x4_batch_one_hot, dtype=tf.uint8),
                 tf.convert_to_tensor(x5_batch_one_hot, dtype=tf.uint8),
                 tf.convert_to_tensor(X6_batch, dtype=tf.uint8)),
                tf.convert_to_tensor(y_batch, dtype=tf.uint8)
            )

# Generator for validation data
def validation_data_generator(X1, X2, X3, X4, X5, X6, y, batch_size):
    num_samples = len(X1)
    
    while True:
        for i in range(0, num_samples, batch_size):
            X1_batch = X1[i:i+batch_size]
            X2_batch = X2[i:i+batch_size]
            X3_batch = X3[i:i+batch_size]
            X4_batch = X4[i:i+batch_size]
            X5_batch = X5[i:i+batch_size]
            X6_batch = X6[i:i+batch_size]
            y_batch = y[i:i+batch_size]
            
            # Initialize empty array for one-hot encoded X4_batch
            x4_batch_one_hot = np.zeros((len(X4_batch), X4_batch.shape[1], 4096), dtype=np.uint8)
            for i, moves_list in enumerate(X4_batch):
                for j, move in enumerate(moves_list):
                    if move != -1:
                        x4_batch_one_hot[i, j, move] = 1
                        
            # Initialize empty array for one-hot encoded X5_batch
            x5_batch_one_hot = np.zeros((len(X5_batch), X5_batch.shape[1], 4096), dtype=np.uint8)
            for i, moves_list in enumerate(X5_batch):
                for j, move in enumerate(moves_list):
                    if move != -1:
                        x5_batch_one_hot[i, j, move] = 1
                        
            yield (
                (tf.convert_to_tensor(X1_batch, dtype=tf.uint8), 
                 tf.convert_to_tensor(X2_batch, dtype=tf.float32),
                 tf.convert_to_tensor(X3_batch, dtype=tf.float32),
                 tf.convert_to_tensor(x4_batch_one_hot, dtype=tf.uint8),
                 tf.convert_to_tensor(x5_batch_one_hot, dtype=tf.uint8),
                 tf.convert_to_tensor(X6_batch, dtype=tf.uint8)),
                tf.convert_to_tensor(y_batch, dtype=tf.uint8)
            )



# Compile the model with the custom optimizer
model.compile(optimizer=Adam(learning_rate=0.001 ), loss='categorical_crossentropy', metrics=['accuracy'])

# Split the data into training and validation sets
X1_train, X1_val, X2_train, X2_val, X3_train, X3_val, X4_train, X4_val, X5_train, X5_val, X6_train, X6_val, y_train, y_val = train_test_split(
    feature_board, standardized_centipawns, standardized_mates, matrix_move_sequence, matrix_top_moves, best_move_one_hot, y_one_hot,
    test_size=0.1,shuffle=False
)

print("Shapes after train-test split:")
print("X1_train shape:", X1_train.shape)
print("X1_val shape:", X1_val.shape)
print("X2_train shape:", X2_train.shape)
print("X2_val shape:", X2_val.shape)
print("y_train shape:", y_train.shape)
print("y_val shape:", y_val.shape)

# Define batch size and number of epochs
batch_size = 32
epochs_per_cycle = 20

# Calculate steps per epoch
steps_per_epoch = len(X1_train) // batch_size
validation_steps = len(X1_val) // batch_size


early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=1e-4,
    restore_best_weights=True
)

# Train the model using the generator
history = model.fit(
    data_generator(X1_train, X2_train, X3_train, X4_train, X5_train, X6_train, y_train, batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs_per_cycle,
    verbose=1,
    validation_data=validation_data_generator(X1_val, X2_val, X3_val, X4_val, X5_val, X6_val, y_val, batch_size),
    validation_steps=validation_steps,
    callbacks=[early_stopping]
)

# Optionally, you can print the history to see the training progress
print(history.history)

Shapes after train-test split:
X1_train shape: (90000, 14, 8, 8)
X1_val shape: (10000, 14, 8, 8)
X2_train shape: (90000, 5)
X2_val shape: (10000, 5)
y_train shape: (90000, 4096)
y_val shape: (10000, 4096)
Epoch 1/20
[1m2812/2812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 14ms/step - accuracy: 0.0503 - loss: 7.2860 - val_accuracy: 0.2721 - val_loss: 4.8347
Epoch 2/20
[1m  13/2812[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m35s[0m 13ms/step - accuracy: 0.2269 - loss: 5.2350

W0000 00:00:1718822982.450726     117 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m2809/2812[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.2302 - loss: 4.8752

W0000 00:00:1718823017.506128     117 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m2812/2812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 13ms/step - accuracy: 0.2302 - loss: 4.8747 - val_accuracy: 0.3828 - val_loss: 3.9843
Epoch 3/20
[1m2812/2812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 13ms/step - accuracy: 0.3459 - loss: 3.7559 - val_accuracy: 0.4227 - val_loss: 3.7349
Epoch 4/20
[1m2812/2812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 13ms/step - accuracy: 0.4348 - loss: 2.9994 - val_accuracy: 0.4430 - val_loss: 3.7214
Epoch 5/20
[1m2812/2812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 13ms/step - accuracy: 0.5102 - loss: 2.4184 - val_accuracy: 0.4484 - val_loss: 3.8275
Epoch 6/20
[1m2812/2812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 13ms/step - accuracy: 0.5706 - loss: 1.9844 - val_accuracy: 0.4474 - val_loss: 3.9533
Epoch 7/20
[1m2812/2812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 13ms/step - accuracy: 0.6231 - loss: 1.6483 - val_accuracy: 0.4506 - val_loss: 4.1169
Epoch 8/20
[1m

KeyboardInterrupt: 

In [None]:
# Save model after training
model.save(f"/kaggle/working/model_update_d20_v74.h5")

In [None]:
# Plotting the training history
import matplotlib.pyplot as plt

# Accuracy plot
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

