In [1]:
pip install python-chess


Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Collecting chess<2,>=1 (from python-chess)
  Downloading chess-1.10.0-py3-none-any.whl.metadata (19 kB)
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Downloading chess-1.10.0-py3-none-any.whl (154 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: chess, python-chess
Successfully installed chess-1.10.0 python-chess-1.999
Note: you may need to restart the kernel to use updated packages.


In [9]:
import numpy as np
import pandas as pd
import os
import chess
import h5py
from keras.utils import to_categorical
from keras import models, layers
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# Load the .h5 file
with h5py.File("/kaggle/input/moves-10-million/processed_boards_moves.h5", "r") as hf:
    feature_board = np.array(hf["board_matrix"])
    best_move = np.array(hf["best_move"])

print("Loaded .h5 file successfully")
print(f"Feature board shape: {feature_board.shape}")
print(f"Best move shape: {best_move.shape}")

Loaded .h5 file successfully
Feature board shape: (10100000, 14, 8, 8)
Best move shape: (10100000,)


In [10]:
# Find indices where best_move is 'None'
valid_indices = np.where(best_move != 'None')[0]

# Filter out rows with 'None' in best_move
feature_board = feature_board[valid_indices]
best_move = best_move[valid_indices]

In [11]:
best_move[:10]

array([b'h7e7', b'd5f5', b'c7b6', b'f8d6', b'e4f3', b'f4g4', b'd7c7',
       b'd3e5', b'g4f2', b'e3d4'], dtype=object)

In [13]:


# Convert move to label
def move_to_label(move):
    from_square = move.from_square
    to_square = move.to_square
    return from_square * 64 + to_square

# Convert moves to labels
matrix_moves = []
for move in best_move:
   
    try:
        label = move_to_label(chess.Move.from_uci(move))
        matrix_moves.append(label)
    except ValueError as e:
        print(f"Invalid move: {move} - Exception: {e}")
        in
# Convert to numpy array
matrix_moves = np.array(matrix_moves)

print("All preprocessing done.....")

Invalid move: b'h7e7' - Exception: invalid uci: b'h7e7'


NameError: name 'invalid_moves' is not defined

In [None]:
# Function to create or load a model
def create_model(weights_path=None):
    model = models.Sequential([
        layers.Conv2D(64, (3, 3), activation='relu', input_shape=(14, 8, 8), padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.1),

        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.2),

        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.1),

        layers.Flatten(),
        layers.Dense(1024, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.1),

        layers.Dense(4096, activation='softmax')
        ])


    if weights_path:
        model.load_weights(weights_path)

    return model

In [None]:
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Generator function to yield batches of data
def data_generator(X, y, batch_size):
    num_samples = len(X)
    while True:
        for i in range(0, num_samples, batch_size):
            X_batch = X[i:i+batch_size]
            y_batch = y[i:i+batch_size]
            # Convert labels to one-hot encoding
            y_one_hot = tf.one_hot(y_batch, depth=4096, dtype=tf.uint8)
            assert X_batch.shape[0] == y_one_hot.shape[0], f"Batch size mismatch: {X_batch.shape[0]} != {y_one_hot.shape[0]}"
            assert 4096 == y_one_hot.shape[1], f"Batch size mismatch:  {y_one_hot.shape[1]}"
            yield X_batch, y_one_hot

# Generator for validation data
def validation_data_generator(X, y, batch_size):
    num_samples = len(X)
    while True:
        for i in range(0, num_samples, batch_size):
            X_batch = X[i:i+batch_size]
            y_batch = y[i:i+batch_size]
            y_one_hot = tf.one_hot(y_batch, depth=4096, dtype=tf.uint8)
            yield X_batch, y_one_hot


batch_size = 1024  # Adjust batch size based on your memory constraints
epochs_per_cycle = 20  # Number of epochs to train on each batch of moves

# Check for NaNs or infinite values in the data
assert not np.any(np.isnan(feature_board)), "Feature board contains NaNs"
assert not np.any(np.isnan(matrix_moves)), "Matrix moves contain NaNs"
assert not np.any(np.isinf(feature_board)), "Feature board contains infinite values"
assert not np.any(np.isinf(matrix_moves)), "Matrix moves contain infinite values"

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(feature_board, matrix_moves, test_size=0.05, random_state=42)

# Calculate steps per epoch
steps_per_epoch = len(X_train) // batch_size
validation_steps = len(X_val) // batch_size



# Create the model
model = create_model()  # Make sure create_model function is defined properly

# Set a lower learning rate and compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5, clipvalue=1.0)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
    min_lr=1e-8
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=1e-4,
    restore_best_weights=True
)

# Train the model using the generator
history = model.fit(
    data_generator(X_train, y_train, batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs_per_cycle,
    verbose=1,
    validation_data=validation_data_generator(X_val, y_val, batch_size),
    validation_steps=validation_steps,
    callbacks=[reduce_lr, early_stopping]
)

# Optionally, you can print the history to see the training progress
print(history.history)


In [None]:
# Plotting the training history
import matplotlib.pyplot as plt

# Accuracy plot
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

**64 related code is from here**

In [None]:
# Create a list to store one-hot encoded labels
matrix_moves = []
# Function to convert square to index
def square_to_index(square):
    rank = square // 8
    file = square % 8
    return rank, file

# Function to represent a move on a flattened 8x8 grid
def move_to_grid(move):
    from_square = move.from_square
    to_square = move.to_square
    
    # Initialize an empty flattened 8x8 grid
    grid = np.zeros(64, dtype=np.int8)
    
    # Encode the move
    grid[from_square] = 1  # Starting position
    grid[to_square] = 2    # Ending position
    
    return grid
# Convert moves to labels and one-hot encode them in batches
for move in best_move:
    matrix_moves.append(move_to_grid(chess.Move.from_uci(move.decode('utf-8'))))
print("All preprocessing done.....")

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
import tensorflow as tf

# Check for NaNs or infinite values in the data
assert not np.any(np.isnan(feature_board))
assert not np.any(np.isnan(matrix_moves))
assert not np.any(np.isinf(feature_board))
assert not np.any(np.isinf(matrix_moves))

feature_board = np.array(feature_board)
matrix_moves = np.array(matrix_moves)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(feature_board, matrix_moves, test_size=0.2, random_state=42)

batch_size = 2048  # Adjust batch size to ensure it fits the dataset
epochs_per_cycle = 20  # Number of epochs to train on each batch of moves

# Create the model
model = create_model()

# Set a lower learning rate and compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-6, clipvalue=1.0)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',  # Monitors the validation loss
    factor=0.1,          # Factor by which the learning rate will be reduced
    patience=5,         # Number of epochs with no improvement after which learning rate will be reduced
    min_lr=1e-8          # Lower bound on the learning rate
)

early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitors the validation loss
    patience=5,         # Number of epochs with no improvement after which training will be stopped
    min_delta=1e-4,      # Minimum change to qualify as an improvement
    restore_best_weights=True  # Restores the weights of the best epoch
)

# Train the model using the training and validation data
history = model.fit(
    X_train, y_train,
    batch_size=batch_size,
    epochs=epochs_per_cycle,
    validation_data=(X_val, y_val),
    callbacks=[reduce_lr, early_stopping] 
)

# Optionally, you can print the history to see the training progress
print(history.history)


In [None]:
# Save model after training
model.save(f"/kaggle/working/model_update_temp.h5")