# Python Chess Engine Training

<a href="https://colab.research.google.com/github/iAmEthanMai/chess-engine-model/blob/main/train_chess_engine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## START of Notebook

In [1]:
# Load the autotime extension; the captured output shows a "time:" line after each cell once it is active.
%load_ext autotime

from datetime import datetime

# Record when the run began; the last cell of the notebook subtracts this from its own timestamp.
start_time = datetime.now()
print(f"Execution started on {start_time.strftime('%d-%m-%Y at %H:%M:%S')}")

Execution started on 26-05-2023 at 12:52:01
time: 292 µs (started: 2023-05-26 12:52:01 +05:30)


## Environment Settings

In [2]:
import tensorflow as tf
import pandas as pd

from enum import Enum
from pathlib import Path
from tensorflow import estimator, feature_column
from tqdm.auto import tqdm
from typing import List
from zipfile import ZipFile

time: 2.17 s (started: 2023-05-26 12:52:01 +05:30)


## Data


In [4]:
# Work relative to the directory the notebook is executed from.
base_path = Path.cwd()

# Unpack the dataset archive into the working directory.
dataset_zip_file = base_path / 'Datasets.zip'
with ZipFile(dataset_zip_file, mode='r') as zip_ref:
    zip_ref.extractall(path=base_path)

# Directory searched for CSV files by the loading cell
# (presumably created by the extraction above - confirm against the archive layout).
chess_moves_dataset_path = base_path / 'Datasets'

time: 1.54 s (started: 2023-05-26 13:01:26 +05:30)


In [None]:
class ChessMovesDatasets(Enum):
    """Named sub-directories of ``chess_moves_dataset_path``.

    Each member's value is the ``Path`` obtained by joining the member's
    folder name onto ``chess_moves_dataset_path``.
    """

    FISCHER = chess_moves_dataset_path.joinpath('FISCHER')
    MORPHY = chess_moves_dataset_path.joinpath('MORPHY')
    CAPABLANCA = chess_moves_dataset_path.joinpath('CAPABLANCA')

    @classmethod
    def keys(cls) -> List[str]:
        """Return the names of all enum members in definition order.

        Iterates the enum through its public iteration protocol instead of
        reading the private ``_member_names_`` attribute; both skip aliases,
        so the returned list is the same.
        """
        return [member.name for member in cls]

In [None]:
# To load a single dataset instead, glob one enum member's directory:
# chess_moves_files = ChessMovesDatasets.FISCHER.value.glob("*.csv")
# Sorting the recursive glob makes the concatenation order - and therefore the
# seeded shuffle below - independent of the filesystem's listing order.
chess_moves_files = sorted(chess_moves_dataset_path.glob("**/*.csv"))
if not chess_moves_files:
    # Fail with a clear message rather than pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found under {chess_moves_dataset_path}")

train = pd.concat(map(pd.read_csv, chess_moves_files))

# Shuffle the rows reproducibly and renumber them 0..n-1.
# The result has to be assigned back: `sample` returns a new frame, and calling
# `reset_index(..., inplace=True)` on that temporary returns None, so without
# the assignment `train` stays in its original, unshuffled order.
train = train.sample(frac=1, random_state=42).reset_index(drop=True)

# Fill None values to blank values
train = train.fillna('')

print(f"Shape of the training data: {train.shape}")
display(train.head())

## Features

In [None]:
# Label column, and the names of the first 192 columns used as model inputs.
target_col = 'good_move'
feature_cols = train.columns[0:192].tolist()

# X keeps every column except the label; y is the label series.
X = train.drop(columns=target_col)
y = train[target_col]

In [None]:
# Partition the 192 input columns by position into categorical and numerical groups.
# The previous slices (0:63 and 64:192) left positional column 63 in neither group,
# so it was fed to the model but never declared as a feature column.
# NOTE(review): assumes the layout is 64 categorical columns followed by 128
# numerical ones (3 groups of 64) - confirm against the dataset schema.
categorical_columns = list(X.iloc[:, 0:64].columns)
numerical_columns = list(X.iloc[:, 64:192].columns)
feature_columns = []

# Categorical inputs: the vocabulary is every distinct value observed in the column.
for feature_name in categorical_columns:
    vocabulary = X[feature_name].unique()
    feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

# Numerical inputs are declared as float32 columns.
for feature_name in numerical_columns:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

## Input Function

In [None]:
def make_input_fn(data_df, label_df, num_epochs = 10, shuffle = True, batch_size = 32):
    """Build a zero-argument input function over the given features and labels.

    Args:
        data_df: Feature table; converted with ``dict(...)`` into a
            column-name -> values mapping when the input function runs.
        label_df: Labels, sliced alongside the features.
        num_epochs: Number of times the batched dataset is repeated.
        shuffle: When true, shuffle elements with a buffer of 1000 before batching.
        batch_size: Number of elements per batch.

    Returns:
        A callable taking no arguments that returns the ``tf.data.Dataset``.
    """
    def input_function():
        features = dict(data_df)
        dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
        dataset = dataset.shuffle(1000) if shuffle else dataset
        # Batch first, then repeat, so every epoch yields the same batch boundaries.
        return dataset.batch(batch_size).repeat(num_epochs)
    return input_function

## Split Data into Batches

In [None]:
def split_into_batches(df, batch_size=100000, features=None, target=None):
    """Split a DataFrame into consecutive row batches of features and labels.

    Rows are taken from ``df`` itself (not from the notebook-level ``train``
    frame), in order, ``batch_size`` rows at a time; the last batch holds the
    remainder and may be shorter.

    Args:
        df: DataFrame to split.
        batch_size: Maximum number of rows per batch; must be positive.
        features: Column names forming each feature batch. Defaults to the
            notebook-level ``feature_cols``.
        target: Name of the label column. Defaults to the notebook-level
            ``target_col``.

    Returns:
        A ``(batches_X, batches_y)`` tuple of equally long lists: feature
        DataFrames and the matching label Series. Both lists are empty when
        ``df`` has no rows.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    # Fall back to the notebook globals only when the caller gave no override.
    if features is None:
        features = feature_cols
    if target is None:
        target = target_col

    nb_rows = len(df.index)
    batches_X = []
    batches_y = []

    for start in range(0, nb_rows, batch_size):
        # iloc clips the end of the slice, so the final batch is simply shorter.
        chunk = df.iloc[start:start + batch_size]
        batches_X.append(chunk[features])
        batches_y.append(chunk[target])

    return batches_X, batches_y

In [None]:
# Split the training frame into parallel lists of feature batches and label batches,
# using split_into_batches' default batch size.
batches_X, batches_y = split_into_batches(train)

## Model Training 

In [None]:
# Today's date (YYYY-MM-DD) names the estimator directory under base_path.
dt = datetime.now().strftime('%Y-%m-%d')
estimator_path = base_path.joinpath(f'Estimator/{dt}')
# Linear classifier over the declared feature columns, using estimator_path as its model_dir.
linear_est = estimator.LinearClassifier(feature_columns=feature_columns, model_dir=estimator_path)

In [None]:
# One input function per (features, labels) batch pair, in batch order.
input_functions = [
    make_input_fn(df_X, df_y)
    for df_X, df_y in zip(batches_X, batches_y)
]

print(f"Length of Input Functions: {len(input_functions)}")

In [None]:
# Train the model on all the input functions, one train() call per input function, in order.
# The tqdm bar wraps the list so its description can show the 1-based batch number.
training_pbar = tqdm(input_functions)
for idx, input_function in enumerate(training_pbar, start=1):
    training_pbar.set_description(f'Batch: {idx}')
    linear_est.train(input_function)

## Save the model

In [None]:
# Serving input receiver built from the parse spec derived from the model's feature columns.
serving_input_fn = estimator.export.build_parsing_serving_input_receiver_fn(
    feature_column.make_parse_example_spec(feature_columns))

# export_saved_model documents its export base directory as a string, so the
# pathlib.Path is converted explicitly. The returned export location is kept in
# its own name rather than overwriting `estimator_path`, so re-running this
# cell exports under the same base directory instead of nesting inside the
# previous export.
exported_model_path = linear_est.export_saved_model(str(estimator_path), serving_input_fn)

## END of Notebook

In [None]:
# Report when the run finished and the wall-clock time elapsed since start_time was recorded.
end_time = datetime.now()
print(f"Execution ended on {end_time.strftime('%d-%m-%Y at %H:%M:%S')}")
print(f"Total Execution Time: {str(end_time - start_time)}")

# Unload the per-cell timing extension that was loaded at the top of the notebook.
%unload_ext autotime