In [44]:
####################################################
## Brian Behnke
## 11/25/2022
## Data Mining Techniques - CS 43105
## 
## Trains a TensorFlow model on moves made in past chess games
## 
## Slightly modified version of a script written by Ethan Mai
## https://colab.research.google.com/github/iAmEthanMai/chess-engine-model/blob/main/train_chess_engine.ipynb
####################################################

import os
import glob

import tensorflow as tf
import pandas     as pd
import numpy      as np

from sklearn.utils import shuffle

In [45]:
# Directory holding the training CSV files.
training_data_path = 'C:\\Users\\Brian\\Chess\\Data\\csv\\test_train'

# glob returns matches in arbitrary order; sorting keeps the row order of the
# concatenated frame stable from one run to the next.
training_data_files = sorted(glob.glob(os.path.join(training_data_path, '*.csv')))

# Fail with a clear message instead of pandas' "No objects to concatenate".
if not training_data_files:
    raise FileNotFoundError('No CSV files found in ' + training_data_path)

# Read every file with its first row as the header and no index column.
li = [pd.read_csv(file, index_col=None, header=0) for file in training_data_files]

# Stack all files into one frame with a fresh 0..n-1 index.
train = pd.concat(li, axis=0, ignore_index=True)

In [46]:
# Shuffle the rows before they are split into training chunks.
# A fixed seed makes this shuffle repeatable for the same input frame, so a
# re-run of the notebook does not silently train on a different row order.
SHUFFLE_SEED = 42
train = shuffle(train, random_state=SHUFFLE_SEED)

In [47]:
# Sanity check: (row count, column count) of the combined training frame.
train.shape

(120426, 193)

In [48]:
# Preview the first rows to eyeball the column layout and the good_move label.
train.head()

Unnamed: 0,a1,b1,c1,d1,e1,f1,g1,h1,a2,b2,...,to_h7,to_a8,to_b8,to_c8,to_d8,to_e8,to_f8,to_g8,to_h8,good_move
25336,,,,,,,,K,P,P,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
100225,R,,,Q,K,,,R,P,P,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
25727,R,N,B,Q,K,,,R,P,P,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
71742,R,N,B,Q,,R,K,,P,P,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
93045,R,,,,R,,K,,P,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False


In [49]:
# The first 192 column positions are the model inputs; 'good_move' is the label.
features = train.columns[0:192].tolist()

# Label series and input frame, both taken from the same training frame.
y = train['good_move']
X = train[features]

In [50]:
# Column positions 0-63 are treated as categorical, positions 64-191 as numeric.
# iloc slice ends are exclusive, so the categorical slice must end at 64: the
# earlier `0:63` stopped at position 62 and left column 63 out of both groups,
# i.e. that column never reached the model.
# NOTE(review): checkpoints already saved with the 63-column set may not restore
# against this feature list — confirm, or train into a fresh model_dir.
categorical_columns = list(X.iloc[:, 0:64].columns)
numerical_columns = list(X.iloc[:, 64:192].columns)
feature_columns = []

# One vocabulary-list column per categorical input; the vocabulary is the set
# of distinct values observed for that column in X.
for feature_name in categorical_columns:
    vocabulary = X[feature_name].unique()
    feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

# Remaining inputs are fed to the model as float32 numeric columns.
for feature_name in numerical_columns:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

In [51]:
def make_input_fn(data_df, label_df, num_epochs=5, shuffle=True, batch_size=32):
    """Return a zero-argument function that builds the training dataset.

    The returned function creates a ``tf.data.Dataset`` from
    ``(dict(data_df), label_df)``, optionally shuffles it with a buffer of
    1000 elements, groups it into batches of ``batch_size`` and repeats the
    batched dataset ``num_epochs`` times.

    Args:
        data_df: Feature data; converted with ``dict(...)`` into a mapping of
            column name to values.
        label_df: Labels aligned with ``data_df``.
        num_epochs: Number of times the batched dataset is repeated.
        shuffle: When truthy, shuffle elements before batching.
        batch_size: Number of elements per batch.

    Returns:
        A callable taking no arguments and returning the dataset.
    """
    def input_function():
        dataset = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        # Shuffling happens before batching, so individual rows are shuffled.
        dataset = dataset.shuffle(1000) if shuffle else dataset
        return dataset.batch(batch_size).repeat(num_epochs)

    return input_function

In [52]:
def split_into_batches(df, batch_size=100000, feature_names=None, label_name='good_move'):
    """Split a frame into consecutive row chunks of features and labels.

    The previous version ignored ``df`` and always sliced the notebook-level
    ``train`` frame; the chunks are now taken from the frame that is passed in.

    Args:
        df: Frame containing both the feature columns and the label column.
        batch_size: Maximum number of rows per chunk; must be positive. The
            last chunk holds the remainder when the row count is not an exact
            multiple.
        feature_names: Columns to put in each feature chunk. When omitted, the
            notebook-level ``features`` list is used (the original behaviour).
        label_name: Name of the label column.

    Returns:
        ``(batches_X, batches_y)``: two equally long lists, holding the feature
        frame and the label series of each chunk, in row order. Both lists are
        empty when ``df`` has no rows.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    if feature_names is None:
        # Fall back to the notebook-level feature list, as the original did.
        feature_names = features
    feature_names = list(feature_names)

    nb_rows = len(df.index)
    batches_X = []
    batches_y = []

    # Stepping by batch_size yields every chunk start; iloc clips the final
    # slice at the end of the frame, so no separate remainder case is needed.
    for start in range(0, nb_rows, batch_size):
        chunk = df.iloc[start:start + batch_size]
        batches_X.append(chunk[feature_names])
        batches_y.append(chunk[label_name])

    return batches_X, batches_y

In [53]:
# Slice the training frame into row chunks (default 100000 rows each) so the
# estimator can be trained one chunk at a time.
batches_X, batches_y = split_into_batches(train)

In [54]:
# Linear classifier over the feature columns built above; its checkpoints are
# written under model_dir.
linear_est = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    model_dir='C:\\Users\\Brian\\Chess\\Data\\test\\model_dir',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Brian\\Chess\\Data\\test\\model_dir', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [55]:
# One input function per (features, labels) chunk, in chunk order.
input_functions = [
    make_input_fn(df_X, df_y)
    for df_X, df_y in zip(batches_X, batches_y)
]

In [56]:
# Number of input functions built — one per data chunk.
len(input_functions)

2

In [57]:
# Train the same estimator on each chunk in turn, announcing every chunk.
for batch_number, train_input_fn in enumerate(input_functions):
    print('<======================================== NEW BATCH ========================================>')
    print('Batch: ' + str(batch_number))
    linear_est.train(train_input_fn)

# Export the trained estimator as a SavedModel whose serving input parses
# tf.Example protos according to the spec derived from the feature columns.
estimator_base_path = 'C:\\Users\\Brian\\Chess\\Data\\test\\model_dir'
feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
estimator_path = linear_est.export_saved_model(estimator_base_path, serving_input_fn)

Batch: 0
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\Brian\Chess\Data\test\model_dir\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.6931472, step = 0
INFO:tensorflow:global_step/sec: 4.54876
INFO:tensorflow:loss = 0.051398195, step = 100 (21.985 sec)
INFO:tensorflow:global_step/sec: 5.8777
INFO:tensorflow:loss = 0.017800627, step = 200 (17.013 sec)
INFO:tensorflow:global_step/sec: 5.8992
INFO:tensorflow:loss = 0.15659113, step = 300 (16.951 sec)
INFO:tensorflow:global_step/sec: 5.87977
INFO:tensorflow:loss = 0.13504632, step = 400 (17.009 sec)
INFO:tensorflow:global_step/sec: 5.91858
INFO:tensorflow:loss =