In [90]:
import numpy as np
import pandas as pd
import plotly.express as px
import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

import tensorflow as tf

2023-08-09 13:22:52.024926: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [70]:

# Read the game records from the CSV that sits next to the notebook and
# preview the first five rows.
data = pd.read_csv(filepath_or_buffer='games.csv')
data.head(n=5)

Unnamed: 0,id,rated,created_at,last_move_at,turns,victory_status,winner,increment_code,white_id,white_rating,black_id,black_rating,moves,opening_eco,opening_name,opening_ply
0,TZJHLljE,False,1504210000000.0,1504210000000.0,13,outoftime,white,15+2,bourgris,1500,a-00,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...,D10,Slav Defense: Exchange Variation,5
1,l1NXvwaE,True,1504130000000.0,1504130000000.0,16,resign,black,5+10,a-00,1322,skinnerua,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...,B00,Nimzowitsch Defense: Kennedy Variation,4
2,mIICvQHh,True,1504130000000.0,1504130000000.0,61,mate,white,5+10,ischia,1496,a-00,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...,C20,King's Pawn Game: Leonardis Variation,3
3,kWKvrqYL,True,1504110000000.0,1504110000000.0,61,mate,white,20+0,daniamurashov,1439,adivanov2009,1454,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...,D02,Queen's Pawn Game: Zukertort Variation,3
4,9tXo1AUZ,True,1504030000000.0,1504030000000.0,95,mate,white,30+3,nik221107,1523,adivanov2009,1469,e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...,C41,Philidor Defense,5


In [71]:
# Share of decisive games won by each colour. Only 'white' and 'black' results
# enter the denominator, so the two percentages add up to 100.
winner_col = data['winner']
count_white = int((winner_col == 'white').sum())
count_black = int((winner_col == 'black').sum())
decisive_total = count_white + count_black

pct_of_white = count_white / decisive_total
pct_of_black = count_black / decisive_total

print("percentage of white winning is", pct_of_white*100)
print("percentage of black winning", pct_of_black*100)


percentage of white winning is 52.33933431023655
percentage of black winning 47.66066568976345


In [80]:
# Keep only games whose `winner` is not 'draw'; the move text is the model
# input and the winner column becomes a binary target (1 = white, 0 = otherwise).
decisive_games = data.query("winner != 'draw'")

moves = np.array(decisive_games['moves'])
labels = np.array((decisive_games['winner'] == 'white').astype('int64'))

In [81]:
# Every distinct space-separated move token that occurs in any game.
all_moves = {
    token
    for game in moves
    for token in game.split(" ")
}

# Vocabulary size = number of distinct move tokens.
max_vocab = len(all_moves)

In [91]:
# Length, in space-separated tokens, of the longest game (0 if there are no games).
max_len = max((len(game.split(" ")) for game in moves), default=0)

In [92]:
# Case and punctuation are part of a move token ("Bxc4" vs "bxc4", "Qh5+" vs
# "Qh5", "O-O"). The Tokenizer defaults lower-case the text and replace
# punctuation with the split character, which merges distinct moves, drops the
# check/mate markers and breaks "O-O" into two tokens -- making some games
# longer than `max_len`, so that padding would cut off their opening moves.
# Disabling both keeps exactly one token per space-separated move.
#
# `num_words=max_vocab` keeps ids in the range [1, max_vocab), which is what an
# embedding with `input_dim=max_vocab` accepts (id 0 is the padding value); the
# single lowest-ranked move token is therefore omitted from the sequences.
tokenizer = Tokenizer(
    num_words=max_vocab,
    filters='',
    lower=False,
    split=' ',
)
tokenizer.fit_on_texts(moves)

# Each game becomes a list of integer move ids.
sequences = tokenizer.texts_to_sequences(moves)

# Token -> id mapping learned from the games.
word_index = tokenizer.word_index

# Pad every game with zeros up to the longest game so that all rows have the
# same length (`pad_sequences` pads at the front by default).
model_inputs = pad_sequences(sequences, maxlen=max_len)

In [93]:
# 70 % of the games for training, the remainder held out for testing; the fixed
# random_state makes the split repeatable.
(train_inputs, test_inputs,
 train_labels, test_labels) = train_test_split(
    model_inputs,
    labels,
    train_size=0.7,
    random_state=24,
)

In [94]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling start from the same state on every "Restart & Run All".
tf.keras.utils.set_random_seed(24)

embedding_dim = 256

# One integer move id per timestep. `shape` has to be a tuple, not a bare int.
inputs = tf.keras.Input(shape=(max_len,))

# Look up a dense vector for each move id. The sequence length is already
# fixed by `inputs`, so the deprecated `input_length` argument is not passed.
embedding = tf.keras.layers.Embedding(
    input_dim=max_vocab,
    output_dim=embedding_dim,
)(inputs)

# Summarise the whole move sequence into a single vector.
gru = tf.keras.layers.GRU(units=embedding_dim)(embedding)

# Single sigmoid unit: probability of the positive label.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(gru)


model = tf.keras.Model(inputs=inputs, outputs=outputs)


model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        # The recorded training log reports `auc` / `val_auc`; track it here so
        # the cell reproduces that output.
        tf.keras.metrics.AUC(name='auc'),
    ]
)


batch_size = 32
epochs = 3

history = model.fit(
    train_inputs,
    train_labels,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    # NOTE: with its default patience (10 epochs) this callback cannot lower
    # the learning rate within a 3-epoch run; it only matters if `epochs` grows.
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=2
)

Epoch 1/3
335/335 - 444s - loss: 0.5392 - accuracy: 0.7282 - auc: 0.8053 - val_loss: 0.4774 - val_accuracy: 0.7794 - val_auc: 0.8607 - lr: 0.0010 - 444s/epoch - 1s/step
Epoch 2/3
335/335 - 392s - loss: 0.3541 - accuracy: 0.8502 - auc: 0.9227 - val_loss: 0.4053 - val_accuracy: 0.8179 - val_auc: 0.8992 - lr: 0.0010 - 392s/epoch - 1s/step
Epoch 3/3
335/335 - 387s - loss: 0.4367 - accuracy: 0.7981 - auc: 0.8793 - val_loss: 0.5420 - val_accuracy: 0.7357 - val_auc: 0.8055 - lr: 0.0010 - 387s/epoch - 1s/step
