In [1]:
import chess.pgn
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
import chess
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Embedding, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.layers import LeakyReLU, Dropout
from sklearn.impute import SimpleImputer
from sklearn.utils import resample

2024-08-13 15:37:00.907250: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-13 15:37:01.079660: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-13 15:37:02.862626: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-13 15:37:04.539801: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-13 15:37:05.891891: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been 

In [2]:
data = pd.read_csv('data/games.csv')

# Dict to convert UCI -> Int and Int -> UCI
uci_to_int = {}
int_to_uci = {}
counter = 1 

# Convert UCI -> Int Move
def move_to_int(move):
    if move not in uci_to_int:
        global counter
        uci_to_int[move] = counter
        int_to_uci[counter] = move
        counter += 1
    return uci_to_int[move]

# Convert a string of moves into ints using the chess library
def parse_moves(moves_str):
    board = chess.Board()
    move_list = []
    for move in moves_str.split():
        try:
            uci_move = board.push_san(move).uci()
            move_list.append(move_to_int(uci_move))
        except ValueError:
            print(f"Invalid move: {move}")
            break
    return move_list

data['parsed_moves'] = data['moves'].apply(parse_moves)

X = pad_sequences(data['parsed_moves'], maxlen=28, padding='post', truncating='post')

## Finding out how many moves we have (I think 27)
max_index = max(uci_to_int.values())

In [3]:
## To avoid alot of the same name ones we cluster into just those with the same name rather than the eco
grouped_eco_labels = {
    'A00': 'Polish (Sokolsky) opening',
    'A01': 'Nimzovich-Larsen attack',
    'A02-A03': "Bird's opening",
    'A04-A09': 'Reti opening',
    'A10-A39': 'English opening',
    'A40-A44': "Queen's pawn",
    'A45-A46': "Queen's pawn game",
    'A47': "Queen's Indian defence",
    'A48-A49': "King's Indian defence",
    'A50': "Queen's pawn game",
    'A51-A52': 'Budapest defence',
    'A53-A55': 'Old Indian defence',
    'A56': 'Benoni defence',
    'A57-A59': 'Benko gambit',
    'A60-A79': 'Benoni defence',
    'A80-A99': 'Dutch',
    'B00': "King's pawn opening",
    'B01': 'Scandinavian (centre counter) defence',
    'B02-B05': "Alekhine's defence",
    'B06': 'Robatsch (modern) defence',
    'B07-B09': 'Pirc defence',
    'B10-B19': 'Caro-Kann defence',
    'B20-B99': 'Sicilian defence',
    'C00-C19': 'French defence',
    'C20-C99': "King's pawn game",
    'D00-D99': "Queen's Gambit",
    'E00-E99': "King's Indian defence",
}

## Converting the eco to integers
eco_to_int = {}
int_to_opening = {}
counter = 0

for eco_range, opening_name in grouped_eco_labels.items():
    if '-' in eco_range:
        start, end = eco_range.split('-')
        for i in range(int(start[1:]), int(end[1:]) + 1):
            eco = start[0] + str(i).zfill(2)
            if eco not in eco_to_int:
                eco_to_int[eco] = counter
    else:
        if eco_range not in eco_to_int:
            eco_to_int[eco_range] = counter

    int_to_opening[counter] = opening_name
    counter += 1

data['opening_encoded'] = data['opening_eco'].map(eco_to_int)
X_train, X_temp, y_train, y_temp = train_test_split(X, data['opening_encoded'], test_size=0.8, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Balancing classes in the training set
class_counts = pd.Series(y_train).value_counts()
max_class_size = class_counts.max()

# Print sizes before balancing
print("Class sizes in training set before balancing:")
print(class_counts)

balanced_train_data = []
for class_label in class_counts.index:
    class_data = pd.DataFrame({'X': list(X_train[y_train == class_label]), 'y': y_train[y_train == class_label]})
    
    # Tile the data
    tiles = max_class_size // len(class_data)
    remainder = max_class_size % len(class_data)
    
    tiled_data = pd.concat([class_data] * tiles)
    
    # Upsample the rest if remainder is greater than 0
    if remainder > 0:
        upsampled_data = resample(class_data, replace=True, n_samples=remainder, random_state=42)
        balanced_class_data = pd.concat([tiled_data, upsampled_data])
    else:
        balanced_class_data = tiled_data
    
    balanced_train_data.append(balanced_class_data)

# Combine all balanced training data
balanced_train_data = pd.concat(balanced_train_data)

# Print sizes after balancing
print("\nClass sizes in training set after balancing:")
print(balanced_train_data['y'].value_counts())

# Separate X and y for the balanced training data
X_train_balanced = np.vstack(balanced_train_data['X'].values)
y_train_balanced = balanced_train_data['y'].values


Class sizes in training set before balancing:
opening_encoded
24    1253
22     529
25     528
23     309
0      206
5      168
4      148
17     126
16     117
21     112
26     105
3       91
20      65
6       52
18      45
2       40
15      30
19      29
1       25
8       10
13       7
10       7
11       3
9        2
12       2
14       2
Name: count, dtype: int64

Class sizes in training set after balancing:
y
24    1253
22    1253
25    1253
23    1253
0     1253
5     1253
4     1253
17    1253
16    1253
21    1253
26    1253
3     1253
20    1253
6     1253
18    1253
2     1253
15    1253
19    1253
1     1253
8     1253
13    1253
10    1253
11    1253
9     1253
12    1253
14    1253
Name: count, dtype: int64


In [4]:
## Prints the validation progress to a png file every "save_every" epochs
class ValidationLossPlotter(Callback):
    def __init__(self, save_every=5):
        super(ValidationLossPlotter, self).__init__()
        self.epoch_count = 0
        self.save_every = save_every
        self.history = []

    def on_epoch_end(self, epoch, logs=None):
        val_loss = logs.get('val_loss')
        self.history.append(val_loss)
        self.epoch_count += 1

        plt.figure(figsize=(10, 6))
        plt.plot(range(1, self.epoch_count + 1), self.history, label='Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Validation Loss')
        plt.title('Validation Loss Progress')
        plt.legend()

        if self.epoch_count % self.save_every == 0:
            plt.savefig('validation_progress.png')

        plt.close()

best_model_name = 'best_model.keras'
validation_plotter = ValidationLossPlotter(save_every=2)


In [5]:
## Creating CVAE
@tf.keras.utils.register_keras_serializable()
class CVAE(tf.keras.Model):
    def __init__(self, latent_dim):
        super(CVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(28,)),
            tf.keras.layers.Embedding(input_dim=max_index + 1, output_dim=64, input_length=28),
            tf.keras.layers.Conv1D(32, 3, activation='linear', padding='same'),
            LeakyReLU(alpha=0.3),
            tf.keras.layers.Conv1D(64, 3, activation='linear', padding='same'),
            LeakyReLU(alpha=0.3),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation='linear'),
            LeakyReLU(alpha=0.3),
            Dropout(0.2),
            tf.keras.layers.Dense(256, activation='linear'),
            LeakyReLU(alpha=0.3),
            Dropout(0.2),
            tf.keras.layers.Dense(latent_dim + latent_dim)
        ])

        self.decoder = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
            tf.keras.layers.Dense(256, activation='linear'),
            LeakyReLU(alpha=0.3),
            Dropout(0.2),
            tf.keras.layers.Dense(128, activation='linear'),
            LeakyReLU(alpha=0.3),
            Dropout(0.2),
            tf.keras.layers.Dense(units=28 * max_index, activation='linear'),
            LeakyReLU(alpha=0.3),
            tf.keras.layers.Reshape(target_shape=(28, max_index)),
        ])

    def encode(self, x):
        mean, logvar = tf.split(self.encoder(x), num_or_size_splits=2, axis=1)
        return mean, logvar

    def reparameterize(self, mean, logvar):
        eps = tf.random.normal(shape=mean.shape)
        return eps * tf.exp(logvar * 0.5) + mean

    def decode(self, z, apply_sigmoid=False):
        logits = self.decoder(z)
        if apply_sigmoid:
            probs = tf.sigmoid(logits)
            return probs
        return logits

optimizer = tf.keras.optimizers.Adam(1e-4)

def log_normal_pdf(sample, mean, logvar, raxis=1):
    log2pi = tf.math.log(2. * np.pi)
    return tf.reduce_sum(
        -.5 * ((sample - mean) ** 2. * tf.exp(-logvar) + logvar + log2pi),
        axis=raxis)

def compute_loss(model, x):
    mean, logvar = model.encode(x)
    z = model.reparameterize(mean, logvar)
    x_logit = model.decode(z)
    cross_ent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=x_logit, labels=x)
    logpx_z = -tf.reduce_sum(cross_ent, axis=[1])
    logpz = log_normal_pdf(z, 0., 0.)
    logqz_x = log_normal_pdf(z, mean, logvar)
    return -tf.reduce_mean(logpx_z + logpz - logqz_x)

@tf.function
def train_step(model, x, optimizer):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

@tf.function
def validate_step(model, x):
    val_loss = compute_loss(model, x)
    return val_loss

In [6]:
epochs = 25
latent_dim = 24

model = CVAE(latent_dim)

batch_size = 1024
train_dataset = tf.data.Dataset.from_tensor_slices(X_train_balanced).shuffle(len(X_train_balanced)).batch(batch_size)
validation_dataset = tf.data.Dataset.from_tensor_slices(X_val).batch(batch_size)

best_val_loss = float('inf')

## Training loop
for epoch in range(epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_val_loss_avg = tf.keras.metrics.Mean()

    for train_x_batch in train_dataset:
        loss = train_step(model, train_x_batch, optimizer)
        epoch_loss_avg.update_state(loss)

    # Validation loop
    for val_x_batch in validation_dataset:
        val_loss = validate_step(model, val_x_batch)
        epoch_val_loss_avg.update_state(val_loss)

    # Get the average loss for this epoch
    train_loss = epoch_loss_avg.result().numpy()
    val_loss = epoch_val_loss_avg.result().numpy()

    print(f'Epoch {epoch + 1}, Train Loss: {train_loss}, Validation Loss: {val_loss}')

    validation_plotter.on_epoch_end(epoch, logs={'val_loss': val_loss})


2024-08-13 15:40:05.264318: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-13 15:40:08.546986: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 1, Train Loss: 210.31668090820312, Validation Loss: 209.23435974121094


In [7]:
## Gets the latent representation, basically meaning the generation aspect is cut off
def get_latent_vectors(model, data):
    latent_vectors = []
    for sample in data:
        mean, logvar = model.encode(sample[np.newaxis, :])
        z = model.reparameterize(mean, logvar)
        latent_vectors.append(z.numpy().squeeze())
    return np.array(latent_vectors)


In [8]:
X_train_latent = get_latent_vectors(model, X_train_balanced)
X_test_latent = get_latent_vectors(model, X_test)

imputer = SimpleImputer(strategy='mean')
X_train_latent = imputer.fit_transform(X_train_latent)
X_test_latent = imputer.transform(X_test_latent)


In [9]:
def evaluate_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred, zero_division=1))
    print(confusion_matrix(y_test, y_pred))

In [10]:
## Runs on random forest
rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
evaluate_model(rf_model, X_train_latent, y_train_balanced, X_test_latent, y_test)

              precision    recall  f1-score   support

           0       0.06      0.06      0.06       395
           1       0.01      0.05      0.01        64
           2       0.01      0.03      0.01        63
           3       0.03      0.05      0.03       216
           4       0.03      0.05      0.04       250
           5       0.03      0.04      0.03       333
           6       0.00      0.01      0.00       133
           7       1.00      0.00      0.00         3
           8       0.00      0.03      0.01        29
           9       0.00      0.14      0.01         7
          10       0.00      0.00      0.00        17
          11       0.00      0.00      0.00         8
          12       0.00      0.00      0.00         6
          13       0.00      0.00      0.00         8
          14       0.00      0.00      0.00         6
          15       0.01      0.05      0.02        55
          16       0.04      0.04      0.04       265
          17       0.03    

: 