# Settings

In [None]:
seed = 8018
drop_duplicates_for_projection = True

# Prep Model

In [1]:
# define the network
import tensorflow as tf
dims = (8, 8, 13)
n_components = 2
encoder = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=dims),
    tf.keras.layers.Conv2D(
        filters=16, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=32, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=64, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=128, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=256, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=100),
    tf.keras.layers.Dense(units=100),
    tf.keras.layers.Dense(units=100),
    tf.keras.layers.Dense(units=n_components),
])
encoder.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 4, 4, 16)          1888      
                                                                 
 conv2d_1 (Conv2D)           (None, 2, 2, 32)          4640      
                                                                 
 conv2d_2 (Conv2D)           (None, 1, 1, 64)          18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 1, 1, 128)         73856     
                                                                 
 conv2d_4 (Conv2D)           (None, 1, 1, 256)         295168    
                                                                 
 flatten (Flatten)           (None, 256)               0         
                                                                 
 dense (Dense)               (None, 100)              

# Training Parameters

In [2]:
keras_fit_kwargs = {"callbacks": [
    tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        min_delta=10**-2,
        patience=10,
        verbose=1,
    )
]}

# Prep Data

In [3]:
import pandas as pd

# Load the dataset
df = pd.read_csv('all_openings.csv')

# One-hot encode the chessboard columns
expected_categories = ['wp', 'wr', 'wn', 'wb', 'wq', 'wk', 'bp', 'br', 'bn', 'bb', 'bq', 'bk', '']

# Define chessboard columns
chessboard_columns = [f"{col}{row}" for row in range(1, 9) for col in "abcdefgh"]

for column in chessboard_columns:
    df[column] = pd.Categorical(df[column], categories=expected_categories)


training_data_df = df[chessboard_columns]

training_data_df = pd.get_dummies(training_data_df, columns=chessboard_columns)

# Combine the one-hot encoded chessboard DataFrame with the rest of the metadata
# First, drop the original chessboard columns from the main DataFrame to avoid duplicates
metadata_df = df.drop(columns=chessboard_columns)
# Then, concatenate the encoded chessboard DataFrame with the metadata DataFrame
combined_df = pd.concat([metadata_df, training_data_df], axis=1)

In [5]:
if drop_duplicates_for_projection
    # drop duplicates for projection, use the column names from chessboard_dummies_df for finding duplicates
    print("Length before dropping duplicates:", len(training_data_df))
    training_data_df.drop_duplicates(subset=training_data_df.columns, inplace=True)
    print("Length after dropping duplicates:", len(training_data_df))

Length before dropping duplicates: 36155
Length after dropping duplicates: 7079


In [7]:
import numpy as np

# Load and preprocess the data correctly for the encoder
# get numpy data out of training_data_df
train_data = training_data_df.to_numpy()

# Verify the shape
print("Train shape:", train_data.shape)

# Generate embeddings using the encoder just to verify the shape
train_embeddings = encoder.predict(train_data.reshape(-1, 8, 8, 13))

Train shape: (7079, 832)


# Reducer

In [10]:
from umap.parametric_umap import ParametricUMAP

reducer = ParametricUMAP(
    verbose=True,
    keras_fit_kwargs = keras_fit_kwargs,
    encoder=encoder,
    dims=dims,
    random_state=seed,
    n_training_epochs=20)

# Fit

In [13]:
import colorama
reducer = ParametricUMAP(encoder=encoder, dims=dims)
print("Before fitting, check dims:", dims)
print("Reducer expected input shape:", reducer.dims)
# Attempt to fit and transform
try:
    embedding = reducer.fit_transform(train_data.reshape((train_data.shape[0], -1)))
    print(f"Embedding shape after fit_transform: {embedding.shape}")
except Exception as e:
    print(f"{colorama.Fore.RED}Error during fit_transform: {e}{colorama.Style.RESET_ALL}")

Before fitting, check dims: (8, 8, 13)
Reducer expected input shape: (8, 8, 13)
Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Embedding shape after fit_transform: (7079, 2)


In [None]:
# from umap.parametric_umap import load_ParametricUMAP
# embedder = load_ParametricUMAP('/your/path/here')

In [33]:
# create unique filename based on date and time
from datetime import datetime
now = datetime.now()
reducer.save('./embeddings/parametric_umap_embeddings_' + now.strftime("%Y-%m-%d_%H-%M-%S"))





INFO:tensorflow:Assets written to: ./embeddings/parametric_umap_embeddings_2024-03-21_16-02-03\encoder\assets


INFO:tensorflow:Assets written to: ./embeddings/parametric_umap_embeddings_2024-03-21_16-02-03\encoder\assets


Keras encoder model saved to ./embeddings/parametric_umap_embeddings_2024-03-21_16-02-03\encoder
INFO:tensorflow:Assets written to: ./embeddings/parametric_umap_embeddings_2024-03-21_16-02-03\parametric_model\assets


INFO:tensorflow:Assets written to: ./embeddings/parametric_umap_embeddings_2024-03-21_16-02-03\parametric_model\assets


Keras full model saved to ./embeddings/parametric_umap_embeddings_2024-03-21_16-02-03\parametric_model
Pickle of ParametricUMAP model saved to ./embeddings/parametric_umap_embeddings_2024-03-21_16-02-03\model.pkl


In [18]:

# list of chess board dummy columns (to be used for hashing) = training_data_df columns
chessboard_dummy_columns = training_data_df.columns

# Add Projection Coordinates to Training Data DataFrame
training_data_df["x"] = embedding[:, 0]
training_data_df["y"] = embedding[:, 1]

# Generate a unique hash for each row's configuration in both DataFrames
def generate_row_hash(df, prefix_columns):
    return df[prefix_columns].apply(lambda x: hash(tuple(x)), axis=1)

# Generate hashes for both DataFrames
training_data_hash = generate_row_hash(training_data_df, chessboard_dummy_columns)
combined_data_hash = generate_row_hash(combined_df, chessboard_dummy_columns)  # Only the one-hot encoded columns

# Add these hashes as a column to both DataFrames
training_data_df['config_hash'] = training_data_hash
combined_df['config_hash'] = combined_data_hash

# Merge the x, y coordinates from training_data_df to combined_df based on the hash
combined_df = combined_df.merge(training_data_df[['config_hash', 'x', 'y']], on='config_hash', how='left')

# Cleanup if necessary (drop the hash column if no longer needed)
combined_df.drop(columns=['config_hash'], inplace=True)

In [22]:
# drop x_x and y_x
combined_df.drop(columns=['x_x', 'y_x'], inplace=True)
# rename x_y to x, and y_y to y
combined_df.rename(columns={'x_y': 'x', 'y_y': 'y'}, inplace=True)

Unnamed: 0,last_move,from_square,to_square,Event,Site,Date,Round,White,Black,Result,...,h8_wk,h8_bp,h8_br,h8_bn,h8_bb,h8_bq,h8_bk,h8_,x,y
0,,,,?,?,????.??.??,?,?,?,*,...,0,0,1,0,0,0,0,0,1.650878,0.503384
1,Nh3,g1,h3,?,?,????.??.??,?,?,?,*,...,0,0,1,0,0,0,0,0,1.699159,0.640763
2,,,,?,?,????.??.??,?,?,?,*,...,0,0,1,0,0,0,0,0,1.650878,0.503384
3,Nh3,g1,h3,?,?,????.??.??,?,?,?,*,...,0,0,1,0,0,0,0,0,1.699159,0.640763
4,d5,d7,d5,?,?,????.??.??,?,?,?,*,...,0,0,1,0,0,0,0,0,1.717516,0.843047


In [41]:
# replace the dummy columns with the original non-one-hot encoded columns
# First, drop the one-hot encoded columns
combined_df.drop(columns=chessboard_dummy_columns, inplace=True)

# Then, concatenate the original chessboard columns
combined_df[chessboard_columns] = df[chessboard_columns]


In [None]:
# store csv as all_openings_projected.csv
combined_df.to_csv('all_openings_projected.csv', index=False)

In [26]:
# rename opening_type to algo, Result to cp, game_number to line
combined_df.rename(columns={'opening_type': 'algo', 'Result': 'cp', 'game_number': 'line'}, inplace=True)

In [42]:
# store csv as all_openings_projected.csv
combined_df.to_csv('all_openings_projected_PSE_format.csv', index=False)