In [1]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score
from tensorflow import keras
from keras import layers, Input
import pickle
from keras.callbacks import EarlyStopping
from keras import backend as K
import gc
import os
import json

In [2]:
random_state = 93

In [3]:
batch_sizes = [int(32*(2**i)) for i in range(5)]

print(batch_sizes)

[32, 64, 128, 256, 512]


In [4]:
def create_base_cnn(n_pieces):
    # Use Input layer to define the input shape
    inputs = Input(shape=(n_pieces, 8, 8))  #Help: n_pieces, 8, 8!
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu')(x)
    model = keras.Model(inputs=inputs, outputs=x)

    return model 

In [5]:
lr = 0.001

In [6]:
def create_single_input_model(n_pieces):
    inputs = Input(shape=(n_pieces, 8, 8)) #Help: n_pieces, 8, 8!
    base_cnn = create_base_cnn(n_pieces) 
    x = base_cnn(inputs) 
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dense(1, activation='tanh')(x) # Output between -1 and 1

    model = keras.Model(inputs=inputs, outputs=x)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss='mse', metrics=['mse'])
    return model

In [7]:
def create_pairwise_model(version, n_pieces):
    base_cnn = create_base_cnn(n_pieces) 
    input_a = Input(shape=(n_pieces, 8, 8)) #Help: n_pieces, 8, 8!
    input_b = Input(shape=(n_pieces, 8, 8))

    encoded_a = base_cnn(input_a)
    encoded_b = base_cnn(input_b)

    diff = layers.Subtract()([encoded_a, encoded_b])
    mult = layers.Multiply()([encoded_a, encoded_b])
    merged = layers.Concatenate()([diff, mult])

    x = layers.Dense(128, activation='relu')(merged)
    x = layers.Dense(32, activation='relu')(x)

    if version == "Regression":
        output = layers.Dense(1, activation='tanh')(x) #Output between -1 and 1 #Help: use sigmoid for labels 0,1,2
        # output = layers.Lambda(lambda x: x * 2)(output)
        loss = 'mse'
        metrics = ['mse']
    elif version == "Classification":
        output = layers.Dense(3, activation='softmax')(x)
        loss = tf.keras.losses.SparseCategoricalCrossentropy()
        metrics = ['accuracy']
    else:
        raise ValueError(f"Unknown version: {version}")

    model = keras.Model(inputs=[input_a, input_b], outputs=output)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss=loss, metrics=metrics)
    return model

In [8]:
def find_best_threshold_v1(X1, X2, y_true):
    # Finds the best threshold by maximizing accuracy
    best_threshold, best_score = None, 0

    thresholds = np.linspace(0, 0.1, 200)

    for threshold in thresholds:
        y_pred = np.where(np.abs(X1 - X2) < threshold, 0, np.where(X1 > X2, 1, 2)) #Help: Maybe swap 0 and 1
        score = accuracy_score(y_true, y_pred)
        if score > best_score:
            best_score = score
            best_threshold = threshold

    return best_threshold, best_score

In [9]:
def find_best_thresholds_v2(X, y_true):
    # Finds the best threshold1 and threshold2 using a grid search by maximizing accuracy
    thresholds1 = np.linspace(-0.8, 0, 200)
    thresholds2 = np.linspace(0, 0.8, 200)

    best_t1, best_t2, best_acc = None, None, 0

    for t1 in thresholds1:
        for t2 in thresholds2:
            if t1 >= t2:  
                continue

            y_pred = np.where(X > t2, 2, np.where(X < t1, 1, 0)) #Help: Maybe swap 0 and 1
            acc = accuracy_score(y_true, y_pred)
            if acc > best_acc:
                best_t1, best_t2, best_acc = t1, t2, acc

    return best_t1, best_t2, best_acc

In [10]:
def create_post_nn(n):
    model = keras.Sequential([
        layers.Dense(16, activation='relu', input_shape=(n,)),  
        layers.Dense(8, activation='relu'),  
        layers.Dense(3, activation='softmax') 
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),  
        metrics=['accuracy']
    )

    return model

**FOR ALL MODELS**

In [11]:
def training(model, X, y, epochs_per_batch=6, acc=False):
    hist = {
        'loss': [],
        'val_loss': [],
        }  
    if acc:
        hist['accuracy'] = []
        hist['val_accuracy'] = []
         
    # EarlyStopping callback
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5, # stop if val_loss doesn’t improve after 5 epochs
        restore_best_weights=True,
        verbose=1
    )

    if y.shape[0] < 1_000_000:
        # make size larger for better training
        factor = 1_000_000// y.shape[0]
        X = np.tile(X, (factor, 1, 1, 1))
        y = np.tile(y,factor)
        indices = np.random.permutation(y.shape[0])
        X = X[indices]
        y = y[indices]


    for n, batch_size in enumerate(batch_sizes):
        print(f"Batchsize: {batch_size}")
        print(f"Starting with epoch: {n*epochs_per_batch}")
        # Train your model with the current batch size
        epoch_history = model.fit(
            X, y,
            batch_size=batch_size,
            epochs=(n+1)*epochs_per_batch,
            initial_epoch=n*epochs_per_batch,  
            validation_split=0.15,
            verbose=1,
            callbacks = [early_stopping]
        )

        # Append the results to the history dictionary
        hist['loss'].extend(epoch_history.history['loss'])
        hist['val_loss'].extend(epoch_history.history['val_loss'])
        if acc:
            hist['accuracy'].extend(epoch_history.history['accuracy'])
            hist['val_accuracy'].extend(epoch_history.history['val_accuracy'])
        if model.stop_training:
            print("Early stopping triggered.")
            break
        
    return model, hist

In [12]:
endgames = ['KRK', 'KQKR', 'KPKR', 'KPK']
threshold_config = {}
model_types = [
    ("single", create_single_input_model),  # (name, create_func)
    ("regression", lambda n: create_pairwise_model("Regression", n)),
    ("classification", lambda n: create_pairwise_model("Classification", n)),
]

for endgame in endgames:

    print(f"Processing endgame: {endgame}")

    #Load the data
    data = np.load(rf"C:\SPEICHER\Bachelor_Thesis\Experiments\DataBase\{endgame}_position_train_val_2C.npz")
    positions, dtm_values = data["X"], data["y"]
    print(f"Loaded {endgame} data with {dtm_values.shape[0]} samples")

    pairs_data = np.load(rf"C:\SPEICHER\Bachelor_Thesis\Experiments\Database\{endgame}_pairs_train_val_2C.npz")
    X1, X2, y = pairs_data["X1"], pairs_data["X2"], pairs_data["y"]
    print(f"Loaded {endgame} pairs data with {y.shape[0]} samples")

    for model_name, create_func in model_types:
        #Help: Check if model already exists
        model_path = f"C:/SPEICHER/Bachelor_Thesis/Experiments/Models/{endgame}_model_{model_name}_2C.keras"
        if os.path.exists(model_path):
            print(f"Skipping {model_name}, already trained.")
            continue
        
        print(f"\nTraining {model_name} model for {endgame}")

        # Create model
        model = create_func(positions.shape[1]) #Help: [1]
        # model.summary()

        #Help: Maybe different mapping
        y_trans = np.where(y==1, -1, np.where(y==2, 1, 0))  # -1 = pos1 better, 0 = roughly equal, 1 = pos2 better

        # Choose inputs
        if model_name == "single":
            model, history = training(model, positions, dtm_values)
        elif model_name == "regression":
            model, history = training(model, [X1, X2], y_trans)
        else:
            model, history = training(model, [X1, X2], y, acc=True)

        # Save model and history
        model.save(rf"C:\SPEICHER\Bachelor_Thesis\Experiments\Models\{endgame}_model_{model_name}_2C.keras")
        with open(rf"C:\SPEICHER\Bachelor_Thesis\Experiments\Histories\{endgame}_history_{model_name}_2C.pkl", "wb") as f:
            pickle.dump(history, f)

        # Post-processing for single-input model
        if model_name == "single":
            # Predictions
            y1_pred = model.predict(X1[:100000])
            y2_pred = model.predict(X2[:100000])
            y_pred_comb = np.hstack([y1_pred, y2_pred])

            # Threshold method
            best_threshold, best_accuracy = find_best_threshold_v1(y1_pred, y2_pred, y[:100000])
            print(f"Best threshold: {best_threshold:.4f}, Accuracy: {best_accuracy:.4f}")
            threshold_config.setdefault(endgame, {})[model_name] = {"threshold1": float(best_threshold),"threshold2": float(0)}
            # Nerual Network
            model = create_post_nn(2)
            # model.summary()

            early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
            model.fit(y_pred_comb, y[:100000], epochs=10, batch_size=32, validation_split=0.15, callbacks=[early_stopping])
            model.save(rf"C:\SPEICHER\Bachelor_Thesis\Experiments\Models\{endgame}_model_pro_{model_name}_2C.keras")

        elif model_name == "regression":
            # Predictions
            y_pred = model.predict([X1[:100000], X2[:100000]])
            
            # Threshold method
            best_t1, best_t2, best_acc = find_best_thresholds_v2(y_pred, y[:100000])
            print(f"Best thresholds: t1={best_t1:.4f}, t2={best_t2:.4f}, Accuracy: {best_acc:.4f}")
            threshold_config.setdefault(endgame, {})[model_name] = {"threshold1": float(best_t1),"threshold2": float(best_t2)}
            # Nerual Network
            model = create_post_nn(1)
            # model.summary()
            early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
            model.fit(y_pred, y[:100000], epochs=10, batch_size=32, validation_split=0.15, callbacks=[early_stopping])
            model.save(rf"C:\SPEICHER\Bachelor_Thesis\Experiments\Models\{endgame}_model_pro_{model_name}_2C.keras")

        K.clear_session()
        gc.collect()
    
    with open(r"C:\SPEICHER\Bachelor_Thesis\Experiments\Configs\thresholds_2C.json", "w") as f:
        json.dump(threshold_config, f, indent=4)

Processing endgame: KRK
Loaded KRK data with 50500 samples
Loaded KRK pairs data with 4500000 samples
Skipping single, already trained.
Skipping regression, already trained.
Skipping classification, already trained.
Processing endgame: KQKR
Loaded KQKR data with 3074198 samples
Loaded KQKR pairs data with 4500000 samples
Skipping single, already trained.
Skipping regression, already trained.
Skipping classification, already trained.
Processing endgame: KPKR
Loaded KPKR data with 4500000 samples
Loaded KPKR pairs data with 4500000 samples
Skipping single, already trained.
Skipping regression, already trained.
Skipping classification, already trained.
Processing endgame: KPK
Loaded KPK data with 151221 samples
Loaded KPK pairs data with 4500000 samples

Training single model for KPK
Batchsize: 32
Starting with epoch: 0
Epoch 1/6
[1m24101/24101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 4ms/step - loss: 0.0024 - mse: 0.0024 - val_loss: 1.4606e-04 - val_mse: 1.4606e-04
Epoch 2

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.8576 - loss: 0.5414 - val_accuracy: 0.9579 - val_loss: 0.1407
Epoch 2/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9630 - loss: 0.1235 - val_accuracy: 0.9671 - val_loss: 0.1050
Epoch 3/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9704 - loss: 0.0918 - val_accuracy: 0.9723 - val_loss: 0.0822
Epoch 4/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9746 - loss: 0.0756 - val_accuracy: 0.9743 - val_loss: 0.0735
Epoch 5/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9768 - loss: 0.0696 - val_accuracy: 0.9757 - val_loss: 0.0663
Epoch 6/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9765 - loss: 0.0682 - val_accuracy: 0.9740 - val_loss: 0.0723
Epoch 7/10
[1m2657/2657[0

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9387 - loss: 0.1960 - val_accuracy: 0.9983 - val_loss: 0.0099
Epoch 2/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9977 - loss: 0.0124 - val_accuracy: 0.9983 - val_loss: 0.0086
Epoch 3/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9974 - loss: 0.0131 - val_accuracy: 0.9983 - val_loss: 0.0084
Epoch 4/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9976 - loss: 0.0113 - val_accuracy: 0.9983 - val_loss: 0.0082
Epoch 5/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9976 - loss: 0.0116 - val_accuracy: 0.9984 - val_loss: 0.0080
Epoch 6/10
[1m2657/2657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9978 - loss: 0.0118 - val_accuracy: 0.9985 - val_loss: 0.0083
Epoch 7/10
[1m2657/2657[0