In [2]:
import pickle

# Specify the path to your pickle file
pickle_file = "C:\\Users\\omvis\\chess_prediction_project\\train_dataset\\combined_dataset.pkl"

# Load the data
with open(pickle_file, 'rb') as file:
    X, y = pickle.load(file)

In [3]:
len(X)

21456

In [4]:
import pickle

# Specify the path to your pickle file
pickle_file = "C:\\Users\\omvis\\chess_prediction_project\\train_dataset\\features.pkl"

# Load the data
with open(pickle_file, 'rb') as file:
    X_feature = pickle.load(file)

In [5]:
len(X_feature)

21456

In [6]:
X[0]

array([[ 0. ,  0. ,  0. ,  0.6,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. , -0.2, -1. ],
       [ 0. ,  0. ,  0. ,  0. , -0.2,  0. ,  0. , -0.2],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0.2,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0.2,  0. ,  0. ,  0. ,  0. ,  0.2,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0.2,  0. ,  0.2],
       [ 0. ,  0. ,  0. ,  0. ,  0.5,  0. ,  1. ,  0. ]])

In [7]:
X_feature[0]

array([0.        , 0.        , 1.        , 0.475     , 0.55555556,
       0.        , 0.        , 0.        , 0.14285714, 0.5       ,
       0.70503597, 0.5       , 0.        , 0.5       , 0.5       ,
       0.64285714, 0.42424242, 0.42424242, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.34117647,
       0.6       ])

In [8]:
y[0]

1

In [9]:
import numpy as np
X = np.array(X)
X_feature = np.array(X_feature)
y = np.array(y)

In [10]:
y.shape

(21456,)

In [11]:
from sklearn.preprocessing import MinMaxScaler

# Assuming X_feature is a numpy array of shape (num_samples, 26)
scaler = MinMaxScaler()

# Fit the scaler on X_feature and transform it
X_feature = scaler.fit_transform(X_feature)

In [12]:
X_feature[0]

array([0.        , 0.        , 1.        , 0.475     , 0.55555556,
       0.        , 0.        , 0.        , 0.14285714, 0.5       ,
       0.70503597, 0.5       , 0.        , 0.5       , 0.5       ,
       0.64285714, 0.42424242, 0.42424242, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.34117647,
       0.6       ])

In [171]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Dropout, LeakyReLU
from tensorflow.keras.optimizers import Adam,SGD
from tensorflow.keras.regularizers import l2

def build_model(board_input_shape=(8, 8, 1), phase_input_shape=(26,)):
    # Input for 8x8 chessboard (single channel)
    board_input = Input(shape=board_input_shape, name='board_input')

    # CNN layers for chessboard (8x8 matrix) with L2 regularization
    x1 = Conv2D(100, (3, 3), padding='same')(board_input)
    x1 = LeakyReLU(alpha=0.01)(x1)
    x1 = MaxPooling2D((2, 2))(x1)
    x1 = Dropout(0.25)(x1)
    
    x1 = Flatten()(x1)

    # Input for phase-specific features
    phase_input = Input(shape=phase_input_shape, name='phase_input')

    # Dense layers for phase-specific features with L2 regularization
    x2 = Dense(100, kernel_regularizer=l2(0.0001))(phase_input)
    x2 = LeakyReLU(alpha=0.01)(x2)
   
    # Concatenate the CNN output and the phase-specific features
    combined = Concatenate()([x1, x2])
    
    # Additional dense layers after concatenation with L2 regularization
    
    x =Dropout(0.5) (Dense(100,  kernel_regularizer=l2(0.0001))(combined))
    x = LeakyReLU(alpha=0.01)(x)
   
    # Output layer (3 classes: white, black, equal)
    output = Dense(3, activation='softmax')(x)

    # Create and compile the model
    model = Model(inputs=[board_input, phase_input], outputs=output)
    model.compile(optimizer = Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

model = build_model()

model.summary()

In [172]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets, specifying the test size as 2000
X_train, X_test, X_feature_train, X_feature_test, y_train, y_test = train_test_split(
    X, X_feature, y, test_size=4000, random_state=42
)

# Verify the split
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")



Training set size: 17456 samples
Test set size: 4000 samples


In [173]:
from sklearn.model_selection import KFold
import numpy as np

# Assuming X_train, X_feature_train, y_train are numpy arrays
# Here, we will apply KFold to all datasets: X_train, X_feature_train, and y_train

kfold = KFold(n_splits=6, shuffle=True, random_state=42)

for train_idx, val_idx in kfold.split(X_train, y_train):
    # Split the data into training and validation sets
    X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
    X_feature_train_fold, X_feature_val_fold = X_feature_train[train_idx], X_feature_train[val_idx]
    y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

    # Train the model using the training fold and validate using the validation fold
    model_trained = model.fit(
        [X_train_fold, X_feature_train_fold], y_train_fold,  # Train with both chessboard and phase features
        epochs=50,
        batch_size=64,
        validation_data=([X_val_fold, X_feature_val_fold], y_val_fold)  # Validate with both chessboard and phase features
    )


Epoch 1/50
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.4159 - loss: 1.1005 - val_accuracy: 0.5402 - val_loss: 1.0442
Epoch 2/50
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5253 - loss: 1.0331 - val_accuracy: 0.5560 - val_loss: 1.0067
Epoch 3/50
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5510 - loss: 0.9972 - val_accuracy: 0.5735 - val_loss: 0.9706
Epoch 4/50
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5611 - loss: 0.9702 - val_accuracy: 0.5718 - val_loss: 0.9511
Epoch 5/50
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5744 - loss: 0.9506 - val_accuracy: 0.5883 - val_loss: 0.9294
Epoch 6/50
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5919 - loss: 0.9244 - val_accuracy: 0.5914 - val_loss: 0.9166
Epoch 7/50
[1m228/228[0m 

In [174]:
model.evaluate(
    [X_test,X_feature_test],  # Provide the test data (both board and phase-specific features)
    y_test                  # Provide the true labels
)

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6746 - loss: 0.7733


[0.7676452994346619, 0.6775000095367432]

In [73]:
import pickle

# Specify the path to your pickle file
pickle_file = "C:\\Users\\omvis\\chess_prediction_project\\test_dataset\\test_dataset.pkl"

# Load the data
with open(pickle_file, 'rb') as file:
    Xtest, id = pickle.load(file)

In [74]:
import pickle

# Specify the path to your pickle file
pickle_file = "C:\\Users\\omvis\\chess_prediction_project\\test_dataset\\features_test.pkl"

# Load the data
with open(pickle_file, 'rb') as file:
    Xtest_feature = pickle.load(file)

In [75]:
Xtest = np.array(Xtest)
Xtest_feature = np.array(Xtest_feature)
id=np.array(id)

In [76]:
y_predicted = model.predict([Xtest,Xtest_feature])
y_predicted[0]

[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


array([0.36259976, 0.38208392, 0.25531632], dtype=float32)

In [77]:
y_predicted = np.argmax(y_predicted,axis=1)

In [78]:
y_predicted[:5]

array([1, 2, 0, 2, 2], dtype=int64)

In [79]:
len(y_predicted)

8539

In [80]:
import pandas as pd
df = pd.read_csv("C:\\Users\\omvis\\chess_prediction_project\\chess_test_csv.csv")
df.shape

(8540, 2)

In [81]:
import warnings
warnings.filterwarnings("ignore")
for idx, val in enumerate(df.image_id):
    index = np.where(id == val)[0]  # [0] gets the array of indices
    if y_predicted[index] == 0:
        df['evaluation'][idx] = "equal"
    elif y_predicted[index] == 1:
        df['evaluation'][idx] = "white"
    else:
        df['evaluation'][idx] = "black"

In [82]:
df.rename(columns={'image_id' : 'id' },inplace=True)

In [83]:
df.head(3)

Unnamed: 0,id,evaluation
0,26065374,equal
1,32481096,black
2,36997677,white


In [84]:
df = df[~df['id'].duplicated(keep='first')]

In [85]:
len(df['id'].unique())

8539

In [86]:
df.nunique

<bound method DataFrame.nunique of             id evaluation
0     26065374      equal
1     32481096      black
2     36997677      white
3     34417938      black
4     28905181      black
...        ...        ...
8535  31917698      black
8536    393246      equal
8537  26319610      black
8538  16378115      black
8539  30711130      black

[8539 rows x 2 columns]>

In [87]:
df['evaluation'].isna().sum()

0

In [88]:
df.to_csv("result.csv",index=False)