In [301]:
import numpy as np 
import pandas as pd 
import cv2
import glob
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

In [259]:
# glob.glob() returns paths in arbitrary, filesystem-dependent order; sorting makes
# the "first N images" sampled later identical on every run.
# Both listings use the same "*.jpeg" pattern so only files with that extension match.
fnames_img_test = sorted(glob.glob("/kaggle/input/chess-positions/dataset/test/*.jpeg"))
fnames_img_train = sorted(glob.glob("/kaggle/input/chess-positions/dataset/train/*.jpeg"))

In [260]:
def loadFENCode_Files(fnames, nb_samples=100):
    """Load chessboard images together with the FEN code stored in each file name.

    Parameters
    ----------
    fnames : iterable of str
        Image paths. The base name of each file, without its extension, is
        taken as the board's FEN code.
    nb_samples : int, default 100
        Stop as soon as this many images have been loaded. A value that is
        never reached (for example 0) loads every readable file.

    Returns
    -------
    files : list
        The images, exactly as returned by ``cv2.imread``.
    f_FEN : list of str
        FEN code of every loaded image, aligned index-by-index with ``files``.
    """
    import warnings  # local import: only needed when a file cannot be read

    files = []
    f_FEN = []
    for path in fnames:
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead of
            # raising. Skip it so that images and labels stay aligned and later
            # processing steps do not fail on a None entry.
            warnings.warn("Skipping unreadable image: {}".format(path))
            continue
        # os.path handles the platform's path separators and strips only the
        # final extension.
        fen_code, _extension = os.path.splitext(os.path.basename(path))
        files.append(image)
        f_FEN.append(fen_code)
        if len(files) == nb_samples:
            break
    return files, f_FEN

In [282]:
# Load 100 boards for training and 1000 boards for evaluation,
# each with the FEN code taken from its file name.
files_Train, f_FEN_Train = loadFENCode_Files(fnames=fnames_img_train, nb_samples=100)
files_Test, f_FEN_Test = loadFENCode_Files(fnames=fnames_img_test, nb_samples=1000)

In [262]:
# cv2.imread returns channels in BGR order while matplotlib expects RGB;
# convert before displaying, otherwise red and blue appear swapped.
plt.imshow(cv2.cvtColor(files_Train[0], cv2.COLOR_BGR2RGB));

In [265]:
def ChessboardImgToTilesObservations(file, width=8, min_pos=50):
    """Cut a chessboard image into its individual square tiles.

    The tiles are obtained by slicing a regular grid that starts at the
    top-left pixel of the image: ``width`` x ``width`` squares of
    ``min_pos`` x ``min_pos`` pixels each, visited row by row.

    The previous implementation also ran ``cv2.findChessboardCorners`` with the
    exhaustive flag on every image but never used its result; that call has
    been removed because it only cost time.

    Parameters
    ----------
    file : ndarray
        BGR image (as loaded by ``cv2.imread``) of at least
        ``width * min_pos`` pixels in both height and width.
    width : int, default 8
        Number of squares per board side.
    min_pos : int, default 50
        Side length of one square, in pixels.

    Returns
    -------
    tiles : ndarray, shape (width * width, min_pos, min_pos, channels)
        The colour tiles.
    tiles_GrayScale : ndarray, shape (width * width, min_pos * min_pos, 1)
        Each tile converted to grayscale and flattened into a column vector.

    Raises
    ------
    ValueError
        If the image is too small to contain the requested grid.
    """
    side = width * min_pos
    if file.shape[0] < side or file.shape[1] < side:
        # Slicing past the image border would silently yield truncated tiles
        # of inconsistent shapes, so fail early with a clear message instead.
        raise ValueError(
            "image of shape {} is smaller than the {}x{} grid of {}px tiles".format(
                file.shape, width, width, min_pos
            )
        )

    tiles = []
    tiles_GrayScale = []
    for top in range(0, side, min_pos):
        for left in range(0, side, min_pos):
            tile = file[top:top + min_pos, left:left + min_pos]
            tiles.append(tile)
            gray = cv2.cvtColor(tile, cv2.COLOR_BGR2GRAY)
            # Flatten the grayscale tile into a (min_pos * min_pos, 1) column vector.
            tiles_GrayScale.append(gray.reshape(-1, 1))
    return np.array(tiles), np.array(tiles_GrayScale)

In [266]:
def generateDataset(chessboards):
    """Split every chessboard image into tiles and stack all tiles together.

    Parameters
    ----------
    chessboards : iterable of ndarray
        Board images, each passed to ``ChessboardImgToTilesObservations``.

    Returns
    -------
    Obs_BGR : ndarray
        Colour tiles of all boards, concatenated along the first axis.
    Obs_GRAY : ndarray
        Flattened grayscale tiles of all boards, concatenated along the first axis.
        Both results are ``np.empty(0)`` when no board produced any tile.
    """
    color_parts = []
    gray_parts = []
    for board in chessboards:
        o_col, o_gr = ChessboardImgToTilesObservations(board)
        if o_col.shape[0] != 0 and o_gr.shape[0] != 0:
            color_parts.append(o_col)
            gray_parts.append(o_gr)

    if not color_parts:
        return np.empty(0), np.empty(0)

    # Concatenate once at the end: stacking inside the loop re-copies the whole
    # accumulated array for every board, which is quadratic in the number of boards.
    return np.concatenate(color_parts, axis=0), np.concatenate(gray_parts, axis=0)

In [267]:
def FENCodeToLabels(fname):
    """Expand a '-'-separated FEN string into one label per square.

    Every numeric character ``n`` becomes ``n`` consecutive ``'e'`` (empty
    square) labels; any other character is kept as the label of its square.

    Parameters
    ----------
    fname : str
        FEN code whose ranks are separated by ``'-'``.

    Returns
    -------
    list of str
        The labels of all squares, in the order they appear in ``fname``.
    """
    labels = []
    for rank in fname.split(sep='-'):
        for symbol in rank:
            if not symbol.isnumeric():
                labels.append(symbol)
                continue
            # A digit encodes a run of that many empty squares.
            labels.extend('e' * int(symbol))
    return labels

In [268]:
def generateLabels(f_FEN):
    """Build the flat array of per-square labels for a list of FEN codes.

    Parameters
    ----------
    f_FEN : iterable of str
        FEN codes, each expanded with ``FENCodeToLabels``.

    Returns
    -------
    ndarray
        The labels of all boards, flattened into a single 1-D array.
    """
    per_board = [FENCodeToLabels(code) for code in f_FEN]
    return np.array(per_board).ravel()

# Extracting the Train and Test datasets with the corresponding labels

In [269]:
ytrain = generateLabels(f_FEN_Train)
tiles_Color, tiles_GrayScale = generateDataset(files_Train)
# One row per tile, one column per pixel. The row length is derived from the
# array itself: `min_pos` is only a default argument of
# ChessboardImgToTilesObservations, not a module-level name, so referring to it
# here raises NameError on a fresh kernel.
Xtrain = tiles_GrayScale.reshape(len(tiles_GrayScale), -1)
assert Xtrain.shape[0] == ytrain.shape[0], "tiles and labels are misaligned"

In [283]:
ytest = generateLabels(f_FEN_Test)
tiles_Color_Test, tiles_GrayScale_Test = generateDataset(files_Test)
# One row per tile, one column per pixel. The row length is derived from the
# array itself: `min_pos` is only a default argument of
# ChessboardImgToTilesObservations, not a module-level name, so referring to it
# here raises NameError on a fresh kernel.
Xtest = tiles_GrayScale_Test.reshape(len(tiles_GrayScale_Test), -1)
assert Xtest.shape[0] == ytest.shape[0], "tiles and labels are misaligned"

In [270]:
# Standardise the pixel features; the statistics are learned on the training
# tiles only and then applied unchanged to the test tiles.
scaler = StandardScaler()
scaler.fit(Xtrain)
Xtrain_Scaled = scaler.transform(Xtrain)
Xtest_Scaled = scaler.transform(Xtest)

# Determining the number of PCA components to use

In [271]:
# Fit a PCA without a component limit on the scaled training tiles so that the
# explained variance of the components can be inspected.
pca = PCA()
pca.fit(Xtrain_Scaled, ytrain)
r = pca  # fit() returns the estimator itself, so `r` and `pca` are the same object

In [272]:
# Number of components of the PCA fitted without an explicit component limit.
r.n_components_

In [273]:
# Cumulative explained variance as a function of the number of components kept;
# used to choose n_components for the final pipeline.
fig, ax = plt.subplots()
ax.plot(range(1, r.n_components_ + 1), r.explained_variance_ratio_.cumsum())
ax.set(
    xlabel="Number of principal components",
    ylabel="Cumulative explained variance ratio",
    title="PCA cumulative explained variance",
);

# Building the pipeline

In [284]:
# Keep the first 90 principal components. With the default svd_solver="auto",
# PCA may select the randomized solver for data of this size, so the seed is
# fixed to make the fitted components (and the scores below) reproducible.
pca = PCA(n_components=90, random_state=0)
svc = SVC(kernel="linear", decision_function_shape="ovo")

In [285]:
# Chain scaling, dimensionality reduction and classification so that the raw
# pixel vectors can be fed directly to fit() and predict().
pipeline = Pipeline(steps=[('scaler', scaler), ('pca', pca), ('SVC', svc)])
pipeline.fit(Xtrain, ytrain)


# Predictions on the first 1000 samples

In [286]:
# Flatten every test tile into one row of pixels. The row length is derived from
# the array itself: `min_pos` is only a default argument of
# ChessboardImgToTilesObservations, not a module-level name, so referring to it
# here raises NameError on a fresh kernel.
test_tiles = np.asarray(tiles_GrayScale_Test)
predictions = pipeline.predict(test_tiles.reshape(len(test_tiles), -1))

In [287]:
def PredictionToFENCode(predictions, width=8):
    """Convert a flat sequence of per-square labels back into a FEN-style string.

    The labels are read in rows of ``width`` squares. Within a row, consecutive
    ``'e'`` (empty) labels are collapsed into their count and every other label
    is emitted as is; rows are joined with ``'-'``.

    Parameters
    ----------
    predictions : sequence of str
        Per-square labels, row after row.
    width : int, default 8
        Number of squares per row. This used to be read from an undefined
        global, which made the function raise NameError.

    Returns
    -------
    str
        The encoded rows joined with ``'-'`` (an empty string for no labels).
    """
    rows = []
    for start in range(0, len(predictions), width):
        parts = []
        empty_run = 0
        for label in predictions[start:start + width]:
            if label == 'e':
                empty_run += 1
                continue
            if empty_run:
                # Flush the pending run of empty squares before the piece.
                parts.append(str(empty_run))
                empty_run = 0
            parts.append(str(label))
        if empty_run:
            parts.append(str(empty_run))
        rows.append("".join(parts))
    return "-".join(rows)

# Accuracy of the model on the first 1000 images of the test dataset

In [289]:
# Fraction of test tiles whose predicted label equals the true label.
accuracy_score(y_true=ytest, y_pred=predictions)

In [296]:
# Per-class precision. The scores are returned in sorted label order, so attach
# the labels explicitly to show which class each value belongs to.
class_labels = np.unique(np.concatenate((ytest, predictions)))
pd.Series(
    precision_score(ytest, predictions, labels=class_labels, average=None),
    index=class_labels,
    name="precision",
)

In [298]:
# Per-class recall. The scores are returned in sorted label order, so attach
# the labels explicitly to show which class each value belongs to.
class_labels = np.unique(np.concatenate((ytest, predictions)))
pd.Series(
    recall_score(ytest, predictions, labels=class_labels, average=None),
    index=class_labels,
    name="recall",
)

In [300]:
# Raw confusion matrix: rows are true classes, columns are predicted classes.
confusion_matrix(y_true=ytest, y_pred=predictions)

In [305]:
# Pass the class labels to both the matrix and the display so the axes show the
# piece symbols instead of anonymous integer indices.
class_labels = np.unique(np.concatenate((ytest, predictions)))
cm = confusion_matrix(ytest, predictions, labels=class_labels)
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels).plot();