In [2]:
from zipfile import ZipFile

# opens the chess boards.zip file into the chessBoards object
zipFilePath = './archive.zip'
chessBoards = ZipFile(zipFilePath, 'r')

#datasetTest = []
#datasetTrain = []
test = []
train = []
newpath = True

# extract the file names from the train and test folders in the zip archive
for file in chessBoards.namelist():
    # fills the file names from within the dataset subfolder
    """if file[:13] == 'dataset/test/':
        datasetTest.append(file)
    if file[:14] == 'dataset/train/':
        datasetTrain.append(file)"""

    if file[:4] == 'test':
        test.append(file)
    if file[:5] == 'train':
        train.append(file)

    # find any new file paths not accounted for
    """if file[:13] != 'dataset/test/' and \
        file[:14] != 'dataset/train/' and \
        file[:4] != 'test'and \
        file[:5] != 'train' and \
        newpath:
        print("new path: ", file)
        newpath = False"""

print("Files in test: ", len(test))
print("Files in train: ", len(train))



Files in test:  20000
Files in train:  80000


In [7]:
import pickle
from PIL import Image
import io
import os
from zipfile import ZipFile
import random

# Extracts the data from the zip file and saves it to a pickle file
#
# @param[in] zipFilePath - the path to the zip file
# @param[in] outputPickleFile - the path to the pickle file to save the data to
# @param[in] directory - the directory within the zip file to extract the data from
# @param[in] sampleAmount - the number of samples to extract from the zip file
def extractData(zipFilePath, outputPickleFile, directory='test/', sampleAmount=None):
    data = []

    with ZipFile(zipFilePath, 'r') as chess_boards:
        # Get all file names in the zip file that start with the directory
        file_names = [name for name in chess_boards.namelist() if name.startswith(directory)]

        # Shuffle the file names to get a random sample
        random.shuffle(file_names)

        for file_name in file_names:
            if file_name.startswith(directory):
                # Extract FEN string from file name
                fen_string = file_name.split('/')[-1]

                # Extract image bytes and convert to PIL Image
                with chess_boards.open(file_name) as image_file:
                    image_bytes = image_file.read()
                    image = Image.open(io.BytesIO(image_bytes))

                    # Converts image to grayscale
                    image = image.convert('L')

                    data.append((fen_string, image))

                # Check if the desired number of samples is reached
                if sampleAmount is not None and len(data) >= sampleAmount:
                    break

    # Get the directory path and base filename
    output_dir, base_filename = os.path.split(outputPickleFile)
    
    # Include the number of samples in the base filename
    if sampleAmount is not None:
        base_filename = f"{os.path.splitext(base_filename)[0]}_{sampleAmount}.pickle"
    
    # Construct the full output pickle file path
    outputPickleFile = os.path.join(output_dir, base_filename)

    # Save data to pickle file
    with open(outputPickleFile, 'wb') as pickle_file:
        pickle.dump(data, pickle_file)

    print(f"Data extracted and saved to {outputPickleFile}")

# Entry point to start the process of pickling the data
def pickleData():
    zipFilePath = './archive.zip'
    outputPickleFile = './test_data.pickle'
    extractData(zipFilePath, outputPickleFile, "test/", 100)

    outputPickleFile = './train_data.pickle'
    extractData(zipFilePath, outputPickleFile, "train/", 1000)

pickleData()


Data extracted and saved to .\test_data_100.pickle
Data extracted and saved to .\train_data_1000.pickle


In [35]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle
import numpy as np

pieceLabels = ['p', 'r', 'n', 'b', 'q', 'k', 'P', 'R', 'N', 'B', 'Q', 'K']

# Loads the data from the pickle file
#
# @param[in] file_path - the path to the pickle file
# @return - the data loaded from the pickle file
def loadData(file_path):
    print(f"Loading data from {file_path}")
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
    return data

# Splits the image into 50x50 chunks
#
# @param[in] image - the image to split
# @return - a list of 50x50 chunks
def splitIntoChunks(image):
    chunks = []
    width, height = image.size
    for y in range(0, height, 50):
        for x in range(0, width, 50):
            chunk = image.crop((x, y, x + 50, y + 50))
            chunks.append(chunk)
    return chunks

# Prepares the data for training
#
# @param[in] data - the data to prepare
# @return - the prepared data
def prepareData(data, maxEmptySpaces=None):
    X = []
    y = []
    emptySpacesCount = 0

    for fen_string, image in data:
        chunks = splitIntoChunks(image)
        for chunk in chunks:

            # Convert the chunk to a numpy array and flatten it
            chunkArray = np.array(chunk).flatten()

            # Label corresponding to the piece
            if fen_string:
                if fen_string[0] in pieceLabels:
                    pieceLabel = fen_string[0]
                    y.append(pieceLabel)
                    X.append(chunkArray)
                    print(f"Piece: {pieceLabel}")
                else:
                    if maxEmptySpaces is None or emptySpacesCount < maxEmptySpaces:
                        pieceLabel = ' '
                        y.append(pieceLabel)
                        X.append(chunkArray)
                        emptySpacesCount += 1

                # Update fen_string to remove the piece label
                fen_string = fen_string[1:]
            else:
                if maxEmptySpaces is None or emptySpacesCount < maxEmptySpaces:
                    y.append(' ')
                    X.append(chunkArray)
                    emptySpacesCount += 1

    return np.array(X), np.array(y)

# Trains a model using the training data from the pickle file
#
# @param[in] X_train - the training data
# @param[in] y_train - the training labels
# @return - the trained model
def trainModel(X_train, y_train):
    model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=None)
    print("Training model...")
    model.fit(X_train, y_train)
    return model

# Evaluates the model using the test data from the pickle file
#
# @param[in] model - the model to evaluate
# @param[in] X_test - the test data
# @param[in] y_test - the test labels
def evaluateModel(model, X_test, y_test):
    print("Evaluating model...")
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model accuracy: {accuracy}")

    for real, pred in zip(y_test, y_pred):
        if real != ' ':
            print(f"Real: {real}, Predicted: {pred}")

# Entry point to start the process of training and evaluating the model
def main():
    # Load the data
    train_data = loadData('./train_data_200.pickle')
    test_data = loadData('./test_data_100.pickle')

    # Prepare the data
    print("Preparing training data...")
    X_train, y_train = prepareData(train_data, maxEmptySpaces=100)
    print("Preparing test data...")
    X_test, y_test = prepareData(test_data)

    # Train the model
    model = trainModel(X_train, y_train)

    # Evaluate the model
    evaluateModel(model, X_test, y_test)

main()


Loading data from ./train_data_200.pickle
Loading data from ./test_data_100.pickle
Preparing training data...
Piece: K
Piece: b
Piece: r
Piece: p
Piece: q
Piece: P
Piece: B
Piece: r
Piece: N
Piece: b
Piece: k
Piece: N
Piece: Q
Piece: Q
Piece: p
Piece: N
Piece: K
Piece: Q
Piece: r
Piece: N
Piece: N
Piece: R
Piece: r
Piece: N
Piece: B
Piece: R
Piece: n
Piece: k
Piece: p
Piece: N
Piece: n
Piece: Q
Piece: b
Piece: K
Piece: P
Piece: p
Piece: k
Piece: P
Piece: R
Piece: n
Piece: Q
Piece: p
Piece: b
Piece: n
Piece: N
Piece: k
Piece: q
Piece: P
Piece: R
Piece: K
Piece: R
Piece: R
Piece: p
Piece: b
Piece: B
Piece: p
Piece: N
Piece: r
Piece: K
Piece: n
Piece: Q
Piece: k
Piece: R
Piece: b
Piece: Q
Piece: N
Piece: n
Piece: n
Piece: p
Piece: r
Piece: q
Piece: N
Piece: B
Piece: K
Piece: b
Piece: Q
Piece: k
Piece: N
Piece: P
Piece: p
Piece: N
Piece: p
Piece: q
Piece: p
Piece: r
Piece: K
Piece: B
Piece: b
Piece: k
Piece: p
Piece: k
Piece: R
Piece: R
Piece: R
Piece: n
Piece: K
Piece: b
Piece: q
Piece: b