In [None]:
import torch
from PyLinq import PyLinqData
# Check if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available")
    device = torch.device("cuda")
else:
    print("CUDA is not available")
    device = torch.device("cpu")

# Move tensor to the GPU
x = torch.tensor([1.0])
x = x.to(device)
print(x)

In [None]:
# we are going to train a simple neural network with an 
# input layer of 71 doubles and middle layer of 101 doubles and an output layer of 71 doubles
# we will use the sigmoid function as the activation function
# we will use the mean squared error as the loss function
# we will use the stochastic gradient descent as the optimization algorithm
# we will use the learning rate of 0.01
# we will use the batch size of 100
# we will use the number of epochs of 100
# we will use the random seed of 42
# will import the training dat from the file "x_training.csv" in the same folder as this notebook
# will import the target data from the file "y_labels.csv" in the same folder as this notebook
# we will save the trained model to the file "model.pt" in the same folder as this notebook
# we will save the loss history to the file "loss_history.csv" in the same folder as this notebook
# we will save the accuracy history to the file "accuracy_history.csv" in the same folder as this notebook
# we will save the training history to the file "training_history.csv" in the same folder as this notebook
# we will save the validation history to the file "validation_history.csv" in the same folder as this notebook
# we will save the test history to the file "test_history.csv" in the same folder as this notebook
# we will save the confusion matrix to the file "confusion_matrix.csv" in the same folder as this notebook

# import the libraries
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import random
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# set the random seed
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# load the data
x = pd.read_csv("../x_training.csv")
y = pd.read_csv("../y_labels.csv")

# The training data consists of doubles.  The first 64 columns of input data have only a few possible values.  Examine the first 64 columns and find out all the possible values that can be found in those columns.
uniqueValus = x.iloc[:, 0:64].nunique()


In [None]:
# print the unique values with some explanation of what they are and their frequency in each of the 64 columns
print("The first 64 columns of the input data have the following unique values")
# create a concatenation of the input to the output data in a separate 2D numpy array
x = x.to_numpy()
y = y.to_numpy()
# concatenate x and y into a single 2D numpy array
data = np.concatenate((x, y), axis=1)
# make sure the data is in the correct format
data = data.astype(float)

# # convert the data into ordinal encoding for all columns
# for i in range(0, 71):
#     uniqueValues = np.unique(data[:, i]) # this line finds the unique values in the column
#     for j in range(0, len(uniqueValues)):
#         data[:, i] = np.where(data[:, i] == uniqueValues[j], j, data[:, i])

# # validate the conversion by converting the data back to the original values and comparing the original and converted data
# for i in range(0, 71):
#     uniqueValues = np.unique(x[:, i])
#     for j in range(0, len(uniqueValues)):
#         x[:, i] = np.where(x[:, i] == j, uniqueValues[j], x[:, i])



In [None]:
# print(uniqueValues)
print(data)

In [None]:
# train an autoencoder to reduce the dimensionality of the input data
# the input data has 71 columns
# the output data has 71 columns
# the middle layer has 44 columns
# the activation function is the sigmoid function
# the loss function is the mean squared error
# the optimization algorithm is the stochastic gradient descent
# the learning rate is 0.05 and the batch size is 100
# the number of epochs is 100
# the random seed is 42
# the model is saved to the file "autoencoder.pt"
# the loss history is saved to the file "autoencoder_loss_history.csv"

# create the autoencoder class
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(71, 500),
            nn.Sigmoid()
        )
        self.decoder = nn.Sequential(
            nn.Linear(500, 71),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    


In [None]:
    
# create the autoencoder
autoencoder = Autoencoder()
autoencoder.to(device)

# create the loss function
criterion = nn.MSELoss()

# create the optimizer
optimizer = optim.SGD(autoencoder.parameters(), lr=0.04)

# create the data loader
dataLoader = torch.utils.data.DataLoader(x, batch_size=100, shuffle=True)

# train the autoencoder
lossHistory = []


In [None]:

for epoch in range(1000):
    for i, data in enumerate(dataLoader, 0):
        inputs = data
        inputs = inputs.to(device)
        optimizer.zero_grad()
        outputs = autoencoder(inputs.float())
        loss = criterion(outputs.float(), inputs.float())
        loss.backward()
        optimizer.step()
    lossHistory.append(loss.item())
    print("Epoch: ", epoch, " Loss: ", loss.item())

# save the autoencoder
torch.save(autoencoder.state_dict(), "autoencoder.pt")


In [None]:

# save the loss history
lossHistory = pd.DataFrame(lossHistory, columns=["Loss"])

lossHistory.to_csv("autoencoder_loss_history.csv", index=False)

# plot the loss history
plt.plot(lossHistory)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Autoencoder Loss History")
plt.show()




In [None]:
# choose a random single input from the input data and use the trained autoencoder to encode and decode the input
# print the original input and the encoded and decoded input
# choose a random input
input = x[0, :]
# encode and decode the input
input = torch.tensor(input)
input = input.to(device)
output = autoencoder(input.float())
output = output.cpu().detach().numpy()
input = input.cpu().detach().numpy()
print("Original Input: ", input)
print("Output: ", output)   

# train a neural network to classify the input data
# the input data has 71 columns
# the output data has 71 columns
# the middle layer has 101 columns
# the activation function is the sigmoid function
# the loss function is the mean squared error
# the optimization algorithm is the stochastic gradient descent
# the learning rate is 0.01 and the batch size is 100
# the number of epochs is 500
# the random seed is 42
# the model is saved to the file "model.pt"

# create the neural network class


In [None]:

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(71, 101)
        self.layer2 = nn.Linear(101, 71)
        self.activation = nn.Sigmoid()
    def forward(self, x):
        x = self.layer1(x)
        x = self.activation(x)
        x = self.layer2(x)
        x = self.activation(x)
        return x
    
# create the neural network
neuralNetwork = NeuralNetwork()
neuralNetwork.to(device)

# create the loss function
criterion = nn.MSELoss()

# create the optimizer
optimizer = optim.SGD(neuralNetwork.parameters(), lr=0.11)

# create the data loader
dataLoader = torch.utils.data.DataLoader(x, batch_size=100, shuffle=True)


In [None]:

# train the neural network
lossHistory = []
for epoch in range(500):
    for i, data in enumerate(dataLoader, 0):
        inputs = data
        inputs = inputs.to(device)
        optimizer.zero_grad()
        outputs = neuralNetwork(inputs.float())
        loss = criterion(outputs.float(), inputs.float())
        loss.backward()
        optimizer.step()
    lossHistory.append(loss.item())
    print("Epoch: ", epoch, " Loss: ", loss.item())

# save the neural network
torch.save(neuralNetwork.state_dict(), "model.pt")


In [None]:

# save the loss history
lossHistory = pd.DataFrame(lossHistory, columns=["Loss"])
lossHistory.to_csv("loss_history.csv", index=False)

# plot the loss history
plt.plot(lossHistory)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss History")
plt.show()

# choose a random single input from the input data and use the trained neural network to predict the output
# print the original input and the predicted output
# choose a random input
input = x[0, :]
# predict the output
input = torch.tensor(input)

input = input.to(device)
output = neuralNetwork(input.float())
output = output.cpu().detach().numpy()
input = input.cpu().detach().numpy()
print("Original Input: ", input)
print("Output: ", output)





In [67]:
# define the function to add the enpassant target rows
def addEnpassantTargets(origRow, fen, i):
    # create a string to represent the row
    # throw an error if i is not 2 or 6
    if(i != 2 and i != 5):
        raise ValueError("i must be 2 or 6")
    row = origRow
    if(fen[3] != "-"):
        if(int(fen[3][1]) == i + 1 or int(fen[3][1]) == i):
            cols = "abcdefgh"
            index = cols.index(fen[3][0])
            # use the simplest notation to replace the character at the index with the t character
            row = row[:index] + ("t" if i == 2 else "T") + row[index + 1:]
    # set row to the 8 char substring of all starting at side * 8
    return row

In [68]:
def castlingRightsToString(castlingRights):
    # Define the order of castling rights as they should appear in the string
    orderedRights = "KQkq"
    # Use list comprehension to check for each right in orderedRights and replace with "-" if absent
    return ''.join(c if c in castlingRights else '-' for c in orderedRights)

def revertCastlingRights(paddedRights):
    # Filter out '-' characters and join the remaining characters to form the castling rights part of FEN
    castlingRights = ''.join(c for c in paddedRights if c != '-')
    # Return a dash if there are no castling rights, otherwise return the castling rights string
    return castlingRights if castlingRights else '-'

# test these functions with a variety of inputs such as KQkq, Kkq, KQk, KQ, K, Q, k, q, and the empty string
# create a list of standard FEN castling rights strings which are variable length
castlingRights = ["KQkq", "Kkq", "KQk", "KQ", "K", "Q", "k", "q", "-"]
# use the map function to apply the function to each element of the list and then back again and compare the final result with the original
# loop through each string in the list and outpt the result of the function and the original string to the console
convertedRights = list(map(castlingRightsToString, castlingRights))
revertedRights = list(map(revertCastlingRights, convertedRights))
# go throough each string in the list and print the original string and the result of the function in detail
for i in range(len(castlingRights)):
    print("Original: ", castlingRights[i], " Converted: ", convertedRights[i], " Reverted: ", revertedRights[i])

    

Original:  KQkq  Converted:  KQkq  Reverted:  KQkq
Original:  Kkq  Converted:  K-kq  Reverted:  Kkq
Original:  KQk  Converted:  KQk-  Reverted:  KQk
Original:  KQ  Converted:  KQ--  Reverted:  KQ
Original:  K  Converted:  K---  Reverted:  K
Original:  Q  Converted:  -Q--  Reverted:  Q
Original:  k  Converted:  --k-  Reverted:  k
Original:  q  Converted:  ---q  Reverted:  q
Original:  -  Converted:  ----  Reverted:  -


In [69]:

# generate some new categorical data for a new neural network
# the new data has enough columns to represent a variation of FEN notation for a chess board, where the / is left out because
# each row is represented in full without any compression of the empty squares
# here is an example of a normal FEN notation: rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
# define a function to take the normal notation and convert it to a 64 column representation without slashes plus
# The full move and half move counts are discarded, but the move turn and castling rights are preserved
# here is an example of the conversion of the starting position: rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 to the new format
# rnbqkbnrpppppppp00000000000000000000000000000000PPPPPPPPRNBQKBNRwKQkq
# the first 64 columns represent the board, and the last 5 columns represent the move turn and castling rights
# the new data has 69 columns
def convertFEN(fen):
    # split the FEN into its 6 components
    fen = fen.split(" ")
    # split the board into its 8 rows
    board = fen[0].split("/")
    # create a new board with the rows combined and the empty squares compressed
    newBoard = ""
    for i in range(0, 8):
        # call a function to add the enpassant target rows if i has the value of 2 or 5
        for j in range(0, len(board[i])):
            if board[i][j].isdigit():
                for k in range(0, int(board[i][j])):
                    newBoard += "0"
            else:
                newBoard += board[i][j]
        if i == 2 or i == 5:
            # pass the last 8 chars of newBoard into the enpassant function, then replace the last 8 chars of newBoard with the result
            newBoard = newBoard[:-8] + addEnpassantTargets(newBoard[-8:], fen, i)
            

    # add the columns for move turn and castling rights
    newBoard += fen[1] + castlingRightsToString(fen[2])
    return newBoard


In [70]:
def revertFEN(customFEN):
    # Extract the board part and metadata from the custom FEN
    boardPart, moveTurn, paddedRights = customFEN[:64], customFEN[64], customFEN[65:]
    
    # Initialize variables for en passant target
    enPassantTarget = "-"
    reconstructedBoard = ""
    
    # Reconstruct the board
    for i in range(8):
        row = boardPart[i*8:(i+1)*8]
        newRow = ""
        emptyCount = 0
        for j, char in enumerate(row):
            if char == '0':
                emptyCount += 1
            elif char == 't' or char == 'T':  # Handle en passant target
                if emptyCount > 0:
                    newRow += str(emptyCount)
                    emptyCount = 0
                newRow += '0'  # Replace 't' with '0' for now, as we handle en passant separately
                # Determine the en passant square
                cols = "abcdefgh"
                enPassantTarget = cols[j] + str(6 if i == 3 else 3)  # Adjust rank based on the row
            else:
                if emptyCount > 0:
                    newRow += str(emptyCount)
                    emptyCount = 0
                newRow += char
        if emptyCount > 0:  # Handle trailing empty squares in a row
            newRow += str(emptyCount)
        reconstructedBoard += newRow + "/" if i < 7 else newRow  # Avoid adding '/' after the last row
    
    # Revert castling rights
    castlingRights = revertCastlingRights(paddedRights)
    
    # Assemble the FEN including en passant target, without move numbers
    return f"{reconstructedBoard} {moveTurn} {castlingRights} {enPassantTarget}"

# Example usage
customFEN = "r0bqkbnrppp0ppppn0000000000pP00000000000000t0000PPPP0PPPRNBQKBNRwKQkq"
assert len(customFEN) == 69 # 64 for the board, 1 for move turn, 4 for castling rights
print(revertFEN(customFEN))
print(revertFEN(customFEN) in "r1bqkbnr/ppp1pppp/n7/3pP3/8/8/PPPP1PPP/RNBQKBNR w KQkq d6 0 3")


r1bqkbnr/ppp1pppp/n7/3pP3/8/304/PPPP1PPP/RNBQKBNR w KQkq d3
False


In [71]:
# test the function on the starting position
# test with a different FEN where a move sequence E2E4, etc that leads to a board position where enpassant is possible
fen1 = "r1bqkbnr/ppp1pppp/n7/3pP3/8/8/PPPP1PPP/RNBQKBNR w KQkq d6 0 3"
newBoard1 = convertFEN(fen1)
print(newBoard1)

fen2 = "r1bqkbnr/ppp1p1pp/n2P4/8/5pP1/N7/PPPP1P1P/R1BQKBNR b KQkq g3 0 5"
newBoard2 = convertFEN(fen2)
print(newBoard2)

fen3 = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"
newBoard3 = convertFEN(fen3)
print(newBoard3)

fen4 = "2kr1bnr/pppqp1pp/n2P4/5b2/3P1B2/N5p1/PPPQ1P1P/R3KBNR w KQ - 5 9"
newBoard4 = convertFEN(fen4)
print(newBoard4)


r0bqkbnrppp0ppppn0000000000pP00000000000000T0000PPPP0PPPRNBQKBNRwKQkq
r0bqkbnrppp0p0ppn00P00t00000000000000pP0N0000000PPPP0P0PR0BQKBNRbKQkq
rnbqkbnrpppppppp00000000000000000000000000000000PPPPPPPPRNBQKBNRwKQkq
00kr0bnrpppqp0ppn00P000000000b00000P0B00N00000p0PPPQ0P0PR000KBNRwKQ--


In [72]:
#!pip install python-chess
import chess
import chess.pgn
import pandas as pd
import io

# Assuming convertFEN is defined as before and working correctly

# Sample PGN string for demonstration
pgn_string = """
[Event "NicosiaKyrenia's Study: Enpassant"]
[Site "https://lichess.org/study/WCSL1Ul1/YZoxPnWI"]
[Result "*"]
[Variant "Standard"]
[ECO "B00"]
[Opening "Lemming Defense"]
[Annotator "https://lichess.org/@/NicosiaKyrenia"]
[UTCDate "2024.02.18"]
[UTCTime "01:03:04"]

1. e4 Na6 2. e5 d5 3. exd6 f5 4. Na3 f4 5. g4 fxg3 *
"""

# Initialize pandas DataFrames for training data and labels
training_data = pd.DataFrame(columns=['CustomFEN'])
labels_data = pd.DataFrame(columns=['CustomFEN'])

# Read the PGN
pgn = io.StringIO(pgn_string)
game = chess.pgn.read_game(pgn)

# Initialize a board from the game
board = game.board()

for move in game.mainline_moves():
    # Generate standard FEN before the move
    fen_before = board.fen()
    # Convert to custom FEN and store as training data
    custom_fen_before = convertFEN(fen_before)
    training_data = pd.concat([training_data, pd.DataFrame({'CustomFEN': [custom_fen_before]})], ignore_index=True)
    
    # Apply the move on the board
    board.push(move)
    
    # Generate standard FEN after the move
    fen_after = board.fen()
    # Convert to custom FEN and store as label data
    custom_fen_after = convertFEN(fen_after)
    labels_data = pd.concat([labels_data, pd.DataFrame({'CustomFEN': [custom_fen_after]})], ignore_index=True)
    # custom_fen_after = convertFEN(fen_after)
    # labels_data = labels_data.append({'CustomFEN': custom_fen_after}, ignore_index=True)

# For demonstration, print the first few rows of each DataFrame
print(training_data.head())
print(labels_data.head())


                                           CustomFEN
0  rnbqkbnrpppppppp000000000000000000000000000000...
1  rnbqkbnrpppppppp00000000000000000000P000000000...
2  r0bqkbnrppppppppn0000000000000000000P000000000...
3  r0bqkbnrppppppppn00000000000P00000000000000000...
4  r0bqkbnrppp0ppppn0000000000pP00000000000000T00...
                                           CustomFEN
0  rnbqkbnrpppppppp00000000000000000000P000000000...
1  r0bqkbnrppppppppn0000000000000000000P000000000...
2  r0bqkbnrppppppppn00000000000P00000000000000000...
3  r0bqkbnrppp0ppppn0000000000pP00000000000000T00...
4  r0bqkbnrppp0ppppn00P00000000000000000000000000...


In [73]:
# This the following commented code is the general approach to go through the 
# training data and labels and convert the data to ordinal encoding
# convert the data into ordinal encoding for all columns
# for i in range(0, 69):
#     uniqueValues = np.unique(training_data.iloc[:, i]) # this line finds the unique values in the column
#     for j in range(0, len(uniqueValues)):
#         training_data.iloc[:, i] = np.where(training_data.iloc[:, i] == uniqueValues[j], j, training_data.iloc[:, i])
#         labels_data.iloc[:, i] = np.where(labels_data.iloc[:, i] == uniqueValues[j], j, labels_data.iloc[:, i])

# however, it is possible to synthesize the possible unique values for each column keeping in mind these rules:
# There are no pawns possible in rows 1 and 8
# Bishops can only be only be in the same color square as the starting square (not sure if that will help with ordinal encoding)
# There are only a few possible values for the castling rights and the move turn
# The enpassant target can only be in rows 3 and 6
# The enpassant target can only be in the same chess board column as the last move's pawn move (not sure if that will help with ordinal encoding)
# define a function to synthesize the possible unique values for each column of the training data and labels
# the function will return a list of the possible unique values for each column (don't know why it would be a list of lists)
# the function uniqueValuesPossibleForColumn will take a record and the column index as input and return the possible unique values for that column
# for i in range(0, 69):
#     uniqueValues = uniqueValuesPossibleForColumn(training_data.iloc[0, :], i)

def synthesize_custom_ordinal_values_final():
    ordinal_values = {}

    # Define all possible pieces, including uppercase for white and lowercase for black
    pieces_general = ['R', 'N', 'B', 'Q', 'K', 'P', 'r', 'n', 'b', 'q', 'k', 'p', '0']
    pieces_no_pawns = ['R', 'N', 'B', 'Q', 'K', 'r', 'n', 'b', 'q', 'k', '0']  # Exclude pawns in rows 1 and 8
    pieces_en_passant = ['R', 'N', 'B', 'Q', 'K', 'P', 'r', 'n', 'b', 'q', 'k', 'p', '0', 'T', 't']  # Include 't' for en passant
    
    # Assign possible values for each board square
    for i in range(64):
        row = i // 8 + 1
        if row in [1, 8]:
            ordinal_values[i] = pieces_no_pawns
        elif row in [3, 6]:  # Corrected rows for potential en passant targets, reflecting actual play possibilities
            ordinal_values[i] = pieces_en_passant
        else:
            ordinal_values[i] = pieces_general

    # Move turn possibilities
    ordinal_values[64] = ['w', 'b']
    
    # Castling rights - specific to each right, with '-' indicating the absence of the right
    ordinal_values[65] = ['K', '-']  # King-side for white
    ordinal_values[66] = ['Q', '-']  # Queen-side for white
    ordinal_values[67] = ['k', '-']  # King-side for black
    ordinal_values[68] = ['q', '-']  # Queen-side for black
    
    return ordinal_values

ordinal_values_final = synthesize_custom_ordinal_values_final()

# Example of accessing possible values for specific squares or data points
print("A1 square possibilities:", ordinal_values_final[0])  # Example for A1 square
print("Move turn possibilities:", ordinal_values_final[64]) # Example for move turn
print("First castling right possibility (King-side for white):", ordinal_values_final[65]) # Example for first castling right



A1 square possibilities: ['R', 'N', 'B', 'Q', 'K', 'r', 'n', 'b', 'q', 'k', '0']
Move turn possibilities: ['w', 'b']
First castling right possibility (King-side for white): ['K', '-']


In [74]:
# test some of the custom FENs with the ordinal encoding to make sure the encoding is working correctly
# for each custom FEN, go through each character and confirm that the character is in the possible unique values for that column
# if it is not, print an error message

for fen in [newBoard1, newBoard2, newBoard3, newBoard4]:
    for i, char in enumerate(fen):
        if char not in ordinal_values_final[i]:
            print(f"Error: {char} not in possible values for column {i}")


In [75]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np

# Placeholder for the convertFEN function you provided earlier
def convertFEN(fen):
    # Your convertFEN function here
    custom_fen = convertFEN(fen)
    return custom_fen

# Function to encode a custom FEN string to numerical format
def encode_custom_fen(custom_fen):
    # Implement the encoding logic here, using the ordinal_values mapping
    # This is a placeholder function; you'll need to replace it with your actual encoding logic
    # encoded = np.array([ordinal_values[char] for char in custom_fen])
    # define a dictionary to hold the ordinal values for each piece.  Use negative numbers for black, and positive for white.
    # pawns are 1, knights are 3, bishops 3.5, rooks 5, queens 9, and kings 15
    pieceValues = {'R': 5, 'N': 3, 'B': 3.5, 'Q': 9, 'K': 15, 'P': 1, 'T': 1.3, 'r': -5, 'n': -3, 'b': -3.5, 'q': -9, 'k': -15, 'p': 1, 't': 1.3, '0': 0}
    moveTurn = {'w': 1, 'b': -1}
    castlingRights = {'K': 1, 'Q': 1, 'k': -1, 'q': -1, '-': 0}
    # convert the custom FEN to a list of floats in a numpy array
    encoded = np.array([pieceValues[char] for char in custom_fen[:64]] + [moveTurn[custom_fen[64]]] + [castlingRights[char] for char in custom_fen[65:]])
    # make sure its a float array
    encoded = encoded.astype(float)    

    return encoded

class ChessDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

def load_and_preprocess_data(training_csv, labels_csv):
    # Load the CSV files
    training_df = pd.read_csv(training_csv)
    labels_df = pd.read_csv(labels_csv)
    
    # Convert FEN strings to custom format and then encode
    training_data_encoded = [encode_custom_fen(convertFEN(fen)) for fen in training_df['FEN']]
    labels_encoded = [encode_custom_fen(convertFEN(fen)) for fen in labels_df['FEN']]
    
    # Convert to PyTorch tensors
    X = torch.tensor(training_data_encoded, dtype=torch.float32)
    y = torch.tensor(labels_encoded, dtype=torch.float32)
    
    return X, y

def create_dataloaders(X, y, batch_size=64):
    # Split the data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Create PyTorch datasets
    train_dataset = ChessDataset(X_train, y_train)
    val_dataset = ChessDataset(X_val, y_val)
    
    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader

# Example usage
training_csv = 'path/to/training_data.csv'
labels_csv = 'path/to/labels_data.csv'

X, y = load_and_preprocess_data(training_csv, labels_csv)
train_loader, val_loader = create_dataloaders(X, y)

# Now `train_loader` and `val_loader` are ready to be used in a training loop with PyTorch.


FileNotFoundError: [Errno 2] No such file or directory: 'path/to/training_data.csv'