In [2]:
import chess

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from flatbuffers.packer import float32
#from tensorflow.python.keras.backend import learning_phase

import encoding_tools as EncodingTools

from model import ChessNet
from train import train

import torch
from torch.utils.data import DataLoader, Dataset


MODE = "DEBUG"  # If in release mode, please comment this line
# MODE = "RELEASE"

In [19]:
from FEN_to_chessboard import FenToChessBoard
from encoder_decoder import encode_board
# Pulling in training data using Pandas
df = pd.concat([
    pd.read_csv('stockfish_data/chess_games_1.csv'),
    pd.read_csv('stockfish_data/chess_games_2.csv'),
    pd.read_csv('stockfish_data/chess_games_2.csv')]
)

non_zero_winners = df[df['Winner'] != 0]
print("Game with an existed winner:", non_zero_winners.shape) # (18830, 4)

train_df = non_zero_winners[:5000] if MODE == "DEBUG" else non_zero_winners[:16000]
# We'll also grab the last 1000 examples as a validation set
val_df = non_zero_winners[-1000:] if MODE == "DEBUG" else non_zero_winners[-2800:]

# We'll stack all our encoded boards into a single numpy array
X_train_cur_board = np.stack(train_df['FEN'].apply(FenToChessBoard.fen_to_board))
X_train = np.stack(train_df['FEN'].apply(FenToChessBoard.fen_to_board).apply(encode_board)).reshape(-1, 22, 8, 8)
y_train = {'best_move' : train_df['BestMove'], 'winner' : train_df['Winner']}

X_val_cur_board = np.stack(val_df['FEN'].apply(FenToChessBoard.fen_to_board))
X_val = np.stack(val_df['FEN'].apply(FenToChessBoard.fen_to_board).apply(encode_board)).reshape(-1, 22, 8, 8)
y_val = {'best_move' : val_df['BestMove'], 'winner' : val_df['Winner']}

Game with an existed winner: (18830, 4)


In [20]:
# Instantiate the model
model = ChessNet()

# Move tensors to device if CUDA or MPS is available
if torch.cuda.is_available():
    device = "cuda"
# elif torch.mps.is_available(): # For M series chips of Mac
#     device = "mps"
else:
    device = "cpu"

model = model.to(device)

In [22]:
# Custom Dataset class to handle X and y pairs
class ChessDataset(Dataset):
    def __init__(self, X, param_dict):
        self.X = X
        self.best_move = param_dict['best_move'] # 需要进行 embedding -> [73, 8, 8]
        self.winner = param_dict['winner']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        X_tensor = torch.tensor(self.X[idx], dtype=torch.float32)
        # 将 best_move 和 winner 转为张量
        best_move_tensor = torch.tensor(self.best_move[idx], dtype=torch.float32)
        winner_tensor = torch.tensor(self.winner[idx], dtype=torch.int8)
        # 返回 X[idx] 和处理后的张量
        return X_tensor, (best_move_tensor, winner_tensor)
        # return self.X[idx], (self.best_move[idx], self.winner[idx])


# Create Dataset objects for training and validation
train_dataset = ChessDataset(X_train, y_train)
val_dataset = ChessDataset(X_val, y_val)

# Create DataLoaders for batching
batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

for X_batch, y_batch in train_loader:
    # 获取一个批次的数据
    best_move_batch, winner_batch = y_batch  # 拆分 y_batch
    print("X_batch shape:", X_batch.shape)  # 打印 X 的维度
    print("best_move shape:", best_move_batch.shape)  # 打印 best_move 的维度
    print("winner shape:", winner_batch.shape)  # 打印 winner 的维度
    break  # 打印一个批次后停止

KeyError: 920

In [19]:
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim

n_epochs = 40
learning_rate = 0.001
train_losses, val_losses = [], []
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss().to(device)

for epoch in range(n_epochs):
    model.train()  # Set model to training mode
    epoch_train_loss = 0

    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{n_epochs} [Training]"):
        X_batch = X_batch.permute(0, 3, 1, 2).to(device)
        y_batch = y_batch.to(device)
        # Change from [batch_size, height, width, channels] to [batch_size, channels, height, width]
        # print("X_batch shape:", X_batch.shape) if MODE == "DEBUG" else None
        # print("Y_batch shape:", y_batch.shape) if MODE == "DEBUG" else None
        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = loss_fn(predictions, y_batch)
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss.item()

    # Average training loss for the epoch
    train_losses.append(epoch_train_loss / len(train_loader))
    print(f"Epoch {epoch + 1}: Training Loss = {train_losses[-1]}")

    # Validation loop
    model.eval()  # Set model to evaluation mode
    epoch_val_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in tqdm(val_loader, desc=f"Epoch {epoch + 1}/{n_epochs} [Validation]"):
            predictions = model(X_batch)
            loss = loss_fn(predictions, y_batch)
            epoch_val_loss += loss.item()

    # Average validation loss for the epoch
    val_losses.append(epoch_val_loss / len(val_loader))
    print(f"Epoch {epoch + 1}: Validation Loss = {val_losses[-1]}")

Epoch 1/40 [Training]:   0%|          | 0/40 [00:00<?, ?it/s]


torch.Size([256, 13, 8, 8])


RuntimeError: shape '[-1, 22, 8, 8]' is invalid for input of size 212992

In [None]:
# Plotting results (optional)
plt.style.use('ggplot')
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.legend()
plt.title('Loss During Training')
plt.show()

In [None]:
# Implementing our model as a function
def play_nn(fen, show_move_evaluations=False):
    # We can create a python-chess board instance from the FEN string like this:
    board = chess.Board(fen=fen)

    # And then evaluate all legal moves
    moves = []
    input_vectors = []
    for move in board.legal_moves:
        # For each move, we'll make a copy of the board and try that move out
        candidate_board = board.copy()
        candidate_board.push(move)
        moves.append(move)
        input_vectors.append(EncodingTools.encode_board(str(candidate_board)).astype(np.int32).flatten())

    input_vectors = np.stack(input_vectors)
    # This is where our model gets to shine! It tells us how good the resultant score board is for black:
    scores = model.predict(input_vectors, verbose=0)
    # argmax gives us the index of the highest scoring move
    if board.turn == chess.BLACK:
        index_of_best_move = np.argmax(scores)
    else:
        # If we're playing as white, we want black's score to be as small as possible, so we take argmax of the negative of our array
        index_of_best_move = np.argmax(-scores)

    if show_move_evaluations:
        print(zip(moves, scores))

    best_move = moves[index_of_best_move]

    # Now we turn our move into a string, return it and call it a day!
    return str(best_move)

In [None]:
# Now we'll import our test set, and make some final predictions!

test_df = pd.read_csv('datasets/test.csv')

test_df.head()

In [None]:
# Making all of our predictions happens in this one line!
# We're basically saying "run play_nn on all the boards in the test_df, and then keep the results as best_move"
# Because this invovles running our model a _ton_ this step will take a while.

test_df['best_move'] = test_df['board'].apply(play_nn)

In [None]:
test_df['best_move']

In [None]:
# Let's make sure our submission looks like the sample submission
submission = test_df[['id', 'best_move']]
print(submission.head())

sample_submission = pd.read_csv('datasets/sample_submission.csv', index_col='id')
print(sample_submission.head())

In [None]:
# We should not output the submission file
# submission.to_csv('submission.csv', index=False)