In [1]:
import tqdm
import torch

In [2]:
from src.dataset import load_pgn, load_multiple_pgns, create_value_csv_dataset
from src.dataclass import ChessDataset
from src.model import ConvNet

In [None]:
# Step 1: Create my training dataset
# Loads games via the project helper (num_pgns=26 presumably selects 26 PGN files -- confirm
# in src.dataset) and hands them to create_value_csv_dataset under the name "first26.2".
# NOTE(review): the loader cell further down reads "data/csv/first26.2.csv", which is
# presumably the file this call writes -- confirm in src.dataset.
games = load_multiple_pgns(num_pgns=26)
create_value_csv_dataset(games, name="first26.2")

In [None]:
# The step above created a CSV file with 2,302,559 chess state-action pairs.
# Since I have many separate datasets, I can hold some of them out as validation sets;
# this way I can skip performing a train/test split.
# Each sample in the dataset now also carries a value target in addition to the move label.

In [None]:
# Step 2: Create a validation dataset
# Same pipeline as the training set, but built from a single PGN file and written
# under the name "le2016-01.2".
# Note: this rebinds the notebook-level name `games`.
games = load_pgn("data/pgn/lichess_elite_2016-01.pgn")
create_value_csv_dataset(games, name="le2016-01.2")

In [3]:
# Step 3: Wrap the training CSV in the project's Dataset class and serve it
# through a PyTorch DataLoader in shuffled mini-batches of 256 samples.
dataset = ChessDataset("data/csv/first26.2.csv")
loader = torch.utils.data.DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=256,
)

In [4]:
# Step 4: Pick the compute device (GPU when one is available, CPU otherwise)
# and build the network directly on it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = ConvNet().to(device)

In [5]:
# Display the module repr (layer names and shapes) as the cell output.
model

ConvNet(
  (conv1): Conv2d(18, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=4096, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=4544, bias=True)
  (fc3): Linear(in_features=4096, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)

In [6]:
# Step 5: Set up the optimizer and the two training criteria.
# Adam updates every model parameter with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
# Mean-squared error for the value output, cross-entropy for the policy output.
value_loss = torch.nn.MSELoss()
policy_loss = torch.nn.CrossEntropyLoss()

In [7]:
# If the model isn't loaded from a checkpoint, training starts at epoch 0.
# `epoch` is the 0-based index of the first epoch the training cell will run;
# the optional checkpoint-loading cell overwrites it when resuming.
epoch = 0

In [None]:
# This cell loads the model from a previous state (optional: skip it to train from scratch).
# map_location=device remaps the stored tensors onto the device chosen in Step 4, so a
# checkpoint written on a GPU machine can still be restored when only the CPU is available.
checkpoint = torch.load("models/le_first_26.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Resume with the epoch after the one stored in the checkpoint
# (assumes the file uses the same layout the training cell saves -- confirm).
epoch = checkpoint['epoch'] + 1

In [None]:
# Step 6: Train the model
#
# Runs the 0-based epochs `epoch` .. EPOCHS inclusive, so a fresh run (epoch == 0)
# makes EPOCHS + 1 passes over `loader`. The starting `epoch` comes from the cell
# that sets it to 0 or from the checkpoint-loading cell. Every batch minimises the
# sum of the policy loss and the value loss.
EPOCHS = 50
# Per-epoch average *policy* loss only. The list is reset whenever this cell runs,
# so the loss log written below only covers the current run.
train_losses = []
for epoch in tqdm.trange(epoch, EPOCHS+1):
    model.train()
    total_policy_loss = 0
    total_value_loss = 0

    for boards, labels, values in loader:
        boards = boards.to(device)
        labels = labels.to(device)
        values = values.to(device).float()

        optimizer.zero_grad()

        policy, value = model(boards)
        p_loss = policy_loss(policy, labels)
        # Remove only the trailing singleton dimension (assumes the value head returns
        # (batch, 1) -- confirm in src.model). A bare squeeze() would also drop the batch
        # dimension for a batch of one sample, leaving a 0-d prediction that MSELoss has
        # to broadcast against the (1,) target.
        v_loss = value_loss(value.squeeze(-1), values)

        loss = p_loss + v_loss

        loss.backward()
        optimizer.step()

        total_policy_loss += p_loss.item()
        total_value_loss += v_loss.item()

    # Average of the per-batch mean losses over the epoch.
    avg_p_loss = total_policy_loss / len(loader)
    avg_v_loss = total_value_loss / len(loader)

    print(f"Epoch {epoch+1}/{EPOCHS+1} - Policy Loss: {avg_p_loss:.4f} - Value Loss: {avg_v_loss:.4f}")

    # Rewrite the loss log after every epoch so it survives an interrupted run.
    train_losses.append(avg_p_loss)
    torch.save({
        "train_losses": train_losses,
    }, "data/loss/loss_log_4.pt")

    # Checkpoint on every 10th epoch index (0, 10, 20, ...).
    if epoch % 10 == 0:
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, f"models/model.4.{epoch}.pth")

        print(f"Checkpoint saved at epoch {epoch}")

100%|██████████| 1/1 [10:30<00:00, 630.88s/it]

Epoch 1/0 - Policy Loss: 31136.5629 - Value Loss: 7615.7364
Epoch 1/0, Loss: 3.4615
Checkpoint saved at epoch 0



