In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as DataLoader
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import numpy as np
from src.utils.model import ChessEngine
import src.utils.board_to_tensor as bt
from src.utils.data_preperation import chessDataLoader

In [2]:
tensor_file = "dataset/board_1_tensors.pt"
batch_size = 256
dataloader = chessDataLoader(tensor_file, batch_size=batch_size)

In [3]:
def train(model, train_loader, criterion, optimizer, num_epochs=10, device='mps'):
    if not os.path.exists("trainer"):
        os.makedirs("trainer")

    train_losses = []

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0

        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

        for batch in progress_bar:
            batch = batch.to(device)

            optimizer.zero_grad()
            outputs = model(batch)

            loss = criterion(outputs, batch[:, -1, :, :, :].view(-1, 4672))  # Adjust as needed
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())

        avg_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_loss)

        torch.save(model.state_dict(), f"trainer/model_epoch_{epoch+1}.pth")
        print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}")

    # Plot loss curve
    plt.figure(figsize=(8, 5))
    plt.plot(range(1, num_epochs + 1), train_losses, marker='o', label="Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Training Loss")
    plt.legend()
    plt.savefig("trainer/training_metrics.png")
    plt.show()



In [4]:
# Main execution
if __name__ == "__main__":
    batch_size = 256
    num_epochs = 10
    learning_rate = 0.001
    device = "mps"

    # Load dataset
    train_loader = chessDataLoader("dataset/board_1_tensors.pt", batch_size=batch_size)

    # Initialize model, loss, and optimizer
    model = ChessEngine().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train model
    train(model, train_loader, criterion, optimizer, num_epochs, device)

Epoch 1/10:   0%|                                     | 0/12702 [00:06<?, ?it/s]


RuntimeError: shape '[-1, 4672]' is invalid for input of size 311296