In [1]:
import os
import sys
import aim

# Tell the project's configuration loader which config file to read and which
# value to override (the moves directory, relative to this notebook).
# NOTE(review): presumably these must be set before `configuration` is
# imported further down in this cell -- confirm against the loader.
os.environ["CONFIG_PATHS"] = "../configs/self_play.yaml"
os.environ["CONFIG_OVERRIDES"] = 'game.moves_directory="../data/moves_10"'
# Make the project's source modules (display, configuration, neural_net, ...)
# importable from the notebook's working directory.
sys.path.append("../src")

import numpy as np
import torch
from torch import nn
from tqdm import tqdm
import time
from torch.utils.data import TensorDataset, DataLoader
from display import Display
from configuration import moves_data, config

Loaded config:  {"development": {"debug_mode": true, "profile": false, "runtime": 72000, "display_logs_in_console": false, "output_directory": "data/2024-11-24_00-18-45-literatist"}, "logging": {"save_interval": 3600, "mcts_report_fraction": 0, "ucb_report": false, "gpu_evaluation": true, "made_move": true}, "game": {"board_size": 10, "num_moves": 6233, "moves_directory": "../data/moves_10"}, "architecture": {"gameplay_processes": 6, "coroutines_per_process": 256, "game_flush_threshold": 200}, "training": {"run": true, "network_name": "default", "batch_size": 64, "policy_loss_weight": 0.158, "learning_rate": 0.001, "sample_window": 50000, "samples_per_generation": 10000, "sampling_ratio": 1.0, "sampling_delay": 10000, "new_data_check_interval": 30}, "networks": {"default": {"main_body_channels": 64, "value_head_channels": 16, "value_head_flat_layer_width": 64, "policy_head_channels": 64, "residual_blocks": 8, "model_path": "", "model_directory": "data/2024-11-24_00-18-45-literatist/mod

In [2]:
from neural_net import NeuralNet
from training.load_games import load_old_format_games

In [3]:
# Mini-batch size used by the DataLoaders built in this notebook.
BATCH_SIZE = 64

# Prefer Apple's Metal backend (the notebook's original choice), but fall back
# to CUDA and then CPU so the notebook still runs on machines without MPS.
# `getattr` guards against torch builds that predate `torch.backends.mps`.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
# Load the project's move tables via the configuration module; the call logs
# one "Loading file: ..." line per table it reads.
MOVES = moves_data()

Loading file: piece_indices
Loading file: rotation_mapping
Loading file: new_occupieds
Loading file: moves_ruled_out_for_all
Loading file: scores
Loading file: moves_ruled_out_for_player
Loading file: moves_enabled_for_player
Loading file: new_adjacents
Loading file: new_corners


In [4]:
# Directory whose files are listed to build both the train and test splits.
GAMES_DIR = "../data/2024-11-23_00-37-50-doublehandedness/games"

In [23]:
# Collect every regular, non-hidden file in GAMES_DIR, sorted once by path.
# os.listdir also returns sub-directories and dotfiles (e.g. macOS's
# .DS_Store); those are filtered out so they never reach the game loader.
game_paths = sorted(
    os.path.join(GAMES_DIR, name)
    for name in os.listdir(GAMES_DIR)
    if not name.startswith(".") and os.path.isfile(os.path.join(GAMES_DIR, name))
)

# Train on first 3/4, test on last 1/4.
# NOTE(review): the split follows lexicographic path order; confirm that this
# matches chronological order if a time-based hold-out is intended.
train_path_index_cutoff = 3 * len(game_paths) // 4

train_paths = game_paths[:train_path_index_cutoff]
test_paths = game_paths[train_path_index_cutoff:]

# Fail early with a clear message instead of building an empty dataset.
if not train_paths or not test_paths:
    raise ValueError(
        f"Need at least 2 game files in {GAMES_DIR!r} to build a train/test "
        f"split, found {len(game_paths)}."
    )

def paths_to_dataloader(paths, batch_size=None, shuffle=True):
    """Load the games stored at `paths` and wrap them in a DataLoader.

    Args:
        paths: Game file paths, passed unchanged to `load_old_format_games`.
        batch_size: Samples per batch. Defaults to the notebook-level
            `BATCH_SIZE` when left as None.
        shuffle: Whether the loader reshuffles the samples on every pass.
            Defaults to True, matching the original behaviour.

    Returns:
        A DataLoader yielding `(boards, policies, values)` batches of float32
        tensors.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    boards, policies, values = load_old_format_games(paths)
    # torch.as_tensor with an explicit dtype replaces the legacy torch.Tensor
    # constructor; the result is float32 either way.
    dataset = TensorDataset(
        torch.as_tensor(boards, dtype=torch.float32),
        torch.as_tensor(policies, dtype=torch.float32),
        torch.as_tensor(values, dtype=torch.float32),
    )
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# One loader per split: the first 3/4 of the game files for training and the
# remaining 1/4 for testing.
train_dataloader = paths_to_dataloader(paths=train_paths)
test_dataloader = paths_to_dataloader(paths=test_paths)

In [None]:
# Single training pass over `train_dataloader`, logging losses to Aim.
# The learning rate and policy-loss weight are read from the loaded project
# config (its "training" section) instead of being repeated as literals.
training_config = config()["training"]
policy_loss_weight = training_config["policy_loss_weight"]

model = NeuralNet(config()["networks"]["default"]).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=training_config["learning_rate"])
# The loss module is stateless, so one instance serves both heads and every batch.
loss_fn = nn.CrossEntropyLoss()

run = aim.Run(repo='/tmp/.aim')
try:
    # Wrapping the dataloader (not the enumerate object) lets tqdm see
    # len(train_dataloader) and show a total / ETA.
    for batch_index, (boards, policies, values) in enumerate(tqdm(train_dataloader)):
        boards = boards.to(DEVICE)
        policies = policies.to(DEVICE)
        values = values.to(DEVICE)

        # The model returns the value prediction first, then the policy.
        pred_values, pred_policy = model(boards)
        value_loss = loss_fn(pred_values, values)
        # The tracked policy loss is the weighted term, as it enters the total.
        policy_loss = policy_loss_weight * loss_fn(pred_policy, policies)
        loss = value_loss + policy_loss

        for metric_name, metric in (
            ("value_loss", value_loss),
            ("policy_loss", policy_loss),
            ("total_loss", loss),
        ):
            run.track(metric.item(), name=metric_name, step=batch_index)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
finally:
    # Close the Aim run even if training is interrupted or raises.
    run.close()

In [20]:
# Evaluate the trained model on the held-out split and report mean losses.
value_losses = []
policy_losses = []
total_losses = []

# Same weighting as the training cell, read from the loaded project config.
policy_loss_weight = config()["training"]["policy_loss_weight"]
loss_fn = nn.CrossEntropyLoss()

# Switch to evaluation mode for the duration of the pass and restore the
# previous mode afterwards, so later training cells are unaffected.
was_training = model.training
model.eval()
try:
    # No gradients are needed for evaluation; skipping them saves memory/time.
    with torch.no_grad():
        for boards, policies, values in tqdm(test_dataloader):
            # Inputs must live on the same device as the model.
            boards = boards.to(DEVICE)
            policies = policies.to(DEVICE)
            values = values.to(DEVICE)

            pred_values, pred_policy = model(boards)
            value_loss = loss_fn(pred_values, values)
            policy_loss = policy_loss_weight * loss_fn(pred_policy, policies)
            loss = value_loss + policy_loss

            value_losses.append(value_loss.item())
            policy_losses.append(policy_loss.item())
            total_losses.append(loss.item())
finally:
    model.train(was_training)


def _mean(batch_losses):
    """Mean of per-batch losses; NaN when no batches were evaluated."""
    return sum(batch_losses) / len(batch_losses) if batch_losses else float("nan")


# These are unweighted means over batches, so a smaller final batch counts as
# much as a full one.
print("Average value loss: ", _mean(value_losses))
print("Average policy loss: ", _mean(policy_losses))
print("Average total loss: ", _mean(total_losses))

5316it [00:47, 112.72it/s]

0.9206433480421882



