In [1]:
import os
import sys
import aim

os.environ["CONFIG_PATHS"] = "../configs/self_play.yaml"
os.environ["CONFIG_OVERRIDES"] = 'game.moves_directory="../data/moves_10"'
sys.path.append("../src")

import numpy as np
import torch
from torch import nn
from tqdm import tqdm
import glob
import time
from torch.utils.data import TensorDataset, DataLoader
from display import Display
from configuration import moves_data, config
from training.actor import TrainingActor
import training.helpers
from training.load_games import load_games_new

Loaded config:  {"development": {"debug_mode": true, "profile": false, "runtime": 0, "display_logs_in_console": false, "output_directory": "data/2024-11-23_00-37-50-doublehandedness"}, "logging": {"save_interval": 3600, "mcts_report_fraction": 0, "ucb_report": false, "gpu_evaluation": true, "made_move": true}, "game": {"board_size": 10, "num_moves": 6233, "num_pieces": 21, "moves_directory": "../data/moves_10"}, "architecture": {"gameplay_processes": 6, "coroutines_per_process": 256, "game_flush_threshold": 200}, "training": {"run": true, "network_name": "default", "batch_size": 64, "policy_loss_weight": 0.158, "learning_rate": 0.001, "sample_window": 50000, "samples_per_generation": 10000, "sampling_ratio": 1.0, "minimum_window_size": 10000, "new_data_check_interval": 60}, "networks": {"default": {"main_body_channels": 64, "value_head_channels": 16, "value_head_flat_layer_width": 64, "policy_head_channels": 64, "residual_blocks": 8, "model_path": "", "model_directory": "data/2024-11-2

In [2]:
from neural_net import NeuralNet
from training.game_data_manager import GameDataManager, DirectoryGameDataPathFetcher, CustomGameDataPathFetcher

In [3]:
NETWORK_CONFIG = config()["networks"]["default"]

In [4]:
BATCH_SIZE = 64
DEVICE = "mps"
MOVES = moves_data()
GAMES_DIR = "../data/2024-11-23_00-37-50-doublehandedness/games"
WINDOW_SIZE = 50000
MINIMUM_WINDOW_SIZE = 10000
POLICY_LOSS_WEIGHT = 0.158
LEARNING_RATE = 1e-3
SAMPLING_RATIO = 2.0

Loading file: piece_indices
Loading file: rotation_mapping
Loading file: new_occupieds
Loading file: moves_ruled_out_for_all
Loading file: scores
Loading file: moves_ruled_out_for_player
Loading file: moves_enabled_for_player
Loading file: new_adjacents
Loading file: new_corners


In [5]:
gamedata_path_fetcher = DirectoryGameDataPathFetcher(GAMES_DIR)
game_data_manager = GameDataManager(gamedata_path_fetcher, 50000)

In [6]:
model = NeuralNet(NETWORK_CONFIG)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [7]:
training.helpers.feed_window_until_amount(
    game_data_manager,
    MINIMUM_WINDOW_SIZE,
    1e6,
)
print("Window size: ", game_data_manager.current_window_size())
print("Cumulative window fed: ", game_data_manager.cumulative_window_fed())

Window size:  10000
Cumulative window fed:  10000


In [8]:
run = aim.Run(repo='/tmp/.aim')
pbar = tqdm(total=2190000)

model.train()

batch_index = 0
while True:
    # if batch_index % 100 == 0:
    #     print(f"Batch {batch_index}, window size {game_data_manager.current_window_size()}, cumulative window fed {game_data_manager.cumulative_window_fed()}")

    training_result = training.helpers.loop_iteration(
        model,
        optimizer,
        game_data_manager,
        device=DEVICE,
        batch_size=BATCH_SIZE,
        sampling_ratio=SAMPLING_RATIO,
        policy_loss_weight=POLICY_LOSS_WEIGHT,
    )
    if not training_result:
        break

    pbar.update(training_result["ingestion_count"])

    for key, value in training_result.items():
        run.track(
            value,
            name=key,
            step=batch_index,
        )
    batch_index += 1

pbar.close()
run.close()

2195712it [1:10:52, 516.39it/s]                             


In [9]:
torch.save(model.state_dict(), "../data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_2.pt")

In [10]:
pbar.close()
run.close()

In [8]:
# Load models
def load_model(path):
    model = NeuralNet(NETWORK_CONFIG)
    model.load_state_dict(torch.load(path, weights_only=True))
    model.to(DEVICE)
    return model

# base_model = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_1.pt")

# policy_weight_one_model = NeuralNet(NETWORK_CONFIG)
# policy_weight_one_model.load_state_dict(torch.load("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/policy_weight_of_one_1.pt", weights_only=True))
# policy_weight_one_model.to(DEVICE)

# print("done")

In [13]:
test_game_files = glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/games_we_didnt_train_on/*.npz")
test_gamedata = load_games_new(test_game_files, with_tqdm=True)

print("Converting to tensors...")

# Convert numpy arrays to torch tensors
boards_tensor = torch.from_numpy(test_gamedata["boards"]).to(dtype=torch.float, device="mps")
policies_tensor = torch.from_numpy(test_gamedata["policies"]).to(dtype=torch.float, device="mps")
values_tensor = torch.from_numpy(test_gamedata["values"]).to(dtype=torch.float, device="mps")
valid_moves = torch.from_numpy(test_gamedata["valid_moves"]).to(dtype=torch.bool, device="mps")

dataset = torch.utils.data.TensorDataset(boards_tensor, policies_tensor, values_tensor, valid_moves)


100%|██████████| 384/384 [00:08<00:00, 47.35it/s]


Converting to tensors...


In [14]:
def get_test_losses(model):
    batch_size = 64
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
    )
    model.eval()

    losses = {
        "total": 0,
        "value": 0,
        "policy": 0,
    }

    with torch.inference_mode():
        for batch_index, (boards, policies, values, valid_moves) in enumerate(tqdm(dataloader)):
            pred_values, pred_policy_logits = model(boards)
            value_loss = nn.CrossEntropyLoss()(
                pred_values,
                values,
            )

            policy_loss = nn.CrossEntropyLoss()(
                pred_policy_logits,
                policies,
            )
            loss = value_loss + POLICY_LOSS_WEIGHT * policy_loss

            losses["total"] += loss.item()
            losses["value"] += value_loss.item()
            losses["policy"] += policy_loss.item()

    return losses

In [15]:
print(get_test_losses(model))
# print(get_test_losses(load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_1.pt")))
# print(get_test_losses(load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_2.pt")))
# print(get_test_losses(load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_3.pt")))

100%|██████████| 5261/5261 [01:22<00:00, 63.42it/s]

{'total': 6469.176969707012, 'value': 5154.445533156395, 'policy': 8321.084671974182}





: 

In [13]:
adj_pred_policy_logits = torch.ones_like(pred_policy_logits) * -500
adj_pred_policy_logits[valid_moves] = pred_policy_logits[valid_moves]

POLICY_LOSS_WEIGHT * nn.CrossEntropyLoss(reduction="mean")(
    adj_pred_policy_logits,
    policies,
)

tensor(0.2501, device='mps:0')