In [1]:
import os
import sys
import aim
import glob
import random

os.environ["CONFIG_PATHS"] = "../configs/training_unlooped.yaml"
os.environ["CONFIG_OVERRIDES"] = 'game.moves_directory="../data/moves_10"'
sys.path.append("../src")

import numpy as np
import torch
from torch import nn
from tqdm import tqdm
import glob
import time
from configuration import moves_data, config
from training.load_games import load_games_new

In [2]:
from neural_net import NeuralNet
from training.game_data_manager import GameDataManager, DirectoryGameDataPathFetcher, CustomGameDataPathFetcher

Loaded config:  {"game": {"board_size": 10, "num_moves": 6233, "num_pieces": 21, "moves_directory": "../data/moves_10"}, "training": {"batch_size": 128, "policy_loss_weight": 0.158, "learning_rate": 0.001}, "networks": {"default": {"main_body_channels": 64, "value_head_channels": 16, "value_head_flat_layer_width": 64, "policy_head_channels": 32, "unused_pieces_flat_layer_width": 32, "residual_blocks": 10}}, "agents": []}


In [3]:
NETWORK_CONFIG = config()["networks"]["default"]

In [4]:
BATCH_SIZE = 128
DEVICE = "mps"
MOVES = moves_data()
GAMES_DIR = "../data/2024-11-23_00-37-50-doublehandedness/games"
POLICY_LOSS_WEIGHT = 0.158
LEARNING_RATE = 1e-2  # previously 1e-3

Loading file: piece_indices
Loading file: rotation_mapping
Loading file: new_occupieds
Loading file: moves_ruled_out_for_all
Loading file: scores
Loading file: moves_ruled_out_for_player
Loading file: moves_enabled_for_player
Loading file: new_adjacents
Loading file: new_corners


In [6]:
model = NeuralNet(NETWORK_CONFIG)
model.to(DEVICE)

model.load_state_dict(torch.load("/Users/shivamsarodia/Dev/blockus/data/notebook-models/53766cc2437f462ebf478dab/epoch_4.pt", weights_only=True))

<All keys matched successfully>

In [12]:

def get_test_losses(model, test_dataset, exclude_invalid_moves=False):
    dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    model.eval()

    results = {
        "total_loss": 0,
        "value_loss": 0,
        "policy_loss": 0,
        "value_max_correct": 0,
        "policy_max_correct": 0,
    }

    total_sample_count = 0

    with torch.inference_mode():
        for boards, policies, values, unused_pieces, valid_moves in dataloader:
            boards = boards.to(dtype=torch.float32, device=DEVICE)
            policies = policies.to(dtype=torch.float32, device=DEVICE)
            values = values.to(dtype=torch.float32, device=DEVICE)
            unused_pieces = unused_pieces.to(dtype=torch.float32, device=DEVICE)

            pred_values, pred_policy_logits = model(boards, unused_pieces)

            if exclude_invalid_moves:
                pred_policy_logits[~valid_moves] = -1e9

            value_loss = nn.CrossEntropyLoss(reduction="sum")(
                pred_values,
                values,
            )
            policy_loss = nn.CrossEntropyLoss(reduction="sum")(
                pred_policy_logits,
                policies,
            )
            loss = value_loss + POLICY_LOSS_WEIGHT * policy_loss

            results["total_loss"] += loss.item()
            results["value_loss"] += value_loss.item()
            results["policy_loss"] += policy_loss.item()
            results["value_max_correct"] += (pred_values.argmax(dim=1) == values.argmax(dim=1)).sum().item()
            results["policy_max_correct"] += (pred_policy_logits.argmax(dim=1) == policies.argmax(dim=1)).sum().item()

            total_sample_count += len(boards)

    results["total_loss"] /= total_sample_count
    results["value_loss"] /= total_sample_count
    results["policy_loss"] /= total_sample_count
    results["value_max_correct"] /= total_sample_count
    results["policy_max_correct"] /= total_sample_count

    return results

In [14]:
TRAIN_TEST_SPLIT = 0.9

def get_dataset(game_files):
    gamedata = load_games_new(game_files, with_tqdm=True)
    boards_tensor = torch.from_numpy(gamedata["boards"]).to(dtype=torch.float)
    policies_tensor = torch.from_numpy(gamedata["policies"]).to(dtype=torch.float)
    values_tensor = torch.from_numpy(gamedata["values"]).to(dtype=torch.float)
    unused_pieces = torch.from_numpy(gamedata["unused_pieces"]).to(dtype=torch.bool)
    valid_moves = torch.from_numpy(gamedata["valid_moves"]).to(dtype=torch.bool)
    return torch.utils.data.TensorDataset(boards_tensor, policies_tensor, values_tensor, unused_pieces, valid_moves)

file_paths = glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/untrained_games_*/*.npz")

random.seed(20554)
random.shuffle(file_paths)

num_train_games = int(len(file_paths) * TRAIN_TEST_SPLIT)
train_file_paths = file_paths[:num_train_games]
test_file_paths = file_paths[num_train_games:]

test_dataset = get_dataset(test_file_paths)


100%|██████████| 205/205 [00:03<00:00, 52.53it/s]


In [15]:
test_losses = get_test_losses(model, test_dataset)
test_losses_exclude_invalid_moves = get_test_losses(model, test_dataset, exclude_invalid_moves=True)

In [16]:
test_losses

{'total_loss': 1.140363884838364,
 'value_loss': 0.9053822001797702,
 'policy_loss': 1.487225781700051,
 'value_max_correct': 0.6013491489091342,
 'policy_max_correct': 0.856560388501848}

In [17]:
test_losses_exclude_invalid_moves

{'total_loss': 1.1387235602985408,
 'value_loss': 0.9053822002614371,
 'policy_loss': 1.4768439857763338,
 'value_max_correct': 0.6013491489091342,
 'policy_max_correct': 0.8586377021375337}

In [6]:
def save_model(model):
    model_path = os.path.join(
        "/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_partials/",
        f"{model_name}.pt",
    )
    torch.save(model.state_dict(), model_path)

In [5]:
# games_files = sorted(glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/games/*.npz"))
# games_we_didnt_train_on = sorted(glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/games_we_didnt_train_on/*.npz"))
# more_games_we_didnt_train_on = sorted(glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/more_games_we_didnt_train_on/*.npz"))

# recent_games_files = [f for f in games_files if f > "/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/games/1732975250110_851.npz"]
# all_games = recent_games_files + games_we_didnt_train_on + more_games_we_didnt_train_on

# random.seed(20554)
# random.shuffle(all_games)

# train_game_files = all_games[200:]
# test_game_files = all_games[:200]

# print("Num train files:", len(train_game_files))
# print("Num test files:", len(test_game_files))

def get_dataset(game_files):
    gamedata = load_games_new(game_files, with_tqdm=True)
    boards_tensor = torch.from_numpy(gamedata["boards"]).to(dtype=torch.float)
    policies_tensor = torch.from_numpy(gamedata["policies"]).to(dtype=torch.float)
    values_tensor = torch.from_numpy(gamedata["values"]).to(dtype=torch.float)
    valid_moves = torch.from_numpy(gamedata["valid_moves"]).to(dtype=torch.bool)
    return torch.utils.data.TensorDataset(boards_tensor, policies_tensor, values_tensor, valid_moves)

train_dataset = get_dataset(train_game_files)
test_dataset = get_dataset(test_game_files)

NameError: name 'train_game_files' is not defined

In [8]:
print("Num train samples:", len(train_dataset))
print("Num test samples:", len(test_dataset))

Num train samples: 983233
Num test samples: 172325


In [9]:
def get_test_losses(model):
    dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    model.eval()

    results = {
        "total_loss": 0,
        "value_loss": 0,
        "policy_loss": 0,
        "value_max_correct": 0,
        "policy_max_correct": 0,
    }

    total_sample_count = 0

    with torch.inference_mode():
        for boards, policies, values, valid_moves in dataloader:
            boards = boards.to(dtype=torch.float32, device=DEVICE)
            policies = policies.to(dtype=torch.float32, device=DEVICE)
            values = values.to(dtype=torch.float32, device=DEVICE)

            pred_values, pred_policy_logits = model(boards.to(dtype=torch.float32, device=DEVICE))
            value_loss = nn.CrossEntropyLoss(reduction="sum")(
                pred_values,
                values,
            )
            policy_loss = nn.CrossEntropyLoss(reduction="sum")(
                pred_policy_logits,
                policies,
            )
            loss = value_loss + POLICY_LOSS_WEIGHT * policy_loss

            results["total_loss"] += loss.item()
            results["value_loss"] += value_loss.item()
            results["policy_loss"] += policy_loss.item()
            results["value_max_correct"] += (pred_values.argmax(dim=1) == values.argmax(dim=1)).sum().item()
            results["policy_max_correct"] += (pred_policy_logits.argmax(dim=1) == policies.argmax(dim=1)).sum().item()

            total_sample_count += len(boards)

    results["total_loss"] /= total_sample_count
    results["value_loss"] /= total_sample_count
    results["policy_loss"] /= total_sample_count
    results["value_max_correct"] /= total_sample_count
    results["policy_max_correct"] /= total_sample_count

    return results

In [10]:
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

run = aim.Run(repo='/Users/shivamsarodia/Dev/blockus/')
run["hparams"] = {
    "batch_size": BATCH_SIZE,
    "learning_rate": LEARNING_RATE,
    "policy_loss_weight": POLICY_LOSS_WEIGHT,
}

batch_index = 0
for epoch in range(5):
    for boards, policies, values, valid_moves in tqdm(train_dataloader):
        model.train()

        boards = boards.to(dtype=torch.float32, device=DEVICE)
        policies = policies.to(dtype=torch.float32, device=DEVICE)
        values = values.to(dtype=torch.float32, device=DEVICE)

        pred_values, pred_policy = model(boards)
        value_loss = nn.CrossEntropyLoss()(
            pred_values,
            values,
        )
        policy_loss = nn.CrossEntropyLoss()(
            pred_policy,
            policies,
        )
        loss = value_loss + POLICY_LOSS_WEIGHT * policy_loss

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        training_result = {
            "total_loss": loss.item(),
            "value_loss": value_loss.item(),
            "policy_loss": policy_loss.item(),
            "value_max_correct": (pred_values.argmax(dim=1) == values.argmax(dim=1)).sum().item() / len(boards),
            "policy_max_correct": (pred_policy.argmax(dim=1) == policies.argmax(dim=1)).sum().item() / len(boards),
        }

        for key, value in training_result.items():
            run.track(
                value,
                name=key,
                step=batch_index,
                context={"subset": "train"},
            )

        if batch_index % 1000 == 0:
            test_losses = get_test_losses(model)
            for key, value in test_losses.items():
                run.track(
                    value,
                    name=key,
                    step=batch_index,
                    context={"subset": "test"},
                )
        batch_index += 1

    print("Finished epoch")

run.close()

100%|██████████| 7682/7682 [10:46<00:00, 11.89it/s]  


Finished epoch


100%|██████████| 7682/7682 [10:50<00:00, 11.82it/s]  


Finished epoch


100%|██████████| 7682/7682 [09:44<00:00, 13.15it/s]  


Finished epoch


100%|██████████| 7682/7682 [12:04<00:00, 10.60it/s]  


Finished epoch


100%|██████████| 7682/7682 [12:52<00:00,  9.94it/s]  

Finished epoch





In [21]:
run.close()

In [9]:
# torch.save(model.state_dict(), "../data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_2.pt")

In [24]:
count_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=False,
)

boards_seen = set()
total_boards_count = 0
boards_with_at_least_one_piece_count = 0
boards_with_at_least_two_pieces_count = 0
boards_with_at_least_three_pieces_count = 0
boards_with_at_least_four_pieces_count = 0
for boards, policies, values, valid_moves in tqdm(count_dataloader):
    boards_seen.add(boards.numpy(force=True).tobytes())
    total_boards_count += 1
    if torch.sum(boards) > 0:
        boards_with_at_least_one_piece_count += 1
    if torch.sum(boards) > 5:
        boards_with_at_least_two_pieces_count += 1
    if torch.sum(boards) > 10:
        boards_with_at_least_three_pieces_count += 1
    if torch.sum(boards) > 15:
        boards_with_at_least_four_pieces_count += 1
        
print("Number of unique boards:", len(boards_seen))
print("Number of boards with at least one piece:", boards_with_at_least_one_piece_count)
print("Number of boards with at least two pieces:", boards_with_at_least_two_pieces_count)
print("Number of boards with at least three pieces:", boards_with_at_least_three_pieces_count)
print("Number of boards with at least four pieces:", boards_with_at_least_four_pieces_count)


100%|██████████| 172325/172325 [00:08<00:00, 19911.78it/s]

Number of unique boards: 107526
Number of boards with at least one piece: 164399
Number of boards with at least two pieces: 156661
Number of boards with at least three pieces: 148777
Number of boards with at least four pieces: 140872





In [23]:
# Load models
def load_model(path):
    model = NeuralNet(NETWORK_CONFIG)
    model.load_state_dict(torch.load(path, weights_only=True))
    model.to(DEVICE)
    return model

# base_model_1 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_1.pt")
# base_model_2 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_2.pt")
# ratio_two_model_1 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_1.pt")
ratio_two_model_2 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_2.pt")

# print("done")

In [24]:
get_test_losses(ratio_two_model_2)

100%|██████████| 2728/2728 [00:27<00:00, 100.28it/s]


{'total_loss': 0.019116447764486764,
 'value_loss': 0.015213298658504423,
 'policy_loss': 0.02470347422051892,
 'value_max_correct': 0.571880298794739,
 'policy_max_correct': 0.7852412813344943}

In [18]:
test_game_files = glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/more_games_we_didnt_train_on/*.npz")
test_gamedata = load_games_new(test_game_files, with_tqdm=True)

print("Converting to tensors...")

# Convert numpy arrays to torch tensors
boards_tensor = torch.from_numpy(test_gamedata["boards"]).to(dtype=torch.float, device="mps")
policies_tensor = torch.from_numpy(test_gamedata["policies"]).to(dtype=torch.float, device="mps")
values_tensor = torch.from_numpy(test_gamedata["values"]).to(dtype=torch.float, device="mps")
valid_moves = torch.from_numpy(test_gamedata["valid_moves"]).to(dtype=torch.bool, device="mps")

dataset = torch.utils.data.TensorDataset(boards_tensor, policies_tensor, values_tensor, valid_moves)

100%|██████████| 276/276 [00:05<00:00, 47.85it/s]


Converting to tensors...


In [12]:
# base_model_1_test_result = get_test_losses(base_model_1)
base_model_2_test_result = get_test_losses(base_model_2)
# ratio_two_model_1_test_result = get_test_losses(ratio_two_model_1)
ratio_two_model_2_test_result = get_test_losses(ratio_two_model_2)

100%|██████████| 5261/5261 [00:52<00:00, 100.32it/s]
100%|██████████| 5261/5261 [00:52<00:00, 100.31it/s]


In [19]:
len(test_gamedata["boards"])

241365

In [20]:
241365 + 336673 + 500000

1078038

In [15]:
base_model_2_test_result

{'loss_total': 0.019015346798564505,
 'loss_value': 0.015089419780530288,
 'loss_policy': 0.024847638171400108,
 'value_max_correct': 0.5920462882381421,
 'policy_max_correct': 0.7863327323545398}

In [16]:
ratio_two_model_2_test_result

{'loss_total': 0.019214975594640738,
 'loss_value': 0.015309898972442953,
 'loss_policy': 0.02471567374811644,
 'value_max_correct': 0.5672061614682496,
 'policy_max_correct': 0.7846664270672136}

In [None]:
def range_of_boolean_var(p):
    standard_error = np.sqrt(p * (1 - p) / len(dataset))
    return p + 2 * standard_error, p - 2 * standard_error

import matplotlib.pyplot as plt

# Create figure with two subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

models = ['Base 1', 'Base 2', 'Ratio Two 1', 'Ratio Two 2']

# Value accuracy plot
value_accuracies = [
    base_model_1_test_result["value_max_correct"],
    base_model_2_test_result["value_max_correct"], 
    ratio_two_model_1_test_result["value_max_correct"],
    ratio_two_model_2_test_result["value_max_correct"]
]
value_errors = [range_of_boolean_var(acc) for acc in value_accuracies]
value_upper = [err[0] - acc for acc, err in zip(value_accuracies, value_errors)]
value_lower = [acc - err[1] for acc, err in zip(value_accuracies, value_errors)]

ax1.bar(models, value_accuracies, yerr=[value_lower, value_upper], capsize=5)
ax1.set_title('Value Prediction Accuracy')
ax1.set_ylabel('Accuracy')
ax1.set_ylim(bottom=min(value_accuracies) * 0.95)  # Set bottom to 95% of minimum value

# Policy accuracy plot  
policy_accuracies = [
    base_model_1_test_result["policy_max_correct"],
    base_model_2_test_result["policy_max_correct"],
    ratio_two_model_1_test_result["policy_max_correct"], 
    ratio_two_model_2_test_result["policy_max_correct"]
]
policy_errors = [range_of_boolean_var(acc) for acc in policy_accuracies]
policy_upper = [err[0] - acc for acc, err in zip(policy_accuracies, policy_errors)]
policy_lower = [acc - err[1] for acc, err in zip(policy_accuracies, policy_errors)]

ax2.bar(models, policy_accuracies, yerr=[policy_lower, policy_upper], capsize=5)
ax2.set_title('Policy Prediction Accuracy')
ax2.set_ylabel('Accuracy')
ax2.set_ylim(bottom=min(policy_accuracies) * 0.95)  # Set bottom to 95% of minimum value

plt.tight_layout()
plt.show()


In [None]:
print("Base model 1")
print(base_model_1_test_result)
print("Base model 2")
print(base_model_2_test_result)
print("Ratio two model 1")
print(ratio_two_model_1_test_result)
print("Ratio two model 2")
print(ratio_two_model_2_test_result)

In [6]:
import glob
import random

file_paths = glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/untrained_games_*/*.npz")

random.seed(20554)
random.shuffle(file_paths)

num_train_games = int(len(file_paths) * 0.9)
train_file_paths = file_paths[:num_train_games]
test_file_paths = file_paths[num_train_games:]

print("Num train files:", len(train_file_paths))
print("Num test files:", len(test_file_paths))

print("Loading train games...")
train_dataset = get_dataset(train_file_paths)

print("Loading test games...")
test_dataset = get_dataset(test_file_paths)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=1,
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
)

Num train files: 1836
Num test files: 205
Loading train games...


100%|██████████| 1836/1836 [00:38<00:00, 48.12it/s]


Loading test games...


100%|██████████| 205/205 [00:04<00:00, 47.32it/s]


AttributeError: 'list' object has no attribute 'numpy'

In [29]:
from collections import Counter

test_boards = Counter()
for board, _, _, _ in test_dataloader:
    test_boards[board.numpy(force=True).tobytes()] += 1

print("Number of total test boards:", len(test_dataset))
print("Number of unique test boards:", len(test_boards))

train_boards = Counter()
for board, _, _, _ in train_dataloader:
    train_boards[board.numpy(force=True).tobytes()] += 1

print("Number of total train boards:", len(train_dataset))
print("Number of unique train boards:", len(train_boards))

test_boards_in_train = Counter()
for board in test_boards:
    if board in train_boards:
        test_boards_in_train[board] = test_boards[board]

print("Number of boards shared:", len(test_boards_in_train))
print("Number of rows of test data that appear in train data:", sum(test_boards_in_train.values()))

Number of total test boards: 181003
Number of unique test boards: 101767
Number of total train boards: 1612873
Number of unique train boards: 781565
Number of boards shared: 20435
Number of rows of test data that appear in train data: 99330


In [32]:
99330 / 181003

0.548775434661304

In [30]:
99330 / 181003

0.548775434661304

In [17]:
print("Number of total test boards:", len(test_dataset))
print("Number of total train boards:", len(train_dataset))
print("Number of boards shared:", len(test_boards & train_boards))

Number of total test boards: 181003
Number of total train boards: 1612873
Number of boards shared: 20435


In [23]:
from collections import Counter

c1 = Counter()
c1[4] += 1
c2 = Counter()
c2[5] += 3
c1 & c2

Counter()

In [27]:
sum(c2.values())

3

In [16]:
101767 / 181003

0.5622392999011067

In [37]:
for board, _, _, _ in train_dataset:
    print(board.shape)
    break

torch.Size([4, 10, 10])
