In [2]:
import os
import sys
import aim

os.environ["CONFIG_PATHS"] = "../configs/self_play.yaml"
os.environ["CONFIG_OVERRIDES"] = 'game.moves_directory="../data/moves_10"'
sys.path.append("../src")

import numpy as np
import torch
from torch import nn
from tqdm import tqdm
import glob
import time
from torch.utils.data import TensorDataset, DataLoader
from display import Display
from configuration import moves_data, config
from training.actor import TrainingActor
import training.helpers
from training.load_games import load_games_new

Loaded config:  {"development": {"debug_mode": true, "profile": false, "runtime": 0, "display_logs_in_console": false, "output_directory": "data/2024-12-06_00-09-14-self-play-for-policy-weight-one"}, "logging": {"save_interval": 3600, "mcts_report_fraction": 0, "ucb_report": false, "gpu_evaluation": true, "made_move": true}, "game": {"board_size": 10, "num_moves": 6233, "num_pieces": 21, "moves_directory": "../data/moves_10"}, "architecture": {"gameplay_processes": 6, "coroutines_per_process": 256, "game_flush_threshold": 200}, "training": {"run": true, "network_name": "default", "batch_size": 64, "policy_loss_weight": 1.0, "learning_rate": 0.001, "sample_window": 50000, "samples_per_generation": 10000, "sampling_ratio": 2.0, "minimum_window_size": 10000, "new_data_check_interval": 60}, "networks": {"default": {"main_body_channels": 64, "value_head_channels": 16, "value_head_flat_layer_width": 64, "policy_head_channels": 64, "residual_blocks": 8, "model_path": "", "model_directory": "d

In [3]:
from neural_net import NeuralNet
from training.game_data_manager import GameDataManager, DirectoryGameDataPathFetcher, CustomGameDataPathFetcher

In [5]:
NETWORK_CONFIG = config()["networks"]["default"]

In [6]:
BATCH_SIZE = 64
DEVICE = "mps"
MOVES = moves_data()
GAMES_DIR = "../data/2024-11-23_00-37-50-doublehandedness/games"
WINDOW_SIZE = 50000
MINIMUM_WINDOW_SIZE = 10000
POLICY_LOSS_WEIGHT = 0.158
LEARNING_RATE = 1e-3
SAMPLING_RATIO = 2.0
SAMPLES_PER_GENERATION = 100000

Loading file: piece_indices
Loading file: rotation_mapping
Loading file: new_occupieds
Loading file: moves_ruled_out_for_all
Loading file: scores
Loading file: moves_ruled_out_for_player
Loading file: moves_enabled_for_player
Loading file: new_adjacents
Loading file: new_corners


In [14]:
gamedata_path_fetcher = DirectoryGameDataPathFetcher(GAMES_DIR)
game_data_manager = GameDataManager(gamedata_path_fetcher, 50000)

In [5]:
model = NeuralNet(NETWORK_CONFIG)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [16]:
training.helpers.feed_window_until_amount(
    game_data_manager,
    MINIMUM_WINDOW_SIZE,
    1e6,
)
print("Window size: ", game_data_manager.current_window_size())
print("Cumulative window fed: ", game_data_manager.cumulative_window_fed())

Window size:  10000
Cumulative window fed:  10000


In [17]:
def save_model(cumulative_window_fed, model):
    model_name = str(cumulative_window_fed).zfill(9)
    model_path = os.path.join(
        "/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_partials/",
        f"{model_name}.pt",
    )
    torch.save(model.state_dict(), model_path)

In [18]:
max_samples_to_train_on = 1500000

run = aim.Run(repo='/tmp/.aim')
pbar = tqdm(total=max_samples_to_train_on)

model.train()

previous_cumulative_window_fed = game_data_manager.cumulative_window_fed()

batch_index = 0
while True:
    # if batch_index % 100 == 0:
    #     print(f"Batch {batch_index}, window size {game_data_manager.current_window_size()}, cumulative window fed {game_data_manager.cumulative_window_fed()}")

    training_result = training.helpers.loop_iteration(
        model,
        optimizer,
        game_data_manager,
        device=DEVICE,
        batch_size=BATCH_SIZE,
        sampling_ratio=SAMPLING_RATIO,
        policy_loss_weight=POLICY_LOSS_WEIGHT,
    )
    if not training_result:
        break

    pbar.update(training_result["ingestion_count"])

    for key, value in training_result.items():
        run.track(
            value,
            name=key,
            step=batch_index,
        )
    batch_index += 1

    cumulative_window_fed = training_result["cumulative_window_fed"]
    if cumulative_window_fed > max_samples_to_train_on:
        break

    if cumulative_window_fed // SAMPLES_PER_GENERATION > previous_cumulative_window_fed // SAMPLES_PER_GENERATION:
        save_model(cumulative_window_fed, model)

    previous_cumulative_window_fed = cumulative_window_fed

save_model(cumulative_window_fed, model)

pbar.close()
run.close()

 99%|█████████▉| 1490016/1500000 [49:52<00:20, 497.88it/s] 


In [9]:
# torch.save(model.state_dict(), "../data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_2.pt")

In [10]:
pbar.close()
run.close()

In [7]:
# Load models
def load_model(path):
    model = NeuralNet(NETWORK_CONFIG)
    model.load_state_dict(torch.load(path, weights_only=True))
    model.to(DEVICE)
    return model

# base_model_1 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_1.pt")
base_model_2 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/base_in_notebook_2.pt")
# ratio_two_model_1 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_1.pt")
ratio_two_model_2 = load_model("/Users/shivamsarodia/Dev/blockus/data/2024-12-02_21-22-57-notebook-models/sample_ratio_two_2.pt")

# print("done")

In [18]:
test_game_files = glob.glob("/Users/shivamsarodia/Dev/blockus/data/2024-11-23_00-37-50-doublehandedness/more_games_we_didnt_train_on/*.npz")
test_gamedata = load_games_new(test_game_files, with_tqdm=True)

print("Converting to tensors...")

# Convert numpy arrays to torch tensors
boards_tensor = torch.from_numpy(test_gamedata["boards"]).to(dtype=torch.float, device="mps")
policies_tensor = torch.from_numpy(test_gamedata["policies"]).to(dtype=torch.float, device="mps")
values_tensor = torch.from_numpy(test_gamedata["values"]).to(dtype=torch.float, device="mps")
valid_moves = torch.from_numpy(test_gamedata["valid_moves"]).to(dtype=torch.bool, device="mps")

dataset = torch.utils.data.TensorDataset(boards_tensor, policies_tensor, values_tensor, valid_moves)

100%|██████████| 276/276 [00:05<00:00, 47.85it/s]


Converting to tensors...


In [11]:
def get_test_losses(model):
    batch_size = 64
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
    )
    model.eval()

    results = {
        "loss_total": 0,
        "loss_value": 0,
        "loss_policy": 0,
        "value_max_correct": 0,
        "policy_max_correct": 0,
    }

    total_sample_count = 0

    with torch.inference_mode():
        for batch_index, (boards, policies, values, valid_moves) in enumerate(tqdm(dataloader)):
            pred_values, pred_policy_logits = model(boards.to(dtype=torch.float32))
            value_loss = nn.CrossEntropyLoss()(
                pred_values,
                values,
            )

            policy_loss = nn.CrossEntropyLoss()(
                pred_policy_logits,
                policies,
            )
            loss = value_loss + POLICY_LOSS_WEIGHT * policy_loss

            results["loss_total"] += loss.item()
            results["loss_value"] += value_loss.item()
            results["loss_policy"] += policy_loss.item()
            results["value_max_correct"] += (pred_values.argmax(dim=1) == values.argmax(dim=1)).sum().item()
            results["policy_max_correct"] += (pred_policy_logits.argmax(dim=1) == policies.argmax(dim=1)).sum().item()

            total_sample_count += len(boards)

    results["loss_total"] /= total_sample_count
    results["loss_value"] /= total_sample_count
    results["loss_policy"] /= total_sample_count
    results["value_max_correct"] /= total_sample_count
    results["policy_max_correct"] /= total_sample_count

    return results

In [12]:
# base_model_1_test_result = get_test_losses(base_model_1)
base_model_2_test_result = get_test_losses(base_model_2)
# ratio_two_model_1_test_result = get_test_losses(ratio_two_model_1)
ratio_two_model_2_test_result = get_test_losses(ratio_two_model_2)

100%|██████████| 5261/5261 [00:52<00:00, 100.32it/s]
100%|██████████| 5261/5261 [00:52<00:00, 100.31it/s]


In [19]:
len(test_gamedata["boards"])

241365

In [20]:
241365 + 336673 + 500000

1078038

In [15]:
base_model_2_test_result

{'loss_total': 0.019015346798564505,
 'loss_value': 0.015089419780530288,
 'loss_policy': 0.024847638171400108,
 'value_max_correct': 0.5920462882381421,
 'policy_max_correct': 0.7863327323545398}

In [16]:
ratio_two_model_2_test_result

{'loss_total': 0.019214975594640738,
 'loss_value': 0.015309898972442953,
 'loss_policy': 0.02471567374811644,
 'value_max_correct': 0.5672061614682496,
 'policy_max_correct': 0.7846664270672136}

In [None]:
def range_of_boolean_var(p):
    standard_error = np.sqrt(p * (1 - p) / len(dataset))
    return p + 2 * standard_error, p - 2 * standard_error

import matplotlib.pyplot as plt

# Create figure with two subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

models = ['Base 1', 'Base 2', 'Ratio Two 1', 'Ratio Two 2']

# Value accuracy plot
value_accuracies = [
    base_model_1_test_result["value_max_correct"],
    base_model_2_test_result["value_max_correct"], 
    ratio_two_model_1_test_result["value_max_correct"],
    ratio_two_model_2_test_result["value_max_correct"]
]
value_errors = [range_of_boolean_var(acc) for acc in value_accuracies]
value_upper = [err[0] - acc for acc, err in zip(value_accuracies, value_errors)]
value_lower = [acc - err[1] for acc, err in zip(value_accuracies, value_errors)]

ax1.bar(models, value_accuracies, yerr=[value_lower, value_upper], capsize=5)
ax1.set_title('Value Prediction Accuracy')
ax1.set_ylabel('Accuracy')
ax1.set_ylim(bottom=min(value_accuracies) * 0.95)  # Set bottom to 95% of minimum value

# Policy accuracy plot  
policy_accuracies = [
    base_model_1_test_result["policy_max_correct"],
    base_model_2_test_result["policy_max_correct"],
    ratio_two_model_1_test_result["policy_max_correct"], 
    ratio_two_model_2_test_result["policy_max_correct"]
]
policy_errors = [range_of_boolean_var(acc) for acc in policy_accuracies]
policy_upper = [err[0] - acc for acc, err in zip(policy_accuracies, policy_errors)]
policy_lower = [acc - err[1] for acc, err in zip(policy_accuracies, policy_errors)]

ax2.bar(models, policy_accuracies, yerr=[policy_lower, policy_upper], capsize=5)
ax2.set_title('Policy Prediction Accuracy')
ax2.set_ylabel('Accuracy')
ax2.set_ylim(bottom=min(policy_accuracies) * 0.95)  # Set bottom to 95% of minimum value

plt.tight_layout()
plt.show()


In [None]:
print("Base model 1")
print(base_model_1_test_result)
print("Base model 2")
print(base_model_2_test_result)
print("Ratio two model 1")
print(ratio_two_model_1_test_result)
print("Ratio two model 2")
print(ratio_two_model_2_test_result)