In [8]:
import os
import sys
import aim

# Point the project's configuration at the self-play config and override the
# moves directory. These are set before `configuration` is imported further
# down, presumably because it reads them at import time — TODO confirm.
os.environ["CONFIG_PATHS"] = "../configs/self_play.yaml"
os.environ["CONFIG_OVERRIDES"] = 'game.moves_directory="../data/moves_10"'
# Put ../src on the import path (assumes the project modules imported below
# live there). Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
if "../src" not in sys.path:
    sys.path.append("../src")

import numpy as np
import torch
from torch import nn
from tqdm import tqdm
import time
from torch.utils.data import TensorDataset, DataLoader
from display import Display
from configuration import moves_data, config
from training.actor import TrainingActor
import training.helpers

In [9]:
from neural_net import NeuralNet
from training.game_data_manager import GameDataManager

In [10]:
# Network settings used to build the model: the "default" entry under
# "networks" in the project configuration returned by config().
NETWORK_CONFIG = config()["networks"]["default"]

In [11]:
# Number of samples per optimisation step (passed to loop_iteration).
BATCH_SIZE = 64

# Prefer Apple's Metal backend (the original hard-coded choice), but fall back
# to CUDA and then CPU so the notebook still runs on machines without MPS.
if torch.backends.mps.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

MOVES = moves_data()

# Directory holding the recorded games that feed the training window.
GAMES_DIR = "../data/2024-11-23_00-37-50-doublehandedness/games"

# Intended size of the game-data window; same value as the one handed to
# GameDataManager.
WINDOW_SIZE = 50000
# Amount of data to feed into the window before training starts.
MINIMUM_WINDOW_SIZE = 10000

# Weight given to the policy loss (passed to loop_iteration as policy_loss_weight).
POLICY_LOSS_WEIGHT = 0.158
# Learning rate for the Adam optimizer.
LEARNING_RATE = 1e-3
# Passed to loop_iteration as sampling_ratio; exact semantics live in
# training.helpers — TODO confirm.
SAMPLING_RATIO = 1.0

In [12]:
# Data source for training, reading games from GAMES_DIR. The second argument
# uses the WINDOW_SIZE constant instead of repeating the literal 50000
# (presumably the maximum window size — confirm against GameDataManager).
game_data_manager = GameDataManager(GAMES_DIR, WINDOW_SIZE)

In [13]:
# Build the network from the default config and place it on the training device.
model = NeuralNet(NETWORK_CONFIG).to(DEVICE)
# Switch the network to training mode before any optimisation step.
model.train()

# Adam over all model parameters at the notebook-level learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [14]:
# Pre-fill the data window up to MINIMUM_WINDOW_SIZE before training starts.
# The third argument (1e6) is forwarded as-is; its meaning is defined by
# training.helpers.feed_window_until_amount — TODO confirm.
training.helpers.feed_window_until_amount(game_data_manager, MINIMUM_WINDOW_SIZE, 1e6)

# Report how much data the window holds now and how much was fed in total.
window_size = game_data_manager.current_window_size()
print("Window size: ", window_size)
cumulative_fed = game_data_manager.cumulative_window_fed()
print("Cumulative window fed: ", cumulative_fed)

Window size:  10000
Cumulative window fed:  10000


In [8]:
# Track every training metric in a local Aim repository.
run = aim.Run(repo='/tmp/.aim')

batch_index = 0
try:
    while True:
        # Periodic progress line so long runs show how far the window has advanced.
        if batch_index % 100 == 0:
            print(f"Batch {batch_index}, window size {game_data_manager.current_window_size()}, cumulative window fed {game_data_manager.cumulative_window_fed()}")

        training_result = training.helpers.loop_iteration(
            model,
            optimizer,
            game_data_manager,
            device=DEVICE,
            batch_size=BATCH_SIZE,
            sampling_ratio=SAMPLING_RATIO,
            policy_loss_weight=POLICY_LOSS_WEIGHT,
        )
        # A falsy result ends training (presumably no more data is available
        # — TODO confirm against training.helpers.loop_iteration).
        if not training_result:
            break

        # Each entry of the result mapping is logged as its own metric series.
        for key, value in training_result.items():
            run.track(
                value,
                name=key,
                step=batch_index,
            )
        batch_index += 1
finally:
    # Close the run even when the loop is interrupted (e.g. KeyboardInterrupt
    # from stopping the cell); the exception itself still propagates.
    run.close()

Batch 0, window size 0, cumulative window fed 0
Batch 100, window size 6400, cumulative window fed 6400
Batch 200, window size 12800, cumulative window fed 12800
Batch 300, window size 19200, cumulative window fed 19200
Batch 400, window size 25600, cumulative window fed 25600


KeyboardInterrupt: 

In [14]:
# Persist only the weights (state_dict) of the model trained in this notebook.
# NOTE(review): the destination run directory (2024-11-26_19-59-19-prodigal)
# differs from the run that GAMES_DIR points at — confirm this is intentional,
# and that the `models` directory already exists, since nothing here creates it.
torch.save(model.state_dict(), "../data/2024-11-26_19-59-19-prodigal/models/trained_in_notebook.pt")

In [8]:
# NOTE(review): disabled legacy cell, kept for reference only. It split the
# files in GAMES_DIR into train/test DataLoaders. `load_old_format_games` is
# not imported in any visible cell, so re-enabling this needs that import first.
# game_paths = [
#     os.path.join(GAMES_DIR, f)
#     for f in os.listdir(GAMES_DIR)
# ]

# # Train on first 3/4, test on last 1/4.
# train_path_index_cutoff = 3 * len(game_paths) // 4

# train_paths = sorted(game_paths)[:train_path_index_cutoff]
# test_paths = sorted(game_paths)[train_path_index_cutoff:]

# def paths_to_dataloader(paths):
#     boards, policies, values = load_old_format_games(paths)
#     dataset = TensorDataset(
#         torch.Tensor(boards),
#         torch.Tensor(policies),
#         torch.Tensor(values),
#     )
#     return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# train_dataloader = paths_to_dataloader(train_paths)
# test_dataloader = paths_to_dataloader(test_paths)

In [9]:
# NOTE(review): disabled legacy cell, kept for reference only. It is a manual
# training loop over `train_dataloader` (which is only defined in commented-out
# code) that logs value, policy and total loss to Aim. It would rebind `model`
# and `optimizer` if re-enabled.
# run = aim.Run(repo='/tmp/.aim')

# model = NeuralNet(config()["networks"]["default"]).to(DEVICE)
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# for batch_index, (boards, policies, values) in tqdm(enumerate(train_dataloader)):
#     boards = boards.to(DEVICE)
#     policies = policies.to(DEVICE)
#     values = values.to(DEVICE)

#     pred_values, pred_policy = model(boards)
#     value_loss = nn.CrossEntropyLoss()(
#         pred_values,
#         values,
#     )
#     policy_loss = 0.158 * nn.CrossEntropyLoss()(
#         pred_policy,
#         policies,
#     )
#     loss = value_loss + policy_loss

#     run.track(
#         value_loss.item(),
#         name="value_loss",
#         step=batch_index,
#     )
#     run.track(
#         policy_loss.item(),
#         name="policy_loss",
#         step=batch_index,
#     )
#     run.track(
#         loss.item(),
#         name="total_loss",
#         step=batch_index,
#     )

#     loss.backward()
#     optimizer.step()
#     optimizer.zero_grad()

# run.close()

In [10]:
# NOTE(review): disabled legacy evaluation cell, kept for reference only.
# If re-enabled, note that the batches are fed to the model without being
# moved to DEVICE, and the loop runs without torch.no_grad() or model.eval()
# — fix those before trusting the averages printed below.
# value_losses = []
# policy_losses = []
# total_losses = []

# for batch_index, (boards, policies, values) in tqdm(enumerate(test_dataloader)):
#     pred_values, pred_policy = model(boards)
#     value_loss = nn.CrossEntropyLoss()(
#         pred_values,
#         values,
#     )
#     policy_loss = 0.158 * nn.CrossEntropyLoss()(
#         pred_policy,
#         policies,
#     )
#     loss = value_loss + policy_loss

#     value_losses.append(value_loss.item())
#     policy_losses.append(policy_loss.item())
#     total_losses.append(loss.item())

# print("Average value loss: ", sum(value_losses) / len(value_losses))
# print("Average policy loss: ", sum(policy_losses) / len(policy_losses))
# print("Average total loss: ", sum(total_losses) / len(total_losses))