In [51]:
# Import modules.
import torch
import random

import numpy as np

from network import H_GO
from torch import nn, optim
from collections import deque
from torchsummary import summary
from dataReader import DataReader

In [52]:
# Set training device.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available.")
else:
    device = torch.device("cpu")
    print("CUDA is not available, using CPU instead.")

CUDA is available.


In [53]:
# Set random seed.
random_seed = 0

random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [54]:
# Load data.
data_reader = DataReader(dir_path="../dataset/10k", load_num=5000)

Read raw data completed. (0.53 s)
Clear data completed. (0.07 s)
Convert data completed. (0.25 s)
Make train data completed. (7.02 s)


In [55]:
# set model.
model = H_GO(input_size=8, output_size=361, hidden_dim=64)
model.to(device=device)

H_GO(
  (conv1): Conv2d(8, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (tanh): Tanh()
  (layers): Sequential(
    (0): Block(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): Block(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    

In [56]:
# Print model summary.
summary(model, input_size=(8, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 17, 17]           4,672
       BatchNorm2d-2           [-1, 64, 17, 17]             128
              Tanh-3           [-1, 64, 17, 17]               0
            Conv2d-4           [-1, 64, 17, 17]          36,928
       BatchNorm2d-5           [-1, 64, 17, 17]             128
              ReLU-6           [-1, 64, 17, 17]               0
            Conv2d-7           [-1, 64, 17, 17]          36,928
       BatchNorm2d-8           [-1, 64, 17, 17]             128
              ReLU-9           [-1, 64, 17, 17]               0
            Block-10           [-1, 64, 17, 17]               0
           Conv2d-11           [-1, 64, 17, 17]          36,928
      BatchNorm2d-12           [-1, 64, 17, 17]             128
             ReLU-13           [-1, 64, 17, 17]               0
           Conv2d-14           [-1, 64,

In [57]:
# Set hyperparameters.
epochs = 200
batch_size = 1024
data_num = 5000

In [58]:
# Set optimizer and loss function.
optimizer = optim.Adam(params=model.parameters())
loss_fu_policy = nn.CrossEntropyLoss()
loss_fu_value = nn.BCELoss()

In [59]:
def supervised_training(epochs: int, model: H_GO, optimizer: optim.Adam, loss_fu_policy: nn.CrossEntropyLoss, loss_fu_value: nn.MSELoss) -> None:
    # Set model to training mode.
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        total_acc_policy = 0
        total_acc_value = 0

        for _ in range(data_num // batch_size):
            # Get training data batch.
            training_batch = data_reader.get_training_batch(batch_size=batch_size, shuffle=True)

            # Convert data to tensor.
            game_data = torch.tensor([x[0] for x in training_batch], dtype=torch.float).to(device=device)
            step = torch.tensor([x[1] for x in training_batch], dtype=torch.long).to(device=device)
            winner = torch.tensor([x[2] for x in training_batch], dtype=torch.float).to(device=device)

            # Get model output.
            policy, value = model(game_data)

            # Calculate loss.
            policy_loss = loss_fu_policy(policy, step)
            value_loss = loss_fu_value(value, winner)
            loss = policy_loss + value_loss

            # Calculate accuracy.
            total_loss += loss.item()
            print(policy.argmax(1))
            total_acc_policy += torch.sum(policy.argmax(1) == step).item()
            total_acc_value += torch.sum(torch.round(value) == winner).item()

            # Update model parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(
            f"Epoch: {epoch} | "
            f"Loss of model: {total_loss:.3f} | "
            f"Accuracy of policy: {total_acc_policy / data_num * 100:.3f}% | "
            f"Accuracy of value: {total_acc_value / data_num * 100:.3f}%"
        )

    # Set model to evaluation mode.
    model.eval()

In [60]:
supervised_training(epochs=epochs, model=model, optimizer=optimizer, loss_fu_policy=loss_fu_policy, loss_fu_value=loss_fu_value)

tensor([ 16, 124, 200,  ..., 226,  44, 160], device='cuda:0')
tensor([117,  36, 172,  ...,  36,  74, 192], device='cuda:0')
tensor([192, 154,  73,  ..., 110, 158, 222], device='cuda:0')
tensor([224,  51, 159,  ..., 298, 250, 224], device='cuda:0')
Epoch: 0 | Loss of model: 26.571 | Accuracy of policy: 0.360% | Accuracy of value: 41.500%
tensor([139, 250,  61,  ..., 264, 295, 295], device='cuda:0')
tensor([225, 268,  51,  ...,  63, 263,  42], device='cuda:0')
tensor([225, 298, 299,  ..., 295, 263,  61], device='cuda:0')
tensor([ 72, 239,  61,  ...,  72,  72,  72], device='cuda:0')
Epoch: 1 | Loss of model: 26.229 | Accuracy of policy: 0.500% | Accuracy of value: 42.940%
tensor([ 72,  72,  72,  ...,  63,  63, 176], device='cuda:0')
tensor([239,  63,  72,  ...,  79,  72,  72], device='cuda:0')
tensor([239,  72,  72,  ...,  79,  72, 300], device='cuda:0')
tensor([263,  72, 301,  ..., 300,  63,  60], device='cuda:0')
Epoch: 2 | Loss of model: 26.071 | Accuracy of policy: 0.360% | Accuracy o

KeyboardInterrupt: 