In [1]:
import os
# Switch used throughout the notebook: True when the KAGGLE_KERNEL_RUN_TYPE
# environment variable is set, False otherwise.
KAGGLE = os.environ.get("KAGGLE_KERNEL_RUN_TYPE") is not None
if KAGGLE:
    # Placeholder for Kaggle-only setup; the install command below is disabled.
    #!pip install kaggle-environments -U
    pass

In [2]:
# Record the hardware of the machine running this notebook:
# GPU status (nvidia-smi), memory usage (free -h) and CPU details (lscpu).
!nvidia-smi
!free -h
!lscpu

Thu Jun 30 13:09:24 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 495.46       CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   76C    P0    34W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import os
import gc
import sys
from time import time, sleep
import json
from pathlib import Path
from datetime import datetime
from itertools import count
from collections import defaultdict

from tqdm.notebook import tqdm
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import matplotlib.pyplot as plt

# Environment

# Train on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device={device}")

# Number of logical CPUs; used further down to size the DataLoader worker pool.
# os.cpu_count() replaces parsing the output of `lscpu`, so this no longer depends
# on an external tool. It may return None when the count is unknown, hence the
# fallback to 1.
n_cpu = os.cpu_count() or 1
print(f"n_cpu={n_cpu}")

# Allow cuDNN to benchmark and pick algorithms at runtime.
torch.backends.cudnn.benchmark = True

# Directory settings
#   TEMP_DIR     - scratch space (the Boost sources are unpacked here on Kaggle)
#   STORAGE_DIR  - logs, checkpoints and the compiled extension of this run
#   PROJECTS_DIR - directory that contains the kore2022 checkout

TEMP_DIR = Path("../temp") if KAGGLE else Path("./010")
STORAGE_DIR = Path() if KAGGLE else TEMP_DIR  # off Kaggle both names share one directory
PROJECTS_DIR = Path() if KAGGLE else Path("../../")

# Create the working directories on first use, announcing each creation.
if not TEMP_DIR.exists():
    print(f"mkdir {TEMP_DIR}")
    TEMP_DIR.mkdir()
if not STORAGE_DIR.exists():
    print(f"mkdir {STORAGE_DIR}")
    STORAGE_DIR.mkdir()

# The extension module is compiled into STORAGE_DIR, so make it importable.
sys.path.append(str(STORAGE_DIR))

# Hyperparameters

h = dict(
    lr=1e-3,
    batch_size=32,
    # Checkpoint to resume from; a falsy value skips loading.
    checkpoint_file="010/latest.pt",
)

# Game-record (.kif) directories, one per data group.
kif_dirs = [Path("../data/kifs") / group / "kifs" for group in ("b-e", "f-i", "j-m", "n-q")]

# All .kif files of every group, sorted by path and capped at the first 47000.
kif_filenames = sorted(
    str(entry)
    for directory in kif_dirs
    for entry in directory.iterdir()
    if entry.suffix == ".kif"
)[:47000]
print(f"len(kif_filenames)={len(kif_filenames)}")

device=cuda
n_cpu=16
len(kif_filenames)=47000


In [4]:
# Fetch the project sources.
if KAGGLE:
    # On Kaggle the repositories are cloned fresh into the working directory.
    # The access token is read from Kaggle's secret store rather than hard-coded.
    from kaggle_secrets import UserSecretsClient
    PERSONAL_ACCESS_TOKEN = UserSecretsClient().get_secret("PERSONAL_ACCESS_TOKEN")

    # Each clone has its .git directory removed right after checkout.
    !git clone https://github.com/Lgeu/marathon.git
    !rm -rf ./marathon/.git
    # NOTE(review): the token is interpolated into the clone URL on the shell
    # command line; make sure this cell's output is never shared with it visible.
    !git clone https://Lgeu:{PERSONAL_ACCESS_TOKEN}@github.com/Lgeu/kore2022.git
    !rm -rf ./kore2022/.git
else:
    # Locally the notebook runs inside an existing checkout; just update it.
    !git pull

Already up to date.


In [5]:
# Kaggle only: build and install Boost 1.79.0 (Boost.Python) from source,
# since the extension compiled later links against libboost_python / libboost_numpy.
if KAGGLE:
    # install boost
    # Download and unpack the source tarball inside TEMP_DIR.
    %cd {TEMP_DIR}
    !wget https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz
    !tar xzf boost_1_79_0.tar.gz
    # `%cd -` returns to the previous working directory.
    %cd -
    # Bootstrap the build tool, then build and install only the python component.
    # The compiler flags request -O3 and -fPIC.
    %cd {TEMP_DIR / "boost_1_79_0"}
    !./bootstrap.sh
    !./b2 install -j4 --with-python -d0 cxxflags=-O3 cxxflags=-fPIC cflags=-O3 cflags=-fPIC
    %cd -

In [6]:
#!find / -name *python3.* 2> /dev/null

In [7]:
# Compile kore2022/kore_extension.cpp into the shared library kore_extension.so inside
# STORAGE_DIR (which is on sys.path), statically linking Boost.Python / Boost.NumPy
# for Python 3.9.
# NOTE(review): the include and library paths are hard-coded to /home/user/anaconda3 and
# /usr/local/lib, and -march=haswell fixes the target CPU; adjust these per machine.
!g++ {PROJECTS_DIR / "kore2022/kore_extension.cpp"} -o {STORAGE_DIR / "kore_extension.so"} -std=c++17 -Wall -Wextra -O3 -march=haswell --shared -fPIC -I/home/user/anaconda3/include/python3.9 /usr/local/lib/libboost_numpy39.a /usr/local/lib/libboost_python39.a -lpython3.9 -L/home/user/anaconda3/lib

In file included from [01m[K../../kore2022/kore_fleets.cpp:1074[m[K,
                 from [01m[K../../kore2022/kore_extension.cpp:6[m[K:
      |  [01;35m[K^~~~~~~[m[K
In file included from [01m[K../../kore2022/kore_fleets.cpp:1074[m[K,
                 from [01m[K../../kore2022/kore_extension.cpp:6[m[K:
 1041 | #pragma omp parallel for
      | 
In file included from [01m[K../../kore2022/kore_extension.cpp:6[m[K:
  106 | static Point [01;35m[KGetColRow[m[K(const int pos) {
      |              [01;35m[K^~~~~~~~~[m[K


In [8]:
import kore_extension

In [9]:
# for i in range(10000, 30000):
#     print(i)
#     kif_filename = kif_filenames[i]
#     shipyard_features, global_features, target_values, target_action_types, \
#         target_action_n_ships, target_action_relative_position, target_action_n_steps, \
#         target_action_direction, target_action_quantized_n_ships = kore_extension.make_nnue_feature(kif_filename)

In [10]:
#!ls -1 012 | wc -l

In [11]:
class NNUEDataset(torch.utils.data.Dataset):
    """Dataset that serves precomputed training tensors, one item per game record.

    Each item corresponds to one ``.kif`` file. Its features are read from
    ``feature_<kif stem>.npz`` in the ``012`` directory next to ``STORAGE_DIR``.
    (The features were presumably produced by ``kore_extension.make_nnue_feature``;
    computing them on the fly is not done here.)

    Args:
        kif_filenames: Sequence of ``.kif`` paths; only the file stem is used to
            locate the matching feature archive.
    """

    def __init__(self, kif_filenames):
        self.kif_filenames = kif_filenames

    def __len__(self):
        """Return the number of game records."""
        return len(self.kif_filenames)

    def __getitem__(self, index):
        """Load the tensors for the ``index``-th game record.

        Returns:
            An 8-tuple of tensors, in the positional order expected by
            ``NNUE.forward``: shipyard features, global features, target values
            (float32), then int64 targets for action type, quantized ship count,
            relative position, number of steps and direction.
        """
        kif_id = Path(self.kif_filenames[index]).stem
        feature_path = STORAGE_DIR.parent / f"012/feature_{kif_id}.npz"

        # The archive must hold exactly nine arrays in this storage order; they are
        # unpacked positionally, not by key. Everything is read inside the `with`
        # block so the underlying file handle is closed deterministically, which
        # matters when many DataLoader workers open files concurrently.
        with np.load(feature_path) as z:
            (
                shipyard_features,
                global_features,
                target_values,
                target_action_types,
                _raw_action_n_ships,  # unquantized ship count; not used for training
                target_action_relative_position,
                target_action_n_steps,
                target_action_direction,
                target_action_quantized_n_ships,
            ) = [z[f] for f in z.files]

        return (
            torch.from_numpy(shipyard_features),
            torch.from_numpy(global_features),
            torch.from_numpy(target_values.astype(np.float32)),
            torch.from_numpy(target_action_types.astype(np.int64)),
            torch.from_numpy(target_action_quantized_n_ships.astype(np.int64)),
            torch.from_numpy(target_action_relative_position.astype(np.int64)),
            torch.from_numpy(target_action_n_steps.astype(np.int64)),
            torch.from_numpy(target_action_direction.astype(np.int64)),
        )

def collate_fn(batch):
    """Merge dataset items field by field.

    ``batch`` is a list of equal-length tuples of tensors. The i-th tensors of all
    items are concatenated along dim 0, so the result is one tuple with the same
    number of fields as a single item.
    """
    merged = []
    for field_tensors in zip(*batch):
        merged.append(torch.cat(field_tensors))
    return tuple(merged)

# Each dataset item is the full set of tensors loaded from one feature file, and
# collate_fn concatenates items along dim 0. batch_size therefore counts files,
# and the number of rows per batch varies.
train_dataset = NNUEDataset(kif_filenames)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=h["batch_size"],
    collate_fn=collate_fn,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
    num_workers=min(8, n_cpu),  # never more than 8 loader processes
)

In [13]:
# Width of the dense global feature vector fed to `global_feature_encoder`.
N_GLOBAL_FEATURES = 9
# Number of distinct sparse shipyard feature indices. The value itself is used as
# the padding index of the embedding table, which has N_SHIPYARD_FEATURES + 1 rows.
N_SHIPYARD_FEATURES = 32800


class NNUE(nn.Module):
    """Shared-trunk network predicting a value and a factored action distribution.

    A dense encoding of the global features is added to a sum-pooled embedding of
    the sparse shipyard features, passed through two hidden layers, and decoded by
    several linear heads:

    * ``value_decoder``: one logit, trained with binary cross-entropy.
    * ``type_decoder``: four logits, one per action type.
    * Per-action-type heads, indexed by action type id 0..3 (the training code
      labels them spawn, move, attack, convert): ``n_ships_decoder`` for every
      type, ``relative_position_decoder`` for types 1-3, ``n_steps_decoder`` for
      type 1 and ``direction_decoder`` for types 2-3. ``None`` marks a head that
      does not exist for that action type.
    """

    def __init__(self):
        super().__init__()
        self.global_feature_encoder = nn.Linear(N_GLOBAL_FEATURES, 256)
        self.embedding = nn.EmbeddingBag(N_SHIPYARD_FEATURES + 1, 256, mode="sum", padding_idx=N_SHIPYARD_FEATURES)
        self.fc1 = nn.Linear(256, 256)
        self.fc2 = nn.Linear(256, 256)
        self.value_decoder = nn.Linear(256, 1)
        self.type_decoder = nn.Linear(256, 4)
        self.n_ships_decoder = nn.ModuleList([
            nn.Linear(256, 12),  # [1, 10]
            nn.Linear(256, 32),
            nn.Linear(256, 32),
            nn.Linear(256, 32),
        ])
        self.relative_position_decoder = nn.ModuleList([
            None,
            nn.Linear(256, 448),  # [0, 441)
            nn.Linear(256, 448),
            nn.Linear(256, 448),
        ])
        self.n_steps_decoder = nn.ModuleList([
            None,
            nn.Linear(256, 24),  # [1, 21]
            None,
            None,
        ])
        self.direction_decoder = nn.ModuleList([
            None,
            None,
            nn.Linear(256, 4),
            nn.Linear(256, 4),
        ])

    def forward(
        self,
        shipyard_features,
        global_features,
        target_values,
        target_action_types,
        target_action_n_ships,  # quantized
        target_action_relative_position,
        target_action_n_steps,
        target_action_direction,
    ):
        """Run the network and compute the training loss in one pass.

        The action-specific heads are evaluated only on the rows whose *target*
        action type matches the head (teacher forcing), so targets are required.

        Args:
            shipyard_features: Integer tensor of sparse feature indices, one bag
                per row; entries equal to -100 are treated as padding. The
                caller's tensor is not modified.
            global_features: Float tensor ``[batch_size, N_GLOBAL_FEATURES]``.
            target_values: Float targets for the value logit, ``[batch_size]``.
            target_action_types: Integer action type ids in 0..3, ``[batch_size]``.
            target_action_n_ships: Quantized ship-count class per row.
            target_action_relative_position: Relative-position class per row.
            target_action_n_steps: Step-count class per row.
            target_action_direction: Direction class per row.

        Returns:
            A 3-tuple ``(predictions, losses, loss)``:

            * ``predictions = (value, action_type, specific_predictions)`` where
              ``specific_predictions[i]`` is
              ``[n_action_data, n_ships, relative_position, n_steps, direction]``
              for action type ``i`` (logits, or ``None`` for absent heads).
            * ``losses = (value_loss, type_loss, specific_losses)``; all are
              unweighted sums over rows. ``specific_losses[i]`` is
              ``[n_action_data, n_ships_loss, relative_position_loss,
              n_steps_loss, direction_loss]`` with ``None`` for absent heads.
            * ``loss``: the weighted total divided by ``batch_size``.
        """
        batch_size = shipyard_features.size(0)
        # Map the -100 padding marker to the embedding's padding index. Done out of
        # place so the tensor passed in by the caller is left untouched.
        shipyard_features = shipyard_features.masked_fill(shipyard_features == -100, N_SHIPYARD_FEATURES)

        # [batch_size, N_GLOBAL_FEATURES], [batch_size, 512] -> [batch_size, 256]
        x = self.global_feature_encoder(global_features) + self.embedding(shipyard_features)
        x = F.leaky_relu(x, 1.0 / 64.0)

        # [batch_size, 256]
        x = self.fc1(x)
        x = F.leaky_relu(x, 1.0 / 64.0)
        x = self.fc2(x)
        x = F.leaky_relu(x, 1.0 / 64.0)

        # [batch_size, 256] -> [batch_size]
        value = self.value_decoder(x).squeeze(1)
        # [batch_size, 256] -> [batch_size, 4]
        action_type = self.type_decoder(x)

        specific_predictions = []
        for i in range(4):
            # [batch_size, 256] -> [n_action_data, 256]
            xi = x[target_action_types == i]
            n_action_data = len(xi)
            # [n_action_data, 256] -> [n_action_data, n_classes] for each existing head
            n_ships = self.n_ships_decoder[i](xi)
            relative_position = None if self.relative_position_decoder[i] is None else self.relative_position_decoder[i](xi)
            n_steps = None if self.n_steps_decoder[i] is None else self.n_steps_decoder[i](xi)
            direction = None if self.direction_decoder[i] is None else self.direction_decoder[i](xi)

            specific_predictions.append([
                n_action_data, n_ships, relative_position, n_steps, direction
            ])

        # === loss computation ===

        value_loss = F.binary_cross_entropy_with_logits(value, target_values, reduction="sum")
        type_loss = F.cross_entropy(action_type, target_action_types, reduction="sum")
        loss = value_loss * 10.0 + type_loss
        ACTION_LOSS_WEIGHTS = [1.0, 1.0, 5.0, 25.0]

        specific_losses = []
        for i in range(4):
            n_action_data, n_ships, relative_position, n_steps, direction = specific_predictions[i]
            indices = target_action_types == i

            n_ships_loss = F.cross_entropy(n_ships, target_action_n_ships[indices], reduction="sum")
            # Every accumulation below is out of place. `action_loss` starts as an
            # alias of `n_ships_loss`, so an in-place `+=` would silently fold the
            # other heads' losses into the reported n_ships_loss.
            action_loss = n_ships_loss

            if relative_position is None:
                relative_position_loss = None
            else:
                relative_position_loss = F.cross_entropy(relative_position, target_action_relative_position[indices], reduction="sum")
                action_loss = action_loss + relative_position_loss

            if n_steps is None:
                n_steps_loss = None
            else:
                n_steps_loss = F.cross_entropy(n_steps, target_action_n_steps[indices], reduction="sum")
                action_loss = action_loss + n_steps_loss

            if direction is None:
                direction_loss = None
            else:
                direction_loss = F.cross_entropy(direction, target_action_direction[indices], reduction="sum")
                action_loss = action_loss + direction_loss

            loss = loss + ACTION_LOSS_WEIGHTS[i] * action_loss

            specific_losses.append([
                n_action_data, n_ships_loss, relative_position_loss, n_steps_loss, direction_loss
            ])

        # All partial losses are sums over rows; normalise the total per row.
        loss = loss * (1 / batch_size)

        return (value, action_type, specific_predictions), (value_loss, type_loss, specific_losses), loss

model = NNUE()

In [None]:
# Training state: iteration counter, model on the target device, and optimizer.
iteration = 0
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=h["lr"])

# Optionally resume from a checkpoint (skipped when h["checkpoint_file"] is falsy).
# NOTE: torch.load unpickles the file; only load checkpoints you created yourself.
if h["checkpoint_file"]:
    dict_checkpoint = torch.load(h["checkpoint_file"], map_location="cpu")
    # strict=False tolerates missing or unexpected keys in the saved weights.
    model.load_state_dict(dict_checkpoint["state_dict"], strict=False)
    # Restore optimizer state and counters unless h["finetuning"] is set.
    # The key is optional, so the default is a full resume.
    if not h.get("finetuning", False):
        optimizer.load_state_dict(dict_checkpoint["optimizer"])
        iteration = dict_checkpoint["iteration"]
        # Restore every integer counter stored in the checkpoint under its own
        # name. The key must be used as the global's name, not the literal "name".
        for name, value in dict_checkpoint.items():
            if isinstance(value, int):
                globals()[name] = value
    # Free the checkpoint copy before training starts.
    del dict_checkpoint
    gc.collect()
    torch.cuda.empty_cache()

train_loader_iter = iter(train_loader)
# Running statistics per loss name: [number of samples, summed loss].
training_stats = defaultdict(lambda: [0, 0.0])
# Names of action types 0..3, in the order of the model's per-action outputs.
ACTION_NAMES = ["spawn", "move", "attack", "convert"]
if not KAGGLE:
    # TensorBoard event files are written into STORAGE_DIR.
    writer = SummaryWriter(STORAGE_DIR)
# Start of this session, used for the elapsed time written to the log.
t0 = time()

def update_loss_stats(training_stats, value_loss, type_loss, specific_losses):
    """Accumulate one batch's losses into ``training_stats``.

    ``training_stats`` maps a loss name to ``[sample_count, summed_loss]``. Value
    and type losses are counted over every sample in the batch. Each
    action-specific loss is counted over the samples of its action type, and
    heads that do not exist for an action type (``None``) are skipped.
    """
    def accumulate(key, n_samples, loss_tensor):
        entry = training_stats[key]
        entry[0] += n_samples
        entry[1] += loss_tensor.item()

    data_size = sum(per_action[0] for per_action in specific_losses)
    accumulate("value_loss", data_size, value_loss)
    accumulate("type_loss", data_size, type_loss)

    for action_name, per_action in zip(ACTION_NAMES, specific_losses):
        n_action_data, n_ships_loss, relative_position_loss, n_steps_loss, direction_loss = per_action
        # The n_ships head exists for every action type.
        accumulate(f"{action_name}_n_ships_loss", n_action_data, n_ships_loss)
        optional_heads = (
            ("relative_position", relative_position_loss),
            ("n_steps", n_steps_loss),
            ("direction", direction_loss),
        )
        for head_name, head_loss in optional_heads:
            if head_loss is not None:
                accumulate(f"{action_name}_{head_name}_loss", n_action_data, head_loss)

# Main training loop. itertools.count never terminates, so this runs until the
# kernel is interrupted. Counting starts from `iteration`, i.e. from the value
# restored from the checkpoint when one was loaded.
for iteration in tqdm(count(iteration)):
    model.train()
    
    # Fetch the next batch; when the loader is exhausted, start a new pass over it.
    try:
        batch = next(train_loader_iter)
    except StopIteration:
        train_loader_iter = iter(train_loader)
        batch = next(train_loader_iter)
    
    # One optimisation step. The model computes its own loss from the targets
    # contained in the batch.
    batch = tuple(d.to(device) for d in batch)
    optimizer.zero_grad()
    (value, action_type, specific_predictions), (value_loss, type_loss, specific_losses), loss = model(*batch)
    loss.backward()
    optimizer.step()
    update_loss_stats(training_stats, value_loss, type_loss, specific_losses)
    
    # dump stats
    # Every 1000 iterations, and once at iteration index 9 (presumably an early
    # check that logging works), append the accumulated [count, sum] statistics
    # to log.txt as one JSON line.
    if (iteration + 1) % 1000 == 0 or iteration == 9:
        with open(STORAGE_DIR / "log.txt", "a") as f:
            t = round(time() - t0, 1)  # seconds since this session started
            json.dump({
                "time": t,
                "iteration": iteration,
                "stats": training_stats,
            }, f)
            f.write("\n")
        if not KAGGLE:
            # TensorBoard receives the mean loss per sample (sum / count) of the window.
            for name, stat in training_stats.items():
                if stat[0] > 0:
                    writer.add_scalar("loss/" + name, stat[1] / stat[0], iteration + 1)
        # Start a fresh accumulation window.
        training_stats.clear()

    # save model
    # Every 10000 iterations (and once at iteration index 9) write a numbered
    # checkpoint. The stored "iteration" is iteration + 1, so a resumed run
    # continues with the next iteration.
    if (iteration + 1) % 10000 == 0 or iteration == 9:
        checkpoint_file = STORAGE_DIR / f"checkpoint_{iteration + 1:08d}.pt"
        torch.save({
            "iteration": iteration + 1,
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }, checkpoint_file)
        # Keep latest.pt pointing at the newest checkpoint (copied via the shell).
        !cp {checkpoint_file} {STORAGE_DIR / "latest.pt"}

0it [00:00, ?it/s]

In [None]:
# From iteration 40k onward, the value_loss coefficient was changed from 1 to 10.
