In [2]:
!pip install --no-cache-dir -e .

Obtaining file:///Users/han/python-shogi
Installing collected packages: python-shogi
  Attempting uninstall: python-shogi
    Found existing installation: python-shogi 1.0.14
    Uninstalling python-shogi-1.0.14:
      Successfully uninstalled python-shogi-1.0.14
  Running setup.py develop for python-shogi
Successfully installed python-shogi-1.0.14


In [59]:
! python utils/filter_csa.py --dir ./data/csa_train

./data/csa_train/.DS_Store
./data/csa_train/test_2025.csa
./data/csa_train/test_1279.csa
./data/csa_train/test_1041.csa
./data/csa_train/test.csa
kifu count : 2022


In [61]:
!python utils/make_kifu_list.py ./data/csa_train './data/kifu_list_random'

total kifu num = 2023
train kifu num = 1820
test kifu num = 203


In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from shogi.common import *
from shogi.features import *
from shogi.read_kifu import *
from shogi.network.policyvalue_res import PolicyValueResNetwork
from shogi.network.policy import PolicyNetwork
from shogi import serializers

import argparse
import pickle
import re
import os
import logging

In [8]:
# setting
parser = argparse.ArgumentParser()
parser.add_argument('kifulist_train', type=str)
parser.add_argument('kifulist_test', type=str)
parser.add_argument('--batchsize', '-b', type=int, default=32)
parser.add_argument('--test_batchsize', type=int, default=512)
parser.add_argument('--epoch', '-e', type=int, default=1)
parser.add_argument('--model', type=str, default='model/model_test')
parser.add_argument('--state', type=str, default='model/state_test')
parser.add_argument('--checkpoint', type=str, default='')
parser.add_argument('--initmodel', '-m', type=str, default='')
parser.add_argument('--resume', '-r', type=str, default='')
parser.add_argument('--log', default=None)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--eval_interval', '-i', type=int, default=1000)
parser.add_argument('--save_interval_epoch', type=int, default=10)

# '--resume', 'checkpoint/210910_15_837419',
args = parser.parse_args(args=['kifu_list_train.txt', 'kifu_list_test.txt', '--epoch', '3', '--model', 'model/5_test_210913_1', '--checkpoint', 'checkpoint/5_test_210913_1', '--eval_interval', '2',  '--save_interval_epoch', '5'])
device = 'cuda' if torch.cuda.is_available else 'cpu'
# device = 'cpu'

logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=args.log, level=logging.DEBUG)
logging.info('checkpoint : {}'.format(args.checkpoint))
logging.info('batchsize : {}'.format(args.batchsize))
logging.info('initmodel : {}'.format(args.initmodel))
logging.info('resume : {}'.format(args.resume))
logging.info('log : {}'.format(args.log))
logging.info('lr : {}'.format(args.lr))

2021/09/14 02:38:45	INFO	checkpoint : checkpoint/5_test_210913_1
2021/09/14 02:38:45	INFO	batchsize : 32
2021/09/14 02:38:45	INFO	initmodel : 
2021/09/14 02:38:45	INFO	resume : 
2021/09/14 02:38:45	INFO	log : None
2021/09/14 02:38:45	INFO	lr : 0.01


In [9]:
# create model
num_resnet = 3
num_channel = 80

model=PolicyValueResNetwork(num_resnet, num_channel)
model.to(device)

optimizer = optim.SGD(model.parameters(),lr=args.lr)
cross_entropy_loss = nn.CrossEntropyLoss()
bce_with_logits_loss = nn.BCEWithLogitsLoss()

In [10]:
# neede functions
def mini_batch(positions, i, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    mini_batch_win = []
    for b in range(batchsize):
        features, move, win = make_features(positions[i + b])
        mini_batch_data.append(features)
        mini_batch_move.append(move)
        mini_batch_win.append(win)

    return (torch.from_numpy(np.array(mini_batch_data, dtype=np.float32)).to(device),
            torch.from_numpy(np.array(mini_batch_move, dtype=np.long)).to(device),
            torch.from_numpy(np.array(mini_batch_win, dtype=np.float32).reshape((-1, 1))).to(device))

def mini_batch_for_test(positions, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    mini_batch_win = []
    for b in range(batchsize):
        features, move, win = make_features(random.choice(positions))
        mini_batch_data.append(features)
        mini_batch_move.append(move)
        mini_batch_win.append(win)

    return (torch.from_numpy(np.array(mini_batch_data, dtype=np.float32)).to(device),
            torch.from_numpy(np.array(mini_batch_move, dtype=np.long)).to(device),
            torch.from_numpy(np.array(mini_batch_win, dtype=np.float32).reshape((-1, 1))).to(device))
    
def accuracy(y, t):
    return (torch.max(y, 1)[1] == t).sum().item() / len(t)

def binary_accuracy(y, t):
    pred = y >= 0
    truth = t >= 0.5
    return pred.eq(truth).sum().item() / len(t)

def save_checkpoint():        
    logging.info('save checkpoint')
    path = f'{args.checkpoint}_{epoch}_{t}'
    checkpoint = {
        'epoch': epoch,
        't': t,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(checkpoint, path)

In [11]:
# Init/Resume
if args.initmodel:
    logging.info('Load model from {}'.format(args.initmodel))
    serializers.load_npz(args.initmodel, model)
if args.resume:
    checkpoint = torch.load(args.resume, map_location=device)
    logging.info(f'Loading the checkpoint from {args.resume}')
    epoch = checkpoint['epoch']
    t = checkpoint['t']
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
else:
    epoch = 0
    t = 0
    
logging.info('read kifu start')

# 保存済みのpickleファイルがある場合、pickleファイルを読み込む
# train date
train_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_train) + '.pickle'

if os.path.exists(train_pickle_filename):
    with open(train_pickle_filename, 'rb') as f:
        positions_train = pickle.load(f)
    logging.info(train_pickle_filename)
    logging.info('load train pickle')
else:
    positions_train = read_kifu(f'./data/{args.kifulist_train}')

# test data
test_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_test) + '.pickle'
if os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'rb') as f:
        positions_test = pickle.load(f)
    logging.info('load test pickle')
else:
    positions_test = read_kifu(f'./data/{args.kifulist_test}')

# 保存済みのpickleがない場合、pickleファイルを保存する
if not os.path.exists(train_pickle_filename):
    with open(train_pickle_filename, 'wb') as f:
        pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL)
    logging.info('save train pickle')
if not os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'wb') as f:
        pickle.dump(positions_test, f, pickle.HIGHEST_PROTOCOL)
    logging.info('save test pickle')
logging.info('read kifu end')

logging.info('train position num = {}'.format(len(positions_train)))
logging.info('test position num = {}'.format(len(positions_test)))

2021/09/14 02:38:50	INFO	read kifu start
2021/09/14 02:38:51	INFO	kifu_list_train.pickle
2021/09/14 02:38:51	INFO	load train pickle
2021/09/14 02:38:51	INFO	load test pickle
2021/09/14 02:38:51	INFO	read kifu end
2021/09/14 02:38:51	INFO	train position num = 3313
2021/09/14 02:38:51	INFO	test position num = 471


In [16]:
import random
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()

# train
logging.info('start training')

itr = 0
sum_loss1 = 0
sum_loss2 = 0
sum_loss = 0

for e in range(args.epoch):
    epoch += 1
    itr_eval = 0
    sum_loss1_eval = 0
    sum_loss2_eval = 0
    sum_loss_eval = 0

    positions_train_shuffled = random.sample(positions_train, len(positions_train))
  
    for i in range(0, len(positions_train_shuffled)-args.batchsize, args.batchsize):
        t += 1
        itr += 1
        itr_eval += 1

        x, t1, t2 = mini_batch(positions_train_shuffled, i, args.batchsize)
        model.train()
        x, t1, t2 = mini_batch(positions_train_shuffled, i, args.batchsize)
        model.train()
        y1, y2 = model(x)

        loss1 = cross_entropy_loss(y1, t1)
        loss1 = loss1.mean()
        loss2 = bce_with_logits_loss(y2, t2)
        loss = loss1 + loss2

        model.zero_grad()
        loss1.backward()
        optimizer.step()

        sum_loss1_eval += loss1.item()
        sum_loss2_eval += loss2.item()
        sum_loss_eval += loss.item()

        sum_loss1 += loss1.item()
        sum_loss2 += loss2.item()
        sum_loss += loss.item()

        if t % args.eval_interval == 0:
            with torch.no_grad():
                x, t1, t2 = mini_batch_for_test(positions_test, args.test_batchsize)
                y1, y2 = model(x)

                loss1 = cross_entropy_loss(y1, t1)
                loss1 = loss1.mean()
                loss2 = bce_with_logits_loss(y2, t2)
                loss = loss1 + loss2

                loss, current = loss1.item(), t
                # print(f"loss1: {loss1:>7f}  loss2: {loss2:>7f}  loss: {loss:>7f}  [{current:>5d}]")
                logging.info('epoch = {}, iteration = {}, loss_policy = {}, loss_value = {}, loss = {}, accuracy = {}, {}'.format(
                    epoch, t, sum_loss1/itr, sum_loss2/itr, sum_loss/itr ,accuracy(y1,t1), binary_accuracy(y2,t2)))

                itr = 0
                sum_loss1 = 0
                sum_loss2 = 0
                sum_loss = 0
          
    logging.info('validate test set')
    itr_test = 0
    sum_test_accuracy1 = 0
    sum_test_accuracy2 = 0
    
    with torch.no_grad():
        for i in range(0, len(positions_test)-args.batchsize, args.batchsize):
            x, t1, t2 = mini_batch_for_test(positions_test, args.batchsize)
            y1 = model(x)

            itr_test += 1
            sum_test_accuracy1 += accuracy(y1, t1)
            sum_test_accuracy2 += binary_accuracy(y2, t2)

        logging.info('epoch = {}, iteration = {}, loss_polish = {}, loss_value = {}, loss = {}, accuracy = {}, {}'.format(
            epoch, t, sum_loss1_eval/itr_eval, sum_loss2_eval/itr_eval, sum_loss_eval/itr_eval, sum_test_accuracy1/itr_test, sum_test_accuracy2/itr_test))

        writer.add_scalar('Train_Loss_Policy/Iteration', sum_loss1_eval/itr_eval, t)
        writer.add_scalar('Train_Loss_Value/Iteration', sum_loss2_eval/itr_eval, t)
        writer.add_scalar('Train_Loss/Iteration', sum_loss_eval/itr_eval, t)

        writer.add_scalar('Test_Acc_Policy/Iteration', sum_test_accuracy1/itr_test, t)
        writer.add_scalar('Test_Acc_/Iteration', sum_test_accuracy2/itr_test, t)

    if e % args.save_interval_epoch == 0:
        save_checkpoint()

logging.info('End train')
save_checkpoint()

logging.info('Save model : {}'.format(args.model))
serializers.save_npz(args.model, model)

writer.close()

2021/09/14 02:43:28	INFO	start training
2021/09/14 02:43:29	INFO	epoch = 2, iteration = 4, loss_policy = 6.467972993850708, loss_value = 0.6889800131320953, loss = 7.1569530963897705, accuracy = 0.0078125, 0.509765625
2021/09/14 02:43:30	INFO	epoch = 2, iteration = 6, loss_policy = 6.3110997676849365, loss_value = 0.6866650581359863, loss = 6.997764825820923, accuracy = 0.017578125, 0.521484375
2021/09/14 02:43:31	INFO	epoch = 2, iteration = 8, loss_policy = 5.906799077987671, loss_value = 0.6855629682540894, loss = 6.592361927032471, accuracy = 0.044921875, 0.5390625
2021/09/14 02:43:31	INFO	epoch = 2, iteration = 10, loss_policy = 5.728514909744263, loss_value = 0.6945554316043854, loss = 6.423070192337036, accuracy = 0.07421875, 0.515625
2021/09/14 02:43:32	INFO	epoch = 2, iteration = 12, loss_policy = 5.382267236709595, loss_value = 0.6812370419502258, loss = 6.063504457473755, accuracy = 0.0703125, 0.521484375
2021/09/14 02:43:33	INFO	epoch = 2, iteration = 14, loss_policy = 5.554

2021/09/14 02:44:01	INFO	epoch = 2, iteration = 98, loss_policy = 4.131300926208496, loss_value = 0.6905283331871033, loss = 4.821829319000244, accuracy = 0.19140625, 0.5546875
2021/09/14 02:44:02	INFO	epoch = 2, iteration = 100, loss_policy = 4.014870762825012, loss_value = 0.6783401072025299, loss = 4.69321084022522, accuracy = 0.201171875, 0.5234375
2021/09/14 02:44:03	INFO	epoch = 2, iteration = 102, loss_policy = 4.14655613899231, loss_value = 0.7063133716583252, loss = 4.852869510650635, accuracy = 0.203125, 0.52734375
2021/09/14 02:44:03	INFO	epoch = 2, iteration = 104, loss_policy = 3.679903268814087, loss_value = 0.6984361410140991, loss = 4.378339529037476, accuracy = 0.21484375, 0.521484375
2021/09/14 02:44:03	INFO	validate test set


TypeError: max() received an invalid combination of arguments - got (tuple, int), but expected one of:
 * (Tensor input)
 * (Tensor input, name dim, bool keepdim, *, tuple of Tensors out)
 * (Tensor input, Tensor other, *, Tensor out)
 * (Tensor input, int dim, bool keepdim, *, tuple of Tensors out)


2021/09/13 20:06:09	INFO	Save model : model/5_shogi_210913_1
