## 5x5 shogiライブラリー設定

In [1]:
!pip install --no-cache-dir -e .
!pip install gmpy2

Obtaining file:///Users/han/python-shogi
Installing collected packages: python-shogi
  Attempting uninstall: python-shogi
    Found existing installation: python-shogi 1.0.14
    Uninstalling python-shogi-1.0.14:
      Successfully uninstalled python-shogi-1.0.14
  Running setup.py develop for python-shogi
Successfully installed python-shogi-1.0.14


In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import shogi

from shogi.common import *
from shogi.features import *
from shogi.read_kifu import *
from shogi.CSA import *
from shogi.player.mcts_player import MctsPlayer
from shogi.network.policyvalue_res import PolicyValueResNetwork
from shogi import serializers
from shogi import cli

import gmpy2
import random
import argparse
import pickle
import re
import os
import logging

## 自己対局
1. parallel_mcts_players.sh 
  - 自己対局に使うプレイヤー（**MONTE CARLO TREE SEARCH**)生成
2. model_list (**POLICY VALUE NETWORK**)
  - 現状を読みどんなコマをどこに動かすか：Policy Network
  - 現状かつ確率はどのくらいか：Value Network
  - を学習したPre trained Models
  - 今の段階ほぼランダム動きで学習されていて、性能は悪いと思われる。
3. cli.py
  - Player1
      - Palrallel Monte Carlo Tree Search : 探索
      - model_rand1 : MCTSに使う評価値を推測
      - name1
  - Player2
  - N round 
      - M games を行い、csa pathにCSAファイルを生成
  - CSAファイル（対局のKifu）を集める

            '''
            N+Player1
            N-Player2
            PI
            +
            +3544GI
            -1213FU
            +5453FU
            -1314FU
            +2514KA
            -3122GI
            +5352FU
            -2112KI
            +4554KI
            -1121OU
            +5545OU
            -4132KA
            +5443KI
            -3243KA
            +4555OU
            -0054KI
            %TORYO
            '''
    
            
    

In [4]:
## google colab
# !echo -e "#!/bin/sh\npython -m shogi.usi.usi_parallel_mcts" > parallel_mcts_player.sh
## local 
!echo "#!/bin/sh\npython -m shogi.usi.usi_parallel_mcts" > parallel_mcts_player_2.sh
!chmod +x parallel_mcts_player_2.sh

In [10]:
player1 = '/Users/han/python-shogi/parallel_mcts_player_1.sh'
player2 = '/Users/han/python-shogi/parallel_mcts_player_2.sh'

model_path = '/Users/han/python-shogi/checkpoint'

for r in range(5000):
    model_list = ['best/bast_pv_2', 'best/best_pv_1','base/base_pv']
    
    model_rand1 = random.choice(model_list)
    model_rand2 = random.choice(model_list)
    modelfile1 = model_path+'/'+model_rand1
    modelfile2 = model_path+'/'+model_rand2


    name1 = model_rand1
    name2 = model_rand2

    temp_rand1 = random.randint(10,200)
    play_rand1 = random.randint(50,200)
    temp_rand2 = random.randint(10,200)
    play_rand2 = random.randint(50,200)

    options1 = {'modelfile':modelfile1,'temperature':temp_rand1,'playout':play_rand1}
    options2 = {'modelfile':modelfile2,'temperature':temp_rand2,'playout':play_rand2}

    names = [model_rand1, model_rand2]

    csa='./data/csa_auto2'

    cli.main(player1,player2, options1=options1, options2=options2, names=names, games=5, draw=50)

best/best_pv_1 vs base/base_pv start.
まで70手で後手の勝ち
1 of 5 games finished.
best/best_pv_1 vs base/base_pv: 0-1-0 (0.0%)
Black vs White: 0-1-0 (0.0%)
best/best_pv_1 playing Black: 0-1-0 (0.0%)
best/best_pv_1 playing White: 0-0-0 (0.0%)
base/base_pv playing Black: 0-0-0 (0.0%)
base/base_pv playing White: 1-0-0 (100.0%)
base/base_pv vs best/best_pv_1 start.


KeyboardInterrupt: 

In [8]:
!ls checkpoint/base


base_pv


## Policy Value Network 学習
1. CSAファイルの前処理
     - 勝負が決まったMatchだけを選ぶ
     - Errorを起こした対局除去
2. Train / Test list 生成
3. Model作成

In [3]:
! python utils/filter_csa.py --dir './data/pgn_3'
! python utils/make_kifu_list.py './data/pgn_3' './data/kifu_good_6'

kifu count : 764
total kifu num = 764
train kifu num = 764
test kifu num = 0


In [4]:
# setting
parser = argparse.ArgumentParser()
parser.add_argument('kifulist_train', type=str)
parser.add_argument('kifulist_test', type=str)
parser.add_argument('--batchsize', '-b', type=int, default=32)
parser.add_argument('--test_batchsize', type=int, default=512)
parser.add_argument('--epoch', '-e', type=int, default=1)
parser.add_argument('--model', type=str, default='model/model_test')
parser.add_argument('--state', type=str, default='model/state_test')
parser.add_argument('--checkpoint', type=str, default='')
parser.add_argument('--initmodel', '-m', type=str, default='')
parser.add_argument('--resume', '-r', type=str, default='')
parser.add_argument('--log', default=None)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--eval_interval', '-i', type=int, default=1000)
parser.add_argument('--save_interval_epoch', type=int, default=10)

args = parser.parse_args(args=['kifu_good_6_train.txt', 'kifu_good_test.txt', '--epoch', '3', '--model', 'model/5_test_210913_1', '--checkpoint', 'checkpoint/5_test_210913_1', '--eval_interval', '2',  '--save_interval_epoch', '5'])
device = 'cuda' if torch.cuda.is_available else 'cpu'

logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=args.log, level=logging.DEBUG)
logging.info('checkpoint : {}'.format(args.checkpoint))
logging.info('batchsize : {}'.format(args.batchsize))
logging.info('initmodel : {}'.format(args.initmodel))
logging.info('resume : {}'.format(args.resume))
logging.info('log : {}'.format(args.log))
logging.info('lr : {}'.format(args.lr))

2021/09/28 20:43:17	INFO	checkpoint : checkpoint/5_test_210913_1
2021/09/28 20:43:17	INFO	batchsize : 32
2021/09/28 20:43:17	INFO	initmodel : 
2021/09/28 20:43:17	INFO	resume : 
2021/09/28 20:43:17	INFO	log : None
2021/09/28 20:43:17	INFO	lr : 0.01


In [12]:
# create model
num_resnet = 4
num_channel = 80

model=PolicyValueResNetwork(num_resnet, num_channel)
model.to(device)

optimizer = optim.SGD(model.parameters(),lr=args.lr)
cross_entropy_loss = nn.CrossEntropyLoss()
bce_with_logits_loss = nn.BCEWithLogitsLoss()

AssertionError: Torch not compiled with CUDA enabled

学習に必要なFunction

In [8]:
# neede functions
def mini_batch(positions, i, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    mini_batch_win = []
    for b in range(batchsize):
        features, move, win = make_features(positions[i + b])
        mini_batch_data.append(features)
        mini_batch_move.append(move)
        mini_batch_win.append(win)

    return (torch.from_numpy(np.array(mini_batch_data, dtype=np.float32)).to(device),
            torch.from_numpy(np.array(mini_batch_move, dtype=np.long)).to(device),
            torch.from_numpy(np.array(mini_batch_win, dtype=np.float32).reshape((-1, 1))).to(device))

def mini_batch_for_test(positions, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    mini_batch_win = []
    for b in range(batchsize):
        features, move, win = make_features(random.choice(positions))
        mini_batch_data.append(features)
        mini_batch_move.append(move)
        mini_batch_win.append(win)

    return (torch.from_numpy(np.array(mini_batch_data, dtype=np.float32)).to(device),
            torch.from_numpy(np.array(mini_batch_move, dtype=np.long)).to(device),
            torch.from_numpy(np.array(mini_batch_win, dtype=np.float32).reshape((-1, 1))).to(device))
    
def accuracy(y, t):
    return (torch.max(y, 1)[1] == t).sum().item() / len(t)

def binary_accuracy(y, t):
    pred = y >= 0
    truth = t >= 0.5
    return pred.eq(truth).sum().item() / len(t)

def save_checkpoint():        
    logging.info('save checkpoint')
    path = f'{args.checkpoint}_{epoch}_{t}'
    checkpoint = {
        'epoch': epoch,
        't': t,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(checkpoint, path)

初期設定（Resume Train / CSAファイルからFeaturesを抽出）

In [5]:
# Init/Resume
if args.initmodel:
    logging.info('Load model from {}'.format(args.initmodel))
    serializers.load_npz(args.initmodel, model)
if args.resume:
    checkpoint = torch.load(args.resume, map_location=device)
    logging.info(f'Loading the checkpoint from {args.resume}')
    epoch = checkpoint['epoch']
    t = checkpoint['t']
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
else:
    epoch = 0
    t = 0
    
logging.info('read kifu start')

# 保存済みのpickleファイルがある場合、pickleファイルを読み込む
# train date
train_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_train) + '.pickle'

if os.path.exists(train_pickle_filename):
    with open(train_pickle_filename, 'rb') as f:
        positions_train = pickle.load(f)
    logging.info(train_pickle_filename)
    logging.info('load train pickle')
else:
    positions_train = read_kifu(f'./data/{args.kifulist_train}')

# test data
test_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_test) + '.pickle'
if os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'rb') as f:
        positions_test = pickle.load(f)
    logging.info('load test pickle')
else:
    positions_test = read_kifu(f'./data/{args.kifulist_test}')

# 保存済みのpickleがない場合、pickleファイルを保存する
if not os.path.exists(train_pickle_filename):
    with open('./data/new_new.pickle', 'wb') as f:
        pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL)
    logging.info('save train pickle')
if not os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'wb') as f:
        pickle.dump(positions_test, f, pickle.HIGHEST_PROTOCOL)
    logging.info('save test pickle')
logging.info('read kifu end')

logging.info('train position num = {}'.format(len(positions_train)))
logging.info('test position num = {}'.format(len(positions_test)))

2021/09/28 20:43:32	INFO	read kifu start
2021/09/28 20:43:44	INFO	load test pickle
2021/09/28 20:43:45	INFO	save train pickle
2021/09/28 20:43:45	INFO	read kifu end
2021/09/28 20:43:45	INFO	train position num = 57063
2021/09/28 20:43:45	INFO	test position num = 24


In [8]:
with open('./data//kifu_good_4_train.pickle', 'rb') as f:
    temp1 = pickle.load(f)
with open('./kifu_good_5_train.pickle', 'rb') as f:
    temp2 = pickle.load(f)
temp = temp1+temp2
print(len(temp))
with open('./csa_good.pickle', 'wb') as f:
        pickle.dump(temp, f, pickle.HIGHEST_PROTOCOL)

414509


In [7]:
ls data

[34mcsa[m[m/
[34mcsa_auto2[m[m/
[34mcsa_current[m[m/
kifu_good_5_test.txt
kifu_good_5_train.txt
kifu_good_6_test.txt
kifu_good_6_train.txt
new_new.pickle
[34mpgn[m[m/
[34mpgn_2[m[m/
[34mpgn_3[m[m/
[34mpgns[m[m/
pgnsShokidoki UEC9++TJshogi5x5 0.19.pgn
[34mpickle[m[m/
[34mtemp[m[m/


In [10]:
len(temp2)

158356

In [43]:
device='cpu'
temp2 = mini_batch(temp1[25:27],1,1)
temp2

(tensor([[[[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [1., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
 
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 1., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
 
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
 
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
 
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 1., 0.],
           [0., 0., 1., 0., 0.]],
 
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [1., 0., 0., 0., 

学習

In [None]:
import random
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()

# train
logging.info('start training')

itr = 0
sum_loss1 = 0
sum_loss2 = 0
sum_loss = 0

for e in range(args.epoch):
    epoch += 1
    itr_eval = 0
    sum_loss1_eval = 0
    sum_loss2_eval = 0
    sum_loss_eval = 0

    positions_train_shuffled = random.sample(positions_train, len(positions_train))
  
    for i in range(0, len(positions_train_shuffled)-args.batchsize, args.batchsize):
        t += 1
        itr += 1
        itr_eval += 1

        x, t1, t2 = mini_batch(positions_train_shuffled, i, args.batchsize)
        model.train()
        y1, y2 = model(x)

        loss1 = cross_entropy_loss(y1, t1)
        loss1 = loss1.mean()
        loss2 = bce_with_logits_loss(y2, t2)
        loss = loss1 + loss2

        model.zero_grad()
        loss1.backward()
        optimizer.step()

        sum_loss1_eval += loss1.item()
        sum_loss2_eval += loss2.item()
        sum_loss_eval += loss.item()

        sum_loss1 += loss1.item()
        sum_loss2 += loss2.item()
        sum_loss += loss.item()

        if t % args.eval_interval == 0:
            with torch.no_grad():
                x, t1, t2 = mini_batch_for_test(positions_test, args.test_batchsize)
                y1, y2 = model(x)

                loss1 = cross_entropy_loss(y1, t1)
                loss1 = loss1.mean()
                loss2 = bce_with_logits_loss(y2, t2)
                loss = loss1 + loss2

                loss, current = loss1.item(), t
                # print(f"loss1: {loss1:>7f}  loss2: {loss2:>7f}  loss: {loss:>7f}  [{current:>5d}]")
                logging.info('epoch = {}, iteration = {}, loss_policy = {}, loss_value = {}, loss = {}, accuracy = {}, {}'.format(
                    epoch, t, sum_loss1/itr, sum_loss2/itr, sum_loss/itr ,accuracy(y1,t1), binary_accuracy(y2,t2)))

                itr = 0
                sum_loss1 = 0
                sum_loss2 = 0
                sum_loss = 0
          
    logging.info('validate test set')
    itr_test = 0
    sum_test_accuracy1 = 0
    sum_test_accuracy2 = 0
    
    with torch.no_grad():
        for i in range(0, len(positions_test)-args.batchsize, args.batchsize):
            x, t1, t2 = mini_batch_for_test(positions_test, args.batchsize)
            y1 = model(x)

            itr_test += 1
            sum_test_accuracy1 += accuracy(y1, t1)
            sum_test_accuracy2 += binary_accuracy(y2, t2)

        logging.info('epoch = {}, iteration = {}, loss_polish = {}, loss_value = {}, loss = {}, accuracy = {}, {}'.format(
            epoch, t, sum_loss1_eval/itr_eval, sum_loss2_eval/itr_eval, sum_loss_eval/itr_eval, sum_test_accuracy1/itr_test, sum_test_accuracy2/itr_test))

        writer.add_scalar('Train_Loss_Policy/Iteration', sum_loss1_eval/itr_eval, t)
        writer.add_scalar('Train_Loss_Value/Iteration', sum_loss2_eval/itr_eval, t)
        writer.add_scalar('Train_Loss/Iteration', sum_loss_eval/itr_eval, t)

        writer.add_scalar('Test_Acc_Policy/Iteration', sum_test_accuracy1/itr_test, t)
        writer.add_scalar('Test_Acc_/Iteration', sum_test_accuracy2/itr_test, t)

    if e % args.save_interval_epoch == 0:
        save_checkpoint()

logging.info('End train')
save_checkpoint()

logging.info('Save model : {}'.format(args.model))
serializers.save_npz(args.model, model)

writer.close()