# mjlog 解析 sketchbook

## 基準

- 配牌
    - P0: hai0="112,124,35,135,10,5,25,85,66,8,43,19,1,33"
    - P1: hai1="46,96,73,53,128,120,98,89,28,49,41,83,72"
- ツモ
    - P0: <T[0-9]+/>
    - P1: <U[0-9]+/>
    - P2: <V[0-9]+/>
    - P3: <W[0-9]+/>
- 打牌
    - P0: <D[0-9]+/>
    - P1: <E[0-9]+/>
    - P2: <F[0-9]+/>
    - P3: <G[0-9]+/>

In [10]:
import datetime
import glob
import gzip
import os
import re
import sys
import xml.etree.ElementTree as ET

from bs4 import BeautifulSoup
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm

sys.path.append('..')
from src.meld import Meld

### 捨て牌モデル学習用データの作成

カラムは15列(配牌+ツモの14枚と、切った牌1枚)で、それぞれをone-hotに変換する。入力は 34 × 14、出力(切った牌)は 34 次元になるようにする。

### ToDo

- 牌譜からツモと切る対応を作る
    - 途中の副露に対応する



In [11]:
def parse_mjlog_to_xml(path):
    """Read a gzip-compressed mjlog file and parse its content as XML.

    Args:
        path: Path of the gzip-compressed log file.

    Returns:
        A ``(mjlog_text, mjlog_xml)`` tuple: the decompressed text and the
        root ``xml.etree.ElementTree.Element`` parsed from that text.

    Raises:
        OSError: If the file cannot be opened or is not valid gzip data.
        UnicodeDecodeError: If the decompressed bytes are not valid UTF-8.
        xml.etree.ElementTree.ParseError: If the text is not well-formed XML.
    """
    # Decode explicitly instead of relying on the locale's preferred encoding,
    # so the same file parses identically on every platform.
    # NOTE(review): assumes the logs are UTF-8 (the XML default) - confirm.
    with gzip.open(path, mode='rt', encoding='utf-8') as fp:
        mjlog_text = fp.read()

    return mjlog_text, ET.fromstring(mjlog_text)

def get_discard_other_pai_vector(manage_each_sutehai_list, player_num, pai):
    """Record a discard and encode the other three players' discard piles.

    ``pai`` is appended in place to ``manage_each_sutehai_list[player_num]``.
    Each of the other players' piles is then encoded as an array that is
    built with shape ``(34, 4)`` and returned transposed: for every
    occurrence of a tile index in the pile, the first still-unset flag of
    that tile's four slots is set to 1.

    Args:
        manage_each_sutehai_list: Per-player lists of discarded tile indices
            (mutated in place).
        player_num: Index of the player who discards ``pai``.
        pai: Tile index being discarded.

    Returns:
        A tuple ``(manage_each_sutehai_list, vectors)`` where ``vectors``
        holds one transposed flag array per other player, in ascending
        player order.

    Raises:
        IndexError: If a tile index occurs more than four times in a pile.
    """
    # Seats of the three opponents; any player_num other than 0, 1 or 2
    # maps to [0, 1, 2].
    opponents = {
        0: [1, 2, 3],
        1: [0, 2, 3],
        2: [0, 1, 3],
    }.get(player_num, [0, 1, 2])

    manage_each_sutehai_list[player_num].append(pai)

    def encode_pile(pile):
        flags = np.zeros((34, 4))
        for tile in pile:
            slots = flags[tile]
            # Slots are filled left to right, so the number of flags already
            # set equals the index of the first free slot.
            slots[np.count_nonzero(slots)] = 1
        return flags.T

    encoded_piles = [
        encode_pile(manage_each_sutehai_list[seat]) for seat in opponents
    ]
    return manage_each_sutehai_list, encoded_piles

In [12]:
# Collect the XML logs in sorted order and keep at most the first one.
# NOTE(review): glob is called without recursive=True, so '**' here behaves
# like a plain '*' - confirm that only the top-level directory is intended.
mjlog_path_list = glob.glob('../../TenhouXMLAnalize/2019/**.xml')
mjlog_path_list.sort()
mjlog_path_list = mjlog_path_list[:1]

In [13]:
# with tf.python_io.TFRecordWriter('player_discard_dataset.tfrecord') as player_discard_dataset:
#     # 下記クラスは一つの、KeyValueを設定
#     features = tf.train.Features(feature={
#         'player_hands' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[sorted(tehai_onehot_vector)])),
#         'enemy_hands' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[other_player_discard_vector_list])),
#         'discard_target' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[np.identity(34)[pai].tolist()]))
#     })
#     example = tf.train.Example(features=features)
#     # 新しいデータが、追加で書かれていく
#     player_discard_dataset.write(example.SerializeToString())

In [14]:
# Sanity check: number of log file paths currently held in mjlog_path_list.
len(mjlog_path_list)

1

In [16]:
# Build the discard-model dataset: for every discard found in the logs, write
# one tf.train.Example (player hand, other players' discards, discarded tile)
# to a TFRecord file.
#
# The three lists below keep an in-memory copy of every sample and are only
# used to inspect the data afterwards (data check).
player_discard_train_vector_list = []
other_discard_train_vector_list = []
player_discard_target_vector_list= []

data_size_counter = 0
now = datetime.datetime.now()
now_str = now.strftime('%Y%m%d_%H%M%S')

with tf.io.TFRecordWriter(f'../data/mahjong_discard_dataset_1000games_{now_str}.tfrecord') as player_discard_dataset:
    for mjlog_path in tqdm(mjlog_path_list):
        # For a gzip-compressed .mjlog file:
        # mjlog_text, mjlog_xml = parse_mjlog_to_xml(os.path.join(os.getcwd(), mjlog_path))
        # For a plain .xml file:
        with open(os.path.join(os.getcwd(), mjlog_path), 'r') as mjlog:
            mjlog_text = mjlog.read()
        mjlog_xml = ET.fromstring(mjlog_text) 

        # One game: pair each INIT element (starting hands) with the raw text
        # found between that INIT tag and the next AGARI / RYUUKYOKU tag.
        # NOTE(review): zip() pairs the two sequences by position and stops at
        # the shorter one - this presumes the regex finds one match per INIT.
        for n, (mj_xml, mj_text) in enumerate(zip(mjlog_xml.findall('INIT'), re.findall("<INIT [^>]*?>(.+?)(<AGARI [^>]*?|<RYUUKYOKU [^>]*?>)", mjlog_text))):
            # Starting hand of each player, with every tile id collapsed to a
            # tile index by integer division by 4.
            manage_each_hands_list = [
                sorted([int(pai_num) // 4 for pai_num in mj_xml.get('hai0').split(',')]),
                sorted([int(pai_num) // 4 for pai_num in mj_xml.get('hai1').split(',')]),
                sorted([int(pai_num) // 4 for pai_num in mj_xml.get('hai2').split(',')]),
                sorted([int(pai_num) // 4 for pai_num in mj_xml.get('hai3').split(',')])
            ]
            # Discard pile of each player, filled as discards are processed.
            manage_each_sutehai_list = [[] for _ in range(4)]
            # One round: mj_text[0] is the first regex group (the moves);
            # splitting on '/>' leaves one '<TAG...' string per move and the
            # trailing empty piece is dropped.
            for move in mj_text[0].split('/>')[:-1]:
                # A dataset sample is created at the moment a tile is discarded.
                if move[1:4] in ('REA', 'BYE', 'UN ', 'DOR'): # skip REACH, BYE, UN and DORA tags
                    continue

                # The tag letter identifies the player: T/U/V/W are draws and
                # D/E/F/G are discards of players 0-3; anything else gives None.
                player_num = \
                    0 if move[1] in ('T', 'D') else \
                    1 if move[1] in ('U', 'E') else \
                    2 if move[1] in ('V', 'F') else \
                    3 if move[1] in ('W', 'G') else None 

                # For a draw or a discard, keep track of the selected tile
                # (the number after the tag letter, divided by 4).
                if move[1] != 'N':
                    pai = int(move[2:]) // 4

                # Draw: add the tile to the player's hand.
                if move[1] in ('T', 'U', 'V', 'W'):
                    manage_each_hands_list[player_num].append(pai)

                # Discard
                if move[1] in ('D', 'E', 'F', 'G'):
                    # Encode the hand and add it to the training lists.
                    # tehai_onehot_vector = np.identity(34)[manage_each_hands_list[player_num]].tolist()
                    hands_vector = np.zeros((34, 4))
                    for hands_num in manage_each_hands_list[player_num]:
                        # Raise flags in the 34 x 4 array: each tile in the hand
                        # sets the first still-zero slot of its row.
                        hands_vector[hands_num][np.where(hands_vector[hands_num] == 0)[0][0]] = 1
                    tehai_onehot_vector = hands_vector.T

                    # Meld handling (disabled):
                    # if len(tehai_onehot_vector) < 14:
                    #     while len(tehai_onehot_vector) < 14:
                    #         tehai_onehot_vector.append(np.zeros(34).tolist())

                    # Other players' discards (this call also appends pai to
                    # the current player's discard pile).
                    manage_each_sutehai_list, other_player_discard_vector_list = \
                        get_discard_other_pai_vector(manage_each_sutehai_list, player_num, pai)

                    other_discard_train_vector_list.append(other_player_discard_vector_list)

                    other_discard_train_vector = tf.convert_to_tensor(other_player_discard_vector_list)
                    other_discard_train_vector = tf.io.serialize_tensor(other_discard_train_vector)

                    # The player's own hand (still containing the tile that is
                    # about to be discarded).
                    player_discard_train_vector_list.append(tehai_onehot_vector)

                    player_discard_train_vector = tf.convert_to_tensor(tehai_onehot_vector)
                    player_discard_train_vector = tf.io.serialize_tensor(player_discard_train_vector)

                    # The tile the player discards, as a 34-element one-hot list.
                    player_discard_target_vector_list.append(np.identity(34)[pai].tolist())

                    player_discard_target_vector = tf.convert_to_tensor(np.identity(34)[pai].tolist())
                    player_discard_target_vector = tf.io.serialize_tensor(player_discard_target_vector)

                    # Build the key/value features of one example; every value
                    # is the serialized tensor stored as a single bytes entry.
                    features = tf.train.Features(feature={
                        'player_hands' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[player_discard_train_vector.numpy()])),
                        'enemy_discards' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[other_discard_train_vector.numpy()])),
                        'player_discard' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[player_discard_target_vector.numpy()]))
                    })

                    example = tf.train.Example(features=features)
                    # Each new example is appended to the record file.
                    player_discard_dataset.write(example.SerializeToString())

                    # Update the hand: remove the discarded tile.
                    manage_each_hands_list[player_num].remove(pai)

                    # Count the samples written.
                    data_size_counter = data_size_counter + 1

                # Call (meld): remove the tiles used for the meld from the
                # calling player's hand.
                if move[1] == 'N':
                    # The digits of the 2nd and 3rd space-separated fields give
                    # the calling player and the meld code.
                    meld_info = move.split(' ')
                    who = int(re.sub("\\D", "", meld_info[1]))
                    meld_num = int(re.sub("\\D", "", meld_info[2]))
                    meld_info_list = Meld(meld_num).getIndex()

                    # NOTE(review): meld_info_list is compared as a whole with
                    # the meld-type strings (pon / chi / added kan / open kan /
                    # closed kan) but is indexed like a sequence inside the
                    # branches. If getIndex() returns a list, none of these
                    # comparisons can be true and the called tiles are never
                    # removed from the hand - confirm against src.meld.
                    if meld_info_list in ('ポン', 'チー'):
                        manage_each_hands_list[who].remove(meld_info_list[2])
                        manage_each_hands_list[who].remove(meld_info_list[3])
                    elif meld_info_list == '加カン':
                        manage_each_hands_list[who].remove(meld_info_list[4])
                    elif meld_info_list == '明カン':
                        manage_each_hands_list[who].remove(meld_info_list[2])
                        manage_each_hands_list[who].remove(meld_info_list[2])
                        manage_each_hands_list[who].remove(meld_info_list[2])
                    elif meld_info_list == '暗カン':
                        manage_each_hands_list[who].remove(meld_info_list[2])
                        manage_each_hands_list[who].remove(meld_info_list[2])
                        manage_each_hands_list[who].remove(meld_info_list[2])
                        manage_each_hands_list[who].remove(meld_info_list[2])

# Total number of samples written to the TFRecord file.
print(data_size_counter)

  0%|          | 0/1 [00:00<?, ?it/s]

269


In [19]:
# First recorded hand, transposed for display: one row per tile index
# (34 rows) and one column per copy slot (4 columns).
player_discard_train_vector_list[0].T

array([[0., 0., 0., 0.],
       [1., 1., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 1., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 1., 1., 0.]])

In [20]:
# One-hot target of the first sample; in the output below the 1.0 is the
# 29th entry, i.e. 0-based tile index 28.
player_discard_target_vector_list[0]

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]