In [1]:
# Main code
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import re

def parse_order_info(order_info, player_name):
    order_list = re.findall(r"\[(.*?)\]", order_info)[0].split(", ")
    position = 1
    for i in range(len(order_list)):
        if player_name in order_list[i]:
            score = re.findall(r"\((.*?)\)", order_list[i])[0]
            position = i + 1
            break
    return score, position



def parse_gamelog(log_str):
    games = re.findall(r"Game started.*?Final results.*?\n", log_str, re.DOTALL)

    games_data = []
    hands_data = []

    for game_index, game in enumerate(games):
        try:
            player_name = re.findall(r"\[(.*?)\ ", re.findall(r"Players.*?\n", game)[0])[0]

            hands = re.findall(r"Round:.*?Cowboy: Score.*?\n", game, re.DOTALL)

            
            final_info = re.findall(r"Final results:.*?\n", game)[0]
            south_info = re.findall(r"Round: 4.*?]", game, re.DOTALL)
            if south_info:
                south_info = south_info[0]
            else:
                # when the game have no South rounds
                south_info = final_info

            south_score, south_position = parse_order_info(south_info, player_name)
            final_score, final_position = parse_order_info(final_info, player_name)

            games_data.append([game_index, south_score, final_score, south_position, final_position])

            for hand in hands:
                result = re.findall(r"Cowboy: Score.*?\n", hand)[0]
                score, score_diff, play_state, state_index = re.findall(r"Score: (.*?) Score difference: (.*?) Play State: (.*?) Latest change index: (.*?)\n", result)[0]

                meld_times = len(re.findall(r"With hand:", hand))
                reached = bool(re.findall(r"Go for it!", hand))

                is_dealer = player_name == re.findall(r"Dealer: (.*) ", hand)[0]

                hands_data.append([score, score_diff, play_state, state_index, meld_times, reached, is_dealer, game_index, final_position])
        
        except Exception as e:
            print(e)
    
    games_col = ['game_index',
     'south_score',
     'final_score',
     'south_position',
     'final_position']
    hands_col = ['score',
     'score_diff',
     'play_state',
     'state_index',
     'meld_times',
     'reached',
     'is_dealer',
     'game_index',
     'final_position',
    ]
    games_df = pd.DataFrame(data=np.array(games_data), columns=games_col)
    hands_df = pd.DataFrame(data=np.array(hands_data), columns=hands_col)
    return games_df, hands_df

In [18]:
# exec
log_files = ["gamelog.txt", "gamelog1.txt", "gamelog2.txt"] # change this
games_df_list = []
hands_df_list = []

for filename in log_files:
    with open("../"+filename) as f:
        log_str = f.read()
        games_df, hands_df = parse_gamelog(log_str)
        games_df_list.append(games_df)
        hands_df_list.append(hands_df)
        
# Change file name here
games_df_filename = "games2"
hands_df_filename = "hands2"

pd.concat(games_df_list).to_csv(games_df_filename + ".csv")
pd.concat(hands_df_list).to_csv(hands_df_filename + ".csv")

## Develop

In [2]:
origin_str = ""
with open("../gamelog.txt") as f:
    origin_str = f.read()

## Get 1 game

In [10]:
test_str = origin_str[:50000]

In [17]:
re.findall(r"Final result.*\n", test_str)

['Final results: [ラークマイルド (52,700) 63.0, xelnaga (34,300) 14.0, 名字和JJ一樣長 (13,000) -27.0, きゅんさん (0) -50.0]\n',
 'Final results: [ぽんこっこ (38,000) 48.0, 「のばら」２ (27,300) 7.0, LAS (19,200) -21.0, 名字和JJ一樣長 (15,500) -34.0]\n',
 'Final results: [342g (55,300) 65.0, サムエル慶次 (26,700) 7.0, 名字和JJ一樣長 (22,000) -18.0, JamesROF (-4,000) -54.0]\n']

In [40]:
print(origin_str[:500])

nohup: ignoring input
2019-02-06 15:48:10 INFO: AI: 0.3.2, first_version
2019-02-06 15:48:10 INFO: Successfully authenticated
2019-02-06 15:48:10 INFO: Player has 二段 rank and 1474.78 rate
2019-02-06 15:48:10 INFO: Looking for the game...
2019-02-06 15:48:11 INFO: Game settings:
2019-02-06 15:48:11 INFO: Aka dora: True
2019-02-06 15:48:11 INFO: Open tanyao: True
2019-02-06 15:48:11 INFO: Game type: hanchan
2019-02-06 15:48:11 INFO: Game started
2019-02-06 15:48:11 INFO: Log: http://tenhou.net/0/?


In [64]:
games = re.findall(r"Game started.*?Final results.*?\n", test_str, re.DOTALL)
len(games)

3

In [65]:
test_game = games[0]

In [71]:
print(test_game)

Game started
2019-02-06 15:48:11 INFO: Log: http://tenhou.net/0/?log=2019020700gm-0089-0000-1a7bf310&tw=3
2019-02-06 15:48:11 INFO: Players: [名字和JJ一樣長 (二段), ラークマイルド (四段), xelnaga (初段), きゅんさん (三段)]
2019-02-06 15:48:15 INFO: Cowboy: Score: 25000 Score difference: 0 Play State: PREPARING Latest change index: 0
2019-02-06 15:48:15 INFO: Round: 0, Honba: 0, Dora Indicators: 6p
2019-02-06 15:48:15 INFO: Players: [ラークマイルド (25,000), xelnaga (25,000), きゅんさん (25,000), 名字和JJ一樣長 (25,000)]
2019-02-06 15:48:15 INFO: Dealer: ラークマイルド (25,000)
2019-02-06 15:48:15 INFO: Round  wind: East
2019-02-06 15:48:15 INFO: Player wind: North
2019-02-06 15:48:29 INFO: Shanten: 4
2019-02-06 15:48:50 INFO: Shanten: 3
2019-02-06 15:48:58 INFO: Shanten: 2
2019-02-06 15:49:12 INFO: Meld: Type: chi, Tiles: 456m [14, 18, 20] by 0
2019-02-06 15:49:12 INFO: With hand: 4567m6p378s22266z + 6m
2019-02-06 15:49:12 INFO: Discard tile after called meld: 3s
2019-02-06 15:49:42 INFO: Meld: Type: chi, Tiles: 678m [23, 25, 29] by 0


In [76]:
test_hands = re.findall(r"Round:.*?Cowboy: Score.*?\n", test_game, re.DOTALL)
len(test_hands)

9

In [77]:
test_hand = test_hands[0]

In [78]:
print(test_hand)

Round: 0, Honba: 0, Dora Indicators: 6p
2019-02-06 15:48:15 INFO: Players: [ラークマイルド (25,000), xelnaga (25,000), きゅんさん (25,000), 名字和JJ一樣長 (25,000)]
2019-02-06 15:48:15 INFO: Dealer: ラークマイルド (25,000)
2019-02-06 15:48:15 INFO: Round  wind: East
2019-02-06 15:48:15 INFO: Player wind: North
2019-02-06 15:48:29 INFO: Shanten: 4
2019-02-06 15:48:50 INFO: Shanten: 3
2019-02-06 15:48:58 INFO: Shanten: 2
2019-02-06 15:49:12 INFO: Meld: Type: chi, Tiles: 456m [14, 18, 20] by 0
2019-02-06 15:49:12 INFO: With hand: 4567m6p378s22266z + 6m
2019-02-06 15:49:12 INFO: Discard tile after called meld: 3s
2019-02-06 15:49:42 INFO: Meld: Type: chi, Tiles: 678m [23, 25, 29] by 0
2019-02-06 15:49:42 INFO: With hand: 677m78s22266z + 8m [456m]
2019-02-06 15:49:42 INFO: Discard tile after called meld: 7m
2019-02-06 15:49:50 INFO: Set the player's state from PREPARING to PROACTIVE_GOODSHAPE
2019-02-06 15:49:50 INFO: Hand: 2p78s22266z
2019-02-06 15:49:50 INFO: Outs: 69s 5
2019-02-06 15:50:08 INFO: Shanten: -1
2019

In [94]:
test_result = re.findall(r"Cowboy: Score.*?\n", test_hand)[0]

In [95]:
test_result

'Cowboy: Score: 26100 Score difference: 1100 Play State: PROACTIVE_GOODSHAPE Latest change index: 8\n'

In [101]:
meld_times = len(re.findall(r"With hand:", test_hand))
meld_times

3

In [106]:
reached = bool(re.findall(r"Go for it!", test_hand))
reached

False

In [112]:
south_info = re.findall(r"Round: 4.*?]", test_game, re.DOTALL)[0]

In [113]:
final_info = re.findall(r"Final results:.*?\n", test_game)[0]

In [114]:
order_info = re.findall(r"\[.*?\]", south_info)[0]

In [118]:
order_info[1:-1].split(", ")

['ラークマイルド (33,300)', '名字和JJ一樣長 (26,100)', 'xelnaga (24,700)', 'きゅんさん (15,900)']

In [136]:
player_name = re.findall(r"\[.*?\(", re.findall(r"Players.*?\n", test_game)[0])[0][1:-2]

In [143]:
position = 1
order_list = order_info.split(", ")
for i in range(len(order_list)):
    if player_name in order_list[i]:
        position = i + 1
        break

In [144]:
position

2

In [184]:
def parse_order_info(order_info, player_name):
    order_list = re.findall(r"\[(.*?)\]", order_info)[0].split(", ")
    position = 1
    for i in range(len(order_list)):
        if player_name in order_list[i]:
            score = re.findall(r"\((.*?)\)", order_list[i])[0]
            position = i + 1
            break
    return score, position

In [185]:
parse_order_info(final_info, player_name)

('13,000', 3)

In [174]:
re.findall(r"Dealer: (.*) ", test_hand)

['ラークマイルド']

In [175]:
is_dealer = player_name == re.findall(r"Dealer: (.*) ", test_hand)

In [161]:
test_result

'Cowboy: Score: 26100 Score difference: 1100 Play State: PROACTIVE_GOODSHAPE Latest change index: 8\n'

In [170]:
re.findall(r"Score: (.*?) Score difference: (.*?) Play State: (.*?) Latest change index: (.*?)\n", test_result)

[('26100', '1100', 'PROACTIVE_GOODSHAPE', '8')]

In [171]:
score, score_diff, play_state, state_index = re.findall(r"Score: (.*?) Score difference: (.*?) Play State: (.*?) Latest change index: (.*?)\n", test_result)[0]

In [182]:
player_name = re.findall(r"\[(.*?)\ ", re.findall(r"Players.*?\n", test_game)[0])[0]

In [196]:
games = re.findall(r"Game started.*?Final results.*?\n", test_str, re.DOTALL)

games_data = []
hands_data = []

for game_index, game in enumerate(games):
    player_name = re.findall(r"\[(.*?)\ ", re.findall(r"Players.*?\n", game)[0])[0]
    
    hands = re.findall(r"Round:.*?Cowboy: Score.*?\n", test_game, re.DOTALL)
    
    south_info = re.findall(r"Round: 4.*?]", game, re.DOTALL)[0]
    final_info = re.findall(r"Final results:.*?\n", game)[0]
    south_score, south_position = parse_order_info(south_info, player_name)
    final_score, final_position = parse_order_info(final_info, player_name)
    
    games_data.append([game_index, south_score, final_score, south_position, final_position])
    
    for hand in hands:
        result = re.findall(r"Cowboy: Score.*?\n", hand)[0]
        score, score_diff, play_state, state_index = re.findall(r"Score: (.*?) Score difference: (.*?) Play State: (.*?) Latest change index: (.*?)\n", result)[0]
        
        meld_times = len(re.findall(r"With hand:", hand))
        reached = bool(re.findall(r"Go for it!", hand))
        
        is_dealer = player_name == re.findall(r"Dealer: (.*) ", hand)[0]
        
        hands_data.append([score, score_diff, play_state, state_index, meld_times, reached, is_dealer, game_index])

In [197]:
games_col = ['game_index',
 'south_score',
 'final_score',
 'south_position',
 'final_position']

In [198]:
hands_col = ['score',
 'score_diff',
 'play_state',
 'state_index',
 'meld_times',
 'reached',
 'is_dealer',
 'game_index']

In [193]:
games_df = pd.DataFrame(data=np.array(games_data), columns=games_col)

In [194]:
games_df

Unnamed: 0,game_index,south_score,final_score,south_position,final_position
0,0,26100,13000,2,3
1,1,700,15500,4,4
2,2,25000,22000,3,3


In [199]:
def parse_order_info(order_info, player_name):
    order_list = re.findall(r"\[(.*?)\]", order_info)[0].split(", ")
    position = 1
    for i in range(len(order_list)):
        if player_name in order_list[i]:
            score = re.findall(r"\((.*?)\)", order_list[i])[0]
            position = i + 1
            break
    return score, positionhands_df = pd.DataFrame(data=np.array(hands_data), columns=hands_col)
hands_df

Unnamed: 0,score,score_diff,play_state,state_index,meld_times,reached,is_dealer,game_index
0,26100.0,1100.0,PROACTIVE_GOODSHAPE,8,3,False,False,0
1,26100.0,0.0,DEFENCE,3,0,False,False,0
2,26100.0,0.0,DEFENCE,4,0,False,False,0
3,26100.0,0.0,DEFENCE,4,1,False,True,0
4,24100.0,-2000.0,DEFENCE,12,0,False,False,0
5,24100.0,0.0,DEFENCE,4,0,False,False,0
6,21000.0,-3100.0,DEFENCE,10,1,False,False,0
7,19000.0,-2000.0,PROACTIVE_BADSHAPE,8,1,False,False,0
8,13000.0,-6000.0,PREPARING,0,1,False,True,0
9,26100.0,1100.0,PROACTIVE_GOODSHAPE,8,3,False,False,1


## Functions

In [4]:
def parse_order_info(order_info, player_name):
    order_list = re.findall(r"\[(.*?)\]", order_info)[0].split(", ")
    position = 1
    for i in range(len(order_list)):
        if player_name in order_list[i]:
            score = re.findall(r"\((.*?)\)", order_list[i])[0]
            position = i + 1
            break
    return score, position



def parse_gamelog(log_str):
    games = re.findall(r"Game started.*?Final results.*?\n", log_str, re.DOTALL)

    games_data = []
    hands_data = []

    for game_index, game in enumerate(games):
        try:
            player_name = re.findall(r"\[(.*?)\ ", re.findall(r"Players.*?\n", game)[0])[0]

            hands = re.findall(r"Round:.*?Cowboy: Score.*?\n", game, re.DOTALL)

            
            final_info = re.findall(r"Final results:.*?\n", game)[0]
            south_info = re.findall(r"Round: 4.*?]", game, re.DOTALL)
            if south_info:
                south_info = south_info[0]
            else:
                # when the game have no South rounds
                south_info = final_info

            south_score, south_position = parse_order_info(south_info, player_name)
            final_score, final_position = parse_order_info(final_info, player_name)

            games_data.append([game_index, south_score, final_score, south_position, final_position])

            for hand in hands:
                result = re.findall(r"Cowboy: Score.*?\n", hand)[0]
                score, score_diff, play_state, state_index = re.findall(r"Score: (.*?) Score difference: (.*?) Play State: (.*?) Latest change index: (.*?)\n", result)[0]

                meld_times = len(re.findall(r"With hand:", hand))
                reached = bool(re.findall(r"Go for it!", hand))

                is_dealer = player_name == re.findall(r"Dealer: (.*) ", hand)[0]

                hands_data.append([score, score_diff, play_state, state_index, meld_times, reached, is_dealer, game_index, final_position])
        
        except Exception as e:
            print(e)
    
    games_col = ['game_index',
     'south_score',
     'final_score',
     'south_position',
     'final_position']
    hands_col = ['score',
     'score_diff',
     'play_state',
     'state_index',
     'meld_times',
     'reached',
     'is_dealer',
     'game_index',
     'final_position',
    ]
    games_df = pd.DataFrame(data=np.array(games_data), columns=games_col)
    hands_df = pd.DataFrame(data=np.array(hands_data), columns=hands_col)
    return games_df, hands_df

In [5]:
games_df, hands_df = parse_gamelog(origin_str)

In [6]:
gamelog2 = ""
with open("../gamelog2.txt") as f:
    gamelog2 = f.read()

In [7]:
games_df2, hands_df2 = parse_gamelog(gamelog2)

In [8]:
pd.concat([games_df, games_df2]).to_csv("games1.csv")

In [9]:
pd.concat([hands_df, hands_df2]).to_csv("hands1.csv")

In [10]:
hands_df

Unnamed: 0,score,score_diff,play_state,state_index,meld_times,reached,is_dealer,game_index,final_position
0,26100,1100,PROACTIVE_GOODSHAPE,8,3,False,False,0,3
1,26100,0,DEFENCE,3,0,False,False,0,3
2,26100,0,DEFENCE,4,0,False,False,0,3
3,26100,0,DEFENCE,4,1,False,True,0,3
4,24100,-2000,DEFENCE,12,0,False,False,0,3
5,24100,0,DEFENCE,4,0,False,False,0,3
6,21000,-3100,DEFENCE,10,1,False,False,0,3
7,19000,-2000,PROACTIVE_BADSHAPE,8,1,False,False,0,3
8,13000.0,-6000.0,PREPARING,0,1,False,True,0,3
9,25000,0,PREPARING,0,0,False,True,1,4


In [14]:
for i in re.findall(r"(http.*)\n", origin_str):
    print(i)

http://tenhou.net/0/?log=2019020700gm-0089-0000-1a7bf310&tw=3
http://tenhou.net/0/?log=2019020701gm-0089-0000-e0fa7774&tw=0
http://tenhou.net/0/?log=2019020701gm-0089-0000-f9c7cdc1&tw=2
http://tenhou.net/0/?log=2019020702gm-0089-0000-e2f3301d&tw=1
http://tenhou.net/0/?log=2019020702gm-0089-0000-6712678b&tw=3
http://tenhou.net/0/?log=2019020703gm-0089-0000-0ddb358f&tw=0
http://tenhou.net/0/?log=2019020703gm-0089-0000-08012e66&tw=3
http://tenhou.net/0/?log=2019020704gm-0089-0000-6bde1bb2&tw=3
http://tenhou.net/0/?log=2019020704gm-0089-0000-468c3aa9&tw=1
http://tenhou.net/0/?log=2019020705gm-0089-0000-7da238fe&tw=3
http://tenhou.net/0/?log=2019020705gm-0089-0000-4e960f42&tw=2
http://tenhou.net/0/?log=2019020706gm-0089-0000-530f6c59&tw=1
http://tenhou.net/0/?log=2019020706gm-0089-0000-176939ed&tw=1
http://tenhou.net/0/?log=2019020706gm-0089-0000-79253a84&tw=3
http://tenhou.net/0/?log=2019020707gm-0089-0000-2f7a0fcf&tw=3
http://tenhou.net/0/?log=2019020707gm-0089-0000-169571a3&tw=1
http://t