In [1]:
import sys
import os
import re
import pandas as pd

# Hand-history logs are read from a "Cashgame" folder inside the current
# working directory.
current_directory = os.getcwd()
logfolder_path = os.path.join(current_directory, 'Cashgame')

# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the per-file `game` numbering reproducible between runs. Only regular
# files ending in ".txt" are kept, so a directory with a matching name cannot
# reach open() later on.
log_files = sorted(
    entry
    for entry in os.listdir(logfolder_path)
    if entry.endswith('.txt') and os.path.isfile(os.path.join(logfolder_path, entry))
)

In [2]:
# Module-level state shared by the cells below.

# Counters
game, row_number, hand, won = 0, 0, 0, 0

# Stage flags, all initially cleared
start_hand = preflop = flop = turn = river = showdown = 0

# Per-hand player and board information
dealer_position = 0
seats = dict()
cards = list()
player_name_post_blind = ""
blind = 0
action = list()
flopcards = list()
turncard = list()
rivercard = list()

# Per-street action containers; each name gets its own, independent dict
preflop_fold_dict, preflop_bet_dict, preflop_check_dict = {}, {}, {}
flop_fold_dict, flop_bet_dict, flop_check_dict = {}, {}, {}
turn_fold_dict, turn_bet_dict, turn_check_dict = {}, {}, {}
river_fold_dict, river_bet_dict, river_check_dict = {}, {}, {}

# Room name -> numeric value stored in the "blinds" column
room_to_value = dict(
    Halley=0.02,
    Donati=0.05,
    Aludra=0.10,
    Hydra=0.25,
    Baade=0.50,
    Triangulum=1.00,
)

# Empty result frame. Column order: hand metadata, seat/chips pairs for
# player0..player6, then the fold/bet/check columns of player0 for each street.
pokerdata = pd.DataFrame(
    columns=(
        ['file', 'room', 'blinds', 'game', 'hand', 'won', 'firstcard', 'secondcard',
         'flop1', 'flop2', 'flop3', 'turn', 'river', 'dealer']
        + [f'player{index}_{field}' for index in range(7) for field in ('seat', 'chips')]
        + [f'player0_{street}_{kind}'
           for street in ('preflop', 'flop', 'turn', 'river')
           for kind in ('fold', 'bet', 'check')]
    )
)

In [3]:
# Regular expressions used to parse hand-history lines (all applied with re.search).

# Section markers
pattern_start = r"PokerStars"
pattern_preflop = r"HOLE CARDS"
pattern_flop = r"FLOP"
pattern_turn = r"TURN"
pattern_river = r"RIVER"
pattern_showdown = r"SHOW DOWN"

# Button seat number, e.g. "Seat #3"
pattern_dealer_position = r"Seat #(\d+)" 

# Seat lines: groups are (seat number, player name, chips).
# The first variant expects a plain chip count, the cash-game variant a
# dollar amount with optional cents.
pattern_player_information = r"Seat (\d+): ([\w.@$*'!&?()+|<>#%&=,/:;{}~\"\-\u0080-\uFFFF\s]+) \((\d+) in chips\)"
pattern_player_information_cashgame = r"Seat (\d+): ([\w.@$*'!&?()+%&/|<>#,=:;{}~\"\-\u0080-\uFFFF\s]+) \(\$(\d+(?:\.\d{1,2})?) in chips\)"

# Blind posts: groups are (player name, amount). Cents are optional here as
# well, matching the cash-game seat pattern, so a whole-dollar post such as
# "$1" is recognised too.
pattern_post_blinds = r"([\w.@$*'!&?()+|<>#%&=,/:;{}~\"\-\u0080-\uFFFF\s]+): posts .* \$(\d+(?:\.\d{1,2})?)"

# Cards: rank in 2-9/T/J/Q/K/A followed by a suit in c/d/h/s
pattern_handcards = r"Dealt to \S+ \[([A2-9TJQKA][cdhs])( [A2-9TJQKA][cdhs])*\]"
pattern_flop_cards = r"\[([A2-9TJQKA][cdhs]) ([A2-9TJQKA][cdhs]) ([A2-9TJQKA][cdhs])\]"
# A bracket holding exactly one card (the newly dealt turn / river card)
pattern_turn_card = r"\[([A2-9TJQKA][cdhs])\]"
pattern_river_card = r"\[([A2-9TJQKA][cdhs])\]"

pattern_summary = r"SUMMARY"

In [4]:
# Function to handle extraction of bets, folds, raises, checks, calls and pot collections
def extract_action(line):
    """Parse one hand-history line into ``[player_name, action, amount]``.

    The line is tested against the rules below in order and the first match
    wins:

    * ``"<name>: folds"``                 -> ``[name, 'fold', 0]``
    * ``"<name>: bets $X"``               -> ``[name, 'bet', X]``
    * ``"<name>: raises $X to $Y"``       -> ``[name, 'raise', Y]`` (the "to" amount)
    * ``"<name>: checks"``                -> ``[name, 'check', 0]``
    * ``"<name>: calls $X"``              -> ``[name, 'bet', X]`` (calls are reported as bets)
    * ``"<name> collected $X from pot"``  -> ``[name, 'collect', X]``
      ("from main pot" / "from side pot" are accepted as well)

    Amounts are always returned as ``float``; cents are optional in the
    input, so ``$1`` and ``$1.50`` are both recognised.

    Args:
        line: A single line of hand-history text.

    Returns:
        A three-element list as described above, or the string
        ``"No action found"`` when no rule matches.
    """
    # Characters accepted in a player name (captured as group 1).
    name = r"([\w.@$*'!&?()+|<>#%&=,/:;{}~\"\-\u0080-\uFFFF\s]+)"
    # Dollar amount with optional cents, e.g. "$1", "$0.5" or "$0.25".
    amount = r"\$(\d+(?:\.\d{1,2})?)"

    # (pattern, reported action, group holding the amount or None for 0)
    rules = (
        (name + r": folds", 'fold', None),
        (name + r": bets " + amount, 'bet', 2),
        (name + r": raises " + amount + r" to " + amount, 'raise', 3),
        (name + r": checks", 'check', None),
        (name + r": calls " + amount, 'bet', 2),
        (name + r" collected " + amount + r" from (?:(?:main|side) )?pot", 'collect', 2),
    )

    for pattern, action_label, amount_group in rules:
        found = re.search(pattern, line, re.UNICODE)
        if found:
            value = 0 if amount_group is None else float(found.group(amount_group))
            return [found.group(1), action_label, value]

    return "No action found"


In [5]:
# Helper that copies one per-street action dict into the per-seat state table

def update_player_state(action_dict, seats_dict, action_key, data_key=None, value_if_present=1, state=None):
    """Write the actions of one street/kind into the per-seat state table.

    For every player in ``action_dict`` the seat number is looked up in
    ``seats_dict`` and ``table[seat][action_key]`` is set to either
    ``action[data_key]`` (when ``data_key`` is given) or ``value_if_present``.
    Players whose seat number is not a key of the table are skipped.

    Args:
        action_dict: Mapping of player name -> action record (a dict).
        seats_dict: Mapping of player name -> dict with a ``'seat'`` entry
            convertible to ``int``.
        action_key: Key to set in the seat's state dict, e.g. ``'flop_bet'``.
        data_key: Optional key of the action record whose value is stored.
        value_if_present: Value stored when ``data_key`` is not given.
        state: Optional table ``{seat_number: {action_key: value}}`` to
            update. When omitted, the module-level ``player_state`` is used,
            which must exist by the time the function is called.

    Raises:
        KeyError: If a player in ``action_dict`` is missing from ``seats_dict``.
    """
    # Fall back to the notebook-level table only when no table was passed in,
    # so the function can also be used (and tested) without global state.
    target = player_state if state is None else state
    for pid, action in action_dict.items():
        seat = int(seats_dict[pid]['seat'])
        if seat in target:
            target[seat][action_key] = action[data_key] if data_key is not None else value_if_present

In [6]:
# Screen name of the player the data set is built around (the "player0" columns).
HERO_NAME = 'Mahu2021'
# Betting rounds and the action kinds recorded for each of them. Their order
# fixes the order of the generated "<street>_<kind>" columns.
STREETS = ('preflop', 'flop', 'turn', 'river')
ACTION_KINDS = ('fold', 'bet', 'check')
# Number of "playerN_seat" / "playerN_chips" column pairs written per hand.
PLAYER_SLOTS = 6


def _new_hand_state():
    """Return fresh per-hand parser state.

    The tuple is ``(stage, seats, cards, flopcards, turncard, rivercard, won,
    street_actions)``. ``street_actions[street][kind]`` maps a player name to
    the action record collected for that street.
    """
    street_actions = {street: {kind: {} for kind in ACTION_KINDS} for street in STREETS}
    return None, {}, [], ["", "", ""], "", "", 0, street_actions


# Rows are collected here and appended to `pokerdata` once after the loop;
# concatenating inside the loop copies the whole frame for every hand.
hand_records = []

for txt_file in log_files:
    file_path = os.path.join(logfolder_path, txt_file)
    game += 1
    hand = 0
    row_number = 0

    # Room name is taken from the file name ("HH<digits> <room> ...").
    # Unknown names/rooms yield None instead of reusing the previous file's
    # values or failing on float(None).
    room = None
    room_blind = None
    match_room = re.search(r'^HH\d+\s+(\w+)', txt_file)
    if match_room:
        room = match_room.group(1)
        if room in room_to_value:
            room_blind = float(room_to_value[room])

    # Start every file with clean per-hand state so that a file which does not
    # end in a blank line cannot leak seats/actions into the next one.
    (stage, seats, cards, flopcards, turncard, rivercard,
     won, street_actions) = _new_hand_state()

    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        for line in file:
            # A blank line separates hands: reset everything and move on.
            if not line.strip():
                row_number = 0
                (stage, seats, cards, flopcards, turncard, rivercard,
                 won, street_actions) = _new_hand_state()
                continue

            row_number += 1
            if row_number == 1:
                # First non-blank line after a separator starts a new hand.
                hand += 1

            # --- Stage transitions (later markers override earlier ones) ---
            if re.search(pattern_start, line):
                stage = 'start'

            if re.search(pattern_preflop, line):
                stage = 'preflop'

            # Street markers only count when the board cards can be read from
            # the same line; this avoids failing on a None match when the
            # marker text merely appears somewhere else in a line.
            if re.search(pattern_flop, line):
                flop_match = re.search(pattern_flop_cards, line, re.UNICODE)
                if flop_match:
                    stage = 'flop'
                    flopcards = list(flop_match.groups())

            if re.search(pattern_turn, line):
                turn_match = re.search(pattern_turn_card, line, re.UNICODE)
                if turn_match:
                    stage = 'turn'
                    turncard = turn_match.group(1)

            if re.search(pattern_river, line):
                river_match = re.search(pattern_river_card, line, re.UNICODE)
                if river_match:
                    stage = 'river'
                    rivercard = river_match.group(1)

            if re.search(pattern_showdown, line):
                stage = 'showdown'

            # --- Hand header: button, seats and posted blinds ---
            if stage == 'start':
                match_dealer = re.search(pattern_dealer_position, line, re.UNICODE)
                if match_dealer:
                    dealer_position = match_dealer.group(1)

                #Adjust pattern for Spin and Go here
                match_player = re.search(pattern_player_information_cashgame, line, re.UNICODE)
                if match_player:
                    seat_number, player_name, chips = match_player.groups()
                    seats[player_name] = {'seat': seat_number, 'chips': chips}

                match_post_blinds = re.search(pattern_post_blinds, line, re.UNICODE)
                if match_post_blinds:
                    blind_poster = match_post_blinds.group(1)
                    blind_amount = float(match_post_blinds.group(2))
                    # Posted blinds are deducted from the recorded stack;
                    # posts by names that were not seated are ignored.
                    if blind_poster in seats:
                        remaining = float(seats[blind_poster]['chips']) - blind_amount
                        seats[blind_poster]['chips'] = str(round(remaining, 2))

            # --- Hole cards ---
            if stage == 'preflop':
                match_handcards = re.search(pattern_handcards, line, re.UNICODE)
                if match_handcards:
                    dealt = match_handcards.group(0)
                    cards = dealt[dealt.find('[') + 1:dealt.find(']')].split()

            # --- Player actions ---
            if stage in STREETS or stage == 'showdown':
                action = extract_action(line)
                # extract_action returns a plain string when nothing matched.
                if not isinstance(action, str):
                    actor, kind, amount = action
                    if kind == 'collect':
                        if actor == HERO_NAME:
                            won = 1
                    elif stage in STREETS and actor in seats:
                        street = street_actions[stage]
                        position = seats[actor]['seat']
                        if kind == 'fold':
                            street['fold'][actor] = {"position": position, "fold": 1}
                        elif kind == 'check':
                            street['check'][actor] = {"position": position, "check": 1}
                        elif kind in ('bet', 'raise'):
                            entry = street['bet'].setdefault(actor, {"position": position, "bet": 0.0})
                            if kind == 'raise':
                                # NOTE(review): assumes "raises $X to $Y" reports Y as the
                                # player's total for the street, so it replaces the running
                                # amount instead of being added to it - confirm.
                                entry["bet"] = float(amount)
                            else:
                                # Bets and calls are increments on top of what is already in.
                                entry["bet"] = round(entry["bet"] + float(amount), 2)

            # --- Summary line: emit one row for the finished hand ---
            if re.search(pattern_summary, line):
                hero_seat = seats.get(HERO_NAME, {})
                record = {
                    "file": txt_file, "room": room, "blinds": room_blind,
                    "game": game, "hand": hand, "won": won,
                    # None when the hero was not dealt in for this hand.
                    "firstcard": cards[0] if len(cards) > 0 else None,
                    "secondcard": cards[1] if len(cards) > 1 else None,
                    "flop1": flopcards[0], "flop2": flopcards[1], "flop3": flopcards[2],
                    "turn": turncard, "river": rivercard, "dealer": dealer_position,
                    "player0_seat": hero_seat.get('seat'),
                    "player0_chips": hero_seat.get('chips'),
                }

                # player1..player6 seat/chips follow the order in which the
                # seats were listed; unused slots are None rather than values
                # left over from an earlier hand.
                listed_players = list(seats.values())
                for slot in range(1, PLAYER_SLOTS + 1):
                    info = listed_players[slot - 1] if slot <= len(listed_players) else {}
                    record[f'player{slot}_seat'] = info.get('seat')
                    record[f'player{slot}_chips'] = info.get('chips')

                # Hero ("player0") actions per street.
                for street_name in STREETS:
                    street = street_actions[street_name]
                    record[f'player0_{street_name}_fold'] = 1 if HERO_NAME in street['fold'] else 0
                    record[f'player0_{street_name}_bet'] = (
                        street['bet'][HERO_NAME]["bet"] if HERO_NAME in street['bet'] else 0
                    )
                    record[f'player0_{street_name}_check'] = 1 if HERO_NAME in street['check'] else 0

                # Per-seat action table for seats 1-6. It has to be bound at
                # module level because update_player_state() writes into the
                # global `player_state` when no table is passed to it.
                player_state = {
                    seat: {f'{street_name}_{kind}': 0 for street_name in STREETS for kind in ACTION_KINDS}
                    for seat in range(1, 7)
                }
                for street_name in STREETS:
                    for kind in ACTION_KINDS:
                        update_player_state(
                            street_actions[street_name][kind], seats, f'{street_name}_{kind}',
                            data_key='bet' if kind == 'bet' else None,
                        )

                # These "player<seat>_<street>_<kind>" columns are keyed by seat number.
                record.update({
                    f'player{seat}_{action_key}': value
                    for seat, seat_actions in player_state.items()
                    for action_key, value in seat_actions.items()
                })

                hand_records.append(record)

# Append all parsed hands in one step.
if hand_records:
    pokerdata = pd.concat([pokerdata, pd.DataFrame(hand_records)], ignore_index=True)


In [7]:
# Write the collected hands to a Parquet file; the DataFrame index is not stored.
pokerdata.to_parquet(path='pokerdata.parquet', index=False)