In [51]:
import pandas as pd
import sqlite3

pd.set_option("display.max_columns", 100)
conn = sqlite3.connect("/home/sam/Documents/DSI/capstone/poker/data/drivehud.db")


hand_players = pd.read_sql_query("select * from HandsPlayers;", conn)
tour = pd.read_sql_query("select * from Tournaments;", conn) # pretty sure these all connect to form a more full picture
hand_history = pd.read_sql_query("select * from HandHistories;", conn)

hand_history.HandHistory = hand_history.HandHistory.str.split('\r\n')
df = hand_history.copy()

In [52]:
# start off small and then add features, but build MVP out of a minimal number of columns (Don't bogged down in the string cleaning, look at minimal number of features first and then go from where)

In [60]:
def fill_columns(df):
    df['buyin'] = df.HandHistory.apply(lambda row: buyin(row)) 

    df['the_deck'] = df.HandHistory.apply(lambda row: possible_cards(row))

    df['my_cards'] = df.HandHistory.apply(lambda row: hole_cards(row))
    
    df['blinds'] = df.HandHistory.apply(lambda row: blinds(row))

    df['starting_stack'] = df.HandHistory.apply(lambda row: start_stack(row))
    
    df['tournament_type'] = df.HandHistory.apply(lambda row: tournament_type(row))

    df['won'] = df.HandHistory.apply(lambda row: won(row))

    df['bet'] = df.HandHistory.apply(lambda row: bet(row))
    
    df['made_money'] = df['won'] > df['bet']
    
    df['made_money'] = df.made_money.apply(lambda x: fix_made_money(x))
    
    df['total_players'] = df.HandHistory.apply(lambda x: total_players(x))
    
    df['card_rank'] = df.my_cards.apply(lambda x: cards_numeric(x))
    
    df['position'] = df.HandHistory.apply(lambda x: position(x))
    
    df['BB'] = df.blinds.apply(lambda x: BB(x))
    
    df['BB_in_stack'] = df.apply(lambda x: BB_in_stack(x['starting_stack'], x['BB']), axis = 1)
    
    
    
    df['all_in'] = df.apply(lambda x: all_in(x['starting_stack'], x['bet']), axis = 1)
    return df


In [61]:
def fix_made_money(x):
    result = 0
    if x == True:
        result = 1
    else:
        result = 0
    return result


def total_players(x):
    count = 0
    for elem in x:
        if 'Pocket' in elem:
            count += 1
    return count 

def buyin(x): # this seems to work, the implementation is functionalized above 
    for elem in x:
        if 'totalbuyin' in elem:
            temp = elem 
            temp = temp.replace('<totalbuyin>$', '')
            temp = temp.replace('</totalbuyin>', '')
            return float(temp)

def hole_cards(x):
    c1 = ''
    c2 = ''
    for elem in x:
        if 'Pocket' in elem and 'Hero' in elem:
            temp = elem.split(' ')
            for s in temp:
                if 'Hero' in s:
                    c1 = s.replace('player="Hero">', '')
                    c1 = c1.replace('0', '')
                if '</cards>' in s:
                    c2 = s.replace('</cards>', '')
                    c2 = c2.replace('0', '')
    if c1 != '':
        cards = [c1, c2]
        return cards
    else:
        return None
    
    
def possible_cards(x): # will do this first, and then remove cards in seperate functions, still not sure how to deal with all in vs seeing later action
    deck = []
    for s in ['D', 'S', 'C', 'H']:
        for c in ['2','3','4', '5', '6', '7', '8', '9', '1', 'J', 'Q', 'K', 'A']: # making 10 just 1 for consistency sake
            temp = s + c
            deck.append(temp)
    return deck 

def blinds(x):
    blind_list = []
    result = []
    for elem in x:
        if '[cards]' in elem:
            blind_list.append(elem)
    for elem in blind_list:
        temp = elem.split(' ')
        for s in temp:
            if 'sum' in s:
                temp2 = s.replace('sum="', '')
                temp2 = temp2.replace('"', '')
                result.append(float(temp2))
    return result 

def start_stack(x):
    result = None
    temp = ''
    for elem in x:
        if 'Hero' in elem and 'addon' in elem:
            temp = elem
    
    if temp != '':
        temp = temp.split(' ')
        for elem in temp:
            if 'chips' in elem:
                result = elem.replace('chips="', '')
                result = result.replace('"', '')
                result = float(result)
    return result 

def won(x):
    won = ''
    temp = ''
    for elem in x:
        if 'Hero' in elem and 'addon' in elem:
            temp = elem
    
    if temp != '':
        temp = temp.split(' ')
        for elem in temp:
            if 'win' in elem:
                won = elem.replace('win="', '')
                won = won.replace('"', '')
                won = float(won)
    return won 

def bet(x):
    bet = ''
    temp = ''
    result = None
    for elem in x:
        if 'Hero' in elem and 'addon' in elem:
            temp = elem
    
    if temp != '':
        temp = temp.split(' ')
        for elem in temp:
            if 'bet' in elem:
                bet = elem.replace('bet="', '').replace('"', '')
                result = float(bet)
    return result 


# some tournaments just labeled 'holdem, at somepoint try to figure out what these are
def tournament_type(x):
    result = ''
    cut = 0
    counter = 0
    for elem in x:
        if '<tournamentname>' in elem:
            temp = elem
            cut = temp.count('(') 
            temp = temp.replace('<tournamentname>', '')
            
            for char in temp:
                if char != '(':
                    result += char
                else:
                    counter += 1
                    if counter == cut:
                        break 
                    else:
                        result += char
    result = result.rstrip()
    result = result.replace('amp;', '')
    if 'Table3' in result:
        result = 'Jackpot Sit & Go $0.50'
    return result 


def cards_numeric(x):
    result = 0
    if isinstance(x, list):
        if len(x[0]) == 2 and len(x[1]) == 2:
            c1 = x[0]
            c2 = x[1]
            c1_rank = c1[1]
            c2_rank = c2[1]
            c1_suit = c1[0]
            c2_suit = c2[0]
            c_rank = [c1_rank, c2_rank]
            
            
            numeric_card_lst = []
            
            for card in c_rank: # assigning numeric value of card ranks
                if card in ['A', 'K', 'Q', 'J', '1']:
                    numeric_dic = {'A': 14, 'K': 13, 'Q': 12, 'J': 11, '1': 10}
                    numeric_card_lst.append(numeric_dic[card])
                else:
                    numeric_card_lst.append(int(card))
            
            highest = max(numeric_card_lst) # assigning points for highest card
            
            if highest > 10: # points for over 10
                high_vals = {14:10, 13:8, 12:7, 11:6}
                result += high_vals[highest]
            else: # points for 10 & under
                result += highest / 2
                
            
            if c1_suit == c2_suit: # assigning points for whether cards are suited
                result += 2
            
            
            
            val = highest - min(numeric_card_lst) # getting the distance betweeen the cards
            
            if val == 0: # doubling points of pocket pairs 
                result *= 2
                if result < 5: # worth minimum of 5 points
                    result = 5
            else: # now assigning points for connectedness between 
                if val <= 3: # 2 gapper and less
                    result -= val - 1
                else:
                    if val == 4: # 3 gapper
                        result -= val
                    else: # 4 gapper and more
                        result -= 5
            if val <= 2 and val != 0 and numeric_card_lst[0] < 12 and numeric_card_lst[1] < 12:
                result += 1
                
                    
    return result # I elected not to round up as the formula dictates, not sure what benefit it would have in this scenario
            
#def bb_in_stack(x):

# get BB out of this one? not the actual BB, just if i am the BB 
def position(x):
    pos_lst = []
    dealer_pos = 0
    dealer_exists = False
    hero_counter = 0
    hero_found = False
    result = None 
    hero_pos = 0
    
    for elem in x:
        if 'dealer' in elem:
            pos_lst.append(elem)

            
    for idx, elem in enumerate(pos_lst):
        if 'dealer="1"':
            dealer_pos = idx
            dealer_exists = True
        
    if dealer_exists:
        if dealer_pos < len(pos_lst) - 1:
            beginning = pos_lst[dealer + 1 : ]
            rest = pos_lst[ : dealer + 1]
            for elem in beginning:
                if 'name="Hero"' in elem:
                    hero_counter += 1
                    hero_pos = hero_counter
                    hero_found = True
                    break 
                else:
                    hero_counter += 1
            if hero_found == False:
                for elem in rest:
                    if 'name="Hero"' in elem:
                        hero_counter += 1
                        hero_pos = hero_counter
                        hero_found = True
                        break
                    else:
                        hero_counter += 1
        else:
            for elem in pos_lst:
                if 'name="Hero"' in elem:
                    hero_counter += 1
                    hero_pos = hero_counter
                    hero_found = True
                    break
                else:
                    hero_counter += 1
    
    if hero_found:
        result = hero_pos / len(pos_lst)
    
    return result 
        
def BB(x): # did this super jankily, definitely return. just wanna see if i can get this loosely working
  
    def look_for_blinds(blinds):
        result = None 
        if len(blinds) == 2:
            if max(blinds) / min(blinds) == 2:
                result = max(x)


        else:
            if len(blinds) > 2:
                BB = blinds.pop(blinds.index(max(blinds)))
                SB = blinds.pop(blinds.index(max(blinds)))
                anti = blinds.pop(blinds.index(max(blinds)))
                if BB / SB == 2 and BB / anti == 10:
                    result = max(x)



        return result 

    result = look_for_blinds(x)

    
    return result

def BB_in_stack(stack, BB):
    result = None
    if BB != 0 and stack != '':
        result = stack / BB
    
    return result 

def all_in(stack, bet):
    result = None
    if stack == bet:
        result = 1
    else:
        if stack > 0:
            result = 0
    return result


            

In [62]:
df = fill_columns(df)
df

Unnamed: 0,HandHistoryId,HandNumber,PokerSiteId,HandHistory,HandHistoryTimestamp,GameType,TournamentNumber,buyin,the_deck,my_cards,blinds,starting_stack,tournament_type,won,bet,made_money,total_players,card_rank,position,BB,BB_in_stack,all_in
0,1,3951210633,1,"[<session sessioncode=""459742"">, <general>, <m...",2019-10-24 03:08:08,1,459742,1.10,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[C5, H2]","[10.0, 20.0]",1500.0,3-Table Sit & Go,0,20.0,0,9,0.5,0.666667,20.0,75.000000,0.0
1,2,3951211988,1,"[<session sessioncode=""459742"">, <general>, <m...",2019-10-24 03:09:34,1,459742,1.10,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[C9, S9]","[10.0, 20.0]",1480.0,3-Table Sit & Go,640,420.0,1,9,9.0,0.666667,20.0,74.000000,0.0
2,3,3951212830,1,"[<session sessioncode=""459742"">, <general>, <m...",2019-10-24 03:11:47,1,459742,1.10,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[S3, DK]","[10.0, 20.0]",1700.0,3-Table Sit & Go,0,0.0,0,9,3.0,0.666667,20.0,85.000000,0.0
3,4,3951214131,1,"[<session sessioncode=""459742"">, <general>, <m...",2019-10-24 03:13:37,1,459742,1.10,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[C9, D8]","[10.0, 20.0]",1700.0,3-Table Sit & Go,0,0.0,0,8,5.5,0.625000,20.0,85.000000,0.0
4,5,3951215154,1,"[<session sessioncode=""459742"">, <general>, <m...",2019-10-24 03:14:31,1,459742,1.10,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[S7, HQ]","[10.0, 20.0]",1700.0,3-Table Sit & Go,0,0.0,0,8,2.0,0.625000,20.0,85.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49284,49301,4249934262,1,"[<session sessioncode=""2297128"">, <general>, <...",2021-02-04 04:19:06,77,40065723,1.05,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[DK, CK]","[10.0, 20.0]",500.0,Hyper Turbo (500 Chips),840,500.0,1,6,16.0,0.500000,20.0,25.000000,1.0
49285,49302,4249935613,1,"[<session sessioncode=""2297128"">, <general>, <...",2021-02-04 04:19:33,77,40065723,1.05,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[DA, D4]","[10.0, 20.0]",840.0,Hyper Turbo (500 Chips),0,20.0,0,6,7.0,0.500000,20.0,42.000000,0.0
49286,49303,4249936078,1,"[<session sessioncode=""2297128"">, <general>, <...",2021-02-04 04:20:33,78,40065723,1.05,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[DQ, H9]",[30.0],820.0,Hyper Turbo (500 Chips),0,30.0,0,5,5.0,0.600000,,,0.0
49287,49304,4249936856,1,"[<session sessioncode=""2297128"">, <general>, <...",2021-02-04 04:21:01,78,40065723,1.05,"[D2, D3, D4, D5, D6, D7, D8, D9, D1, DJ, DQ, D...","[DQ, CA]","[15.0, 30.0]",790.0,Hyper Turbo (500 Chips),330,790.0,0,5,9.0,0.600000,30.0,26.333333,1.0


In [20]:
for elem in df.HandHistory.iloc[-100]:
    temp = None
    blind = None
    if 'cards="[cards]"' in elem and 'Hero' in elem:
        temp = elem.split()
        break
if temp != None:
    for elem in temp:
        if 'sum=' in elem:
            blind = int(elem.replace('sum="', '').replace('"', '').strip())


25

{'s': 994, 'f': 48295}

In [49]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49289 entries, 0 to 49288
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   HandHistoryId         49289 non-null  int64  
 1   HandNumber            49289 non-null  int64  
 2   PokerSiteId           49289 non-null  int64  
 3   HandHistory           49289 non-null  object 
 4   HandHistoryTimestamp  49289 non-null  object 
 5   GameType              49289 non-null  int64  
 6   TournamentNumber      49289 non-null  object 
 7   buyin                 49289 non-null  float64
 8   the_deck              49289 non-null  object 
 9   my_cards              48295 non-null  object 
 10  blinds                49289 non-null  object 
 11  starting_stack        48295 non-null  float64
 12  tournament_type       49289 non-null  object 
 13  won                   49289 non-null  object 
 14  bet                   49289 non-null  object 
 15  made_money         

In [36]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49289 entries, 0 to 49288
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   HandHistoryId         49289 non-null  int64  
 1   HandNumber            49289 non-null  int64  
 2   PokerSiteId           49289 non-null  int64  
 3   HandHistory           49289 non-null  object 
 4   HandHistoryTimestamp  49289 non-null  object 
 5   GameType              49289 non-null  int64  
 6   TournamentNumber      49289 non-null  object 
 7   buyin                 49289 non-null  float64
 8   the_deck              49289 non-null  object 
 9   my_cards              48295 non-null  object 
 10  blinds                49289 non-null  object 
 11  starting_stack        48295 non-null  float64
 12  tournament_type       49289 non-null  object 
 13  won                   49289 non-null  object 
 14  bet                   49289 non-null  object 
 15  made_money         

In [13]:
df.starting_stack.iloc[-1]

330.0

In [10]:
hand_history.HandHistory.iloc[-4]

['<session sessioncode="2297128">',
 '<general>',
 '<client_version>19.9.7.4</client_version>',
 '<uncalled_bet_enabled>false</uncalled_bet_enabled>',
 '<mode>real</mode>',
 '<gametype>Holdem NL</gametype>',
 '<tablename>Hyper Turbo (500 Chips) (#40065723)</tablename>',
 '<duration>N/A</duration>',
 '<gamecount>0</gamecount>',
 '<startdate>2021-02-04 04:16:37</startdate>',
 '<currency>USD</currency>',
 '<nickname>Hero</nickname>',
 '<bets>0</bets>',
 '<wins>0</wins>',
 '<chipsin>0</chipsin>',
 '<chipsout>0</chipsout>',
 '<ipoints>0</ipoints>',
 '<statuspoints>0</statuspoints>',
 '<awardpoints>0</awardpoints>',
 '<tournamentcode>40065723</tournamentcode>',
 '<tournamentname>Hyper Turbo (500 Chips) (#40065723)</tournamentname>',
 '<place>6</place>',
 '<buyin>$1+$0.05</buyin>',
 '<totalbuyin>$1.05</totalbuyin>',
 '<win>0</win>',
 '<maxplayers>6</maxplayers>',
 '<pokersite>Ignition</pokersite>',
 '</general>',
 '<game gamecode="4249935613">',
 '<general>',
 '<startdate>2021-02-04 04:19:33<

In [27]:
df.iloc[[75]]['TournamentNumber'].values[0]

'459742'

In [156]:
max(df.iloc[[300]]['blinds'])

[100.0, 200.0]

In [36]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

df.blinds.value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[0.02, 0.05]                                                                                  13347
[50.0, 100.0]                                                                                  5254
[25.0, 50.0]                                                                                   4738
[10.0, 20.0]                                                                                   4360
[75.0, 150.0]                                                                                  4325
[15.0, 30.0]                                                                                   4296
[100.0, 200.0]                                                                                 2812
[20.0, 40.0]                                                                                    955
[0.02, 0.05, 0.05]                                                                              740
[30.0, 60.0]                                                                                    721


In [157]:
# maybe return to this 
def BB_in_stack(x):
    result = None  
    blinds = max(x['blinds'])
    
    consider_alt = False
    
    idx = x.name # get blinds of 1 above and 1 below if they are from the same tournament
    t_num = x['TournamentNumber']
    

    
    def look_for_blinds(blinds)
        if len(blinds) == 2:
            if max(blinds) / min(blinds) == 2:
                result = x['starting_stack'] / BB
                consider_alt = False
            else:
                consider_alt = True


        else:
            if len(blinds) > 2:
                BB = blinds.pop(blinds.index(max(blinds)))
                SB = blinds.pop(blinds.index(max(blinds)))
                anti = blinds.pop(blinds.index(max(blinds)))
                if BB / SB == 2 and BB / anti == 10:
                    result = x['starting_stack'] / BB
                    consider_alt = False
                else:
                    consider_alt = True
        
            else:
                consider_alt = True
        



IndentationError: expected an indented block (<ipython-input-157-01e327cd13f5>, line 12)

In [159]:
def BB_in_stack(x):
  
    blinds = max(x)
    #    result = None  
    #      blinds = max(x['blinds'])

    #     consider_alt = False

    #     def look_for_blinds(blinds):
    #         consider_alt = False
    #         result = None 
    #         if len(blinds) == 2:
    #             if max(blinds) / min(blinds) == 2:
    #                 result = x['starting_stack'] / BB
    #             else:
    #                 consider_alt = True


    #         else:
    #             if len(blinds) > 2:
    #                 BB = blinds.pop(blinds.index(max(blinds)))
    #                 SB = blinds.pop(blinds.index(max(blinds)))
    #                 anti = blinds.pop(blinds.index(max(blinds)))
    #                 if BB / SB == 2 and BB / anti == 10:
    #                     result = x['starting_stack'] / BB
    #                 else:
    #                     consider_alt = True

    #             else:
    #                 consider_alt = True

    #         return consider_alt, result 

    #     consider, result = look_for_blinds(blinds)

    
    return blinds

# use this to predict whether we made money or not
# couldn't get this working
# some more related code below
def fix_bb_nans(input_df):
    count = 0
    df = input_df.copy()
    df_idx = df.index
    for idx in df_idx:
        row = df.iloc[idx, :]
        
        if max(pd.isnull(row['BB_in_stack'])) == True:
            if len(df_idx) + 1 >= idx + 1: 
                alt = df.iloc[[idx + 1]]
                if max(alt['TournamentNumber']) == max(row['TournamentNumber']) and max(pd.isnull(alt['BB_in_stack'])) == False:
                    result = row['starting_stack'] / alt['BB'] 
                    df.at[idx, 'BB_in_stack'] = result
                    count += 1
    
    return df

[50.0, 100.0]

In [None]:
        up1 = df.iloc[[idx + 1]]['blinds']
        down1 = df.iloc[[idx - 1]]['blinds']
        
        up1_num = df.iloc[[idx + 1]]['TournamentNumber']
        d1_num = df.iloc[[idx - 1]]['TournamentNumber']

            if t_num < 
    alt1 = df.iloc[[idx + 1]]
    alt2 = df.iloc[[idx + 2]]
    alt3 = df.iloc[[idx + 3]]
    alt4 = df.iloc[[idx + 4]]
    alt5 = df.iloc[[idx - 1]]
    
    alt_lst = [alt1, alt2, alt3, alt4, alt5]
    