In [66]:
import pandas as pd
import sqlite3
import numpy as np 
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, precision_score, accuracy_score
pd.set_option("display.max_columns", 100)
conn = sqlite3.connect("drivehud.db")


hand_players = pd.read_sql_query("select * from HandsPlayers;", conn)
tour = pd.read_sql_query("select * from Tournaments;", conn) # pretty sure these all connect to form a more full picture
hand_history = pd.read_sql_query("select * from HandHistories;", conn)

hand_history.HandHistory = hand_history.HandHistory.str.split('\r\n')
df = hand_history.copy()

In [67]:
def fill_columns(df):
    df['buyin'] = df.HandHistory.apply(lambda row: buyin(row)) 

    df['the_deck'] = df.HandHistory.apply(lambda row: possible_cards(row))

    df['my_cards'] = df.HandHistory.apply(lambda row: hole_cards(row))
    
    df['blinds'] = df.HandHistory.apply(lambda row: blinds(row))

    df['starting_stack'] = df.HandHistory.apply(lambda row: start_stack(row))
    
    df['tournament_type'] = df.HandHistory.apply(lambda row: tournament_type(row))

    df['won'] = df.HandHistory.apply(lambda row: won(row))

    df['bet'] = df.HandHistory.apply(lambda row: bet(row))
    
    df['made_money'] = df['won'] > df['bet']
    
    df['made_money'] = df.made_money.apply(lambda x: fix_made_money(x))
    
    df['total_players'] = df.HandHistory.apply(lambda x: total_players(x))
    
    df['card_rank'] = df.my_cards.apply(lambda x: cards_numeric(x))
    

    return df


def fix_made_money(x):
    result = 0
    if x == True:
        result = 1
    else:
        result = 0
    return result


def total_players(x):
    count = 0
    for elem in x:
        if 'Pocket' in elem:
            count += 1
    return count 

def buyin(x): # this seems to work, the implementation is functionalized above 
    for elem in x:
        if 'totalbuyin' in elem:
            temp = elem 
            temp = temp.replace('<totalbuyin>$', '')
            temp = temp.replace('</totalbuyin>', '')
            return float(temp)

def hole_cards(x):
    c1 = ''
    c2 = ''
    for elem in x:
        if 'Pocket' in elem and 'Hero' in elem:
            temp = elem.split(' ')
            for s in temp:
                if 'Hero' in s:
                    c1 = s.replace('player="Hero">', '')
                    c1 = c1.replace('0', '')
                if '</cards>' in s:
                    c2 = s.replace('</cards>', '')
                    c2 = c2.replace('0', '')
    if c1 != '':
        cards = [c1, c2]
        return cards
    else:
        return None
    
    
def possible_cards(x): # will do this first, and then remove cards in seperate functions, still not sure how to deal with all in vs seeing later action
    deck = []
    for s in ['D', 'S', 'C', 'H']:
        for c in ['2','3','4', '5', '6', '7', '8', '9', '1', 'J', 'Q', 'K', 'A']: # making 10 just 1 for consistency sake
            temp = s + c
            deck.append(temp)
    return deck 

def blinds(x):
    blind_list = []
    result = []
    for elem in x:
        if '[cards]' in elem:
            blind_list.append(elem)
    for elem in blind_list:
        temp = elem.split(' ')
        for s in temp:
            if 'sum' in s:
                temp2 = s.replace('sum="', '')
                temp2 = temp2.replace('"', '')
                result.append(float(temp2))
    return result 

def start_stack(x):
    result = ''
    temp = ''
    for elem in x:
        if 'Hero' in elem and 'addon' in elem:
            temp = elem
    
    if temp != '':
        temp = temp.split(' ')
        for elem in temp:
            if 'chips' in elem:
                result = elem.replace('chips="', '')
                result = result.replace('"', '')
                result = float(result)
    return result 

def won(x):
    won = ''
    temp = ''
    for elem in x:
        if 'Hero' in elem and 'addon' in elem:
            temp = elem
    
    if temp != '':
        temp = temp.split(' ')
        for elem in temp:
            if 'win' in elem:
                won = elem.replace('win="', '')
                won = won.replace('"', '')
                won = float(won)
    return won 

def bet(x):
    bet = ''
    temp = ''
    for elem in x:
        if 'Hero' in elem and 'addon' in elem:
            temp = elem
    
    if temp != '':
        temp = temp.split(' ')
        for elem in temp:
            if 'bet' in elem:
                bet = elem.replace('bet="', '')
                bet = bet.replace('"', '')
                bet = float(bet)
    return bet 


# some tournaments just labeled 'holdem, at somepoint try to figure out what these are
def tournament_type(x):
    result = ''
    cut = 0
    counter = 0
    for elem in x:
        if '<tournamentname>' in elem:
            temp = elem
            cut = temp.count('(') 
            temp = temp.replace('<tournamentname>', '')
            
            for char in temp:
                if char != '(':
                    result += char
                else:
                    counter += 1
                    if counter == cut:
                        break 
                    else:
                        result += char
    result = result.rstrip()
    result = result.replace('amp;', '')
    if 'Table3' in result:
        result = 'Jackpot Sit & Go $0.50'
    return result 


def cards_numeric(x):
    result = 0
    if isinstance(x, list):
        if len(x[0]) == 2 and len(x[1]) == 2:
            c1 = x[0]
            c2 = x[1]
            c1_rank = c1[1]
            c2_rank = c2[1]
            c1_suit = c1[0]
            c2_suit = c2[0]
            c_rank = [c1_rank, c2_rank]
            
            
            numeric_card_lst = []
            
            for card in c_rank: # assigning numeric value of card ranks
                if card in ['A', 'K', 'Q', 'J', '1']:
                    numeric_dic = {'A': 14, 'K': 13, 'Q': 12, 'J': 11, '1': 10}
                    numeric_card_lst.append(numeric_dic[card])
                else:
                    numeric_card_lst.append(int(card))
            
            highest = max(numeric_card_lst) # assigning points for highest card
            
            if highest > 10: # points for over 10
                high_vals = {14:10, 13:8, 12:7, 11:6}
                result += high_vals[highest]
            else: # points for 10 & under
                result += highest / 2
                
            
            if c1_suit == c2_suit: # assigning points for whether cards are suited
                result += 2
            
            
            
            val = highest - min(numeric_card_lst) # getting the distance betweeen the cards
            
            if val == 0: # doubling points of pocket pairs 
                result *= 2
                if result < 5: # worth minimum of 5 points
                    result = 5
            else: # now assigning points for connectedness between 
                if val <= 3: # 2 gapper and less
                    result -= val - 1
                else:
                    if val == 4: # 3 gapper
                        result -= val
                    else: # 4 gapper and more
                        result -= 5
            if val <= 2 and val != 0 and numeric_card_lst[0] < 12 and numeric_card_lst[1] < 12:
                result += 1
                
                    
    return result # I elected not to round up as the formula dictates, not sure what benefit it would have in this scenario
            

In [68]:
df = fill_columns(df)
df_reduced = df[['GameType', 'buyin', 'made_money', 'total_players', 'card_rank']] 
# i initially included starting stack, but decided to omit due to string values
# this is due to how i set up the lambda function for starting stack
# would be an esay fix, but i'll leave it for now since starting stack has little utlitity w/o BB as reference point

In [69]:
y = np.array(df_reduced.pop('made_money'))
X = np.array(df_reduced)

In [70]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [71]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train)


RandomForestClassifier()

In [72]:
pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, pred)
precision = precision_score(y_test, pred)
recall = recall_score(y_test, pred)
print(accuracy)
print(precision)
print(recall)

0.8317779761421732
0.5541448842419716
0.3345356176735798


In [73]:
rf.feature_importances_

array([0.2091689 , 0.07152712, 0.14639988, 0.57290411])

In [74]:
df.made_money.sum() / len(df.made_money) # looks like we are just always predicting that we didn't make money 

0.1806082493051188

In [75]:
pred.sum() / len(pred)

0.10865860585896292