In [18]:
import numpy as np
import eval7
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
import random

In [2]:
NUM_CARDS = 5

def card_strs2vec(cards):
    return cards2vec([eval7.Card(card_str) for card_str in cards])

def cards2vec(cards, num_cards = NUM_CARDS):
    card_encodings = np.zeros((4+13)*num_cards)
    for card_idx, card in enumerate(cards):
        offset = (4+13)*card_idx
        card_encodings[offset + card.suit] = 1
        card_encodings[offset + 4 + card.rank] = 1
    return card_encodings

In [3]:
MC_ITERS = 100_000
_COMM = 5  # number of community cards we need to draw
_OPP = 2


def calc_hole_strength(hole_cards, iters=MC_ITERS):
    '''
    Using MC with iterations to evalute hand strength
    Args:
    hole - our hole cards
    iters - number of times we run MC
    community - community cards
    '''

    deck = eval7.Deck()  # deck of cards

    for card in hole_cards:  # removing our hole cards from the deck
        deck.cards.remove(card)

    # the score is the number of times we win, tie, or lose
    score = 0

    for _ in range(iters):  # MC the probability of winning
        deck.shuffle()

        draw = deck.peek(_COMM + _OPP)

        opp_hole = draw[:_OPP]
        # the community cards that we draw in the MC
        alt_community = draw[_OPP:]

        our_hand = hole_cards + alt_community
        opp_hand = opp_hole + alt_community

        our_hand_value = eval7.evaluate(our_hand)
        opp_hand_value = eval7.evaluate(opp_hand)

        if our_hand_value > opp_hand_value:
            score += 2

        if our_hand_value == opp_hand_value:
            score += 1
        else:
            score += 0

    hand_strength = score/(2*iters)  # win probability

    return cards2vec(hole_cards), hand_strength


def calc_5card_strength(hole_cards, community_cards, iters=MC_ITERS):
        '''
        Using MC with iterations to evalute hand strength
        Draws len(hole_cards) cards for opponent and evaluates winrate vs our hand
        Args:
        hole_cards
        community_cards
        iters - number of times we run MC
        community - community cards
        '''

        deck = eval7.Deck() # deck of cards

        for card in community_cards: #removing the current community cards from the deck
            deck.cards.remove(card)

        for card in hole_cards: #removing our hole cards from the deck
            deck.cards.remove(card)

        #the score is the number of times we win, tie, or lose
        score = 0

        for _ in range(iters): # MC the probability of winning
            deck.shuffle()

            _OPP = len(hole_cards)

            opp_hole = deck.peek(_OPP)

            our_hand = hole_cards + community_cards
            opp_hand = opp_hole + community_cards


            our_hand_value = eval7.evaluate(our_hand)
            opp_hand_value = eval7.evaluate(opp_hand)

            if our_hand_value > opp_hand_value:
                score += 2

            if our_hand_value == opp_hand_value:
                score += 1
            else:
                score += 0

        hand_strength = score/(2*iters) # win probability

        return cards2vec(our_hand), hand_strength


In [4]:
DATASET_POINTS = 25_000


def random_preflop_strength():
    deck = eval7.Deck()
    deck.shuffle()
    draw = deck.peek(2)
    return calc_hole_strength(draw)

def random_postflop_strength():
    deck = eval7.Deck()
    deck.shuffle()
    len_hole = random.choice((1,2))
    draw = deck.peek(5)
    hole = draw[:len_hole]
    community = draw[len_hole:]

    return calc_5card_strength(hole, community)

# computes post-flop winrates of a hole, aka our preflop strength
def generate_preflop_strengths():
    dataset = Parallel(n_jobs=-1)(delayed(random_preflop_strength)() for i in tqdm(range(DATASET_POINTS)))
    return dataset
# computes post-flop winrates of a hole, aka our preflop strength
def generate_postflop_strengths():
    dataset = Parallel(n_jobs=-1)(delayed(random_postflop_strength)() for i in tqdm(range(DATASET_POINTS)))
    return dataset

In [29]:
preflop_X, preflop_Y = zip(*generate_preflop_strengths())


  0%|          | 0/25000 [00:00<?, ?it/s]

 10%|▉         | 95/1000 [44:47<7:06:43, 28.29s/it]


In [30]:
import pickle
with open("2card_MC_strength_data.pkl", "wb") as f:
    pickle.dump((preflop_X, preflop_Y), f)

[array([1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), array([0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), array([0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0

In [None]:

xtrain, xtest, ytrain, ytest = train_test_split(preflop_X, preflop_Y, test_size=0.2)
print(xtrain[:3], ytrain[:3])

In [32]:
xgb_hole_r = xgb.XGBRegressor(n_estimators=1000, max_depth=7, eta=0.1, subsample=0.7, colsample_bytree=0.8)

# Fitting the model
xgb_hole_r.fit(xtrain, ytrain)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eta=0.1, eval_metric=None,
             feature_types=None, gamma=None, gpu_id=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=None, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, n_estimators=1000, n_jobs=None,
             num_parallel_tree=None, predictor=None, ...)

In [44]:
with open("2card_xgbregressor.pkl", "wb") as f:
    pickle.dump(xgb_hole_r, f)

In [45]:

cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(xgb_hole_r, xtest, ytest, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# force scores to be positive
scores = np.absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (scores.mean(), scores.std()) )

Mean MAE: 0.002 (0.000)


In [43]:
#import timeit
testhand1 = ['As','Ac']
testhand2 = ['Kc','3d']
testhand3 = ['5c','7d']

print("testhand1:",testhand1, "score:", xgb_hole_r.predict([card_strs2vec(testhand1)]))
print("true MC:", calc_hole_strength([eval7.Card(card) for card in testhand1])[1])
print("testhand2:",testhand2, "score:", xgb_hole_r.predict([card_strs2vec(testhand2)]))
print("true MC:", calc_hole_strength([eval7.Card(card) for card in testhand2])[1])
print("testhand3:",testhand3, "score:", xgb_hole_r.predict([card_strs2vec(testhand3)])) # avg 807 micro s
print("true MC:", calc_hole_strength([eval7.Card(card) for card in testhand3])[1]) # avg 1.8 s

testhand1: ['As', 'Ac'] score: [0.851856]
true MC: 0.851815
testhand2: ['Kc', '3d'] score: [0.5136813]
true MC: 0.51388
testhand3: ['5c', '7d'] score: [0.40464035]
true MC: 0.40415


In [5]:
postflop_X, postflop_Y = zip(*generate_postflop_strengths())

  0%|          | 0/25000 [00:00<?, ?it/s]

NameError: name 'pickle' is not defined

In [6]:
import pickle
with open("5card_MC_strength_data.pkl", "wb") as f:
    pickle.dump((postflop_X, postflop_Y), f)

In [9]:
xtrain, xtest, ytrain, ytest = train_test_split(postflop_X, postflop_Y, test_size=0.2)
print(xtrain[:3], ytrain[:3])

[array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]), array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]), array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 1

In [10]:
xgb_community_r = xgb.XGBRegressor(n_estimators=1000, max_depth=7, eta=0.1, subsample=0.7, colsample_bytree=0.8)

# Fitting the model
xgb_community_r.fit(xtrain, ytrain)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eta=0.1, eval_metric=None,
             feature_types=None, gamma=None, gpu_id=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=None, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, n_estimators=1000, n_jobs=None,
             num_parallel_tree=None, predictor=None, ...)

In [13]:
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(xgb_community_r, xtest, ytest, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# force scores to be positive
scores = np.absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (scores.mean(), scores.std()) )

Mean MAE: 0.127 (0.004)


In [17]:
#import timeit
testhand1 = ['As','Ac','Ah', 'Th','Td']
testhand1c = [eval7.Card(card) for card in testhand1]
testhand2 = ['5c','7d','2h','Js','Th']
testhand2c = [eval7.Card(card) for card in testhand2]

print("testhand1:",testhand1, "score:", xgb_community_r.predict([card_strs2vec(testhand1)]))
print("true MC:", calc_5card_strength(testhand1c[:2], testhand1c[2:])[1])
print("testhand3:",testhand3, "score:", xgb_community_r.predict([card_strs2vec(testhand2)])) # avg 807 micro s
print("true MC:", calc_5card_strength(testhand2c[:2], testhand2c[2:])[1])

testhand1: ['As', 'Ac', 'Ah', 'Th', 'Td'] score: [0.97692275]
true MC: 0.99908
testhand3: ['5c', '7d', '2h', 'Js', 'Th'] score: [0.23672412]
true MC: 0.105045
