In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from deuces import Card, Evaluator

# Location of the parsed hand-history CSV, relative to the notebook's working directory.
HANDS_CSV_PATH = '../data/pluribus_parsed.csv'

# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# saved DataFrame index -- consider index_col=0 if it is not needed.
data = pd.read_csv(HANDS_CSV_PATH)

# Show the first rows to sanity-check the column layout.
data.head()

Unnamed: 0,hand_id,flop_cards,turn_cards,river_cards,players,hole_cards,preflop_actions,flop_actions,turn_actions,river_actions,showdown_actions,winners
0,100000,7d 5h 9d,7c,Qh,"MrBlue, MrBlonde, MrWhite, MrPink, MrBrown, Pl...","MrBlue: Tc Qc, MrBlonde: 8s 4c, MrWhite: 9c 3d...","MrBlue: posts small blind 50, MrBlonde: posts ...","MrBlue: checks, MrPink: checks","MrBlue: checks, MrPink: checks","MrBlue: bets 230, MrPink: folds, MrBlue: uncal...",,MrBlue: collected 520.0 from pot
1,100001,7s 9c Tc,2c,,"MrBlonde, MrWhite, MrPink, MrBrown, Pluribus, ...","MrBlonde: Qh 5c, MrWhite: 9h 6h, MrPink: Kc Jh...","MrBlonde: posts small blind 50, MrWhite: posts...","MrWhite: checks, MrPink: bets 235, MrWhite: ca...","MrWhite: checks, MrPink: bets 600, MrWhite: fo...",,,MrPink: collected 940.0 from pot
2,100002,,,,"MrWhite, MrPink, MrBrown, Pluribus, MrBlue, Mr...","MrWhite: Jc 2c, MrPink: 2d Qh, MrBrown: 9d Jh,...","MrWhite: posts small blind 50, MrPink: posts b...",,,,,MrBlonde: collected 250.0 from pot
3,100003,3d 6h 9d,5s,,"MrPink, MrBrown, Pluribus, MrBlue, MrBlonde, M...","MrPink: 8d 8s, MrBrown: 2h Kc, Pluribus: 4s 9s...","MrPink: posts small blind 50, MrBrown: posts b...","MrPink: bets 170, MrBrown: folds, MrBlue: call...","MrPink: bets 600, MrBlue: folds, MrPink: uncal...",,,MrPink: collected 1015.0 from pot
4,100004,7c Ah Th,2s,9h,"MrBrown, Pluribus, MrBlue, MrBlonde, MrWhite, ...","MrBrown: Ts Ac, Pluribus: 2c 5c, MrBlue: 7d 3c...","MrBrown: posts small blind 50, Pluribus: posts...","MrBrown: checks, MrWhite: checks","MrBrown: checks, MrWhite: bets 400, MrBrown: r...","MrBrown: bets 3500, MrWhite: folds, MrBrown: u...",,MrBrown: collected 3500.0 from pot


In [2]:
# Parallel accumulators filled by the loop below: features[k] holds one
# evaluator score and labels[k] the matching 0/1 win flag.
features, labels = [], []

# A single evaluator instance is created once and reused for every hand.
evaluator = Evaluator()

def convert_to_deuces_format(card_str):
    """Convert a card string such as ``'Tc'`` into a deuces card.

    Thin wrapper around ``Card.new``: the argument is passed through
    unchanged and the return value is whatever ``Card.new`` produces.
    """
    deuces_card = Card.new(card_str)
    return deuces_card

def _winner_names(winners_field):
    """Return the set of player names listed in a ``winners`` cell.

    Entries in the column look like ``"<name>: collected <amount> from pot"``,
    so the last token before each ``':'`` is taken as a winner's name.
    This does not depend on how multiple entries are separated.
    A missing cell (NaN/None) yields an empty set instead of raising.
    """
    if pd.isna(winners_field):
        return set()

    names = set()
    # Drop the final piece: it is the text after the last ':' and holds no name.
    for chunk in str(winners_field).split(':')[:-1]:
        tokens = chunk.replace(',', ' ').split()
        if tokens:
            names.add(tokens[-1])
    return names


# Build one (feature, label) sample per player per hand.
for _, row in data.iterrows():
    # Without hole cards there is nothing to evaluate for this hand.
    if pd.isna(row['hole_cards']):
        continue

    # Exact-name lookup; the previous `player in winners` was a substring
    # test on the raw text and crashed when the cell was missing.
    winner_names = _winner_names(row['winners'])

    flop_cards = str(row['flop_cards']).split() if pd.notna(row['flop_cards']) else []
    turn_cards = [str(row['turn_cards'])] if pd.notna(row['turn_cards']) else []
    river_cards = [str(row['river_cards'])] if pd.notna(row['river_cards']) else []

    community_cards = [convert_to_deuces_format(card) for card in flop_cards + turn_cards + river_cards]

    # 'hole_cards' is formatted as "<player>: <card> <card>, <player>: ...".
    hole_cards_dict = dict(entry.split(': ') for entry in row['hole_cards'].split(', '))
    for player, cards in hole_cards_dict.items():
        player_hole_cards = [convert_to_deuces_format(card) for card in cards.split()]

        # TODO: no evaluator for bare two-card hands yet, so any hand whose
        # total card count is not 5, 6 or 7 (e.g. no board dealt) is skipped.
        if len(player_hole_cards) + len(community_cards) not in (5, 6, 7):
            continue

        hand_strength = evaluator.evaluate(player_hole_cards, community_cards)
        features.append(hand_strength)
        labels.append(1 if player in winner_names else 0)

In [3]:
# Hold-out settings for the evaluation split.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

# The single feature is laid out as one column, giving an (n_samples, 1) matrix.
X = np.reshape(features, (-1, 1))
y = np.asarray(labels)

# NOTE(review): the split is per player row and not stratified, so rows from
# the same hand may land on both sides -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(class_weight='balanced').fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report overall accuracy, per-class metrics and the raw confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred, zero_division=0))
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.6152044957852014
              precision    recall  f1-score   support

           0       0.89      0.61      0.73      5337
           1       0.25      0.63      0.35      1069

    accuracy                           0.62      6406
   macro avg       0.57      0.62      0.54      6406
weighted avg       0.78      0.62      0.66      6406

[[3264 2073]
 [ 392  677]]
