In [1]:
import pandas as pd

# Location of the Parquet dataset, relative to the notebook's working directory.
PARQUET_PATH = 'pokerdata.parquet'

# Read the whole dataset into a single DataFrame; later cells read from `df`.
df = pd.read_parquet(PARQUET_PATH)

# Last expression of the cell: display the column labels as a schema overview.
df.columns

Index(['file', 'room', 'blinds', 'game', 'hand', 'won', 'firstcard',
       'secondcard', 'flop1', 'flop2',
       ...
       'player6_preflop_check', 'player6_flop_fold', 'player6_flop_bet',
       'player6_flop_check', 'player6_turn_fold', 'player6_turn_bet',
       'player6_turn_check', 'player6_river_fold', 'player6_river_bet',
       'player6_river_check'],
      dtype='object', length=112)

In [2]:
# Boolean mask: rows where player0's recorded preflop bet is strictly positive.
has_preflop_bet = df['player0_preflop_bet'] > 0
# Keep only those rows as the "played" subset.
played_hands = df[has_preflop_bet]

In [3]:
# Tally outcomes among the played hands. Rows whose `won` value is neither
# 1 nor 0 fall into neither count, and therefore are not part of the total.
outcome = played_hands['won']
won_count = int((outcome == 1).sum())
loss_count = int((outcome == 0).sum())

# Report the counts and the share of wins among counted hands.
print(f"Number of wins: {won_count}")
print(f"Number of losses: {loss_count}")
print(f"Total hands: {won_count + loss_count}")
print(f"Win rate: {won_count / (won_count + loss_count) * 100:.2f}%")

Number of wins: 6502
Number of losses: 5568
Total hands: 12070
Win rate: 53.87%


In [4]:
# Add one `player{N}_bet` column per player (N = 0..6): the sum of that
# player's preflop, flop, turn and river bet columns for each hand.
# The additions are performed left to right in street order so the result is
# numerically identical to writing out the four-term sum by hand; a missing
# value in any street column propagates to the total.
for seat in range(7):
    total = df[f'player{seat}_preflop_bet']
    for street in ('flop', 'turn', 'river'):
        total = total + df[f'player{seat}_{street}_bet']
    df[f'player{seat}_bet'] = total

# `opponent_bet` = (player1_bet + ... + player6_bet) - player0_bet.
# NOTE(review): subtracting player0_bet is only correct if player0's wagers are
# also recorded under one of the player1..player6 columns — confirm against the
# code that produced the Parquet file.
opponent_total = df['player1_bet']
for seat in range(2, 7):
    opponent_total = opponent_total + df[f'player{seat}_bet']
df['opponent_bet'] = opponent_total - df['player0_bet']

In [10]:
# Columns shown in the per-hand overview. `player0_bet` and `opponent_bet`
# must already have been added to `df` before this cell runs.
overview_columns = [
    'room', 'blinds', 'game', 'hand', 'won',
    'firstcard', 'secondcard',
    'flop1', 'flop2', 'flop3', 'turn', 'river',
    'player0_bet', 'opponent_bet',
]
# Rows where player0's recorded preflop bet is strictly positive.
has_preflop_bet = df['player0_preflop_bet'] > 0

# Select rows and columns in a single .loc call; as the cell's last
# expression, the resulting frame is displayed.
df.loc[has_preflop_bet, overview_columns]

Unnamed: 0,room,blinds,game,hand,won,firstcard,secondcard,flop1,flop2,flop3,turn,river,player0_bet,opponent_bet
0,Halley,0.02,1,1,1,7s,Ks,5s,6d,7h,Ts,,0.33,0.12
1,Halley,0.02,1,2,0,7c,7h,As,9c,Ks,,,0.01,0.04
3,Halley,0.02,1,4,0,6d,6h,4d,Qc,7d,Qs,5s,0.12,0.26
7,Halley,0.02,1,8,0,7d,Jd,,,,,,0.04,0.15
14,Halley,0.02,1,15,0,8d,8h,Qs,2s,Qc,Jc,,0.06,0.24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48376,Aludra,0.10,605,36,0,Js,As,9c,4s,4d,6s,5c,1.19,4.56
48378,Aludra,0.10,605,38,0,3h,3c,9d,2s,3s,Ad,8h,6.08,11.76
48379,Donati,0.05,606,1,1,Jc,Ah,9h,Qs,Ac,4h,,2.95,1.90
48380,Donati,0.05,606,2,0,Ah,3d,Kd,7c,2c,,,0.15,0.25


In [6]:
# Narrow working frame: hand identifiers, outcome, and the aggregated bet columns.
short_columns = [
    'file', 'game', 'hand', 'won',
    'player0_bet', 'player1_bet', 'player2_bet', 'player3_bet',
    'player4_bet', 'player5_bet', 'player6_bet', 'opponent_bet',
]
# Take an explicit copy so that adding `profit` below writes to an independent
# frame. Assigning a new column into the bare df[[...]] selection is what
# raises pandas' SettingWithCopyWarning.
df_short = df[short_columns].copy()

# Profit per hand: a won hand (won == 1) is credited 2 * opponent_bet, a lost
# hand (won == 0) is debited player0_bet.
# NOTE(review): the factor 2 on winning hands looks unusual — confirm it is the
# intended payout model.
df_short['profit'] = df_short['won'] * (2 * df_short['opponent_bet']) - (1 - df_short['won']) * df_short['player0_bet']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_short['profit'] = df_short['won'] * (2 * df_short['opponent_bet']) - (1 - df_short['won']) * df_short['player0_bet']


In [7]:
# One row per (file, game) group: `won` is the sum of the won flags and
# `hand` is the count of non-null hand entries in that group.
grouped = df_short[['file', 'game', 'hand', 'won']].groupby(['file', 'game'])
summary = grouped.agg(
    won=('won', 'sum'),
    hand=('hand', 'count'),
).reset_index()

# Fraction of hands won in each group, rounded to two decimals.
summary['win_rate'] = (summary['won'] / summary['hand']).round(2)

# Print only groups with a positive win rate and at least 100 hands.
is_reportable = (summary['win_rate'] > 0.0) & (summary['hand'] >= 100)
print(summary[is_reportable].to_string(index=False))

                                                          file  game  won  hand  win_rate
  HH20231101 Halley - $0.01-$0.02 - USD No Limit Hold'em 2.txt     1   14   111      0.13
    HH20231113 Halley - $0.01-$0.02 - USD No Limit Hold'em.txt     7   19   158      0.12
 HH20241002 Halley #3 - $0.01-$0.02 - USD No Limit Hold'em.txt    10   21   154      0.14
    HH20241002 Halley - $0.01-$0.02 - USD No Limit Hold'em.txt    12   33   135      0.24
HH20241122 Aludra #3 - $0.05-$0.10 - USD No Limit Hold'em2.txt    19   17   123      0.14
   HH20241122 Aludra - $0.05-$0.10 - USD No Limit Hold'em3.txt    23   19   100      0.19
   HH20241122 Donati - $0.02-$0.05 - USD No Limit Hold'em2.txt    27   45   286      0.16
    HH20241122 Halley - $0.01-$0.02 - USD No Limit Hold'em.txt    28   23   161      0.14
 HH20241124 Aludra #2 - $0.05-$0.10 - USD No Limit Hold'em.txt    33   37   209      0.18
    HH20241201 Aludra - $0.05-$0.10 - USD No Limit Hold'em.txt    46   16   104      0.15
    HH2024