In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
df = pd.read_csv('PreFlop.csv')

In [3]:
df = df[df['Button Seat'] != 0]

df[df.isna()] = 0

df.describe()

Unnamed: 0,Game ID,Button Seat,Players,Stack_Size,MyCards1,MyCards2,PreFlop_Action,PreFlop_Action_Amount
count,290526.0,290526.0,290526.0,290526.0,290526.0,290526.0,290526.0,290526.0
mean,787541900.0,3.915794,3.91123,80.613862,4.514632,4.49895,0.741046,0.896828
std,23107520.0,2.101172,2.098404,90.662315,11.725215,11.691881,1.432975,6.563276
min,718894900.0,1.0,1.0,0.1,0.0,0.0,0.0,0.0
25%,789337200.0,2.0,2.0,26.05,0.0,0.0,0.0,0.0
50%,792771200.0,4.0,4.0,50.86,0.0,0.0,0.0,0.0
75%,804348700.0,5.0,5.0,100.0,0.0,0.0,0.0,0.0
max,808943700.0,9.0,9.0,1524.1,52.0,52.0,5.0,646.55


In [4]:
max_num_players = df['Players'].max()
max_num_features = len(df.columns) - 1

In [5]:
df = df.groupby('Game ID').apply(lambda x: x.drop('Game ID',axis=1).values)
df

## Grouped acc to Game ID
## Number of total data rows == number of games
## Number of arrays in each row == Number of players in that game
## Number of values in each array == Number of features 

  df = df.groupby('Game ID').apply(lambda x: x.drop('Game ID',axis=1).values)


Game ID
718894932    [[4.0, 4.0, 205.4, 0.0, 0.0, 0.0, 0.0], [4.0, ...
718895155    [[5.0, 4.0, 200.0, 0.0, 0.0, 0.0, 0.0], [5.0, ...
718928069    [[3.0, 1.0, 174.47, 0.0, 0.0, 0.0, 0.0], [3.0,...
718928859    [[5.0, 1.0, 174.47, 0.0, 0.0, 0.0, 0.0], [5.0,...
718930029    [[6.0, 1.0, 174.47, 0.0, 0.0, 0.0, 0.0], [6.0,...
                                   ...                        
808941048    [[6.0, 1.0, 10.0, 47.0, 15.0, 0.0, 0.0], [6.0,...
808941103    [[9.0, 1.0, 40.0, 0.0, 0.0, 4.0, 10.5], [9.0, ...
808941818    [[1.0, 1.0, 46.0, 0.0, 0.0, 4.0, 2.0], [1.0, 2...
808942446    [[2.0, 1.0, 48.28, 0.0, 0.0, 0.0, 0.0], [2.0, ...
808943744    [[3.0, 1.0, 48.28, 0.0, 0.0, 2.0, 3.0], [3.0, ...
Length: 49586, dtype: object

In [6]:
df.loc[718894932]

array([[  4. ,   4. , 205.4,   0. ,   0. ,   0. ,   0. ],
       [  4. ,   5. ,  80. ,  52. ,  18. ,   5. ,  76. ]])

In [7]:
num_games = df.count()
print(num_games, '\n', max_num_players, '\n', max_num_features)

49586 
 9 
 7


In [8]:
padded_matrices = []
mask_matrices = []

for matrix in df:
    num_players = matrix.shape[0]
    pad_rows = max_num_players - num_players

    if pad_rows > 0:
        padded_matrix = np.pad(matrix, ((0,pad_rows), (0,0)), mode='constant', constant_values=0)
    else:
        padded_matrix = matrix

    padded_matrices.append(padded_matrix)

    mask_matrix = np.zeros((max_num_players, max_num_features))
    mask_matrix[:num_players, :] = 1
    mask_matrices.append(mask_matrix)

In [9]:
print(padded_matrices[0:3])
print(mask_matrices[0:3])

[array([[  4. ,   4. , 205.4,   0. ,   0. ,   0. ,   0. ],
       [  4. ,   5. ,  80. ,  52. ,  18. ,   5. ,  76. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ]]), array([[  5.,   4., 200.,   0.,   0.,   0.,   0.],
       [  5.,   5.,  88.,  33.,  46.,   5.,  86.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.]]), array([[  3.  

In [10]:
# Extracting the target action and chips for the player corresponding to yourself from each game matrix

actions = []
chips = []

for game_matrix in padded_matrices:
    # Extract the target action and chips for all players 
    your_action = list(map(int, game_matrix[:, -2]))
    your_chip = list(game_matrix[:, -1])    

    actions.append(your_action)
    chips.append(your_chip)

# Convert lists to tensors
actions = torch.tensor(actions, dtype=torch.long)
chips = torch.tensor(chips, dtype=torch.float32)

In [11]:
print(chips.shape)
print(chips[0:50])

torch.Size([49586, 9])
tensor([[  0.0000,  76.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000],
        [  0.0000,  86.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000],
        [  0.0000,   8.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000],
        [  0.0000,   0.0000,   8.0000,   0.0000,   4.0000,   0.0000,   0.0000,
           0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000,   2.0000,   0.0000,   4.0000,
           0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000,   4.0000,   0.0000,   0.0000,
           0.0000,   0.0000],
        [  0.0000,  32.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000],
        [ 12.4000,   0.0000,   0.0000,   0.0000,   0.0000,   8.4000,   0.0000,
           0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000,  12.0000,   0.0000,  12.0000,
           0.0000,   8.000

In [12]:
padded_tensors = np.array(padded_matrices)
mask_tensors = np.array(mask_matrices)

print(mask_tensors[0:2])

print("Padded tensor shape", padded_tensors.shape)
print("Mask tensor shape", mask_tensors.shape)

[[[1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]
Padded tensor shape (49586, 9, 7)
Mask tensor shape (49586, 9, 7)


In [13]:
padded_tensors = torch.tensor(padded_tensors, dtype = torch.float32)
mask_tensors = torch.tensor(mask_tensors, dtype= torch.float32)

print('padded tensor', padded_tensors.shape)
print('mask tensor', mask_tensors.shape)

padded tensor torch.Size([49586, 9, 7])
mask tensor torch.Size([49586, 9, 7])


In [14]:
# Shuffling and splitting both tensors

indices = torch.randperm(num_games)

shuffled_input_data = padded_tensors[indices]
shuffled_masks = mask_tensors[indices]
shuffled_actions = actions[indices]
shuffled_chips = chips[indices]

train_ratio = 0.8

num_train = round(num_games * train_ratio)
num_test = num_games - num_train

train_data, test_data = torch.split(shuffled_input_data, [num_train, num_test])
train_mask, test_mask = torch.split(shuffled_masks, [num_train, num_test])
train_actions, test_actions = torch.split(shuffled_actions, [num_train, num_test])
train_chips, test_chips = torch.split(shuffled_chips, [num_train, num_test])

print(train_data[50], train_mask[50], len(train_mask), len(train_data))
print(test_data[60], test_mask[60], len(test_mask), len(test_data))
print(train_actions[50], train_chips[50], len(train_chips), len(train_actions))
print(test_actions[60], test_chips[60], len(test_actions), len(test_chips))
print(type(train_data), type(test_data), type(train_mask), type(test_mask), type(train_actions), type(test_chips))

tensor([[ 4.0000,  1.0000, 57.7700,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 4.0000,  2.0000, 64.5800,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 4.0000,  3.0000, 36.7200,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 4.0000,  4.0000, 50.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 4.0000,  5.0000, 20.0000, 35.0000, 15.0000,  0.0000,  0.0000],
        [ 4.0000,  6.0000, 55.1600,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]) tensor([[1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.]])

In [15]:
length = []

for game in shuffled_masks:
    count = 0
    for row in game:
        if row[0] == 1:
            count += 1
    length.append(count)

print(length)

length = torch.tensor(length, dtype=torch.int32)

train_length, test_length = torch.split(length, [num_train, num_test])
        

[6, 5, 5, 5, 5, 8, 8, 6, 8, 5, 6, 6, 5, 6, 6, 9, 4, 5, 6, 4, 6, 6, 6, 6, 7, 4, 5, 9, 5, 6, 9, 6, 4, 6, 6, 6, 6, 5, 6, 4, 6, 6, 5, 4, 5, 8, 6, 4, 6, 8, 6, 4, 6, 5, 5, 8, 6, 6, 5, 5, 6, 4, 8, 5, 9, 6, 4, 9, 9, 4, 5, 6, 7, 5, 9, 6, 6, 5, 6, 6, 8, 5, 5, 6, 6, 6, 5, 9, 4, 5, 8, 5, 5, 5, 6, 5, 4, 6, 6, 6, 6, 6, 9, 7, 5, 6, 9, 4, 4, 6, 4, 4, 6, 6, 5, 6, 7, 5, 5, 5, 9, 5, 8, 6, 6, 6, 5, 6, 6, 6, 6, 6, 7, 6, 6, 5, 7, 7, 6, 6, 5, 5, 6, 6, 6, 6, 5, 5, 6, 6, 6, 5, 5, 4, 6, 7, 6, 6, 6, 8, 5, 4, 7, 6, 7, 7, 6, 6, 9, 6, 5, 5, 6, 6, 7, 5, 5, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 9, 7, 6, 5, 4, 4, 4, 8, 5, 9, 9, 9, 4, 6, 6, 6, 4, 3, 6, 4, 5, 4, 8, 7, 7, 6, 6, 6, 9, 6, 6, 6, 5, 6, 5, 7, 7, 6, 5, 7, 6, 6, 6, 6, 6, 6, 8, 6, 6, 5, 4, 6, 6, 4, 8, 5, 5, 5, 6, 6, 6, 5, 6, 6, 6, 7, 6, 5, 6, 6, 5, 8, 8, 4, 6, 5, 8, 6, 8, 5, 6, 6, 6, 8, 6, 4, 5, 6, 6, 6, 3, 4, 5, 4, 4, 6, 5, 6, 6, 6, 6, 6, 6, 4, 9, 6, 7, 6, 6, 6, 8, 5, 5, 2, 3, 9, 6, 9, 8, 4, 6, 6, 6, 7, 6, 5, 6, 5, 6, 5, 6, 6, 5, 6, 5, 4, 6, 6, 6, 4, 6, 6, 6, 5, 5, 9, 

In [16]:
print(length.shape)
print(train_length.shape)
print(test_actions.shape)
print(test_actions[0:50])

torch.Size([49586])
torch.Size([39669])
torch.Size([9917, 9])
tensor([[0, 0, 4, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 2, 4, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 4, 0, 0, 0, 2, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 0, 0, 0, 0],
        [0, 0, 4, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 4, 0, 0, 0],
        [0, 0, 5, 5, 0, 0, 0, 0, 0],
        [4, 0, 2, 0, 0, 2, 0, 0, 0],
        [0, 4, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 2, 4, 0, 0, 0, 0, 0],
        [0, 0, 4, 0, 0, 0, 0, 0, 0],
        [0, 0, 4, 0, 0, 0, 0, 0, 0],
        [0, 0, 4, 4, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 4, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [4, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [4, 0, 0, 0, 0, 0, 0, 0, 0],
        [2, 2, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 4, 0, 0, 0],
        [4, 4

In [17]:
# Define the model architecture
class PokerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_actions):
        super(PokerModel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_actions = num_actions

        # Encoder layers
        self.encoder = nn.LSTM(input_size, hidden_size, batch_first=True)

        # Attention layer
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=8, batch_first=True)

        # Output layers
        self.action_head = nn.Linear(hidden_size, num_actions)
        self.chip_head = nn.Linear(hidden_size, 1)

    def forward(self, x, mask, lengths):
        # Pack the padded sequences
        packed_x = rnn_utils.pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)

        # Encode the packed sequence
        packed_encoded, _ = self.encoder(packed_x)

        # Unpack the encoded sequence
        encoded, _ = rnn_utils.pad_packed_sequence(packed_encoded, batch_first=True)

        # Compute the number of players for each batch
        batch_size, max_num_players, feature_dim = encoded.size()
        mask = mask.reshape(batch_size, max_num_players, feature_dim)
        num_players = torch.sum(mask.any(-1), dim=1)

        # Create the key_padding_mask
        key_padding_mask = torch.zeros((batch_size, max_num_players), dtype=torch.bool, device=encoded.device)
        for batch_idx, num_player in enumerate(num_players):
            key_padding_mask[batch_idx, num_player:] = True
        
        # Apply attention
        attended, _ = self.attention(encoded, encoded, encoded, key_padding_mask=key_padding_mask)

        # Predict action and chip amount
        action_logits = self.action_head(attended)
        chip_amount = self.chip_head(attended).squeeze(-1)

        return action_logits, chip_amount

In [18]:
# Create PyTorch datasets
train_dataset = TensorDataset(train_data, train_mask, train_length, train_actions, train_chips)
test_dataset = TensorDataset(test_data, test_mask, test_length, test_actions, test_chips)

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [19]:
# Set up the model
input_size = 7  # Number of features in your dataset
hidden_size = 256  # You can adjust this value
num_actions = 7  # Number of possible actions

model = PokerModel(input_size, hidden_size, num_actions)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

PokerModel(
  (encoder): LSTM(7, 256, batch_first=True)
  (attention): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
  )
  (action_head): Linear(in_features=256, out_features=7, bias=True)
  (chip_head): Linear(in_features=256, out_features=1, bias=True)
)

In [20]:
# Set up the loss functions
action_criterion = nn.CrossEntropyLoss()
chip_criterion = nn.MSELoss()  # For continuous output

# Set up the optimizer
optimizer = optim.Adam(model.parameters())

In [21]:
def create_mask(x, lengths):
    batch_size, max_num_players, feature_dim = x.size()
    mask = torch.zeros((batch_size, max_num_players), dtype=torch.bool, device=x.device)
    for batch_idx, seq_len in enumerate(lengths):
        num_players = seq_len // feature_dim  # Assuming lengths are divisible by feature_dim
        mask[batch_idx, :num_players] = True
    return mask

In [25]:
# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for batch_x, batch_mask, batch_lengths, batch_y_action, batch_y_chips in train_loader:
        batch_x = batch_x.to(device)
        batch_mask = batch_mask.to(device)
        batch_lengths = batch_lengths.to(device)
        batch_y_action = batch_y_action.to(device)
        batch_y_chips = batch_y_chips.to(device)

        optimizer.zero_grad()

        action_logits, chip_amount = model(batch_x, create_mask(batch_mask, batch_lengths), batch_lengths)

        action_loss = action_criterion(action_logits.view(-1, num_actions), batch_y_action.view(-1))
        
        # Reshape chip_amount and batch_y_chips to match the expected shapes
        batch_size, num_players = chip_amount.size()
        chip_amount = chip_amount.view(batch_size, num_players, 1)
        batch_y_chips = batch_y_chips.view(batch_size, num_players)

        chip_loss = chip_criterion(chip_amount.squeeze(-1), batch_y_chips)

        
        loss = action_loss + chip_loss

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

RuntimeError: shape '[32, 9, 256]' is invalid for input of size 288

In [24]:
print(batch_mask.shape)
print(batch_mask.numel())

torch.Size([32, 9, 7])
2016
