In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
df = pd.read_csv("PreFlop.csv")

In [4]:
df = df[df['Button Seat'] != 0]

df[df.isna()] = 0

df.describe()


Unnamed: 0,Game ID,Button Seat,Players,Stack_Size,MyCards1,MyCards2,PreFlop_Action,PreFlop_Action_Amount
count,290526.0,290526.0,290526.0,290526.0,290526.0,290526.0,290526.0,290526.0
mean,787541900.0,3.915794,3.91123,80.613862,4.514632,4.49895,0.741046,0.896828
std,23107520.0,2.101172,2.098404,90.662315,11.725215,11.691881,1.432975,6.563276
min,718894900.0,1.0,1.0,0.1,0.0,0.0,0.0,0.0
25%,789337200.0,2.0,2.0,26.05,0.0,0.0,0.0,0.0
50%,792771200.0,4.0,4.0,50.86,0.0,0.0,0.0,0.0
75%,804348700.0,5.0,5.0,100.0,0.0,0.0,0.0,0.0
max,808943700.0,9.0,9.0,1524.1,52.0,52.0,5.0,646.55


In [5]:
df.loc[df['Game ID'] == 718894932]

Unnamed: 0,Game ID,Button Seat,Players,Stack_Size,MyCards1,MyCards2,PreFlop_Action,PreFlop_Action_Amount
247558,718894932,4,4,205.4,0,0,0.0,0.0
247559,718894932,4,5,80.0,52,18,5.0,76.0


In [6]:
max_num_players = df['Players'].max()
max_num_features = len(df.columns) - 1

In [7]:
df = df.groupby('Game ID').apply(lambda x: x.drop('Game ID',axis=1).values)
df

## Grouped acc to Game ID
## Number of total data rows == number of games
## Number of arrays in each row == Number of players in that game
## Number of values in each array == Number of features 

  df = df.groupby('Game ID').apply(lambda x: x.drop('Game ID',axis=1).values)


Game ID
718894932    [[4.0, 4.0, 205.4, 0.0, 0.0, 0.0, 0.0], [4.0, ...
718895155    [[5.0, 4.0, 200.0, 0.0, 0.0, 0.0, 0.0], [5.0, ...
718928069    [[3.0, 1.0, 174.47, 0.0, 0.0, 0.0, 0.0], [3.0,...
718928859    [[5.0, 1.0, 174.47, 0.0, 0.0, 0.0, 0.0], [5.0,...
718930029    [[6.0, 1.0, 174.47, 0.0, 0.0, 0.0, 0.0], [6.0,...
                                   ...                        
808941048    [[6.0, 1.0, 10.0, 47.0, 15.0, 0.0, 0.0], [6.0,...
808941103    [[9.0, 1.0, 40.0, 0.0, 0.0, 4.0, 10.5], [9.0, ...
808941818    [[1.0, 1.0, 46.0, 0.0, 0.0, 4.0, 2.0], [1.0, 2...
808942446    [[2.0, 1.0, 48.28, 0.0, 0.0, 0.0, 0.0], [2.0, ...
808943744    [[3.0, 1.0, 48.28, 0.0, 0.0, 2.0, 3.0], [3.0, ...
Length: 49586, dtype: object

In [8]:
df.loc[718894932]

array([[  4. ,   4. , 205.4,   0. ,   0. ,   0. ,   0. ],
       [  4. ,   5. ,  80. ,  52. ,  18. ,   5. ,  76. ]])

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [10]:
num_games = df.count()
print(num_games, '\n', max_num_players, '\n', max_num_features)

49586 
 9 
 7


In [11]:
padded_matrices = []
mask_matrices = []

for matrix in df:
    num_players = matrix.shape[0]
    pad_rows = max_num_players - num_players

    if pad_rows > 0:
        padded_matrix = np.pad(matrix, ((0,pad_rows), (0,0)), mode='constant', constant_values=0)
    else:
        padded_matrix = matrix

    padded_matrices.append(padded_matrix)

    mask_matrix = np.zeros((max_num_players, max_num_features))
    mask_matrix[:num_players, :] = 1
    mask_matrices.append(mask_matrix)

In [12]:
print(padded_matrices[0:3])
print(mask_matrices[0:3])

[array([[  4. ,   4. , 205.4,   0. ,   0. ,   0. ,   0. ],
       [  4. ,   5. ,  80. ,  52. ,  18. ,   5. ,  76. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ]]), array([[  5.,   4., 200.,   0.,   0.,   0.,   0.],
       [  5.,   5.,  88.,  33.,  46.,   5.,  86.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.]]), array([[  3.  

In [13]:
padded_tensors = np.array(padded_matrices)
mask_tensors = np.array(mask_matrices)

print(mask_tensors[0:2])

print("Padded tensor shape", padded_tensors.shape)
print("Mask tensor shape", mask_tensors.shape)

[[[1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]
Padded tensor shape (49586, 9, 7)
Mask tensor shape (49586, 9, 7)


In [14]:
padded_tensors = torch.tensor(padded_tensors, dtype = torch.float32)
mask_tensors = torch.tensor(mask_tensors, dtype= torch.float32)

print('padded tensor', padded_tensors.shape)
print('mask tensor', mask_tensors.shape)

padded tensor torch.Size([49586, 9, 7])
mask tensor torch.Size([49586, 9, 7])


In [15]:
# Shuffling both tensors

indices = torch.randperm(num_games)

shuffled_input_data = padded_tensors[indices]
shufled_masks = mask_tensors[indices]

train_ratio = 0.8

num_train = round(num_games * train_ratio)
num_test = num_games - num_train

train_data, test_data = torch.split(shuffled_input_data, [num_train, num_test])
train_mask, test_mask = torch.split(shufled_masks, [num_train, num_test])

print(train_data[50], train_mask[50], len(train_mask), len(train_data))
print(test_data[60], test_mask[60], len(test_mask), len(test_data))
print(type(train_data), type(test_data), type(train_mask), type(test_mask))

tensor([[  4.0000,   1.0000, 200.3000,   0.0000,   0.0000,   4.0000,   2.5000],
        [  4.0000,   2.0000,  27.2600,   0.0000,   0.0000,   0.0000,   0.0000],
        [  4.0000,   3.0000, 109.1800,   0.0000,   0.0000,   0.0000,   0.0000],
        [  4.0000,   4.0000, 249.8600,   0.0000,   0.0000,   0.0000,   0.0000],
        [  4.0000,   5.0000, 100.5000,   0.0000,   0.0000,   0.0000,   0.0000],
        [  4.0000,   6.0000,  40.0000,   4.0000,  15.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000]]) tensor([[1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 

In [16]:
# Define the deep learning model architecture
class PokerModel(nn.Module):
    def __init__(self, input_size, hidden_size, action_output_size):
        super(PokerModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc_action = nn.Linear(hidden_size, action_output_size)  # Output layer for action prediction
        self.fc_chips = nn.Linear(hidden_size, 1)    # Output layer for continuous chips value prediction 
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        action_output = self.fc_action(x)
        chips_output = self.fc_chips(x)
        return action_output, chips_output

In [17]:
# Define model parameters
input_size = max_num_features  # Number of features
hidden_size = 64
action_output_size = 6  # Actions ranging from 0 to 5

# Initialize the model
model = PokerModel(input_size, hidden_size, action_output_size)

In [18]:
# Define loss functions for action and chips predictions
action_criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
chips_criterion = nn.MSELoss()            # Mean squared error loss for regression

# Define optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

In [19]:
# Train the model

def train_model(model, data, mask, target_actions, target_chips, optimizer, action_criterion, chips_criterion, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_action_loss = 0.0
        total_chips_loss = 0.0
        num_samples = 0
        
        for train_matrix, target_action, target_chip in zip(data, target_actions, target_chips):
            optimizer.zero_grad()
            
            # Forward pass
            action_output, chips_output = model(train_matrix)
            
            # Action and chips of my player (Assuming for now 1st row corresponds to us)
            your_action = action_output[0]
            your_chips = chips_output[0]

            # Calculate losses
            action_loss = action_criterion(your_action.unsqueeze(0), target_action.unsqueeze(0))
            chips_loss = chips_criterion(your_chips.squeeze(), target_chip)
            
            # Backward pass
            loss = action_loss + chips_loss
            loss.backward()
            optimizer.step()
            
            # Update total losses and number of samples
            total_action_loss += action_loss.item()
            total_chips_loss += chips_loss.item()
            num_samples += 1
        
        # Calculate average losses
        avg_action_loss = total_action_loss / num_samples
        avg_chips_loss = total_chips_loss / num_samples
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Action Loss: {avg_action_loss:.4f}, Chips Loss: {avg_chips_loss:.4f}')

In [20]:
# Extracting the target action and chips for the player corresponding to yourself from each game matrix

target_actions = []
target_chips = []

for game_matrix in train_data:
    # Extract the target action and chips for your player (assuming it's the last row)
    your_action = int(game_matrix.numpy()[0, -2])  # Replace 'target_action' with the actual key in your dataset
    your_chip = game_matrix.numpy()[0, -1]      # Replace 'target_chip' with the actual key in your dataset

    target_actions.append(your_action)
    target_chips.append(your_chip)

# Convert lists to tensors
target_actions = torch.tensor(target_actions, dtype=torch.long)
target_chips = torch.tensor(target_chips, dtype=torch.float32)

In [21]:
print(target_actions.shape)

torch.Size([39669])


In [22]:
test_actions = []
test_chips = []

for game_matrix in test_data:
    # Extract the target action and chips for your player (assuming it's the last row)
    your_action = int(game_matrix.numpy()[0, -2])  # Replace 'target_action' with the actual key in your dataset
    your_chip = game_matrix.numpy()[0, -1]      # Replace 'target_chip' with the actual key in your dataset

    test_actions.append(your_action)
    test_chips.append(your_chip)

# Convert lists to tensors
test_actions = torch.tensor(test_actions, dtype=torch.long)
test_chips = torch.tensor(test_chips, dtype=torch.float32)

In [23]:
print(test_actions.shape)

torch.Size([9917])


In [24]:
# Example usage of train_model function
train_model(model, train_data, train_mask, target_actions=target_actions,
            target_chips= target_chips, optimizer= optimizer, action_criterion= action_criterion,
             chips_criterion= chips_criterion, num_epochs=10)


Epoch [1/10], Action Loss: 0.2875, Chips Loss: 12.4712
Epoch [2/10], Action Loss: 0.0920, Chips Loss: 0.7072


KeyboardInterrupt: 

In [None]:
with torch.no_grad():
    action_predictions, chips_predictions = model(test_data)

    your_action_prediction = action_predictions[0]
    print(your_action_prediction)

NameError: name 'torch' is not defined

In [72]:
# Evaluating model

def evaluate_model(model, test_input_data_tensor, test_mask_tensor, test_actions, test_chips, action_criterion, chips_criterion):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        for input_data, mask in zip(test_input_data_tensor, test_mask_tensor):
            # Forward pass
            action_output, chips_output = model(input_data)
            print(action_output.shape)
            
            # Action and chips of my player (Assuming for now 1st row corresponds to us)
            your_action = action_output[0]
            your_chips = chips_output[0]

            # Calculate losses
            action_loss = action_criterion(your_action, test_actions)
            chips_loss = chips_criterion(your_chips.squeeze(), test_chips)
            
            # Backward pass
            loss = action_loss + chips_loss
            loss.backward()
            optimizer.step()
            
            # Update total losses and number of samples
            total_action_loss += action_loss.item()
            total_chips_loss += chips_loss.item()
            num_samples += 1
        
        # Calculate average losses
        avg_action_loss = total_action_loss / num_samples
        avg_chips_loss = total_chips_loss / num_samples

    print(f'Action loss = ', {avg_action_loss}, '\n Chips loss = ', {avg_chips_loss})



In [73]:
# Testing model 
evaluate_model(model, test_data, test_mask, test_actions=target_actions,
                          test_chips= target_chips, action_criterion= action_criterion,
                           chips_criterion= chips_criterion)

torch.Size([9, 6])
torch.Size([1, 39669])


RuntimeError: size mismatch (got input: [6], target: [39669])