In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score

import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torchvision
from torchvision import models
import torch.optim as optim
import pandas as pd
import numpy as np
import cv2
import os
from sklearn import preprocessing
import matplotlib.pyplot as plt
%matplotlib inline
import kornia
from sklearn.preprocessing import StandardScaler

In [2]:
root_dir = '/scratch/prathyuakundi/aicrowd/poker/'

In [3]:
train_data_path = root_dir+"train.csv"
train_data = pd.read_csv(train_data_path)

In [4]:
train_data.head()

Unnamed: 0,S1,C1,S2,C2,S3,C3,S4,C4,S5,C5,label
0,1,1,1,13,2,4,2,3,1,12,0
1,3,12,3,2,3,11,4,5,2,5,1
2,1,9,4,6,1,4,3,2,3,9,1
3,1,4,3,13,2,13,2,1,3,6,1
4,3,10,2,7,1,2,2,11,4,9,0


In [5]:
train_data = train_data.to_numpy()

In [6]:
scaler = StandardScaler()

In [7]:
X_train, X_val= train_test_split(train_data, test_size=0.2, random_state=42)

X_train,y_train = X_train[:,:-1],X_train[:,-1]
X_val,y_val = X_val[:,:-1],X_val[:,-1]

scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

In [8]:
class PokerData(Dataset):
    def __init__(self,cards,hand = None,transform=None,train=True):
        super().__init__()
        self.cards = cards
        self.hand = hand
        self.transform = transform
        self.train = train
    
    def __len__(self):
        return self.cards.shape[0]
    
    def __getitem__(self,item):
        
        if self.train:
            target = self.hand[item]
        
        suit_cards = self.cards[item]
        
        if self.train:
          return {
              'cards' : suit_cards,
              'hand' : torch.tensor(target)

          }
        else:
          return {
              'cards':suit_cards
          }

In [9]:
train_data = PokerData(X_train, hand = y_train, train=True)

valid_data = PokerData(X_val, hand = y_val, train=True)

In [10]:
y_train

array([1, 1, 5, ..., 1, 1, 0])

In [11]:
batch = 512


In [12]:
num_classes = len(np.unique(y_val))

In [13]:
num_classes

9

In [14]:
train_loader = DataLoader(train_data, batch_size = batch)
valid_loader = DataLoader(valid_data, batch_size = batch)

In [15]:
X_train.shape

(800000, 10)

In [16]:
y_train.shape

(800000,)

In [19]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # number of hidden nodes in each layer (512)
        hidden_1 = 1024
        hidden_2 = 512
        hidden_3 = 256
        self.batch_norm1 = nn.BatchNorm1d(hidden_1)
        self.batch_norm2 = nn.BatchNorm1d(hidden_2)
        self.batch_norm3 = nn.BatchNorm1d(hidden_3)
        # linear layer (10 -> hidden_1)
        self.fc1 = nn.Linear(10, hidden_1)
        # linear layer (n_hidden -> hidden_2)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        # linear layer (n_hidden -> 10)
        self.fc3 = nn.Linear(hidden_2, hidden_3)
        
        self.fc4 = nn.Linear(hidden_3, num_classes)
        # dropout layer (p=0.2)
        # dropout prevents overfitting of data
        self.dropout = nn.Dropout(0.2, inplace=True)

    def forward(self, x):
        # flatten image input
        x = x.view(-1, 10)
        # add hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        x = self.batch_norm1(x)
        # add dropout layer
        x = self.dropout(x)
        # add hidden layer, with relu activation function
        x = F.relu(self.fc2(x))
        x = self.batch_norm2(x)
        # add dropout layer
        x = self.dropout(x)
        # add output layer
        x = F.relu(self.fc3(x))
        x = self.batch_norm3(x)
        
        x = self.fc4(x)
        
        return x

In [20]:
model = Net()
print(model)
model = model.cuda()

Net(
  (batch_norm1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=10, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=9, bias=True)
  (dropout): Dropout(p=0.2, inplace=True)
)


In [22]:
# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss().cuda()

# specify optimizer (stochastic gradient descent) and learning rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, verbose=True, factor = 0.8 )

In [23]:
save_path = '/scratch/prathyuakundi/aicrowd/poker/model.pth'
loaders = {"train" : train_loader, "valid" : valid_loader}

In [24]:
# number of epochs to train the model
n_epochs = 100

# initialize tracker for minimum validation loss
valid_loss_min = np.Inf # set initial "min" to infinity
patience = 0
for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    valid_loss = 0.0
    
    ###################
    # train the model #
    ###################
    model.train() # prep model for training
    for dict_ in loaders['train']:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        data, target = dict_['cards'],dict_['hand']
        data = data.float()
        data = data.cuda()
        target = target.cuda()
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*data.size(0)
        
    ######################    
    # validate the model #
    ######################
    model.eval() # prep model for evaluation
    for dict_ in loaders['valid']:
        # forward pass: compute predicted outputs by passing inputs to the model
        data, target = dict_['cards'],dict_['hand']
        data = data.float()
        data = data.cuda()
        target = target.cuda()
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update running validation loss 
        valid_loss += loss.item()*data.size(0)
        
    # print training/validation statistics 
    # calculate average loss over an epoch
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)
    scheduler.step(valid_loss)
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch+1, 
        train_loss,
        valid_loss
        ))
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), save_path)
        valid_loss_min = valid_loss
        patience = 0
        
    else:
        print('Validation loss not improved from ', valid_loss_min)
        patience+=1
        
        
    if(patience>=10):
        break

Epoch: 1 	Training Loss: 0.997954 	Validation Loss: 0.865497
Validation loss decreased (inf --> 0.865497).  Saving model ...
Epoch: 2 	Training Loss: 0.846299 	Validation Loss: 0.771711
Validation loss decreased (0.865497 --> 0.771711).  Saving model ...
Epoch: 3 	Training Loss: 0.708631 	Validation Loss: 0.408314
Validation loss decreased (0.771711 --> 0.408314).  Saving model ...
Epoch: 4 	Training Loss: 0.324866 	Validation Loss: 0.052914
Validation loss decreased (0.408314 --> 0.052914).  Saving model ...
Epoch: 5 	Training Loss: 0.107718 	Validation Loss: 0.011291
Validation loss decreased (0.052914 --> 0.011291).  Saving model ...
Epoch: 6 	Training Loss: 0.052582 	Validation Loss: 0.005266
Validation loss decreased (0.011291 --> 0.005266).  Saving model ...
Epoch: 7 	Training Loss: 0.033744 	Validation Loss: 0.002995
Validation loss decreased (0.005266 --> 0.002995).  Saving model ...
Epoch: 8 	Training Loss: 0.024918 	Validation Loss: 0.002000
Validation loss decreased (0.00299

Epoch: 67 	Training Loss: 0.001460 	Validation Loss: 0.000187
Validation loss not improved from  7.356461048126221e-05
Epoch: 68 	Training Loss: 0.001320 	Validation Loss: 0.000059
Validation loss decreased (0.000074 --> 0.000059).  Saving model ...
Epoch: 69 	Training Loss: 0.001670 	Validation Loss: 0.000097
Validation loss not improved from  5.8727869987778834e-05
Epoch: 70 	Training Loss: 0.001274 	Validation Loss: 0.000131
Validation loss not improved from  5.8727869987778834e-05
Epoch: 71 	Training Loss: 0.001463 	Validation Loss: 0.000114
Validation loss not improved from  5.8727869987778834e-05
Epoch: 72 	Training Loss: 0.001383 	Validation Loss: 0.000172
Validation loss not improved from  5.8727869987778834e-05
Epoch: 73 	Training Loss: 0.001113 	Validation Loss: 0.000038
Validation loss decreased (0.000059 --> 0.000038).  Saving model ...
Epoch: 74 	Training Loss: 0.001187 	Validation Loss: 0.000055
Validation loss not improved from  3.7575912474421785e-05
Epoch: 75 	Training

In [25]:
model.load_state_dict(torch.load(save_path))

<All keys matched successfully>

In [26]:
def test_valid(loaders, model, criterion):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.
    preds = []
    model.eval()
    for dict_ in loaders['valid']:
        data, target = dict_['cards'],dict_['hand']
        data = data.float()
        data = data.cuda()
        target = target.cuda()
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
#         test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        _, predicted = torch.max(output.data, 1)
        pred = predicted.detach().cpu().numpy()
        for i in pred:
            preds.append(i)
        # compare predictions to true label
        target_ = target.cpu().numpy()
        correct += np.sum(np.squeeze(pred==target_))
        total += data.size(0)
            
#     print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))
    return preds

In [27]:
preds = test_valid(loaders, model, criterion)


Test Accuracy: 100% (200000/200000)


In [28]:
final_test_path = root_dir+"test.csv"
final_test = pd.read_csv(final_test_path)

In [29]:
final_test = final_test.to_numpy()

In [30]:
final_test = scaler.transform(final_test)

In [31]:
test_data = PokerData(final_test, train=False)

In [32]:
test_loader = DataLoader(test_data, batch_size=batch, shuffle = False)

In [33]:
def test(test_loader, model, criterion):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.
    preds = []
    model.eval()
    for dict_ in test_loader:
        data = dict_['cards'].cuda()
        data = data.float()
        output = model(data)
        # calculate the loss
        
        _, predicted = torch.max(output.data, 1)
        pred = predicted.detach().cpu().numpy()
        for i in pred:
            preds.append(i)
       
    return preds

In [34]:
preds = test(test_loader, model, criterion)

In [35]:
submission = pd.DataFrame(preds)
submission.to_csv('submission.csv',header=['label'],index=False)

In [36]:
preds

[5,
 5,
 5,
 5,
 5,
 8,
 8,
 8,
 8,
 8,
 1,
 0,
 0,
 0,
 1,
 0,
 4,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 2,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 5,
 1,
 2,
 1,
 2,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 2,
 3,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 3,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 2,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 2,
 1,
 2,
 0,
