In [1]:
import os
import gc
import random
import math
import time
import numpy as np
import pandas as pd

import category_encoders as ce
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn import preprocessing, decomposition
from sklearn.decomposition import KernelPCA

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from pytorch_tabnet.metrics import Metric

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
#Hyperparameters
#PCA
pca_num = 20
#Autoencoder
autoencoder_batch_size = 100
autoenecoder_latents = 40 
autoencoder_val_size = 400
autoencoder_epochs = 60
autoencoder_learning_rate = 0.005
autoencoder_hidden_size_1 = 500
autoencoder_hidden_size_2 = 200
autoencoder_hidden_size_3 = 150
#TabNet
tabnet_batch_size = 100
tabnet_val_size = 2000
tabnet_learning_rate = 2e-2
tabnet_weight_decay = 1e-5
decision_layer_size = 24
mask_attention_layer_size = 24
#Constants
feature_size = 874
lable_size = 206

In [3]:
train_features = pd.read_csv('Data/train_features.csv')
train_targets_scored = pd.read_csv('Data/train_targets_scored.csv')
train_targets_nonscored = pd.read_csv('Data/train_targets_nonscored.csv')
test_features = pd.read_csv('Data/test_features.csv')
submission = pd.read_csv('Data/sample_submission.csv')

In [4]:
train = train_features.merge(train_targets_scored, on='sig_id')
# constrcut train&test except 'cp_type'=='ctl_vehicle' data
print(train_features.shape, test_features.shape)
train = train[train['cp_type']!='ctl_vehicle'].reset_index(drop=True)
test = test_features

print(train.shape, test.shape)

(23814, 876) (3982, 876)
(21948, 1082) (3982, 876)


In [5]:
train['cp_time'] = train['cp_time'].map({24: -1, 48: 0, 72: 1})
train['cp_dose'] = train['cp_dose'].map({'D1': -0.5, 'D2': 0.5})

test['cp_time'] = test['cp_time'].map({24: -1, 48: 0, 72: 1})
test['cp_dose'] = test['cp_dose'].map({'D1': -0.5, 'D2': 0.5})

In [6]:
train = train.to_numpy()
test = test.to_numpy()
dist_len = 99 + 771
for d in range(dist_len):
    train[::, 4+d]  = preprocessing.scale(train[::, 4+d])
    test[::, 4+d]  = preprocessing.scale(test[::, 4+d])
train = train[::, 2:].astype('float64') 
test = test[::, 2:].astype('float64')

Feature Engineering 

Kernel PCA

In [7]:
transformer = KernelPCA(n_components=pca_num, kernel='linear')
X_transformed = transformer.fit_transform(train[::, :feature_size])
test_transformed = transformer.transform(test)

Autoencoder

In [10]:
mse = nn.MSELoss()

traningy = train[autoencoder_val_size:, :feature_size]
valdationy = train[:autoencoder_val_size, :feature_size]

transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

train_loader_ae = torch.utils.data.DataLoader(
    traningy, batch_size=autoencoder_batch_size, shuffle=False, pin_memory=True)

test_loader_ae = torch.utils.data.DataLoader(
    valdationy, batch_size=autoencoder_batch_size, shuffle=False, pin_memory=True)

class AE(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_il = nn.Linear(feature_size, autoencoder_hidden_size_1)
        self.bnorm1 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_1)
        self.encoder_hl1 = nn.Linear(autoencoder_hidden_size_1, autoencoder_hidden_size_2)
        self.bnorm2 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_2)
        self.encoder_hl2 = nn.Linear(autoencoder_hidden_size_2, autoencoder_hidden_size_3)
        self.bnorm3 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_3)
        self.encoder_ol = nn.Linear(autoencoder_hidden_size_3, autoenecoder_latents)
        
        self.bnorm4 = nn.BatchNorm1d(num_features=autoenecoder_latents)
        self.decoder_il = nn.Linear(autoenecoder_latents, autoencoder_hidden_size_3)
        self.bnorm5 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_3)
        self.decoder_hl1 = nn.Linear(autoencoder_hidden_size_3, autoencoder_hidden_size_2)
        self.bnorm6 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_2)
        self.decoder_hl2 = nn.Linear(autoencoder_hidden_size_2, autoencoder_hidden_size_1)
        self.bnorm7 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_1)
        self.decoder_ol = nn.Linear(autoencoder_hidden_size_1, feature_size)
        
        self.elu = nn.ELU()
        
    def forward_encoder(self, x):
        x = self.encoder_il(x)
        x = self.elu(x)
        x = self.bnorm1(x)
        x = self.encoder_hl1(x)
        x = self.elu(x)
        x = self.bnorm2(x)
        x = self.encoder_hl2(x)
        x = self.elu(x)
        x = self.bnorm3(x)
        emb = self.encoder_ol(x)
        return emb
    
    def forward_decoder(self, emb):    
        x = self.bnorm4(emb)
        x = self.decoder_il(x)
        x = self.elu(x)
        x = self.bnorm5(x)
        x = self.decoder_hl1(x)
        x = self.elu(x)
        x = self.bnorm6(x)
        x = self.decoder_hl2(x)
        x = self.elu(x)
        x = self.bnorm7(x)
        x = self.decoder_ol(x)
        return x
    
model_ae = AE().to(device)
optimizer_ae = optim.Adam(model_ae.parameters(), lr=autoencoder_learning_rate)

epoch_list = []
val_list = []

for epoch in range(autoencoder_epochs):
    train_loss_en = 0
    train_loss_de = 0
    loss = 0
    
    for x in train_loader_ae:
        
        x = x.to(device)
        optimizer_ae.zero_grad()
        x = x.view((-1, feature_size))
        emb = model_ae.forward_encoder(x.float())
        rec = model_ae.forward_decoder(emb)
        # compute training reconstruction loss
        train_loss = mse(rec.double(), x)

        # compute accumulated gradients
        train_loss.backward()

        # perform parameter update based on current gradients
        optimizer_ae.step()
 
        # add the mini-batch training loss to epoch loss
        loss += train_loss_en

    if (epoch % 1) == 0:
        val_loss_en = 0 
        val_loss_de = 0
        
        for x in test_loader_ae:
            x = x.to(device)
            
            x = x.view((-1, feature_size))
            emb = model_ae.forward_encoder(x.float())
            rec = model_ae.forward_decoder(emb)
            # compute training reconstruction loss
            val_loss = mse(rec.double(), x)
            
        val_loss = val_loss.cpu().detach().numpy()
        val_list.append(val_loss)
       
        epoch_list.append(epoch)
        
        print("Validation: epoch : {}/{}, loss = {:.4f}".format(epoch+1, autoencoder_epochs, val_loss))

Validation: epoch : 1/60, loss = 0.6566
Validation: epoch : 2/60, loss = 0.6138
Validation: epoch : 3/60, loss = 0.5810
Validation: epoch : 4/60, loss = 0.5513
Validation: epoch : 5/60, loss = 0.5377
Validation: epoch : 6/60, loss = 0.5346
Validation: epoch : 7/60, loss = 0.5252
Validation: epoch : 8/60, loss = 0.5170
Validation: epoch : 9/60, loss = 0.5201
Validation: epoch : 10/60, loss = 0.5284
Validation: epoch : 11/60, loss = 0.5170
Validation: epoch : 12/60, loss = 0.5049
Validation: epoch : 13/60, loss = 0.5039
Validation: epoch : 14/60, loss = 0.5002
Validation: epoch : 15/60, loss = 0.4980
Validation: epoch : 16/60, loss = 0.5062


KeyboardInterrupt: 

In [11]:
model_ae.eval()
enc_ae = np.empty(shape = (train.shape[0], autoenecoder_latents))
for i in range(enc_ae.shape[0]):
    x = torch.from_numpy(np.asarray(train[i, :feature_size])).to(device).float()
    x = x.view(-1, feature_size)
    x = model_ae.forward_encoder(x)
    enc_ae[i, ::] = np.reshape(x.cpu().detach().numpy(), (autoenecoder_latents))
    
enc_ae_test = np.empty(shape = (test.shape[0], autoenecoder_latents))
for i in range(enc_ae_test.shape[0]):
    x = torch.from_numpy(np.asarray(test[i, :feature_size])).to(device).float()
    x = x.view(-1, feature_size)
    x = model_ae.forward_encoder(x)
    enc_ae_test[i, ::] = np.reshape(x.cpu().detach().numpy(), (autoenecoder_latents))

In [12]:
train_no_lables = np.concatenate((train[::, :feature_size], X_transformed, enc_ae), axis = 1)

val = train_no_lables[:tabnet_val_size, ::]
train_d = train_no_lables[tabnet_val_size:, ::]

lables_train = train[tabnet_val_size:, feature_size:]
lables_val = train[:tabnet_val_size, feature_size:]

dataset = torch.utils.data.TensorDataset( torch.Tensor(train_d), torch.Tensor(lables_train) )
validationset = torch.utils.data.TensorDataset( torch.Tensor(val), torch.Tensor(lables_val) )

transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

train_loader = torch.utils.data.DataLoader(
    dataset, batch_size=tabnet_batch_size, shuffle=True, pin_memory=True)

test_loader = torch.utils.data.DataLoader(
    validationset, batch_size=tabnet_batch_size, shuffle=True, pin_memory=True)

Tabnet

In [13]:
class LogitsLogLoss(Metric):

    def __init__(self):
        self._name = "val_loss"
        self._maximize = False

    def __call__(self, y_true, y_pred):

        logits = 1 / (1 + np.exp(-y_pred))
        aux = (1 - y_true) * np.log(1 - logits + 1e-15) + y_true * np.log(logits + 1e-15)
        return np.mean(-aux)

In [None]:
model = TabNetRegressor(n_d=decision_layer_size, n_a=mask_attention_layer_size, n_steps=1, lambda_sparse=0, optimizer_fn=torch.optim.Adam,
                                    optimizer_params=dict(lr=tabnet_learning_rate, weight_decay=tabnet_weight_decay), mask_type='entmax', 
                                    scheduler_params=dict(milestones=[50, 100, 150], gamma=0.9), 
                                    scheduler_fn=torch.optim.lr_scheduler.MultiStepLR)
model.fit(
  X_train=train_d, y_train=lables_train,
  eval_set=[(val, lables_val)],
  loss_fn = torch.nn.BCEWithLogitsLoss(),
  eval_metric = [LogitsLogLoss])

Device used : cuda


In [None]:
train_aug = np.concatenate((test[::, :feature_size], test_transformed, enc_ae_test), axis = 1)
pred_loader = torch.utils.data.DataLoader(train_aug, batch_size=batch_size, shuffle=True, pin_memory=True)

In [None]:
pred_encode = np.empty(shape = (test.shape[0], lable_size))
i = 1
for x in pred_loader:
    x = x.to(device)
    outputs = model.predict(x.float())
    pred_encode[((i-1)*(outputs.shape[0])):(i*(outputs.shape[0])), ::] = 1 / (1 + np.exp(-outputs))
    i += 1

In [None]:
# take a copy of all our training sig_ids for reference
test_sig_ids = test_features['sig_id'].copy()

# select all indices when 'cp_type' is 'ctl_vehicle'
test_ctl_vehicle_idx = (test_features['cp_type'] == 'ctl_vehicle')

# change all cp_type == ctl_vehicle predictions to zero
pred_encode[test_sig_ids[test_ctl_vehicle_idx].index.values] = 0
test_submission = pd.DataFrame({'sig_id' : test_sig_ids})
test_preds_df = pd.DataFrame(pred_encode, columns=train_targets_scored.columns[1:])
test_submission = pd.concat([test_submission, test_preds_df], axis=1)
test_submission.head(3)

In [None]:
test_submission.to_csv('submission.csv', index=False)