In [8]:
import os
import gc
import random
import math
import time
import numpy as np
import pandas as pd

import category_encoders as ce
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn import preprocessing, decomposition
from sklearn.decomposition import KernelPCA

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from pytorch_tabnet.metrics import Metric
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [38]:
#Hyperparameters
#PCA
pca_num = 20
#Autoencoder
autoencoder_batch_size = 100
autoenecoder_latents = 40 
autoencoder_val_size = 400
autoencoder_epochs = 60
autoencoder_learning_rate = 0.005
autoencoder_hidden_size_1 = 500
autoencoder_hidden_size_2 = 200
autoencoder_hidden_size_3 = 150
#TabNet
n_folds = 10
tabnet_batch_size = 100
tabnet_learning_rate = 2e-2
tabnet_weight_decay = 1e-5
decision_layer_size = 32
mask_attention_layer_size = 32
#Blender
blend_val_size =2000
blender_batch_size = 100
#Constants
feature_size = 874
lable_size = 206

In [10]:
train_features = pd.read_csv('Data/train_features.csv')
train_targets_scored = pd.read_csv('Data/train_targets_scored.csv')
train_targets_nonscored = pd.read_csv('Data/train_targets_nonscored.csv')
test_features = pd.read_csv('Data/test_features.csv')
submission = pd.read_csv('Data/sample_submission.csv')

In [11]:
train = train_features.merge(train_targets_scored, on='sig_id')
print(train_features.shape, test_features.shape)
train = train[train['cp_type']!='ctl_vehicle'].reset_index(drop=True)
test = test_features
print(train.shape, test.shape)

(23814, 876) (3982, 876)
(21948, 1082) (3982, 876)


In [12]:
train['cp_time'] = train['cp_time'].map({24: -1, 48: 0, 72: 1})
train['cp_dose'] = train['cp_dose'].map({'D1': -0.5, 'D2': 0.5})

test['cp_time'] = test['cp_time'].map({24: -1, 48: 0, 72: 1})
test['cp_dose'] = test['cp_dose'].map({'D1': -0.5, 'D2': 0.5})

In [13]:
train = train.to_numpy()
test = test.to_numpy()
dist_len = 99 + 771
for d in range(dist_len):
    train[::, 4+d]  = preprocessing.scale(train[::, 4+d])
    test[::, 4+d]  = preprocessing.scale(test[::, 4+d])
train = train[::, 2:].astype('float64') 
test = test[::, 2:].astype('float64')

Feature Engineering 

Kernel PCA

In [14]:
transformer = KernelPCA(n_components=pca_num, kernel='linear')
X_transformed = transformer.fit_transform(train[::, :feature_size])
test_transformed = transformer.transform(test)

Autoencoder

In [15]:
mse = nn.MSELoss()

traningy = train[autoencoder_val_size:, :feature_size]
valdationy = train[:autoencoder_val_size, :feature_size]

transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

train_loader_ae = torch.utils.data.DataLoader(
    traningy, batch_size=autoencoder_batch_size, shuffle=False, pin_memory=True)

test_loader_ae = torch.utils.data.DataLoader(
    valdationy, batch_size=autoencoder_batch_size, shuffle=False, pin_memory=True)

class AE(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_il = nn.Linear(feature_size, autoencoder_hidden_size_1)
        self.bnorm1 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_1)
        self.encoder_hl1 = nn.Linear(autoencoder_hidden_size_1, autoencoder_hidden_size_2)
        self.bnorm2 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_2)
        self.encoder_hl2 = nn.Linear(autoencoder_hidden_size_2, autoencoder_hidden_size_3)
        self.bnorm3 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_3)
        self.encoder_ol = nn.Linear(autoencoder_hidden_size_3, autoenecoder_latents)
        
        self.bnorm4 = nn.BatchNorm1d(num_features=autoenecoder_latents)
        self.decoder_il = nn.Linear(autoenecoder_latents, autoencoder_hidden_size_3)
        self.bnorm5 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_3)
        self.decoder_hl1 = nn.Linear(autoencoder_hidden_size_3, autoencoder_hidden_size_2)
        self.bnorm6 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_2)
        self.decoder_hl2 = nn.Linear(autoencoder_hidden_size_2, autoencoder_hidden_size_1)
        self.bnorm7 = nn.BatchNorm1d(num_features=autoencoder_hidden_size_1)
        self.decoder_ol = nn.Linear(autoencoder_hidden_size_1, feature_size)
        
        self.elu = nn.ELU()
        
    def forward_encoder(self, x):
        x = self.encoder_il(x)
        x = self.elu(x)
        x = self.bnorm1(x)
        x = self.encoder_hl1(x)
        x = self.elu(x)
        x = self.bnorm2(x)
        x = self.encoder_hl2(x)
        x = self.elu(x)
        x = self.bnorm3(x)
        emb = self.encoder_ol(x)
        return emb
    
    def forward_decoder(self, emb):    
        x = self.bnorm4(emb)
        x = self.decoder_il(x)
        x = self.elu(x)
        x = self.bnorm5(x)
        x = self.decoder_hl1(x)
        x = self.elu(x)
        x = self.bnorm6(x)
        x = self.decoder_hl2(x)
        x = self.elu(x)
        x = self.bnorm7(x)
        x = self.decoder_ol(x)
        return x
    
model_ae = AE().to(device)
optimizer_ae = optim.Adam(model_ae.parameters(), lr=autoencoder_learning_rate)

epoch_list = []
val_list = []

for epoch in range(autoencoder_epochs):
    train_loss_en = 0
    train_loss_de = 0
    loss = 0
    
    for x in train_loader_ae:
        
        x = x.to(device)
        optimizer_ae.zero_grad()
        x = x.view((-1, feature_size))
        emb = model_ae.forward_encoder(x.float())
        rec = model_ae.forward_decoder(emb)
        # compute training reconstruction loss
        train_loss = mse(rec.double(), x)

        # compute accumulated gradients
        train_loss.backward()

        # perform parameter update based on current gradients
        optimizer_ae.step()
 
        # add the mini-batch training loss to epoch loss
        loss += train_loss_en

    if (epoch % 1) == 0:
        val_loss_en = 0 
        val_loss_de = 0
        
        for x in test_loader_ae:
            x = x.to(device)
            
            x = x.view((-1, feature_size))
            emb = model_ae.forward_encoder(x.float())
            rec = model_ae.forward_decoder(emb)
            # compute training reconstruction loss
            val_loss = mse(rec.double(), x)
            
        val_loss = val_loss.cpu().detach().numpy()
        val_list.append(val_loss)
       
        epoch_list.append(epoch)
        
        print("Validation: epoch : {}/{}, loss = {:.4f}".format(epoch+1, autoencoder_epochs, val_loss))

Validation: epoch : 1/60, loss = 0.6656
Validation: epoch : 2/60, loss = 0.5993
Validation: epoch : 3/60, loss = 0.5743
Validation: epoch : 4/60, loss = 0.5554
Validation: epoch : 5/60, loss = 0.5439
Validation: epoch : 6/60, loss = 0.5301
Validation: epoch : 7/60, loss = 0.5188
Validation: epoch : 8/60, loss = 0.5129
Validation: epoch : 9/60, loss = 0.5229
Validation: epoch : 10/60, loss = 0.5265
Validation: epoch : 11/60, loss = 0.5222
Validation: epoch : 12/60, loss = 0.5064
Validation: epoch : 13/60, loss = 0.4976
Validation: epoch : 14/60, loss = 0.4996
Validation: epoch : 15/60, loss = 0.4920
Validation: epoch : 16/60, loss = 0.4948
Validation: epoch : 17/60, loss = 0.4931
Validation: epoch : 18/60, loss = 0.4932
Validation: epoch : 19/60, loss = 0.4932
Validation: epoch : 20/60, loss = 0.4856
Validation: epoch : 21/60, loss = 0.4911
Validation: epoch : 22/60, loss = 0.4908
Validation: epoch : 23/60, loss = 0.4995
Validation: epoch : 24/60, loss = 0.4948
Validation: epoch : 25/60

In [16]:
best_epoch = np.argmin(val_list)
model_ae.load_state_dict(torch.load("./saves/ae"+str(best_epoch)))
model_ae.eval()

AE(
  (encoder_il): Linear(in_features=874, out_features=500, bias=True)
  (bnorm1): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (encoder_hl1): Linear(in_features=500, out_features=200, bias=True)
  (bnorm2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (encoder_hl2): Linear(in_features=200, out_features=150, bias=True)
  (bnorm3): BatchNorm1d(150, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (encoder_ol): Linear(in_features=150, out_features=40, bias=True)
  (bnorm4): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (decoder_il): Linear(in_features=40, out_features=150, bias=True)
  (bnorm5): BatchNorm1d(150, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (decoder_hl1): Linear(in_features=150, out_features=200, bias=True)
  (bnorm6): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (decoder_hl2): Linear(

In [17]:
model_ae.eval()
enc_ae = np.empty(shape = (train.shape[0], autoenecoder_latents))
for i in range(enc_ae.shape[0]):
    x = torch.from_numpy(np.asarray(train[i, :feature_size])).to(device).float()
    x = x.view(-1, feature_size)
    x = model_ae.forward_encoder(x)
    enc_ae[i, ::] = np.reshape(x.cpu().detach().numpy(), (autoenecoder_latents))
    
enc_ae_test = np.empty(shape = (test.shape[0], autoenecoder_latents))
for i in range(enc_ae_test.shape[0]):
    x = torch.from_numpy(np.asarray(test[i, :feature_size])).to(device).float()
    x = x.view(-1, feature_size)
    x = model_ae.forward_encoder(x)
    enc_ae_test[i, ::] = np.reshape(x.cpu().detach().numpy(), (autoenecoder_latents))

In [23]:
train_d = np.concatenate((train[::, :feature_size], X_transformed, enc_ae), axis = 1)
train_aug = np.concatenate((test[::, :feature_size], test_transformed, enc_ae_test), axis = 1)

lables_train = train[::, feature_size:]
dataset = torch.utils.data.TensorDataset(torch.Tensor(train_d), torch.Tensor(lables_train) )

train_loader = torch.utils.data.DataLoader(
    dataset, batch_size=tabnet_batch_size, shuffle=True, pin_memory=True)

pred_loader = torch.utils.data.DataLoader(train_aug, batch_size=tabnet_batch_size, shuffle=True, pin_memory=True)

Tabnet

In [25]:
class LogitsLogLoss(Metric):

    def __init__(self):
        self._name = "val_loss"
        self._maximize = False

    def __call__(self, y_true, y_pred):

        logits = 1 / (1 + np.exp(-y_pred))
        aux = (1 - y_true) * np.log(1 - logits + 1e-15) + y_true * np.log(logits + 1e-15)
        return np.mean(-aux)

In [42]:
mskf = MultilabelStratifiedKFold(n_splits = n_folds, random_state = 0, shuffle = True)
models = {}
for fold_nb, (train_idx, val_idx) in enumerate(mskf.split(train_d, lables_train)):
    print("FOLD: ", fold_nb + 1)
    
    train_split, lables_train_split = train_d[train_idx, ::], lables_train[train_idx, ::]
    val_split, lables_val_split = train_d[val_idx, ::], lables_train[val_idx, ::]
    ### Model ###
    name = "model"+str(fold_nb)
    models[name] = TabNetRegressor(n_d=decision_layer_size, n_a=mask_attention_layer_size, n_steps=1, lambda_sparse=0, optimizer_fn=torch.optim.Adam,
                                    optimizer_params=dict(lr=tabnet_learning_rate, weight_decay=tabnet_weight_decay), mask_type='entmax', 
                                    scheduler_params=dict(milestones=[50, 100, 150], gamma=0.9), 
                                    scheduler_fn=torch.optim.lr_scheduler.MultiStepLR)
    models[name].fit(X_train=train_split, y_train=lables_train_split,
              eval_set=[(val_split, lables_val_split)],
              loss_fn = torch.nn.BCEWithLogitsLoss(),
              eval_metric = [LogitsLogLoss])

FOLD:  1
Device used : cuda
epoch 0  | loss: 0.36222 | val_0_val_loss: 0.03795 |  0:00:01s


  if __name__ == '__main__':


epoch 1  | loss: 0.0286  | val_0_val_loss: 0.02675 |  0:00:02s
epoch 2  | loss: 0.02271 | val_0_val_loss: 0.02206 |  0:00:04s
epoch 3  | loss: 0.02129 | val_0_val_loss: 0.02104 |  0:00:05s
epoch 4  | loss: 0.02077 | val_0_val_loss: 0.02078 |  0:00:07s
epoch 5  | loss: 0.02051 | val_0_val_loss: 0.02084 |  0:00:08s
epoch 6  | loss: 0.02023 | val_0_val_loss: 0.02084 |  0:00:10s
epoch 7  | loss: 0.0199  | val_0_val_loss: 0.01997 |  0:00:11s
epoch 8  | loss: 0.01966 | val_0_val_loss: 0.02086 |  0:00:13s
epoch 9  | loss: 0.01935 | val_0_val_loss: 0.0195  |  0:00:14s
epoch 10 | loss: 0.01902 | val_0_val_loss: 0.01934 |  0:00:16s
epoch 11 | loss: 0.01881 | val_0_val_loss: 0.01982 |  0:00:17s
epoch 12 | loss: 0.01857 | val_0_val_loss: 0.0188  |  0:00:18s
epoch 13 | loss: 0.01834 | val_0_val_loss: 0.02042 |  0:00:20s
epoch 14 | loss: 0.0183  | val_0_val_loss: 0.01844 |  0:00:21s
epoch 15 | loss: 0.01798 | val_0_val_loss: 0.01833 |  0:00:22s
epoch 16 | loss: 0.01775 | val_0_val_loss: 0.01788 |  0

  if __name__ == '__main__':


epoch 1  | loss: 0.02888 | val_0_val_loss: 0.02691 |  0:00:03s
epoch 2  | loss: 0.02306 | val_0_val_loss: 0.0215  |  0:00:05s
epoch 3  | loss: 0.02132 | val_0_val_loss: 0.02076 |  0:00:06s
epoch 4  | loss: 0.02085 | val_0_val_loss: 0.02051 |  0:00:08s
epoch 5  | loss: 0.02062 | val_0_val_loss: 0.02039 |  0:00:10s
epoch 6  | loss: 0.02043 | val_0_val_loss: 0.02018 |  0:00:11s
epoch 7  | loss: 0.02017 | val_0_val_loss: 0.01999 |  0:00:13s
epoch 8  | loss: 0.01984 | val_0_val_loss: 0.01973 |  0:00:14s
epoch 9  | loss: 0.01962 | val_0_val_loss: 0.01953 |  0:00:15s
epoch 10 | loss: 0.01932 | val_0_val_loss: 0.02042 |  0:00:17s
epoch 11 | loss: 0.01902 | val_0_val_loss: 0.01922 |  0:00:18s
epoch 12 | loss: 0.01866 | val_0_val_loss: 0.01916 |  0:00:20s
epoch 13 | loss: 0.0185  | val_0_val_loss: 0.01898 |  0:00:22s
epoch 14 | loss: 0.01819 | val_0_val_loss: 0.01857 |  0:00:23s
epoch 15 | loss: 0.01805 | val_0_val_loss: 0.01899 |  0:00:25s
epoch 16 | loss: 0.01787 | val_0_val_loss: 0.01839 |  0

epoch 59 | loss: 0.016   | val_0_val_loss: 0.01713 |  0:01:31s
epoch 60 | loss: 0.01589 | val_0_val_loss: 0.01718 |  0:01:33s
epoch 61 | loss: 0.01579 | val_0_val_loss: 0.01731 |  0:01:34s
epoch 62 | loss: 0.01586 | val_0_val_loss: 0.01716 |  0:01:35s
epoch 63 | loss: 0.01587 | val_0_val_loss: 0.01722 |  0:01:37s
epoch 64 | loss: 0.01588 | val_0_val_loss: 0.01759 |  0:01:39s
epoch 65 | loss: 0.01573 | val_0_val_loss: 0.01723 |  0:01:40s
epoch 66 | loss: 0.01578 | val_0_val_loss: 0.01999 |  0:01:42s
epoch 67 | loss: 0.01573 | val_0_val_loss: 0.01811 |  0:01:43s
epoch 68 | loss: 0.01579 | val_0_val_loss: 0.01768 |  0:01:45s
epoch 69 | loss: 0.01571 | val_0_val_loss: 0.01717 |  0:01:46s

Early stopping occured at epoch 69 with best_epoch = 59 and best_val_0_val_loss = 0.01713
Best weights from best epoch are automatically used!
FOLD:  4
Device used : cuda
epoch 0  | loss: 0.36588 | val_0_val_loss: 0.03827 |  0:00:01s
epoch 1  | loss: 0.02847 | val_0_val_loss: 0.02616 |  0:00:02s
epoch 2  

FOLD:  6
Device used : cuda
epoch 0  | loss: 0.36335 | val_0_val_loss: 0.03747 |  0:00:01s
epoch 1  | loss: 0.02876 | val_0_val_loss: 0.02738 |  0:00:02s
epoch 2  | loss: 0.02314 | val_0_val_loss: 0.02173 |  0:00:04s
epoch 3  | loss: 0.02148 | val_0_val_loss: 0.02095 |  0:00:06s
epoch 4  | loss: 0.02093 | val_0_val_loss: 0.02059 |  0:00:07s
epoch 5  | loss: 0.02067 | val_0_val_loss: 0.02049 |  0:00:09s
epoch 6  | loss: 0.02053 | val_0_val_loss: 0.02033 |  0:00:10s
epoch 7  | loss: 0.02029 | val_0_val_loss: 0.02016 |  0:00:12s
epoch 8  | loss: 0.02007 | val_0_val_loss: 0.0201  |  0:00:13s
epoch 9  | loss: 0.01981 | val_0_val_loss: 0.01981 |  0:00:15s
epoch 10 | loss: 0.01956 | val_0_val_loss: 0.01952 |  0:00:16s
epoch 11 | loss: 0.01924 | val_0_val_loss: 0.01941 |  0:00:18s
epoch 12 | loss: 0.01887 | val_0_val_loss: 0.01879 |  0:00:19s
epoch 13 | loss: 0.01861 | val_0_val_loss: 0.01878 |  0:00:21s
epoch 14 | loss: 0.01827 | val_0_val_loss: 0.02002 |  0:00:22s
epoch 15 | loss: 0.01809 | 

  if __name__ == '__main__':


epoch 1  | loss: 0.02845 | val_0_val_loss: 0.02553 |  0:00:03s
epoch 2  | loss: 0.02257 | val_0_val_loss: 0.02154 |  0:00:06s
epoch 3  | loss: 0.02144 | val_0_val_loss: 0.02116 |  0:00:08s
epoch 4  | loss: 0.02112 | val_0_val_loss: 0.02096 |  0:00:10s
epoch 5  | loss: 0.02084 | val_0_val_loss: 0.02064 |  0:00:11s
epoch 6  | loss: 0.02051 | val_0_val_loss: 0.02027 |  0:00:13s
epoch 7  | loss: 0.02014 | val_0_val_loss: 0.0212  |  0:00:15s
epoch 8  | loss: 0.01971 | val_0_val_loss: 0.01956 |  0:00:17s
epoch 9  | loss: 0.01932 | val_0_val_loss: 0.01909 |  0:00:19s
epoch 10 | loss: 0.0189  | val_0_val_loss: 0.01969 |  0:00:20s
epoch 11 | loss: 0.0186  | val_0_val_loss: 0.0191  |  0:00:22s
epoch 12 | loss: 0.01841 | val_0_val_loss: 0.01866 |  0:00:24s
epoch 13 | loss: 0.01817 | val_0_val_loss: 0.01848 |  0:00:26s
epoch 14 | loss: 0.01801 | val_0_val_loss: 0.01816 |  0:00:27s
epoch 15 | loss: 0.01785 | val_0_val_loss: 0.0184  |  0:00:29s
epoch 16 | loss: 0.01774 | val_0_val_loss: 0.01832 |  0

epoch 3  | loss: 0.02138 | val_0_val_loss: 0.0208  |  0:00:07s
epoch 4  | loss: 0.02092 | val_0_val_loss: 0.02051 |  0:00:08s
epoch 5  | loss: 0.02065 | val_0_val_loss: 0.0203  |  0:00:10s
epoch 6  | loss: 0.02027 | val_0_val_loss: 0.02012 |  0:00:12s
epoch 7  | loss: 0.02004 | val_0_val_loss: 0.01979 |  0:00:13s
epoch 8  | loss: 0.01967 | val_0_val_loss: 0.0196  |  0:00:15s
epoch 9  | loss: 0.01936 | val_0_val_loss: 0.01923 |  0:00:17s
epoch 10 | loss: 0.01894 | val_0_val_loss: 0.01956 |  0:00:19s
epoch 11 | loss: 0.01868 | val_0_val_loss: 0.01999 |  0:00:20s
epoch 12 | loss: 0.01834 | val_0_val_loss: 0.01867 |  0:00:22s
epoch 13 | loss: 0.01809 | val_0_val_loss: 0.01871 |  0:00:24s
epoch 14 | loss: 0.01787 | val_0_val_loss: 0.01941 |  0:00:26s
epoch 15 | loss: 0.01775 | val_0_val_loss: 0.02055 |  0:00:28s
epoch 16 | loss: 0.01753 | val_0_val_loss: 0.01867 |  0:00:29s
epoch 17 | loss: 0.01741 | val_0_val_loss: 0.0179  |  0:00:31s
epoch 18 | loss: 0.01728 | val_0_val_loss: 0.01798 |  0

epoch 53 | loss: 0.01602 | val_0_val_loss: 0.01753 |  0:01:33s
epoch 54 | loss: 0.01617 | val_0_val_loss: 0.01701 |  0:01:35s
epoch 55 | loss: 0.01595 | val_0_val_loss: 0.01697 |  0:01:37s
epoch 56 | loss: 0.01586 | val_0_val_loss: 0.01733 |  0:01:39s
epoch 57 | loss: 0.01588 | val_0_val_loss: 0.01696 |  0:01:41s
epoch 58 | loss: 0.01584 | val_0_val_loss: 0.01701 |  0:01:42s
epoch 59 | loss: 0.01583 | val_0_val_loss: 0.01675 |  0:01:44s
epoch 60 | loss: 0.01588 | val_0_val_loss: 0.01714 |  0:01:46s
epoch 61 | loss: 0.0159  | val_0_val_loss: 0.01677 |  0:01:48s
epoch 62 | loss: 0.01589 | val_0_val_loss: 0.01752 |  0:01:49s
epoch 63 | loss: 0.01585 | val_0_val_loss: 0.01754 |  0:01:51s
epoch 64 | loss: 0.0159  | val_0_val_loss: 0.01726 |  0:01:53s
epoch 65 | loss: 0.016   | val_0_val_loss: 0.01753 |  0:01:55s
epoch 66 | loss: 0.0158  | val_0_val_loss: 0.01684 |  0:01:57s
epoch 67 | loss: 0.01582 | val_0_val_loss: 0.01712 |  0:01:58s
epoch 68 | loss: 0.01586 | val_0_val_loss: 0.01689 |  0

Model Blending

In [43]:
class Blender(nn.Module):
    def __init__(self, **kwargs):
            super().__init__()
            self.scaler = torch.nn.parameter.Parameter(torch.rand(size = (NB_SPLITS, lable_size)), requires_grad=True)
            self.sig = nn.Sigmoid()
            
    def forward(self, x):
    
        placeholder = torch.zeros(x.shape[0], lable_size).to(device)
        
        for i in range(NB_SPLITS):
            placeholder += 0.2*self.scaler[i]*torch.tensor(models["model"+str(i)].predict(x.cpu().detach().numpy()), requires_grad=False).to(device)
            
        pred = placeholder
        return pred

In [44]:
blender = Blender().to(device)
optimizer_blender = optim.Adam(blender.parameters(), lr=0.00001)

In [45]:
train_loader = torch.utils.data.DataLoader(
    dataset, batch_size=blender_batch_size, shuffle=True, pin_memory=True, drop_last=True)

test_loader = torch.utils.data.DataLoader(
    validationset, batch_size=blender_batch_size, shuffle=True, pin_memory=True, drop_last=True)

In [47]:
epochs = 20
epoch_list = []
val_list = []
for epoch in range(epochs):    
    for x, y in train_loader:
        
        x = x.to(device)
        y = y.to(device)
        optimizer_blender.zero_grad()
        
        outputs = blender.forward(x.float())
        # compute training reconstruction loss
        train_loss = nn.BCEWithLogitsLoss()(outputs, y)

        # compute accumulated gradients
        train_loss.backward()

        # perform parameter update based on current gradients
        optimizer_blender.step()
 
        train_loss = train_loss.cpu().detach().numpy()
        val_list.append(train_loss)
        torch.save(blender.state_dict(), "./saves/blend"+str(epoch))
        epoch_list.append(epoch)
        print("epoch : {}/{}, loss = {:.4f}".format(epoch + 1, epochs, train_loss))

epoch : 1/20, loss = 0.0181
epoch : 2/20, loss = 0.0178
epoch : 3/20, loss = 0.0175
epoch : 4/20, loss = 0.0178
epoch : 5/20, loss = 0.0185
epoch : 6/20, loss = 0.0173
epoch : 7/20, loss = 0.0179
epoch : 8/20, loss = 0.0171
epoch : 9/20, loss = 0.0173
epoch : 10/20, loss = 0.0179
epoch : 11/20, loss = 0.0172
epoch : 12/20, loss = 0.0179
epoch : 13/20, loss = 0.0167
epoch : 14/20, loss = 0.0168
epoch : 15/20, loss = 0.0177
epoch : 16/20, loss = 0.0156
epoch : 17/20, loss = 0.0127
epoch : 18/20, loss = 0.0170
epoch : 19/20, loss = 0.0165
epoch : 20/20, loss = 0.0181


In [48]:
best_epoch = np.argmin(val_list)
blender.load_state_dict(torch.load("./saves/blend"+str(best_epoch)))
blender.eval()

Blender(
  (sig): Sigmoid()
)

In [49]:
best_epoch

16

In [51]:
i = 1
pred_encode = np.empty(shape = (test.shape[0], lable_size))
for x in pred_loader:
    x = x.to(device)
    outputs = blender.forward(x)
    pred_encode[((i-1)*(outputs.shape[0])):(i*(outputs.shape[0])), ::] = 1 / (1 + np.exp(-outputs.cpu().detach().numpy()))
    i += 1    

In [54]:
# take a copy of all our training sig_id s for reference
test_sig_ids = test_features['sig_id'].copy()

# select all indices when 'cp_type' is 'ctl_vehicle'
test_ctl_vehicle_idx = (test_features['cp_type'] == 'ctl_vehicle')

# change all cp_type == ctl_vehicle predictions to zero
pred_encode[test_sig_ids[test_ctl_vehicle_idx].index.values] = 0
test_submission = pd.DataFrame({'sig_id' : test_sig_ids})
test_preds_df = pd.DataFrame(pred_encode, columns=train_targets_scored.columns[1:])
test_submission = pd.concat([test_submission, test_preds_df], axis=1)
test_submission.head(3)

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.002052,0.010072,0.00331,0.005938,0.015749,0.018587,0.00778,0.002433,5.3e-05,...,0.000528,0.003955,0.006279,0.001776,0.007222,0.000584,0.000751,0.001802,0.002585,0.001044
1,id_001897cda,0.001512,0.00664,0.003262,0.00824,0.026643,0.016565,0.005519,0.002926,2.8e-05,...,0.000518,0.003208,0.004323,0.001339,0.009365,0.000454,0.00207,0.002292,0.001957,0.001105
2,id_002429b5b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
test_submission.to_csv('submission.csv', index=False)