In [2]:
!cp /content/drive/"My Drive"/kaggle/moa/lish-moa.zip /content/

In [3]:
!unzip lish-moa.zip

Archive:  lish-moa.zip
  inflating: sample_submission.csv   
  inflating: test_features.csv       
  inflating: train_features.csv      
  inflating: train_targets_nonscored.csv  
  inflating: train_targets_scored.csv  


In [4]:
!pip install pip install iterative-stratification

Collecting install
  Downloading https://files.pythonhosted.org/packages/41/cf/e3e6b4d494051c07261cae8c403f0f0d0cedad43d980e5255f2c88fd5edf/install-1.3.3-py3-none-any.whl
Collecting iterative-stratification
  Downloading https://files.pythonhosted.org/packages/9d/79/9ba64c8c07b07b8b45d80725b2ebd7b7884701c1da34f70d4749f7b45f9a/iterative_stratification-0.1.6-py3-none-any.whl
Installing collected packages: install, iterative-stratification
Successfully installed install-1.3.3 iterative-stratification-0.1.6


In [5]:
from sklearn.preprocessing import MinMaxScaler
        
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import RandomOverSampler


import numpy as np 
import pandas as pd 
from IPython.display import clear_output
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter1d   ## smoother
from tqdm.notebook import tqdm, tnrange
import random
import os

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

plt.rcParams['figure.figsize'] = 15, 7

CGREEN  = '\33[32m'
CBLUE =  '\033[34m'
CRED = '\033[1;31m'
CEND  = '\33[0m'

def seed_everything(seed=1903):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)



In [6]:
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'
    
device


'cuda'

In [7]:
train_features = pd.read_csv('train_features.csv')
train_targets = pd.read_csv('train_targets_scored.csv')
test_features = pd.read_csv('test_features.csv')

In [8]:
def preprocess(df):
    df = df.copy()
    df.loc[:, 'cp_type'] = df.loc[:, 'cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
    df.loc[:, 'cp_dose'] = df.loc[:, 'cp_dose'].map({'D1': 0, 'D2': 1})
    return df

train = preprocess(train_features)
test = preprocess(test_features)

del train_targets['sig_id']

target = train_targets.loc[train['cp_type']==0].reset_index(drop=True)
train = train.loc[train['cp_type']==0].reset_index(drop=True)

In [9]:
top_features = [  1,   2,   3,   4,   5,   6,   7,   9,  11,  14,  15,  16,  17,
        18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  29,  30,  31,
        32,  33,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  46,
        47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  58,  59,  60,
        61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,
        74,  75,  76,  78,  79,  80,  81,  82,  83,  84,  86,  87,  88,
        89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101,
       102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
       115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128,
       129, 130, 131, 132, 133, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157,
       158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
       171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
       184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 197,
       198, 199, 200, 202, 203, 204, 205, 206, 208, 209, 210, 211, 212,
       213, 214, 215, 216, 217, 218, 219, 220, 221, 223, 224, 225, 226,
       227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
       240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
       254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266,
       267, 268, 269, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280,
       281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 294,
       295, 296, 298, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309,
       310, 311, 312, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323,
       324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
       337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,
       350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362,
       363, 364, 365, 366, 367, 368, 369, 370, 371, 374, 375, 376, 377,
       378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 390, 391,
       392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
       405, 406, 407, 408, 409, 411, 412, 413, 414, 415, 416, 417, 418,
       419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431,
       432, 434, 435, 436, 437, 438, 439, 440, 442, 443, 444, 445, 446,
       447, 448, 449, 450, 453, 454, 456, 457, 458, 459, 460, 461, 462,
       463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475,
       476, 477, 478, 479, 481, 482, 483, 484, 485, 486, 487, 488, 489,
       490, 491, 492, 493, 494, 495, 496, 498, 500, 501, 502, 503, 505,
       506, 507, 509, 510, 511, 512, 513, 514, 515, 518, 519, 520, 521,
       522, 523, 524, 525, 526, 527, 528, 530, 531, 532, 534, 535, 536,
       538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 549, 550, 551,
       552, 554, 557, 559, 560, 561, 562, 565, 566, 567, 568, 569, 570,
       571, 572, 573, 574, 575, 577, 578, 580, 581, 582, 583, 584, 585,
       586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 599,
       600, 601, 602, 606, 607, 608, 609, 611, 612, 613, 615, 616, 617,
       618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630,
       631, 632, 633, 634, 635, 636, 637, 638, 639, 641, 642, 643, 644,
       645, 646, 647, 648, 649, 650, 651, 652, 654, 655, 656, 658, 659,
       660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672,
       673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685,
       686, 687, 688, 689, 691, 692, 693, 694, 695, 696, 697, 699, 700,
       701, 702, 704, 705, 707, 708, 709, 710, 711, 713, 714, 716, 717,
       718, 720, 721, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732,
       733, 734, 735, 737, 738, 739, 740, 742, 743, 744, 745, 746, 747,
       748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 759, 760, 761,
       762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774,
       775, 776, 777, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788,
       789, 790, 792, 793, 794, 795, 796, 797, 798, 800, 801, 802, 803,
       804, 805, 806, 808, 809, 811, 813, 814, 815, 816, 817, 818, 819,
       821, 822, 823, 825, 826, 827, 828, 829, 830, 831, 832, 834, 835,
       837, 838, 839, 840, 841, 842, 845, 846, 847, 848, 850, 851, 852,
       854, 855, 856, 858, 859, 860, 861, 862, 864, 866, 867, 868, 869,
       870, 871, 872, 873, 874]



In [10]:
all_columns = train.columns
train=train[all_columns[top_features]]
test = test[all_columns[top_features]]

In [11]:
train.shape, test.shape

((21948, 785), (3982, 785))

In [12]:
train = train.values
target = target.values
test = test.values

In [14]:
class TrainDataset(Dataset):
    def __init__(self, train,targets, noise ):
        
        self.features  = train
        self.targets = targets
        self.noise = noise
        
    def sizes(self):
        print("features size = ", self.features.shape[1])
        print("targets size = ", self.targets.shape[1])

        
    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, idx):
        feature = torch.tensor(self.features[idx]).float()
            
        target = torch.tensor(self.targets[idx]).float()
        
        return feature, target


In [15]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']
        
def show_lr(learning_rates):
    plt.plot(learning_rates, label = "learning rate")
    plt.ylabel("Learning rate", fontsize = 15)
    plt.grid(True)
    plt.legend()
    plt.show()

def train_step(x, y, model, optimizer, criterion):
    optimizer.zero_grad()
    pred = model(x.to(device))
    y = y.float()
    loss = criterion(pred,y.to(device))
    loss.backward()
    optimizer.step()
    return loss.item()

In [16]:

"""
[I 2020-09-26 23:26:27,234] Trial 77 finished with value: 0.015594600699841976 
and parameters: {
    'num_layer': 3, 
    'hidden_size': 2076, 
    'dropout': 0.5145663015913359, 
    'learning_rate': 0.0037416442804666648
}. t
Best is trial 77 with value: 0.015594600699841976.
"""


"\n[I 2020-09-26 23:26:27,234] Trial 77 finished with value: 0.015594600699841976 \nand parameters: {\n    'num_layer': 3, \n    'hidden_size': 2076, \n    'dropout': 0.5145663015913359, \n    'learning_rate': 0.0037416442804666648\n}. t\nBest is trial 77 with value: 0.015594600699841976.\n"

In [17]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(785)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(785, 2048))
        
        self.batch_norm2 = nn.BatchNorm1d(2048)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(2048, 1048))
        
        self.batch_norm3 = nn.BatchNorm1d(1048)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(1048, 206))
    
    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.relu(self.dense1(x))
        
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = F.relu(self.dense2(x))
        
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)
        
        return x



In [82]:

def train_one_fold(model,num_epochs , train_loader,val_loader, optimizer, scheduler, criterion, fold_number = 1, show_plots = False, train = True, validate = True):
    
    losses = []
    val_losses = []
    learning_rates = []    
    best_loss = 1000000

    for epoch in range(num_epochs):

            
        if train == True:
            model.train()
            losses_temp = []
            for batch in train_loader:
                (x_batch, y_batch) = batch
                loss = train_step(x_batch.to(device), y_batch.to(device), model, optimizer, criterion)
                losses_temp.append(loss)
            losses.append(torch.mean(torch.tensor(losses_temp)))
            scheduler.step(1.)   ## lr decay caller 
            learning_rates.append(get_lr(optimizer))
            

        if validate == True:
            with torch.no_grad():
                model.eval()
                val_losses_temp = []
                for x_val, y_val in val_loader:
                    yhat =model(x_val.to(device))  # pred 
                    val_loss = criterion(yhat.to(device), y_val.to(device))
                    val_losses_temp.append(val_loss.item())  ## metrics 
                val_losses.append(torch.mean(torch.tensor(val_losses_temp)).item())  ## metrics 

        
        if train == True:
            print ("epoch ", epoch+1, " out of ", num_epochs, end = "      >" )

            if val_losses[-1] <= best_loss:

                print(CGREEN, "Val loss decreased from:", best_loss, " to ", val_losses[-1], CEND, end = "   >")
                best_loss = val_losses[-1]
                name = "./model_" + str(fold_number)+".pth"
                print("saving model as: ", name)
                torch.save(model.state_dict(), name)

            else: 
                print("showing no improvements, best loss yet:", best_loss)

        if show_plots == True:

            show_lr(learning_rates)
            plt.plot(val_losses, label = "val")
            plt.axhline(min(val_losses), linestyle = "--", c = "r")
            plt.legend()
            plt.grid()
            plt.show()


            plt.plot(val_losses[4:], label = "val after main drop", c = "g")
            plt.axhline(min(val_losses), linestyle = "--", c = "r")
            plt.legend()
            plt.grid()
            plt.show()


            plt.plot(losses, label = "train")
            plt.legend()
            plt.grid()
            plt.show()
    if train == True:
        return losses, val_losses, name 
    else:
        return losses, val_losses

In [53]:


def train_model(num_folds, num_epochs, batch_size, lr =  0.004299882049752947, save_code = 0, ensemble = False, ensemble_model_paths = [] ):

    mskf = MultilabelStratifiedKFold(n_splits=num_folds, shuffle=True, random_state=0)

    fold_val_losses = list()
    filenames = []


    for k , (train_idx,valid_idx) in enumerate(mskf.split(train,target)):

        x_train,x_valid,y_train,y_valid = train[train_idx,:],train[valid_idx,:],target[train_idx,:],target[valid_idx,:]

        input_size = x_train.shape[1]
        output_size = target.shape[1]
        
        train_dataset = TrainDataset(x_train, y_train, noise = False)
        valid_dataset = TrainDataset(x_valid, y_valid, noise = False)
        
        train_loader = DataLoader(dataset=train_dataset, batch_size= batch_size, shuffle=True)

        val_loader = DataLoader(dataset=valid_dataset, batch_size=256, shuffle = True)
        
        if ensemble == False:
            model = Model()
        else:
            model = MyEnsemble(ensemble_model_paths, device)

        model = model.to(device)

        optimizer = optim.Adam(model.parameters(), lr = lr , weight_decay=1e-5)

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                        mode='min', 
                                                        factor=0.1, ## wooo hoo
                                                        patience=7, ## was 3 for 158 
                                                        eps=1e-4, 
                                                        verbose=True)
        criterion = nn.BCEWithLogitsLoss()

        if k > 1:
            print(CRED ,"fold ", str(k+1), "  :: mean loss on all folds: ", np.array([min(l) for l in fold_val_losses]).mean(), CEND)
    

        losses, val_losses, filename = train_one_fold(model, num_epochs , train_loader,val_loader, optimizer, scheduler, criterion, fold_number = k+ save_code+1)

        fold_val_losses.append(val_losses)
        filenames.append(filename)
    print(CBLUE, "Training complete", CEND)

    return fold_val_losses, filenames

In [68]:
individual_losses , filenames = train_model(
                                    num_folds = 7,
                                    num_epochs = 45,
                                    batch_size = 128,
                                    save_code = 0
                                )

epoch  1  out of  45      >[32m Val loss decreased from: 1000000  to  0.020819269120693207 [0m   >saving model as:  ./model_1.pth
epoch  2  out of  45      >[32m Val loss decreased from: 0.020819269120693207  to  0.019304858520627022 [0m   >saving model as:  ./model_1.pth
epoch  3  out of  45      >[32m Val loss decreased from: 0.019304858520627022  to  0.01851486973464489 [0m   >saving model as:  ./model_1.pth
epoch  4  out of  45      >[32m Val loss decreased from: 0.01851486973464489  to  0.01765020377933979 [0m   >saving model as:  ./model_1.pth
epoch  5  out of  45      >[32m Val loss decreased from: 0.01765020377933979  to  0.017595935612916946 [0m   >saving model as:  ./model_1.pth
epoch  6  out of  45      >[32m Val loss decreased from: 0.017595935612916946  to  0.017429543659090996 [0m   >saving model as:  ./model_1.pth
epoch  7  out of  45      >showing no improvements, best loss yet: 0.017429543659090996
epoch  8  out of  45      >[32m Val loss decreased from: 0

In [103]:
class MyEnsemble(nn.Module):
    def __init__(self, model_list, device):
        super(MyEnsemble, self).__init__()
        
        print("loading models...")
        self.model_filenames = model_list
        self.model_list = [Model() for m in range(len(self.model_filenames))]

        for i in range(len(self.model_filenames)):
            self.model_list[i].load_state_dict(torch.load(self.model_filenames[i]))
            self.model_list[i].to(device)
            self.model_list[i].eval()

        self.fc1 = nn.Linear(206*len(self.model_list), 2024)
        self.fc2 = nn.Linear(2024, 1024)
        self.fc3 = nn.Linear(1024, 206)
        print("ensemble initialised with " , len(self.model_list), " models")

    def forward(self, x):
        
        x_list = [self.model_list[i](x) for i in range(len(self.model_list))]
        joined =  torch.cat(tuple(x_list), dim=1)
        x = F.relu(self.fc1(joined))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x



In [70]:
filenames

['./model_1.pth',
 './model_2.pth',
 './model_3.pth',
 './model_4.pth',
 './model_5.pth',
 './model_6.pth',
 './model_7.pth']

In [None]:
en_individual_losses , en_filenames = train_model(
                                    num_folds = 7,
                                    num_epochs = 75,
                                    batch_size = 128,
                                    save_code = 100,
                                    ensemble = True,
                                    ensemble_model_paths = filenames,
                                    lr = 1e-3
                                )

loading models...
ensemble initialised with  7  models
epoch  1  out of  75      >[32m Val loss decreased from: 1000000  to  0.0197810847312212 [0m   >saving model as:  ./model_101.pth
epoch  2  out of  75      >[32m Val loss decreased from: 0.0197810847312212  to  0.017576148733496666 [0m   >saving model as:  ./model_101.pth
epoch  3  out of  75      >[32m Val loss decreased from: 0.017576148733496666  to  0.016240771859884262 [0m   >saving model as:  ./model_101.pth
epoch  4  out of  75      >[32m Val loss decreased from: 0.016240771859884262  to  0.015310490503907204 [0m   >saving model as:  ./model_101.pth
epoch  5  out of  75      >[32m Val loss decreased from: 0.015310490503907204  to  0.014800924807786942 [0m   >saving model as:  ./model_101.pth
epoch  6  out of  75      >[32m Val loss decreased from: 0.014800924807786942  to  0.014158079400658607 [0m   >saving model as:  ./model_101.pth
epoch  7  out of  75      >showing no improvements, best loss yet: 0.01415807940

In [72]:
!cp model_100.pth /content/drive/"My Drive"/kaggle/moa
!cp model_101.pth /content/drive/"My Drive"/kaggle/moa
!cp model_102.pth /content/drive/"My Drive"/kaggle/moa
!cp model_103.pth /content/drive/"My Drive"/kaggle/moa
!cp model_104.pth /content/drive/"My Drive"/kaggle/moa
!cp model_105.pth /content/drive/"My Drive"/kaggle/moa
!cp model_106.pth /content/drive/"My Drive"/kaggle/moa
!cp model_107.pth /content/drive/"My Drive"/kaggle/moa
!cp model_108.pth /content/drive/"My Drive"/kaggle/moa
!cp model_109.pth /content/drive/"My Drive"/kaggle/moa


In [1]:
# !cp /content/drive/"My Drive"/kaggle/moa/model_100.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_101.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_102.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_103.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_104.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_105.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_106.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_107.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_108.pth /content/
# !cp /content/drive/"My Drive"/kaggle/moa/model_109.pth /content/

In [76]:
filenames

['./model_1.pth',
 './model_2.pth',
 './model_3.pth',
 './model_4.pth',
 './model_5.pth',
 './model_6.pth',
 './model_7.pth']

In [104]:
all_models = [MyEnsemble(filenames, device) for i in range (5)]

for i in range (len(all_models)):
    
    name = "./model_" + str(100 + i + 1) + ".pth"
    all_models[i].load_state_dict(torch.load(name))
    all_models[i].to(device)
    all_models[i].eval()
    print("Loaded: ", name)


loading models...
ensemble initialised with  7  models
loading models...
ensemble initialised with  7  models
loading models...
ensemble initialised with  7  models
loading models...
ensemble initialised with  7  models
loading models...
ensemble initialised with  7  models
Loaded:  ./model_101.pth
Loaded:  ./model_102.pth
Loaded:  ./model_103.pth
Loaded:  ./model_104.pth
Loaded:  ./model_105.pth


In [105]:
all_val_losses = []
for i in range(len(all_models)):
    print(i)
    losses, val_losses = train_one_fold(all_models[i],1 , train_loader,val_loader, optimizer, scheduler, criterion, fold_number = 0, train = False, validate = True)
    all_val_losses.append(np.mean(np.array(val_losses)))
all_val_losses = np.array(all_val_losses)
print("done validating")

0
1
2
3
4
done validating


In [95]:
all_val_losses

array([0.01221509, 0.01267824, 0.01224699, 0.01227677, 0.01283966])

In [106]:
class model_jury(object):   ## only works for dataloaders for batch size 1 
    def __init__(self, all_models):
        self.all_models = all_models
             
    def predict(self, x, plot = False, sigmoid = False):
        
        with torch.no_grad():
            
            if sigmoid == False:
                preds = [self.all_models[i](x.to(device)).view(-1).cpu().tolist() for i in range(len(self.all_models))]
            else:
                preds = [self.all_models[i](x.to(device)).view(-1).cpu().sigmoid().tolist() for i in range(len(self.all_models))]

        if plot == True:
            for pred in preds:
                plt.plot(pred)
            plt.show()
            
        preds = np.array(preds)
        mean = np.mean(preds, axis = 0)
        return mean.flatten()


In [107]:
jury = model_jury(all_models)
"Using " + str(len(jury.all_models)) + "  models"

'Using 5  models'

In [108]:
test_dataset = TrainDataset(test, target, noise = False)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)
val_loader_test_jury = DataLoader(dataset= valid_dataset, batch_size=1, shuffle=False)

In [109]:
list_of_preds = []
for batch in tqdm(test_loader):
    x, y = batch
    foo = jury.predict(x, plot = False, sigmoid = True)
    list_of_preds.append(foo)


HBox(children=(FloatProgress(value=0.0, max=3982.0), HTML(value='')))




In [110]:
submission = pd.read_csv('sample_submission.csv')
sub_cp = submission
sub_cp.to_csv('./submission_cp.csv', index=None, header=True)

import csv 
a = list_of_preds  
with open('./submission_cp.csv', "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(a)

final_sub = pd.read_csv('./submission_cp.csv', header = None)

final_sub.columns = submission.columns[1:]
final_sub["sig_id"] = submission["sig_id"]

good_cols = np.roll(final_sub.columns.values, 1)
final_sub = final_sub[good_cols]

In [111]:
targets = [col for col in final_sub.columns]
final_sub.loc[test_features['cp_type']=='ctl_vehicle', targets[1:]] = 0
final_sub.to_csv('submission.csv', index=False)

In [113]:
final_sub

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,...,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gtpase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,1.062902e-05,0.000042,0.001110,0.018909,0.030439,0.005953,0.000219,0.003617,1.160462e-08,0.006177,0.008534,0.000030,2.885769e-07,0.000004,0.001898,5.044409e-04,0.000212,0.005188,0.008425,0.001180,0.000625,0.003545,0.000033,0.000367,0.000020,0.000005,0.001373,0.000292,0.003815,0.000459,0.000094,0.004068,0.004161,1.112736e-07,0.000001,1.415943e-07,0.002361,1.109351e-07,1.197417e-06,...,0.000754,0.000075,0.004941,1.039965e-06,0.000114,3.115015e-05,0.000072,0.000021,0.000143,0.001288,0.004494,0.001835,0.006124,0.003520,0.000125,0.000156,0.054740,0.004302,0.000010,2.165041e-05,0.000002,0.000188,1.240706e-08,0.000072,7.635799e-05,0.000622,0.000067,0.001906,0.000017,0.000051,0.000177,0.000514,0.000957,2.396159e-04,0.000026,0.000045,0.000016,0.000349,7.569705e-04,0.000079
1,id_001897cda,2.260456e-07,0.000045,0.000749,0.000532,0.000036,0.000350,0.002305,0.008590,1.545862e-02,0.005523,0.004852,0.000157,7.033517e-08,0.021664,0.000035,2.905526e-07,0.000007,0.000198,0.000316,0.003985,0.000237,0.000109,0.000006,0.000032,0.000016,0.000083,0.000006,0.000001,0.000197,0.000103,0.000031,0.000184,0.000090,1.451287e-04,0.000002,4.654794e-08,0.001973,8.279389e-03,2.352935e-04,...,0.000254,0.000194,0.000003,7.278010e-07,0.001323,9.111682e-06,0.000403,0.050590,0.000018,0.001017,0.006411,0.000098,0.000009,0.000023,0.000103,0.004387,0.003737,0.000011,0.028157,8.230975e-07,0.000141,0.002597,6.307424e-05,0.000029,5.678565e-08,0.000058,0.000012,0.000505,0.000176,0.001239,0.000231,0.000006,0.003396,7.315809e-07,0.007415,0.000002,0.002013,0.000078,7.318613e-06,0.002245
2,id_002429b5b,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000
3,id_00276f245,5.039742e-06,0.000008,0.000439,0.013312,0.019374,0.001753,0.001140,0.006905,5.773765e-08,0.002577,0.019274,0.000278,1.405586e-06,0.000303,0.001265,2.383033e-03,0.000512,0.003092,0.001560,0.001367,0.002063,0.001486,0.000348,0.001784,0.000315,0.000029,0.001416,0.002848,0.006056,0.001161,0.001185,0.000570,0.001282,1.839997e-04,0.000187,2.251017e-07,0.000432,9.487835e-07,4.553504e-06,...,0.001005,0.000486,0.001260,9.466257e-06,0.000228,3.879312e-07,0.000445,0.000009,0.000437,0.000099,0.019126,0.052328,0.000734,0.000272,0.005107,0.000780,0.005728,0.001340,0.000347,3.804671e-04,0.000008,0.007915,8.596392e-08,0.000413,2.846896e-04,0.002335,0.000070,0.000078,0.000001,0.000438,0.000037,0.000087,0.000452,8.748351e-04,0.006550,0.000023,0.000087,0.002722,2.169942e-05,0.001038
4,id_0027f1083,1.390811e-04,0.000036,0.005404,0.032306,0.033079,0.004518,0.003484,0.000748,2.356906e-07,0.006107,0.012963,0.000188,3.359609e-08,0.000041,0.003583,1.936807e-03,0.002033,0.003428,0.000459,0.000752,0.004104,0.004884,0.000099,0.006739,0.000170,0.000017,0.000283,0.000111,0.004921,0.003509,0.000991,0.009773,0.003578,1.457312e-07,0.000004,2.512226e-07,0.000606,1.813239e-06,5.262109e-07,...,0.006769,0.000700,0.007224,1.768752e-03,0.000013,4.041204e-06,0.001240,0.000397,0.001642,0.001359,0.009353,0.003890,0.000220,0.000537,0.000270,0.001413,0.024483,0.002872,0.000091,5.411162e-03,0.000198,0.001166,8.198553e-07,0.000053,4.900316e-04,0.001883,0.000075,0.000032,0.000361,0.000131,0.000911,0.000020,0.008252,7.665242e-04,0.000164,0.000023,0.000054,0.003331,1.829346e-06,0.000269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,id_ff7004b87,4.496332e-08,0.000093,0.001364,0.001956,0.000749,0.000327,0.000097,0.004427,4.630411e-07,0.000540,0.002443,0.000398,6.856277e-04,0.018061,0.000554,2.452804e-05,0.000223,0.001142,0.002220,0.000975,0.000645,0.000215,0.000548,0.000331,0.000190,0.001012,0.000843,0.000975,0.001678,0.000020,0.000085,0.001575,0.000023,3.303657e-04,0.000014,3.039603e-06,0.004623,2.765309e-03,5.504805e-04,...,0.004926,0.000191,0.000029,8.996179e-07,0.000085,1.649611e-05,0.000116,0.000029,0.000045,0.000066,0.007571,0.002244,0.000109,0.000021,0.001591,0.000094,0.006371,0.000062,0.005658,3.844468e-06,0.000052,0.000613,1.170486e-06,0.000074,8.700783e-06,0.000141,0.000002,0.000118,0.000005,0.001740,0.000065,0.008044,0.000430,1.696990e-02,0.003613,0.000529,0.000142,0.000468,3.231545e-05,0.000029
3978,id_ff925dd0d,1.369874e-02,0.002163,0.000094,0.006304,0.037603,0.019022,0.007613,0.001681,5.002112e-06,0.074441,0.047781,0.000077,1.575477e-08,0.000037,0.000109,6.535483e-05,0.001181,0.006271,0.004027,0.003050,0.000400,0.001442,0.000222,0.000244,0.001601,0.000047,0.000042,0.000023,0.001896,0.003481,0.001822,0.000238,0.008843,4.087827e-07,0.000009,7.275176e-08,0.001565,2.104502e-07,5.226795e-07,...,0.000451,0.001321,0.009043,1.589798e-06,0.000412,2.437357e-06,0.000050,0.001253,0.000073,0.001123,0.011488,0.032768,0.001873,0.006135,0.000572,0.000331,0.035377,0.000180,0.000295,1.491121e-05,0.000173,0.003610,6.385940e-07,0.002515,1.852057e-04,0.002225,0.000019,0.001310,0.000200,0.000475,0.000017,0.000007,0.001341,2.382031e-05,0.000375,0.000038,0.000310,0.000385,3.369216e-07,0.001181
3979,id_ffb710450,6.301934e-05,0.000030,0.000240,0.025946,0.037948,0.006070,0.002633,0.002967,2.572523e-08,0.012696,0.043949,0.000093,2.896414e-08,0.000034,0.000771,7.937054e-04,0.000470,0.003788,0.003312,0.001811,0.000867,0.003632,0.000143,0.000134,0.000252,0.000012,0.000395,0.000114,0.012567,0.002305,0.000836,0.000489,0.002105,2.850352e-06,0.000022,6.343789e-09,0.000202,3.855445e-08,9.024463e-07,...,0.000100,0.000103,0.007900,3.800623e-07,0.000227,6.437999e-07,0.000263,0.000072,0.000310,0.000250,0.015059,0.012817,0.001037,0.000863,0.000711,0.000192,0.021389,0.000996,0.000063,1.022252e-04,0.000002,0.004285,1.485096e-08,0.000882,1.326317e-04,0.003754,0.000038,0.000836,0.000003,0.000296,0.000035,0.000028,0.000586,2.673601e-04,0.000555,0.000007,0.000020,0.000663,2.085500e-05,0.000748
3980,id_ffbb869f2,1.884813e-03,0.000163,0.000223,0.025084,0.024609,0.011300,0.002004,0.004524,3.676797e-07,0.033525,0.019832,0.000126,4.353855e-09,0.000020,0.000483,5.424684e-04,0.000264,0.007971,0.002782,0.001608,0.001055,0.001487,0.000078,0.000342,0.001387,0.000008,0.000014,0.000134,0.006992,0.014400,0.000233,0.000639,0.013761,2.798842e-06,0.000025,9.780451e-08,0.001816,8.285455e-08,1.011582e-07,...,0.000918,0.001386,0.023272,2.648556e-06,0.000057,2.571455e-06,0.000088,0.000209,0.000152,0.000343,0.005656,0.004529,0.001210,0.001091,0.000343,0.000107,0.024251,0.000646,0.000042,1.987313e-04,0.000014,0.003675,6.409944e-08,0.000861,1.496265e-04,0.003109,0.000113,0.000201,0.000106,0.000351,0.000013,0.000003,0.001484,1.890469e-05,0.000436,0.000012,0.000457,0.000752,4.711344e-06,0.002471
