In [None]:
import time
import numpy as np 
import pandas as pd 
from IPython.display import clear_output
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter1d   ## smoother
from tqdm.notebook import tqdm, tnrange

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
from sklearn.preprocessing import MinMaxScaler
        
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import KFold


def seed_everything(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)

In [None]:
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'
    
device

In [None]:
train_features = pd.read_csv('../input/lish-moa/train_features.csv')
train_targets_scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
test_features = pd.read_csv('../input/lish-moa/test_features.csv')
submission = pd.read_csv('../input/lish-moa/sample_submission.csv')

In [101]:
train_features[train_features.columns[775]]

0       -0.0224
1       -0.2718
2        0.7923
3       -0.0343
4       -0.6269
          ...  
23809    0.3806
23810   -0.4751
23811    0.3603
23812   -0.4741
23813    2.2190
Name: g-771, Length: 23814, dtype: float64

In [None]:
ignore_columns = ['sig_id', "cp_type"]

train_columns = [x for x in train_features.columns if x not in ignore_columns]

train = train_features[train_columns]
test = test_features[train_columns]
target = train_targets_scored.iloc[:,1:].values

In [None]:
transform = ColumnTransformer([
                            ('o',OneHotEncoder(),[0,1]),
                            ('s',Normalizer(),list(range(3,train.shape[1])))  ## remove
                        ])


train = transform.fit_transform(train)
test = transform.transform(test)

In [None]:
train.shape, test.shape, target.shape

In [102]:
class TrainDataset(Dataset):
    def __init__(self, train,targets, noise ):
        
        self.features  = train
        self.targets = targets
        self.noise = noise
        
    def sizes(self):
        print("features size = ", self.features.shape[1])
        print("targets size = ", self.targets.shape[1])

        
    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, idx):
        feature = torch.tensor(self.features[idx]).float()
        
#         if self.noise == True:
# #             print("noisy boi")
#             feature  = feature + torch.randn_like(feature)/150
            
        target = torch.tensor(self.targets[idx]).float()
        
        return feature, target

In [103]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']
        
def show_lr(learning_rates):
    plt.plot(learning_rates, label = "learning rate")
    plt.ylabel("Learning rate", fontsize = 15)
    plt.grid(True)
    plt.legend()
    plt.show()

def train_step(x, y, model, optimizer, criterion):
    optimizer.zero_grad()
    pred = model(x.to(device))
    y = y.float()
    loss = criterion(pred,y.to(device))
    loss.backward()
    optimizer.step()
    return loss.item()

In [177]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
    elif classname.find('Linear') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        
        self.batch_norm1 = nn.BatchNorm1d(776)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(776, 1024))
        
        self.batch_norm2 = nn.BatchNorm1d(1024)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(1024, 2048))
        
        self.batch_norm3 = nn.BatchNorm1d(2048)
        self.dropout3 = nn.Dropout(0.3)
        self.dense3 = nn.utils.weight_norm(nn.Linear(2048, 2048))
        
        self.batch_norm4 = nn.BatchNorm1d(2048)
        self.dropout4 = nn.Dropout(0.2)
        self.dense4 = nn.utils.weight_norm(nn.Linear(2048, 1024))
        
        self.batch_norm5 = nn.BatchNorm1d(1024)
        #self.dropout5 = nn.Dropout(0.5)
        self.dense5 = nn.utils.weight_norm(nn.Linear(1024, 1024))
        
        #######################################################
        
        self.batch_norm21 = nn.BatchNorm1d(100)
        self.dropout21 = nn.Dropout(0.1)
        self.dense21 = nn.utils.weight_norm(nn.Linear(100, 1024))
        
        self.batch_norm22 = nn.BatchNorm1d(1024)
        self.dropout22 = nn.Dropout(0.1)
        self.dense22 = nn.utils.weight_norm(nn.Linear(1024, 256))
        
        #######################################################
        
        self.combined_fc1 = nn.Linear(256+1024, 1024)
        self.combined_fc2 = nn.Linear(1024, 512)
        self.combined_fc3 = nn.Linear(512, 206)
        
    
    def forward(self, x):
        
                
        x_up = x[:, :-100]
        x_down = x[:, -100:]
        
        #####################################
        
        x_up = self.batch_norm1(x_up)
        x_up = self.dropout1(x_up)
        x_up = F.leaky_relu(self.dense1(x_up))
        
        x_up = self.batch_norm2(x_up)
        x_up = self.dropout2(x_up)
        x_up = F.leaky_relu(self.dense2(x_up))
        
        x_up = self.batch_norm3(x_up)
        x_up = self.dropout3(x_up)
        x_up = F.leaky_relu(self.dense3(x_up))
        
        x_up = self.batch_norm4(x_up)
        x_up = self.dropout4(x_up)
        x_up = F.leaky_relu(self.dense4(x_up))
        
        x_up = self.batch_norm5(x_up)
        #x = self.dropout5(x)
        x_up = (self.dense5(x_up))
        
        ##########################################
        
        x_down = self.batch_norm21(x_down)
        x_down = self.dropout21(x_down)
        x_down = F.leaky_relu(self.dense21(x_down))
        
        x_down = self.batch_norm22(x_down)
        x_down = self.dropout22(x_down)
        x_down = F.leaky_relu(self.dense22(x_down))
        
        ###########################################
        
        x = torch.cat((x_up.squeeze() ,x_down.squeeze()), dim=1)
        #print(x.size())
        x = F.leaky_relu(self.combined_fc1(x))
        x = F.leaky_relu(self.combined_fc2(x))
        x = self.combined_fc3(x)
        
        
        return x
    
    
    
model = Model()

In [178]:
def train_one_fold(model,num_epochs , train_loader,val_loader, optimizer, scheduler, criterion, fold_number = 1, show_plots = False, train = True, validate = True):
    
    losses = []
    val_losses = []
    learning_rates = []    
    best_loss = 1000000

    for epoch in range(num_epochs):

            
        if train == True:
            model.train()
            losses_temp = []
            for batch in train_loader:
                (x_batch, y_batch) = batch
                loss = train_step(x_batch.to(device), y_batch.to(device), model, optimizer, criterion)
                losses_temp.append(loss)
            losses.append(torch.mean(torch.tensor(losses_temp)))
            scheduler.step(1.)   ## lr decay caller 
            learning_rates.append(get_lr(optimizer))
            

        if validate == True:
            with torch.no_grad():
                model.eval()
                val_losses_temp = []
                for x_val, y_val in val_loader:
                    yhat =model(x_val.to(device))  # pred 
                    val_loss = criterion(yhat.to(device), y_val.to(device))
                    val_losses_temp.append(val_loss.item())  ## metrics 
                val_losses.append(torch.mean(torch.tensor(val_losses_temp)).item())  ## metrics 

        
        if train == True:
            print ("epoch ", epoch+1, " out of ", num_epochs, end = "      >" )

            if val_losses[-1] <= best_loss:

                print(CGREEN, "Val loss decreased from:", best_loss, " to ", val_losses[-1], CEND, end = "   >")
                best_loss = val_losses[-1]

                name = "./model_" + str(fold_number)+".pth"

                print("saving model as: ", name)

                torch.save(model.state_dict(), name)

            else: 
                print("showing no improvements, best loss yet:", best_loss)

        if show_plots == True:

            show_lr(learning_rates)
            plt.plot(val_losses, label = "val")
            plt.axhline(min(val_losses), linestyle = "--", c = "r")
            plt.legend()
            plt.grid()
            plt.show()


            plt.plot(val_losses[4:], label = "val after main drop", c = "g")
            plt.axhline(min(val_losses), linestyle = "--", c = "r")
            plt.legend()
            plt.grid()
            plt.show()


            plt.plot(losses, label = "train")
            plt.legend()
            plt.grid()
            plt.show()
        
    return losses, val_losses

In [179]:
## building folds 

CGREEN  = '\33[32m'
CBLUE =  '\033[34m'
CRED = '\033[1;31m'
CEND  = '\33[0m'

NFOLDS =10
num_epochs = 40

kfold = KFold(NFOLDS,shuffle=True,random_state=42)
fold_train_losses = list()
fold_valid_losses = list()


for k , (train_idx,valid_idx) in enumerate(kfold.split(train)):

    x_train,x_valid,y_train,y_valid = train[train_idx,:],train[valid_idx,:],target[train_idx,:],target[valid_idx,:]

    input_size = x_train.shape[1]
    output_size = target.shape[1]
    
    
    train_dataset = TrainDataset(x_train, y_train, noise = False)
    valid_dataset = TrainDataset(x_valid, y_valid, noise = False)
    
    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=False, num_workers = 8)

    val_loader = DataLoader(dataset=valid_dataset, batch_size=256, shuffle = True, num_workers = 8)
    
    model = Model()
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.1e-3)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                     mode='min', 
                                                     factor=0.5, 
                                                     patience=5, 
                                                     eps=1e-5, 
                                                     verbose=True)
    criterion = nn.BCEWithLogitsLoss()
    print(CRED ,"fold ", str(k+1), CEND)

    train_one_fold(model, num_epochs , train_loader,val_loader, optimizer, scheduler, criterion, fold_number = k+1)
    
print(CBLUE, "Training complete", CEND)

[1;31m fold  1 [0m
epoch  1  out of  40      >[32m Val loss decreased from: 1000000  to  0.019628141075372696 [0m   >saving model as:  ./model_1.pth
epoch  2  out of  40      >[32m Val loss decreased from: 0.019628141075372696  to  0.01875883713364601 [0m   >saving model as:  ./model_1.pth
epoch  3  out of  40      >[32m Val loss decreased from: 0.01875883713364601  to  0.017806218937039375 [0m   >saving model as:  ./model_1.pth
epoch  4  out of  40      >[32m Val loss decreased from: 0.017806218937039375  to  0.01737024262547493 [0m   >saving model as:  ./model_1.pth
epoch  5  out of  40      >[32m Val loss decreased from: 0.01737024262547493  to  0.016918351873755455 [0m   >saving model as:  ./model_1.pth
epoch  6  out of  40      >[32m Val loss decreased from: 0.016918351873755455  to  0.016733521595597267 [0m   >saving model as:  ./model_1.pth
Epoch     7: reducing learning rate of group 0 to 5.0000e-05.
epoch  7  out of  40      >[32m Val loss decreased from: 0.0167

KeyboardInterrupt: 

In [None]:
all_models = [Model() for i in range (NFOLDS)]

for i in range (len(all_models)):
    
    name = "./model_" + str(i + 1) + ".pth"
    all_models[i].load_state_dict(torch.load(name))
    all_models[i].to(device)
    print("Loaded: ", name)

In [None]:
all_val_losses = []
for i in range(NFOLDS):
    losses, val_losses = train_one_fold(all_models[i],5 , train_loader,val_loader, optimizer, scheduler, criterion, fold_number = 0, train = False, validate = True)
    all_val_losses.append(np.mean(np.array(val_losses)))
all_val_losses = np.array(all_val_losses)

In [None]:
class model_jury(object):   ## only works for dataloaders for batch size 1 
    def __init__(self, all_models):
        self.all_models = all_models
        
        
        
    def predict(self, x, plot = False, sigmoid = False):
        
        with torch.no_grad():
            
            if sigmoid == False:
                preds = [self.all_models[i](x.to(device)).view(-1).cpu().tolist() for i in range(len(self.all_models))]
            else:
                preds = [self.all_models[i](x.to(device)).view(-1).cpu().sigmoid().tolist() for i in range(len(self.all_models))]

        
        if plot == True:
            for pred in preds:
                plt.plot(pred)
            plt.show()
            
        preds = np.array(preds)
        mean = np.mean(preds, axis = 0)
        return mean.flatten()
jury = model_jury(all_models)


In [None]:
test_dataset = TrainDataset(test, target, noise = False)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers = 8)
val_loader_test_jury = DataLoader(dataset= valid_dataset, batch_size=1, shuffle=False, num_workers = 8)

In [None]:
with torch.no_grad():
    benchmark_losses = []
    criterion = nn.BCEWithLogitsLoss()
    for batch in tqdm(val_loader_test_jury):
        x, y = batch
        pred = jury.predict(x, plot = False, sigmoid = False)
        pred = torch.tensor(pred).view(1,-1)

        benchmark_losses.append(criterion(pred, y))

In [None]:
plt.plot(all_val_losses)
plt.axhline(y = all_val_losses.mean(), label = "loss mean = " + str(all_val_losses.mean()), c = "r", linestyle = "--")
plt.axhline(y = np.array(benchmark_losses).mean(), label = "jury loss = " + str(np.array(benchmark_losses).mean()), c = "g", linestyle = "--")
plt.legend(fontsize = 17)
plt.grid()
plt.show()

In [None]:
list_of_preds = []
for batch in tqdm(test_loader):
    x, y = batch
    foo = jury.predict(x, plot = False, sigmoid = True)
    list_of_preds.append(foo)

submission = pd.read_csv('../input/lish-moa/sample_submission.csv')
sub_cp = submission
sub_cp.to_csv('./submission_cp.csv', index=None, header=True)

import csv 
a = list_of_preds  
with open('./submission_cp.csv', "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(a)

final_sub = pd.read_csv('./submission_cp.csv', header = None)

final_sub.columns = submission.columns[1:]
final_sub["sig_id"] = submission["sig_id"]

good_cols = np.roll(final_sub.columns.values, 1)
final_sub = final_sub[good_cols]

In [None]:
final_sub.head()

In [None]:
targets = [col for col in final_sub.columns]
final_sub.loc[test_features['cp_type']=='ctl_vehicle', targets[1:]] = 0
final_sub.to_csv('submission.csv', index=False)

In [None]:
final_sub[:30]