In [1]:
import sys
sys.path.append('../input/iterativestratification')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [2]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import warnings
warnings.filterwarnings('ignore')

In [3]:
from sklearn.preprocessing import QuantileTransformer

In [4]:
train_features = pd.read_csv('../lish-moa/train_features.csv')
train_targets_scored = pd.read_csv('../lish-moa/train_targets_scored.csv')
train_targets_nonscored = pd.read_csv('../lish-moa/train_targets_nonscored.csv')

test_features = pd.read_csv('../lish-moa/test_features.csv')
sample_submission = pd.read_csv('../lish-moa/sample_submission.csv')

In [5]:
GENES = [col for col in train_features.columns if col.startswith('g-')]
CELLS = [col for col in train_features.columns if col.startswith('c-')]

In [6]:
#RankGauss

for col in (GENES + CELLS):

    transformer = QuantileTransformer(n_quantiles=100,random_state=0, output_distribution="normal")
    vec_len = len(train_features[col].values)
    vec_len_test = len(test_features[col].values)
    raw_vec = train_features[col].values.reshape(vec_len, 1)
    transformer.fit(raw_vec)

    train_features[col] = transformer.transform(raw_vec).reshape(1, vec_len)[0]
    test_features[col] = transformer.transform(test_features[col].values.reshape(vec_len_test, 1)).reshape(1, vec_len_test)[0]

In [7]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)

# PCA features + Existing features

In [8]:
# GENES
n_comp = 600  #<--Update

data = pd.concat([pd.DataFrame(train_features[GENES]), pd.DataFrame(test_features[GENES])])
data2 = (PCA(n_components=n_comp, random_state=42).fit_transform(data[GENES]))
train2 = data2[:train_features.shape[0]]; test2 = data2[-test_features.shape[0]:]

train2 = pd.DataFrame(train2, columns=[f'pca_G-{i}' for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f'pca_G-{i}' for i in range(n_comp)])

# drop_cols = [f'c-{i}' for i in range(n_comp,len(GENES))]
train_features = pd.concat((train_features, train2), axis=1)
test_features = pd.concat((test_features, test2), axis=1)

In [9]:
#CELLS
n_comp = 50  #<--Update

data = pd.concat([pd.DataFrame(train_features[CELLS]), pd.DataFrame(test_features[CELLS])])
data2 = (PCA(n_components=n_comp, random_state=42).fit_transform(data[CELLS]))
train2 = data2[:train_features.shape[0]]; test2 = data2[-test_features.shape[0]:]

train2 = pd.DataFrame(train2, columns=[f'pca_C-{i}' for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f'pca_C-{i}' for i in range(n_comp)])

# drop_cols = [f'c-{i}' for i in range(n_comp,len(CELLS))]
train_features = pd.concat((train_features, train2), axis=1)
test_features = pd.concat((test_features, test2), axis=1)

# feature Selection using Variance Encoding

In [10]:
from sklearn.feature_selection import VarianceThreshold


var_thresh = VarianceThreshold(0.8)  #<-- Update
data = train_features.append(test_features)
data_transformed = var_thresh.fit_transform(data.iloc[:, 4:])

train_features_transformed = data_transformed[ : train_features.shape[0]]
test_features_transformed = data_transformed[-test_features.shape[0] : ]


train_features = pd.DataFrame(train_features[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                              columns=['sig_id','cp_type','cp_time','cp_dose'])

train_features = pd.concat([train_features, pd.DataFrame(train_features_transformed)], axis=1)


test_features = pd.DataFrame(test_features[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                             columns=['sig_id','cp_type','cp_time','cp_dose'])

test_features = pd.concat([test_features, pd.DataFrame(test_features_transformed)], axis=1)

In [11]:
train = train_features.merge(train_targets_scored, on='sig_id')
train = train[train['cp_type']!='ctl_vehicle'].reset_index(drop=True)
test = test_features[test_features['cp_type']!='ctl_vehicle'].reset_index(drop=True)

target = train[train_targets_scored.columns]

In [12]:
train = train.drop('cp_type', axis=1)
test = test.drop('cp_type', axis=1)

In [13]:
target_cols = target.drop('sig_id', axis=1).columns.values.tolist()

# CV folds

In [14]:
folds = train.copy()

mskf = MultilabelStratifiedKFold(n_splits=7)

for f, (t_idx, v_idx) in enumerate(mskf.split(X=train, y=target)):
    folds.loc[v_idx, 'kfold'] = int(f)

folds['kfold'] = folds['kfold'].astype(int)

# Dataset Classes

In [15]:
class MoADataset:
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets
        
    def __len__(self):
        return (self.features.shape[0])
    
    def __getitem__(self, idx):
        dct = {
            'x' : torch.tensor(self.features[idx, :], dtype=torch.float),
            'y' : torch.tensor(self.targets[idx, :], dtype=torch.float)            
        }
        return dct
    
class TestDataset:
    def __init__(self, features):
        self.features = features
        
    def __len__(self):
        return (self.features.shape[0])
    
    def __getitem__(self, idx):
        dct = {
            'x' : torch.tensor(self.features[idx, :], dtype=torch.float)
        }
        return dct
    

In [16]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    model.train()
    final_loss = 0
    
    for data in dataloader:
        optimizer.zero_grad()
        inputs, targets = data['x'].to(device), data['y'].to(device)
#         print(inputs.shape)
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        #scheduler.step()
        
        final_loss += loss.item()
        
    final_loss /= len(dataloader)
    
    return final_loss


def valid_fn(model, loss_fn, dataloader, device):
    model.eval()
    final_loss = 0
    valid_preds = []
    
    for data in dataloader:
        inputs, targets = data['x'].to(device), data['y'].to(device)
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        
        final_loss += loss.item()
        valid_preds.append(outputs.sigmoid().detach().cpu().numpy())
        
    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)
    
    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    model.eval()
    preds = []
    
    for data in dataloader:
        inputs = data['x'].to(device)

        with torch.no_grad():
            outputs = model(inputs)
        
        preds.append(outputs.sigmoid().detach().cpu().numpy())
        
    preds = np.concatenate(preds)
    
    return preds

In [17]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        loss = F.binary_cross_entropy_with_logits(inputs, targets,self.weight)

        if  self.reduction == 'sum':
            loss = loss.sum()
        elif  self.reduction == 'mean':
            loss = loss.mean()

        return loss

# Model

In [18]:
class Model(nn.Module):
    def __init__(self, num_features, num_targets, hidden_size, dropout2, dropout3, negative_slope1, negative_slope2):
        super(Model, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))
        
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(dropout2)
        self.leaky_relu1 = nn.LeakyReLU(negative_slope1)
        self.dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))
        
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(dropout3)
        self.leaky_relu2 = nn.LeakyReLU(negative_slope2)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))
        
    def recalibrate_layer(self, layer):

        if(torch.isnan(layer.weight_v).sum() > 0):
            print ('recalibrate layer.weight_v')
            layer.weight_v = torch.nn.Parameter(torch.where(torch.isnan(layer.weight_v), torch.zeros_like(layer.weight_v), layer.weight_v))
            layer.weight_v = torch.nn.Parameter(layer.weight_v + 1e-7)

        if(torch.isnan(layer.weight).sum() > 0):
            print ('recalibrate layer.weight')
            layer.weight = torch.where(torch.isnan(layer.weight), torch.zeros_like(layer.weight), layer.weight)
            layer.weight += 1e-7
    
    def forward(self, x):
        x = self.batch_norm1(x)
        #self.recalibrate_layer(self.dense1)
        x = self.leaky_relu1(self.dense1(x))
        
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        #self.recalibrate_layer(self.dense2)
        x = self.leaky_relu2(self.dense2(x))
        
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)
        
        return x
    
class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.0, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # true_dist = pred.data.clone()
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing / (self.cls - 1))
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))    

# Preprocessing steps

In [19]:
def process_data(data):
    data = pd.get_dummies(data, columns=['cp_time','cp_dose'])
    return data

In [20]:
feature_cols = [c for c in process_data(folds).columns if c not in target_cols]
feature_cols = [c for c in feature_cols if c not in ['kfold','sig_id']]

In [21]:
# HyperParameters

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 50
#BATCH_SIZE = 192
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
NFOLDS = 7           
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False

num_features=len(feature_cols)
num_targets=len(target_cols)
#hidden_size=1500

# Single fold training

In [22]:
def run_training(fold, params):
    
    seed_everything(1)
    
    train = process_data(folds)
    test_ = process_data(test)
    
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index
    
    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
    x_train, y_train  = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols].values
    
    train_dataset = MoADataset(x_train, y_train)
    valid_dataset = MoADataset(x_valid, y_valid)
    
    batch_size_params = params['batch_size']
    
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size_params, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size_params, shuffle=False)
    
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=params['hidden_size'],
        dropout2=params['dropout2'],
        dropout3=params['dropout3'],
        negative_slope1=params['negative_slope1'],
        negative_slope2=params['negative_slope2']
    )
    
    model.to(DEVICE)
    
    #optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    #scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
    #                                          max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
    
    lr_params = params['lr']
    lr_decay_params = params['lr_decay']
    weight_decay_params = params['weight_decay']
    
    factor_params = params['factor']
    patience_params = params['patience']
    
    optimizer = optim.Adagrad(model.parameters(), lr=lr_params, lr_decay=lr_decay_params, weight_decay=weight_decay_params, initial_accumulator_value=0, eps=1e-10)  
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=factor_params, patience=patience_params, threshold=1e-8, eps=1e-10, verbose=True)
    
    smoothing_params = params['smoothing']
    
    loss_fn = nn.BCEWithLogitsLoss()
    loss_tr = SmoothBCEwLogits(smoothing = smoothing_params)
    
    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0
   
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    best_loss = np.inf
    
    for epoch in range(EPOCHS):
        
        train_loss = train_fn(model, optimizer,scheduler, loss_tr, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")
        
        scheduler.step(valid_loss)
        
        if valid_loss < best_loss:
            
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            #torch.save(model.state_dict(), f"FOLD{fold}_.pth")
        
        elif(EARLY_STOP == True):
            
            early_step += 1
            if (early_step >= early_stopping_steps):
                break
            
    
    #--------------------- PREDICTION---------------------
    #x_test = test_[feature_cols].values
    #testdataset = TestDataset(x_test)
    #testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)
    
    #model = Model(
    #    num_features=num_features,
    #    num_targets=num_targets,
    #    hidden_size=hidden_size,

    #)
    
    #model.load_state_dict(torch.load(f"FOLD{fold}_.pth"))
    #model.to(DEVICE)
    
    #predictions = np.zeros((len(test_), target.iloc[:, 1:].shape[1]))
    #predictions = inference_fn(model, testloader, DEVICE)
    
    #return oof, predictions
    return best_loss

In [23]:
import optuna
from optuna import Trial, create_study

params={}

def get_nn_parameter_suggestions(trial: Trial) -> dict:
    return {
        "hidden_size": trial.suggest_int("hidden_size", 512, 2560, step = 256, log = False), #range_update
        "dropout2": trial.suggest_float("dropout2", 0.1, 0.95, step = None, log = False), #range_update
        "dropout3": trial.suggest_float("dropout3", 0.1, 0.95, step = None, log = False), #range_update
        "negative_slope1":trial.suggest_float("negative_slope1", 0.001, 0.1, step = 0.001, log = False),
        "negative_slope2":trial.suggest_float("negative_slope2", 0.001, 0.1, step = 0.001, log = False),
        "lr": trial.suggest_float("lr", 5e-2, 5e-1, step = None, log = False),
        "lr_decay": trial.suggest_float("lr_decay", 5e-4, 1e-2, step = None, log = False), #range_update
        "weight_decay": trial.suggest_float("weight_decay",1e-6, 5e-5, step = None, log = False), #range_update
        "factor": trial.suggest_float("factor",0.1, 0.9, step = None, log = False),
        "patience": trial.suggest_int("patience",1, 10, step = 1, log =False),
        "batch_size":trial.suggest_int("batch_size",128, 512, step = 64, log =False),
        "smoothing":trial.suggest_float("smoothing",0.001, 0.01, step = 0.001, log =False)
    }

def objective(trial: Trial) -> dict:
    
    _params = get_nn_parameter_suggestions(trial)
    
    all_losses=[]
    for f_ in range(7): #<-- 5folds
        temp_loss=run_training(f_,params=_params)
        all_losses.append(temp_loss)
    return np.mean(all_losses)

In [24]:
study=optuna.create_study(direction="minimize")
study.optimize(objective,n_trials=500)

[32m[I 2020-11-12 09:57:10,951][0m A new study created in memory with name: no-name-f8f41e2f-fc05-4df1-bfd3-063cccf561e8[0m


FOLD: 0, EPOCH: 0, train_loss: 0.0867878794670105
FOLD: 0, EPOCH: 0, valid_loss: 0.023894680131758963
FOLD: 0, EPOCH: 1, train_loss: 0.046229043265893346
FOLD: 0, EPOCH: 1, valid_loss: 0.02283282551382269
FOLD: 0, EPOCH: 2, train_loss: 0.04548271665615695
FOLD: 0, EPOCH: 2, valid_loss: 0.022579885220953395
FOLD: 0, EPOCH: 3, train_loss: 0.0445960954363857
FOLD: 0, EPOCH: 3, valid_loss: 0.02208471218390124
FOLD: 0, EPOCH: 4, train_loss: 0.0444127352287372
FOLD: 0, EPOCH: 4, valid_loss: 0.022495011932083538
FOLD: 0, EPOCH: 5, train_loss: 0.04415704097066607
FOLD: 0, EPOCH: 5, valid_loss: 0.02337102565382208
FOLD: 0, EPOCH: 6, train_loss: 0.044041545529450686
FOLD: 0, EPOCH: 6, valid_loss: 0.021748279354401996
FOLD: 0, EPOCH: 7, train_loss: 0.04393211805394718
FOLD: 0, EPOCH: 7, valid_loss: 0.02202375073518072
FOLD: 0, EPOCH: 8, train_loss: 0.0437739807225409
FOLD: 0, EPOCH: 8, valid_loss: 0.02143694566828864
FOLD: 0, EPOCH: 9, train_loss: 0.04369424602815083
FOLD: 0, EPOCH: 9, valid_loss

KeyboardInterrupt: 

In [None]:
study.best_params

In [25]:
params_192 = {'hidden_size': 1280, 'dropout2': 0.8160433144844258, 'dropout3': 0.36274350472386446, 'negative_slope1': 0.077, 'negative_slope2': 0.002, 'lr': 0.2415579776271606, 'lr_decay': 0.0030829965450683383, 'weight_decay': 1.6921803805971663e-06, 'factor': 0.6836043082432074, 'patience': 6, 'batch_size': 256, 'smoothing': 0.001}

In [26]:
def run_final_training(fold, params, seed):
    
    seed_everything(seed)
    
    train = process_data(folds)
    test_ = process_data(test)
    
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index
    
    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
    x_train, y_train  = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols].values
    
    train_dataset = MoADataset(x_train, y_train)
    valid_dataset = MoADataset(x_valid, y_valid)
    
    batch_size_params = params['batch_size']
    
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size_params, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size_params, shuffle=False)
    
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=params['hidden_size'],
        dropout2=params['dropout2'],
        dropout3=params['dropout3'],
        negative_slope1=params['negative_slope1'],
        negative_slope2=params['negative_slope2']
    )
    
    model.to(DEVICE)
    
    #optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    #scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
    #                                          max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
    
    lr_params = params['lr']
    lr_decay_params = params['lr_decay']
    weight_decay_params = params['weight_decay']
    
    factor_params = params['factor']
    patience_params = params['patience']
    
    optimizer = optim.Adagrad(model.parameters(), lr=lr_params, lr_decay=lr_decay_params, weight_decay=weight_decay_params, initial_accumulator_value=0, eps=1e-10)  
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=factor_params, patience=patience_params, threshold=1e-8, eps=1e-10, verbose=True)
    
    smoothing_params = params['smoothing']
    
    loss_fn = nn.BCEWithLogitsLoss()
    loss_tr = SmoothBCEwLogits(smoothing = smoothing_params)
    
    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0
   
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    best_loss = np.inf
    
    for epoch in range(EPOCHS):
        
        train_loss = train_fn(model, optimizer,scheduler, loss_tr, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")
        
        scheduler.step(valid_loss)
        
        if valid_loss < best_loss:
            
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), f"pytorch-01859-rankgauss-SEED{seed}_FOLD{fold}_.pth")
        
        elif(EARLY_STOP == True):
            
            early_step += 1
            if (early_step >= early_stopping_steps):
                break
            
    
    #--------------------- PREDICTION---------------------
    x_test = test_[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=batch_size_params, shuffle=False)
    
    #model = Model(
    #    num_features=num_features,
    #    num_targets=num_targets,
    #    hidden_size=hidden_size,

    #)
    
    model.load_state_dict(torch.load(f"pytorch-01859-rankgauss-SEED{seed}_FOLD{fold}_.pth"))
    model.to(DEVICE)
    
    predictions = np.zeros((len(test_), target.iloc[:, 1:].shape[1]))
    predictions = inference_fn(model, testloader, DEVICE)
    
    return oof, predictions

In [27]:
def run_k_fold(NFOLDS, params, seed):
    oof = np.zeros((len(train), len(target_cols)))
    predictions = np.zeros((len(test), len(target_cols)))
    
    for fold in range(NFOLDS):
        oof_, pred_ = run_final_training(fold, params, seed)
        
        predictions += pred_ / NFOLDS
        oof += oof_
        
    return oof, predictions

In [28]:
# Averaging on multiple SEEDS

SEED = [0,1,2,3,4,5,6] #<-- Update
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

for seed in SEED:
    params_= params_192
    oof_, predictions_ = run_k_fold(NFOLDS, params_, seed)
    oof += oof_ / len(SEED)
    predictions += predictions_ / len(SEED)

train[target_cols] = oof
test[target_cols] = predictions

FOLD: 0, EPOCH: 0, train_loss: 0.04423861001693719
FOLD: 0, EPOCH: 0, valid_loss: 0.02054226054595067
FOLD: 0, EPOCH: 1, train_loss: 0.022654341231729533
FOLD: 0, EPOCH: 1, valid_loss: 0.018971411081460807
FOLD: 0, EPOCH: 2, train_loss: 0.021762436205470883
FOLD: 0, EPOCH: 2, valid_loss: 0.018321416125847742
FOLD: 0, EPOCH: 3, train_loss: 0.021281203808816703
FOLD: 0, EPOCH: 3, valid_loss: 0.017842077291928805
FOLD: 0, EPOCH: 4, train_loss: 0.02097139082144241
FOLD: 0, EPOCH: 4, valid_loss: 0.017662380893643085
FOLD: 0, EPOCH: 5, train_loss: 0.020710097399313707
FOLD: 0, EPOCH: 5, valid_loss: 0.01740763630144871
FOLD: 0, EPOCH: 6, train_loss: 0.020528308845855093
FOLD: 0, EPOCH: 6, valid_loss: 0.017080991743848875
FOLD: 0, EPOCH: 7, train_loss: 0.020330401489863526
FOLD: 0, EPOCH: 7, valid_loss: 0.016996620867687922
FOLD: 0, EPOCH: 8, train_loss: 0.02018516357182651
FOLD: 0, EPOCH: 8, valid_loss: 0.01696788455144717
FOLD: 0, EPOCH: 9, train_loss: 0.01998929868175371
FOLD: 0, EPOCH: 9, 

FOLD: 1, EPOCH: 28, train_loss: 0.017838430112680874
FOLD: 1, EPOCH: 28, valid_loss: 0.01563360384450509
FOLD: 1, EPOCH: 29, train_loss: 0.01768105278244695
FOLD: 1, EPOCH: 29, valid_loss: 0.01564582000271632
FOLD: 1, EPOCH: 30, train_loss: 0.01762270215093284
FOLD: 1, EPOCH: 30, valid_loss: 0.015597246014154874
FOLD: 1, EPOCH: 31, train_loss: 0.01752921213974824
FOLD: 1, EPOCH: 31, valid_loss: 0.015615656685370665
FOLD: 1, EPOCH: 32, train_loss: 0.017426894121878856
FOLD: 1, EPOCH: 32, valid_loss: 0.015639893137491666
FOLD: 1, EPOCH: 33, train_loss: 0.01736023197756023
FOLD: 1, EPOCH: 33, valid_loss: 0.015631417457300883
FOLD: 1, EPOCH: 34, train_loss: 0.01721292162767133
FOLD: 1, EPOCH: 34, valid_loss: 0.015573990531265736
FOLD: 1, EPOCH: 35, train_loss: 0.017114044695689872
FOLD: 1, EPOCH: 35, valid_loss: 0.015551575387899693
FOLD: 1, EPOCH: 36, train_loss: 0.017019923814144487
FOLD: 1, EPOCH: 36, valid_loss: 0.015573673093548188
FOLD: 1, EPOCH: 37, train_loss: 0.016923134580153872


FOLD: 3, EPOCH: 5, train_loss: 0.02076798624585609
FOLD: 3, EPOCH: 5, valid_loss: 0.017327164514706686
FOLD: 3, EPOCH: 6, train_loss: 0.020554148717909247
FOLD: 3, EPOCH: 6, valid_loss: 0.017185897924579106
FOLD: 3, EPOCH: 7, train_loss: 0.020300085505319608
FOLD: 3, EPOCH: 7, valid_loss: 0.01707368530333042
FOLD: 3, EPOCH: 8, train_loss: 0.020150391259104818
FOLD: 3, EPOCH: 8, valid_loss: 0.01693414359425123
FOLD: 3, EPOCH: 9, train_loss: 0.02001273715113466
FOLD: 3, EPOCH: 9, valid_loss: 0.016859893042307634
FOLD: 3, EPOCH: 10, train_loss: 0.01985811995896133
FOLD: 3, EPOCH: 10, valid_loss: 0.016656495344180327
FOLD: 3, EPOCH: 11, train_loss: 0.019730413005360076
FOLD: 3, EPOCH: 11, valid_loss: 0.016617755907086227
FOLD: 3, EPOCH: 12, train_loss: 0.019589165653530007
FOLD: 3, EPOCH: 12, valid_loss: 0.016537740969887145
FOLD: 3, EPOCH: 13, train_loss: 0.019450302311294788
FOLD: 3, EPOCH: 13, valid_loss: 0.016535737909949742
FOLD: 3, EPOCH: 14, train_loss: 0.019362442774345744
FOLD: 3,

FOLD: 4, EPOCH: 33, train_loss: 0.017368888432109677
FOLD: 4, EPOCH: 33, valid_loss: 0.01602112716780259
FOLD: 4, EPOCH: 34, train_loss: 0.017251581809407956
FOLD: 4, EPOCH: 34, valid_loss: 0.016027843293089133
FOLD: 4, EPOCH: 35, train_loss: 0.017163687159080763
FOLD: 4, EPOCH: 35, valid_loss: 0.016034708358347416
FOLD: 4, EPOCH: 36, train_loss: 0.017094769817147706
FOLD: 4, EPOCH: 36, valid_loss: 0.016050964164046142
FOLD: 4, EPOCH: 37, train_loss: 0.016998917858644918
FOLD: 4, EPOCH: 37, valid_loss: 0.01603271411015437
FOLD: 4, EPOCH: 38, train_loss: 0.016868287558994582
FOLD: 4, EPOCH: 38, valid_loss: 0.016036424785852432
FOLD: 4, EPOCH: 39, train_loss: 0.0167729125635044
FOLD: 4, EPOCH: 39, valid_loss: 0.016027817502617836
FOLD: 4, EPOCH: 40, train_loss: 0.016651472013846442
FOLD: 4, EPOCH: 40, valid_loss: 0.015985964869077388
FOLD: 4, EPOCH: 41, train_loss: 0.016582591786376527
FOLD: 4, EPOCH: 41, valid_loss: 0.015979441455923594
FOLD: 4, EPOCH: 42, train_loss: 0.0164433975351621

FOLD: 6, EPOCH: 11, train_loss: 0.019749759721594887
FOLD: 6, EPOCH: 11, valid_loss: 0.01651351863088516
FOLD: 6, EPOCH: 12, train_loss: 0.01962226173664267
FOLD: 6, EPOCH: 12, valid_loss: 0.016382532480817575
FOLD: 6, EPOCH: 13, train_loss: 0.01945939161688895
FOLD: 6, EPOCH: 13, valid_loss: 0.016320309458443753
FOLD: 6, EPOCH: 14, train_loss: 0.019367297482047532
FOLD: 6, EPOCH: 14, valid_loss: 0.016261812371130172
FOLD: 6, EPOCH: 15, train_loss: 0.019219996130748374
FOLD: 6, EPOCH: 15, valid_loss: 0.016189617319748953
FOLD: 6, EPOCH: 16, train_loss: 0.019106568642766088
FOLD: 6, EPOCH: 16, valid_loss: 0.016138980594965126
FOLD: 6, EPOCH: 17, train_loss: 0.01897832111933747
FOLD: 6, EPOCH: 17, valid_loss: 0.016130568029788826
FOLD: 6, EPOCH: 18, train_loss: 0.018887281669555483
FOLD: 6, EPOCH: 18, valid_loss: 0.01614520548341366
FOLD: 6, EPOCH: 19, train_loss: 0.01880193023464164
FOLD: 6, EPOCH: 19, valid_loss: 0.01598673392660343
FOLD: 6, EPOCH: 20, train_loss: 0.018673177607156134


FOLD: 0, EPOCH: 38, train_loss: 0.016850506924596186
FOLD: 0, EPOCH: 38, valid_loss: 0.01578588396883928
FOLD: 0, EPOCH: 39, train_loss: 0.016744282625212863
FOLD: 0, EPOCH: 39, valid_loss: 0.015781953644294005
FOLD: 0, EPOCH: 40, train_loss: 0.016666306497378124
FOLD: 0, EPOCH: 40, valid_loss: 0.01581089162769226
FOLD: 0, EPOCH: 41, train_loss: 0.016600427154854342
FOLD: 0, EPOCH: 41, valid_loss: 0.015790745687599365
FOLD: 0, EPOCH: 42, train_loss: 0.016467317775193904
FOLD: 0, EPOCH: 42, valid_loss: 0.01581373531371355
FOLD: 0, EPOCH: 43, train_loss: 0.016366051170169503
FOLD: 0, EPOCH: 43, valid_loss: 0.01579198570778737
Epoch    44: reducing learning rate of group 0 to 1.6513e-01.
FOLD: 0, EPOCH: 44, train_loss: 0.01618787137841856
FOLD: 0, EPOCH: 44, valid_loss: 0.015796546537715655
FOLD: 0, EPOCH: 45, train_loss: 0.01601146921716832
FOLD: 0, EPOCH: 45, valid_loss: 0.01581272676300544
FOLD: 0, EPOCH: 46, train_loss: 0.015920038687417638
FOLD: 0, EPOCH: 46, valid_loss: 0.0158070219

FOLD: 2, EPOCH: 16, train_loss: 0.01898739207535982
FOLD: 2, EPOCH: 16, valid_loss: 0.016091331266439877
FOLD: 2, EPOCH: 17, train_loss: 0.01886495097062072
FOLD: 2, EPOCH: 17, valid_loss: 0.016049676932967626
FOLD: 2, EPOCH: 18, train_loss: 0.018767427048973134
FOLD: 2, EPOCH: 18, valid_loss: 0.015945826657116413
FOLD: 2, EPOCH: 19, train_loss: 0.01864168344921357
FOLD: 2, EPOCH: 19, valid_loss: 0.015943430077571135
FOLD: 2, EPOCH: 20, train_loss: 0.018582114194696013
FOLD: 2, EPOCH: 20, valid_loss: 0.015912228765395973
FOLD: 2, EPOCH: 21, train_loss: 0.01837402129092732
FOLD: 2, EPOCH: 21, valid_loss: 0.01588512147561862
FOLD: 2, EPOCH: 22, train_loss: 0.018314813439910475
FOLD: 2, EPOCH: 22, valid_loss: 0.01583918255682175
FOLD: 2, EPOCH: 23, train_loss: 0.01818231868280752
FOLD: 2, EPOCH: 23, valid_loss: 0.015824969643010542
FOLD: 2, EPOCH: 24, train_loss: 0.018136297377782898
FOLD: 2, EPOCH: 24, valid_loss: 0.015826561559851352
FOLD: 2, EPOCH: 25, train_loss: 0.01798084861523396
F

FOLD: 3, EPOCH: 43, train_loss: 0.016168874298297876
FOLD: 3, EPOCH: 43, valid_loss: 0.015822183054227095
FOLD: 3, EPOCH: 44, train_loss: 0.01605021909533723
FOLD: 3, EPOCH: 44, valid_loss: 0.01586298759167011
FOLD: 3, EPOCH: 45, train_loss: 0.015973625994111236
FOLD: 3, EPOCH: 45, valid_loss: 0.015859904770667736
FOLD: 3, EPOCH: 46, train_loss: 0.01590255881986908
FOLD: 3, EPOCH: 46, valid_loss: 0.015890800537398227
Epoch    47: reducing learning rate of group 0 to 1.6513e-01.
FOLD: 3, EPOCH: 47, train_loss: 0.01566938843225708
FOLD: 3, EPOCH: 47, valid_loss: 0.015872390152743228
FOLD: 3, EPOCH: 48, train_loss: 0.015547844744916703
FOLD: 3, EPOCH: 48, valid_loss: 0.015898454576157607
FOLD: 3, EPOCH: 49, train_loss: 0.015383687789073668
FOLD: 3, EPOCH: 49, valid_loss: 0.015894604560274344
FOLD: 4, EPOCH: 0, train_loss: 0.04281797428690904
FOLD: 4, EPOCH: 0, valid_loss: 0.020531180386359874
FOLD: 4, EPOCH: 1, train_loss: 0.02262191819278775
FOLD: 4, EPOCH: 1, valid_loss: 0.0188430371479

FOLD: 5, EPOCH: 20, train_loss: 0.018689880147576332
FOLD: 5, EPOCH: 20, valid_loss: 0.01616632343771366
FOLD: 5, EPOCH: 21, train_loss: 0.018599385974576343
FOLD: 5, EPOCH: 21, valid_loss: 0.016121702985121653
FOLD: 5, EPOCH: 22, train_loss: 0.018486285003254544
FOLD: 5, EPOCH: 22, valid_loss: 0.01611044379667594
FOLD: 5, EPOCH: 23, train_loss: 0.01838795400249797
FOLD: 5, EPOCH: 23, valid_loss: 0.016079285110418614
FOLD: 5, EPOCH: 24, train_loss: 0.018273350581325388
FOLD: 5, EPOCH: 24, valid_loss: 0.01605986409748976
FOLD: 5, EPOCH: 25, train_loss: 0.01820840112663604
FOLD: 5, EPOCH: 25, valid_loss: 0.01605120960336465
FOLD: 5, EPOCH: 26, train_loss: 0.018107147283248
FOLD: 5, EPOCH: 26, valid_loss: 0.016018008096860006
FOLD: 5, EPOCH: 27, train_loss: 0.01798795475750356
FOLD: 5, EPOCH: 27, valid_loss: 0.016027196310460567
FOLD: 5, EPOCH: 28, train_loss: 0.017891556704165163
FOLD: 5, EPOCH: 28, valid_loss: 0.015978270568526708
FOLD: 5, EPOCH: 29, train_loss: 0.017806048026761494
FOL

FOLD: 6, EPOCH: 47, train_loss: 0.015648223736600297
FOLD: 6, EPOCH: 47, valid_loss: 0.015628588744080983
FOLD: 6, EPOCH: 48, train_loss: 0.015551711432635784
FOLD: 6, EPOCH: 48, valid_loss: 0.01565563614265277
Epoch    49: reducing learning rate of group 0 to 1.1288e-01.
FOLD: 6, EPOCH: 49, train_loss: 0.015368871944578918
FOLD: 6, EPOCH: 49, valid_loss: 0.01564473185974818
FOLD: 0, EPOCH: 0, train_loss: 0.04383363594879975
FOLD: 0, EPOCH: 0, valid_loss: 0.020065714676792804
FOLD: 0, EPOCH: 1, train_loss: 0.022672237450810703
FOLD: 0, EPOCH: 1, valid_loss: 0.018755562603473663
FOLD: 0, EPOCH: 2, train_loss: 0.02172003739286919
FOLD: 0, EPOCH: 2, valid_loss: 0.018207841337873384
FOLD: 0, EPOCH: 3, train_loss: 0.02125492022448295
FOLD: 0, EPOCH: 3, valid_loss: 0.017824097321583673
FOLD: 0, EPOCH: 4, train_loss: 0.020955089932760677
FOLD: 0, EPOCH: 4, valid_loss: 0.017547360478112332
FOLD: 0, EPOCH: 5, train_loss: 0.020702747477067483
FOLD: 0, EPOCH: 5, valid_loss: 0.017295837832184937
F

FOLD: 1, EPOCH: 24, train_loss: 0.018346878775470966
FOLD: 1, EPOCH: 24, valid_loss: 0.015745830793793384
FOLD: 1, EPOCH: 25, train_loss: 0.018222794087754714
FOLD: 1, EPOCH: 25, valid_loss: 0.015743515239312098
FOLD: 1, EPOCH: 26, train_loss: 0.018143117729876493
FOLD: 1, EPOCH: 26, valid_loss: 0.01571254740254237
FOLD: 1, EPOCH: 27, train_loss: 0.01801227529004619
FOLD: 1, EPOCH: 27, valid_loss: 0.015708041807206776
FOLD: 1, EPOCH: 28, train_loss: 0.017925742384348367
FOLD: 1, EPOCH: 28, valid_loss: 0.015648719997933276
FOLD: 1, EPOCH: 29, train_loss: 0.017841548986128858
FOLD: 1, EPOCH: 29, valid_loss: 0.015612729968359837
FOLD: 1, EPOCH: 30, train_loss: 0.01773825494220128
FOLD: 1, EPOCH: 30, valid_loss: 0.015588730143812986
FOLD: 1, EPOCH: 31, train_loss: 0.01764004196770288
FOLD: 1, EPOCH: 31, valid_loss: 0.015635240608109877
FOLD: 1, EPOCH: 32, train_loss: 0.017585947027278913
FOLD: 1, EPOCH: 32, valid_loss: 0.015627272427082062
FOLD: 1, EPOCH: 33, train_loss: 0.0173996182007564

FOLD: 3, EPOCH: 1, train_loss: 0.02290427133541655
FOLD: 3, EPOCH: 1, valid_loss: 0.019068769824046355
FOLD: 3, EPOCH: 2, train_loss: 0.021943967084626894
FOLD: 3, EPOCH: 2, valid_loss: 0.018426450399252083
FOLD: 3, EPOCH: 3, train_loss: 0.02140054270323064
FOLD: 3, EPOCH: 3, valid_loss: 0.01790754869580269
FOLD: 3, EPOCH: 4, train_loss: 0.021064626153659175
FOLD: 3, EPOCH: 4, valid_loss: 0.0176787397895868
FOLD: 3, EPOCH: 5, train_loss: 0.020819522062870296
FOLD: 3, EPOCH: 5, valid_loss: 0.017347340257121965
FOLD: 3, EPOCH: 6, train_loss: 0.02058813693253575
FOLD: 3, EPOCH: 6, valid_loss: 0.017136017935207255
FOLD: 3, EPOCH: 7, train_loss: 0.02036995000231105
FOLD: 3, EPOCH: 7, valid_loss: 0.017062071066063184
FOLD: 3, EPOCH: 8, train_loss: 0.020227911655564566
FOLD: 3, EPOCH: 8, valid_loss: 0.016954462258861616
FOLD: 3, EPOCH: 9, train_loss: 0.020035974756890052
FOLD: 3, EPOCH: 9, valid_loss: 0.016828317553378068
FOLD: 3, EPOCH: 10, train_loss: 0.0199164384220903
FOLD: 3, EPOCH: 10, 

FOLD: 4, EPOCH: 29, train_loss: 0.017784973606467247
FOLD: 4, EPOCH: 29, valid_loss: 0.016067381733312056
FOLD: 4, EPOCH: 30, train_loss: 0.01768366025911795
FOLD: 4, EPOCH: 30, valid_loss: 0.016076096118642733
FOLD: 4, EPOCH: 31, train_loss: 0.01759505141023043
FOLD: 4, EPOCH: 31, valid_loss: 0.016020184239515893
FOLD: 4, EPOCH: 32, train_loss: 0.017471563800968027
FOLD: 4, EPOCH: 32, valid_loss: 0.016031510196626186
FOLD: 4, EPOCH: 33, train_loss: 0.017382890363601414
FOLD: 4, EPOCH: 33, valid_loss: 0.01604658107344921
FOLD: 4, EPOCH: 34, train_loss: 0.017311119046565647
FOLD: 4, EPOCH: 34, valid_loss: 0.016029635730844278
FOLD: 4, EPOCH: 35, train_loss: 0.017204406520200742
FOLD: 4, EPOCH: 35, valid_loss: 0.015993928393492333
FOLD: 4, EPOCH: 36, train_loss: 0.017050557052464905
FOLD: 4, EPOCH: 36, valid_loss: 0.015994800111422174
FOLD: 4, EPOCH: 37, train_loss: 0.016981840221765073
FOLD: 4, EPOCH: 37, valid_loss: 0.01597937812598852
FOLD: 4, EPOCH: 38, train_loss: 0.0169039930695214

FOLD: 6, EPOCH: 8, train_loss: 0.02018201001290534
FOLD: 6, EPOCH: 8, valid_loss: 0.016760201050111882
FOLD: 6, EPOCH: 9, train_loss: 0.020082400532791745
FOLD: 6, EPOCH: 9, valid_loss: 0.016627704366468467
FOLD: 6, EPOCH: 10, train_loss: 0.01988331988655232
FOLD: 6, EPOCH: 10, valid_loss: 0.016569219887829743
FOLD: 6, EPOCH: 11, train_loss: 0.01977594551705831
FOLD: 6, EPOCH: 11, valid_loss: 0.016490184487058565
FOLD: 6, EPOCH: 12, train_loss: 0.019616418090221042
FOLD: 6, EPOCH: 12, valid_loss: 0.016341122440420665
FOLD: 6, EPOCH: 13, train_loss: 0.01949381788034697
FOLD: 6, EPOCH: 13, valid_loss: 0.016294332006229803
FOLD: 6, EPOCH: 14, train_loss: 0.019405210768250195
FOLD: 6, EPOCH: 14, valid_loss: 0.0162348819610018
FOLD: 6, EPOCH: 15, train_loss: 0.0192718199026343
FOLD: 6, EPOCH: 15, valid_loss: 0.016224384451141723
FOLD: 6, EPOCH: 16, train_loss: 0.019154530245105963
FOLD: 6, EPOCH: 16, valid_loss: 0.01614917929355915
FOLD: 6, EPOCH: 17, train_loss: 0.01904726433693557
FOLD: 6

FOLD: 0, EPOCH: 37, train_loss: 0.01684113498777151
FOLD: 0, EPOCH: 37, valid_loss: 0.015786786348773882
FOLD: 0, EPOCH: 38, train_loss: 0.016750604757485358
FOLD: 0, EPOCH: 38, valid_loss: 0.01573751329515989
FOLD: 0, EPOCH: 39, train_loss: 0.016632518189883715
FOLD: 0, EPOCH: 39, valid_loss: 0.015801400304413758
FOLD: 0, EPOCH: 40, train_loss: 0.016509925317321275
FOLD: 0, EPOCH: 40, valid_loss: 0.015772120883831613
FOLD: 0, EPOCH: 41, train_loss: 0.016406131140585686
FOLD: 0, EPOCH: 41, valid_loss: 0.015830335422204092
FOLD: 0, EPOCH: 42, train_loss: 0.01632992850264182
FOLD: 0, EPOCH: 42, valid_loss: 0.015762586862995073
FOLD: 0, EPOCH: 43, train_loss: 0.016195871829483156
FOLD: 0, EPOCH: 43, valid_loss: 0.01578748419594306
FOLD: 0, EPOCH: 44, train_loss: 0.016143007317205537
FOLD: 0, EPOCH: 44, valid_loss: 0.015821033873810217
FOLD: 0, EPOCH: 45, train_loss: 0.015987839675634295
FOLD: 0, EPOCH: 45, valid_loss: 0.01580307876261381
Epoch    46: reducing learning rate of group 0 to 1

FOLD: 2, EPOCH: 15, train_loss: 0.01914093298585834
FOLD: 2, EPOCH: 15, valid_loss: 0.016118547306037866
FOLD: 2, EPOCH: 16, train_loss: 0.01901620482975567
FOLD: 2, EPOCH: 16, valid_loss: 0.016062326800937835
FOLD: 2, EPOCH: 17, train_loss: 0.018939809395453415
FOLD: 2, EPOCH: 17, valid_loss: 0.01597159792884038
FOLD: 2, EPOCH: 18, train_loss: 0.018779619647240318
FOLD: 2, EPOCH: 18, valid_loss: 0.01598059242734542
FOLD: 2, EPOCH: 19, train_loss: 0.01868926512228476
FOLD: 2, EPOCH: 19, valid_loss: 0.01590699372956386
FOLD: 2, EPOCH: 20, train_loss: 0.018590990026053543
FOLD: 2, EPOCH: 20, valid_loss: 0.015879642719832752
FOLD: 2, EPOCH: 21, train_loss: 0.01850477823786236
FOLD: 2, EPOCH: 21, valid_loss: 0.015899841602032002
FOLD: 2, EPOCH: 22, train_loss: 0.01838536190523489
FOLD: 2, EPOCH: 22, valid_loss: 0.015875505283474922
FOLD: 2, EPOCH: 23, train_loss: 0.018295156366720394
FOLD: 2, EPOCH: 23, valid_loss: 0.01585120542977865
FOLD: 2, EPOCH: 24, train_loss: 0.01816325672474262
FOL

FOLD: 3, EPOCH: 42, train_loss: 0.016338746529072523
FOLD: 3, EPOCH: 42, valid_loss: 0.015856394687524207
FOLD: 3, EPOCH: 43, train_loss: 0.016300321896434634
FOLD: 3, EPOCH: 43, valid_loss: 0.01589892045236551
FOLD: 3, EPOCH: 44, train_loss: 0.01623148555439469
FOLD: 3, EPOCH: 44, valid_loss: 0.01589785874463045
FOLD: 3, EPOCH: 45, train_loss: 0.01609619358252432
FOLD: 3, EPOCH: 45, valid_loss: 0.01591293879139882
FOLD: 3, EPOCH: 46, train_loss: 0.01596982720484202
FOLD: 3, EPOCH: 46, valid_loss: 0.015914214846606437
FOLD: 3, EPOCH: 47, train_loss: 0.01592936336591437
FOLD: 3, EPOCH: 47, valid_loss: 0.015888767531858042
Epoch    48: reducing learning rate of group 0 to 1.6513e-01.
FOLD: 3, EPOCH: 48, train_loss: 0.015725172766660515
FOLD: 3, EPOCH: 48, valid_loss: 0.015883595682680607
FOLD: 3, EPOCH: 49, train_loss: 0.015576615375844208
FOLD: 3, EPOCH: 49, valid_loss: 0.01588254629705961
FOLD: 4, EPOCH: 0, train_loss: 0.04485300872978326
FOLD: 4, EPOCH: 0, valid_loss: 0.02109940751240

FOLD: 5, EPOCH: 20, train_loss: 0.01876668055617326
FOLD: 5, EPOCH: 20, valid_loss: 0.016187313442619946
FOLD: 5, EPOCH: 21, train_loss: 0.018679194051671674
FOLD: 5, EPOCH: 21, valid_loss: 0.016178363504318092
FOLD: 5, EPOCH: 22, train_loss: 0.018535998322673747
FOLD: 5, EPOCH: 22, valid_loss: 0.01614727509709505
FOLD: 5, EPOCH: 23, train_loss: 0.018460578461353842
FOLD: 5, EPOCH: 23, valid_loss: 0.016131090573393382
FOLD: 5, EPOCH: 24, train_loss: 0.018311377463711274
FOLD: 5, EPOCH: 24, valid_loss: 0.016069673001766205
FOLD: 5, EPOCH: 25, train_loss: 0.018233122933353926
FOLD: 5, EPOCH: 25, valid_loss: 0.016090036942981757
FOLD: 5, EPOCH: 26, train_loss: 0.018133151747689053
FOLD: 5, EPOCH: 26, valid_loss: 0.016102236623947438
FOLD: 5, EPOCH: 27, train_loss: 0.018022926146718295
FOLD: 5, EPOCH: 27, valid_loss: 0.016024512525361318
FOLD: 5, EPOCH: 28, train_loss: 0.017908355145639664
FOLD: 5, EPOCH: 28, valid_loss: 0.016039254143834114
FOLD: 5, EPOCH: 29, train_loss: 0.01782293043829

FOLD: 6, EPOCH: 47, train_loss: 0.015743235264577576
FOLD: 6, EPOCH: 47, valid_loss: 0.01565410211109198
FOLD: 6, EPOCH: 48, train_loss: 0.015642994863761438
FOLD: 6, EPOCH: 48, valid_loss: 0.015672298363195017
FOLD: 6, EPOCH: 49, train_loss: 0.015598701715872094
FOLD: 6, EPOCH: 49, valid_loss: 0.015665337729912538
FOLD: 0, EPOCH: 0, train_loss: 0.043967811088706996
FOLD: 0, EPOCH: 0, valid_loss: 0.02026640552167709
FOLD: 0, EPOCH: 1, train_loss: 0.022901997473594303
FOLD: 0, EPOCH: 1, valid_loss: 0.018945199652360037
FOLD: 0, EPOCH: 2, train_loss: 0.021934829425771494
FOLD: 0, EPOCH: 2, valid_loss: 0.01829454403084058
FOLD: 0, EPOCH: 3, train_loss: 0.021339312397144938
FOLD: 0, EPOCH: 3, valid_loss: 0.017914365977048874
FOLD: 0, EPOCH: 4, train_loss: 0.020942116861005087
FOLD: 0, EPOCH: 4, valid_loss: 0.01751746738759371
FOLD: 0, EPOCH: 5, train_loss: 0.020676881679006526
FOLD: 0, EPOCH: 5, valid_loss: 0.017311551321584445
FOLD: 0, EPOCH: 6, train_loss: 0.02050801195405625
FOLD: 0, EP

FOLD: 1, EPOCH: 25, train_loss: 0.018058400424952444
FOLD: 1, EPOCH: 25, valid_loss: 0.015726066409395292
FOLD: 1, EPOCH: 26, train_loss: 0.017988257462511193
FOLD: 1, EPOCH: 26, valid_loss: 0.015704805676180583
FOLD: 1, EPOCH: 27, train_loss: 0.017886939458549023
FOLD: 1, EPOCH: 27, valid_loss: 0.015672199714642305
FOLD: 1, EPOCH: 28, train_loss: 0.017783876030227623
FOLD: 1, EPOCH: 28, valid_loss: 0.015632070386065886
FOLD: 1, EPOCH: 29, train_loss: 0.01768762671162148
FOLD: 1, EPOCH: 29, valid_loss: 0.015621637567304648
FOLD: 1, EPOCH: 30, train_loss: 0.017562359382675308
FOLD: 1, EPOCH: 30, valid_loss: 0.015606552434082214
FOLD: 1, EPOCH: 31, train_loss: 0.017452847791483272
FOLD: 1, EPOCH: 31, valid_loss: 0.015613885524754342
FOLD: 1, EPOCH: 32, train_loss: 0.017385322223039897
FOLD: 1, EPOCH: 32, valid_loss: 0.01560141726468618
FOLD: 1, EPOCH: 33, train_loss: 0.01732496903403788
FOLD: 1, EPOCH: 33, valid_loss: 0.015575916721270634
FOLD: 1, EPOCH: 34, train_loss: 0.017156003219251

FOLD: 3, EPOCH: 3, train_loss: 0.021275768182366282
FOLD: 3, EPOCH: 3, valid_loss: 0.01778358665223305
FOLD: 3, EPOCH: 4, train_loss: 0.020918725184290797
FOLD: 3, EPOCH: 4, valid_loss: 0.017418536357581615
FOLD: 3, EPOCH: 5, train_loss: 0.02064981311559677
FOLD: 3, EPOCH: 5, valid_loss: 0.017311836664493267
FOLD: 3, EPOCH: 6, train_loss: 0.020444920362048858
FOLD: 3, EPOCH: 6, valid_loss: 0.01721440112361541
FOLD: 3, EPOCH: 7, train_loss: 0.02029644446195783
FOLD: 3, EPOCH: 7, valid_loss: 0.016952629989156358
FOLD: 3, EPOCH: 8, train_loss: 0.020106138334282347
FOLD: 3, EPOCH: 8, valid_loss: 0.016870733851996753
FOLD: 3, EPOCH: 9, train_loss: 0.01998839313416062
FOLD: 3, EPOCH: 9, valid_loss: 0.016750455762331303
FOLD: 3, EPOCH: 10, train_loss: 0.019778782350791467
FOLD: 3, EPOCH: 10, valid_loss: 0.016749982363902606
FOLD: 3, EPOCH: 11, train_loss: 0.01966955462420309
FOLD: 3, EPOCH: 11, valid_loss: 0.01656205500834263
FOLD: 3, EPOCH: 12, train_loss: 0.019556278539066378
FOLD: 3, EPOCH

FOLD: 4, EPOCH: 31, train_loss: 0.017405827731095457
FOLD: 4, EPOCH: 31, valid_loss: 0.015958107219865687
FOLD: 4, EPOCH: 32, train_loss: 0.017339119423382187
FOLD: 4, EPOCH: 32, valid_loss: 0.015980290678831246
FOLD: 4, EPOCH: 33, train_loss: 0.017237257892014208
FOLD: 4, EPOCH: 33, valid_loss: 0.015993374184920237
FOLD: 4, EPOCH: 34, train_loss: 0.017123570011274236
FOLD: 4, EPOCH: 34, valid_loss: 0.015966418055960767
FOLD: 4, EPOCH: 35, train_loss: 0.017026424319860903
FOLD: 4, EPOCH: 35, valid_loss: 0.015982786694971416
Epoch    36: reducing learning rate of group 0 to 1.6513e-01.
FOLD: 4, EPOCH: 36, train_loss: 0.016795167649114453
FOLD: 4, EPOCH: 36, valid_loss: 0.015932804618317347
FOLD: 4, EPOCH: 37, train_loss: 0.016682972716217912
FOLD: 4, EPOCH: 37, valid_loss: 0.015915152473518483
FOLD: 4, EPOCH: 38, train_loss: 0.01654561440384871
FOLD: 4, EPOCH: 38, valid_loss: 0.0159423746741735
FOLD: 4, EPOCH: 39, train_loss: 0.016493618475726328
FOLD: 4, EPOCH: 39, valid_loss: 0.015919

FOLD: 6, EPOCH: 9, train_loss: 0.019993781590381184
FOLD: 6, EPOCH: 9, valid_loss: 0.016560936346650124
FOLD: 6, EPOCH: 10, train_loss: 0.019877779373043292
FOLD: 6, EPOCH: 10, valid_loss: 0.016494812730413217
FOLD: 6, EPOCH: 11, train_loss: 0.01974983702136858
FOLD: 6, EPOCH: 11, valid_loss: 0.016406491685372133
FOLD: 6, EPOCH: 12, train_loss: 0.01959798950701952
FOLD: 6, EPOCH: 12, valid_loss: 0.016397329333883066
FOLD: 6, EPOCH: 13, train_loss: 0.019459616292167355
FOLD: 6, EPOCH: 13, valid_loss: 0.016286679185353793
FOLD: 6, EPOCH: 14, train_loss: 0.019318095055987704
FOLD: 6, EPOCH: 14, valid_loss: 0.01615478084064447
FOLD: 6, EPOCH: 15, train_loss: 0.019239468927923088
FOLD: 6, EPOCH: 15, valid_loss: 0.016165853406374272
FOLD: 6, EPOCH: 16, train_loss: 0.019114989787340164
FOLD: 6, EPOCH: 16, valid_loss: 0.016119879168959763
FOLD: 6, EPOCH: 17, train_loss: 0.019012950587312918
FOLD: 6, EPOCH: 17, valid_loss: 0.016050857276870653
FOLD: 6, EPOCH: 18, train_loss: 0.01889185224835937

FOLD: 0, EPOCH: 37, train_loss: 0.016880733904907026
FOLD: 0, EPOCH: 37, valid_loss: 0.015756685573321123
FOLD: 0, EPOCH: 38, train_loss: 0.016799295044227225
FOLD: 0, EPOCH: 38, valid_loss: 0.015774532077977292
FOLD: 0, EPOCH: 39, train_loss: 0.016668770009198704
FOLD: 0, EPOCH: 39, valid_loss: 0.015752864858278863
FOLD: 0, EPOCH: 40, train_loss: 0.016615505267337367
FOLD: 0, EPOCH: 40, valid_loss: 0.015750863660986606
FOLD: 0, EPOCH: 41, train_loss: 0.01646681082107731
FOLD: 0, EPOCH: 41, valid_loss: 0.015753307093221407
FOLD: 0, EPOCH: 42, train_loss: 0.016433504523357022
FOLD: 0, EPOCH: 42, valid_loss: 0.01575416111602233
Epoch    43: reducing learning rate of group 0 to 1.6513e-01.
FOLD: 0, EPOCH: 43, train_loss: 0.016199531436369225
FOLD: 0, EPOCH: 43, valid_loss: 0.01576621551066637
FOLD: 0, EPOCH: 44, train_loss: 0.016051681196266734
FOLD: 0, EPOCH: 44, valid_loss: 0.015751414502469394
FOLD: 0, EPOCH: 45, train_loss: 0.015985969403708302
FOLD: 0, EPOCH: 45, valid_loss: 0.015751

FOLD: 2, EPOCH: 15, train_loss: 0.019238017700813913
FOLD: 2, EPOCH: 15, valid_loss: 0.01616989620603048
FOLD: 2, EPOCH: 16, train_loss: 0.019131653690458956
FOLD: 2, EPOCH: 16, valid_loss: 0.01611455909621257
FOLD: 2, EPOCH: 17, train_loss: 0.018958369219625317
FOLD: 2, EPOCH: 17, valid_loss: 0.01607300477245679
FOLD: 2, EPOCH: 18, train_loss: 0.0189001826770805
FOLD: 2, EPOCH: 18, valid_loss: 0.016016691493300293
FOLD: 2, EPOCH: 19, train_loss: 0.0187450929771404
FOLD: 2, EPOCH: 19, valid_loss: 0.01597966583302388
FOLD: 2, EPOCH: 20, train_loss: 0.018677360403376655
FOLD: 2, EPOCH: 20, valid_loss: 0.015936339990450785
FOLD: 2, EPOCH: 21, train_loss: 0.018523945875868603
FOLD: 2, EPOCH: 21, valid_loss: 0.01593017979310109
FOLD: 2, EPOCH: 22, train_loss: 0.018477206219088386
FOLD: 2, EPOCH: 22, valid_loss: 0.015856819800459422
FOLD: 2, EPOCH: 23, train_loss: 0.018298478797078133
FOLD: 2, EPOCH: 23, valid_loss: 0.015864833902854186
FOLD: 2, EPOCH: 24, train_loss: 0.018236880543062817
FO

FOLD: 3, EPOCH: 43, train_loss: 0.016401446751646093
FOLD: 3, EPOCH: 43, valid_loss: 0.015878880468125526
FOLD: 3, EPOCH: 44, train_loss: 0.016321707365883363
FOLD: 3, EPOCH: 44, valid_loss: 0.015896554893025987
FOLD: 3, EPOCH: 45, train_loss: 0.016210347376260405
FOLD: 3, EPOCH: 45, valid_loss: 0.015888083869448073
FOLD: 3, EPOCH: 46, train_loss: 0.01609137233950802
FOLD: 3, EPOCH: 46, valid_loss: 0.0158843701132215
FOLD: 3, EPOCH: 47, train_loss: 0.0160370267222862
FOLD: 3, EPOCH: 47, valid_loss: 0.015894478903366968
FOLD: 3, EPOCH: 48, train_loss: 0.01591046841664089
FOLD: 3, EPOCH: 48, valid_loss: 0.015875262279923145
FOLD: 3, EPOCH: 49, train_loss: 0.01581121179451411
FOLD: 3, EPOCH: 49, valid_loss: 0.015882248273835733
Epoch    50: reducing learning rate of group 0 to 1.6513e-01.
FOLD: 4, EPOCH: 0, train_loss: 0.04279072149782567
FOLD: 4, EPOCH: 0, valid_loss: 0.02017496726833857
FOLD: 4, EPOCH: 1, train_loss: 0.022595249544325714
FOLD: 4, EPOCH: 1, valid_loss: 0.0187327999335068

FOLD: 5, EPOCH: 21, train_loss: 0.01855366279345912
FOLD: 5, EPOCH: 21, valid_loss: 0.016127004359777156
FOLD: 5, EPOCH: 22, train_loss: 0.018432472454937728
FOLD: 5, EPOCH: 22, valid_loss: 0.016128851745564204
FOLD: 5, EPOCH: 23, train_loss: 0.01834076874562212
FOLD: 5, EPOCH: 23, valid_loss: 0.016077903672479667
FOLD: 5, EPOCH: 24, train_loss: 0.01823099994578877
FOLD: 5, EPOCH: 24, valid_loss: 0.01603463335106006
FOLD: 5, EPOCH: 25, train_loss: 0.018132918010893707
FOLD: 5, EPOCH: 25, valid_loss: 0.01601272212484708
FOLD: 5, EPOCH: 26, train_loss: 0.018043692475436506
FOLD: 5, EPOCH: 26, valid_loss: 0.016001687170221254
FOLD: 5, EPOCH: 27, train_loss: 0.01793524804147514
FOLD: 5, EPOCH: 27, valid_loss: 0.015999598428606987
FOLD: 5, EPOCH: 28, train_loss: 0.0177825785468559
FOLD: 5, EPOCH: 28, valid_loss: 0.015956143347116616
FOLD: 5, EPOCH: 29, train_loss: 0.017710982570172968
FOLD: 5, EPOCH: 29, valid_loss: 0.01595881789063032
FOLD: 5, EPOCH: 30, train_loss: 0.01758947749496312
FOL

FOLD: 6, EPOCH: 48, train_loss: 0.01554540525870146
FOLD: 6, EPOCH: 48, valid_loss: 0.015716953847843867
FOLD: 6, EPOCH: 49, train_loss: 0.015480161392809572
FOLD: 6, EPOCH: 49, valid_loss: 0.015716306507014312
FOLD: 0, EPOCH: 0, train_loss: 0.04310849477612489
FOLD: 0, EPOCH: 0, valid_loss: 0.019976193228593238
FOLD: 0, EPOCH: 1, train_loss: 0.022612012998276466
FOLD: 0, EPOCH: 1, valid_loss: 0.01896993242777311
FOLD: 0, EPOCH: 2, train_loss: 0.021742603036801558
FOLD: 0, EPOCH: 2, valid_loss: 0.0180288150620002
FOLD: 0, EPOCH: 3, train_loss: 0.02127743688588207
FOLD: 0, EPOCH: 3, valid_loss: 0.017878076921288785
FOLD: 0, EPOCH: 4, train_loss: 0.020959328079747187
FOLD: 0, EPOCH: 4, valid_loss: 0.017617965546938088
FOLD: 0, EPOCH: 5, train_loss: 0.02066002710646874
FOLD: 0, EPOCH: 5, valid_loss: 0.01732860985570229
FOLD: 0, EPOCH: 6, train_loss: 0.020485257453008276
FOLD: 0, EPOCH: 6, valid_loss: 0.01720005660676039
FOLD: 0, EPOCH: 7, train_loss: 0.020288947441086575
FOLD: 0, EPOCH: 7

FOLD: 1, EPOCH: 26, train_loss: 0.018084845782534498
FOLD: 1, EPOCH: 26, valid_loss: 0.01564470026642084
FOLD: 1, EPOCH: 27, train_loss: 0.017994374087130702
FOLD: 1, EPOCH: 27, valid_loss: 0.01565960680062954
FOLD: 1, EPOCH: 28, train_loss: 0.01794724979412717
FOLD: 1, EPOCH: 28, valid_loss: 0.015663012145803526
FOLD: 1, EPOCH: 29, train_loss: 0.01777899802687603
FOLD: 1, EPOCH: 29, valid_loss: 0.015633107019731633
FOLD: 1, EPOCH: 30, train_loss: 0.017722535022609943
FOLD: 1, EPOCH: 30, valid_loss: 0.015623601869894909
FOLD: 1, EPOCH: 31, train_loss: 0.017581746762467397
FOLD: 1, EPOCH: 31, valid_loss: 0.015573444704596814
FOLD: 1, EPOCH: 32, train_loss: 0.0174730544456759
FOLD: 1, EPOCH: 32, valid_loss: 0.01557600125670433
FOLD: 1, EPOCH: 33, train_loss: 0.01741067602022274
FOLD: 1, EPOCH: 33, valid_loss: 0.01562482605759914
FOLD: 1, EPOCH: 34, train_loss: 0.01731087765782266
FOLD: 1, EPOCH: 34, valid_loss: 0.015581342463309947
FOLD: 1, EPOCH: 35, train_loss: 0.017202129788898134
FOL

FOLD: 3, EPOCH: 3, train_loss: 0.021289080633102236
FOLD: 3, EPOCH: 3, valid_loss: 0.017772382984940823
FOLD: 3, EPOCH: 4, train_loss: 0.02095199897381905
FOLD: 3, EPOCH: 4, valid_loss: 0.017516905513520423
FOLD: 3, EPOCH: 5, train_loss: 0.020712004999655323
FOLD: 3, EPOCH: 5, valid_loss: 0.01726744667841838
FOLD: 3, EPOCH: 6, train_loss: 0.020481778233236558
FOLD: 3, EPOCH: 6, valid_loss: 0.017157433912731133
FOLD: 3, EPOCH: 7, train_loss: 0.020336692573855054
FOLD: 3, EPOCH: 7, valid_loss: 0.016975354546537764
FOLD: 3, EPOCH: 8, train_loss: 0.020140419789665454
FOLD: 3, EPOCH: 8, valid_loss: 0.016943871330183286
FOLD: 3, EPOCH: 9, train_loss: 0.020016768695534887
FOLD: 3, EPOCH: 9, valid_loss: 0.01675512971213231
FOLD: 3, EPOCH: 10, train_loss: 0.019831350048047466
FOLD: 3, EPOCH: 10, valid_loss: 0.016732905633174457
FOLD: 3, EPOCH: 11, train_loss: 0.019680214265512454
FOLD: 3, EPOCH: 11, valid_loss: 0.016589128627226904
FOLD: 3, EPOCH: 12, train_loss: 0.019564569147454726
FOLD: 3, E

FOLD: 4, EPOCH: 30, train_loss: 0.017512010939016536
FOLD: 4, EPOCH: 30, valid_loss: 0.01607514975162653
FOLD: 4, EPOCH: 31, train_loss: 0.017444819639864807
FOLD: 4, EPOCH: 31, valid_loss: 0.016016272326501515
FOLD: 4, EPOCH: 32, train_loss: 0.017283484792789898
FOLD: 4, EPOCH: 32, valid_loss: 0.016052726584558304
FOLD: 4, EPOCH: 33, train_loss: 0.017225608783396514
FOLD: 4, EPOCH: 33, valid_loss: 0.016030168805557948
FOLD: 4, EPOCH: 34, train_loss: 0.017064027249108295
FOLD: 4, EPOCH: 34, valid_loss: 0.016027372473707564
FOLD: 4, EPOCH: 35, train_loss: 0.01702087314648403
FOLD: 4, EPOCH: 35, valid_loss: 0.0163924338725897
FOLD: 4, EPOCH: 36, train_loss: 0.01688576623092632
FOLD: 4, EPOCH: 36, valid_loss: 0.016031375011572473
FOLD: 4, EPOCH: 37, train_loss: 0.016801027644022897
FOLD: 4, EPOCH: 37, valid_loss: 0.016013886134784956
FOLD: 4, EPOCH: 38, train_loss: 0.01672727697705095
FOLD: 4, EPOCH: 38, valid_loss: 0.016054262048923053
FOLD: 4, EPOCH: 39, train_loss: 0.016583757084869855

FOLD: 6, EPOCH: 8, train_loss: 0.02017180241543699
FOLD: 6, EPOCH: 8, valid_loss: 0.016769368702975605
FOLD: 6, EPOCH: 9, train_loss: 0.020027759082212642
FOLD: 6, EPOCH: 9, valid_loss: 0.016610408345094092
FOLD: 6, EPOCH: 10, train_loss: 0.01983382598169752
FOLD: 6, EPOCH: 10, valid_loss: 0.016547901914096795
FOLD: 6, EPOCH: 11, train_loss: 0.019706026827161376
FOLD: 6, EPOCH: 11, valid_loss: 0.01647917188417453
FOLD: 6, EPOCH: 12, train_loss: 0.019544546542739547
FOLD: 6, EPOCH: 12, valid_loss: 0.016391687095165253
FOLD: 6, EPOCH: 13, train_loss: 0.01944319755342361
FOLD: 6, EPOCH: 13, valid_loss: 0.016302313870535448
FOLD: 6, EPOCH: 14, train_loss: 0.01936382324611013
FOLD: 6, EPOCH: 14, valid_loss: 0.0162341041633716
FOLD: 6, EPOCH: 15, train_loss: 0.01922489851209763
FOLD: 6, EPOCH: 15, valid_loss: 0.016158778721896503
FOLD: 6, EPOCH: 16, train_loss: 0.019078091114155343
FOLD: 6, EPOCH: 16, valid_loss: 0.016132339691886537
FOLD: 6, EPOCH: 17, train_loss: 0.018967921895956672
FOLD:

In [29]:
valid_results = train_targets_scored.drop(columns=target_cols).merge(train[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)

y_true = train_targets_scored[target_cols].values
y_pred = valid_results[target_cols].values

score = 0
for i in range(len(target_cols)):
    score_ = log_loss(y_true[:, i], y_pred[:, i])
    score += score_ / target.shape[1]
    
print("CV log_loss: ", score)

CV log_loss:  0.014384479622231678


In [31]:
sub = sample_submission.drop(columns=target_cols).merge(test[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)
sub.to_csv('submission3.csv', index=False)

In [32]:
sub.shape

(3982, 207)