In [1]:
import sys
#sys.path.append('../iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [2]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import optuna

import warnings
warnings.filterwarnings('ignore')

In [3]:
train_features = pd.read_csv('../lish-moa/train_features.csv')
train_targets_scored = pd.read_csv('../lish-moa/train_targets_scored.csv')

#label_smoothing
train_targets_scored_sigid_value = train_targets_scored.loc[:,['sig_id']]
train_targets_scored_float_value = train_targets_scored.loc[:,'5-alpha_reductase_inhibitor':'wnt_inhibitor']
train_targets_scored_float_value_smooth = train_targets_scored_float_value.clip(0.0005, 0.9995)
train_targets_scored = pd.concat([train_targets_scored_sigid_value, train_targets_scored_float_value_smooth], axis=1)

train_targets_scored_forCV = pd.concat([train_targets_scored_sigid_value, train_targets_scored_float_value], axis=1)

train_targets_nonscored = pd.read_csv('../lish-moa/train_targets_nonscored.csv')
test_features = pd.read_csv('../lish-moa/test_features.csv')

sample_submission = pd.read_csv('../lish-moa/sample_submission.csv')

In [4]:
GENES = [col for col in train_features.columns if col.startswith('g-')]
CELLS = [col for col in train_features.columns if col.startswith('c-')]

In [5]:
def seed_everything(seed=1903):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=1903)

In [6]:
# GENES
n_comp = 28

data = pd.concat([pd.DataFrame(train_features[GENES]), pd.DataFrame(test_features[GENES])])
data2 = (PCA(n_components=n_comp, random_state=1903).fit_transform(data[GENES]))
train2 = data2[:train_features.shape[0]]; test2 = data2[-test_features.shape[0]:]

train2 = pd.DataFrame(train2, columns=[f'pca_G-{i}' for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f'pca_G-{i}' for i in range(n_comp)])

# drop_cols = [f'c-{i}' for i in range(n_comp,len(GENES))]
train_features = pd.concat((train_features, train2), axis=1)
test_features = pd.concat((test_features, test2), axis=1)

#CELLS
n_comp = 5

data = pd.concat([pd.DataFrame(train_features[CELLS]), pd.DataFrame(test_features[CELLS])])
data2 = (PCA(n_components=n_comp, random_state=1903).fit_transform(data[CELLS]))
train2 = data2[:train_features.shape[0]]; test2 = data2[-test_features.shape[0]:]

train2 = pd.DataFrame(train2, columns=[f'pca_C-{i}' for i in range(n_comp)])
test2 = pd.DataFrame(test2, columns=[f'pca_C-{i}' for i in range(n_comp)])

# drop_cols = [f'c-{i}' for i in range(n_comp,len(CELLS))]
train_features = pd.concat((train_features, train2), axis=1)
test_features = pd.concat((test_features, test2), axis=1)

In [7]:
from sklearn.feature_selection import VarianceThreshold


var_thresh = VarianceThreshold(threshold=0.4)
data = train_features.append(test_features)
data_transformed = var_thresh.fit_transform(data.iloc[:, 4:])

train_features_transformed = data_transformed[ : train_features.shape[0]]
test_features_transformed = data_transformed[-test_features.shape[0] : ]


train_features = pd.DataFrame(train_features[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                              columns=['sig_id','cp_type','cp_time','cp_dose'])

train_features_transformed = np.arcsinh(train_features_transformed)
train_features = pd.concat([train_features, pd.DataFrame(train_features_transformed)], axis=1)


test_features = pd.DataFrame(test_features[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                             columns=['sig_id','cp_type','cp_time','cp_dose'])

test_features_transformed = np.arcsinh(test_features_transformed)
test_features = pd.concat([test_features, pd.DataFrame(test_features_transformed)], axis=1)

In [8]:
train = train_features.merge(train_targets_scored, on='sig_id')
train = train[train['cp_type']!='ctl_vehicle'].reset_index(drop=True)
test = test_features[test_features['cp_type']!='ctl_vehicle'].reset_index(drop=True)

target = train[train_targets_scored.columns]

train = train.drop('cp_type', axis=1)
test = test.drop('cp_type', axis=1)

In [9]:
target_cols = target.drop('sig_id', axis=1).columns.values.tolist()

In [10]:
folds = train.copy()

mskf = MultilabelStratifiedKFold(n_splits=7)

for f, (t_idx, v_idx) in enumerate(mskf.split(X=train, y=target)):
    folds.loc[v_idx, 'kfold'] = int(f)

folds['kfold'] = folds['kfold'].astype(int)

In [11]:
class MoADataset:
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets
        
    def __len__(self):
        return (self.features.shape[0])
    
    def __getitem__(self, idx):
        dct = {
            'x' : torch.tensor(self.features[idx, :], dtype=torch.float),
            'y' : torch.tensor(self.targets[idx, :], dtype=torch.float)            
        }
        return dct
    
class TestDataset:
    def __init__(self, features):
        self.features = features
        
    def __len__(self):
        return (self.features.shape[0])
    
    def __getitem__(self, idx):
        dct = {
            'x' : torch.tensor(self.features[idx, :], dtype=torch.float)
        }
        return dct

In [12]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    model.train()
    final_loss = 0
    
    for data in dataloader:
        optimizer.zero_grad()
        inputs, targets = data['x'].to(device), data['y'].to(device)
        #print(inputs.shape)
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        #scheduler.step()
        
        final_loss += loss.item()
        
    final_loss /= len(dataloader)
    
    return final_loss


def valid_fn(model, loss_fn, dataloader, device):
    model.eval()
    final_loss = 0
    valid_preds = []
    
    for data in dataloader:
        inputs, targets = data['x'].to(device), data['y'].to(device)
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        
        final_loss += loss.item()
        valid_preds.append(outputs.sigmoid().detach().cpu().numpy())
        
    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)
    
    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    model.eval()
    preds = []
    
    for data in dataloader:
        inputs = data['x'].to(device)

        with torch.no_grad():
            outputs = model(inputs)
        
        preds.append(outputs.sigmoid().detach().cpu().numpy())
        
    preds = np.concatenate(preds)
    
    return preds
   

In [13]:
class Model(nn.Module):
    def __init__(self, num_features, num_targets, hidden_size, dropout1, dropout2, dropout3):
        super(Model, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(dropout1)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))
        
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(dropout2)
        self.dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))
        
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(dropout3)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))
    
    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.leaky_relu(self.dense1(x), negative_slope=0.01)
        
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = F.leaky_relu(self.dense2(x), negative_slope=0.01)
        
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)
        
        return x

In [14]:
def process_data(data):
    data = pd.get_dummies(data, columns=['cp_time','cp_dose'])    
    return data

In [15]:
feature_cols = [c for c in process_data(folds).columns if c not in target_cols]
feature_cols = [c for c in feature_cols if c not in ['kfold','sig_id']]
len(feature_cols)

906

In [16]:
# HyperParameters

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 50
BATCH_SIZE = 256
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
NFOLDS = 7
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False

num_features=len(feature_cols)
num_targets=len(target_cols)
hidden_size=1024

In [17]:
def run_training(fold,params):
    
    seed_everything(1903)
    
    train = process_data(folds)
    test_ = process_data(test)
    
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index
    
    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
    x_train, y_train  = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols].values
    
    train_dataset = MoADataset(x_train, y_train)
    valid_dataset = MoADataset(x_valid, y_valid)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=params['hidden_size'],
        dropout1=params['dropout1'],
        dropout2=params['dropout2'],
        dropout3=params['dropout3']
    )
    
    model.to(DEVICE)
    
    #optimizer = optim.Adagrad(model.parameters(), lr=0.1, lr_decay=0.001, weight_decay=1e-5, initial_accumulator_value=0, eps=1e-10)  
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=1e-8, eps=1e-10, verbose=True)
    
    lr_params = params['lr']
    lr_decay_params = params['lr_decay']
    weight_decay_params = params['weight_decay']
    
    factor_params = params['factor']
    patience_params = params['patience']
    
    optimizer = optim.Adagrad(model.parameters(), lr=lr_params, lr_decay=lr_decay_params, weight_decay=weight_decay_params, initial_accumulator_value=0, eps=1e-10)  
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=factor_params, patience=patience_params, threshold=1e-8, eps=1e-10, verbose=True)
    
    loss_fn = nn.BCEWithLogitsLoss()
    
    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0
    
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    best_loss = np.inf
    
    for epoch in range(EPOCHS):
        
        train_loss = train_fn(model, optimizer,scheduler, loss_fn, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")
        
        scheduler.step(valid_loss)
        #scheduler.step()
        
        if valid_loss < best_loss:
            
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            #torch.save(model.state_dict(), f"FOLD{fold}_.pth")
        
        elif(EARLY_STOP == True):
            
            early_step += 1
            if (early_step >= early_stopping_steps):
                break
            
    
    return best_loss

In [18]:
from optuna import Trial, create_study

def objective(trial: Trial) -> dict:
    params={
        "hidden_size": trial.suggest_int("hidden_size", 512, 2048, step = 256, log = False),
        "dropout1": trial.suggest_float("dropout1", 0.1, 0.8, step = None, log = False),
        "dropout2": trial.suggest_float("dropout2", 0.1, 0.8, step = None, log = False),
        "dropout3": trial.suggest_float("dropout3", 0.1, 0.8, step = None, log = False),
        #"lr": trial.suggest_loguniform("lr",1e-2,1),
        "lr": trial.suggest_float("lr", 5e-2, 5e-1, step = None, log = False),
        #"lr_decay": trial.suggest_loguniform("lr_decay",1e-4,1e-2),
        "lr_decay": trial.suggest_float("lr_decay", 5e-4, 5e-3, step = None, log = False),
        #"weight_decay": trial.suggest_loguniform("weight_decay",1e-6,1e-4),
        "weight_decay": trial.suggest_float("weight_decay",5e-6, 5e-5, step = None, log = False),
        "factor": trial.suggest_float("factor",0.1, 0.9, step = None, log = False),
        "patience": trial.suggest_int("patience",1,10, step = 1, log =False)
    }
    all_losses=[]
    for f_ in range(7): #<-- 5folds
        temp_loss=run_training(f_,params)
        all_losses.append(temp_loss)
    return np.mean(all_losses)    

In [19]:
study=optuna.create_study(direction="minimize")
study.optimize(objective,n_trials=250)

[32m[I 2020-11-11 12:25:18,174][0m A new study created in memory with name: no-name-f3e7b05d-8bd1-4b36-9bc1-79052039146a[0m


FOLD: 0, EPOCH: 0, train_loss: 0.04607089128143884
FOLD: 0, EPOCH: 0, valid_loss: 0.026742163902291886
FOLD: 0, EPOCH: 1, train_loss: 0.024663045675166556
FOLD: 0, EPOCH: 1, valid_loss: 0.025656549976422235
FOLD: 0, EPOCH: 2, train_loss: 0.024273096583783627
FOLD: 0, EPOCH: 2, valid_loss: 0.0249568556363766
FOLD: 0, EPOCH: 3, train_loss: 0.02405828222431041
FOLD: 0, EPOCH: 3, valid_loss: 0.023822412467919864
FOLD: 0, EPOCH: 4, train_loss: 0.02385869143983802
FOLD: 0, EPOCH: 4, valid_loss: 0.024258543665592488
FOLD: 0, EPOCH: 5, train_loss: 0.023625186568981892
FOLD: 0, EPOCH: 5, valid_loss: 0.023386123231970347
FOLD: 0, EPOCH: 6, train_loss: 0.02352020801422564
FOLD: 0, EPOCH: 6, valid_loss: 0.02301327296747611
FOLD: 0, EPOCH: 7, train_loss: 0.02334619394025287
FOLD: 0, EPOCH: 7, valid_loss: 0.023302559525920793


KeyboardInterrupt: 

In [None]:
study.best_params

In [20]:
params_226 = {'hidden_size': 1792, 'dropout1': 0.3003906575355876, 'dropout2': 0.6467618201427745, 'dropout3': 0.2573148373719613, 'lr': 0.17808542072654637, 'lr_decay': 0.004473634402592102, 'weight_decay': 5.018499734993564e-06, 'factor': 0.14966142745406366, 'patience': 2}

In [21]:
def run_final_training(fold, params, seed):
    
    seed_everything(seed)
    
    train = process_data(folds)
    test_ = process_data(test)
    
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index
    
    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
    x_train, y_train  = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols].values
    
    train_dataset = MoADataset(x_train, y_train)
    valid_dataset = MoADataset(x_valid, y_valid)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=params['hidden_size'],
        dropout1=params['dropout1'],
        dropout2=params['dropout2'],
        dropout3=params['dropout3']
    )
    
    model.to(DEVICE)
    
    #optimizer = optim.Adagrad(model.parameters(), lr=0.1, lr_decay=0.001, weight_decay=1e-5, initial_accumulator_value=0, eps=1e-10)  
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, threshold=1e-8, eps=1e-10, verbose=True)
    
    lr_params = params['lr']
    lr_decay_params = params['lr_decay']
    weight_decay_params = params['weight_decay']
    
    factor_params = params['factor']
    patience_params = params['patience']
    
    optimizer = optim.Adagrad(model.parameters(), lr=lr_params, lr_decay=lr_decay_params, weight_decay=weight_decay_params, initial_accumulator_value=0, eps=1e-10)  
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=factor_params, patience=patience_params, threshold=1e-8, eps=1e-10, verbose=True)
    
    loss_fn = nn.BCEWithLogitsLoss()
    
    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0
    
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    best_loss = np.inf
    
    for epoch in range(EPOCHS):
        
        train_loss = train_fn(model, optimizer,scheduler, loss_fn, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")
        
        scheduler.step(valid_loss)
        #scheduler.step()
        
        if valid_loss < best_loss:
            
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), f"moa-1867-SEED{seed}_FOLD{fold}.pth")
        
        elif(EARLY_STOP == True):
            
            early_step += 1
            if (early_step >= early_stopping_steps):
                break
            
    #--------------------- PREDICTION---------------------
    x_test = test_[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)
    
    model.load_state_dict(torch.load(f"moa-1867-SEED{seed}_FOLD{fold}.pth"))
    model.to(DEVICE)
    
    predictions = np.zeros((len(test_), target.iloc[:, 1:].shape[1]))
    predictions = inference_fn(model, testloader, DEVICE)
    
    return oof, predictions

In [22]:
def run_k_fold(NFOLDS, params, seed):
    oof = np.zeros((len(train), len(target_cols)))
    predictions = np.zeros((len(test), len(target_cols)))
    
    for fold in range(NFOLDS):
        oof_, pred_ = run_final_training(fold, params, seed)
        
        predictions += pred_ / NFOLDS
        oof += oof_
        
    return oof, predictions

In [23]:
# Averaging on multiple SEEDS

SEED = [7, 11, 13, 17, 19, 1881, 1903]
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

for seed in SEED:
    
    oof_, predictions_ = run_k_fold(NFOLDS, params_226, seed)
    oof += oof_ / len(SEED)
    predictions += predictions_ / len(SEED)

train[target_cols] = oof
test[target_cols] = predictions

FOLD: 0, EPOCH: 0, train_loss: 0.048930756521184705
FOLD: 0, EPOCH: 0, valid_loss: 0.023167151814469926
FOLD: 0, EPOCH: 1, train_loss: 0.022327734509835374
FOLD: 0, EPOCH: 1, valid_loss: 0.021408247403227366
FOLD: 0, EPOCH: 2, train_loss: 0.02156349493039621
FOLD: 0, EPOCH: 2, valid_loss: 0.020825105934188917
FOLD: 0, EPOCH: 3, train_loss: 0.02118993948238927
FOLD: 0, EPOCH: 3, valid_loss: 0.02078582704640352
FOLD: 0, EPOCH: 4, train_loss: 0.02084435457112016
FOLD: 0, EPOCH: 4, valid_loss: 0.02042653927436242
FOLD: 0, EPOCH: 5, train_loss: 0.020600171633870214
FOLD: 0, EPOCH: 5, valid_loss: 0.020261978873839744
FOLD: 0, EPOCH: 6, train_loss: 0.020425536619448983
FOLD: 0, EPOCH: 6, valid_loss: 0.020016935725624744
FOLD: 0, EPOCH: 7, train_loss: 0.02018555356944735
FOLD: 0, EPOCH: 7, valid_loss: 0.01983450439113837
FOLD: 0, EPOCH: 8, train_loss: 0.020027331177245925
FOLD: 0, EPOCH: 8, valid_loss: 0.019710739501393758
FOLD: 0, EPOCH: 9, train_loss: 0.019893932835878553
FOLD: 0, EPOCH: 9, 

FOLD: 1, EPOCH: 28, train_loss: 0.017719675912647635
FOLD: 1, EPOCH: 28, valid_loss: 0.01908744513415373
FOLD: 1, EPOCH: 29, train_loss: 0.01763689940845644
FOLD: 1, EPOCH: 29, valid_loss: 0.019049719692422792
FOLD: 1, EPOCH: 30, train_loss: 0.017550527369855223
FOLD: 1, EPOCH: 30, valid_loss: 0.019097520038485527
FOLD: 1, EPOCH: 31, train_loss: 0.017463210617771018
FOLD: 1, EPOCH: 31, valid_loss: 0.01902528737600033
FOLD: 1, EPOCH: 32, train_loss: 0.01729562787993534
FOLD: 1, EPOCH: 32, valid_loss: 0.01904777265512026
FOLD: 1, EPOCH: 33, train_loss: 0.017189328105667152
FOLD: 1, EPOCH: 33, valid_loss: 0.019006030490765206
FOLD: 1, EPOCH: 34, train_loss: 0.017035959804480947
FOLD: 1, EPOCH: 34, valid_loss: 0.019108981180649538
FOLD: 1, EPOCH: 35, train_loss: 0.016963534472459876
FOLD: 1, EPOCH: 35, valid_loss: 0.019044594266093694
FOLD: 1, EPOCH: 36, train_loss: 0.01682031121910424
FOLD: 1, EPOCH: 36, valid_loss: 0.019032288485994704
Epoch    37: reducing learning rate of group 0 to 2.

FOLD: 3, EPOCH: 2, train_loss: 0.021591766032616835
FOLD: 3, EPOCH: 2, valid_loss: 0.021145380890140168
FOLD: 3, EPOCH: 3, train_loss: 0.021215954946505057
FOLD: 3, EPOCH: 3, valid_loss: 0.020855539693282202
FOLD: 3, EPOCH: 4, train_loss: 0.02095985858122239
FOLD: 3, EPOCH: 4, valid_loss: 0.020667840368472613
FOLD: 3, EPOCH: 5, train_loss: 0.0206642455446559
FOLD: 3, EPOCH: 5, valid_loss: 0.020451546718294803
FOLD: 3, EPOCH: 6, train_loss: 0.020459633496766154
FOLD: 3, EPOCH: 6, valid_loss: 0.0202598819652429
FOLD: 3, EPOCH: 7, train_loss: 0.020220913143979537
FOLD: 3, EPOCH: 7, valid_loss: 0.019956715834828522
FOLD: 3, EPOCH: 8, train_loss: 0.020114541305480776
FOLD: 3, EPOCH: 8, valid_loss: 0.01994357444345951
FOLD: 3, EPOCH: 9, train_loss: 0.019977252930402756
FOLD: 3, EPOCH: 9, valid_loss: 0.019974462831249602
FOLD: 3, EPOCH: 10, train_loss: 0.019797446432749968
FOLD: 3, EPOCH: 10, valid_loss: 0.019833165149276074
FOLD: 3, EPOCH: 11, train_loss: 0.019705451873911393
FOLD: 3, EPOCH:

FOLD: 4, EPOCH: 28, train_loss: 0.017533178491568244
FOLD: 4, EPOCH: 28, valid_loss: 0.019417384639382362
FOLD: 4, EPOCH: 29, train_loss: 0.01738074849787596
FOLD: 4, EPOCH: 29, valid_loss: 0.019424347636791375
Epoch    30: reducing learning rate of group 0 to 2.6653e-02.
FOLD: 4, EPOCH: 30, train_loss: 0.017109523917472846
FOLD: 4, EPOCH: 30, valid_loss: 0.01933650658107721
FOLD: 4, EPOCH: 31, train_loss: 0.016929136110922775
FOLD: 4, EPOCH: 31, valid_loss: 0.01929871651988763
FOLD: 4, EPOCH: 32, train_loss: 0.016884078611493915
FOLD: 4, EPOCH: 32, valid_loss: 0.019291939930273935
FOLD: 4, EPOCH: 33, train_loss: 0.01679362214447276
FOLD: 4, EPOCH: 33, valid_loss: 0.01930452582354729
FOLD: 4, EPOCH: 34, train_loss: 0.01671813294996281
FOLD: 4, EPOCH: 34, valid_loss: 0.019270314190250177
FOLD: 4, EPOCH: 35, train_loss: 0.016703005250845407
FOLD: 4, EPOCH: 35, valid_loss: 0.01928087481512473
FOLD: 4, EPOCH: 36, train_loss: 0.016643797248803282
FOLD: 4, EPOCH: 36, valid_loss: 0.0192709123

FOLD: 6, EPOCH: 4, train_loss: 0.02090061128743597
FOLD: 6, EPOCH: 4, valid_loss: 0.02049742194895561
FOLD: 6, EPOCH: 5, train_loss: 0.020686356325608654
FOLD: 6, EPOCH: 5, valid_loss: 0.020144492387771606
FOLD: 6, EPOCH: 6, train_loss: 0.020453415559353056
FOLD: 6, EPOCH: 6, valid_loss: 0.020037393157298748
FOLD: 6, EPOCH: 7, train_loss: 0.020234491701263027
FOLD: 6, EPOCH: 7, valid_loss: 0.02005587160014189
FOLD: 6, EPOCH: 8, train_loss: 0.020073069913967234
FOLD: 6, EPOCH: 8, valid_loss: 0.019884405227807853
FOLD: 6, EPOCH: 9, train_loss: 0.019948121890224314
FOLD: 6, EPOCH: 9, valid_loss: 0.019729068789344568
FOLD: 6, EPOCH: 10, train_loss: 0.01977003609912621
FOLD: 6, EPOCH: 10, valid_loss: 0.019606357726913232
FOLD: 6, EPOCH: 11, train_loss: 0.019580258861989587
FOLD: 6, EPOCH: 11, valid_loss: 0.019512857525394514
FOLD: 6, EPOCH: 12, train_loss: 0.019443225115537643
FOLD: 6, EPOCH: 12, valid_loss: 0.019526845417343654
FOLD: 6, EPOCH: 13, train_loss: 0.019368126057088375
FOLD: 6, 

FOLD: 0, EPOCH: 29, train_loss: 0.017741740177813416
FOLD: 0, EPOCH: 29, valid_loss: 0.01881130340580757
FOLD: 0, EPOCH: 30, train_loss: 0.017616788428780193
FOLD: 0, EPOCH: 30, valid_loss: 0.01882674344457113
FOLD: 0, EPOCH: 31, train_loss: 0.01760217416528109
FOLD: 0, EPOCH: 31, valid_loss: 0.01881805506463234
FOLD: 0, EPOCH: 32, train_loss: 0.01758918835706002
FOLD: 0, EPOCH: 32, valid_loss: 0.018792373056595143
FOLD: 0, EPOCH: 33, train_loss: 0.01752844694498423
FOLD: 0, EPOCH: 33, valid_loss: 0.01878967694938183
FOLD: 0, EPOCH: 34, train_loss: 0.017507771432802483
FOLD: 0, EPOCH: 34, valid_loss: 0.018801318481564522
FOLD: 0, EPOCH: 35, train_loss: 0.017466841894831206
FOLD: 0, EPOCH: 35, valid_loss: 0.01878587256830472
FOLD: 0, EPOCH: 36, train_loss: 0.017412224180392316
FOLD: 0, EPOCH: 36, valid_loss: 0.01878732557480152
FOLD: 0, EPOCH: 37, train_loss: 0.017409449594246375
FOLD: 0, EPOCH: 37, valid_loss: 0.018778633183011643
FOLD: 0, EPOCH: 38, train_loss: 0.01738965657313128
FOL

FOLD: 2, EPOCH: 4, train_loss: 0.02084033553664749
FOLD: 2, EPOCH: 4, valid_loss: 0.020819539634081032
FOLD: 2, EPOCH: 5, train_loss: 0.020578328268350782
FOLD: 2, EPOCH: 5, valid_loss: 0.020549608681064386
FOLD: 2, EPOCH: 6, train_loss: 0.02037570670851179
FOLD: 2, EPOCH: 6, valid_loss: 0.02056210660017454
FOLD: 2, EPOCH: 7, train_loss: 0.02021851607069776
FOLD: 2, EPOCH: 7, valid_loss: 0.02041457607769049
FOLD: 2, EPOCH: 8, train_loss: 0.020025642588734627
FOLD: 2, EPOCH: 8, valid_loss: 0.020242959547501344
FOLD: 2, EPOCH: 9, train_loss: 0.019836497241379442
FOLD: 2, EPOCH: 9, valid_loss: 0.020079311843101796
FOLD: 2, EPOCH: 10, train_loss: 0.019699534099247004
FOLD: 2, EPOCH: 10, valid_loss: 0.019927608135801095
FOLD: 2, EPOCH: 11, train_loss: 0.01958974523822198
FOLD: 2, EPOCH: 11, valid_loss: 0.019867317464489203
FOLD: 2, EPOCH: 12, train_loss: 0.019423409742680756
FOLD: 2, EPOCH: 12, valid_loss: 0.020170222251461103
FOLD: 2, EPOCH: 13, train_loss: 0.019306778681237955
FOLD: 2, EP

FOLD: 3, EPOCH: 29, train_loss: 0.01716002673414108
FOLD: 3, EPOCH: 29, valid_loss: 0.01898424943479208
FOLD: 3, EPOCH: 30, train_loss: 0.01702260661467507
FOLD: 3, EPOCH: 30, valid_loss: 0.01898503117263317
FOLD: 3, EPOCH: 31, train_loss: 0.017006683113003098
FOLD: 3, EPOCH: 31, valid_loss: 0.018956066467441045
FOLD: 3, EPOCH: 32, train_loss: 0.016960639607261966
FOLD: 3, EPOCH: 32, valid_loss: 0.018976960904323138
FOLD: 3, EPOCH: 33, train_loss: 0.01689529960119241
FOLD: 3, EPOCH: 33, valid_loss: 0.01895731157408311
FOLD: 3, EPOCH: 34, train_loss: 0.016825365330520516
FOLD: 3, EPOCH: 34, valid_loss: 0.018978025334385727
Epoch    35: reducing learning rate of group 0 to 3.9889e-03.
FOLD: 3, EPOCH: 35, train_loss: 0.01682449589055535
FOLD: 3, EPOCH: 35, valid_loss: 0.018952862717784368
FOLD: 3, EPOCH: 36, train_loss: 0.016761131949622084
FOLD: 3, EPOCH: 36, valid_loss: 0.018953455612063408
FOLD: 3, EPOCH: 37, train_loss: 0.016756514024392172
FOLD: 3, EPOCH: 37, valid_loss: 0.0189617415

FOLD: 5, EPOCH: 4, train_loss: 0.020918323280843528
FOLD: 5, EPOCH: 4, valid_loss: 0.02068101055920124
FOLD: 5, EPOCH: 5, train_loss: 0.02076871763612773
FOLD: 5, EPOCH: 5, valid_loss: 0.020616684682094134
FOLD: 5, EPOCH: 6, train_loss: 0.020441792200546007
FOLD: 5, EPOCH: 6, valid_loss: 0.02041274610047157
FOLD: 5, EPOCH: 7, train_loss: 0.02033298505419815
FOLD: 5, EPOCH: 7, valid_loss: 0.02030740649654315
FOLD: 5, EPOCH: 8, train_loss: 0.020145409438457038
FOLD: 5, EPOCH: 8, valid_loss: 0.020005515275093224
FOLD: 5, EPOCH: 9, train_loss: 0.01997290669965583
FOLD: 5, EPOCH: 9, valid_loss: 0.019961529053174533
FOLD: 5, EPOCH: 10, train_loss: 0.01980783714837319
FOLD: 5, EPOCH: 10, valid_loss: 0.019935130356596067
FOLD: 5, EPOCH: 11, train_loss: 0.01967670939661361
FOLD: 5, EPOCH: 11, valid_loss: 0.019631838282713525
FOLD: 5, EPOCH: 12, train_loss: 0.019530409821183294
FOLD: 5, EPOCH: 12, valid_loss: 0.019667098871790446
FOLD: 5, EPOCH: 13, train_loss: 0.019412782974541187
FOLD: 5, EPOC

FOLD: 6, EPOCH: 31, train_loss: 0.017143455132640695
FOLD: 6, EPOCH: 31, valid_loss: 0.018986377578515273
FOLD: 6, EPOCH: 32, train_loss: 0.01704011858768157
FOLD: 6, EPOCH: 32, valid_loss: 0.01900093902188998
Epoch    33: reducing learning rate of group 0 to 2.6653e-02.
FOLD: 6, EPOCH: 33, train_loss: 0.016643259661725245
FOLD: 6, EPOCH: 33, valid_loss: 0.018917482203015916
FOLD: 6, EPOCH: 34, train_loss: 0.016527983246723544
FOLD: 6, EPOCH: 34, valid_loss: 0.018889211404782075
FOLD: 6, EPOCH: 35, train_loss: 0.01642210387649971
FOLD: 6, EPOCH: 35, valid_loss: 0.018898876957022227
FOLD: 6, EPOCH: 36, train_loss: 0.016398533297753013
FOLD: 6, EPOCH: 36, valid_loss: 0.018880289907638844
FOLD: 6, EPOCH: 37, train_loss: 0.016322233814846824
FOLD: 6, EPOCH: 37, valid_loss: 0.018870628367249783
FOLD: 6, EPOCH: 38, train_loss: 0.016300997382180916
FOLD: 6, EPOCH: 38, valid_loss: 0.018876720363130935
FOLD: 6, EPOCH: 39, train_loss: 0.016272891992451372
FOLD: 6, EPOCH: 39, valid_loss: 0.018878

FOLD: 1, EPOCH: 4, train_loss: 0.020829186170689157
FOLD: 1, EPOCH: 4, valid_loss: 0.020942061996230714
FOLD: 1, EPOCH: 5, train_loss: 0.02072029047318407
FOLD: 1, EPOCH: 5, valid_loss: 0.020622925976148017
FOLD: 1, EPOCH: 6, train_loss: 0.020446517655776965
FOLD: 1, EPOCH: 6, valid_loss: 0.021101596407019176
FOLD: 1, EPOCH: 7, train_loss: 0.02026171544315042
FOLD: 1, EPOCH: 7, valid_loss: 0.02027131846317878
FOLD: 1, EPOCH: 8, train_loss: 0.0200813643483294
FOLD: 1, EPOCH: 8, valid_loss: 0.020310632024820034
FOLD: 1, EPOCH: 9, train_loss: 0.0198917221529661
FOLD: 1, EPOCH: 9, valid_loss: 0.02002073437548601
FOLD: 1, EPOCH: 10, train_loss: 0.019723041108935267
FOLD: 1, EPOCH: 10, valid_loss: 0.019857927727011535
FOLD: 1, EPOCH: 11, train_loss: 0.019587257776308705
FOLD: 1, EPOCH: 11, valid_loss: 0.019683491438627243
FOLD: 1, EPOCH: 12, train_loss: 0.01937985244030888
FOLD: 1, EPOCH: 12, valid_loss: 0.019675361135831244
FOLD: 1, EPOCH: 13, train_loss: 0.019361243935654294
FOLD: 1, EPOCH

FOLD: 2, EPOCH: 30, train_loss: 0.01737299030394973
FOLD: 2, EPOCH: 30, valid_loss: 0.019346454109136876
FOLD: 2, EPOCH: 31, train_loss: 0.017243675207970915
FOLD: 2, EPOCH: 31, valid_loss: 0.019319826593765847
FOLD: 2, EPOCH: 32, train_loss: 0.017115836220516545
FOLD: 2, EPOCH: 32, valid_loss: 0.019298100700745217
Epoch    33: reducing learning rate of group 0 to 2.6653e-02.
FOLD: 2, EPOCH: 33, train_loss: 0.016785540013901284
FOLD: 2, EPOCH: 33, valid_loss: 0.019204720711478822
FOLD: 2, EPOCH: 34, train_loss: 0.01665512390585767
FOLD: 2, EPOCH: 34, valid_loss: 0.01916576334490226
FOLD: 2, EPOCH: 35, train_loss: 0.01659038516919355
FOLD: 2, EPOCH: 35, valid_loss: 0.019175881806474466
FOLD: 2, EPOCH: 36, train_loss: 0.016484110414780474
FOLD: 2, EPOCH: 36, valid_loss: 0.019186590010156997
FOLD: 2, EPOCH: 37, train_loss: 0.016475427102901646
FOLD: 2, EPOCH: 37, valid_loss: 0.019180158009895913
Epoch    38: reducing learning rate of group 0 to 3.9889e-03.
FOLD: 2, EPOCH: 38, train_loss: 

FOLD: 4, EPOCH: 5, train_loss: 0.02062348535636792
FOLD: 4, EPOCH: 5, valid_loss: 0.020746873405117255
FOLD: 4, EPOCH: 6, train_loss: 0.02035991130145015
FOLD: 4, EPOCH: 6, valid_loss: 0.020515491612828694
FOLD: 4, EPOCH: 7, train_loss: 0.020249392937969516
FOLD: 4, EPOCH: 7, valid_loss: 0.020489928384239856
FOLD: 4, EPOCH: 8, train_loss: 0.020033554350202147
FOLD: 4, EPOCH: 8, valid_loss: 0.020254490610498648
FOLD: 4, EPOCH: 9, train_loss: 0.019948310545972875
FOLD: 4, EPOCH: 9, valid_loss: 0.02019489212678029
FOLD: 4, EPOCH: 10, train_loss: 0.01974604480169915
FOLD: 4, EPOCH: 10, valid_loss: 0.020178231625602797
FOLD: 4, EPOCH: 11, train_loss: 0.0196411648412814
FOLD: 4, EPOCH: 11, valid_loss: 0.019981969864322588
FOLD: 4, EPOCH: 12, train_loss: 0.019490557647234685
FOLD: 4, EPOCH: 12, valid_loss: 0.019850651661937054
FOLD: 4, EPOCH: 13, train_loss: 0.01941955864832208
FOLD: 4, EPOCH: 13, valid_loss: 0.01990026221252405
FOLD: 4, EPOCH: 14, train_loss: 0.01928970290700326
FOLD: 4, EPO

FOLD: 5, EPOCH: 31, train_loss: 0.01715001219732536
FOLD: 5, EPOCH: 31, valid_loss: 0.019061085839684192
FOLD: 5, EPOCH: 32, train_loss: 0.017072088286482
FOLD: 5, EPOCH: 32, valid_loss: 0.019106663620242707
FOLD: 5, EPOCH: 33, train_loss: 0.01688041174280885
FOLD: 5, EPOCH: 33, valid_loss: 0.01905687253635663
FOLD: 5, EPOCH: 34, train_loss: 0.01679219165816903
FOLD: 5, EPOCH: 34, valid_loss: 0.019074383549965344
FOLD: 5, EPOCH: 35, train_loss: 0.01666163291933166
FOLD: 5, EPOCH: 35, valid_loss: 0.019142385572195053
FOLD: 5, EPOCH: 36, train_loss: 0.016495619711743015
FOLD: 5, EPOCH: 36, valid_loss: 0.01909096180819548
Epoch    37: reducing learning rate of group 0 to 2.6653e-02.
FOLD: 5, EPOCH: 37, train_loss: 0.016183237092116394
FOLD: 5, EPOCH: 37, valid_loss: 0.019006657055937327
FOLD: 5, EPOCH: 38, train_loss: 0.01608941607126916
FOLD: 5, EPOCH: 38, valid_loss: 0.01900071593431326
FOLD: 5, EPOCH: 39, train_loss: 0.016024757428346453
FOLD: 5, EPOCH: 39, valid_loss: 0.01897880477974

FOLD: 0, EPOCH: 5, train_loss: 0.020680150369534623
FOLD: 0, EPOCH: 5, valid_loss: 0.020513409318832252
FOLD: 0, EPOCH: 6, train_loss: 0.02052354080149451
FOLD: 0, EPOCH: 6, valid_loss: 0.02000392686862212
FOLD: 0, EPOCH: 7, train_loss: 0.02026638909670952
FOLD: 0, EPOCH: 7, valid_loss: 0.020027117803692818
FOLD: 0, EPOCH: 8, train_loss: 0.020160728511778084
FOLD: 0, EPOCH: 8, valid_loss: 0.019730201850716885
FOLD: 0, EPOCH: 9, train_loss: 0.019924115968515742
FOLD: 0, EPOCH: 9, valid_loss: 0.019755459318940457
FOLD: 0, EPOCH: 10, train_loss: 0.01982580324181834
FOLD: 0, EPOCH: 10, valid_loss: 0.0196626832565436
FOLD: 0, EPOCH: 11, train_loss: 0.01964705643823018
FOLD: 0, EPOCH: 11, valid_loss: 0.01944740775686044
FOLD: 0, EPOCH: 12, train_loss: 0.01953865889761899
FOLD: 0, EPOCH: 12, valid_loss: 0.019508892526993386
FOLD: 0, EPOCH: 13, train_loss: 0.019391140875381394
FOLD: 0, EPOCH: 13, valid_loss: 0.01937232008920266
FOLD: 0, EPOCH: 14, train_loss: 0.019273534391981526
FOLD: 0, EPOC

FOLD: 1, EPOCH: 32, train_loss: 0.017171218588545516
FOLD: 1, EPOCH: 32, valid_loss: 0.018966222898318216
FOLD: 1, EPOCH: 33, train_loss: 0.01709592863413933
FOLD: 1, EPOCH: 33, valid_loss: 0.018962539732456207
FOLD: 1, EPOCH: 34, train_loss: 0.017090287487450485
FOLD: 1, EPOCH: 34, valid_loss: 0.018969887437728736
FOLD: 1, EPOCH: 35, train_loss: 0.017055650040305948
FOLD: 1, EPOCH: 35, valid_loss: 0.01897859716644654
FOLD: 1, EPOCH: 36, train_loss: 0.017009781849746768
FOLD: 1, EPOCH: 36, valid_loss: 0.01894893253651949
FOLD: 1, EPOCH: 37, train_loss: 0.01696570312000207
FOLD: 1, EPOCH: 37, valid_loss: 0.01895279365663345
FOLD: 1, EPOCH: 38, train_loss: 0.016941236281717145
FOLD: 1, EPOCH: 38, valid_loss: 0.018958549659985762
FOLD: 1, EPOCH: 39, train_loss: 0.01690115311459915
FOLD: 1, EPOCH: 39, valid_loss: 0.018948504128135167
FOLD: 1, EPOCH: 40, train_loss: 0.016891431894052674
FOLD: 1, EPOCH: 40, valid_loss: 0.01893133383530837
FOLD: 1, EPOCH: 41, train_loss: 0.0168382910306792
FO

FOLD: 3, EPOCH: 5, train_loss: 0.020666576040959037
FOLD: 3, EPOCH: 5, valid_loss: 0.020379688590765
FOLD: 3, EPOCH: 6, train_loss: 0.020482496937384475
FOLD: 3, EPOCH: 6, valid_loss: 0.020229538759359948
FOLD: 3, EPOCH: 7, train_loss: 0.020254410954343306
FOLD: 3, EPOCH: 7, valid_loss: 0.020228037753930457
FOLD: 3, EPOCH: 8, train_loss: 0.020121797994786018
FOLD: 3, EPOCH: 8, valid_loss: 0.019939599701991446
FOLD: 3, EPOCH: 9, train_loss: 0.01995792375827158
FOLD: 3, EPOCH: 9, valid_loss: 0.019957868239054315
FOLD: 3, EPOCH: 10, train_loss: 0.019771645852440112
FOLD: 3, EPOCH: 10, valid_loss: 0.019731640959015258
FOLD: 3, EPOCH: 11, train_loss: 0.019656472335997467
FOLD: 3, EPOCH: 11, valid_loss: 0.019701170949981764
FOLD: 3, EPOCH: 12, train_loss: 0.019518735894077533
FOLD: 3, EPOCH: 12, valid_loss: 0.019613428471180108
FOLD: 3, EPOCH: 13, train_loss: 0.019404902928382962
FOLD: 3, EPOCH: 13, valid_loss: 0.019596653059124947
FOLD: 3, EPOCH: 14, train_loss: 0.019282273822338193
FOLD: 3

FOLD: 4, EPOCH: 32, train_loss: 0.01719733012991177
FOLD: 4, EPOCH: 32, valid_loss: 0.019390923902392387
Epoch    33: reducing learning rate of group 0 to 2.6653e-02.
FOLD: 4, EPOCH: 33, train_loss: 0.016861141432781477
FOLD: 4, EPOCH: 33, valid_loss: 0.019265757013971988
FOLD: 4, EPOCH: 34, train_loss: 0.016744091691499628
FOLD: 4, EPOCH: 34, valid_loss: 0.019235592478742965
FOLD: 4, EPOCH: 35, train_loss: 0.016674773251587473
FOLD: 4, EPOCH: 35, valid_loss: 0.0192318342339534
FOLD: 4, EPOCH: 36, train_loss: 0.016607894637697452
FOLD: 4, EPOCH: 36, valid_loss: 0.019206367433071136
FOLD: 4, EPOCH: 37, train_loss: 0.016660930085423832
FOLD: 4, EPOCH: 37, valid_loss: 0.019216391616142713
FOLD: 4, EPOCH: 38, train_loss: 0.016518996336270828
FOLD: 4, EPOCH: 38, valid_loss: 0.019230737422521297
FOLD: 4, EPOCH: 39, train_loss: 0.01648408340642581
FOLD: 4, EPOCH: 39, valid_loss: 0.019197406963660166
FOLD: 4, EPOCH: 40, train_loss: 0.016476731973926764
FOLD: 4, EPOCH: 40, valid_loss: 0.0192173

FOLD: 6, EPOCH: 6, train_loss: 0.020428666442229942
FOLD: 6, EPOCH: 6, valid_loss: 0.02015683499093239
FOLD: 6, EPOCH: 7, train_loss: 0.020179159310017084
FOLD: 6, EPOCH: 7, valid_loss: 0.020028877430237256
FOLD: 6, EPOCH: 8, train_loss: 0.020028105282501596
FOLD: 6, EPOCH: 8, valid_loss: 0.01994160505441519
FOLD: 6, EPOCH: 9, train_loss: 0.019835044000599836
FOLD: 6, EPOCH: 9, valid_loss: 0.019768357563477296
FOLD: 6, EPOCH: 10, train_loss: 0.019726154520302207
FOLD: 6, EPOCH: 10, valid_loss: 0.019572397264150474
FOLD: 6, EPOCH: 11, train_loss: 0.019548926502466202
FOLD: 6, EPOCH: 11, valid_loss: 0.01952944380732683
FOLD: 6, EPOCH: 12, train_loss: 0.019450718763510923
FOLD: 6, EPOCH: 12, valid_loss: 0.01944322970051032
FOLD: 6, EPOCH: 13, train_loss: 0.01932521182036883
FOLD: 6, EPOCH: 13, valid_loss: 0.019435399283583347
FOLD: 6, EPOCH: 14, train_loss: 0.01915197165028469
FOLD: 6, EPOCH: 14, valid_loss: 0.019298624533873338
FOLD: 6, EPOCH: 15, train_loss: 0.019035545656004467
FOLD: 6

FOLD: 0, EPOCH: 31, train_loss: 0.016843158965678635
FOLD: 0, EPOCH: 31, valid_loss: 0.018825422112758342
FOLD: 0, EPOCH: 32, train_loss: 0.016688668320106494
FOLD: 0, EPOCH: 32, valid_loss: 0.018791536872203533
FOLD: 0, EPOCH: 33, train_loss: 0.016620172250613168
FOLD: 0, EPOCH: 33, valid_loss: 0.01879467012790533
FOLD: 0, EPOCH: 34, train_loss: 0.016537196279780286
FOLD: 0, EPOCH: 34, valid_loss: 0.018777829523269948
FOLD: 0, EPOCH: 35, train_loss: 0.01648849678049619
FOLD: 0, EPOCH: 35, valid_loss: 0.018756215245677874
FOLD: 0, EPOCH: 36, train_loss: 0.016450771243890393
FOLD: 0, EPOCH: 36, valid_loss: 0.018785175365897324
FOLD: 0, EPOCH: 37, train_loss: 0.016415572700065537
FOLD: 0, EPOCH: 37, valid_loss: 0.018754229379388
FOLD: 0, EPOCH: 38, train_loss: 0.016361312525397218
FOLD: 0, EPOCH: 38, valid_loss: 0.018759317696094513
FOLD: 0, EPOCH: 39, train_loss: 0.016239438356982695
FOLD: 0, EPOCH: 39, valid_loss: 0.018761681822630074
FOLD: 0, EPOCH: 40, train_loss: 0.01630676499995831

FOLD: 2, EPOCH: 7, train_loss: 0.020298814703081106
FOLD: 2, EPOCH: 7, valid_loss: 0.020345458760857582
FOLD: 2, EPOCH: 8, train_loss: 0.020076358992908452
FOLD: 2, EPOCH: 8, valid_loss: 0.020972111620582067
FOLD: 2, EPOCH: 9, train_loss: 0.019934533314930426
FOLD: 2, EPOCH: 9, valid_loss: 0.020285348479564373
FOLD: 2, EPOCH: 10, train_loss: 0.01977114225863605
FOLD: 2, EPOCH: 10, valid_loss: 0.02011576696084096
FOLD: 2, EPOCH: 11, train_loss: 0.019621609080884908
FOLD: 2, EPOCH: 11, valid_loss: 0.01995074677352722
FOLD: 2, EPOCH: 12, train_loss: 0.0195021724479424
FOLD: 2, EPOCH: 12, valid_loss: 0.01983467460824893
FOLD: 2, EPOCH: 13, train_loss: 0.0194129443450554
FOLD: 2, EPOCH: 13, valid_loss: 0.019799100378384955
FOLD: 2, EPOCH: 14, train_loss: 0.019251911120640265
FOLD: 2, EPOCH: 14, valid_loss: 0.01973421570773308
FOLD: 2, EPOCH: 15, train_loss: 0.019152759675037215
FOLD: 2, EPOCH: 15, valid_loss: 0.01968668229304827
FOLD: 2, EPOCH: 16, train_loss: 0.019057743100298417
FOLD: 2, 

FOLD: 3, EPOCH: 33, train_loss: 0.017277713361624127
FOLD: 3, EPOCH: 33, valid_loss: 0.018965000859819926
FOLD: 3, EPOCH: 34, train_loss: 0.017235017054387042
FOLD: 3, EPOCH: 34, valid_loss: 0.018948414004766025
FOLD: 3, EPOCH: 35, train_loss: 0.017166056569564988
FOLD: 3, EPOCH: 35, valid_loss: 0.01893024891614914
Epoch    36: reducing learning rate of group 0 to 3.9889e-03.
FOLD: 3, EPOCH: 36, train_loss: 0.017196549794863205
FOLD: 3, EPOCH: 36, valid_loss: 0.018936362117528915
FOLD: 3, EPOCH: 37, train_loss: 0.01715491140714368
FOLD: 3, EPOCH: 37, valid_loss: 0.01893664667239556
FOLD: 3, EPOCH: 38, train_loss: 0.017113044493001054
FOLD: 3, EPOCH: 38, valid_loss: 0.018937543607675113
Epoch    39: reducing learning rate of group 0 to 5.9698e-04.
FOLD: 3, EPOCH: 39, train_loss: 0.017104537540895713
FOLD: 3, EPOCH: 39, valid_loss: 0.01893820656606784
FOLD: 3, EPOCH: 40, train_loss: 0.017164890188723803
FOLD: 3, EPOCH: 40, valid_loss: 0.018924595931401618
FOLD: 3, EPOCH: 41, train_loss: 

FOLD: 5, EPOCH: 6, train_loss: 0.020437168611868006
FOLD: 5, EPOCH: 6, valid_loss: 0.02021872395506272
FOLD: 5, EPOCH: 7, train_loss: 0.020234037719264224
FOLD: 5, EPOCH: 7, valid_loss: 0.019972931307095747
FOLD: 5, EPOCH: 8, train_loss: 0.020103295560221415
FOLD: 5, EPOCH: 8, valid_loss: 0.019957246258854866
FOLD: 5, EPOCH: 9, train_loss: 0.019906902937470255
FOLD: 5, EPOCH: 9, valid_loss: 0.01989455736027314
FOLD: 5, EPOCH: 10, train_loss: 0.019703131320106017
FOLD: 5, EPOCH: 10, valid_loss: 0.01972333900630474
FOLD: 5, EPOCH: 11, train_loss: 0.01961669167252006
FOLD: 5, EPOCH: 11, valid_loss: 0.01966142439498351
FOLD: 5, EPOCH: 12, train_loss: 0.01949122454971075
FOLD: 5, EPOCH: 12, valid_loss: 0.019615286388076268
FOLD: 5, EPOCH: 13, train_loss: 0.01932739046075054
FOLD: 5, EPOCH: 13, valid_loss: 0.01944657902304943
FOLD: 5, EPOCH: 14, train_loss: 0.019199122196516476
FOLD: 5, EPOCH: 14, valid_loss: 0.019365249774776973
FOLD: 5, EPOCH: 15, train_loss: 0.01910655917851506
FOLD: 5, E

FOLD: 6, EPOCH: 32, train_loss: 0.017212905983969167
FOLD: 6, EPOCH: 32, valid_loss: 0.018813236974752866
FOLD: 6, EPOCH: 33, train_loss: 0.017155244216524267
FOLD: 6, EPOCH: 33, valid_loss: 0.018822044993822392
FOLD: 6, EPOCH: 34, train_loss: 0.01711703067947481
FOLD: 6, EPOCH: 34, valid_loss: 0.01881418663721818
FOLD: 6, EPOCH: 35, train_loss: 0.01707313199703758
FOLD: 6, EPOCH: 35, valid_loss: 0.01881986068418393
Epoch    36: reducing learning rate of group 0 to 3.9889e-03.
FOLD: 6, EPOCH: 36, train_loss: 0.017036265039162057
FOLD: 6, EPOCH: 36, valid_loss: 0.01879924736343897
FOLD: 6, EPOCH: 37, train_loss: 0.017006144279966485
FOLD: 6, EPOCH: 37, valid_loss: 0.018805392946188267
FOLD: 6, EPOCH: 38, train_loss: 0.017001652888752317
FOLD: 6, EPOCH: 38, valid_loss: 0.018819789330546673
FOLD: 6, EPOCH: 39, train_loss: 0.017007831660275523
FOLD: 6, EPOCH: 39, valid_loss: 0.018813977304559488
Epoch    40: reducing learning rate of group 0 to 5.9698e-04.
FOLD: 6, EPOCH: 40, train_loss: 0

FOLD: 1, EPOCH: 7, train_loss: 0.020299399548487085
FOLD: 1, EPOCH: 7, valid_loss: 0.020404616943918742
FOLD: 1, EPOCH: 8, train_loss: 0.020119524435014337
FOLD: 1, EPOCH: 8, valid_loss: 0.020245074222867306
FOLD: 1, EPOCH: 9, train_loss: 0.020010655745863914
FOLD: 1, EPOCH: 9, valid_loss: 0.02001202550645058
FOLD: 1, EPOCH: 10, train_loss: 0.01981890735191268
FOLD: 1, EPOCH: 10, valid_loss: 0.019879810655346282
FOLD: 1, EPOCH: 11, train_loss: 0.019640007106637634
FOLD: 1, EPOCH: 11, valid_loss: 0.019807045992750388
FOLD: 1, EPOCH: 12, train_loss: 0.019436009530280088
FOLD: 1, EPOCH: 12, valid_loss: 0.019735825033142015
FOLD: 1, EPOCH: 13, train_loss: 0.019363631267805357
FOLD: 1, EPOCH: 13, valid_loss: 0.019983963611034248
FOLD: 1, EPOCH: 14, train_loss: 0.01918348204344511
FOLD: 1, EPOCH: 14, valid_loss: 0.019585331042225543
FOLD: 1, EPOCH: 15, train_loss: 0.019029874430113548
FOLD: 1, EPOCH: 15, valid_loss: 0.019498771343093652
FOLD: 1, EPOCH: 16, train_loss: 0.018917202999865688
FO

FOLD: 2, EPOCH: 34, train_loss: 0.016631914516659203
FOLD: 2, EPOCH: 34, valid_loss: 0.01932168551362478
Epoch    35: reducing learning rate of group 0 to 2.6653e-02.
FOLD: 2, EPOCH: 35, train_loss: 0.016293211047210404
FOLD: 2, EPOCH: 35, valid_loss: 0.019218263144676503
FOLD: 2, EPOCH: 36, train_loss: 0.01614680450812385
FOLD: 2, EPOCH: 36, valid_loss: 0.019188737926574852
FOLD: 2, EPOCH: 37, train_loss: 0.01610965035050302
FOLD: 2, EPOCH: 37, valid_loss: 0.01916800969495223
FOLD: 2, EPOCH: 38, train_loss: 0.016009325476212276
FOLD: 2, EPOCH: 38, valid_loss: 0.01919376148054233
FOLD: 2, EPOCH: 39, train_loss: 0.01597806638912172
FOLD: 2, EPOCH: 39, valid_loss: 0.019208112731575966
FOLD: 2, EPOCH: 40, train_loss: 0.015931327487467915
FOLD: 2, EPOCH: 40, valid_loss: 0.01916143326805188
FOLD: 2, EPOCH: 41, train_loss: 0.01581421897218034
FOLD: 2, EPOCH: 41, valid_loss: 0.01916903228713916
FOLD: 2, EPOCH: 42, train_loss: 0.0157697565375349
FOLD: 2, EPOCH: 42, valid_loss: 0.01919498839057

FOLD: 4, EPOCH: 9, train_loss: 0.019911694239724328
FOLD: 4, EPOCH: 9, valid_loss: 0.020488454602085628
FOLD: 4, EPOCH: 10, train_loss: 0.01977526645704701
FOLD: 4, EPOCH: 10, valid_loss: 0.020118279812427666
FOLD: 4, EPOCH: 11, train_loss: 0.01958576501724688
FOLD: 4, EPOCH: 11, valid_loss: 0.02009783685207367
FOLD: 4, EPOCH: 12, train_loss: 0.019454004444383288
FOLD: 4, EPOCH: 12, valid_loss: 0.020021661256368343
FOLD: 4, EPOCH: 13, train_loss: 0.019264971585692587
FOLD: 4, EPOCH: 13, valid_loss: 0.01993207074701786
FOLD: 4, EPOCH: 14, train_loss: 0.019167912217813568
FOLD: 4, EPOCH: 14, valid_loss: 0.01978059571522933
FOLD: 4, EPOCH: 15, train_loss: 0.018992558373390016
FOLD: 4, EPOCH: 15, valid_loss: 0.019878117224344842
FOLD: 4, EPOCH: 16, train_loss: 0.018888878535378625
FOLD: 4, EPOCH: 16, valid_loss: 0.019625589538079042
FOLD: 4, EPOCH: 17, train_loss: 0.018778007326496614
FOLD: 4, EPOCH: 17, valid_loss: 0.019749191088172104
FOLD: 4, EPOCH: 18, train_loss: 0.0186510193035812
FO

FOLD: 5, EPOCH: 35, train_loss: 0.017095841995969013
FOLD: 5, EPOCH: 35, valid_loss: 0.018856853246688843
FOLD: 5, EPOCH: 36, train_loss: 0.01698720091450456
FOLD: 5, EPOCH: 36, valid_loss: 0.0188570932413523
Epoch    37: reducing learning rate of group 0 to 3.9889e-03.
FOLD: 5, EPOCH: 37, train_loss: 0.016947684167708095
FOLD: 5, EPOCH: 37, valid_loss: 0.01882612776870911
FOLD: 5, EPOCH: 38, train_loss: 0.016953870515666297
FOLD: 5, EPOCH: 38, valid_loss: 0.01883897973367801
FOLD: 5, EPOCH: 39, train_loss: 0.017011959760172946
FOLD: 5, EPOCH: 39, valid_loss: 0.018838583563382808
FOLD: 5, EPOCH: 40, train_loss: 0.016925444927167247
FOLD: 5, EPOCH: 40, valid_loss: 0.018828160917529695
Epoch    41: reducing learning rate of group 0 to 5.9698e-04.
FOLD: 5, EPOCH: 41, train_loss: 0.016939117510274455
FOLD: 5, EPOCH: 41, valid_loss: 0.018845122307538986
FOLD: 5, EPOCH: 42, train_loss: 0.016887817708020274
FOLD: 5, EPOCH: 42, valid_loss: 0.01884371844621805
FOLD: 5, EPOCH: 43, train_loss: 0.

FOLD: 0, EPOCH: 7, train_loss: 0.020329699448838428
FOLD: 0, EPOCH: 7, valid_loss: 0.019917563177072085
FOLD: 0, EPOCH: 8, train_loss: 0.020121264271438122
FOLD: 0, EPOCH: 8, valid_loss: 0.02015346374649268
FOLD: 0, EPOCH: 9, train_loss: 0.019971843758547627
FOLD: 0, EPOCH: 9, valid_loss: 0.019704866867799025
FOLD: 0, EPOCH: 10, train_loss: 0.019774818667084783
FOLD: 0, EPOCH: 10, valid_loss: 0.019522119886600055
FOLD: 0, EPOCH: 11, train_loss: 0.019639673592472397
FOLD: 0, EPOCH: 11, valid_loss: 0.01940762925033386
FOLD: 0, EPOCH: 12, train_loss: 0.019521380573309755
FOLD: 0, EPOCH: 12, valid_loss: 0.01955756745659388
FOLD: 0, EPOCH: 13, train_loss: 0.019302217516343336
FOLD: 0, EPOCH: 13, valid_loss: 0.019296553988869373
FOLD: 0, EPOCH: 14, train_loss: 0.019193799612489907
FOLD: 0, EPOCH: 14, valid_loss: 0.01924011354836134
FOLD: 0, EPOCH: 15, train_loss: 0.01908868571390977
FOLD: 0, EPOCH: 15, valid_loss: 0.019265892557226695
FOLD: 0, EPOCH: 16, train_loss: 0.018961087951587664
FOLD

FOLD: 1, EPOCH: 33, train_loss: 0.01740641282821024
FOLD: 1, EPOCH: 33, valid_loss: 0.01897606196311804
FOLD: 1, EPOCH: 34, train_loss: 0.017349742378133373
FOLD: 1, EPOCH: 34, valid_loss: 0.018970798127926312
FOLD: 1, EPOCH: 35, train_loss: 0.017305268704689836
FOLD: 1, EPOCH: 35, valid_loss: 0.0189628628297494
FOLD: 1, EPOCH: 36, train_loss: 0.017311374329634616
FOLD: 1, EPOCH: 36, valid_loss: 0.018966863504969157
FOLD: 1, EPOCH: 37, train_loss: 0.017245334308795834
FOLD: 1, EPOCH: 37, valid_loss: 0.01896517680814633
FOLD: 1, EPOCH: 38, train_loss: 0.01727060109024515
FOLD: 1, EPOCH: 38, valid_loss: 0.018949805543972895
FOLD: 1, EPOCH: 39, train_loss: 0.01721925922744983
FOLD: 1, EPOCH: 39, valid_loss: 0.018957475630136635
FOLD: 1, EPOCH: 40, train_loss: 0.01716149746868256
FOLD: 1, EPOCH: 40, valid_loss: 0.01894567648951824
FOLD: 1, EPOCH: 41, train_loss: 0.01711760399661757
FOLD: 1, EPOCH: 41, valid_loss: 0.018951227888464928
FOLD: 1, EPOCH: 42, train_loss: 0.01713451710403771
FOLD

FOLD: 3, EPOCH: 9, train_loss: 0.01988876877805671
FOLD: 3, EPOCH: 9, valid_loss: 0.019787366430346783
FOLD: 3, EPOCH: 10, train_loss: 0.019736376037267415
FOLD: 3, EPOCH: 10, valid_loss: 0.019658304607638948
FOLD: 3, EPOCH: 11, train_loss: 0.01960484217852354
FOLD: 3, EPOCH: 11, valid_loss: 0.019651436891693335
FOLD: 3, EPOCH: 12, train_loss: 0.0194864676308793
FOLD: 3, EPOCH: 12, valid_loss: 0.0195966841509709
FOLD: 3, EPOCH: 13, train_loss: 0.01933270866504392
FOLD: 3, EPOCH: 13, valid_loss: 0.019544123600308713
FOLD: 3, EPOCH: 14, train_loss: 0.01915360921742143
FOLD: 3, EPOCH: 14, valid_loss: 0.019385039519805174
FOLD: 3, EPOCH: 15, train_loss: 0.01911104646687572
FOLD: 3, EPOCH: 15, valid_loss: 0.019382893322752073
FOLD: 3, EPOCH: 16, train_loss: 0.018982504293121195
FOLD: 3, EPOCH: 16, valid_loss: 0.01939112449494692
FOLD: 3, EPOCH: 17, train_loss: 0.01886448953804132
FOLD: 3, EPOCH: 17, valid_loss: 0.019318954159434024
FOLD: 3, EPOCH: 18, train_loss: 0.01873425839821229
FOLD: 3

FOLD: 4, EPOCH: 33, train_loss: 0.01735186279826873
FOLD: 4, EPOCH: 33, valid_loss: 0.019220992922782898
FOLD: 4, EPOCH: 34, train_loss: 0.017323415940375748
FOLD: 4, EPOCH: 34, valid_loss: 0.019218180042046767
FOLD: 4, EPOCH: 35, train_loss: 0.0172627676857283
FOLD: 4, EPOCH: 35, valid_loss: 0.0191975231640614
FOLD: 4, EPOCH: 36, train_loss: 0.017261961147793242
FOLD: 4, EPOCH: 36, valid_loss: 0.019216191166868575
FOLD: 4, EPOCH: 37, train_loss: 0.017195918723135382
FOLD: 4, EPOCH: 37, valid_loss: 0.01920261835822692
FOLD: 4, EPOCH: 38, train_loss: 0.0171957299667033
FOLD: 4, EPOCH: 38, valid_loss: 0.01918753222204172
FOLD: 4, EPOCH: 39, train_loss: 0.017175410849017067
FOLD: 4, EPOCH: 39, valid_loss: 0.019196188220610984
FOLD: 4, EPOCH: 40, train_loss: 0.01713409914156875
FOLD: 4, EPOCH: 40, valid_loss: 0.019202750032910935
FOLD: 4, EPOCH: 41, train_loss: 0.017080374640991557
FOLD: 4, EPOCH: 41, valid_loss: 0.019212599843740463
Epoch    42: reducing learning rate of group 0 to 3.9889

FOLD: 6, EPOCH: 9, train_loss: 0.01992617985485373
FOLD: 6, EPOCH: 9, valid_loss: 0.019749299264871158
FOLD: 6, EPOCH: 10, train_loss: 0.019790624796941474
FOLD: 6, EPOCH: 10, valid_loss: 0.019618956801983025
FOLD: 6, EPOCH: 11, train_loss: 0.019583142740098206
FOLD: 6, EPOCH: 11, valid_loss: 0.019627758803275917
FOLD: 6, EPOCH: 12, train_loss: 0.019464952241931413
FOLD: 6, EPOCH: 12, valid_loss: 0.019562008862312023
FOLD: 6, EPOCH: 13, train_loss: 0.01930519301645659
FOLD: 6, EPOCH: 13, valid_loss: 0.019392884551332548
FOLD: 6, EPOCH: 14, train_loss: 0.019157464842538576
FOLD: 6, EPOCH: 14, valid_loss: 0.01961417295611822
FOLD: 6, EPOCH: 15, train_loss: 0.01902883152502614
FOLD: 6, EPOCH: 15, valid_loss: 0.01935536342744644
FOLD: 6, EPOCH: 16, train_loss: 0.018931707996572997
FOLD: 6, EPOCH: 16, valid_loss: 0.019367372187284324
FOLD: 6, EPOCH: 17, train_loss: 0.018781577564171842
FOLD: 6, EPOCH: 17, valid_loss: 0.019220932028614558
FOLD: 6, EPOCH: 18, train_loss: 0.018662971897503815


In [24]:
valid_results = train_targets_scored.drop(columns=target_cols).merge(train[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)

y_true = train_targets_scored_forCV[target_cols].values
y_pred = valid_results[target_cols].values

score = 0
for i in range(len(target_cols)):
    score_ = log_loss(y_true[:, i], y_pred[:, i])
    score += score_ / target.shape[1]
    
print("CV log_loss: ", score)

CV log_loss:  0.014435448793879781


In [25]:
sub = sample_submission.drop(columns=target_cols).merge(test[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)
sub.to_csv('submission1.csv', index=False)