In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import os
import pandas as pd 
import numpy as np
import json
import random, string
import seaborn as sns
import matplotlib.pyplot as plt
import logging
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [2]:
def seed_everything(seed=1903):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)

In [3]:
maindir = "data/" # Directory with your files
traincsv = maindir+"/Train.csv"
testcsv = maindir+"/Test.csv"

train = pd.read_csv(traincsv)
test = pd.read_csv(testcsv)

In [4]:
cols = [col for col in train.columns if 'absorbance' in col]


for col in cols:
    vec_len = len(train[col].values)
    vec_len_test = len(test[col].values)
    
    raw_vec = pd.concat([train, test])[col].values.reshape(vec_len+vec_len_test, 1)
    
    transformer = QuantileTransformer(n_quantiles = 100, random_state = 1234, output_distribution = "normal")
    transformer.fit(raw_vec)

    train[col] = transformer.transform(train[col].values.reshape(vec_len, 1)).reshape(1, vec_len)[0]
    test[col] = transformer.transform(test[col].values.reshape(vec_len_test, 1)).reshape(1, vec_len_test)[0]

In [5]:
def double_spectral_collator(batch):
    
    x  = [el['x'] for el in batch]
    x_env = [el['x_env'] for el in batch]

    y  = [el['y'] for el in batch]
    
    x = torch.tensor(x, dtype = torch.float)
    x_env = torch.tensor(x_env, dtype = torch.float)
        
    x = filter_signal(x)
    
    y  = torch.tensor(y, dtype = torch.float)
     
    return x, x_env, y

def test_double_spectral_collator(batch):
    
    x  = [el['x'] for el in batch]
    x_env = [el['x_env'] for el in batch]
    
    x = torch.tensor(x, dtype = torch.float)
    x_env = torch.tensor(x_env, dtype = torch.float)
    
    x = filter_signal(x)    
        
    return x, x_env


def single_spectral_collator(batch):
    
    x  = [el['x'] for el in batch]

    y  = [el['y'] for el in batch]
    
    x = torch.tensor(x, dtype = torch.float)
        
    x = filter_signal(x)
    
    y  = torch.tensor(y, dtype = torch.float)
     
    return x, y

def test_single_spectral_collator(batch):
    
    x  = [el['x'] for el in batch]
    
    x = torch.tensor(x, dtype = torch.float)
    
    x = filter_signal(x)    
        
    return x


def filter_signal(signal):
            
    sig = torch.fft.fft2(signal)
        
    bs = sig.shape[0]
    sig_dim = sig.shape[1]
    
    if not use_real_only:
        
        if use_threshold:
            arr = torch.zeros((bs, threshold, threshold, 1))

            for i in range(bs):
                arr[i, 1] = sig.real[i, :].unsqueeze(1)[:threshold]
                arr[i, 2] = sig.imag[i, :].unsqueeze(1)[:threshold]

            arr = arr.view(bs, -1)

            return arr
        
        else:
            arr = torch.zeros((bs, sig_dim, sig_dim, 1))
            
            for i in range(bs):
                arr[i, 1] = sig.real[i, :].unsqueeze(1)
                arr[i, 2] = sig.imag[i, :].unsqueeze(1)

            arr = arr.view(bs, -1)

            return arr
    else:
        return sig.real

class BloodDataset(Dataset):
    
    def __init__(self, features, env_features = None, targets = None, train_mode = True):
        
        self.train_mode = train_mode
        self.features = features
        self.env_features = env_features
        if train_mode:
            self.targets = targets
        
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, item):
                
        x = self.features[item,:]
                
        if self.train_mode:

            y = self.targets[item,:]

            if not single_inp:
                x_env = self.env_features[item, :]

                return {

                    'x' : x,
                    'x_env' : x_env,
                    'y' : y
                }
            else:
                return {

                    'x' : x,
                    'y' : y
                }
        else:
            if not single_inp:
                
                x_env = self.env_features[item, :]

                return {

                    'x' : x,
                    'x_env' : x_env,
                }
            else:
                return {

                    'x' : x,
                }

In [6]:
folds = train.copy()

In [7]:
# Create new labels - Flatten 3 to 9 multilabel dataset

new_cols = ['hdl_cholesterol_human_ok','hdl_cholesterol_human_high', 'hdl_cholesterol_human_low', 
            'cholesterol_ldl_human_ok', 'cholesterol_ldl_human_high', 'cholesterol_ldl_human_low',
           'hemoglobin(hgb)_human_ok', 'hemoglobin(hgb)_human_high', 'hemoglobin(hgb)_human_low'
           ]

In [8]:
for col in new_cols:
    name, status = col.split('_')[:-1], col.split('_')[-1]
    name = '_'.join(name)
    
    if status == 'ok':
        folds.loc[:,col] = np.where(folds.loc[:, name] == 'ok' , 1, 0)
    elif status == 'high':
        folds.loc[:,col] = np.where(folds.loc[:, name] == 'high' , 1, 0)
    elif status == 'low':
        folds.loc[:,col] = np.where(folds.loc[:, name] == 'low' , 1, 0)


In [9]:
train

Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,absorbance165,absorbance166,absorbance167,absorbance168,absorbance169,temperature,humidity,hdl_cholesterol_human,hemoglobin(hgb)_human,cholesterol_ldl_human
0,ID_3SSHI56C,-0.788436,-0.870233,-0.751360,-0.760723,-0.719232,-0.813073,-0.759374,-0.791442,-0.779096,...,-0.639104,-0.583443,-0.686752,-0.786533,-0.842865,42.51,34.01,ok,ok,ok
1,ID_599OOLZA,-0.932002,-0.938503,-0.892502,-0.879632,-0.899648,-0.924872,-0.891111,-0.867000,-0.814230,...,0.385571,0.604894,0.439474,-0.126239,-0.009300,44.52,32.09,ok,high,high
2,ID_MVJGPQ75,-1.467630,-1.257684,-1.553288,-1.432158,-1.463622,-1.387477,-1.501475,-1.488373,-1.477999,...,0.608173,0.026447,0.723730,0.126522,0.772604,45.77,24.80,ok,ok,high
3,ID_CK6RF8YV,0.095574,0.003712,0.210278,-0.080552,-0.215119,-0.208997,-0.085667,-0.055306,-0.119857,...,0.303939,0.366705,0.495061,0.260789,0.814038,45.84,36.93,low,ok,high
4,ID_82N6QE6I,-0.025650,0.245087,0.275626,0.202351,0.214480,0.304999,0.267150,0.306523,0.365314,...,-0.027919,0.250123,0.750729,0.456742,0.806029,38.92,23.88,ok,ok,high
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13135,ID_NGPC0DA3,-0.731347,-0.777306,-0.757571,-0.664033,-0.716014,-0.752914,-0.754194,-0.730887,-0.670193,...,-0.971404,-0.916751,-0.595498,-0.861080,-0.768589,34.79,36.93,high,ok,ok
13136,ID_XRBUD5U8,0.534390,0.515836,0.429998,0.312602,0.426099,0.378080,0.344882,0.368488,0.360771,...,1.266345,1.101625,1.055293,1.100618,1.139834,43.12,19.14,ok,ok,ok
13137,ID_2M9L5NV2,0.065681,0.186262,0.296918,0.089832,0.262038,0.218413,0.277632,0.363968,0.384738,...,1.371728,1.127775,1.586676,1.549625,1.111143,42.48,43.41,ok,ok,ok
13138,ID_C5V5SD2D,-1.219791,-0.972191,-1.138300,-1.244377,-1.158529,-1.181975,-1.104500,-1.157134,-1.086590,...,-1.058979,-1.025315,-0.930521,-1.202969,-0.519076,41.86,35.10,ok,ok,ok


In [10]:
folds

Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,cholesterol_ldl_human,hdl_cholesterol_human_ok,hdl_cholesterol_human_high,hdl_cholesterol_human_low,cholesterol_ldl_human_ok,cholesterol_ldl_human_high,cholesterol_ldl_human_low,hemoglobin(hgb)_human_ok,hemoglobin(hgb)_human_high,hemoglobin(hgb)_human_low
0,ID_3SSHI56C,-0.788436,-0.870233,-0.751360,-0.760723,-0.719232,-0.813073,-0.759374,-0.791442,-0.779096,...,ok,1,0,0,1,0,0,1,0,0
1,ID_599OOLZA,-0.932002,-0.938503,-0.892502,-0.879632,-0.899648,-0.924872,-0.891111,-0.867000,-0.814230,...,high,1,0,0,0,1,0,0,1,0
2,ID_MVJGPQ75,-1.467630,-1.257684,-1.553288,-1.432158,-1.463622,-1.387477,-1.501475,-1.488373,-1.477999,...,high,1,0,0,0,1,0,1,0,0
3,ID_CK6RF8YV,0.095574,0.003712,0.210278,-0.080552,-0.215119,-0.208997,-0.085667,-0.055306,-0.119857,...,high,0,0,1,0,1,0,1,0,0
4,ID_82N6QE6I,-0.025650,0.245087,0.275626,0.202351,0.214480,0.304999,0.267150,0.306523,0.365314,...,high,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13135,ID_NGPC0DA3,-0.731347,-0.777306,-0.757571,-0.664033,-0.716014,-0.752914,-0.754194,-0.730887,-0.670193,...,ok,0,1,0,1,0,0,1,0,0
13136,ID_XRBUD5U8,0.534390,0.515836,0.429998,0.312602,0.426099,0.378080,0.344882,0.368488,0.360771,...,ok,1,0,0,1,0,0,1,0,0
13137,ID_2M9L5NV2,0.065681,0.186262,0.296918,0.089832,0.262038,0.218413,0.277632,0.363968,0.384738,...,ok,1,0,0,1,0,0,1,0,0
13138,ID_C5V5SD2D,-1.219791,-0.972191,-1.138300,-1.244377,-1.158529,-1.181975,-1.104500,-1.157134,-1.086590,...,ok,1,0,0,1,0,0,1,0,0


In [11]:
targets = ['hdl_cholesterol_human', 'cholesterol_ldl_human', 'hemoglobin(hgb)_human']

# drop  old columns
folds.drop(columns = targets, axis = 1)

Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,humidity,hdl_cholesterol_human_ok,hdl_cholesterol_human_high,hdl_cholesterol_human_low,cholesterol_ldl_human_ok,cholesterol_ldl_human_high,cholesterol_ldl_human_low,hemoglobin(hgb)_human_ok,hemoglobin(hgb)_human_high,hemoglobin(hgb)_human_low
0,ID_3SSHI56C,-0.788436,-0.870233,-0.751360,-0.760723,-0.719232,-0.813073,-0.759374,-0.791442,-0.779096,...,34.01,1,0,0,1,0,0,1,0,0
1,ID_599OOLZA,-0.932002,-0.938503,-0.892502,-0.879632,-0.899648,-0.924872,-0.891111,-0.867000,-0.814230,...,32.09,1,0,0,0,1,0,0,1,0
2,ID_MVJGPQ75,-1.467630,-1.257684,-1.553288,-1.432158,-1.463622,-1.387477,-1.501475,-1.488373,-1.477999,...,24.80,1,0,0,0,1,0,1,0,0
3,ID_CK6RF8YV,0.095574,0.003712,0.210278,-0.080552,-0.215119,-0.208997,-0.085667,-0.055306,-0.119857,...,36.93,0,0,1,0,1,0,1,0,0
4,ID_82N6QE6I,-0.025650,0.245087,0.275626,0.202351,0.214480,0.304999,0.267150,0.306523,0.365314,...,23.88,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13135,ID_NGPC0DA3,-0.731347,-0.777306,-0.757571,-0.664033,-0.716014,-0.752914,-0.754194,-0.730887,-0.670193,...,36.93,0,1,0,1,0,0,1,0,0
13136,ID_XRBUD5U8,0.534390,0.515836,0.429998,0.312602,0.426099,0.378080,0.344882,0.368488,0.360771,...,19.14,1,0,0,1,0,0,1,0,0
13137,ID_2M9L5NV2,0.065681,0.186262,0.296918,0.089832,0.262038,0.218413,0.277632,0.363968,0.384738,...,43.41,1,0,0,1,0,0,1,0,0
13138,ID_C5V5SD2D,-1.219791,-0.972191,-1.138300,-1.244377,-1.158529,-1.181975,-1.104500,-1.157134,-1.086590,...,35.10,1,0,0,1,0,0,1,0,0


In [12]:
# Hyperparameters

global use_real_only
global use_threshold
global num_features
global threshold
global use_smoothing_loss
global single_inp


single_inp = True
use_smoothing_loss = True
use_threshold = True
use_real_only = False
threshold = 5

if use_real_only:
    num_features = 170
else:
    if use_threshold:
        num_features = threshold**2
    else:
        num_features = 170**2
        
num_env_features = 2
hidden_size_env = 64
num_targets = 9
hidden_size = 512

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 1000
LEARNING_RATE = 5e-3
WEIGHT_DECAY = 1e-4
NFOLDS = 10 
EARLY_STOPPING_STEPS = 50
EARLY_STOP = True
BATCH_SIZE = 64

feature_cols = [col for col in folds.columns if "absorbance" in col]
feature_cols_env = ['temperature' , 'humidity']

model_output_folder = maindir

In [13]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold


mskf = MultilabelStratifiedKFold(n_splits = NFOLDS)

for fold, (tr_idx, vl_idx) in enumerate(mskf.split(X = folds, y= folds[new_cols])):
    
    folds.loc[vl_idx, 'kfold'] = int(fold)
    
folds['kfold'] = folds.kfold.astype(int)
folds

Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,hdl_cholesterol_human_ok,hdl_cholesterol_human_high,hdl_cholesterol_human_low,cholesterol_ldl_human_ok,cholesterol_ldl_human_high,cholesterol_ldl_human_low,hemoglobin(hgb)_human_ok,hemoglobin(hgb)_human_high,hemoglobin(hgb)_human_low,kfold
0,ID_3SSHI56C,-0.788436,-0.870233,-0.751360,-0.760723,-0.719232,-0.813073,-0.759374,-0.791442,-0.779096,...,1,0,0,1,0,0,1,0,0,8
1,ID_599OOLZA,-0.932002,-0.938503,-0.892502,-0.879632,-0.899648,-0.924872,-0.891111,-0.867000,-0.814230,...,1,0,0,0,1,0,0,1,0,8
2,ID_MVJGPQ75,-1.467630,-1.257684,-1.553288,-1.432158,-1.463622,-1.387477,-1.501475,-1.488373,-1.477999,...,1,0,0,0,1,0,1,0,0,4
3,ID_CK6RF8YV,0.095574,0.003712,0.210278,-0.080552,-0.215119,-0.208997,-0.085667,-0.055306,-0.119857,...,0,0,1,0,1,0,1,0,0,3
4,ID_82N6QE6I,-0.025650,0.245087,0.275626,0.202351,0.214480,0.304999,0.267150,0.306523,0.365314,...,1,0,0,0,1,0,1,0,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13135,ID_NGPC0DA3,-0.731347,-0.777306,-0.757571,-0.664033,-0.716014,-0.752914,-0.754194,-0.730887,-0.670193,...,0,1,0,1,0,0,1,0,0,1
13136,ID_XRBUD5U8,0.534390,0.515836,0.429998,0.312602,0.426099,0.378080,0.344882,0.368488,0.360771,...,1,0,0,1,0,0,1,0,0,8
13137,ID_2M9L5NV2,0.065681,0.186262,0.296918,0.089832,0.262038,0.218413,0.277632,0.363968,0.384738,...,1,0,0,1,0,0,1,0,0,1
13138,ID_C5V5SD2D,-1.219791,-0.972191,-1.138300,-1.244377,-1.158529,-1.181975,-1.104500,-1.157134,-1.086590,...,1,0,0,1,0,0,1,0,0,5


In [14]:
# model train and validation utils

def train_fn(model, train_dataloader, criterion, optimizer, scheduler , device):
    
    logging.info("TRAIN")
    
    model.train()
    
    start_iter = 0
    final_loss = 0
    
    pbar = tqdm(iter(train_dataloader), leave = True, total = len(train_dataloader))
    
    for i, (data) in enumerate(pbar, start = start_iter):
        
        if not single_inp:

            x, x_env, y = data
            inputs , inputs_env, targets = x.to(device), x_env.to(device), y.to(device)
            output = model(inputs, inputs_env)
            
        else:
            x, y = data
            inputs, targets = x.to(device), y.to(device)
            output = model(inputs)

            
        optimizer.zero_grad()
            
        
        loss = criterion(output, targets)
        
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        final_loss += loss.item()
        
    final_loss /= len(train_dataloader)    
        
    return final_loss

def val_fn(model, valid_dataloader, criterion, device):
    
    logging.info("VALID")
    
    model.eval()
    
    final_loss = 0
    start_iter = 0
    valid_preds = []
    
    pbar= tqdm(iter(valid_dataloader), leave = True, total = len(valid_dataloader))
        
    
    for i, (data) in enumerate(pbar, start = start_iter):
        
        if not single_inp:

            x, x_env, y = data
            inputs , inputs_env, targets = x.to(device), x_env.to(device), y.to(device)
            output = model(inputs, inputs_env)
            
        else:
            x, y = data
            inputs, targets = x.to(device), y.to(device)
            output = model(inputs)
            
                
        loss = criterion(output, targets)
        
        final_loss += loss.item()
        
        valid_preds.append(output.sigmoid().detach().cpu().numpy())
        
    final_loss /= len(valid_dataloader)
    valid_preds = np.concatenate(valid_preds)
    
    return final_loss, valid_preds


def inference_fn(model, test_dataloader, device):
    
    model.eval()
    
    preds = []

    pbar= tqdm(iter(test_dataloader), leave = True, total = len(test_dataloader))
        
    start_iter = 0
    
    for i, (data) in enumerate(pbar, start = start_iter):
        
        
        if not single_inp:

            x, x_env = data
            inputs , inputs_env= x.to(device), x_env.to(device)
            
            with torch.no_grad():
                outputs = model(inputs, inputs_env)            
        else:
            x  = data
            inputs = x.to(device)
            
            with torch.no_grad():
                outputs = model(inputs)   
                
        preds.append(outputs.sigmoid().detach().cpu().numpy())
    
    preds = np.concatenate(preds)
    
    return preds

In [15]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F


class SmoothBCEwLogits(_WeightedLoss):

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
                                           self.smoothing)
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight)

        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()

        return loss
    

In [16]:
class CReLU(nn.Module):
    
    """CReLU Activation
     This is a modification of the classical CReLU activation proposed in this paper (https://arxiv.org/pdf/1603.05201.pdf)
    returns : CONCAT(relu(x), relu(-x))
    """
    def __init__(self):        
        super(CReLU, self).__init__()
    def forward(self,x):
        return torch.cat((F.relu(x), -F.relu(-x)), dim =1)


class DModel(nn.Module):

    def __init__(self, num_features, num_env_features, num_targets, hidden_size, hidden_size_env):
        super(DModel, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.batch_norm_env = nn.BatchNorm1d(num_env_features)
        self.dense_env      = nn.utils.weight_norm(nn.Linear(num_env_features, hidden_size_env))
        self.dense1 = nn.utils.weight_norm(
            nn.Linear(num_features, hidden_size))

        self.batch_norm2 = nn.BatchNorm1d(hidden_size*2)
        self.dropout2 = nn.Dropout(0.25)
        self.dense2 = nn.Linear(hidden_size*2, hidden_size)

        self.batch_norm3 = nn.BatchNorm1d((hidden_size)*2 + hidden_size_env)
        self.dropout3 = nn.Dropout(0.25)
        self.dense3 = nn.utils.weight_norm(nn.Linear((hidden_size)*2 + hidden_size_env, num_targets))
        self.crelu = CReLU()

    def forward(self, x, x_env):
        x = self.batch_norm1(x)
        x = self.crelu(self.dense1(x))

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.crelu(self.dense2(x))
        
        x_env = self.batch_norm_env(x_env)
        x_env = self.dense_env(x_env)
        
        x = torch.cat((x, x_env), dim = 1)

        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)

        return x

In [22]:
class SModel(nn.Module):

    def __init__(self, num_features, num_targets, hidden_size):
        super(SModel, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(
            nn.Linear(num_features, hidden_size))

        self.batch_norm2 = nn.BatchNorm1d(hidden_size*2)
        self.dropout2 = nn.Dropout(0.25)
        self.dense2 = nn.Linear(hidden_size*2, hidden_size)

        self.batch_norm3 = nn.BatchNorm1d((hidden_size)*2)
        self.dropout3 = nn.Dropout(0.25)
        self.dense3 = nn.utils.weight_norm(nn.Linear((hidden_size)*2, num_targets))
        self.crelu = CReLU()

    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.crelu(self.dense1(x))

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.crelu(self.dense2(x))

        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)

        return x

In [18]:
test_ = test.copy()

In [23]:
def run_training(fold, seed):
    
    seed_everything(seed)
    
    train_idx = folds[folds['kfold'] != fold].index
    valid_idx = folds[folds['kfold'] == fold].index
    
    train_df = folds.iloc[train_idx].reset_index(drop =True)
    valid_df = folds.iloc[valid_idx].reset_index(drop =True)
    
    x_train, y_train = train_df[feature_cols].values, train_df[new_cols].values
    x_valid, y_valid = valid_df[feature_cols].values, valid_df[new_cols].values
    
    x_train_env, x_valid_env = train_df[feature_cols_env].values, valid_df[feature_cols_env].values
    
    x_test, x_test_env = test_[feature_cols].values, test_[feature_cols_env].values
    
    scaler = StandardScaler()
    
    scaler.fit(folds[feature_cols].values)
    
    x_train = scaler.transform(x_train)
    x_valid = scaler.transform(x_valid)
    x_test  = scaler.transform(x_test)
    
    train_dataset = BloodDataset(features =x_train, env_features=x_train_env, targets=y_train, train_mode= True)
    valid_dataset = BloodDataset(features =x_valid, env_features=x_valid_env, targets=y_valid, train_mode=True)
    testdataset   = BloodDataset(features = x_test, env_features=x_test_env, targets = None, train_mode = False)

    trainloader = DataLoader(
        train_dataset, collate_fn =  single_spectral_collator if single_inp else double_spectral_collator, batch_size=BATCH_SIZE, shuffle=True)
    validloader = DataLoader(
        valid_dataset,collate_fn = single_spectral_collator if single_inp else double_spectral_collator, batch_size=BATCH_SIZE, shuffle=False)
    testloader = DataLoader(
        testdataset, collate_fn = test_single_spectral_collator if single_inp else test_double_spectral_collator, batch_size=BATCH_SIZE, shuffle=False)
    
    if single_inp:
        
        model = SModel(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size)
    else:
        model = DModel(
            num_features=num_features,
            num_env_features = num_env_features,
            num_targets=num_targets,
            hidden_size=hidden_size,
            hidden_size_env= hidden_size_env
        )

    model.to(DEVICE)
    
    optimizer = optim.Adam(
        model.parameters(), lr=5e-3, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3,
                                              max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
    if not use_smoothing_loss:
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = SmoothBCEwLogits(smoothing=0.001)

    oof = np.zeros((len(train), len(new_cols)))
   
    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0
    
    min_loss = np.inf
    best_loss_epoch = -1
    
    for epoch in range(EPOCHS):
        
        logging.info(f"Epoch {epoch + 1}")
        
        #--------------------- TRAIN---------------------

        train_loss = train_fn(model, trainloader, criterion, optimizer, scheduler , DEVICE)
        
        #--------------------- VALID---------------------

        valid_loss, valid_preds = val_fn(model, validloader, criterion, DEVICE)
        
        if valid_loss < min_loss:
            min_loss = valid_loss
            best_loss_epoch = epoch
            oof[valid_idx] = valid_preds
            
            torch.save(model.state_dict(), f"{model_output_folder}/SEED{seed}_FOLD{fold}_.pth")
            
        elif(EARLY_STOP == True):
            early_step += 1
            
            if(early_step >= early_stopping_steps):
                break
            
            
        if (epoch % 10 == 0)  or (epoch == EPOCHS - 1):
            print(f"Fold {fold}--Seed {seed}--Epoch {epoch}--Train Loss {train_loss:.6f}--Valid Loss {valid_loss:.6f}--Best Loss {min_loss:.6f}")

    
    #--------------------- PREDICTION---------------------

    
    if single_inp:
        
        model = SModel(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size)
    else:
        model = DModel(
            num_features=num_features,
            num_env_features = num_env_features,
            num_targets=num_targets,
            hidden_size=hidden_size,
            hidden_size_env= hidden_size_env
        )
        
    # Load the best model
    model.load_state_dict(torch.load(f"{model_output_folder}/SEED{seed}_FOLD{fold}_.pth"))
    model.to(DEVICE)
    
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    num_params = sum([np.prod(p.size()) for p in model_parameters])
    print(f"Model Size: {num_params:,} trainable parameters")

    predictions = np.zeros((len(test_), len(new_cols)))
    predictions = inference_fn(model, testloader, DEVICE)

    return oof, predictions

In [24]:
def run_k_fold(NFOLDS, seed):
    oof = np.zeros((len(train), len(new_cols)))
    predictions = np.zeros((len(test), len(new_cols)))

    for fold in range(NFOLDS):
        oof_, pred_ = run_training(fold, seed)

        predictions += pred_ / NFOLDS
        oof += oof_

    return oof, predictions

In [None]:
# Averaging on multiple SEEDS

SEED = [940, 1513, 1269]
oof = np.zeros((len(train), len(new_cols)))
predictions = np.zeros((len(test), len(new_cols)))

for seed in SEED:

    oof_, predictions_ = run_k_fold(NFOLDS, seed)
    oof += oof_ / len(SEED)
    predictions += predictions_ / len(SEED)

train[new_cols] = oof
test[new_cols] = predictions

100%|██████████| 185/185 [00:07<00:00, 24.48it/s]
100%|██████████| 21/21 [00:00<00:00, 83.39it/s]
  2%|▏         | 3/185 [00:00<00:06, 26.22it/s]

Fold 0--Seed 940--Epoch 0--Train Loss 0.728915--Valid Loss 0.697664--Best Loss 0.697664


100%|██████████| 185/185 [00:07<00:00, 23.61it/s]
100%|██████████| 21/21 [00:00<00:00, 69.70it/s]
100%|██████████| 185/185 [00:08<00:00, 22.32it/s]
100%|██████████| 21/21 [00:00<00:00, 52.27it/s]
100%|██████████| 185/185 [00:07<00:00, 24.20it/s]
100%|██████████| 21/21 [00:00<00:00, 84.22it/s]
100%|██████████| 185/185 [00:07<00:00, 24.20it/s]
100%|██████████| 21/21 [00:00<00:00, 74.65it/s]
100%|██████████| 185/185 [00:08<00:00, 22.53it/s]
100%|██████████| 21/21 [00:00<00:00, 74.01it/s]
100%|██████████| 185/185 [00:08<00:00, 21.59it/s]
100%|██████████| 21/21 [00:00<00:00, 75.49it/s]
100%|██████████| 185/185 [00:08<00:00, 22.65it/s]
100%|██████████| 21/21 [00:00<00:00, 75.23it/s]
100%|██████████| 185/185 [00:07<00:00, 24.66it/s]
100%|██████████| 21/21 [00:00<00:00, 83.77it/s]
100%|██████████| 185/185 [00:06<00:00, 26.84it/s]
100%|██████████| 21/21 [00:00<00:00, 61.89it/s]
100%|██████████| 185/185 [00:08<00:00, 21.77it/s]
100%|██████████| 21/21 [00:00<00:00, 72.41it/s]
  2%|▏         | 3/1

Fold 0--Seed 940--Epoch 10--Train Loss 0.461659--Valid Loss 0.453551--Best Loss 0.453551


100%|██████████| 185/185 [00:09<00:00, 19.75it/s]
100%|██████████| 21/21 [00:00<00:00, 61.80it/s]
100%|██████████| 185/185 [00:09<00:00, 18.51it/s]
100%|██████████| 21/21 [00:00<00:00, 60.35it/s]
100%|██████████| 185/185 [00:08<00:00, 21.84it/s]
100%|██████████| 21/21 [00:00<00:00, 67.48it/s]
100%|██████████| 185/185 [00:07<00:00, 24.85it/s]
100%|██████████| 21/21 [00:00<00:00, 83.31it/s]
100%|██████████| 185/185 [00:06<00:00, 27.25it/s]
100%|██████████| 21/21 [00:00<00:00, 80.59it/s]
100%|██████████| 185/185 [00:06<00:00, 26.50it/s]
100%|██████████| 21/21 [00:00<00:00, 85.22it/s]
100%|██████████| 185/185 [00:07<00:00, 25.67it/s]
100%|██████████| 21/21 [00:00<00:00, 85.75it/s]
100%|██████████| 185/185 [00:06<00:00, 27.46it/s]
100%|██████████| 21/21 [00:00<00:00, 83.96it/s]
100%|██████████| 185/185 [00:06<00:00, 27.65it/s]
100%|██████████| 21/21 [00:00<00:00, 85.07it/s]
100%|██████████| 185/185 [00:07<00:00, 26.26it/s]
100%|██████████| 21/21 [00:00<00:00, 79.55it/s]
  2%|▏         | 3/1

Fold 0--Seed 940--Epoch 20--Train Loss 0.452242--Valid Loss 0.450653--Best Loss 0.449321


100%|██████████| 185/185 [00:07<00:00, 24.17it/s]
100%|██████████| 21/21 [00:00<00:00, 84.92it/s]
100%|██████████| 185/185 [00:06<00:00, 26.74it/s]
100%|██████████| 21/21 [00:00<00:00, 78.90it/s]
100%|██████████| 185/185 [00:06<00:00, 27.25it/s]
100%|██████████| 21/21 [00:00<00:00, 84.83it/s]
100%|██████████| 185/185 [00:06<00:00, 27.76it/s]
100%|██████████| 21/21 [00:00<00:00, 85.94it/s]
100%|██████████| 185/185 [00:06<00:00, 27.68it/s]
100%|██████████| 21/21 [00:00<00:00, 85.63it/s]
100%|██████████| 185/185 [00:06<00:00, 27.41it/s]
100%|██████████| 21/21 [00:00<00:00, 84.75it/s]
100%|██████████| 185/185 [00:06<00:00, 27.20it/s]
100%|██████████| 21/21 [00:00<00:00, 85.30it/s]
100%|██████████| 185/185 [00:06<00:00, 27.53it/s]
100%|██████████| 21/21 [00:00<00:00, 85.61it/s]
100%|██████████| 185/185 [00:06<00:00, 27.62it/s]
100%|██████████| 21/21 [00:00<00:00, 84.55it/s]
100%|██████████| 185/185 [00:06<00:00, 27.54it/s]
100%|██████████| 21/21 [00:00<00:00, 85.02it/s]
  2%|▏         | 3/1

Fold 0--Seed 940--Epoch 30--Train Loss 0.449303--Valid Loss 0.448325--Best Loss 0.448325


100%|██████████| 185/185 [00:06<00:00, 27.68it/s]
100%|██████████| 21/21 [00:00<00:00, 84.95it/s]
100%|██████████| 185/185 [00:07<00:00, 25.74it/s]
100%|██████████| 21/21 [00:00<00:00, 75.48it/s]
100%|██████████| 185/185 [00:11<00:00, 16.58it/s]
100%|██████████| 21/21 [00:00<00:00, 28.90it/s]
100%|██████████| 185/185 [00:14<00:00, 12.47it/s]
100%|██████████| 21/21 [00:00<00:00, 39.43it/s]
100%|██████████| 185/185 [00:14<00:00, 12.91it/s]
100%|██████████| 21/21 [00:00<00:00, 39.56it/s]
100%|██████████| 185/185 [00:16<00:00, 11.50it/s]
100%|██████████| 21/21 [00:00<00:00, 72.28it/s]
100%|██████████| 185/185 [00:10<00:00, 17.28it/s]
100%|██████████| 21/21 [00:00<00:00, 52.20it/s]
100%|██████████| 185/185 [00:10<00:00, 17.20it/s]
100%|██████████| 21/21 [00:00<00:00, 51.58it/s]
100%|██████████| 185/185 [00:10<00:00, 17.73it/s]
100%|██████████| 21/21 [00:00<00:00, 43.29it/s]
100%|██████████| 185/185 [00:10<00:00, 18.26it/s]
100%|██████████| 21/21 [00:00<00:00, 78.24it/s]
  2%|▏         | 3/1

Fold 0--Seed 940--Epoch 40--Train Loss 0.449231--Valid Loss 0.448405--Best Loss 0.448206


100%|██████████| 185/185 [00:08<00:00, 22.93it/s]
100%|██████████| 21/21 [00:00<00:00, 72.99it/s]
100%|██████████| 185/185 [00:07<00:00, 26.05it/s]
100%|██████████| 21/21 [00:00<00:00, 84.55it/s]
100%|██████████| 185/185 [00:07<00:00, 26.36it/s]
100%|██████████| 21/21 [00:00<00:00, 79.60it/s]
100%|██████████| 185/185 [00:07<00:00, 26.37it/s]
100%|██████████| 21/21 [00:00<00:00, 82.34it/s]
100%|██████████| 185/185 [00:07<00:00, 25.54it/s]
100%|██████████| 21/21 [00:00<00:00, 82.07it/s]
100%|██████████| 185/185 [00:07<00:00, 25.02it/s]
100%|██████████| 21/21 [00:00<00:00, 73.59it/s]
100%|██████████| 185/185 [00:07<00:00, 24.92it/s]
100%|██████████| 21/21 [00:00<00:00, 80.19it/s]
100%|██████████| 185/185 [00:08<00:00, 20.77it/s]
100%|██████████| 21/21 [00:00<00:00, 44.55it/s]
100%|██████████| 185/185 [00:08<00:00, 20.66it/s]
100%|██████████| 21/21 [00:00<00:00, 51.60it/s]
100%|██████████| 185/185 [00:08<00:00, 21.97it/s]
100%|██████████| 21/21 [00:00<00:00, 77.64it/s]
  2%|▏         | 3/1

Fold 0--Seed 940--Epoch 50--Train Loss 0.449298--Valid Loss 0.449777--Best Loss 0.448206


100%|██████████| 185/185 [00:08<00:00, 21.60it/s]
100%|██████████| 21/21 [00:00<00:00, 52.19it/s]
100%|██████████| 185/185 [00:08<00:00, 21.69it/s]
100%|██████████| 21/21 [00:00<00:00, 76.29it/s]
100%|██████████| 185/185 [00:08<00:00, 21.80it/s]
100%|██████████| 21/21 [00:00<00:00, 81.61it/s]
100%|██████████| 185/185 [00:07<00:00, 24.29it/s]
100%|██████████| 21/21 [00:00<00:00, 79.97it/s]
100%|██████████| 185/185 [00:07<00:00, 24.31it/s]
100%|██████████| 21/21 [00:00<00:00, 79.19it/s]
100%|██████████| 185/185 [00:07<00:00, 23.69it/s]
100%|██████████| 21/21 [00:00<00:00, 70.95it/s]
100%|██████████| 185/185 [00:10<00:00, 17.80it/s]
100%|██████████| 21/21 [00:00<00:00, 48.05it/s]
100%|██████████| 185/185 [00:08<00:00, 21.61it/s]
100%|██████████| 21/21 [00:00<00:00, 78.20it/s]
100%|██████████| 185/185 [00:08<00:00, 21.96it/s]
100%|██████████| 21/21 [00:00<00:00, 81.66it/s]
100%|██████████| 185/185 [00:08<00:00, 22.90it/s]
100%|██████████| 21/21 [00:00<00:00, 79.04it/s]
  2%|▏         | 3/1

Fold 0--Seed 940--Epoch 60--Train Loss nan--Valid Loss nan--Best Loss 0.448206


100%|██████████| 185/185 [00:08<00:00, 21.55it/s]
100%|██████████| 21/21 [00:00<00:00, 76.97it/s]
100%|██████████| 185/185 [00:08<00:00, 21.85it/s]
100%|██████████| 21/21 [00:00<00:00, 81.17it/s]
100%|██████████| 185/185 [00:07<00:00, 24.48it/s]
100%|██████████| 21/21 [00:00<00:00, 81.82it/s]
100%|██████████| 185/185 [00:07<00:00, 25.32it/s]
100%|██████████| 21/21 [00:00<00:00, 83.84it/s]
100%|██████████| 185/185 [00:07<00:00, 23.44it/s]
100%|██████████| 21/21 [00:00<00:00, 63.39it/s]
100%|██████████| 185/185 [00:08<00:00, 21.20it/s]
100%|██████████| 21/21 [00:00<00:00, 69.11it/s]
100%|██████████| 185/185 [00:09<00:00, 20.09it/s]
100%|██████████| 21/21 [00:00<00:00, 58.86it/s]
100%|██████████| 185/185 [00:09<00:00, 19.50it/s]
100%|██████████| 21/21 [00:00<00:00, 55.47it/s]
 14%|█▍        | 8/58 [00:00<00:00, 71.77it/s]

Model Size: 552,004 trainable parameters


100%|██████████| 58/58 [00:01<00:00, 56.87it/s]
100%|██████████| 185/185 [00:09<00:00, 20.51it/s]
100%|██████████| 21/21 [00:00<00:00, 63.75it/s]
  1%|          | 2/185 [00:00<00:09, 19.54it/s]

Fold 1--Seed 940--Epoch 0--Train Loss 0.728827--Valid Loss 0.695088--Best Loss 0.695088


100%|██████████| 185/185 [00:09<00:00, 19.50it/s]
100%|██████████| 21/21 [00:00<00:00, 58.67it/s]
100%|██████████| 185/185 [00:10<00:00, 18.10it/s]
100%|██████████| 21/21 [00:00<00:00, 69.90it/s]
100%|██████████| 185/185 [00:08<00:00, 22.42it/s]
100%|██████████| 21/21 [00:00<00:00, 62.75it/s]
100%|██████████| 185/185 [00:09<00:00, 19.30it/s]
100%|██████████| 21/21 [00:00<00:00, 56.40it/s]
100%|██████████| 185/185 [00:10<00:00, 18.39it/s]
100%|██████████| 21/21 [00:00<00:00, 58.96it/s]
100%|██████████| 185/185 [00:08<00:00, 20.80it/s]
100%|██████████| 21/21 [00:00<00:00, 68.54it/s]
100%|██████████| 185/185 [00:09<00:00, 20.37it/s]
100%|██████████| 21/21 [00:00<00:00, 56.69it/s]
100%|██████████| 185/185 [00:11<00:00, 16.34it/s]
100%|██████████| 21/21 [00:00<00:00, 57.48it/s]
100%|██████████| 185/185 [00:10<00:00, 18.03it/s]
100%|██████████| 21/21 [00:00<00:00, 61.04it/s]
100%|██████████| 185/185 [00:09<00:00, 19.24it/s]
100%|██████████| 21/21 [00:00<00:00, 60.36it/s]
  1%|          | 2/1

Fold 1--Seed 940--Epoch 10--Train Loss 0.463079--Valid Loss 0.454024--Best Loss 0.454024


100%|██████████| 185/185 [00:10<00:00, 17.29it/s]
100%|██████████| 21/21 [00:00<00:00, 80.95it/s]
100%|██████████| 185/185 [00:07<00:00, 24.93it/s]
100%|██████████| 21/21 [00:00<00:00, 77.51it/s]
100%|██████████| 185/185 [00:07<00:00, 25.36it/s]
100%|██████████| 21/21 [00:00<00:00, 76.74it/s]
100%|██████████| 185/185 [00:07<00:00, 25.56it/s]
100%|██████████| 21/21 [00:00<00:00, 73.82it/s]
100%|██████████| 185/185 [00:07<00:00, 25.50it/s]
100%|██████████| 21/21 [00:00<00:00, 77.98it/s]
100%|██████████| 185/185 [00:09<00:00, 20.29it/s]
100%|██████████| 21/21 [00:00<00:00, 51.73it/s]
100%|██████████| 185/185 [00:09<00:00, 19.03it/s]
100%|██████████| 21/21 [00:00<00:00, 67.78it/s]
100%|██████████| 185/185 [00:09<00:00, 19.45it/s]
100%|██████████| 21/21 [00:00<00:00, 60.87it/s]
100%|██████████| 185/185 [00:09<00:00, 19.22it/s]
100%|██████████| 21/21 [00:00<00:00, 55.90it/s]
100%|██████████| 185/185 [00:08<00:00, 20.60it/s]
100%|██████████| 21/21 [00:00<00:00, 62.40it/s]
  1%|          | 2/1

Fold 1--Seed 940--Epoch 20--Train Loss 0.451949--Valid Loss 0.451519--Best Loss 0.450061


100%|██████████| 185/185 [00:09<00:00, 20.19it/s]
100%|██████████| 21/21 [00:00<00:00, 64.32it/s]
100%|██████████| 185/185 [00:09<00:00, 19.16it/s]
100%|██████████| 21/21 [00:00<00:00, 59.25it/s]
100%|██████████| 185/185 [00:10<00:00, 18.26it/s]
100%|██████████| 21/21 [00:00<00:00, 57.55it/s]
100%|██████████| 185/185 [00:11<00:00, 16.63it/s]
100%|██████████| 21/21 [00:00<00:00, 56.34it/s]
100%|██████████| 185/185 [00:09<00:00, 18.54it/s]
100%|██████████| 21/21 [00:00<00:00, 81.30it/s]
100%|██████████| 185/185 [00:06<00:00, 26.76it/s]
100%|██████████| 21/21 [00:00<00:00, 81.90it/s]
100%|██████████| 185/185 [00:06<00:00, 26.67it/s]
100%|██████████| 21/21 [00:00<00:00, 81.38it/s]
100%|██████████| 185/185 [00:06<00:00, 26.86it/s]
100%|██████████| 21/21 [00:00<00:00, 80.91it/s]
100%|██████████| 185/185 [00:06<00:00, 26.70it/s]
100%|██████████| 21/21 [00:00<00:00, 80.74it/s]
100%|██████████| 185/185 [00:06<00:00, 26.70it/s]
100%|██████████| 21/21 [00:00<00:00, 81.47it/s]
  2%|▏         | 3/1

Fold 1--Seed 940--Epoch 30--Train Loss 0.449502--Valid Loss 0.449437--Best Loss 0.448893


100%|██████████| 185/185 [00:06<00:00, 26.74it/s]
100%|██████████| 21/21 [00:00<00:00, 80.85it/s]
100%|██████████| 185/185 [00:07<00:00, 25.85it/s]
100%|██████████| 21/21 [00:00<00:00, 77.14it/s]
100%|██████████| 185/185 [00:08<00:00, 20.93it/s]
100%|██████████| 21/21 [00:00<00:00, 56.86it/s]
100%|██████████| 185/185 [00:08<00:00, 20.67it/s]
100%|██████████| 21/21 [00:00<00:00, 76.72it/s]
100%|██████████| 185/185 [00:08<00:00, 22.58it/s]
100%|██████████| 21/21 [00:00<00:00, 65.88it/s]
100%|██████████| 185/185 [00:09<00:00, 19.58it/s]
100%|██████████| 21/21 [00:00<00:00, 78.67it/s]
100%|██████████| 185/185 [00:07<00:00, 25.37it/s]
100%|██████████| 21/21 [00:00<00:00, 74.88it/s]
100%|██████████| 185/185 [00:08<00:00, 21.76it/s]
100%|██████████| 21/21 [00:00<00:00, 79.55it/s]
100%|██████████| 185/185 [00:07<00:00, 25.79it/s]
100%|██████████| 21/21 [00:00<00:00, 71.67it/s]
100%|██████████| 185/185 [00:07<00:00, 24.96it/s]
100%|██████████| 21/21 [00:00<00:00, 79.42it/s]
  2%|▏         | 3/1

Fold 1--Seed 940--Epoch 40--Train Loss 0.449330--Valid Loss 0.448940--Best Loss 0.448832


100%|██████████| 185/185 [00:07<00:00, 25.35it/s]
100%|██████████| 21/21 [00:00<00:00, 69.85it/s]
100%|██████████| 185/185 [00:07<00:00, 25.25it/s]
100%|██████████| 21/21 [00:00<00:00, 75.74it/s]
100%|██████████| 185/185 [00:07<00:00, 25.39it/s]
100%|██████████| 21/21 [00:00<00:00, 82.65it/s]
100%|██████████| 185/185 [00:07<00:00, 25.98it/s]
100%|██████████| 21/21 [00:00<00:00, 79.04it/s]
100%|██████████| 185/185 [00:07<00:00, 24.30it/s]
100%|██████████| 21/21 [00:00<00:00, 78.82it/s]
100%|██████████| 185/185 [00:07<00:00, 26.32it/s]
100%|██████████| 21/21 [00:00<00:00, 79.37it/s]
100%|██████████| 185/185 [00:07<00:00, 24.63it/s]
100%|██████████| 21/21 [00:00<00:00, 64.16it/s]
100%|██████████| 185/185 [00:07<00:00, 25.52it/s]
100%|██████████| 21/21 [00:00<00:00, 77.03it/s]
100%|██████████| 185/185 [00:07<00:00, 24.87it/s]
100%|██████████| 21/21 [00:00<00:00, 71.76it/s]
100%|██████████| 185/185 [00:07<00:00, 24.80it/s]
100%|██████████| 21/21 [00:00<00:00, 73.45it/s]
  2%|▏         | 3/1

Fold 1--Seed 940--Epoch 50--Train Loss 0.449378--Valid Loss 0.449218--Best Loss 0.448832


100%|██████████| 185/185 [00:08<00:00, 22.00it/s]
100%|██████████| 21/21 [00:00<00:00, 57.36it/s]
100%|██████████| 185/185 [00:07<00:00, 25.49it/s]
100%|██████████| 21/21 [00:00<00:00, 80.42it/s]
100%|██████████| 185/185 [00:07<00:00, 25.05it/s]
100%|██████████| 21/21 [00:00<00:00, 68.61it/s]
100%|██████████| 185/185 [00:07<00:00, 25.26it/s]
100%|██████████| 21/21 [00:00<00:00, 76.12it/s]
100%|██████████| 185/185 [00:07<00:00, 24.19it/s]
100%|██████████| 21/21 [00:00<00:00, 77.69it/s]
100%|██████████| 185/185 [00:07<00:00, 24.77it/s]
100%|██████████| 21/21 [00:00<00:00, 79.88it/s]
100%|██████████| 185/185 [00:07<00:00, 23.82it/s]
100%|██████████| 21/21 [00:00<00:00, 77.12it/s]
100%|██████████| 185/185 [00:07<00:00, 23.94it/s]
100%|██████████| 21/21 [00:00<00:00, 77.48it/s]
100%|██████████| 185/185 [00:08<00:00, 20.87it/s]
100%|██████████| 21/21 [00:00<00:00, 78.06it/s]
100%|██████████| 185/185 [00:07<00:00, 24.33it/s]
100%|██████████| 21/21 [00:00<00:00, 80.85it/s]
  2%|▏         | 3/1

Fold 1--Seed 940--Epoch 60--Train Loss nan--Valid Loss nan--Best Loss 0.448832


100%|██████████| 185/185 [00:07<00:00, 24.11it/s]
100%|██████████| 21/21 [00:00<00:00, 81.94it/s]
100%|██████████| 185/185 [00:07<00:00, 24.03it/s]
100%|██████████| 21/21 [00:00<00:00, 79.80it/s]
100%|██████████| 185/185 [00:07<00:00, 24.08it/s]
100%|██████████| 21/21 [00:00<00:00, 80.54it/s]
100%|██████████| 185/185 [00:07<00:00, 24.93it/s]
100%|██████████| 21/21 [00:00<00:00, 80.71it/s]
100%|██████████| 185/185 [00:07<00:00, 24.22it/s]
100%|██████████| 21/21 [00:00<00:00, 79.72it/s]
 14%|█▍        | 8/58 [00:00<00:00, 78.73it/s]

Model Size: 552,004 trainable parameters


100%|██████████| 58/58 [00:00<00:00, 79.21it/s]
100%|██████████| 185/185 [00:07<00:00, 25.70it/s]
100%|██████████| 21/21 [00:00<00:00, 76.03it/s]
  2%|▏         | 3/185 [00:00<00:06, 26.09it/s]

Fold 2--Seed 940--Epoch 0--Train Loss 0.728980--Valid Loss 0.697799--Best Loss 0.697799


100%|██████████| 185/185 [00:07<00:00, 25.98it/s]
100%|██████████| 21/21 [00:00<00:00, 81.78it/s]
100%|██████████| 185/185 [00:07<00:00, 25.82it/s]
100%|██████████| 21/21 [00:00<00:00, 81.17it/s]
100%|██████████| 185/185 [00:07<00:00, 26.11it/s]
100%|██████████| 21/21 [00:00<00:00, 74.58it/s]
100%|██████████| 185/185 [00:07<00:00, 25.63it/s]
100%|██████████| 21/21 [00:00<00:00, 78.13it/s]
100%|██████████| 185/185 [00:07<00:00, 23.76it/s]
100%|██████████| 21/21 [00:00<00:00, 79.58it/s]
100%|██████████| 185/185 [00:07<00:00, 25.56it/s]
100%|██████████| 21/21 [00:00<00:00, 79.36it/s]
100%|██████████| 185/185 [00:07<00:00, 24.84it/s]
100%|██████████| 21/21 [00:00<00:00, 74.41it/s]
100%|██████████| 185/185 [00:08<00:00, 22.28it/s]
100%|██████████| 21/21 [00:00<00:00, 68.08it/s]
100%|██████████| 185/185 [00:08<00:00, 20.80it/s]
100%|██████████| 21/21 [00:00<00:00, 58.12it/s]
100%|██████████| 185/185 [00:09<00:00, 18.66it/s]
100%|██████████| 21/21 [00:00<00:00, 66.32it/s]
  2%|▏         | 3/1

Fold 2--Seed 940--Epoch 10--Train Loss 0.462252--Valid Loss 0.453955--Best Loss 0.453955


100%|██████████| 185/185 [00:08<00:00, 20.74it/s]
100%|██████████| 21/21 [00:00<00:00, 75.09it/s]
100%|██████████| 185/185 [00:07<00:00, 25.51it/s]
100%|██████████| 21/21 [00:00<00:00, 76.80it/s]
100%|██████████| 185/185 [00:07<00:00, 25.61it/s]
100%|██████████| 21/21 [00:00<00:00, 81.50it/s]
100%|██████████| 185/185 [00:07<00:00, 26.20it/s]
100%|██████████| 21/21 [00:00<00:00, 79.28it/s]
100%|██████████| 185/185 [00:07<00:00, 25.82it/s]
100%|██████████| 21/21 [00:00<00:00, 79.82it/s]
100%|██████████| 185/185 [00:07<00:00, 25.58it/s]
100%|██████████| 21/21 [00:00<00:00, 75.50it/s]
100%|██████████| 185/185 [00:07<00:00, 25.89it/s]
100%|██████████| 21/21 [00:00<00:00, 79.97it/s]
100%|██████████| 185/185 [00:07<00:00, 26.34it/s]
100%|██████████| 21/21 [00:00<00:00, 79.57it/s]
100%|██████████| 185/185 [00:07<00:00, 25.80it/s]
100%|██████████| 21/21 [00:00<00:00, 76.63it/s]
100%|██████████| 185/185 [00:07<00:00, 25.42it/s]
100%|██████████| 21/21 [00:00<00:00, 76.47it/s]
  2%|▏         | 3/1

Fold 2--Seed 940--Epoch 20--Train Loss 0.451880--Valid Loss 0.450156--Best Loss 0.450156


100%|██████████| 185/185 [00:07<00:00, 25.87it/s]
100%|██████████| 21/21 [00:00<00:00, 77.03it/s]
100%|██████████| 185/185 [00:07<00:00, 25.65it/s]
100%|██████████| 21/21 [00:00<00:00, 77.11it/s]
100%|██████████| 185/185 [00:07<00:00, 25.96it/s]
100%|██████████| 21/21 [00:00<00:00, 78.05it/s]
100%|██████████| 185/185 [00:07<00:00, 26.19it/s]
100%|██████████| 21/21 [00:00<00:00, 77.53it/s]
100%|██████████| 185/185 [00:06<00:00, 26.43it/s]
100%|██████████| 21/21 [00:00<00:00, 80.88it/s]
100%|██████████| 185/185 [00:07<00:00, 25.96it/s]
100%|██████████| 21/21 [00:00<00:00, 80.31it/s]
100%|██████████| 185/185 [00:07<00:00, 26.05it/s]
100%|██████████| 21/21 [00:00<00:00, 78.46it/s]
100%|██████████| 185/185 [00:07<00:00, 25.81it/s]
100%|██████████| 21/21 [00:00<00:00, 80.49it/s]
100%|██████████| 185/185 [00:07<00:00, 25.58it/s]
100%|██████████| 21/21 [00:00<00:00, 63.71it/s]
100%|██████████| 185/185 [00:07<00:00, 23.87it/s]
100%|██████████| 21/21 [00:00<00:00, 74.13it/s]
  2%|▏         | 3/1

Fold 2--Seed 940--Epoch 30--Train Loss 0.449308--Valid Loss 0.449372--Best Loss 0.449372


100%|██████████| 185/185 [00:07<00:00, 24.85it/s]
100%|██████████| 21/21 [00:00<00:00, 80.49it/s]
100%|██████████| 185/185 [00:07<00:00, 25.31it/s]
100%|██████████| 21/21 [00:00<00:00, 77.86it/s]
100%|██████████| 185/185 [00:07<00:00, 25.39it/s]
100%|██████████| 21/21 [00:00<00:00, 82.03it/s]
100%|██████████| 185/185 [00:07<00:00, 25.57it/s]
100%|██████████| 21/21 [00:00<00:00, 72.79it/s]
100%|██████████| 185/185 [00:07<00:00, 25.84it/s]
100%|██████████| 21/21 [00:00<00:00, 68.96it/s]
100%|██████████| 185/185 [00:07<00:00, 26.20it/s]
100%|██████████| 21/21 [00:00<00:00, 80.82it/s]
100%|██████████| 185/185 [00:07<00:00, 25.91it/s]
100%|██████████| 21/21 [00:00<00:00, 76.89it/s]
100%|██████████| 185/185 [00:07<00:00, 23.89it/s]
100%|██████████| 21/21 [00:00<00:00, 75.85it/s]
100%|██████████| 185/185 [00:07<00:00, 25.55it/s]
100%|██████████| 21/21 [00:00<00:00, 75.25it/s]
100%|██████████| 185/185 [00:07<00:00, 25.69it/s]
100%|██████████| 21/21 [00:00<00:00, 80.69it/s]
  2%|▏         | 3/1

Fold 2--Seed 940--Epoch 40--Train Loss 0.449139--Valid Loss 0.450010--Best Loss 0.449034


100%|██████████| 185/185 [00:07<00:00, 26.30it/s]
100%|██████████| 21/21 [00:00<00:00, 80.09it/s]
100%|██████████| 185/185 [00:07<00:00, 26.24it/s]
100%|██████████| 21/21 [00:00<00:00, 77.67it/s]
100%|██████████| 185/185 [00:07<00:00, 25.66it/s]
100%|██████████| 21/21 [00:00<00:00, 79.18it/s]
100%|██████████| 185/185 [00:07<00:00, 26.18it/s]
100%|██████████| 21/21 [00:00<00:00, 80.06it/s]
100%|██████████| 185/185 [00:07<00:00, 26.24it/s]
100%|██████████| 21/21 [00:00<00:00, 78.59it/s]
100%|██████████| 185/185 [00:07<00:00, 26.32it/s]
100%|██████████| 21/21 [00:00<00:00, 72.40it/s]
100%|██████████| 185/185 [00:07<00:00, 26.10it/s]
100%|██████████| 21/21 [00:00<00:00, 79.13it/s]
100%|██████████| 185/185 [00:07<00:00, 26.07it/s]
100%|██████████| 21/21 [00:00<00:00, 79.70it/s]
100%|██████████| 185/185 [00:07<00:00, 25.86it/s]
100%|██████████| 21/21 [00:00<00:00, 78.50it/s]
100%|██████████| 185/185 [00:07<00:00, 25.67it/s]
100%|██████████| 21/21 [00:00<00:00, 80.51it/s]
  2%|▏         | 3/1

Fold 2--Seed 940--Epoch 50--Train Loss 0.449318--Valid Loss 0.449713--Best Loss 0.449034


100%|██████████| 185/185 [00:07<00:00, 25.67it/s]
100%|██████████| 21/21 [00:00<00:00, 78.06it/s]
100%|██████████| 185/185 [00:07<00:00, 25.40it/s]
100%|██████████| 21/21 [00:00<00:00, 80.79it/s]
100%|██████████| 185/185 [00:07<00:00, 25.78it/s]
100%|██████████| 21/21 [00:00<00:00, 77.34it/s]
100%|██████████| 185/185 [00:07<00:00, 25.15it/s]
100%|██████████| 21/21 [00:00<00:00, 80.35it/s]
100%|██████████| 185/185 [00:08<00:00, 22.83it/s]
100%|██████████| 21/21 [00:00<00:00, 79.13it/s]
100%|██████████| 185/185 [00:07<00:00, 23.49it/s]
100%|██████████| 21/21 [00:00<00:00, 78.98it/s]
100%|██████████| 185/185 [00:07<00:00, 24.73it/s]
100%|██████████| 21/21 [00:00<00:00, 78.08it/s]
100%|██████████| 185/185 [00:07<00:00, 24.83it/s]
100%|██████████| 21/21 [00:00<00:00, 76.20it/s]
100%|██████████| 185/185 [00:07<00:00, 24.88it/s]
100%|██████████| 21/21 [00:00<00:00, 74.95it/s]
100%|██████████| 185/185 [00:07<00:00, 24.74it/s]
100%|██████████| 21/21 [00:00<00:00, 81.21it/s]
  2%|▏         | 3/1

Fold 2--Seed 940--Epoch 60--Train Loss nan--Valid Loss nan--Best Loss 0.449034


100%|██████████| 185/185 [00:07<00:00, 24.40it/s]
100%|██████████| 21/21 [00:00<00:00, 75.02it/s]
100%|██████████| 185/185 [00:07<00:00, 25.02it/s]
100%|██████████| 21/21 [00:00<00:00, 80.46it/s]
100%|██████████| 185/185 [00:07<00:00, 24.80it/s]
100%|██████████| 21/21 [00:00<00:00, 78.85it/s]
100%|██████████| 185/185 [00:07<00:00, 24.74it/s]
100%|██████████| 21/21 [00:00<00:00, 81.54it/s]
100%|██████████| 185/185 [00:07<00:00, 24.37it/s]
100%|██████████| 21/21 [00:00<00:00, 78.20it/s]
100%|██████████| 185/185 [00:07<00:00, 24.81it/s]
100%|██████████| 21/21 [00:00<00:00, 81.64it/s]
100%|██████████| 185/185 [00:07<00:00, 24.52it/s]
100%|██████████| 21/21 [00:00<00:00, 78.49it/s]
100%|██████████| 185/185 [00:07<00:00, 24.52it/s]
100%|██████████| 21/21 [00:00<00:00, 72.98it/s]
100%|██████████| 185/185 [00:07<00:00, 24.14it/s]
100%|██████████| 21/21 [00:00<00:00, 78.64it/s]
100%|██████████| 185/185 [00:07<00:00, 24.34it/s]
100%|██████████| 21/21 [00:00<00:00, 77.54it/s]
 16%|█▌        | 9/5

Model Size: 552,004 trainable parameters


100%|██████████| 58/58 [00:00<00:00, 84.52it/s]
100%|██████████| 185/185 [00:07<00:00, 23.84it/s]
100%|██████████| 21/21 [00:00<00:00, 78.67it/s]
  2%|▏         | 3/185 [00:00<00:06, 27.18it/s]

Fold 3--Seed 940--Epoch 0--Train Loss 0.727362--Valid Loss 0.698009--Best Loss 0.698009


100%|██████████| 185/185 [00:07<00:00, 23.47it/s]
100%|██████████| 21/21 [00:00<00:00, 74.12it/s]
100%|██████████| 185/185 [00:07<00:00, 24.96it/s]
100%|██████████| 21/21 [00:00<00:00, 76.71it/s]
100%|██████████| 185/185 [00:08<00:00, 22.27it/s]
100%|██████████| 21/21 [00:00<00:00, 63.72it/s]
100%|██████████| 185/185 [00:09<00:00, 18.79it/s]
100%|██████████| 21/21 [00:00<00:00, 57.04it/s]
100%|██████████| 185/185 [00:07<00:00, 25.31it/s]
100%|██████████| 21/21 [00:00<00:00, 80.91it/s]
100%|██████████| 185/185 [00:07<00:00, 26.42it/s]
100%|██████████| 21/21 [00:00<00:00, 79.25it/s]
100%|██████████| 185/185 [00:08<00:00, 22.00it/s]
100%|██████████| 21/21 [00:00<00:00, 65.02it/s]
100%|██████████| 185/185 [00:07<00:00, 24.04it/s]
100%|██████████| 21/21 [00:00<00:00, 79.82it/s]
100%|██████████| 185/185 [00:07<00:00, 25.98it/s]
100%|██████████| 21/21 [00:00<00:00, 78.82it/s]
100%|██████████| 185/185 [00:07<00:00, 24.90it/s]
100%|██████████| 21/21 [00:00<00:00, 77.30it/s]
  2%|▏         | 3/1

Fold 3--Seed 940--Epoch 10--Train Loss 0.462348--Valid Loss 0.452413--Best Loss 0.452413


100%|██████████| 185/185 [00:07<00:00, 26.05it/s]
100%|██████████| 21/21 [00:00<00:00, 66.45it/s]
100%|██████████| 185/185 [00:08<00:00, 20.87it/s]
100%|██████████| 21/21 [00:00<00:00, 50.29it/s]
100%|██████████| 185/185 [00:09<00:00, 19.02it/s]
100%|██████████| 21/21 [00:00<00:00, 55.80it/s]
100%|██████████| 185/185 [00:07<00:00, 24.21it/s]
100%|██████████| 21/21 [00:00<00:00, 79.86it/s]
100%|██████████| 185/185 [00:06<00:00, 26.65it/s]
100%|██████████| 21/21 [00:00<00:00, 80.32it/s]
100%|██████████| 185/185 [00:08<00:00, 23.04it/s]
100%|██████████| 21/21 [00:00<00:00, 62.41it/s]
100%|██████████| 185/185 [00:09<00:00, 19.42it/s]
100%|██████████| 21/21 [00:00<00:00, 57.46it/s]
100%|██████████| 185/185 [00:10<00:00, 17.39it/s]
100%|██████████| 21/21 [00:00<00:00, 70.70it/s]
100%|██████████| 185/185 [00:07<00:00, 25.36it/s]
100%|██████████| 21/21 [00:00<00:00, 79.64it/s]
100%|██████████| 185/185 [00:06<00:00, 26.53it/s]
100%|██████████| 21/21 [00:00<00:00, 80.55it/s]
  2%|▏         | 3/1

Fold 3--Seed 940--Epoch 20--Train Loss 0.452054--Valid Loss 0.448800--Best Loss 0.448800


100%|██████████| 185/185 [00:06<00:00, 26.70it/s]
100%|██████████| 21/21 [00:00<00:00, 82.60it/s]
100%|██████████| 185/185 [00:06<00:00, 26.79it/s]
100%|██████████| 21/21 [00:00<00:00, 82.70it/s]
100%|██████████| 185/185 [00:07<00:00, 24.80it/s]
100%|██████████| 21/21 [00:00<00:00, 71.64it/s]
100%|██████████| 185/185 [00:07<00:00, 23.91it/s]
100%|██████████| 21/21 [00:00<00:00, 59.11it/s]
100%|██████████| 185/185 [00:08<00:00, 21.59it/s]
100%|██████████| 21/21 [00:00<00:00, 71.99it/s]
100%|██████████| 185/185 [00:09<00:00, 19.96it/s]
100%|██████████| 21/21 [00:00<00:00, 51.84it/s]
100%|██████████| 185/185 [00:09<00:00, 19.22it/s]
100%|██████████| 21/21 [00:00<00:00, 67.24it/s]
100%|██████████| 185/185 [00:08<00:00, 21.38it/s]
100%|██████████| 21/21 [00:00<00:00, 66.45it/s]
100%|██████████| 185/185 [00:08<00:00, 21.32it/s]
100%|██████████| 21/21 [00:00<00:00, 70.06it/s]
100%|██████████| 185/185 [00:08<00:00, 22.66it/s]
100%|██████████| 21/21 [00:00<00:00, 70.23it/s]
  2%|▏         | 3/1

Fold 3--Seed 940--Epoch 30--Train Loss 0.449329--Valid Loss 0.448568--Best Loss 0.448568


100%|██████████| 185/185 [00:08<00:00, 22.05it/s]
100%|██████████| 21/21 [00:00<00:00, 69.52it/s]
100%|██████████| 185/185 [00:08<00:00, 22.32it/s]
100%|██████████| 21/21 [00:00<00:00, 69.51it/s]
100%|██████████| 185/185 [00:08<00:00, 22.12it/s]
100%|██████████| 21/21 [00:00<00:00, 69.15it/s]
100%|██████████| 185/185 [00:07<00:00, 25.24it/s]
100%|██████████| 21/21 [00:00<00:00, 75.93it/s]
100%|██████████| 185/185 [00:06<00:00, 26.78it/s]
100%|██████████| 21/21 [00:00<00:00, 79.13it/s]
100%|██████████| 185/185 [00:06<00:00, 26.61it/s]
100%|██████████| 21/21 [00:00<00:00, 79.87it/s]
100%|██████████| 185/185 [00:07<00:00, 23.94it/s]
100%|██████████| 21/21 [00:00<00:00, 79.56it/s]
100%|██████████| 185/185 [00:07<00:00, 26.20it/s]
100%|██████████| 21/21 [00:00<00:00, 75.17it/s]
100%|██████████| 185/185 [00:07<00:00, 26.19it/s]
100%|██████████| 21/21 [00:00<00:00, 75.77it/s]
100%|██████████| 185/185 [00:07<00:00, 25.73it/s]
100%|██████████| 21/21 [00:00<00:00, 76.66it/s]
  2%|▏         | 3/1

Fold 3--Seed 940--Epoch 40--Train Loss 0.449107--Valid Loss 0.448773--Best Loss 0.448439


100%|██████████| 185/185 [00:07<00:00, 25.01it/s]
100%|██████████| 21/21 [00:00<00:00, 47.89it/s]
100%|██████████| 185/185 [00:08<00:00, 20.67it/s]
100%|██████████| 21/21 [00:00<00:00, 76.90it/s]
100%|██████████| 185/185 [00:07<00:00, 23.70it/s]
100%|██████████| 21/21 [00:00<00:00, 79.83it/s]
100%|██████████| 185/185 [00:06<00:00, 26.81it/s]
100%|██████████| 21/21 [00:00<00:00, 79.16it/s]
100%|██████████| 185/185 [00:07<00:00, 26.34it/s]
100%|██████████| 21/21 [00:00<00:00, 80.27it/s]
100%|██████████| 185/185 [00:06<00:00, 26.78it/s]
100%|██████████| 21/21 [00:00<00:00, 72.01it/s]
100%|██████████| 185/185 [00:06<00:00, 26.68it/s]
100%|██████████| 21/21 [00:00<00:00, 82.24it/s]
100%|██████████| 185/185 [00:06<00:00, 26.68it/s]
100%|██████████| 21/21 [00:00<00:00, 81.09it/s]
100%|██████████| 185/185 [00:06<00:00, 26.88it/s]
100%|██████████| 21/21 [00:00<00:00, 79.92it/s]
100%|██████████| 185/185 [00:06<00:00, 26.74it/s]
100%|██████████| 21/21 [00:00<00:00, 80.88it/s]
  2%|▏         | 3/1

Fold 3--Seed 940--Epoch 50--Train Loss 0.449306--Valid Loss 0.448915--Best Loss 0.448439


100%|██████████| 185/185 [00:07<00:00, 24.42it/s]
100%|██████████| 21/21 [00:00<00:00, 74.23it/s]
100%|██████████| 185/185 [00:08<00:00, 21.98it/s]
100%|██████████| 21/21 [00:00<00:00, 81.58it/s]
100%|██████████| 185/185 [00:07<00:00, 25.27it/s]
100%|██████████| 21/21 [00:00<00:00, 80.98it/s]
100%|██████████| 185/185 [00:06<00:00, 26.82it/s]
100%|██████████| 21/21 [00:00<00:00, 78.98it/s]
  5%|▍         | 9/185 [00:00<00:06, 26.65it/s]

In [21]:
test.to_csv(maindir+"/test_submission_NNet_2_inputs_threshold_10.csv", index=False)

In [22]:
test = pd.read_csv(maindir+'/test_submission_NNet_2_inputs_threshold_10.csv')
test.head()

Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,humidity,hdl_cholesterol_human_ok,hdl_cholesterol_human_high,hdl_cholesterol_human_low,cholesterol_ldl_human_ok,cholesterol_ldl_human_high,cholesterol_ldl_human_low,hemoglobin(hgb)_human_ok,hemoglobin(hgb)_human_high,hemoglobin(hgb)_human_low
0,ID_37BEI22R,0.449736,0.449798,0.447488,0.464694,0.466377,0.48535,0.488915,0.495073,0.504129,...,25.7,0.59078,0.223719,0.18838,0.57599,0.36964,0.050607,0.896298,0.058462,0.046193
1,ID_4W85V5DV,0.495429,0.505488,0.510239,0.51888,0.533147,0.543142,0.55167,0.558261,0.564027,...,30.16,0.5756,0.202328,0.221096,0.539524,0.419337,0.039337,0.899391,0.060092,0.04051
2,ID_L4YR3NDY,0.437904,0.439064,0.442527,0.450437,0.455363,0.465817,0.471249,0.479145,0.482595,...,25.51,0.596007,0.216324,0.189425,0.541802,0.415154,0.040871,0.902498,0.055435,0.04294
3,ID_U88E3SQ6,0.495038,0.506246,0.50873,0.518995,0.529961,0.537583,0.539696,0.5404,0.547279,...,41.32,0.527362,0.170076,0.307483,0.538031,0.424311,0.037432,0.890265,0.074587,0.035469
4,ID_NW7Z3XU7,0.531306,0.525309,0.535306,0.541387,0.551364,0.559821,0.564851,0.570824,0.577426,...,24.21,0.610282,0.203118,0.188446,0.465491,0.515336,0.026935,0.915633,0.047852,0.037117


In [62]:
test2 = pd.read_csv(maindir+'/test_submission.csv')
test2.head()

Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,humidity,hdl_cholesterol_human_ok,hdl_cholesterol_human_high,hdl_cholesterol_human_low,cholesterol_ldl_human_ok,cholesterol_ldl_human_high,cholesterol_ldl_human_low,hemoglobin(hgb)_human_ok,hemoglobin(hgb)_human_high,hemoglobin(hgb)_human_low
0,ID_37BEI22R,0.449736,0.449798,0.447488,0.464694,0.466377,0.48535,0.488915,0.495073,0.504129,...,25.7,0.574538,0.193993,0.234114,0.558803,0.397907,0.043285,0.889145,0.067482,0.042265
1,ID_4W85V5DV,0.495429,0.505488,0.510239,0.51888,0.533147,0.543142,0.55167,0.558261,0.564027,...,30.16,0.574224,0.195325,0.232993,0.558869,0.3976,0.04452,0.890879,0.067606,0.041634
2,ID_L4YR3NDY,0.437904,0.439064,0.442527,0.450437,0.455363,0.465817,0.471249,0.479145,0.482595,...,25.51,0.574304,0.194562,0.23268,0.5589,0.397578,0.044405,0.888642,0.069261,0.04255
3,ID_U88E3SQ6,0.495038,0.506246,0.50873,0.518995,0.529961,0.537583,0.539696,0.5404,0.547279,...,41.32,0.574501,0.193202,0.233977,0.558812,0.397734,0.043114,0.892265,0.065644,0.04036
4,ID_NW7Z3XU7,0.531306,0.525309,0.535306,0.541387,0.551364,0.559821,0.564851,0.570824,0.577426,...,24.21,0.574221,0.194536,0.232825,0.559274,0.39759,0.043004,0.889573,0.068451,0.042052


In [26]:
test

Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,absorbance162,absorbance163,absorbance164,absorbance165,absorbance166,absorbance167,absorbance168,absorbance169,temperature,humidity
0,ID_37BEI22R,0.449736,0.449798,0.447488,0.464694,0.466377,0.485350,0.488915,0.495073,0.504129,...,1.223793,1.195193,1.205222,1.164610,1.151594,1.166565,1.157079,1.200857,40.88,25.70
1,ID_4W85V5DV,0.495429,0.505488,0.510239,0.518880,0.533147,0.543142,0.551670,0.558261,0.564027,...,1.310219,1.294600,1.269705,1.244257,1.238365,1.218063,1.252711,1.255433,42.35,30.16
2,ID_L4YR3NDY,0.437904,0.439064,0.442527,0.450437,0.455363,0.465817,0.471249,0.479145,0.482595,...,1.162556,1.161711,1.160406,1.159570,1.159641,1.157491,1.177478,1.169607,42.83,25.51
3,ID_U88E3SQ6,0.495038,0.506246,0.508730,0.518995,0.529961,0.537583,0.539696,0.540400,0.547279,...,1.168321,1.137272,1.109380,1.047561,1.050649,1.020026,1.033139,1.023882,40.95,41.32
4,ID_NW7Z3XU7,0.531306,0.525309,0.535306,0.541387,0.551364,0.559821,0.564851,0.570824,0.577426,...,1.288590,1.287614,1.300164,1.285028,1.272818,1.277348,1.248892,1.290145,46.99,24.21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3655,ID_ADCBL266,0.506681,0.506536,0.510056,0.511791,0.518384,0.524765,0.530773,0.538229,0.549651,...,1.372433,1.354794,1.365435,1.362795,1.379104,1.372664,1.352756,1.307393,35.54,37.52
3656,ID_SW51B61O,0.488276,0.501509,0.498858,0.500627,0.511329,0.522876,0.530738,0.538328,0.542644,...,1.273410,1.274540,1.287052,1.274680,1.273053,1.255953,1.302455,1.286221,41.29,44.81
3657,ID_CO8IHJRA,0.494581,0.501446,0.499981,0.509865,0.512139,0.519129,0.521906,0.527789,0.538997,...,1.257945,1.244453,1.233102,1.271213,1.241361,1.253899,1.290227,1.314712,41.56,29.32
3658,ID_VN5CP3ZZ,0.431551,0.434236,0.433433,0.437899,0.451583,0.461391,0.471832,0.468035,0.471895,...,1.190920,1.173793,1.140725,1.114328,1.087129,1.135087,1.116364,1.171126,43.25,37.00


In [23]:
predictions_ = test[new_cols].values

In [24]:
preds = (predictions_ > 0.5).astype(int)
preds

array([[1, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       ...,
       [1, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0]])

In [25]:
new_cols

['hdl_cholesterol_human_ok',
 'hdl_cholesterol_human_high',
 'hdl_cholesterol_human_low',
 'cholesterol_ldl_human_ok',
 'cholesterol_ldl_human_high',
 'cholesterol_ldl_human_low',
 'hemoglobin(hgb)_human_ok',
 'hemoglobin(hgb)_human_high',
 'hemoglobin(hgb)_human_low']

In [26]:
test[new_cols] = preds

In [27]:
test[new_cols]

Unnamed: 0,hdl_cholesterol_human_ok,hdl_cholesterol_human_high,hdl_cholesterol_human_low,cholesterol_ldl_human_ok,cholesterol_ldl_human_high,cholesterol_ldl_human_low,hemoglobin(hgb)_human_ok,hemoglobin(hgb)_human_high,hemoglobin(hgb)_human_low
0,1,0,0,1,0,0,1,0,0
1,1,0,0,1,0,0,1,0,0
2,1,0,0,1,0,0,1,0,0
3,1,0,0,1,0,0,1,0,0
4,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...
3655,1,0,0,1,0,0,1,0,0
3656,1,0,0,1,0,0,1,0,0
3657,1,0,0,1,0,0,1,0,0
3658,1,0,0,1,0,0,1,0,0


In [89]:
new_cols

['hdl_cholesterol_human_ok',
 'hdl_cholesterol_human_high',
 'hdl_cholesterol_human_low',
 'cholesterol_ldl_human_ok',
 'cholesterol_ldl_human_high',
 'cholesterol_ldl_human_low',
 'hemoglobin(hgb)_human_ok',
 'hemoglobin(hgb)_human_high',
 'hemoglobin(hgb)_human_low']

In [28]:
def inverse_transform(data):
    
    
    def extract(vals, cols):
        
        index= np.argmax(vals)
        
        return cols[index]
        
        
    df = data.copy()
            
    step_size = 3
    start = 0
    
    for i, cols_j in enumerate(range(start, len(new_cols), step_size)):
        
        start = cols_j
        
        cols_i = new_cols[start: (start + step_size)]
        
        print(f'Columns idexed from {start} to {start + step_size} --> {cols_i}')
        
        df.loc[:, 'temp_col_'+str(i)] = df[cols_i].apply(lambda s : extract(s.values, cols_i), axis = 1)
                
        col_name = '_'.join(cols_i[0].split('_')[:-1])
                
        df.loc[:, col_name] = df['temp_col_'+str(i)].apply(lambda k : k.split('_')[-1])
        
    return df

In [29]:
test_ = inverse_transform(test)
test_

Columns idexed from 0 to 3 --> ['hdl_cholesterol_human_ok', 'hdl_cholesterol_human_high', 'hdl_cholesterol_human_low']
Columns idexed from 3 to 6 --> ['cholesterol_ldl_human_ok', 'cholesterol_ldl_human_high', 'cholesterol_ldl_human_low']
Columns idexed from 6 to 9 --> ['hemoglobin(hgb)_human_ok', 'hemoglobin(hgb)_human_high', 'hemoglobin(hgb)_human_low']


Unnamed: 0,Reading_ID,absorbance0,absorbance1,absorbance2,absorbance3,absorbance4,absorbance5,absorbance6,absorbance7,absorbance8,...,cholesterol_ldl_human_low,hemoglobin(hgb)_human_ok,hemoglobin(hgb)_human_high,hemoglobin(hgb)_human_low,temp_col_0,hdl_cholesterol_human,temp_col_1,cholesterol_ldl_human,temp_col_2,hemoglobin(hgb)_human
0,ID_37BEI22R,0.449736,0.449798,0.447488,0.464694,0.466377,0.485350,0.488915,0.495073,0.504129,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok
1,ID_4W85V5DV,0.495429,0.505488,0.510239,0.518880,0.533147,0.543142,0.551670,0.558261,0.564027,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok
2,ID_L4YR3NDY,0.437904,0.439064,0.442527,0.450437,0.455363,0.465817,0.471249,0.479145,0.482595,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok
3,ID_U88E3SQ6,0.495038,0.506246,0.508730,0.518995,0.529961,0.537583,0.539696,0.540400,0.547279,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok
4,ID_NW7Z3XU7,0.531306,0.525309,0.535306,0.541387,0.551364,0.559821,0.564851,0.570824,0.577426,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_high,high,hemoglobin(hgb)_human_ok,ok
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3655,ID_ADCBL266,0.506681,0.506536,0.510056,0.511791,0.518384,0.524765,0.530773,0.538229,0.549651,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok
3656,ID_SW51B61O,0.488276,0.501509,0.498858,0.500627,0.511329,0.522876,0.530738,0.538328,0.542644,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok
3657,ID_CO8IHJRA,0.494581,0.501446,0.499981,0.509865,0.512139,0.519129,0.521906,0.527789,0.538997,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok
3658,ID_VN5CP3ZZ,0.431551,0.434236,0.433433,0.437899,0.451583,0.461391,0.471832,0.468035,0.471895,...,0,1,0,0,hdl_cholesterol_human_ok,ok,cholesterol_ldl_human_ok,ok,hemoglobin(hgb)_human_ok,ok


In [82]:
y_oof

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [73]:
# oof_preds = np.argmax(oof_preds, axis = 1)
# y_true =  folds[target_cols].values

In [74]:
oof_preds.shape, y_true.shape

((13140,), (13140, 3))

In [47]:
# score = 0
# for i in range(len(target_cols)):
#     score_ = accuracy_score(y_true[:, i], oof_preds[:, i])
#     score += score_ / len(target_cols)

# print("CV Accuracy: ", score)

#### -  Convert our submission as per the sample submission 

In [30]:
def transform_c_hdl(row):
    return str(row["Reading_ID"]) + "_hdl_cholesterol_human" + "-" +  row["hdl_cholesterol_human"]

In [31]:
hdl_rows = pd.DataFrame(test_[['Reading_ID'] + targets].apply(transform_c_hdl, axis=1))

In [32]:
def transform_hemo(row):
    return str(row["Reading_ID"]) + "_hemoglobin(hgb)_human" +  "-" + row["hemoglobin(hgb)_human"]

In [33]:
hemo_rows = pd.DataFrame(test_[['Reading_ID'] + targets].apply(transform_hemo, axis=1))

In [34]:
def transform_c_ldl(row):
    return str(row["Reading_ID"]) + "_cholesterol_ldl_human" +  "-" + row["cholesterol_ldl_human"]

In [35]:
ldl_rows = pd.DataFrame(test_[['Reading_ID'] + targets].apply(transform_c_ldl, axis=1))

In [36]:
ss = pd.concat([hdl_rows, hemo_rows, ldl_rows]).reset_index(drop=True)

In [37]:
ss["target"] = ss[0].apply(lambda x: x.split("-")[1])
ss[0] = ss[0].apply(lambda x: x.split("-")[0])

In [38]:
ss = ss.rename(columns={0:"Reading_ID"})

In [39]:
ss.head()

Unnamed: 0,Reading_ID,target
0,ID_37BEI22R_hdl_cholesterol_human,ok
1,ID_4W85V5DV_hdl_cholesterol_human,ok
2,ID_L4YR3NDY_hdl_cholesterol_human,ok
3,ID_U88E3SQ6_hdl_cholesterol_human,ok
4,ID_NW7Z3XU7_hdl_cholesterol_human,ok


In [40]:
ss.to_csv(maindir+"/submission_NNet_2_inputs_threshold_10.csv", index=False)

### To Do
- Try other models,
- Cross validation