In [None]:
# Shell escape: run `nvidia-smi` to show the GPUs visible to this machine.
!nvidia-smi

In [None]:
import os
import random
import pickle
import joblib
import json
import time
from collections import deque
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
import warnings
from torch.utils.data import Dataset, DataLoader, TensorDataset, WeightedRandomSampler

from sklearn import datasets, linear_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.metrics import f1_score, roc_auc_score
from ast import literal_eval
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler

from pytorch_metric_learning import losses
from torcheval.metrics.functional import multiclass_f1_score
from torcheval.metrics.functional.classification import multiclass_recall
from torcheval.metrics.functional import multiclass_accuracy
from torcheval.metrics.functional import multiclass_auprc
from torcheval.metrics.functional import multiclass_auroc

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook uses.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU plus every CUDA device), exports ``PYTHONHASHSEED`` and
    switches cuDNN to deterministic, non-autotuned kernels.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # child processes launched after this call, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select a different algorithm on each run, which
    # defeats `deterministic = True`; turn it off as well.
    torch.backends.cudnn.benchmark = False


SEED = 220604 #9861
seed_everything(seed = SEED)

In [None]:
class Contrastive_Embedding(nn.Module): #supervised contrastive learning
    """Fully connected encoder producing the embedding used for contrastive training.

    The encoder is a stack of identical stages, one per entry of
    ``hidden_unit_sizes``: ``Linear -> BatchNorm1d -> Dropout -> LeakyReLU``.
    The output width is therefore ``hidden_unit_sizes[-1]``.

    Args:
        input_size: Number of input features.
        drop_rate: Dropout probability used in every stage.
        hidden_unit_sizes: Non-empty sequence with the width of each stage
            (an empty sequence raises ``IndexError``).
    """

    def __init__(self, input_size, drop_rate, hidden_unit_sizes):
        super().__init__()
        self.input_size = input_size
        self.hidden_unit_sizes = hidden_unit_sizes

        # A single LeakyReLU module is reused by every stage; it has no parameters.
        activation = nn.LeakyReLU()

        stage_widths = [hidden_unit_sizes[0]]
        stage_widths.extend(hidden_unit_sizes[k] for k in range(1, len(hidden_unit_sizes)))

        layers = []
        fan_in = input_size
        for fan_out in stage_widths:
            layers.extend((
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.Dropout(drop_rate),
                activation,
            ))
            fan_in = fan_out

        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Return the encoder output for the batch ``x``."""
        return self.encoder(x)

In [None]:
# Load the preprocessed table from the working directory into the global
# `dataset` frame that every later cell reads and mutates.
dataset =  pd.read_csv('./1hr Shock Research type27 for Cont_delta preprocessed GAP Mean.csv')

In [None]:
# Shape of the array of distinct `stay_id` values, i.e. how many distinct stays there are.
dataset.stay_id.unique().shape

In [None]:
# Number of rows for each distinct value of the 'Data type' column.
dataset['Data type'].value_counts()

In [None]:
# Fraction of all rows whose LR_NS value is exactly 0.
len(dataset[dataset['LR_NS']==0])/len(dataset)

In [None]:
# Build the 3-level 'Target Shock' column and binarise the drug / fluid columns.
#
# Every write goes through `dataset.loc[rows, column]`. The previous chained
# form (`dataset[column][rows] = value`) raises SettingWithCopyWarning and
# silently writes to a temporary when pandas copy-on-write is enabled.

# index: Gap Shock -> 'Binary Shock' is 1 while 'Shock' is 0
gap_shock_index = dataset[(dataset['Binary Shock']==1) & (dataset['Shock']==0)].index
# index: pre-defined Shock -> 'Binary Shock' is 1 and 'Shock' is positive
pre_definded_shock_index = dataset[(dataset['Binary Shock']==1) & (dataset['Shock']>0)].index

# 0 = neither of the above, 1 = gap shock, 2 = pre-defined shock
dataset['Target Shock'] = 0
dataset.loc[gap_shock_index, 'Target Shock'] = 1
dataset.loc[pre_definded_shock_index, 'Target Shock'] = 2

# Sedatives: any positive value becomes 1 (other values are left untouched).
Propofol_index = dataset[dataset['Propofol']>0].index
Midazolam_index =dataset[dataset['Midazolam']>0].index
Fentanyl_index =dataset[dataset['Fentanyl']>0].index

dataset.loc[Propofol_index, 'Propofol'] = 1
dataset.loc[Midazolam_index, 'Midazolam'] = 1
dataset.loc[Fentanyl_index, 'Fentanyl'] = 1

# Individual vasopressor drugs: any positive value becomes 1.
norepi_index = dataset[dataset['Norepinephrine']>0].index
epi_index = dataset[dataset['Epinephrine']>0].index
phenyl_index = dataset[dataset['Phenylephrine']>0].index
vaso_index = dataset[dataset['Vasopressin']>0].index

dataset.loc[norepi_index, 'Norepinephrine'] = 1
dataset.loc[epi_index, 'Epinephrine'] = 1
dataset.loc[phenyl_index, 'Phenylephrine'] = 1
dataset.loc[vaso_index, 'Vasopressin'] = 1

# Separate 0/1 indicator columns; the original 'Vasopressors' and 'LR_NS'
# columns keep their values.
vaso_on_index = dataset[dataset['Vasopressors']>0].index
dataset['Vasopressors:Binary'] = 0
dataset.loc[vaso_on_index, 'Vasopressors:Binary'] = 1

lr_ns_on_index = dataset[dataset['LR_NS']>0].index
dataset['LR_NS:Binary'] = 0
dataset.loc[lr_ns_on_index, 'LR_NS:Binary'] = 1

In [None]:
# Column listing after the indicator columns were added.
dataset.columns

In [None]:
# Share of rows with LR_NS > 0 among rows where LR_NS is either positive or
# exactly 0 (rows with any other value, e.g. NaN, are in neither count).
lr_ns_positive_rows = dataset[dataset['LR_NS']>0].shape[0]
lr_ns_zero_rows = dataset[dataset['LR_NS']==0].shape[0]
print(lr_ns_positive_rows/(lr_ns_positive_rows+lr_ns_zero_rows))

In [None]:
# Missing elevation types are mapped to the extra category 27.
dataset['Elevation index type'] = dataset['Elevation index type'].fillna(27).astype('int')

# Contrastive label = one id per (elevation type, target shock) pair:
#     Cont_label = elevation_type * 3 + target_shock
# so elevation type 0 -> 0, 1, 2; type 1 -> 3, 4, 5; ...; type 27 -> 81, 82, 83.
# This closed form replaces 84 copy-pasted chained assignments
# (`dataset['Cont_label'][rows] = k`), which do not write through under
# pandas copy-on-write.
N_SHOCK_CLASSES = 3       # 'Target Shock' takes the values 0, 1, 2
MAX_ELEVATION_TYPE = 27   # highest elevation type that receives a label

elevation_type = dataset['Elevation index type']
target_shock = dataset['Target Shock']
has_label = (elevation_type.between(0, MAX_ELEVATION_TYPE)
             & target_shock.between(0, N_SHOCK_CLASSES - 1))

# Keep the column as object dtype with Python ints, and None for any row
# outside the labelled range, exactly as the assignment-by-assignment
# version produced.
label_values = (elevation_type * N_SHOCK_CLASSES + target_shock).astype(object)
dataset['Cont_label'] = pd.Series(
    np.where(has_label, label_values, None),
    index=dataset.index,
    dtype=object,
)

In [None]:
# Column listing again, now including 'Cont_label'.
dataset.columns

In [None]:
def param():
    """Return the default hyper-parameters as a 7-tuple.

    Returns:
        ``(hidden, SEED, batch_size, lr, epoch, drop_rate, temp)`` where
        ``hidden`` is a fresh ``{'hidden': []}`` dict on every call, so callers
        may append layer widths to it without affecting later calls.
    """
    layer_config = {'hidden': []}
    return (
        layer_config,         # hidden: layer widths, filled in by the caller
        220604,               # SEED (alternative noted by the author: 9861)
        128,                  # batch_size
        0,                    # lr
        120,                  # epoch
        0.223573505016481,    # drop_rate
        0.3137977865167101,   # temp
    )


def load_data(df):
    """Split a frame into the model's input features and the contrastive label.

    Prints the number and names of the selected features.

    Args:
        df: DataFrame containing every feature column named below plus
            ``'Cont_label'``.

    Returns:
        ``(x, y)``: ``x`` holds the 47 feature columns in the fixed order
        vitals, ``*_RSI``, ``*_delta`` / ``*_delta_ratio``, labs, sedatives;
        ``y`` is ``df['Cont_label']`` cast to ``category``.
    """
    vital_signs = [
        'Heart rate', 'Blood pressure systolic', 'Blood pressure diastolic',
        'Blood pressure mean', 'Respiratory rate', 'SpO2', 'Temperature',
        'Shock index',
    ]
    rsi_features = [f'{name}_RSI' for name in vital_signs]
    # For each vital sign: its delta immediately followed by its delta ratio.
    delta_features = [f'{name}{suffix}'
                      for name in vital_signs
                      for suffix in ('_delta', '_delta_ratio')]
    lab_features = [
        'Creatinine', 'Troponin', 'Lactate',
        'Actual Troponin test', 'Actual Creatinine test', 'Actual Lactate test',
        'Troponin Elevation index', 'Creatinine Elevation index',
        'Lactate Elevation index', 'Readmission', 'Elevation index type',
    ]
    sedative_features = ['Propofol', 'Midazolam', 'Fentanyl', 'Ketamine']
    # 'Vasopressors:Binary' and 'LR_NS:Binary' are deliberately not selected.
    feature_names = (vital_signs + rsi_features + delta_features
                     + lab_features + sedative_features)

    x = df[feature_names]

    fn = x.shape[1]
    print(f'{fn} Features: ', x.columns)

    y = df['Cont_label'].astype('category')
    return x, y


def train(trial, search = True):
    """Train the contrastive encoder once and return the last epoch's mean loss.

    Used both as the Optuna objective (``search=True``) and to retrain with an
    already chosen trial (``search=False``). The trained model, loss history,
    optimizer and scheduler are published as the globals ``emb_model``,
    ``results_loss``, ``optimizer`` and ``scheduler`` because later cells read
    them. Reads the globals ``dataset`` and ``device``.

    Args:
        trial: With ``search=True`` an Optuna trial used to suggest the layer
            count, layer widths, learning rate and temperature. With
            ``search=False`` any object whose ``.params`` mapping holds
            ``num_layer_0``, ``h1`` .. ``h<num_layer_0>`` and ``temp``.
        search: Whether to sample hyper-parameters from ``trial``.

    Returns:
        Mean SupCon training loss of the last epoch that ran.
    """
    global emb_model, results_loss, optimizer, scheduler

    hidden, SEED, batch_size, lr, epoch, drop_rate, temp = param()

    if search == True:
        for idx, _ in enumerate(hidden):
            num_layers = trial.suggest_int(f'num_layer_{idx}', 3, 10) # number of layers
            for layer in range(num_layers-1):
                hidden['hidden'].append(trial.suggest_int(f'h{layer+1}', 10, 500)) # layer width
            hidden['hidden'].append(trial.suggest_int(f'h{num_layers}', 5, 20)) # width of the last layer
        epoch     = 300
        lr        = trial.suggest_categorical('Learning_rate',[0.0001, 0.00005, 0.00001])
        drop_rate = 0
        temp      = trial.suggest_uniform('temp', 0.1, 0.5)
    else :
        # Rebuild exactly as many layers as the trial recorded. The previous
        # hard-coded h1..h5 list was only right for 5-layer trials.
        num_layers = trial.params['num_layer_0']
        hidden['hidden'] = [trial.params[f'h{k}'] for k in range(1, num_layers + 1)]
        epoch = 300
        lr = 0.00001
        drop_rate = 0
        temp = trial.params['temp']

    print(hidden)
    print('learning_rate : ', lr, "\nepoch : ", epoch, "\ndrop_rate : ", drop_rate, "\ntemperature : ", temp)

    results_loss = {
    'epoch_by_trn'          : [],
    'epoch_by_val'          : []
    }
    mimic_df = dataset.copy()

    trn_x, trn_y  = load_data(mimic_df)

    # Beware of data leakage: the scaler is fitted on every row of `dataset`.
    scaler        = MinMaxScaler()
    trn_scaled_x  = scaler.fit_transform(trn_x)

    # Missing values are replaced by a sentinel far outside the scaled range.
    trn_scaled_x[np.isnan(trn_scaled_x)] = -1000

    trn_tensor_x  = torch.FloatTensor(trn_scaled_x)
    trn_tensor_y  = torch.LongTensor(trn_y.values)

    n_feat = trn_tensor_x.shape[1]

    train_dataset = TensorDataset(trn_tensor_x, trn_tensor_y)

    # Class-balanced sampling: each row is drawn with probability proportional
    # to 1 / (number of rows sharing its label). `inverse` maps every row to
    # the position of its label in the sorted unique labels, so the weights
    # stay aligned even when some label ids never occur. Indexing the weight
    # array by the raw label value is only correct when every id 0..max is
    # present.
    labels = mimic_df['Cont_label'].to_numpy()
    _, inverse, class_sample_count = np.unique(labels, return_inverse=True, return_counts=True)
    samples_weight = torch.from_numpy(1. / class_sample_count[inverse]).double()

    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader  = torch.utils.data.DataLoader(dataset= train_dataset, batch_size=batch_size, shuffle=False, sampler=sampler, drop_last=True)

    # Define the model
    emb_model = Contrastive_Embedding(n_feat, drop_rate, hidden['hidden']).to(device)
    print(emb_model)

    contrastive_loss = losses.SupConLoss(temperature=temp)

    # NOTE(review): `lr` (including the searched 'Learning_rate') is not passed
    # to the optimizer or the scheduler; the learning rate comes only from the
    # scheduler's fixed eta_max. Confirm whether eta_max should be `lr`.
    optimizer = optim.RMSprop(emb_model.parameters(), lr= 0)

    scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=50, T_mult=1, eta_max=0.0001,  T_up=50, gamma=0.5)

    # Stop once the epoch loss has risen `patience` times in total; the counter
    # is cumulative and is not reset when the loss improves again.
    patience_value = 0
    patience = 10
    for i in range(1, epoch+1):
        emb_model.train()

        batch_losses = []
        start = time.time()

        for X, y in train_loader:
            X  = X.to(device)
            # Keep the labels on `device`; `.long()` does not move them to CPU.
            y  = y.to(device).long()

            optimizer.zero_grad()

            latent = emb_model(X)
            loss_cont = contrastive_loss(latent, y)

            loss_cont.backward()
            optimizer.step()

            batch_losses.append(loss_cont.item())

        tr_cont_mean = np.array(batch_losses).mean()

        scheduler.step()
        results_loss['epoch_by_trn'].append(tr_cont_mean)

        end = time.time()
        print(f'epoch {i}  time: {end - start:.4f}sec trn_contrastive: {tr_cont_mean: .4f}')

        if i > 2:
            if results_loss['epoch_by_trn'][-1] > results_loss['epoch_by_trn'][-2]:
                patience_value += 1

        if patience_value == patience :
            print('------------------------------------------------')
            print(f'epoch {i} End')
            print('================================================')
            break

    return tr_cont_mean


In [None]:
def make_embeded_df(model_name):
    """Embed every row of the global ``dataset`` with a trained encoder.

    The rows are preprocessed the same way ``train`` does it (feature
    selection via ``load_data``, a ``MinMaxScaler`` fitted on those features,
    NaNs replaced by the sentinel) and pushed through the model in evaluation
    mode in a single forward pass.

    Args:
        model_name: The trained encoder module (not a string). It is switched
            to eval mode.

    Returns:
        DataFrame with one row per dataset row: the embedding dimensions in
        columns ``0 .. d-1`` followed by the label column, which is also named
        ``0`` (a later cell relies on that duplicated name).
    """
    print()
    print('Start Getting the latent space vector(Train, Valid sample)')

    mimic_df = dataset.copy()
    print(len(mimic_df))

    trn_x, trn_y  = load_data(mimic_df)

    scaler        = MinMaxScaler()
    trn_scaled_x  = scaler.fit_transform(trn_x)

    # Must be the same sentinel that train() writes (-1000). This function
    # previously used -10000, so rows with missing values were embedded from
    # inputs the encoder never saw during training.
    NAN_FILL_VALUE = -1000
    trn_scaled_x[np.isnan(trn_scaled_x)] = NAN_FILL_VALUE

    trn_tensor_x  = torch.FloatTensor(trn_scaled_x)
    trn_tensor_y  = torch.LongTensor(trn_y.values)

    start = time.time()
    model_name.eval()
    with torch.no_grad():
        # One full-batch forward pass.
        latent_vector_train = model_name(trn_tensor_x.to(device))

    emb_train_x = pd.DataFrame(latent_vector_train.cpu().numpy())
    emb_train = pd.concat([emb_train_x, pd.DataFrame(trn_tensor_y.numpy())], axis = 1)
    end = time.time()
    print()
    print('End, Time consume(min):{}'.format((end - start)/60))

    return emb_train

In [None]:
import math
from torch.optim.lr_scheduler import _LRScheduler

class CosineAnnealingWarmUpRestarts(_LRScheduler):
    """Cosine annealing with linear warm-up and decaying restarts.

    Within a cycle of length ``T_i`` the learning rate rises linearly from each
    group's base lr to ``eta_max`` over the first ``T_up`` steps, then follows
    a half cosine back down to the base lr. At every restart the cycle length
    becomes ``(T_i - T_up) * T_mult + T_up`` and the peak is scaled by
    ``gamma`` (peak of cycle ``c`` is ``eta_max * gamma ** c``).

    Args:
        optimizer: Wrapped optimizer; its initial lr is the floor of the schedule.
        T_0: Length of the first cycle (positive int).
        T_mult: Cycle length multiplier (int >= 1).
        eta_max: Peak learning rate of the first cycle.
        T_up: Number of warm-up steps per cycle (int >= 0).
        gamma: Factor applied to the peak at every restart.
        last_epoch: Index of the last epoch, ``-1`` to start fresh.

    Raises:
        ValueError: If ``T_0``, ``T_mult`` or ``T_up`` fail the checks above.
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError("Expected positive integer T_up, but got {}".format(T_up))

        # Schedule configuration.
        self.T_0, self.T_mult, self.T_up = T_0, T_mult, T_up
        self.base_eta_max = eta_max
        self.gamma = gamma

        # Mutable state; all of it must exist before the base constructor
        # runs, because that constructor performs the first step().
        self.eta_max = eta_max
        self.T_i = T_0
        self.cycle = 0
        self.T_cur = last_epoch
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every param group for the current position."""
        if self.T_cur == -1:
            return self.base_lrs

        if self.T_cur < self.T_up:
            # Linear warm-up from base_lr towards eta_max.
            return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr
                    for base_lr in self.base_lrs]

        # Cosine decay from eta_max back to base_lr over the rest of the cycle.
        phase = math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up)
        cosine_term = 1 + math.cos(phase)
        return [base_lr + (self.eta_max - base_lr) * cosine_term / 2
                for base_lr in self.base_lrs]

    def _advance_one_step(self):
        """Move one step forward, starting a new cycle when the current one ends."""
        self.T_cur = self.T_cur + 1
        if self.T_cur >= self.T_i:
            self.cycle += 1
            self.T_cur = self.T_cur - self.T_i
            self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up

    def _jump_to(self, epoch):
        """Recompute cycle index and in-cycle position for an explicit epoch."""
        if not epoch >= self.T_0:
            self.T_i = self.T_0
            self.T_cur = epoch
            return

        if self.T_mult == 1:
            self.T_cur = epoch % self.T_0
            self.cycle = epoch // self.T_0
            return

        n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
        self.cycle = n
        self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
        self.T_i = self.T_0 * self.T_mult ** (n)

    def step(self, epoch=None):
        """Advance the schedule (or jump to ``epoch``) and write the new lrs."""
        if epoch is None:
            epoch = self.last_epoch + 1
            self._advance_one_step()
        else:
            self._jump_to(epoch)

        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

In [None]:
# Hyper-parameter search: five Optuna trials minimising the value returned by
# train(). All warnings are silenced for the rest of the session.
warnings.filterwarnings("ignore")
import os
import optuna

# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ.update({
    "CUDA_VISIBLE_DEVICES": '0',
    'CUDA_LAUNCH_BLOCKING': '1',
})
n_gpu             = 1
# Training runs on the CPU regardless of the CUDA variables above.
device = torch.device('cpu')
print(device)

# Set parameters
# NOTE(review): the TPE sampler is created without a seed, so the sequence of
# sampled trials is not tied to SEED.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(), direction="minimize")
study.optimize(train, n_trials = 5)

trials_by_state = {
    state: [t for t in study.trials if t.state == state]
    for state in (optuna.trial.TrialState.PRUNED, optuna.trial.TrialState.COMPLETE)
}
pruned_trials = trials_by_state[optuna.trial.TrialState.PRUNED]
complete_trials = trials_by_state[optuna.trial.TrialState.COMPLETE]

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# `trial` stays in the namespace: the final-training cell passes it to train().
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Parameter 정보 꼭 기록하기

In [None]:
# Final training run: rebuild the network from the best trial found by the
# search cell (`trial`) and train it once with search disabled.
warnings.filterwarnings("ignore")
import os
import optuna

os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '0',
    'CUDA_LAUNCH_BLOCKING': '1',
})
n_gpu             = 1
device = torch.device('cpu')
print(device)

model_loss = train(trial=trial,search = False)

# Tag used in every artefact file name written by the cells below.
version = '240721'

'''
240526 is {'hidden': [136, 19, 55, 13, 153, 105, 167, 87, 34]}
learning_rate :  0.0006 
epoch :  86 
drop_rate :  0 
temperature :  0.47626381444779575

240606 is 
  Value:  4.829311113724287
  Params: 
    num_layer_0: 10
    h1: 104
    h2: 108
    h3: 52
    h4: 181
    h5: 34
    h6: 197
    h7: 119
    h8: 70
    h9: 200
    h10: 14
    epoch: 89
    Learning_rate: 0.00005
    temp: 0.3725457792592851

240616 is
 Value:  4.833700034429641
  Params: 
    num_layer_0: 10
    h1: 55
    h2: 172
    h3: 131
    h4: 81
    h5: 104
    h6: 138
    h7: 18
    h8: 27
    h9: 32
    h10: 7
    Learning_rate: 0.0001
    temp: 0.19152583769744558    

240622 is
Value:  4.813156143596138
  Params: 
    num_layer_0: 10
    h1: 22
    h2: 197
    h3: 174
    h4: 116
    h5: 116
    h6: 60
    h7: 70
    h8: 183
    h9: 123
    h10: 14
    Learning_rate: 0.0001
    temp: 0.4484512259552573

240624 is
Value:  4.234910612705803
  Params: 
    num_layer_0: 10
    h1: 283
    h2: 191
    h3: 145
    h4: 113
    h5: 41
    h6: 497
    h7: 17
    h8: 427
    h9: 84
    h10: 6
    Learning_rate: 0.0001
    temp: 0.18610683430418767    

240712 is
Best trial:
  Value:  1.7689395768463896
  Params: 
    num_layer_0: 8
    h1: 53
    h2: 385
    h3: 300
    h4: 490
    h5: 304
    h6: 458
    h7: 209
    h8: 20
    Learning_rate: 1e-05
    temp: 0.2572784082698316

  240721 is
  Best trial:
  Value:  1.6679313899698696
  Params: 
    num_layer_0: 5
    h1: 66
    h2: 30
    h3: 268
    h4: 15
    h5: 19
    Learning_rate: 0.0001
    temp: 0.1768852090437697
'''

In [None]:
# Visualise the learning-rate schedule used in train().
#
# The schedule is replayed on a throw-away optimizer/scheduler pair built with
# the same scheduler arguments as train(). Stepping the real `optimizer` and
# `scheduler` here would apply 120 extra RMSprop updates, using the gradients
# left over from the last training batch, to `emb_model` before it is saved
# and used for the embeddings.
N_SCHEDULE_EPOCHS = 120

schedule_optimizer = optim.RMSprop([nn.Parameter(torch.zeros(1))], lr=0)
schedule_scheduler = CosineAnnealingWarmUpRestarts(schedule_optimizer, T_0=50, T_mult=1, eta_max=0.0001,  T_up=50, gamma=0.5)

lrs = []
for i in range(N_SCHEDULE_EPOCHS):
    # No gradients exist on the dummy parameter, so this step changes nothing;
    # it only keeps the optimizer-then-scheduler call order PyTorch expects.
    schedule_optimizer.step()
    lrs.append(schedule_optimizer.param_groups[0]["lr"])
    schedule_scheduler.step()


plt.plot(range(N_SCHEDULE_EPOCHS), lrs, color = 'limegreen',  label = 'Training Cosine Annealing Warm Restarts')
plt.xlabel('Epoch')
plt.ylabel('Learning rate(green)')
plt.legend()
plt.show()

In [None]:
# Training-loss curve of the last train() call, saved next to the notebook.
# The x axis is derived from the recorded history: a hard-coded length only
# works when training stopped after exactly that many epochs and otherwise
# makes plt.plot raise on the x/y size mismatch.
epoch_losses = results_loss['epoch_by_trn']
plt.plot(range(len(epoch_losses)), epoch_losses, color = 'dodgerblue', label = 'Contrastive Loss')

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.savefig(f'./vital_lab_{version}_loss')
plt.show()

In [None]:
# Persist the trained encoder under a versioned file name.
# NOTE(review): despite the key name, the value stored is the whole module
# object, not `emb_model.state_dict()`; loading therefore needs the
# Contrastive_Embedding class to be importable. Confirm what the loading code
# expects before changing this.
checkpoint_path = f"./Contrastive_Embedding_Net_vital_lab_({version}).pt"
checkpoint = {"model_state_dict": emb_model}
torch.save(checkpoint, checkpoint_path)

In [None]:
# Show the layer structure of the trained encoder.
print(emb_model)

In [None]:
# Embed every dataset row with the trained encoder (embedding columns plus a label column).
emb_df = make_embeded_df(emb_model)

In [None]:
# Place the embedding columns next to the original rows. Both frames get a
# fresh 0..n-1 index first so the column-wise concat pairs rows by position.
dataset_rows = dataset.reset_index(drop=True)
embedding_rows = emb_df.reset_index(drop=True)
sample = pd.concat([dataset_rows, embedding_rows], axis = 1, ignore_index=False)
sample

In [None]:
# Number of distinct stays in the joined frame (compare with the same check on `dataset`).
sample.stay_id.unique().shape

In [None]:
# Write the joined frame to a versioned CSV without the row index.
sample.to_csv(f'./train vital embedding_data_type27_{version}.csv', index=False)

In [None]:
# Read back the file that was just written and list its columns.
# NOTE(review): presumably the round trip is what renames the duplicated label
# column '0' to '0.1' (pandas de-duplicates header names on read) — confirm.
sample = pd.read_csv(f'./train vital embedding_data_type27_{version}.csv')
sample.columns

In [None]:
# Copy the label column produced by make_embeded_df (named '0.1' after the CSV
# round trip) over 'Cont_label' and remove the duplicate.
# The guard makes the cell safe to re-run: once '0.1' has been dropped, a
# second execution would otherwise fail with a KeyError.
if '0.1' in sample.columns:
    sample['Cont_label'] = sample['0.1']
    sample = sample.drop(columns=['0.1'])

In [None]:
# Display the frame after the label column was restored.
sample

In [None]:
# Overwrite the same CSV, now without the duplicated label column.
sample.to_csv(f'./train vital embedding_data_type27_{version}.csv', index=False)

t-SNE

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import random
from matplotlib import colors
from tqdm.notebook import tqdm

# t-SNE input: drop every listed identifier, raw feature, derived feature and
# label column from `sample`; whatever remains is fed to t-SNE.
# NOTE(review): presumably only the embedding columns remain — verify with the
# `x.columns` output below. `drop` raises a KeyError if any listed column is
# missing; 'peri_Shock' is not created anywhere in the cells above, so it has
# to come from the CSV itself — confirm.
x = sample.drop(columns=['subject_id', 'hadm_id', 'stay_id', 'time', 'Death', 'gender',
       'anchor_age', 'Weight', 'Height', 'Heart rate',
       'Blood pressure systolic', 'Blood pressure diastolic',
       'Blood pressure mean', 'Respiratory rate', 'SpO2', 'Temperature',
       'Hemoglobin', 'Creatinine', 'Troponin', 'Lactate', 
       'Propofol', 'Midazolam', 'Fentanyl',
       'IV fluids', 'Transfusion', 'Urine Output', 'LR_NS', 'Shock', 'Ambiguous Shock',
       'Ketamine', 'Vasopressors', 'Actual Troponin test',
       'Actual Creatinine test', 'Actual Lactate test',
       'Troponin Elevation index', 'Creatinine Elevation index',
       'Lactate Elevation index', 'Readmission', 'Elevation index type',
       'Heart rate_delta', 'Heart rate_delta_ratio',
       'Blood pressure systolic_delta', 'Blood pressure systolic_delta_ratio',
       'Blood pressure diastolic_delta',
       'Blood pressure diastolic_delta_ratio', 'Blood pressure mean_delta',
       'Blood pressure mean_delta_ratio', 'Respiratory rate_delta',
       'Respiratory rate_delta_ratio', 'SpO2_delta', 'SpO2_delta_ratio',
       'Temperature_delta', 'Temperature_delta_ratio', 'Shock index_delta', 'Shock index_delta_ratio',
       'Data type', 'CMO', 'Epinephrine', 'Norepinephrine', 'Phenylephrine', 'Vasopressin', 'Cont_label','Binary Shock','Shock index', 'Target Shock',
       'Heart rate_RSI', 'Blood pressure systolic_RSI',
       'Blood pressure diastolic_RSI', 'Blood pressure mean_RSI',
       'Respiratory rate_RSI', 'SpO2_RSI', 'Temperature_RSI', 'Shock index_RSI',
       'Vasopressors:Binary', 'LR_NS:Binary','peri_Shock'])


# Columns used only to colour the t-SNE scatter plots.
y = sample[['Cont_label', 'Troponin Elevation index', 'Creatinine Elevation index', 'Lactate Elevation index', 'CMO', 'Shock', 'Binary Shock', 'Target Shock','peri_Shock']]

x.columns

In [None]:
# t-SNE projection of the feature matrix `x` down to two dimensions.
# Setting axes.unicode_minus to False makes matplotlib draw minus signs on
# tick labels with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

tsne = TSNE(
    n_components=2,
    random_state=0,  # fixed seed for the embedding
)
result_tsne = tsne.fit_transform(x)

# Visualization of `result_tsne` happens in the cells below.

In [None]:
# t-SNE embedding with each point coloured by its 'Cont_label' value.
tsne_dim1 = result_tsne[:, 0]
tsne_dim2 = result_tsne[:, 1]

plt.figure(figsize=(10, 10))
scatter = plt.scatter(tsne_dim1, tsne_dim2, c=y['Cont_label'], s=0.5)

# One legend entry per distinct colour value found in the scatter.
legend_handles, legend_labels = scatter.legend_elements()
plt.legend(legend_handles, legend_labels, title="Classes")
plt.show()

In [None]:
# t-SNE embedding with each point coloured by its 'Target Shock' value.
tsne_dim1 = result_tsne[:, 0]
tsne_dim2 = result_tsne[:, 1]

plt.figure(figsize=(10, 10))
scatter = plt.scatter(tsne_dim1, tsne_dim2, c=y['Target Shock'], s=0.5)

# One legend entry per distinct colour value found in the scatter.
legend_handles, legend_labels = scatter.legend_elements()
plt.legend(legend_handles, legend_labels, title="Classes")
plt.show()

In [None]:
# Colour the t-SNE embedding by the 'Data type' column of `sample`:
#   100  -> rows where 'Data type' == 'Non Ambiguous'
#   -100 -> rows where 'Data type' == 'Ambiguous'
#   0    -> every other row
non_ambiguous_index = sample[(sample['Data type']=='Non Ambiguous')].index
ambiguous_index = sample[(sample['Data type']=='Ambiguous')].index

# Assign with a single .loc call instead of the chained
# y['Data type int'][idx] = value form: chained assignment emits
# SettingWithCopyWarning and does not modify `y` at all once pandas
# Copy-on-Write is enabled.
y['Data type int'] = 0
y.loc[non_ambiguous_index, 'Data type int'] = 100
y.loc[ambiguous_index, 'Data type int'] = -100

plt.figure(figsize=(10, 10))
scatter = plt.scatter(result_tsne[:, 0], result_tsne[:, 1], c=y['Data type int'], s=0.5)
plt.title("T-sne Visualization of unknown", fontsize=16)
plt.show()

In [None]:
# Colour the t-SNE embedding by the combination of 'Data type' and
# 'Target Shock'. 'Target Shock' values 0 / 1 / 2 are called
# normal / peri / sh in the variable names below.
ambiguous_normal_index = sample[(sample['Data type']=='Ambiguous') & (sample['Target Shock']==0)].index
ambiguous_peri_index = sample[(sample['Data type']=='Ambiguous') & (sample['Target Shock']==1)].index
ambiguous_sh_index = sample[(sample['Data type']=='Ambiguous') & (sample['Target Shock']==2)].index

non_ambiguous_normal_index = sample[(sample['Data type']=='Non Ambiguous') & (sample['Target Shock']==0)].index
non_ambiguous_peri_index = sample[(sample['Data type']=='Non Ambiguous') & (sample['Target Shock']==1)].index
non_ambiguous_sh_index = sample[(sample['Data type']=='Non Ambiguous') & (sample['Target Shock']==2)].index

# Rows matching none of the six groups keep the value 0.
y['Data type int'] = 0

# Assign with a single .loc call instead of the chained
# y['Data type int'][idx] = value form: chained assignment emits
# SettingWithCopyWarning and does not modify `y` at all once pandas
# Copy-on-Write is enabled.
# Negative codes mark 'Ambiguous' rows, positive codes 'Non Ambiguous' rows.
y.loc[ambiguous_normal_index, 'Data type int'] = -300
y.loc[ambiguous_peri_index, 'Data type int'] = -200
y.loc[ambiguous_sh_index, 'Data type int'] = -100
y.loc[non_ambiguous_normal_index, 'Data type int'] = 100
y.loc[non_ambiguous_peri_index, 'Data type int'] = 200
y.loc[non_ambiguous_sh_index, 'Data type int'] = 300


plt.figure(figsize=(10, 10))
scatter = plt.scatter(result_tsne[:, 0], result_tsne[:, 1], c=y['Data type int'], s=0.5)
plt.title("T-sne Visualization of Ambi and non-ambi", fontsize=16)
plt.legend(*scatter.legend_elements(), title="Classes")
# Save before plt.show() so the written figure is the one just drawn.
plt.savefig(f"./T-sne Visualization of vital_lab_{version} Normal and peri and Shock")
plt.show()

In [None]:
# t-SNE embedding with each point coloured by its 'Binary Shock' value.
tsne_dim1 = result_tsne[:, 0]
tsne_dim2 = result_tsne[:, 1]

plt.figure(figsize=(10, 10))
scatter = plt.scatter(tsne_dim1, tsne_dim2, c=y['Binary Shock'], s=0.5)

# One legend entry per distinct colour value found in the scatter.
legend_handles, legend_labels = scatter.legend_elements()
plt.legend(legend_handles, legend_labels, title="Classes")
plt.show()

In [None]:
# t-SNE embedding with each point coloured by its 'Shock' value.
tsne_dim1 = result_tsne[:, 0]
tsne_dim2 = result_tsne[:, 1]

plt.figure(figsize=(10, 10))
scatter = plt.scatter(tsne_dim1, tsne_dim2, c=y['Shock'], s=0.5)

# One legend entry per distinct colour value found in the scatter.
legend_handles, legend_labels = scatter.legend_elements()
plt.legend(legend_handles, legend_labels, title="Classes")
plt.show()

In [None]:
# Show the three elevation-index columns for the rows of `sample` whose
# 'Elevation index type' equals 4.
sample.loc[
    sample['Elevation index type'] == 4,
    ['Lactate Elevation index', 'Creatinine Elevation index', 'Troponin Elevation index'],
]

In [None]:
# Highlight one subgroup on the t-SNE embedding: 'Non Ambiguous' rows with
# 'Target Shock' == 1 and 'Elevation index type' == 4 get the value 100.
# NOTE(review): the meaning of 'Elevation index type' codes 4 and 27 is not
# visible in this cell -- confirm against where that column is built.
non_ambiguous_index = sample[(sample['Data type']=='Non Ambiguous') & (sample['Target Shock']==1) & (sample['Elevation index type']==4)].index
ambiguous_index = sample[(sample['Data type']=='Ambiguous') & (sample['Target Shock']==1) & (sample['Elevation index type']==27)].index

# Assign with a single .loc call instead of the chained
# y['Data type int'][idx] = value form: chained assignment emits
# SettingWithCopyWarning and does not modify `y` at all once pandas
# Copy-on-Write is enabled.
y['Data type int'] = 0
y.loc[non_ambiguous_index, 'Data type int'] = 100
# The column was just reset to 0, so this assignment leaves the selected
# 'Ambiguous' rows in the same colour group as every unselected row.
y.loc[ambiguous_index, 'Data type int'] = 0

plt.figure(figsize=(10, 10))
scatter = plt.scatter(result_tsne[:, 0], result_tsne[:, 1], c=y['Data type int'], s=0.5)
plt.legend(*scatter.legend_elements(), title="Classes")
plt.title("T-sne Visualization of unknown", fontsize=16)
plt.show()