## Import Packages

In [2]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from sklearn import metrics

In [2]:
random_seed = 8022022 # or any of your favorite number 
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)

## Read dataset

* Make sure your input dataset included `participant_id`,`specimen`,`collect_wk`,`was_preterm`,`was_early_preterm`. Other variables should be co-variates 

In [3]:
#replace 'test/metadata_imputed.csv' with the path to your input file

mydata = pd.read_csv('data/combo_clean_data.csv', delimiter=',')
mydata = pd.DataFrame(mydata)
# mydata = mydata.loc[mydata['project'].isin(['A','D','E','F','G','H'])]

print(mydata.shape)
mydata.head(5)

(2461, 11)


Unnamed: 0,specimen,participant_id,collect_wk,project,was_preterm,was_early_preterm,age_imp,shannon,bwpd,CST,Lactobacillus
0,A00001-05,A00001,33,A,False,False,27,1.0,0.0,III,0.7979
1,A00002-01,A00002,38,A,False,False,24,1.96362,2.62894,III,0.805641
2,A00003-02,A00003,30,A,False,False,32,1.0,0.0,II,0.963299
3,A00004-08,A00004,27,A,False,False,25,1.0,0.0,III,0.927544
4,A00004-12,A00004,29,A,False,False,25,6.94884,2.78896,III,0.806593


## Data subset/type conversion if necessary

In [4]:
mydata["project"] = mydata["project"].astype('category')
mydata["CST"] = mydata["CST"].astype('category')
mydata['was_preterm'] = mydata['was_preterm'].astype('int8')
mydata['was_early_preterm'] = mydata['was_early_preterm'].astype('int8')
mydata.dtypes

specimen               object
participant_id         object
collect_wk              int64
project              category
was_preterm              int8
was_early_preterm        int8
age_imp                 int64
shannon               float64
bwpd                  float64
CST                  category
Lactobacillus         float64
dtype: object

## Subsetting dataset for different outcome

In [5]:
# only keep the rows with collect_wk < 32 for Preterm task
mydata_preterm = mydata.loc[mydata['collect_wk']<=32,]
print(mydata_preterm.shape)
# only keep the rows with collect_wk < 28 for Early preterm task
mydata_epreterm = mydata.loc[mydata['collect_wk']<=28,]
print(mydata_epreterm.shape)
print(mydata_preterm['CST'].dtypes)

(2077, 11)
(1765, 11)
category


## Define functions for pytorch input

### feature transforming function

In [6]:
def feature_transform(data, var_name, id_list):
    
    temp_data = data.copy()
    
    ##check argument validity

    if var_name not in list(data.columns):
        raise ValueError("var_name must be in column names of data")
        
    ##get data type
    var_type = data[var_name].dtypes
    
    if var_type == "category":
        temp_data[var_name] = pd.factorize(data[var_name])[0] + 1
    
    ##get pivot table of features
    temp_data_wide = temp_data.pivot_table(index = ['participant_id'], columns = 'collect_wk', values = var_name).sort_index(axis = 0)
    temp_data_wide = temp_data_wide.sort_index(axis=1)
    
    temp_data_long = temp_data_wide.T
    
    temp_data_long['collect_tri'] = 3
    temp_data_long.loc[14:26,'collect_tri'] = 2
    temp_data_long.loc[1:14,'collect_tri'] = 1
    
    ##if categorical impute with mode of each outcome group, continuous with mean
    if var_type == "category":
        temp_data_long = temp_data_long.groupby(['collect_tri']).apply(lambda x: x.fillna(x.mode().iloc[0,]))
        temp_data_long = temp_data_long.apply(lambda x: x.fillna(x.mode().iloc[0,]))
        
    else:
        temp_data_long = temp_data_long.groupby(['collect_tri']).apply(lambda x: x.fillna(x.mean()))
        temp_data_long = temp_data_long.apply(lambda x: x.fillna(x.mean()))
    
    return temp_data_long.drop(columns = ['collect_tri']).T.loc[id_list]
    

### outcome transforming function

In [7]:
def outcome_transform(data, id_list, multi_outcome = True, out_var = "was_preterm",label_smooth = True):
    outcome_data = data.groupby('participant_id').first().sort_index(axis = 0)[out_var]
    max_week_per = data.groupby('participant_id')['collect_wk'].max().sort_index()
    max_week = max_week_per.max()
    id_all = list(max_week_per.index)
    
    
    if multi_outcome:
        if label_smooth:
            label_list = [np.concatenate((np.linspace(0.5,0,max_week_per[id]),np.repeat(0,max_week-max_week_per[id]))) \
                          if outcome_data[id] == 0 else \
                         np.concatenate((np.linspace(0.5,1,max_week_per[id]),np.repeat(1,max_week-max_week_per[id]))) \
                         for id in id_all]
            temp_y = pd.DataFrame(label_list,columns = np.arange(1,max_week+1,1), index = max_week_per.index)
            return temp_y.loc[id_list]
        else:
            temp_y = data.pivot_table(index=['participant_id'], columns='collect_wk', values= out_var).sort_index(axis = 0)
            # sort by collect_wk
            temp_y = temp_y.sort_index(axis=1)
            temp_y = temp_y.apply(lambda row: row.fillna(row.mean()), axis=1)
            return temp_y.loc[id_list]
    else:
        return data.groupby('participant_id').first().sort_index(axis = 0)[out_var][id_list]

### ternsor generator

In [8]:
def tensor_generator(data, id_list, features, out_var = "was_preterm", label_smooth = True, multi_outcome = True):
    X_matrix = [feature_transform(data, var_name,id_list).to_numpy() for var_name in features]
    y_matrix = outcome_transform(data, id_list,multi_outcome,out_var,label_smooth).to_numpy()
    
    input_X = torch.from_numpy(np.dstack(X_matrix).astype('float32'))
    
    if multi_outcome:
        input_y = torch.from_numpy(np.dstack((y_matrix,1-y_matrix)).astype('float32'))
    else:
        input_y = torch.from_numpy(np.vstack((y_matrix,1-y_matrix)).T.astype('float32'))
    
    return input_X, input_y


## Define models

In [9]:
# Define RNN model
class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers = 1,drop_prob=0.2):
        
        """
            parameters:
                input_dim: dimensions of input data (# features)
                hidden_dim: dimensions of hidden layer
                output_dim: dimensions of output layer (should be two in our analysis)
                n_layers: number of layers for GRU structure, default is 1, 2 means stacked GRU
                drop_prob: dropout probability

        """
        
        #inherit from super class
        super(RNNModel, self).__init__()
        
        #define parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        #define layers
        
        ##GRU layers
        self.rnn = nn.RNN(input_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        
        ##fully connected layer(use one linear layer first, later can customize)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state
        hidden = self.init_hidden(batch_size)
        
        out, hidden = self.rnn(x, hidden)
        
        #pass out to fully connected layer
        out = self.fc(out.reshape(-1,out.shape[-1]))
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        return torch.zeros((self.n_layers,batch_size, self.hidden_dim), device = device)
    


In [10]:
# Define GRU model
class GRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers = 1,drop_prob=0.2):
        
        """
            parameters:
                input_dim: dimensions of input data (# features)
                hidden_dim: dimensions of hidden layer
                output_dim: dimensions of output layer (should be two in our analysis)
                n_layers: number of layers for GRU structure, default is 1, 2 means stacked GRU
                drop_prob: dropout probability

        """
        
        #inherit from super class
        super(GRUModel, self).__init__()
        
        #define parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        #define layers
        
        ##GRU layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        
        ##fully connected layer(use one linear layer first, later can customize)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state
        hidden = self.init_hidden(batch_size)
        
        out, hidden = self.gru(x, hidden)
        
        #pass out to fully connected layer
        out = self.fc(out.reshape(-1,out.shape[-1]))
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        return torch.zeros((self.n_layers,batch_size, self.hidden_dim), device = device)
    


In [11]:
# Define LSTM model

class LSTMModel(nn.Module):
    
    """
        parameters:
            input_dim: dimensions of input data (# features)
            hidden_dim: dimensions of hidden layer
            output_dim: dimensions of output layer (should be two in our analysis)
            n_layers: number of layers for GRU structure, default is 1, 2 means stacked GRU
            drop_prob: dropout probability

    """
        
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers = 1, drop_prob=0.2):
        
        #inherit from super class
        super(LSTMModel, self).__init__()
        
        #define parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        #define layers
        
        ##LSTM layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        
        ##fully connected layer(use one linear layer first, later can customize)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
        
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state
        hidden = self.init_hidden(batch_size)
        
        out, hidden = self.lstm(x, hidden)
        
        #pass out to fully connected layer
        out = self.fc(out.reshape(-1,out.shape[-1]))
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        return (torch.zeros((self.n_layers,batch_size, self.hidden_dim),device = device),
                torch.zeros((self.n_layers,batch_size, self.hidden_dim),device = device))

In [12]:
# Define LSTM model

class CUSTOM_Model(nn.Module):
    
    """
        parameters:
            input_dim: dimensions of input data (# features)
            hidden_dim: dimensions of hidden layer
            output_dim: dimensions of output layer (should be two in our analysis)
            n_layers: number of layers for GRU structure, default is 1, 2 means stacked GRU
            drop_prob: dropout probability

    """
        
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers = 1, drop_prob=0.2):
        
        #inherit from super class
        super(LSTMModel, self).__init__()
        
        #define parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        #define layers
        
        ##LSTM layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        
        ##fully connected layer(use one linear layer first, later can customize)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
        
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state
        hidden = self.init_hidden(batch_size)
        
        out, hidden = self.lstm(x, hidden)
        
        #pass out to fully connected layer
        out = self.fc(out.reshape(-1,out.shape[-1]))
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        return (torch.zeros((self.n_layers,batch_size, self.hidden_dim),device = device),
                torch.zeros((self.n_layers,batch_size, self.hidden_dim),device = device))

In [13]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU not available, CPU used


In [14]:
# define train function

def dataloader(X, y, batch_size = 100):
    data = TensorDataset(X, y)
    
    if batch_size > X.shape[0]:
        batch_size = X.shape[0]
    
    loader = DataLoader(data, shuffle = True, batch_size = batch_size, drop_last = True)
    return loader


def train_epoch(train_loader, validation_loader, learn_rate, \
                hidden_dim, n_layers, drop_prob, \
                device = device, EPOCHS = 100, output_dim = 2, method = "RNN"):
    
    #input_dim
    
    input_dim = next(iter(train_loader))[0].shape[2]
    
    #instantiating the models
    if method == "RNN":
        model = RNNModel(input_dim, hidden_dim, output_dim,n_layers = n_layers,drop_prob=drop_prob)
    if method == "GRU":
        model = GRUModel(input_dim, hidden_dim, output_dim,n_layers = n_layers,drop_prob=drop_prob)
    elif method == "LSTM":
        model = LSTMModel(input_dim, hidden_dim, output_dim,n_layers = n_layers,drop_prob=drop_prob)
        
    model.to(device)
    
    # loss criterion and optimizer

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = learn_rate)
    
    train_loss = []
    val_loss = []
    # train model
    
    model.train()
    
    print('Starting training of {} model'.format(method))
    
    #Start training loop
    
    for epoch in range(1, EPOCHS + 1):
        
        batch_train_losses = []
        batch_val_losses = []
        
        for x, label in train_loader:
            
            x, label = x.to(device), label.to(device)
            
            model.zero_grad()
            
            label = label.reshape(-1,label.shape[-1])
        
            predictions = model(x)[0]
            
            predictions = predictions.to(device)
        
            loss = criterion(predictions, label)
            
            batch_train_losses.append(loss.detach().numpy())
            # backpropagation
            loss.backward() 
            # Updates the weights accordingly
            optimizer.step()
        
        train_loss.append(np.mean(batch_train_losses))
        
        with torch.no_grad():
            
            model.eval()
            
            for x, label in validation_loader:
                
                x, label = x.to(device), label.to(device)
                
                label = label.reshape(-1,label.shape[-1])
                
                out = model(x)[0]
                
                out = out.to(device)
                
                loss_val = criterion(out, label)
                
                batch_val_losses.append(loss_val.numpy())
        
        val_loss.append(np.mean(batch_val_losses))
        
        if epoch%10 == 0:
            print('Epoch: {}/{}.............'.format(epoch, EPOCHS), end=' ')
            print("Train Loss: {:.4f}".format(train_loss[epoch-1]))
            print("Validation Loss: {:.4f}".format(val_loss[epoch-1]))
            
        ##stopping rule?
        
    return model,train_loss,val_loss


In [15]:
def test_metrics(model, data, id_list, features, out_var = "was_preterm", multi_outcome = True):
    
    test_X = tensor_generator(data, id_list,features, out_var, label_smooth = False, multi_outcome = multi_outcome)[0]
    
    test_y = outcome_transform(data ,id_list, multi_outcome = False, out_var = "was_preterm",label_smooth = False).to_numpy()

    model.eval()
    
    if multi_outcome:
        out = model(test_X)[0]
        predicted_props = nn.functional.softmax(out.reshape(test_X.shape[0],test_X.shape[1],2),dim = 2)[:,-1,0].detach().numpy()
        predicted_labels = 1*(predicted_props >0.5)
    else:
        out = model(test_X)
        predicted_props = nn.functional.softmax(out, dim = 1)[:,0].detach().numpy()
        predicted_labels = 1*(predicted_props >0.5)
    
    
    result_tab = pd.DataFrame(data = [predicted_props,predicted_labels,test_y],
                             columns = id_list,
                             index= ['predicted_prop','predicted_y','y']).T
    
    acc = metrics.accuracy_score(test_y, predicted_labels, normalize=False) / float(test_y.size)
    confusion = metrics.confusion_matrix(test_y, predicted_labels)
    

    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    
    specificity = TN / (TN + FP)
    sensitivity = TP / (TP + FN)
    precision = TP/(TP + FP)
    
    auc = metrics.roc_auc_score(test_y, predicted_props)
    
    
    return acc,sensitivity,specificity,auc,precision,result_tab


In [16]:
def project_cross_val(features,train_ratio = 0.7, learn_rate = 0.01,hidden_dim = 18, n_layers = 1, device = device, \
                      batch_size = 1000,drop_prob = 0.2, EPOCHS = 1000, output_dim = 2, method = "RNN", \
                      out_var = "was_preterm",label_smooth= True, multi_outcome= True):
    
    ##check argument validity
    if out_var != "was_preterm" and out_var != "was_early_preterm":
        raise ValueError("outcome must be was_preterm or was_early_preterm")
    
    ##obtain data by goal
    if out_var == "was_preterm":
        data = mydata_preterm
    else:
        data = mydata_epreterm
        
    project_list = data['project'].unique()
     ##accuracy, sen, spe, auc score
    train_loss = {}
    val_loss = {}
    acc = {}
    sen = {}
    spe = {}
    auc = {}
    precision = {}
    result_tabs = {}
    
    
    for project in project_list:
        train_list = data[data['project']!= project].groupby('participant_id').first().sort_index(axis = 0)
        train_id = list(train_list.groupby(out_var).sample(frac = train_ratio, random_state = 100).sort_index(axis = 0).index)
        val_id = np.setdiff1d(list(train_list.index),train_id)
        test_id = list(data[data['project']== project].groupby('participant_id').first().sort_index(axis = 0).index)
        train_x, train_y = tensor_generator(data,train_id,features = features, \
                                            out_var= "was_preterm", \
                                            label_smooth= label_smooth, multi_outcome= multi_outcome)
        val_x, val_y = tensor_generator(data,val_id,features = features, \
                                            out_var= "was_preterm", \
                                            label_smooth= label_smooth, multi_outcome= multi_outcome)
        
        train_dataloader = dataloader(train_x, train_y,batch_size =  batch_size)
        val_dataloader = dataloader(val_x, val_y,batch_size =  batch_size)
        model, train_loss[project],val_loss[project] = train_epoch(train_dataloader,val_dataloader, learn_rate = learn_rate, \
                hidden_dim = hidden_dim, n_layers = n_layers, drop_prob = drop_prob, \
                device = device, EPOCHS = EPOCHS, output_dim = output_dim, method = method)
        
        acc[project],sen[project],spe[project],auc[project],precision[project],result_tabs[project] = \
        test_metrics(model, data,test_id,features, out_var = out_var, multi_outcome = multi_outcome)
    
    return train_loss,val_loss, result_tabs ,pd.DataFrame([acc,sen,spe,precision,auc], \
                                                 index = ['accuracy','sensitivity','specificity','precision','AUC'])
    
   
    

In [17]:
train_loss_GRU,val_loss_GRU,result_tabs_GRU,metrics_GRU = project_cross_val(['shannon','bwpd','CST','Lactobacillus'], \
                                                    train_ratio = 0.8,learn_rate = 0.01,hidden_dim = 10, n_layers = 2, device = device, \
                                                    batch_size = 1000, drop_prob = 0.1,EPOCHS = 120, output_dim = 2, method = "GRU", \
                                                    out_var = "was_preterm",label_smooth= True, multi_outcome= True)

Starting training of GRU model
Epoch: 10/120............. Train Loss: 0.1389
Validation Loss: 0.1520
Epoch: 20/120............. Train Loss: 0.1295
Validation Loss: 0.1358
Epoch: 30/120............. Train Loss: 0.1243
Validation Loss: 0.1308
Epoch: 40/120............. Train Loss: 0.1224
Validation Loss: 0.1281
Epoch: 50/120............. Train Loss: 0.1213
Validation Loss: 0.1269
Epoch: 60/120............. Train Loss: 0.1207
Validation Loss: 0.1280
Epoch: 70/120............. Train Loss: 0.1203
Validation Loss: 0.1271
Epoch: 80/120............. Train Loss: 0.1200
Validation Loss: 0.1258
Epoch: 90/120............. Train Loss: 0.1197
Validation Loss: 0.1260
Epoch: 100/120............. Train Loss: 0.1194
Validation Loss: 0.1262
Epoch: 110/120............. Train Loss: 0.1191
Validation Loss: 0.1267
Epoch: 120/120............. Train Loss: 0.1188
Validation Loss: 0.1270
Starting training of GRU model
Epoch: 10/120............. Train Loss: 0.1522
Validation Loss: 0.1572
Epoch: 20/120............

  precision = TP/(TP + FP)


Starting training of GRU model
Epoch: 10/120............. Train Loss: 0.1225
Validation Loss: 0.1247
Epoch: 20/120............. Train Loss: 0.1186
Validation Loss: 0.1209
Epoch: 30/120............. Train Loss: 0.1172
Validation Loss: 0.1188
Epoch: 40/120............. Train Loss: 0.1165
Validation Loss: 0.1184
Epoch: 50/120............. Train Loss: 0.1160
Validation Loss: 0.1182
Epoch: 60/120............. Train Loss: 0.1153
Validation Loss: 0.1183
Epoch: 70/120............. Train Loss: 0.1145
Validation Loss: 0.1180
Epoch: 80/120............. Train Loss: 0.1139
Validation Loss: 0.1181
Epoch: 90/120............. Train Loss: 0.1135
Validation Loss: 0.1185
Epoch: 100/120............. Train Loss: 0.1132
Validation Loss: 0.1185
Epoch: 110/120............. Train Loss: 0.1129
Validation Loss: 0.1185
Epoch: 120/120............. Train Loss: 0.1126
Validation Loss: 0.1185


  precision = TP/(TP + FP)


Starting training of GRU model
Epoch: 10/120............. Train Loss: 0.1243
Validation Loss: 0.1290
Epoch: 20/120............. Train Loss: 0.1200
Validation Loss: 0.1204
Epoch: 30/120............. Train Loss: 0.1168
Validation Loss: 0.1178
Epoch: 40/120............. Train Loss: 0.1162
Validation Loss: 0.1161
Epoch: 50/120............. Train Loss: 0.1153
Validation Loss: 0.1157
Epoch: 60/120............. Train Loss: 0.1148
Validation Loss: 0.1154
Epoch: 70/120............. Train Loss: 0.1144
Validation Loss: 0.1146
Epoch: 80/120............. Train Loss: 0.1139
Validation Loss: 0.1140
Epoch: 90/120............. Train Loss: 0.1135
Validation Loss: 0.1135
Epoch: 100/120............. Train Loss: 0.1130
Validation Loss: 0.1130
Epoch: 110/120............. Train Loss: 0.1125
Validation Loss: 0.1126
Epoch: 120/120............. Train Loss: 0.1120
Validation Loss: 0.1123


  precision = TP/(TP + FP)


Starting training of GRU model
Epoch: 10/120............. Train Loss: 0.1512
Validation Loss: 0.1501
Epoch: 20/120............. Train Loss: 0.1310
Validation Loss: 0.1324
Epoch: 30/120............. Train Loss: 0.1251
Validation Loss: 0.1296
Epoch: 40/120............. Train Loss: 0.1236
Validation Loss: 0.1269
Epoch: 50/120............. Train Loss: 0.1222
Validation Loss: 0.1256
Epoch: 60/120............. Train Loss: 0.1211
Validation Loss: 0.1246
Epoch: 70/120............. Train Loss: 0.1203
Validation Loss: 0.1237
Epoch: 80/120............. Train Loss: 0.1197
Validation Loss: 0.1227
Epoch: 90/120............. Train Loss: 0.1193
Validation Loss: 0.1224
Epoch: 100/120............. Train Loss: 0.1189
Validation Loss: 0.1219
Epoch: 110/120............. Train Loss: 0.1184
Validation Loss: 0.1214
Epoch: 120/120............. Train Loss: 0.1180
Validation Loss: 0.1210


  precision = TP/(TP + FP)


Starting training of GRU model
Epoch: 10/120............. Train Loss: 0.1556
Validation Loss: 0.1457
Epoch: 20/120............. Train Loss: 0.1360
Validation Loss: 0.1350
Epoch: 30/120............. Train Loss: 0.1298
Validation Loss: 0.1285
Epoch: 40/120............. Train Loss: 0.1270
Validation Loss: 0.1269
Epoch: 50/120............. Train Loss: 0.1253
Validation Loss: 0.1252
Epoch: 60/120............. Train Loss: 0.1242
Validation Loss: 0.1243
Epoch: 70/120............. Train Loss: 0.1233
Validation Loss: 0.1237
Epoch: 80/120............. Train Loss: 0.1224
Validation Loss: 0.1231
Epoch: 90/120............. Train Loss: 0.1217
Validation Loss: 0.1229
Epoch: 100/120............. Train Loss: 0.1211
Validation Loss: 0.1228
Epoch: 110/120............. Train Loss: 0.1204
Validation Loss: 0.1228
Epoch: 120/120............. Train Loss: 0.1198
Validation Loss: 0.1229
Starting training of GRU model
Epoch: 10/120............. Train Loss: 0.1586
Validation Loss: 0.1493
Epoch: 20/120............

  precision = TP/(TP + FP)


In [18]:
print(metrics_GRU)

                    A         B         C         D         E         F  \
accuracy     0.888889  0.696970  0.204545  0.202899  0.753846  0.472222   
sensitivity  0.666667  0.000000  0.000000  0.000000  0.000000  0.380952   
specificity  0.933333  1.000000  1.000000  1.000000  1.000000  0.509804   
precision    0.666667       NaN       NaN       NaN       NaN  0.242424   
AUC          0.894444  0.582609  0.550000  0.502597  0.524872  0.494865   

                    G         H         I         J  
accuracy     0.568182  0.552083  0.520305  0.819277  
sensitivity  0.183673  0.307692  0.580247  0.000000  
specificity  0.795181  0.719298  0.504792  1.000000  
precision    0.346154  0.428571  0.232673       NaN  
AUC          0.539956  0.518893  0.562103  0.519118  


In [19]:
train_loss_LSTM,val_loss_LSTM,result_tabs_LSTM,metrics_LSTM = project_cross_val(['shannon','bwpd','CST','Lactobacillus'], \
                                                    train_ratio = 0.8,learn_rate = 0.01,hidden_dim = 10, n_layers = 2, device = device, \
                                                    batch_size = 1000, drop_prob = 0.1,EPOCHS = 120, output_dim = 2, method = "LSTM", \
                                                    out_var = "was_preterm",label_smooth= True, multi_outcome= True)

Starting training of LSTM model
Epoch: 10/120............. Train Loss: 0.1675
Validation Loss: 0.1564
Epoch: 20/120............. Train Loss: 0.1396
Validation Loss: 0.1419
Epoch: 30/120............. Train Loss: 0.1317
Validation Loss: 0.1361
Epoch: 40/120............. Train Loss: 0.1292
Validation Loss: 0.1335
Epoch: 50/120............. Train Loss: 0.1287
Validation Loss: 0.1328
Epoch: 60/120............. Train Loss: 0.1282
Validation Loss: 0.1325
Epoch: 70/120............. Train Loss: 0.1276
Validation Loss: 0.1323
Epoch: 80/120............. Train Loss: 0.1267
Validation Loss: 0.1317
Epoch: 90/120............. Train Loss: 0.1249
Validation Loss: 0.1289
Epoch: 100/120............. Train Loss: 0.1223
Validation Loss: 0.1310
Epoch: 110/120............. Train Loss: 0.1205
Validation Loss: 0.1308
Epoch: 120/120............. Train Loss: 0.1198
Validation Loss: 0.1276
Starting training of LSTM model
Epoch: 10/120............. Train Loss: 0.1305
Validation Loss: 0.1272
Epoch: 20/120..........

  precision = TP/(TP + FP)


Starting training of LSTM model
Epoch: 10/120............. Train Loss: 0.1579
Validation Loss: 0.1458
Epoch: 20/120............. Train Loss: 0.1299
Validation Loss: 0.1278
Epoch: 30/120............. Train Loss: 0.1225
Validation Loss: 0.1227
Epoch: 40/120............. Train Loss: 0.1204
Validation Loss: 0.1210
Epoch: 50/120............. Train Loss: 0.1196
Validation Loss: 0.1210
Epoch: 60/120............. Train Loss: 0.1191
Validation Loss: 0.1210
Epoch: 70/120............. Train Loss: 0.1184
Validation Loss: 0.1204
Epoch: 80/120............. Train Loss: 0.1175
Validation Loss: 0.1199
Epoch: 90/120............. Train Loss: 0.1165
Validation Loss: 0.1190
Epoch: 100/120............. Train Loss: 0.1151
Validation Loss: 0.1178
Epoch: 110/120............. Train Loss: 0.1135
Validation Loss: 0.1156
Epoch: 120/120............. Train Loss: 0.1122
Validation Loss: 0.1142
Starting training of LSTM model
Epoch: 10/120............. Train Loss: 0.1428
Validation Loss: 0.1376
Epoch: 20/120..........

In [20]:
print(metrics_LSTM)

                    A         B         C         D         E         F  \
accuracy     0.805556  0.666667  0.204545  0.246377  0.261538  0.569444   
sensitivity  0.833333  0.100000  0.000000  0.054545  1.000000  0.333333   
specificity  0.800000  0.913043  1.000000  1.000000  0.020408  0.666667   
precision    0.454545  0.333333       NaN  1.000000  0.250000  0.291667   
AUC          0.855556  0.669565  0.403175  0.584416  0.550383  0.563025   

                    G         H         I         J  
accuracy     0.606061  0.614583  0.271574  0.807229  
sensitivity  0.081633  0.410256  0.925926  0.000000  
specificity  0.915663  0.754386  0.102236  0.985294  
precision    0.363636  0.533333  0.210674  0.000000  
AUC          0.558643  0.578722  0.557607  0.525980  


In [21]:
train_loss_RNN,val_loss_RNN,result_tabs_RNN,metrics_RNN = project_cross_val(['shannon','bwpd','CST','Lactobacillus'], \
                                                    train_ratio = 0.8,learn_rate = 0.01,hidden_dim = 10, n_layers = 2, device = device, \
                                                    batch_size = 1000, drop_prob = 0.1,EPOCHS = 120, output_dim = 2, method = "RNN", \
                                                    out_var = "was_preterm",label_smooth= True, multi_outcome= True)

Starting training of RNN model
Epoch: 10/120............. Train Loss: 0.1300
Validation Loss: 0.1335
Epoch: 20/120............. Train Loss: 0.1269
Validation Loss: 0.1328
Epoch: 30/120............. Train Loss: 0.1264
Validation Loss: 0.1310
Epoch: 40/120............. Train Loss: 0.1254
Validation Loss: 0.1309
Epoch: 50/120............. Train Loss: 0.1245
Validation Loss: 0.1298
Epoch: 60/120............. Train Loss: 0.1239
Validation Loss: 0.1293
Epoch: 70/120............. Train Loss: 0.1235
Validation Loss: 0.1293
Epoch: 80/120............. Train Loss: 0.1231
Validation Loss: 0.1288
Epoch: 90/120............. Train Loss: 0.1227
Validation Loss: 0.1287
Epoch: 100/120............. Train Loss: 0.1223
Validation Loss: 0.1284
Epoch: 110/120............. Train Loss: 0.1219
Validation Loss: 0.1280
Epoch: 120/120............. Train Loss: 0.1215
Validation Loss: 0.1280
Starting training of RNN model
Epoch: 10/120............. Train Loss: 0.1263
Validation Loss: 0.1243
Epoch: 20/120............

  precision = TP/(TP + FP)


Starting training of RNN model
Epoch: 10/120............. Train Loss: 0.1541
Validation Loss: 0.1428
Epoch: 20/120............. Train Loss: 0.1323
Validation Loss: 0.1289
Epoch: 30/120............. Train Loss: 0.1295
Validation Loss: 0.1283
Epoch: 40/120............. Train Loss: 0.1277
Validation Loss: 0.1270
Epoch: 50/120............. Train Loss: 0.1268
Validation Loss: 0.1267
Epoch: 60/120............. Train Loss: 0.1262
Validation Loss: 0.1262
Epoch: 70/120............. Train Loss: 0.1259
Validation Loss: 0.1261
Epoch: 80/120............. Train Loss: 0.1255
Validation Loss: 0.1259
Epoch: 90/120............. Train Loss: 0.1251
Validation Loss: 0.1257
Epoch: 100/120............. Train Loss: 0.1246
Validation Loss: 0.1257
Epoch: 110/120............. Train Loss: 0.1239
Validation Loss: 0.1256
Epoch: 120/120............. Train Loss: 0.1231
Validation Loss: 0.1255


  precision = TP/(TP + FP)


Starting training of RNN model
Epoch: 10/120............. Train Loss: 0.1352
Validation Loss: 0.1337
Epoch: 20/120............. Train Loss: 0.1326
Validation Loss: 0.1281
Epoch: 30/120............. Train Loss: 0.1308
Validation Loss: 0.1256
Epoch: 40/120............. Train Loss: 0.1298
Validation Loss: 0.1242
Epoch: 50/120............. Train Loss: 0.1291
Validation Loss: 0.1232
Epoch: 60/120............. Train Loss: 0.1286
Validation Loss: 0.1224
Epoch: 70/120............. Train Loss: 0.1282
Validation Loss: 0.1218
Epoch: 80/120............. Train Loss: 0.1279
Validation Loss: 0.1212
Epoch: 90/120............. Train Loss: 0.1276
Validation Loss: 0.1209
Epoch: 100/120............. Train Loss: 0.1273
Validation Loss: 0.1207
Epoch: 110/120............. Train Loss: 0.1270
Validation Loss: 0.1206
Epoch: 120/120............. Train Loss: 0.1267
Validation Loss: 0.1205


  precision = TP/(TP + FP)


Starting training of RNN model
Epoch: 10/120............. Train Loss: 0.1341
Validation Loss: 0.1312
Epoch: 20/120............. Train Loss: 0.1246
Validation Loss: 0.1246
Epoch: 30/120............. Train Loss: 0.1233
Validation Loss: 0.1231
Epoch: 40/120............. Train Loss: 0.1225
Validation Loss: 0.1225
Epoch: 50/120............. Train Loss: 0.1218
Validation Loss: 0.1224
Epoch: 60/120............. Train Loss: 0.1211
Validation Loss: 0.1221
Epoch: 70/120............. Train Loss: 0.1206
Validation Loss: 0.1223
Epoch: 80/120............. Train Loss: 0.1203
Validation Loss: 0.1223
Epoch: 90/120............. Train Loss: 0.1199
Validation Loss: 0.1221
Epoch: 100/120............. Train Loss: 0.1195
Validation Loss: 0.1218
Epoch: 110/120............. Train Loss: 0.1191
Validation Loss: 0.1214
Epoch: 120/120............. Train Loss: 0.1186
Validation Loss: 0.1211
Starting training of RNN model
Epoch: 10/120............. Train Loss: 0.1470
Validation Loss: 0.1513
Epoch: 20/120............

In [22]:
print(metrics_RNN)

                    A         B         C         D         E         F  \
accuracy     0.861111  0.666667  0.238636  0.275362  0.753846  0.708333   
sensitivity  0.166667  0.100000  0.042857  0.090909  0.000000  0.000000   
specificity  1.000000  0.913043  1.000000  1.000000  1.000000  1.000000   
precision    1.000000  0.333333  1.000000  1.000000       NaN       NaN   
AUC          0.855556  0.565217  0.601587  0.610390  0.584821  0.384687   

                    G         H         I         J  
accuracy     0.628788  0.562500  0.446701  0.819277  
sensitivity  0.000000  0.153846  0.629630  0.066667  
specificity  1.000000  0.842105  0.399361  0.985294  
precision         NaN  0.400000  0.213389  0.500000  
AUC          0.587903  0.525191  0.561985  0.492647  
