In [1]:
import pandas as pd, os, random, math
import torch, numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from freeze import *
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from sklearn import metrics
from torch.optim import Adam
from tqdm import tqdm
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cudnn.benchmark = True
random.seed(1234)
torch.manual_seed(1234)

<torch._C.Generator at 0x7fdf440bbf50>

In [2]:
class MLP(nn.Module):
    def __init__(self, num_classes, input_size):
        super(MLP,self).__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=10)
        self.bn1 = nn.BatchNorm1d(10)
        self.dt1 = nn.Dropout(0.25)
        self.linear2 = nn.Linear(in_features=10, out_features=5)
        self.bn2 = nn.BatchNorm1d(5)
        self.dt2 = nn.Dropout(0.25)
        self.linear3 = nn.Linear(in_features=5, out_features=num_classes)
        
    def forward(self, x):
        x = self.bn1(self.linear1(x))
        x = F.relu(x)
        x = self.bn2(self.linear2(x))
        x = F.relu(x)
        x = self.linear3(x)
        x = torch.sigmoid(x)
        return x
        
    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight.data, 0, 0.01)
                m.bias.data.zero_()

In [3]:
def model_create():
    model = MLP(num_classes=1, input_size=25)
    model.initialize_weights()
    model.to(device)
    return model

In [4]:
data_df_first = pd.read_csv('./xlsx/original_555 sepsis dataset.csv')
data_df = pd.read_csv('./xlsx/12-17-rhees death_within28days+feature.csv')

# data_df = pd.read_csv('./xlsx/local_sepsis.csv')
# data_df = pd.read_csv('./xlsx/original_555 sepsis dataset.csv')
def dataloader(table):
    for i in table:
        if (i in ['ID','LOC','outcome'])==False:
            # print(i)
            cols_filter = [x for x in table[i] if math.isnan(float(x))==False ]
            med = np.median(cols_filter)
            table[i] = [med if math.isnan(float(x))==True else x for x in table[i]]
            min_cols, max_cols =np.min(cols_filter), np.max(cols_filter)

            normal = lambda x: (x - min_cols)/(max_cols - min_cols)
            table[i] = [normal(x) for x in table[i]]
            table[i] = [0 if math.isnan(float(x))==True else x for x in table[i]]
    return table
data_df = dataloader(data_df)
data_df_first = dataloader(data_df_first)
# data_df_first

In [5]:
from sklearn.metrics import *
def CI(y_pred, y_true):
    n_bootstraps = 1000
    rng_seed = 42  # control reproducibility
    bootstrapped_scores = []
    rng = np.random.RandomState(rng_seed)
    for i in range(n_bootstraps):
        indices = rng.randint(0, len(y_pred), len(y_pred))
        if len(np.unique(y_true[indices])) < 2:
            continue
        score = roc_auc_score(y_true[indices], y_pred[indices])
        bootstrapped_scores.append(score)
    sorted_scores = np.array(bootstrapped_scores)
    sorted_scores.sort()
    confidence_lower = sorted_scores[int(0.025 * len(sorted_scores))]
    confidence_upper = sorted_scores[int(0.975 * len(sorted_scores))]
    return confidence_lower, confidence_upper

In [6]:
import random 

# checkpoint_path = ["../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21.pt", 
#                                         "../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21-Cleint-1.pt", 
#                                         "../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21-Cleint-2.pt",
#                                         "../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21-Cleint-3.pt"]
# checkpoint_path = ["../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21(Sampleweight).pt", 
#                                         "../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21(Sampleweight)-Cleint-1.pt", 
#                                         "../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21(Sampleweight)-Cleint-2.pt",
#                                         "../multi_connect/checkpoint/FeatureExtracte-best-weight-2021-12-21(Sampleweight)-Cleint-3.pt"]
checkpoint_path = ["../multi_connect/checkpoint/Freeze-best-weight-2021-12-21.pt", 
                                        "../multi_connect/checkpoint/Freeze-best-weight-2021-12-21-Cleint-1.pt", 
                                        "../multi_connect/checkpoint/Freeze-best-weight-2021-12-21-Cleint-2.pt",
                                        "../multi_connect/checkpoint/Freeze-best-weight-2021-12-21-Cleint-3.pt"]

for i in [0, 1,2,3]:
    
    if i ==0:
        X_train, X_test, y_train, y_test = train_test_split(data_df_first.drop(['outcome'],axis=1), data_df_first['outcome'], 
                                                                                                            test_size=0.25, stratify=list(data_df_first['outcome']), random_state=123) #seed = 42, 123
        print("Patient 555")
    else:
        data_df_LOC = data_df[data_df["LOC"]==i]
        X_train, X_test, y_train, y_test = train_test_split(data_df_LOC.drop(['outcome'],axis=1), data_df_LOC['outcome'], 
                                                                                                            test_size=0.25, stratify=list(data_df_LOC['outcome']), random_state=123) #seed = 42, 123
        print("LOC:", i)
    print('train', ' 0: ', len(y_train)-sum(y_train),'1:',sum(y_train))
    print('valid', '0: ', len(y_test)-sum(y_test), '1:',sum(y_test))

    try:
        X_train_ = np.array(X_train.drop(['ID','LOC'],axis=1))
        X_test_ = np.array(X_test.drop(['ID','LOC'],axis=1))
        y_train_ = np.array(y_train)
        y_test_ = np.array(y_test)
    except:
        X_train_ = np.array(X_train.drop(['ID'],axis=1))
        X_test_ = np.array(X_test.drop(['ID'],axis=1))
        y_train_ = np.array(y_train)
        y_test_ = np.array(y_test)
    print(X_train_.shape, X_test_.shape, y_train_.shape, y_test_.shape)
    training_set = TensorDataset(torch.FloatTensor(X_train_), torch.FloatTensor(y_train_))
    validation_set = TensorDataset(torch.FloatTensor(X_test_), torch.FloatTensor(y_test_))
# Test
    train_loader = DataLoader(training_set, batch_size=len(training_set), drop_last=False, shuffle=False)
    test_loader = DataLoader(validation_set, batch_size=len(validation_set), drop_last=False, shuffle=False)
     
    checkpoint = torch.load(checkpoint_path[i], map_location=torch.device('cpu'))
    model = model_create()
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to('cpu')
    with torch.no_grad():
        for i, (text, label) in enumerate(train_loader, start=1):
            output = model(text.to('cpu'))
            label = label
        acc = metrics.accuracy_score(output>0.5, label)
        fpr, tpr, thresholds = metrics.roc_curve(label, output, pos_label=1)
        auc = metrics.auc(fpr, tpr)
        print("///-------Transfer Pretrain model-------///")
        print("Accuracy:", round(acc,5), "\nAUC:", round(auc,5), '[',CI(np.array(output),np.array(label).astype(np.int8)),']')
        print("///-----------------Train End-----------------///")
        for i, (text, label) in enumerate(test_loader, start=1):
            output = model(text.to('cpu'))
            label = label
        acc = metrics.accuracy_score(output>0.5, label)
        fpr, tpr, thresholds = metrics.roc_curve(label, output, pos_label=1)
        auc = metrics.auc(fpr, tpr)
        print("Accuracy:", round(acc,5), "\nAUC:", round(auc,5), '[',CI(np.array(output),np.array(label).astype(np.int8)),']')
        print("///-----------------Test End-----------------///")


Patient 555
train  0:  385 1: 31
valid 0:  129 1: 10
(416, 25) (139, 25) (416,) (139,)
///-------Transfer Pretrain model-------///
Accuracy: 0.98317 
AUC: 1.0 [ (0.9999999999999999, 1.0) ]
///-----------------Train End-----------------///
Accuracy: 0.94245 
AUC: 0.77171 [ (0.517175572519084, 0.9717054263565892) ]
///-----------------Test End-----------------///
LOC: 1
train  0:  908 1: 34
valid 0:  303 1: 11
(942, 25) (314, 25) (942,) (314,)
///-------Transfer Pretrain model-------///
Accuracy: 0.96391 
AUC: 0.78523 [ (0.6973823123358869, 0.8626747608535688) ]
///-----------------Train End-----------------///
Accuracy: 0.96497 
AUC: 0.78833 [ (0.5707142857142857, 0.9577413479052823) ]
///-----------------Test End-----------------///
LOC: 2
train  0:  1390 1: 303
valid 0:  464 1: 101
(1693, 25) (565, 25) (1693,) (565,)
///-------Transfer Pretrain model-------///
Accuracy: 0.82103 
AUC: 0.742 [ (0.7117736259665695, 0.7737449201190248) ]
///-----------------Train End-----------------///
A