In [1]:
import pandas as pd, os, random, math
import torch, numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from sklearn import metrics
from torch.optim import Adam
from tqdm import tqdm
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cudnn.benchmark = True
random.seed(1234)
torch.manual_seed(1234)

<torch._C.Generator at 0x7f833cd45130>

In [2]:
class logs_realtime_reply:
    def __init__(self):
        self.avg_dice = 0.0
        self.avg_loss=np.inf
        self.avg_tn = 0
        self.avg_fp = 0
        self.avg_fn = 0
        # self.running_metic = {"Loss":0, "TP":0, "FP":0, "FN": 0, "Spec": 0, "Sens": 0}
        # self.running_metic = {"Loss":0, "Accuracy":0, "Spec": 0, "Sens": 0, "AUC": 0}
        self.running_metic = {"Loss":0,"Accuracy":0, "AUC": 0}
        self.end_epoch_metric = None
    def metric_stack(self, inputs, targets, loss):
        with torch.no_grad():
            self.running_metic['Loss'] +=loss
            # metric setting
            SR = inputs.cpu().data.numpy()
            GT = targets.cpu().data.numpy()
            # print("SR", SR)
            # print("GT", GT)
            acc = metrics.accuracy_score(SR>0.5, GT)
            fpr, tpr, thresholds = metrics.roc_curve(GT, SR, pos_label=1)
            auc = metrics.auc(fpr, tpr)
            self.running_metic['Accuracy'] += round((acc), 5)
            self.running_metic['AUC'] += round((auc), 5)
    def mini_batch_reply(self, current_step, epoch, iter_len):
        # avg_reply_metric = {"Loss":None, "TP":None, "FP":None, "FN": None, "Spec": None, "Sens": None}
        avg_reply_metric = {"Loss":None, "Accuracy": None, "AUC": None}
        # avg_reply_metric = {"Loss":None, "Accuracy": None,"Spec": None, "Sens": None, "AUC": None}
        for j in avg_reply_metric:
            avg_reply_metric[j] = round(self.running_metic[j]/int(current_step),5)
        
        if current_step ==iter_len:
            self.end_epoch_metric = avg_reply_metric
        return avg_reply_metric

    def epoch_reply(self):
        return self.end_epoch_metric

In [3]:
class MLP(nn.Module):
    def __init__(self, num_classes, input_size):
        super(MLP,self).__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=10)
        self.bn1 = nn.BatchNorm1d(10)
        self.dt1 = nn.Dropout(0.25)
        self.linear2 = nn.Linear(in_features=10, out_features=5)
        self.bn2 = nn.BatchNorm1d(5)
        self.dt2 = nn.Dropout(0.25)
        self.linear3 = nn.Linear(in_features=5, out_features=num_classes)
        
    def forward(self, x):
        x = self.bn1(self.linear1(x))
        x = F.relu(x)
        x = self.bn2(self.linear2(x))
        x = F.relu(x)
        x = self.linear3(x)
        x = torch.sigmoid(x)
        return x
        
    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight.data, 0, 0.01)
                m.bias.data.zero_()

In [4]:
def model_create():
    model = MLP(num_classes=1, input_size=25)
    model.initialize_weights()
    model.to(device)
    return model

In [5]:
def train(train_loader, model, criterion, optimizer, epoch):
    global best_tacc, best_tauc
    get_logs_reply = logs_realtime_reply()
    model.train()
    # stream = tqdm(train_loader)
   
    for i, (text, label) in enumerate(train_loader, start=1):
        images = text.to(device)
        target = label.to(device)
        output = model(images).squeeze(1)
        # print(output)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        get_logs_reply.metric_stack(output, target, loss = round(loss.item(), 5))
        avg_reply_metric = get_logs_reply.mini_batch_reply(i, epoch, len(train_loader))
        avg_reply_metric['lr'] = optimizer.param_groups[0]['lr']
        # stream.set_description(f"Epoch: {epoch}. Train. {str(avg_reply_metric)}")
    avg_reply_metric = get_logs_reply.epoch_reply()
    
    for x in avg_reply_metric:
        if x =='Accuracy' and avg_reply_metric[x] > best_tacc:
            best_tacc = avg_reply_metric[x]
            current_loss = avg_reply_metric['Loss']
            save_ck_name = f'{ck_pth}/best - tacc - {project_name}.pt'
            torch.save({
                    'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 
                    'loss':  current_loss,}, save_ck_name)
            # print('save...', save_ck_name)
        if x=='AUC' and avg_reply_metric[x]>best_tauc:
            best_tauc = avg_reply_metric[x]
            current_loss = avg_reply_metric['Loss']
            best_ck_name = f'{ck_pth}/best - tauc - {project_name}.pt'
            torch.save({
                    'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 
                    'loss':  current_loss,}, best_ck_name)
            # print('save...', best_ck_name)
        # print(avg_reply_metric)
        writer.add_scalar(f'{x}/Train {x}', avg_reply_metric[x], epoch)

In [6]:
def  train_valid_process_main(model, training_set, batch_size):
    global best_tacc, best_tauc
    # best_tloss = np.inf
    best_tauc = 0.00
    best_tacc = 0.00
    # Subject Dataloader Building
    train_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True, num_workers=10)
    for epoch in tqdm(range(1, params["epochs"] + 1)):
        train(train_loader, model, loss, optimizer, epoch)
    print("Train","Best accuracy:", best_tacc, ' Best auc:', best_tauc)
    return model

In [7]:
# data_df = pd.read_csv('./xlsx/local_sepsis.csv')
data_df = pd.read_csv('./xlsx/original_555 sepsis dataset.csv')
def dataloader(table):
    for i in table:
        if (i in ['ID','LOC','outcome'])==False:
            cols_filter = [x for x in table[i] if math.isnan(float(x))==False ]
            med = np.median(cols_filter)
            table[i] = [med if math.isnan(float(x))==True else x for x in table[i] ]
            min_cols, max_cols =np.min(cols_filter), np.max(cols_filter)
            normal = lambda x: (x - min_cols)/(max_cols - min_cols)
            table[i] = [normal(x) for x in table[i]]

    return table

In [8]:
import random 
# for i in range(50):
    # seed = random.randint(10,100)
X_train, X_test, y_train, y_test = train_test_split(data_df.drop(['outcome'],axis=1), data_df['outcome'], 
                                                                                                    test_size=0.25, stratify=list(data_df['outcome']), random_state=123) #seed = 42, 123
X_train = dataloader(X_train)
X_test = dataloader(X_test)
print('train', ' 0: ', len(y_train)-sum(y_train),'1:',sum(y_train))
print('valid', '0: ', len(y_test)-sum(y_test), '1:',sum(y_test))

try:
    X_train_ = np.array(X_train.drop(['ID','LOC'],axis=1))
    X_test_ = np.array(X_test.drop(['ID','LOC'],axis=1))
    y_train_ = np.array(y_train)
    y_test_ = np.array(y_test)
except:
    X_train_ = np.array(X_train.drop(['ID'],axis=1))
    X_test_ = np.array(X_test.drop(['ID'],axis=1))
    y_train_ = np.array(y_train)
    y_test_ = np.array(y_test)
print(X_train_.shape, X_test_.shape, y_train_.shape, y_test_.shape)

train  0:  385 1: 31
valid 0:  129 1: 10
(416, 25) (139, 25) (416,) (139,)


In [9]:

if True: #model record
    params = {
        "type": "Sepsis-original",
        "model": 'MLP', #baseline = 'resnet18'
        "model_depth": 3,
        "device": "cuda",
        "opt": "Adam",
        "lr": 0.003, #baseline = 0.003
        "batch_size": 256, #baseline resnet18 : 8
        "epochs": 150,
        "fixing": "None"
        }
training_set = TensorDataset(torch.FloatTensor(X_train_), torch.FloatTensor(y_train_))

# checkpoint setting
project_name = f"{params['type']} - {params['model']}{params['model_depth']} - lr_{params['lr']} - CEL"
project_folder = f"2021.12.10.t2 - Sepsis - MLP - {params['type']}"
ck_pth = f'./checkpoint/{project_folder}'
if os.path.exists(ck_pth)==False:
    os.mkdir(ck_pth)
ck_name = project_name
path = f'./checkpoint/{project_folder}/{project_name}.txt'
f = open(path, 'w')
lines = params
f.writelines([f'{i} : {params[i]} \n' for i in params])
f.close()
tensorboard_logdir = f'./logsdir/S2/ {project_folder} - {project_name}'
writer=SummaryWriter(tensorboard_logdir)
model = model_create()
loss = torch.nn.BCELoss()
if params['opt']=='Adam':
    optimizer = Adam(model.parameters(), lr=params['lr'], betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
else:
    optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], weight_decay = 1e-4, momentum=0.9)
logs  = train_valid_process_main(model, training_set, params['batch_size'])
writer.close()

100%|██████████| 150/150 [01:02<00:00,  2.42it/s]

Train Best accuracy: 0.9336  Best auc: 0.90621





In [10]:
validation_set = TensorDataset(torch.FloatTensor(X_test_), torch.FloatTensor(y_test_))
test_loader = DataLoader(validation_set, batch_size=139, shuffle=False)
checkpoint_path = f'./checkpoint/{project_folder}/best - tauc - Sepsis-original - MLP3 - lr_0.003 - CEL.pt'
checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
model = model_create()
model.load_state_dict(checkpoint['model_state_dict'])
model.to('cpu')
with torch.no_grad():
    for i, (text, label) in enumerate(test_loader, start=1):
        output = model(text.to('cpu'))
        label = label
    acc = metrics.accuracy_score(output>0.5, label)
    fpr, tpr, thresholds = metrics.roc_curve(label, output, pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print("///-------original model-------///")
    print("Accuracy:", round(acc,5), "\nAUC:", round(auc,5))

# Dataset Scale
# train  [0:  385, 1: 31]
# valid  [0: 129, 1: 10] (test)

# Test 結果
# 沒有初始化model weight
# Accuracy: 0.94964 
# AUC: 0.94109

# 有初始化model weight
# Accuracy: 0.92806 
# AUC: 0.89147

///-------original model-------///
Accuracy: 0.92806 
AUC: 0.89147
