In [1]:
import torch, os, warnings
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from util import data_process
from sklearn.metrics import f1_score, recall_score, roc_auc_score, confusion_matrix, accuracy_score
warnings.filterwarnings("ignore")
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [2]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=1):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)  # 第一層全連接層
        self.relu = nn.ReLU()                         # ReLU 激活函數
        self.fc2 = nn.Linear(hidden_size, hidden_size) # 第二層全連接層
        self.dp1 = nn.Dropout(0.25)
        self.fc3 = nn.Linear(hidden_size, output_size) # 輸出層

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dp1(self.fc2(out))
        out = self.relu(out)
        out = self.fc3(out)
        return out

In [3]:
def trainer(model = None, TrainData = None, ValidData = None, epochs = 20, opt = None, crit = None, save_path = None, fold_n = None):
    best_recall = 0
    best_loss = np.inf
    patient = 1
    # lambda1 = lambda epoch: opt.param_groups[0]['lr']*0.2 * epoch if epoch > 5 else 1
    lambda2 = lambda epoch: 0.9 **epoch if epoch > 5 else 1
    # scheduler = optim.lr_scheduler.LambdaLR(opt, lr_lambda=lambda1)
    scheduler = optim.lr_scheduler.MultiplicativeLR(opt, lr_lambda=lambda2)
    for epoch in range(epochs):
        model.train()
        for i, (features, labels) in enumerate(TrainData):
            # 向前傳播
            outputs = model(features).squeeze(1)  # 輸出需要匹配 labels 的 shape
            loss = crit(outputs, labels)

            # 向後傳播和優化
            opt.zero_grad()
            loss.backward()
            opt.step()
        scheduler.step()
        if epoch%5==0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Learning Rate: {scheduler.get_last_lr()[0]}')
        model.eval()
        y_true = []
        y_pred = []
        y_probs = []
        y_loss = []
        with torch.no_grad():
            total = 0
            for features, labels in ValidData:
                outputs = model(features).squeeze(1)
                loss = crit(outputs, labels)
                probs = torch.sigmoid(outputs)
                predictions = probs > 0.5
                total += labels.size(0)
                
                y_true.extend(labels.tolist())
                y_pred.extend(predictions.tolist())
                y_probs.extend(probs.tolist())
                y_loss.append(loss.item())
            # print(y_true)
            cm = confusion_matrix(y_true, y_pred)
            tn, fp, fn, tp = cm.ravel()
            accuracy = accuracy_score(y_true, y_pred)
            f1 = f1_score(y_true, y_pred, average='macro')
            recall = recall_score(y_true, y_pred, average='macro')
            auc = roc_auc_score(y_true, y_probs)
            mean_loss = np.mean(y_loss)
            # print(mean_loss)
            # if recall > best_recall:
            if mean_loss < best_loss:
                patient=0
                # best_recall = recall
                best_loss = mean_loss
                os.makedirs(save_path, exist_ok=True)
                torch.save(model.state_dict(), os.path.join(save_path,f'best_model_cv{fold_n}.pth'))  # 儲存最佳模型權重
                # print(f'New best model found at epoch {epoch+1} with recall: {best_recall:.4f}. Model saved.')
                print(f'New best model found at epoch {epoch+1} with loss: {best_loss:.4f}. Model saved.')
                print(f'Accuracy of the model on the test set: {accuracy:.2f}% / Macro F1 Score: {f1:.4f} / Macro Recall: {recall:.4f} / AUC: {auc:.4f}')
                print(f'True Positive: {tp}({tp+fn}), False Negative: {fn}({fn+tp}), True Negative: {tn}, False Positive: {fp}')
        #     else:
        #         patient+=1
        #         # print(f"Early Stopping patient:{patient}/10")
        # if patient==10:
        #     print("Early Stopping!!!!!!!!!!")
            # break
    return model

In [4]:
DB_type = 'A'
flag = ['train','val']


Data_ = {
    'A':'../dataset/DATA_A_FinalFinished[2024-09-27-filter-age]fixed.csv',
    'B':'../dataset/DATA_B_FinalFinished[2024-09-27-filter-age]fixed.csv',
    'C':'../dataset/DATA_C_FinalFinished[2024-09-27-filter-age]fixed.csv'
    }

select_cols =  ['性別', '入院方式', 'HCV','HBV','有無糖尿病','FISTULA','GRAFT','Catheter','Intact PTH','age', '體重1開始','開始血壓SBP', '開始血壓DBP',
            '開始脈搏', '體溫', '體重實際脫水','每公斤脫水量(ml/kg)','BUN','K', 'HGB','URR%','Na', 'Ca','P',
            '透析液 Ca','ALBUMIN','ALT (SGPT)','Alk.phosphatase','Ferritin','IRON/TIBC','MCV', 'MCHC', 'MCH','Iron','Glucose AC','RBC', 'WBC',
            'Platelet', 'Creatinine','AST (SGOT)','TIBC','Bilirubin-T', 'Cholesterol-T', 'CRP']
print(len(select_cols))
# select_cols = select_cols+ ['Max Diff mbp', 'Max Diff sbp','結束脈搏','Final Judge','Raw Index','ID','洗腎紀錄時間去時分',
#                             'fold_0','fold_1', 'fold_2', 'fold_3', 'fold_4']

select_cols = select_cols+ ['Final Judge','Raw Index','ID','洗腎紀錄時間去時分',
                            'fold_0','fold_1', 'fold_2', 'fold_3', 'fold_4']

hidden_size = 128
epoch = 100
save_path = './mlp_ckpt' #path 1
save_path += '/nonmark' #path 2

for DB_type in ['A','B','C']:
    print(DB_type, Data_[DB_type])
    for fold_next in [0,1,2,3,4]:
        if DB_type =='A':
            cat_col_names = ['入院方式', '性別', '體溫', 'FISTULA', 'GRAFT', 'Catheter', '有無糖尿病','Intact PTH', 'HCV', 'HBV']
        else:
            cat_col_names = ['入院方式', '性別', 'FISTULA', 'GRAFT', 'Catheter', '有無糖尿病', 'HCV', 'HBV']
        DATA_PROCESS = data_process()
        train_X, train_y, cat_cols, num_cols = DATA_PROCESS.data_loader(Data_[DB_type], 'train', fold_next, select_cols ,cat_col_names)
        val_X, val_y, _, _ = DATA_PROCESS.data_loader(Data_[DB_type], 'val', fold_next, select_cols ,cat_col_names)
        X_train_tensor = torch.tensor(train_X.values, dtype=torch.float32)
        y_train_tensor = torch.tensor(train_y.values, dtype=torch.float32)
        X_val_tensor = torch.tensor(val_X.values, dtype=torch.float32)
        y_val_tensor = torch.tensor(val_y.values, dtype=torch.float32)
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        valid_dataset = TensorDataset(X_val_tensor, y_val_tensor)

        train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
        input_size = train_X.shape[1]
        model = MLP(input_size, hidden_size, output_size=1)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        print(f"DATA: {DB_type} | Fold CV: {fold_next+1}")
        model = trainer(model=model, save_path = os.path.join(f'{save_path}_{DB_type}'), fold_n = fold_next,
                        TrainData=train_loader, ValidData=valid_loader, 
                        epochs=epoch, crit=criterion, opt=optimizer)
        # break

44
A ../dataset/DATA_A_FinalFinished[2024-09-27-filter-age]fixed.csv
DATA: A | Fold CV: 1
Epoch [1/100], Loss: 0.3677, Learning Rate: 0.001
New best model found at epoch 1 with loss: 0.4222. Model saved.
Accuracy of the model on the test set: 0.84% / Macro F1 Score: 0.5081 / Macro Recall: 0.5210 / AUC: 0.6565
True Positive: 407(6625), False Negative: 6218(6625), True Negative: 35333, False Positive: 699
New best model found at epoch 3 with loss: 0.4168. Model saved.
Accuracy of the model on the test set: 0.84% / Macro F1 Score: 0.5315 / Macro Recall: 0.5345 / AUC: 0.6756
True Positive: 618(6625), False Negative: 6007(6625), True Negative: 35156, False Positive: 876
Epoch [6/100], Loss: 0.2755, Learning Rate: 0.000531441
Epoch [11/100], Loss: 0.1958, Learning Rate: 4.6383976865881075e-06
Epoch [16/100], Loss: 0.3185, Learning Rate: 2.9063214161987073e-09
Epoch [21/100], Loss: 0.3928, Learning Rate: 1.307320402228525e-13
Epoch [26/100], Loss: 0.2375, Learning Rate: 4.221659203144745e-19
