In [4]:
import torch, os, warnings
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from util import data_process
from sklearn.metrics import f1_score, recall_score, roc_auc_score, confusion_matrix, accuracy_score
# Install a global filter that ignores every Python warning raised after this point.
warnings.filterwarnings("ignore")
# Set CUDA_VISIBLE_DEVICES so that only GPU index 3 is exposed to this process.
# NOTE(review): this is assigned after `import torch`; presumably it still takes
# effect because CUDA has not been initialised yet at this point — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [2]:
class MLP(nn.Module):
    """Fully connected network: two hidden layers with ReLU, then a linear output.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of both hidden layers.
        output_size: Number of output units (default 1). No activation is
            applied after the last layer, so ``forward`` returns raw scores.
    """

    def __init__(self, input_size, hidden_size, output_size=1):
        super().__init__()
        # Attribute names (fc1/fc2/fc3) define the state_dict keys, and the
        # creation order determines seeded initialisation; keep both stable.
        self.fc1 = nn.Linear(input_size, hidden_size)   # input  -> hidden
        self.relu = nn.ReLU()                           # activation reused after fc1 and fc2
        self.fc2 = nn.Linear(hidden_size, hidden_size)  # hidden -> hidden
        self.fc3 = nn.Linear(hidden_size, output_size)  # hidden -> output

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 -> ReLU -> fc3 to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [3]:
# ---------------------------------------------------------------------------
# Evaluation configuration
# ---------------------------------------------------------------------------

# Default dataset key and split names. `DB_type` is reassigned by the dataset
# loop that follows; `flag` is not referenced in the visible evaluation code.
DB_type, flag = 'A', ['train', 'val']

# CSV path for each dataset key.
# NOTE(review): the 'B' path spells "filger-age" while 'A' and 'C' spell
# "filter-age" — confirm this matches the file name on disk.
Data_ = dict(
    A='../dataset/DATA_A_FinalFinished[2024-09-27-filter-age]fixed.csv',
    B='../dataset/DATA_B_FinalFinished[2024-09-27-filger-age]fixed.csv',
    C='../dataset/DATA_C_FinalFinished[2024-09-27-filter-age]fixed.csv',
)

# CSV path of the shared test set.
Data_Test = dict(Test='../dataset/DATA_test[2024-09-27_from_DATA_A].csv')

# Columns requested from the data loader, built from three consecutive groups.
# Presumably: model features, then label/identifier columns, then the
# cross-validation fold-assignment columns — confirm against util.data_process.
select_cols = [
    '性別', '入院方式', 'HCV', 'HBV', '有無糖尿病', 'FISTULA', 'GRAFT', 'Catheter',
    'Intact PTH', 'age', '體重1開始', '開始血壓SBP', '開始血壓DBP',
    '開始脈搏', '體溫', '體重實際脫水', '每公斤脫水量(ml/kg)', 'BUN', 'K', 'HGB',
    'URR%', 'Na', 'Ca', 'P',
    '透析液 Ca', 'ALBUMIN', 'ALT (SGPT)', 'Alk.phosphatase', 'Ferritin', 'IRON/TIBC',
    'MCV', 'MCHC', 'MCH', 'Iron', 'Glucose AC', 'RBC', 'WBC',
    'Platelet', 'Creatinine', 'AST (SGOT)', 'TIBC', 'Bilirubin-T', 'Cholesterol-T', 'CRP',
] + [
    'Max Diff mbp', 'Max Diff sbp', '結束脈搏', 'Final Judge', 'Raw Index', 'ID',
    '洗腎紀錄時間去時分',
] + [f'fold_{k}' for k in range(5)]

# Hidden-layer width passed to the MLP, and an epoch count that the visible
# evaluation code does not reference.
hidden_size, epoch = 128, 20

# Checkpoint directory prefix; the loading code appends `_{DB_type}` to it.
save_path = '/'.join(['./mlp_ckpt', 'nonmark'])

# Evaluate every saved cross-validation checkpoint (3 datasets x 5 folds) on the
# shared test CSV and print accuracy, F1, recall, AUC and confusion-matrix counts.

# The test CSV is loaded without the fold-assignment columns; this list does not
# depend on the dataset or fold, so build it once.
test_select_cols = [col for col in select_cols if 'fold' not in col]

for DB_type in ['A', 'B', 'C']:
    print(DB_type, Data_[DB_type])

    # The categorical column set depends only on the dataset, not on the fold:
    # dataset 'A' additionally treats '體溫' and 'Intact PTH' as categorical.
    if DB_type == 'A':
        cat_col_names = ['入院方式', '性別', '體溫', 'FISTULA', 'GRAFT', 'Catheter', '有無糖尿病', 'Intact PTH', 'HCV', 'HBV']
    else:
        cat_col_names = ['入院方式', '性別', 'FISTULA', 'GRAFT', 'Catheter', '有無糖尿病', 'HCV', 'HBV']

    for fold_next in [0, 1, 2, 3, 4]:
        # A fresh data_process per fold, loading the training split before the
        # test split (same call order as before). Presumably the loader keeps
        # preprocessing state fitted on the training fold — confirm in util.
        DATA_PROCESS = data_process()
        train_X, train_y, cat_cols, num_cols = DATA_PROCESS.data_loader(
            Data_[DB_type], 'train', fold_next, select_cols, cat_col_names)
        val_X, val_y, _, _ = DATA_PROCESS.data_loader(
            Data_Test['Test'], 'test', fold_next, test_select_cols, cat_col_names)

        X_val_tensor = torch.tensor(val_X.values, dtype=torch.float32)
        y_val_tensor = torch.tensor(val_y.values, dtype=torch.float32)
        valid_dataset = TensorDataset(X_val_tensor, y_val_tensor)
        valid_loader = DataLoader(valid_dataset, batch_size=256, shuffle=False)

        # The network input width is taken from the training feature matrix.
        input_size = train_X.shape[1]
        model = MLP(input_size, hidden_size, output_size=1)

        # The model is evaluated on CPU, so map the checkpoint tensors to CPU
        # explicitly; otherwise a checkpoint saved from a GPU fails to load on
        # a machine where that device is not available.
        ckpt_path = os.path.join(save_path + f'_{DB_type}', f'best_model_cv{fold_next}.pth')
        model.load_state_dict(torch.load(ckpt_path, map_location='cpu'))

        print(f"DATA: {DB_type} | Fold CV: {fold_next+1}")
        model.eval()

        y_true = []
        y_pred = []
        y_probs = []
        with torch.no_grad():
            for features, labels in valid_loader:
                outputs = model(features).squeeze(1)  # (batch, 1) -> (batch,)
                probs = torch.sigmoid(outputs)
                predictions = probs > 0.5             # fixed 0.5 decision threshold

                y_true.extend(labels.tolist())
                y_pred.extend(predictions.tolist())
                y_probs.extend(probs.tolist())

        cm = confusion_matrix(y_true, y_pred)
        tn, fp, fn, tp = cm.ravel()
        accuracy = accuracy_score(y_true, y_pred)
        # F1 and recall are positive-class (binary) scores, not macro averages;
        # the printed labels below say so.
        f1 = f1_score(y_true, y_pred, average='binary')
        recall = recall_score(y_true, y_pred, average='binary')
        auc = roc_auc_score(y_true, y_probs)
        # `.4f` (not `4f`): four decimals, consistent with the other metrics.
        print(f'Accuracy of the model on the test set: {accuracy:.4f} / F1 Score (binary): {f1:.4f} / Recall (binary): {recall:.4f} / AUC: {auc:.4f}')
        print(f'True Positive: {tp}({tp+fn}), False Negative: {fn}({fn+tp}), True Negative: {tn}, False Positive: {fp}')

A ../dataset/DATA_A_FinalFinished[2024-09-27-filter-age]fixed.csv
DATA: A | Fold CV: 1
Accuracy of the model on the test set: 0.898842 / Macro F1 Score: 0.6327 / Macro Recall: 0.5653 / AUC: 0.9118
True Positive: 4634(8198), False Negative: 3564(8198), True Negative: 43179, False Positive: 1817
DATA: A | Fold CV: 2
Accuracy of the model on the test set: 0.899688 / Macro F1 Score: 0.6384 / Macro Recall: 0.5747 / AUC: 0.9146
True Positive: 4711(8198), False Negative: 3487(8198), True Negative: 43147, False Positive: 1849
DATA: A | Fold CV: 3
Accuracy of the model on the test set: 0.899481 / Macro F1 Score: 0.6404 / Macro Recall: 0.5808 / AUC: 0.9153
True Positive: 4761(8198), False Negative: 3437(8198), True Negative: 43086, False Positive: 1910
DATA: A | Fold CV: 4
Accuracy of the model on the test set: 0.900797 / Macro F1 Score: 0.6444 / Macro Recall: 0.5833 / AUC: 0.9159
True Positive: 4782(8198), False Negative: 3416(8198), True Negative: 43135, False Positive: 1861
DATA: A | Fold CV: