# MTGBM Package Install

In [None]:
# Clone the mtgbmcode repository into the current working directory.
!git clone https://github.com/antmachineintelligence/mtgbmcode.git

# Build the distribution files from the bundled python-package directory.
%cd ./mtgbmcode/python-package/
!python -m build

# Install the built wheel. --no-deps skips dependency resolution and
# --force-reinstall replaces any copy that is already installed.
# NOTE: the %cd magics persist, so after this cell the working directory is
# ./mtgbmcode/python-package/dist; later relative paths resolve from there.
%cd ./dist
!pip install lightgbmmt-2.3.2-py3-none-any.whl --force-reinstall --no-deps

# Package Imports

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
import warnings
warnings.filterwarnings('ignore')
import os
import pandas as pd
import numpy as np
import sys
import sklearn
from sklearn.model_selection import train_test_split,  KFold,StratifiedKFold
from sklearn.preprocessing import OneHotEncoder,  RobustScaler, MinMaxScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.utils import resample
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve,log_loss, auc, precision_recall_curve, mean_absolute_error, mean_squared_error, f1_score,precision_recall_curve, mean_squared_error, average_precision_score,median_absolute_error, precision_score, f1_score, balanced_accuracy_score, accuracy_score, roc_auc_score, recall_score, r2_score
from sklearn.calibration import calibration_curve
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import resample
import skimage
import scipy.stats as stat
import lightgbm as lgb
import lightgbmmt as lgbmmt
from scipy.stats import norm, sem, t
from scipy import stats
from scipy.optimize import minimize
import shap
import scipy
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import random
import ipywidgets as widgets


# Model Performance Metrics

In [None]:
def delong_roc_test(actual, preds_a, preds_b):
    """Compare the ROC AUCs of two score vectors on the same cases (DeLong-style test).

    Args:
        actual: Binary ground-truth labels (anything castable to bool).
        preds_a: Scores of the first model, aligned with ``actual``.
        preds_b: Scores of the second model, aligned with ``actual``.

    Returns:
        Two-sided p-value for the null hypothesis that both AUCs are equal.
        Returns 1.0 when the estimated variance of the AUC difference is not
        positive (e.g. both models rank every pair identically).

    Notes:
        The variance terms divide by ``n - 1``, so at least two positive and
        two negative cases are needed for a finite result.
        The p-value is also printed, as in the original implementation.
    """
    actual = np.asarray(actual, dtype=bool)
    preds_a = np.asarray(preds_a, dtype=float)
    preds_b = np.asarray(preds_b, dtype=float)

    def _placements(preds):
        # psi[j, i] is 1 when positive i outranks negative j, 0.5 on a tie, else 0.
        pos = preds[actual][np.newaxis, :]
        neg = preds[~actual][:, np.newaxis]
        psi = (pos > neg).astype(float)
        psi[pos == neg] = 0.5
        # Overall AUC, per-positive means (axis 0) and per-negative means (axis 1).
        return np.mean(psi), np.mean(psi, axis=0), np.mean(psi, axis=1)

    def _cov(u10, u01, auc_u, v10, v01, auc_v):
        # Sum of the two structural-component covariances, each scaled by 1/(n - 1).
        part_pos = np.mean((u10 - auc_u) * (v10 - auc_v)) / (len(u10) - 1)
        part_neg = np.mean((u01 - auc_u) * (v01 - auc_v)) / (len(u01) - 1)
        return part_pos + part_neg

    auc_a, va10, va01 = _placements(preds_a)
    auc_b, vb10, vb01 = _placements(preds_b)

    var_a = _cov(va10, va01, auc_a, va10, va01, auc_a)
    var_b = _cov(vb10, vb01, auc_b, vb10, vb01, auc_b)
    covar_ab = _cov(va10, va01, auc_a, vb10, vb01, auc_b)

    denominator = (var_a + var_b - 2 * covar_ab)
    if denominator <= 0:
        return 1.0

    z = (auc_a - auc_b) / denominator ** 0.5
    # Two-sided p-value is 2 * P(Z > |z|). The previous code evaluated
    # sf(2 * |z|), which is a different (and generally smaller) quantity
    # and gives 0.5 instead of 1.0 when z == 0.
    p_value = 2 * scipy.stats.norm.sf(abs(z))
    print(p_value)
    return p_value

def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95):
    """Plot a binned calibration curve with normal-approximation error bars and a histogram.

    Left panel: observed fraction of positives against mean predicted
    probability per uniform-width bin, with error bars of
    ``1.96 * sqrt(p * (1 - p) / n)``. Right panel: histogram of the
    predicted probabilities.

    Args:
        actual_labels: Binary ground-truth labels.
        predicted_probabilities: Predicted probabilities in [0, 1].
        n_bins: Number of uniform-width bins on [0, 1].
        confidence: Currently unused; the interval is fixed at z = 1.96.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    actual_labels = np.array(actual_labels)
    predicted_probabilities = np.array(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )

    # calibration_curve only returns the NON-EMPTY bins, so its output cannot
    # be indexed by bin number. Recompute the per-bin counts with the same
    # assignment rule it uses (searchsorted on the interior edges) and keep
    # the non-empty ones, so counts line up one-to-one with the returned points.
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities.ravel())
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]
    z = 1.96

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    plt.subplot(1, 2, 1)

    points = zip(mean_predicted_value, fraction_of_positives, bin_counts)
    for i, (x, y, n) in enumerate(points):
        # Binomial standard error of the observed fraction in this bin.
        h = z * np.sqrt(y * (1 - y) / n)

        # Each point is drawn separately; only the first carries the legend label.
        plt.errorbar(
            x, y, yerr=h, fmt='o-', color='tab:blue', ecolor='gray', capsize=4,
            markersize=7, linewidth=2, label=None if i > 0 else "Platt Calibration"
        )

    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")

    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    plt.title(f"Platt-Scaled Calibration Curve with 95% CI (Bins: {n_bins})", fontsize=16)
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()
    plt.show()

def new_threshold(y_test, y_pred, sensitivity_threshold=0.9):
    """Pick the ROC threshold with the best Youden index subject to a sensitivity floor.

    Args:
        y_test: Binary ground-truth labels.
        y_pred: Predicted scores or probabilities.
        sensitivity_threshold: Minimum true-positive rate a candidate
            threshold must reach. Defaults to 0.9 (the previously hard-coded value).

    Returns:
        The threshold from ``roc_curve`` that maximises ``tpr - fpr`` among
        the operating points with ``tpr >= sensitivity_threshold``.

    Raises:
        ValueError: If no operating point reaches the requested sensitivity.
    """
    fpr, tpr, thvals = roc_curve(y_test, y_pred)

    # Indices of the operating points that satisfy the sensitivity floor.
    valid_idx = np.flatnonzero(tpr >= sensitivity_threshold)
    if valid_idx.size == 0:
        raise ValueError(
            f"No thresholds found with sensitivity >= {sensitivity_threshold:.2f}"
        )

    # Youden's J restricted to the admissible points; argmax keeps the first maximum.
    youden_index = tpr[valid_idx] - fpr[valid_idx]
    return thvals[valid_idx[np.argmax(youden_index)]]

def roc_ci(y_true, y_pred):
    """Return a (lower, upper) 95% interval for the ROC AUC.

    The AUC is treated as a binomial proportion over ``len(y_true)`` trials
    and the bounds are taken from beta-distribution quantiles.
    """
    alpha = 0.05
    n_obs = len(y_true)
    # AUC rescaled to a pseudo "number of successes" out of n_obs.
    pseudo_hits = roc_auc_score(y_true, y_pred) * n_obs
    pseudo_misses = n_obs - pseudo_hits

    lo = stats.beta.ppf(alpha / 2, pseudo_hits, pseudo_misses + 1)
    hi = stats.beta.ppf(1 - alpha / 2, pseudo_hits + 1, pseudo_misses)
    return lo, hi

def calculate_auprc(y_true, y_pred):
    """Return a (lower, upper) 95% interval for the area under the PR curve.

    Only the interval is returned, not the AUPRC itself. The area is treated
    as a binomial proportion over ``len(y_true)`` trials and the bounds are
    taken from beta-distribution quantiles.
    """
    alpha = 0.05
    prec_pts, rec_pts, _ = precision_recall_curve(y_true, y_pred)
    area = auc(rec_pts, prec_pts)

    n_obs = len(y_true)
    # Area rescaled to a pseudo "number of successes" out of n_obs.
    pseudo_hits = area * n_obs
    pseudo_misses = n_obs - pseudo_hits

    lo = stats.beta.ppf(alpha / 2, pseudo_hits, pseudo_misses + 1)
    hi = stats.beta.ppf(1 - alpha / 2, pseudo_hits + 1, pseudo_misses)
    return lo, hi

def custom_sampling(y_true, y_pred, num_bootstrap=2000):
    """Draw bootstrap resamples of (labels, predictions) that contain both classes.

    Args:
        y_true: Labels as a pandas object (indexed positionally via ``.iloc``).
        y_pred: Predictions as a NumPy array aligned with ``y_true``.
        num_bootstrap: Number of accepted resamples to return.

    Returns:
        List of ``(sampled_y_true, sampled_y_pred)`` tuples, one per accepted
        resample. Resamples whose labels contain a single class are skipped.

    Raises:
        ValueError: If ``y_true`` itself contains fewer than two classes, in
            which case no resample could ever be accepted.
    """
    if len(np.unique(y_true)) < 2:
        raise ValueError("y_true must contain at least two classes to bootstrap")

    n_rows = len(y_true)
    metric_values = []
    attempt = 0

    while len(metric_values) < num_bootstrap:
        # Seed by ATTEMPT number, not by accepted-sample count: the old code
        # reseeded with the same value after a rejected draw, so it redrew the
        # identical single-class sample forever. When no draw is rejected the
        # seeds (0, 1, 2, ...) and therefore the samples are unchanged.
        # The global NumPy RNG is reseeded here, as before.
        np.random.seed(attempt)
        attempt += 1

        sampled_indices = np.random.choice(n_rows, n_rows, replace=True)
        sampled_y_true = y_true.iloc[sampled_indices]
        sampled_y_pred = y_pred[sampled_indices]

        if len(np.unique(sampled_y_true)) > 1:
            metric_values.append((sampled_y_true, sampled_y_pred))

    return metric_values

def calculate_ci(metric_values, metric_fn, best_thres=0):
    """Summarise a metric over bootstrap resamples.

    Args:
        metric_values: Iterable of ``(y_true, y_pred)`` resamples.
        metric_fn: Callable ``metric_fn(y_true, y_pred)`` returning a number.
        best_thres: When greater than 0, predictions are binarised with
            ``y_pred > best_thres`` before scoring; otherwise they are
            passed through unchanged.

    Returns:
        Tuple ``(mean, 2.5th percentile, 97.5th percentile)`` of the scores.
    """
    binarise = best_thres > 0

    scores = []
    for truth, pred in metric_values:
        if binarise:
            pred = pred > best_thres
        scores.append(metric_fn(truth, pred))

    lower_bound = np.percentile(scores, 2.5)
    upper_bound = np.percentile(scores, 97.5)
    return np.mean(scores), lower_bound, upper_bound

def specificity_score(y_true, y_pred):
    """Return the true-negative rate, TN / (TN + FP), from a 2x2 confusion matrix."""
    true_neg, false_pos, _false_neg, _true_pos = confusion_matrix(y_true, y_pred).ravel()
    negatives = true_neg + false_pos
    return true_neg / negatives

def auc_plot(y_test, y_pred, name, best_threshold):
    """Build a one-row summary table of discrimination and threshold metrics.

    Args:
        y_test: Binary ground-truth labels (pandas object).
        y_pred: Predicted probabilities (NumPy array).
        name: Row label used as the index of the returned frame.
        best_threshold: Cut-off applied as ``y_pred > best_threshold``.

    Returns:
        Single-row DataFrame with AUROC/AUPRC (with intervals from ``roc_ci``
        and ``calculate_auprc``), bootstrap mean and 95% percentile interval
        for sensitivity, specificity, precision, F1 and balanced accuracy
        (reported under the 'Accuracy (95% CI)' column), and the TN/FP/FN/TP
        counts at the threshold.
    """
    template = ' {:.3f} ({:.3f} - {:.3f})'

    # Threshold-free discrimination metrics.
    fpr, tpr, _ = roc_curve(y_test, y_pred)
    auroc_bounds = roc_ci(y_test, y_pred)
    auroc = auc(fpr, tpr)
    precision, recall, _ = precision_recall_curve(y_test, y_pred)
    auprc = auc(recall, precision)
    auprc_bounds = calculate_auprc(y_test, y_pred)

    # Resamples are drawn on the raw probabilities; the cut-off is applied
    # inside calculate_ci for each resample.
    resamples = custom_sampling(y_test, y_pred, num_bootstrap=2000)

    # Confusion counts on the full set at the chosen cut-off.
    hard_calls = y_pred > best_threshold
    tn, fp, fn, tp = confusion_matrix(y_test, hard_calls).ravel()

    scorers = (
        ('sens', recall_score),
        ('spec', specificity_score),
        ('prec', precision_score),
        ('acc', balanced_accuracy_score),
        ('f1', f1_score),
    )
    ci_text = {
        key: template.format(*calculate_ci(resamples, scorer, best_thres=best_threshold))
        for key, scorer in scorers
    }

    row = {
        'Threshold': best_threshold,
        'auroc': template.format(auroc, *auroc_bounds),
        'auprc': template.format(auprc, *auprc_bounds),
        'Sensitivity (95% CI)': ci_text['sens'],
        'Specificity (95% CI)': ci_text['spec'],
        'Precision (95% CI)': ci_text['prec'],
        'F1 score (95% CI)': ci_text['f1'],
        'Accuracy (95% CI)': ci_text['acc'],
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
    }
    return pd.DataFrame(row, index=[name])


# 1. Sample Selection

#### 1.1 Train Test Set Sampling

In [None]:
# Random seed shared by the dataset-splitting helpers, the fold construction
# and the LightGBM parameter dict below.
SEED = 42
# Number of cross-validation folds iterated over by single_lgb_fun.
FOLDS_NUM = 5
# NOTE(review): not referenced in the visible part of this notebook;
# presumably the number of task labels for the multi-task model. Confirm.
num_labels=3

In [None]:
def make_dataset0(data_a, data_ak, data_an, test_size=0.2, seed=SEED):
    """Split the development cohort by ``hid`` and prepare features/labels for all cohorts.

    The ``hid`` groups of ``data_a`` are split with ``train_test_split``,
    stratified on each group's mean ``total_aki`` (means below 0.5 are
    replaced by 0.5). The 27 columns in ``ft_ii`` are imputed with an
    ``IterativeImputer`` fitted on the training rows only, and ``hos`` is
    min-max scaled with a scaler fitted on the training rows only; the
    unscaled values are preserved in a new ``hos_org`` column.

    Args:
        data_a: Development-cohort frame. Must contain ``hid``, ``caseid``,
            the four label columns and every column in ``ft_ii``.
        data_ak: First external cohort (the "kmc" outputs). Must contain
            ``caseid``, the label columns and the ``ft_ii`` columns; ``hid``
            is set to a constant placeholder.
        data_an: Second external cohort (the "nowon" outputs); same
            requirements as ``data_ak``.
        test_size: Fraction of ``hid`` groups assigned to the test split.
        seed: Random state for the split and the imputer.

    Returns:
        Tuple ``(aX_train, aya_train, aX_test, aya_test, aX_kmc, aya_kmc,
        tr_asa, te_asa, kmc_asa, aX_nowon, aya_nowon, nowon_asa)``.
        The ``aX_*`` frames hold ``caseid``, ``hid`` and the imputed
        ``ft_ii`` columns; the ``aya_*`` frames hold the label columns with
        scaled ``hos`` plus ``hos_org``; the ``*_asa`` frames hold
        ``caseid`` and the pre-imputation ``asa``.
    """
    # One row per hid with the mean total_aki over that hid's rows.
    data_a_grouped = data_a.groupby('hid')
    group_aki_mean = data_a_grouped['total_aki'].mean().reset_index()
    group_aki_mean.columns = ['hid', 'total_aki_mean']
    # NOTE(review): values >= 0.5 are kept as-is (not mapped to 1 as in
    # make_dataset1), so every distinct mean becomes its own stratum. Confirm intended.
    group_aki_mean['total_aki_mean'] = group_aki_mean['total_aki_mean'].apply(lambda x: 0.5 if x < 0.5 else x)
    train_hids, test_hids = train_test_split(
        group_aki_mean, test_size=test_size, random_state=seed, stratify=group_aki_mean['total_aki_mean'])

    # Re-assemble row-level train/test frames so all rows of a hid stay on one side.
    train_data_list = [data_a_grouped.get_group(hid) for hid in train_hids['hid']]
    test_data_list = [data_a_grouped.get_group(hid) for hid in test_hids['hid']]
    data_a_train = pd.concat(train_data_list)
    data_a_test = pd.concat(test_data_list)

    # Both lists are currently identical; features are every non-label column.
    label_col_all = ['total_aki', 'hos', 'final_prf', 'inhos_mortality']
    label_col_a = ['total_aki', 'hos', 'final_prf', 'inhos_mortality']
    aX_train, aY_all_train = data_a_train.loc[:, ~data_a_train.columns.isin(label_col_all)], data_a_train[label_col_all]
    aX_test, aY_all_test = data_a_test.loc[:, ~data_a_test.columns.isin(label_col_all)], data_a_test[label_col_all]
    aya_train = aY_all_train[label_col_a]
    aya_test = aY_all_test[label_col_a]

    aX_kmc, aY_all_kmc = data_ak.loc[:, ~data_ak.columns.isin(label_col_all)], data_ak[label_col_all]
    aya_kmc = aY_all_kmc[label_col_a]
    aX_nowon, aY_all_nowon = data_an.loc[:, ~data_an.columns.isin(label_col_all)], data_an[label_col_all]
    aya_nowon = aY_all_nowon[label_col_a]

    # Model input columns that get imputed.
    ft_ii = ['asa', 'sex', 'pcr', 'age', 'bmi', 'total_em', 'phb', 'pwbc', 'pplt', 'pbun', 'palb', 'pgot', 'pgpt', 'pna', 'pk', 'pglu', 'pptinr', 'andur', 'dept_GS', 'dept_NS', 'dept_OG', 'dept_OL', 'dept_OS', 'dept_Others', 'dept_PS', 'dept_TS', 'dept_UR']

    # Constant placeholder ids for the external cohorts.
    aX_kmc['hid'] = 99999999
    aX_nowon['hid'] = 88888888

    # asa is captured before imputation.
    tr_asa = aX_train[['caseid', 'asa']]
    te_asa = aX_test[['caseid', 'asa']]
    kmc_asa = aX_kmc[['caseid', 'asa']]
    nowon_asa = aX_nowon[['caseid', 'asa']]

    # Key columns are set aside and re-attached after imputation.
    aX_train_key = aX_train[['caseid', 'hid']]
    aX_test_key = aX_test[['caseid', 'hid']]
    aX_kmc_key = aX_kmc[['caseid', 'hid']]
    aX_nowon_key = aX_nowon[['caseid', 'hid']]

    aX_train = aX_train[ft_ii]
    aX_test = aX_test[ft_ii]
    aX_kmc = aX_kmc[ft_ii]
    aX_nowon = aX_nowon[ft_ii]

    # Imputer is fitted on the training rows only and then applied to the other sets.
    imp = IterativeImputer(max_iter=50, random_state=seed)
    aX_train = imp.fit_transform(aX_train)
    aX_test = imp.transform(aX_test)
    aX_kmc = imp.transform(aX_kmc)
    aX_nowon = imp.transform(aX_nowon)

    # The imputer returns arrays; restore column names and the original row index.
    aX_train = pd.DataFrame(data=aX_train, columns=ft_ii, index=aX_train_key.index)
    aX_test = pd.DataFrame(data=aX_test, columns=ft_ii, index=aX_test_key.index)
    aX_kmc = pd.DataFrame(data=aX_kmc, columns=ft_ii, index=aX_kmc_key.index)
    aX_nowon = pd.DataFrame(data=aX_nowon, columns=ft_ii, index=aX_nowon_key.index)

    aX_train = pd.concat([aX_train_key, aX_train], axis=1)
    aX_test = pd.concat([aX_test_key, aX_test], axis=1)
    aX_kmc = pd.concat([aX_kmc_key, aX_kmc], axis=1)
    aX_nowon = pd.concat([aX_nowon_key, aX_nowon], axis=1)

    # Keep the unscaled hos next to the scaled one.
    scaler = MinMaxScaler()
    aya_train['hos_org'] = aya_train['hos'].copy()
    aya_test['hos_org'] = aya_test['hos'].copy()
    aya_kmc['hos_org'] = aya_kmc['hos'].copy()
    aya_nowon['hos_org'] = aya_nowon['hos'].copy()

    # Scaler is fitted on the training hos only, so other sets can fall outside [0, 1].
    aya_train['hos'] = scaler.fit_transform(aya_train[['hos']])
    aya_test['hos'] = scaler.transform(aya_test[['hos']])
    aya_kmc['hos'] = scaler.transform(aya_kmc[['hos']])
    aya_nowon['hos'] = scaler.transform(aya_nowon[['hos']])

    return aX_train, aya_train, aX_test, aya_test, aX_kmc, aya_kmc, tr_asa, te_asa, kmc_asa, aX_nowon, aya_nowon, nowon_asa


In [None]:
def make_dataset1(data_a, data_ak, data_an, test_size=0.2, seed=SEED):
    """Variant of ``make_dataset0`` that stratifies the split on three outcomes jointly.

    The ``hid`` groups of ``data_a`` are split with ``train_test_split``,
    stratified on a combined label built from the per-group means of
    ``total_aki``, ``final_prf`` and ``inhos_mortality`` (each mapped to 0.5
    when below 0.5 and to 1 otherwise). Everything after the split is the
    same as in ``make_dataset0``: ``ft_ii`` columns are imputed with an
    ``IterativeImputer`` fitted on the training rows, and ``hos`` is min-max
    scaled with a scaler fitted on the training rows, with the unscaled
    values kept in ``hos_org``.

    Args:
        data_a: Development-cohort frame. Must contain ``hid``, ``caseid``,
            the four label columns and every column in ``ft_ii``.
        data_ak: First external cohort (the "kmc" outputs); ``hid`` is set
            to a constant placeholder.
        data_an: Second external cohort (the "nowon" outputs); ``hid`` is
            set to a constant placeholder.
        test_size: Fraction of ``hid`` groups assigned to the test split.
        seed: Random state for the split and the imputer.

    Returns:
        Tuple ``(aX_train, aya_train, aX_test, aya_test, aX_kmc, aya_kmc,
        tr_asa, te_asa, kmc_asa, aX_nowon, aya_nowon, nowon_asa)`` with the
        same layout as ``make_dataset0``.
    """
    # Per-hid mean of each binary outcome, then collapsed to two levels (0.5 / 1).
    data_a_grouped = data_a.groupby('hid')
    group_mean = data_a_grouped[['total_aki', 'final_prf', 'inhos_mortality']].mean().reset_index()
    for col in ['total_aki', 'final_prf', 'inhos_mortality']:
        group_mean[col] = group_mean[col].apply(lambda x: 0.5 if x < 0.5 else 1)
    # Joint stratum key: one string per combination of the three collapsed outcomes.
    group_mean['stratify_label'] = (
        group_mean['total_aki'].astype(str) + '_' +
        group_mean['final_prf'].astype(str) + '_' +
        group_mean['inhos_mortality'].astype(str)
    )
    train_hids, test_hids = train_test_split(
        group_mean, test_size=test_size, random_state=seed, stratify=group_mean['stratify_label']
    )
    # Re-assemble row-level train/test frames so all rows of a hid stay on one side.
    train_data_list = [data_a_grouped.get_group(hid) for hid in train_hids['hid']]
    test_data_list = [data_a_grouped.get_group(hid) for hid in test_hids['hid']]
    data_a_train = pd.concat(train_data_list)
    data_a_test = pd.concat(test_data_list)

    # Both lists are currently identical; features are every non-label column.
    label_col_all = ['total_aki', 'hos', 'final_prf', 'inhos_mortality']
    label_col_a = ['total_aki', 'hos', 'final_prf', 'inhos_mortality']
    aX_train, aY_all_train = data_a_train.loc[:, ~data_a_train.columns.isin(label_col_all)], data_a_train[label_col_all]
    aX_test, aY_all_test = data_a_test.loc[:, ~data_a_test.columns.isin(label_col_all)], data_a_test[label_col_all]
    aya_train = aY_all_train[label_col_a]
    aya_test = aY_all_test[label_col_a]

    aX_kmc, aY_all_kmc = data_ak.loc[:, ~data_ak.columns.isin(label_col_all)], data_ak[label_col_all]
    aya_kmc = aY_all_kmc[label_col_a]
    aX_nowon, aY_all_nowon = data_an.loc[:, ~data_an.columns.isin(label_col_all)], data_an[label_col_all]
    aya_nowon = aY_all_nowon[label_col_a]

    # Model input columns that get imputed.
    ft_ii = ['asa', 'sex', 'pcr', 'age', 'bmi', 'total_em', 'phb', 'pwbc', 'pplt', 'pbun', 'palb', 'pgot', 'pgpt', 'pna', 'pk', 'pglu', 'pptinr', 'andur', 'dept_GS', 'dept_NS', 'dept_OG', 'dept_OL', 'dept_OS', 'dept_Others', 'dept_PS', 'dept_TS', 'dept_UR']

    # Constant placeholder ids for the external cohorts.
    aX_kmc['hid'] = 99999999
    aX_nowon['hid'] = 88888888

    # asa is captured before imputation.
    tr_asa = aX_train[['caseid', 'asa']]
    te_asa = aX_test[['caseid', 'asa']]
    kmc_asa = aX_kmc[['caseid', 'asa']]
    nowon_asa = aX_nowon[['caseid', 'asa']]

    # Key columns are set aside and re-attached after imputation.
    aX_train_key = aX_train[['caseid', 'hid']]
    aX_test_key = aX_test[['caseid', 'hid']]
    aX_kmc_key = aX_kmc[['caseid', 'hid']]
    aX_nowon_key = aX_nowon[['caseid', 'hid']]

    aX_train = aX_train[ft_ii]
    aX_test = aX_test[ft_ii]
    aX_kmc = aX_kmc[ft_ii]
    aX_nowon = aX_nowon[ft_ii]

    # Imputer is fitted on the training rows only and then applied to the other sets.
    imp = IterativeImputer(max_iter=50, random_state=seed)
    aX_train = imp.fit_transform(aX_train)
    aX_test = imp.transform(aX_test)
    aX_kmc = imp.transform(aX_kmc)
    aX_nowon = imp.transform(aX_nowon)

    # The imputer returns arrays; restore column names and the original row index.
    aX_train = pd.DataFrame(data=aX_train, columns=ft_ii, index=aX_train_key.index)
    aX_test = pd.DataFrame(data=aX_test, columns=ft_ii, index=aX_test_key.index)
    aX_kmc = pd.DataFrame(data=aX_kmc, columns=ft_ii, index=aX_kmc_key.index)
    aX_nowon = pd.DataFrame(data=aX_nowon, columns=ft_ii, index=aX_nowon_key.index)

    aX_train = pd.concat([aX_train_key, aX_train], axis=1)
    aX_test = pd.concat([aX_test_key, aX_test], axis=1)
    aX_kmc = pd.concat([aX_kmc_key, aX_kmc], axis=1)
    aX_nowon = pd.concat([aX_nowon_key, aX_nowon], axis=1)

    # Keep the unscaled hos next to the scaled one.
    scaler = MinMaxScaler()
    aya_train['hos_org'] = aya_train['hos'].copy()
    aya_test['hos_org'] = aya_test['hos'].copy()
    aya_kmc['hos_org'] = aya_kmc['hos'].copy()
    aya_nowon['hos_org'] = aya_nowon['hos'].copy()

    # Scaler is fitted on the training hos only, so other sets can fall outside [0, 1].
    aya_train['hos'] = scaler.fit_transform(aya_train[['hos']])
    aya_test['hos'] = scaler.transform(aya_test[['hos']])
    aya_kmc['hos'] = scaler.transform(aya_kmc[['hos']])
    aya_nowon['hos'] = scaler.transform(aya_nowon[['hos']])

    return aX_train, aya_train, aX_test, aya_test, aX_kmc, aya_kmc, tr_asa, te_asa, kmc_asa, aX_nowon, aya_nowon, nowon_asa


In [None]:
# Load the three cohort tables from CSV.
df0 = pd.read_csv('SNUH_data.csv')
kmc0 = pd.read_csv('KMC_data.csv')
nowon0 = pd.read_csv('Nowon_data.csv')

for _label, _frame in (
    ('1.load new snu:', df0),
    ('1.load new kmc:', kmc0),
    ('1.load new nowon:', nowon0),
):
    print(_label, _frame.shape)

# Collapse the one-hot asa_1..asa_5 indicators into a single ordinal 'asa' column.
_asa_columns = ['asa_1', 'asa_2', 'asa_3', 'asa_4', 'asa_5']
_asa_grades = [1, 2, 3, 4, 5]
for _frame in (df0, kmc0, nowon0):
    _frame['asa'] = (_frame[_asa_columns] * _asa_grades).sum(axis=1)

# Work on copies so the freshly loaded frames stay untouched.
data0, data_k0, data_n0 = df0.copy(), kmc0.copy(), nowon0.copy()

(
    adtrain0, aya_train0,
    adtest0, aya_test0,
    adkmc0, aya_kmc0,
    asa_train0, asa_test0, asa_kmc0,
    adnowon0, aya_nowon0, asa_nowon0,
) = make_dataset0(data0, data_k0, data_n0)

# Report the shape of every feature and asa frame.
for _label, _frame in (
    ('adtrain0:', adtrain0),
    ('adtest0:', adtest0),
    ('kmc0:', adkmc0),
    ('nowon0:', adnowon0),
    ('asa_train0:', asa_train0),
    ('asa_test0:', asa_test0),
    ('asa_kmc0:', asa_kmc0),
    ('asa_nowon0:', asa_nowon0),
):
    print(_label, _frame.shape)

In [None]:
def _detach_hos_org(labels):
    """Move the 'hos_org' column out of ``labels`` (in place) into its own frame."""
    holder = pd.DataFrame()
    holder['hos_org'] = labels['hos_org']
    labels.drop('hos_org', axis=1, inplace=True)
    return holder


# Keep the unscaled length-of-stay values separately; the label frames keep
# only the columns returned under label_col_a.
snu_train_hos0 = _detach_hos_org(aya_train0)
snu_test_hos0 = _detach_hos_org(aya_test0)
kmc_hos0 = _detach_hos_org(aya_kmc0)
nowon_hos0 = _detach_hos_org(aya_nowon0)

#### 1.2 5-fold Sampling



In [None]:
import time
import pickle

# Build five hid-disjoint folds of roughly equal row count from the training
# split and pickle each fold's rows to fold_<k>.pkl.
seed = SEED
np.random.seed(seed)

# Training features and labels side by side (indices reset so rows align positionally).
data = pd.concat([adtrain0.reset_index(drop=True), aya_train0.reset_index(drop=True)], axis=1)

# One row per hid with the mean of each binary outcome.
group_mean = data.groupby('hid')[['total_aki', 'final_prf', 'inhos_mortality']].mean().reset_index()

# Collapse each mean to two levels (0.5 / 1) before building the stratum key.
for col in ['total_aki', 'final_prf', 'inhos_mortality']:
    group_mean[col] = group_mean[col].apply(lambda x: 0.5 if x < 0.5 else 1)

group_mean['stratify'] = (
    group_mean['total_aki'].astype(str) + '_' +
    group_mean['final_prf'].astype(str) + '_' +
    group_mean['inhos_mortality'].astype(str)
)

# Pool of hids not yet assigned to a fold.
remaining_hids = group_mean

# NOTE(review): the fold count is hard-coded to 5 here rather than taken from FOLDS_NUM.
folds = [[] for _ in range(5)]
# Successive shares of the REMAINING pool (1/5, 1/4, ~1/3, 1/2) so that each
# peeled-off fold is about one fifth of all hids.
test_sizes = [0.2, 0.25, 0.33, 0.5]
# Target number of ROWS (not hids) per fold.
target_fold_size = len(data) // 5

for i, test_size in enumerate(test_sizes):
    # Printed before the split for this fold is actually performed.
    print(f"fold {i+1} generated")

    remaining_hids, test_hids = train_test_split(
        remaining_hids, test_size=test_size, random_state=seed, stratify=remaining_hids['stratify'])
    fold_hids = test_hids['hid'].tolist()

    fold_data = data[data['hid'].isin(fold_hids)]
    # Too many rows: drop the hid with the fewest rows from the fold and
    # return it to the unassigned pool, until the fold is at or below target.
    while len(fold_data) > target_fold_size:
        extra_hid = fold_data['hid'].value_counts().idxmin()
        fold_data = fold_data[fold_data['hid'] != extra_hid]
        extra_row = group_mean[group_mean['hid'] == extra_hid]
        remaining_hids = pd.concat([remaining_hids, extra_row])
        print(f"Adjusting fold {i+1} down: {len(fold_data)} remaining, target {target_fold_size}")
        # NOTE(review): purpose of the pause is not evident from this cell.
        time.sleep(0.1)

    # Too few rows: move hids from the unassigned pool into the fold until
    # the target is reached (the fold may end slightly above target).
    # NOTE(review): remaining_hids has one row per hid, so value_counts() is
    # all ties and idxmin() does not select the hid with the fewest data
    # rows. Confirm this is the intended choice.
    while len(fold_data) < target_fold_size:
        extra_hid = remaining_hids['hid'].value_counts().idxmin()
        remaining_hids = remaining_hids[remaining_hids['hid'] != extra_hid]
        extra_row = data[data['hid'] == extra_hid]
        fold_data = pd.concat([fold_data, extra_row])
        print(f"Adjusting fold {i+1} up: {len(fold_data)} current, target {target_fold_size}")
        time.sleep(0.1)

    # One entry per row, so hids repeat; only used for isin() membership below.
    folds[i] = fold_data['hid'].tolist()

# Whatever is still unassigned becomes the fifth fold.
folds[4] = remaining_hids['hid'].tolist()

data_folds = [data[data['hid'].isin(fold)] for fold in folds]
# Computed for inspection; not used further in this cell.
fold_sizes = [len(fold_data) for fold_data in data_folds]

# Persist each fold's rows so later cells can reload them.
for i, fold_data in enumerate(data_folds):
    with open(f'fold_{i+1}.pkl', 'wb') as f:
        pickle.dump(fold_data, f)
    print(f"Fold {i+1} saved to fold_{i+1}.pkl")


In [None]:
import time
import pickle

# Reload the five pickled folds written by the fold-construction cell.
loaded_folds = []
for fold_no in range(1, 6):
    with open(f'fold_{fold_no}.pkl', 'rb') as handle:
        loaded_folds.append(pickle.load(handle))
    print(f"Fold {fold_no} 로드 완료: fold_{fold_no}.pkl")

loaded_fold_sizes = list(map(len, loaded_folds))
print("Loaded fold sizes:", loaded_fold_sizes)

# Per-fold descriptive statistics, collected column-wise.
num_patients_list = []
num_aki_1_list = []
num_final_prf_1_list = []
num_inhos_mortality_1_list = []
hos_mean_list = []

for fold_no, frame in enumerate(loaded_folds, start=1):
    n_patients = frame['hid'].nunique()
    # Row counts where each binary outcome equals 1.
    n_aki, n_prf, n_death = (
        frame[frame[outcome] == 1].shape[0]
        for outcome in ('total_aki', 'final_prf', 'inhos_mortality')
    )
    mean_hos = frame['hos'].mean()

    num_patients_list.append(n_patients)
    num_aki_1_list.append(n_aki)
    num_final_prf_1_list.append(n_prf)
    num_inhos_mortality_1_list.append(n_death)
    hos_mean_list.append(mean_hos)

    print(f"Fold {fold_no}: {n_patients} patients, {n_aki} total_aki = 1, {n_prf} final_prf = 1, {n_death} inhos_mortality = 1, hos mean = {mean_hos}")

# One summary record per fold, in fold order.
fold_data_summary = [
    {
        "Fold": f"Fold {fold_no}",
        "Total Patients": n_patients,
        "Total AKI = 1": n_aki,
        "Total Final PRF = 1": n_prf,
        "Total Inhos Mortality = 1": n_death,
        "Hos Mean": mean_hos,
        "Fold Size": n_rows,
    }
    for fold_no, (n_patients, n_aki, n_prf, n_death, mean_hos, n_rows) in enumerate(
        zip(
            num_patients_list,
            num_aki_1_list,
            num_final_prf_1_list,
            num_inhos_mortality_1_list,
            hos_mean_list,
            loaded_fold_sizes,
        ),
        start=1,
    )
]

fold_summary_df = pd.DataFrame(fold_data_summary)

fold_summary_df


In [None]:
# Column layout is taken from the training feature/label frames.
feature_columns = adtrain0.columns
target_columns = aya_train0.columns

# Parallel lists: folds_features[k] and folds_targets[k] belong to fold k + 1.
folds_features, folds_targets = [], []

for fold_no, fold_frame in enumerate(loaded_folds, start=1):
    folds_features.append(fold_frame[feature_columns])
    folds_targets.append(fold_frame[target_columns])
    print(f"Fold {fold_no} features and targets separated.")

In [None]:
# Features selected by BorutaSHAP, one list per outcome.
# NOTE(review): 'dept_OGUR' (used in s30 and ft0) is not among the columns
# produced by make_dataset0, which has 'dept_OG' and 'dept_UR' separately.
# Confirm that it is created elsewhere before these lists are used.

# Acute kidney injury (target 'total_aki').
s10 = ['dept_OS', 'asa', 'phb', 'dept_NS', 'andur', 'palb', 'bmi', 'pcr', 'sex', 'age'] 


# Postoperative respiratory failure (target 'final_prf').
s30 = ['dept_OS', 'asa', 'pwbc', 'andur', 'palb', 'dept_OGUR', 'age'] 

# In-hospital mortality.
s40 = ['pgot', 'asa', 'pplt', 'pwbc', 'palb', 'pptinr', 'pglu'] 

# Union of the three selected feature sets above.
ft0 = ['age', 'andur', 'asa', 'bmi', 'dept_NS',  'dept_OGUR', 'dept_OS', 'palb', 'pcr', 'phb',  'pgot', 'pglu', 'pplt', 'pptinr', 'pwbc', 'sex'] 


# 3. Single LightGBM Model

In [None]:
# LightGBM parameters shared by the single-task binary classifiers below.
binary_params = {
    'objective': 'binary',
    # Requires a GPU-enabled LightGBM build.
    'device': "gpu",
    'metric': 'auc',
    'boosting': 'gbdt',
    'max_depth': 16,
    'learning_rate': 0.01,
    # NOTE(review): LightGBM only applies bagging_fraction when bagging_freq
    # is non-zero, and no bagging_freq is set here. Confirm whether row
    # subsampling was intended.
    'bagging_fraction': 0.7,
    'feature_fraction': 0.7,
    'verbosity': -1,
    'lambda_l1': 0.7,
    'lambda_l2': 0.7,
    'num_leaves': 50,
    'min_data_in_leaf': 40,
    'metric_freq': 9,
    'data_random_seed': SEED,
    'num_threads': -1,
    # Number of boosting rounds is passed through params; lgb.train is
    # called without an explicit num_boost_round argument.
    'num_boost_round': 600
}

In [None]:
def single_lgb_fun(folds_features, folds_targets, features, target, dtest, y1_test, params, cate):
    """Cross-validate and then fit a single-task LightGBM model.

    For each of the ``FOLDS_NUM`` folds a model is trained on the other
    folds and its validation ROC AUC is printed (these fold models are not
    kept). A final model is then trained on all folds, a decision threshold
    is chosen with ``new_threshold`` on that model's predictions for its own
    training data, and the test set is scored.

    Args:
        folds_features: List of per-fold feature frames.
        folds_targets: List of per-fold label frames (same order).
        features: Feature column names to use.
        target: Name of the label column to predict.
        dtest: Test feature frame, already restricted to ``features``.
        y1_test: Unused; kept for call compatibility.
        params: LightGBM parameter dict passed to ``lgb.train``.
        cate: Categorical feature specification passed to ``lgb.Dataset``.

    Returns:
        Tuple ``(y_singlelgb, singlelgb_model, best_thresholds, forthval,
        y1_train)``: test predictions, the final model, the chosen
        threshold, the final model's predictions on the training data, and
        the training labels.
    """
    foldsn = FOLDS_NUM

    # Cross-validation pass: reporting only, nothing from it is returned.
    for i in range(foldsn):
        X_vl, y_vl = folds_features[i][features], folds_targets[i][target]
        X_tr = pd.concat([folds_features[j] for j in range(foldsn) if j != i], axis=0)[features]
        y_tr = pd.concat([folds_targets[j] for j in range(foldsn) if j != i], axis=0)[target]

        k_train = lgb.Dataset(X_tr, y_tr, categorical_feature=cate)
        k_valid = lgb.Dataset(X_vl, y_vl, categorical_feature=cate)

        fold_model = lgb.train(
            params=params,
            train_set=k_train,
            valid_sets=[k_valid]
        )

        fold_pred = fold_model.predict(X_vl)
        print('ROC AUC {}'.format(roc_auc_score(y_vl, fold_pred)))

    # Final model on all folds combined.
    dtrain = pd.concat([folds_features[j] for j in range(foldsn)], axis=0)[features]
    y1_train = pd.concat([folds_targets[j] for j in range(foldsn)], axis=0)[target]
    k_train = lgb.Dataset(dtrain, y1_train, categorical_feature=cate)

    singlelgb_model = lgb.train(
        params=params,
        train_set=k_train
    )

    # The threshold is selected on in-sample (training) predictions.
    forthval = singlelgb_model.predict(dtrain)
    best_thresholds = new_threshold(y1_train, forthval)
    y_singlelgb = singlelgb_model.predict(dtest)

    return y_singlelgb, singlelgb_model, best_thresholds, forthval, y1_train

def single_lgb_kmc(single_model_aki, dkmc, y1_kmc):
    """Score ``dkmc`` with an already-trained model.

    ``y1_kmc`` is accepted for call compatibility but not used.
    """
    return single_model_aki.predict(dkmc)


In [None]:
# Empty results table; its columns match the keys of the one-row frames
# returned by auc_plot, which are appended to it in the cells below.
result = pd.DataFrame(columns =['Threshold', 'auroc',	'auprc',	'Sensitivity (95% CI)',	'Specificity (95% CI)',	'Precision (95% CI)',
                                'F1 score (95% CI)',	'Accuracy (95% CI)', 'TN', 'FP', 'FN','TP'] )

In [None]:
# Single-task model for acute kidney injury ('total_aki') using the s10 feature set.
cate = []
target = 'total_aki'
# Returns test predictions, the final model, the chosen threshold, and the
# training predictions/labels (reused by the calibration cell below).
single_aki, single_model_aki, s_thold, forthval, y1_train = single_lgb_fun(folds_features, folds_targets, s10, target, adtest0[s10],
                                                      aya_test0[target], binary_params, cate )

# Score both external cohorts with the same model. The third argument
# (the threshold) is accepted by single_lgb_kmc but not used there.
aya_nowon_pred =single_lgb_kmc(single_model_aki, adnowon0[s10], s_thold )
aya_kmc_pred =single_lgb_kmc(single_model_aki, adkmc0[s10], s_thold )
# Label column 0 is 'total_aki'; the same threshold is applied to all three cohorts.
val_single_1 = auc_plot(aya_test0.iloc[:, [0]], single_aki, 'single_total_aki_snu',  s_thold)
val_single_1_n = auc_plot(aya_nowon0.iloc[:, [0]], aya_nowon_pred, 'single_total_aki_nowon', s_thold)
val_single_1_k = auc_plot(aya_kmc0.iloc[:, [0]], aya_kmc_pred, 'single_total_aki_kmc', s_thold)
# Append the three summary rows to the running results table.
result = pd.concat([result, val_single_1])
result = pd.concat([result, val_single_1_n])
result = pd.concat([result, val_single_1_k])

In [None]:
!pip install ml_insights

In [None]:
# Spline Calibration for single AKI

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.calibration import calibration_curve
import ml_insights as mli

def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95, prefix=None):
    """Plot a binned calibration curve with error bars and a probability histogram.

    Left panel: observed fraction of positives against mean predicted
    probability per uniform-width bin, with error bars of
    ``1.96 * sqrt(p * (1 - p) / n)``. Right panel: histogram of the
    predicted probabilities.

    Args:
        actual_labels: Binary ground-truth labels.
        predicted_probabilities: Predicted probabilities in [0, 1].
        n_bins: Number of uniform-width bins on [0, 1].
        confidence: Currently unused; the interval is fixed at z = 1.96.
        prefix: When given, the figure is also saved to
            ``f"{prefix}_calibration_curve.png"`` at 300 dpi.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    actual_labels = np.array(actual_labels)
    predicted_probabilities = np.array(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )

    # calibration_curve only returns the NON-EMPTY bins, so its output cannot
    # be indexed by bin number. Recompute the per-bin counts with the same
    # assignment rule it uses (searchsorted on the interior edges) and keep
    # the non-empty ones, so counts line up one-to-one with the returned points.
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities.ravel())
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]
    z = 1.96

    x_values = list(mean_predicted_value)
    y_values = list(fraction_of_positives)
    # Binomial standard error per plotted bin, scaled to the interval half-width.
    y_err = list(z * np.sqrt(fraction_of_positives * (1 - fraction_of_positives) / bin_counts))

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    plt.subplot(1, 2, 1)
    plt.errorbar(
        x_values, y_values, yerr=y_err, fmt='o', color='tab:blue', ecolor='gray',
        capsize=4, markersize=7, linewidth=2, alpha=0.5, label="Spline Calibration"
    )
    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")
    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    plt.title(f"Spline-Scaled Calibration Curve with 95% CI (Bins: {n_bins})", fontsize=16)
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()

    # Save before show(), while the figure is still current.
    if prefix is not None:
        plt.savefig(f"{prefix}_calibration_curve.png", dpi=300)

    plt.show()

# Fit the spline calibrator on the final AKI model's predictions for its own
# training data (forthval / y1_train returned by single_lgb_fun).
# NOTE(review): these are in-sample predictions; confirm this is the
# intended calibration set.
splinecalib = mli.SplineCalib()
splinecalib.fit(forthval, y1_train)

# Apply the same calibrator to the internal test set and both external cohorts.
calibrated_single_aki = splinecalib.predict(single_aki)
calibrated_aya_nowon_pred = splinecalib.predict(aya_nowon_pred)
calibrated_aya_kmc_pred = splinecalib.predict(aya_kmc_pred)

# Label column 0 is 'total_aki'; each call also saves <prefix>_calibration_curve.png.
plot_calibration_curve_with_ci2(aya_test0.iloc[:, [0]].to_numpy().flatten(), calibrated_single_aki, prefix="single_aki_snuh")
plot_calibration_curve_with_ci2(aya_nowon0.iloc[:, [0]].to_numpy().flatten(), calibrated_aya_nowon_pred, prefix="single_aki_nowon")
plot_calibration_curve_with_ci2(aya_kmc0.iloc[:, [0]].to_numpy().flatten(), calibrated_aya_kmc_pred, prefix="single_aki_kmc")

In [None]:
# Single-task model for postoperative respiratory failure ('final_prf')
# using the s30 feature set.
cate = []

target='final_prf'
# NOTE: forthval and y1_train are overwritten here with the PRF model's
# training predictions/labels, replacing the AKI ones from the earlier cell.
single_prf, single_model_prf, s3_thold, forthval, y1_train = single_lgb_fun(folds_features,  folds_targets, s30, target, adtest0[s30],
                                                       aya_test0['final_prf'], binary_params, cate )
# Score both external cohorts. The third argument (the threshold) is
# accepted by single_lgb_kmc but not used there.
aya_nowon_pred3 =single_lgb_kmc(single_model_prf, adnowon0[s30], s3_thold )
aya_kmc_pred3 =single_lgb_kmc(single_model_prf, adkmc0[s30], s3_thold )
# Label column 2 is 'final_prf'; the same threshold is applied to all three cohorts.
val_single_3 = auc_plot(aya_test0.iloc[:, [2]], single_prf,  'single_prf_snu', s3_thold)
val_single_3_n =auc_plot(aya_nowon0.iloc[:, [2]], aya_nowon_pred3,  'single_prf_nowon', s3_thold)
val_single_3_k =auc_plot(aya_kmc0.iloc[:, [2]], aya_kmc_pred3, 'single_prf_kmc', s3_thold)
# Append the three summary rows to the running results table.
result = pd.concat([result, val_single_3])
result = pd.concat([result, val_single_3_n])
result = pd.concat([result, val_single_3_k])

In [None]:
# spline calibration for single PRF

import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.calibration import calibration_curve

def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95, prefix=None):
    """Plot a calibration curve with 95% error bars beside a probability histogram.

    Parameters
    ----------
    actual_labels : array-like
        Binary ground-truth labels.
    predicted_probabilities : array-like
        Predicted probabilities, binned uniformly on [0, 1].
    n_bins : int, default 9
        Number of uniform bins requested from ``calibration_curve``.
    confidence : float, default 0.95
        Accepted for interface compatibility; the error bars always use
        z = 1.96 (normal-approximation 95% interval).
    prefix : str or None, default None
        When given, the figure is saved as ``"{prefix}_calibration_curve.png"``
        (300 dpi) before being shown.
    """
    actual_labels = np.array(actual_labels)
    predicted_probabilities = np.array(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )

    # calibration_curve omits empty bins, so its outputs may be shorter than
    # n_bins. Count samples per bin with the same uniform binning rule and keep
    # only the non-empty bins so every plotted point is paired with its own
    # sample size (indexing by range(n_bins) would raise IndexError or misalign).
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities)
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]

    z = 1.96
    x_values = list(mean_predicted_value)
    y_values = list(fraction_of_positives)
    # Half-width of the normal-approximation interval for a binomial proportion.
    y_err = list(z * np.sqrt(fraction_of_positives * (1 - fraction_of_positives) / bin_counts))

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    # Left panel: calibration curve against the identity line.
    plt.subplot(1, 2, 1)
    plt.errorbar(
        x_values, y_values, yerr=y_err, fmt='o', color='tab:blue', ecolor='gray',
        capsize=4, markersize=7, linewidth=2, alpha=0.5, label="Spline Calibration"
    )
    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")
    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    plt.title(f"Spline-Scaled Calibration Curve with 95% CI (Bins: {n_bins})", fontsize=16)
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    # Right panel: distribution of the probabilities being evaluated.
    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()

    if prefix is not None:
        plt.savefig(f"{prefix}_calibration_curve.png", dpi=300)
    plt.show()

# Fit a spline calibrator on (forthval, y1_train) as returned by the final_prf
# single_lgb_fun call, then calibrate the PRF predictions of each cohort.
splinecalib = mli.SplineCalib()
splinecalib.fit(forthval, y1_train)

calibrated_single_prf = splinecalib.predict(single_prf)
calibrated_aya_nowon_pred3 = splinecalib.predict(aya_nowon_pred3)
calibrated_aya_kmc_pred3 = splinecalib.predict(aya_kmc_pred3)

# Compare against the PRF labels (target column 2, the same column used when
# scoring these predictions with auc_plot); column 0 is the AKI label.
plot_calibration_curve_with_ci2(aya_test0.iloc[:, [2]].to_numpy().flatten(), calibrated_single_prf, prefix="single_prf_snuh")
plot_calibration_curve_with_ci2(aya_nowon0.iloc[:, [2]].to_numpy().flatten(), calibrated_aya_nowon_pred3, prefix="single_prf_nowon")
plot_calibration_curve_with_ci2(aya_kmc0.iloc[:, [2]].to_numpy().flatten(), calibrated_aya_kmc_pred3, prefix="single_prf_kmc")


In [None]:
# Single-task model for inhos_mortality: train on the SNU folds, then score the
# SNU test set and the Nowon / KMC cohorts.
cate = []
target = 'inhos_mortality'

single_inhos, single_model_inhos, s4_thold, forthval, y1_train = single_lgb_fun(
    folds_features, folds_targets, s40, target,
    adtest0[s40], aya_test0['inhos_mortality'], binary_params, cate,
)
aya_nowon_pred4 = single_lgb_kmc(single_model_inhos, adnowon0[s40], s4_thold)
aya_kmc_pred4 = single_lgb_kmc(single_model_inhos, adkmc0[s40], s4_thold)

# Target column 3 is the label used for this outcome in all three cohorts.
val_single_4 = auc_plot(aya_test0.iloc[:, [3]], single_inhos, 'single_inhos_snu', s4_thold)
val_single_4_n = auc_plot(aya_nowon0.iloc[:, [3]], aya_nowon_pred4, 'single_inhos_nowon', s4_thold)
val_single_4_k = auc_plot(aya_kmc0.iloc[:, [3]], aya_kmc_pred4, 'single_inhos_kmc', s4_thold)

# Append the three metric tables to the running results table, in order.
for _metrics in (val_single_4, val_single_4_n, val_single_4_k):
    result = pd.concat([result, _metrics])

In [None]:
# Spline calibration for single inhos mortality

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.calibration import calibration_curve

def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95, prefix=None):
    """Plot a calibration curve with 95% error bars beside a probability histogram.

    Parameters
    ----------
    actual_labels : array-like
        Binary ground-truth labels.
    predicted_probabilities : array-like
        Predicted probabilities, binned uniformly on [0, 1].
    n_bins : int, default 9
        Number of uniform bins requested from ``calibration_curve``.
    confidence : float, default 0.95
        Accepted for interface compatibility; the error bars always use
        z = 1.96 (normal-approximation 95% interval).
    prefix : str or None, default None
        When given, the figure is saved as ``"{prefix}_calibration_curve.png"``
        (300 dpi) before being shown.
    """
    actual_labels = np.array(actual_labels)
    predicted_probabilities = np.array(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )

    # calibration_curve omits empty bins, so its outputs may be shorter than
    # n_bins. Count samples per bin with the same uniform binning rule and keep
    # only the non-empty bins so every plotted point is paired with its own
    # sample size (indexing by range(n_bins) would raise IndexError or misalign).
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities)
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]

    z = 1.96
    x_values = list(mean_predicted_value)
    y_values = list(fraction_of_positives)
    # Half-width of the normal-approximation interval for a binomial proportion.
    y_err = list(z * np.sqrt(fraction_of_positives * (1 - fraction_of_positives) / bin_counts))

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    # Left panel: calibration curve against the identity line.
    plt.subplot(1, 2, 1)
    plt.errorbar(
        x_values, y_values, yerr=y_err, fmt='o', color='tab:blue', ecolor='gray',
        capsize=4, markersize=7, linewidth=2, alpha=0.5, label="Spline Calibration"
    )
    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")
    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    plt.title(f"Spline-Scaled Calibration Curve with 95% CI (Bins: {n_bins})", fontsize=16)
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    # Right panel: distribution of the probabilities being evaluated.
    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()

    if prefix is not None:
        plt.savefig(f"{prefix}_calibration_curve.png", dpi=300)

    plt.show()


# Fit a spline calibrator on (forthval, y1_train) as returned by the
# inhos_mortality single_lgb_fun call, then calibrate each cohort's predictions.
splinecalib = mli.SplineCalib()
splinecalib.fit(forthval, y1_train)

calibrated_single_inhos = splinecalib.predict(single_inhos)
calibrated_aya_nowon_pred4 = splinecalib.predict(aya_nowon_pred4)
calibrated_aya_kmc_pred4 = splinecalib.predict(aya_kmc_pred4)

# Compare against the in-hospital mortality labels (target column 3, the same
# column used when scoring these predictions with auc_plot); column 0 is AKI.
plot_calibration_curve_with_ci2(aya_test0.iloc[:, [3]].to_numpy().flatten(), calibrated_single_inhos, prefix="single_inhos_snuh")
plot_calibration_curve_with_ci2(aya_nowon0.iloc[:, [3]].to_numpy().flatten(), calibrated_aya_nowon_pred4, prefix="single_inhos_nowon")
plot_calibration_curve_with_ci2(aya_kmc0.iloc[:, [3]].to_numpy().flatten(), calibrated_aya_kmc_pred4, prefix="single_inhos_kmc")


In [None]:
# Spline calibration for in hospital mortality
def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95, prefix=None):
    """Plot a calibration curve with 95% error bars beside a probability histogram.

    Parameters
    ----------
    actual_labels : array-like
        Binary ground-truth labels.
    predicted_probabilities : array-like
        Predicted probabilities, binned uniformly on [0, 1].
    n_bins : int, default 9
        Number of uniform bins requested from ``calibration_curve``; the title
        reports the number of non-empty bins actually returned.
    confidence : float, default 0.95
        Accepted for interface compatibility; the error bars always use
        z = 1.96 (normal-approximation 95% interval).
    prefix : str or None, default None
        When given, the figure is saved as ``"{prefix}_calibration_curve.png"``
        (300 dpi) before being shown. Optional, matching the other definitions
        of this function in the notebook.
    """
    actual_labels = np.array(actual_labels)
    predicted_probabilities = np.array(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )

    bins_count = len(fraction_of_positives)

    # calibration_curve omits empty bins, so returned point i is not necessarily
    # bin i. Count samples per bin with the same uniform binning rule and keep
    # only the non-empty bins so each point is paired with its own sample size.
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities)
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]

    z = 1.96
    x_values = list(mean_predicted_value)
    y_values = list(fraction_of_positives)
    # Half-width of the normal-approximation interval for a binomial proportion.
    y_err = list(z * np.sqrt(fraction_of_positives * (1 - fraction_of_positives) / bin_counts))

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    # Left panel: calibration curve against the identity line.
    plt.subplot(1, 2, 1)
    plt.errorbar(
        x_values, y_values, yerr=y_err, fmt='o', color='tab:blue', ecolor='gray',
        capsize=4, markersize=7, linewidth=2, alpha=0.5, label="Spline Calibration"
    )

    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")
    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    plt.title(f"Spline-Scaled Calibration Curve with 95% CI (Bins: {bins_count})", fontsize=16)
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    # Right panel: distribution of the probabilities being evaluated.
    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()

    if prefix is not None:
        plt.savefig(f"{prefix}_calibration_curve.png", dpi=300)
    plt.show()

# Fit a spline calibrator on (forthval, y1_train) as returned by the
# inhos_mortality single_lgb_fun call, then calibrate each cohort's predictions.
splinecalib = mli.SplineCalib()
splinecalib.fit(forthval, y1_train)

calibrated_single_inhos = splinecalib.predict(single_inhos)
calibrated_aya_nowon_pred4 = splinecalib.predict(aya_nowon_pred4)
calibrated_aya_kmc_pred4 = splinecalib.predict(aya_kmc_pred4)

# Compare against the in-hospital mortality labels (target column 3, the same
# column used when scoring these predictions with auc_plot); column 0 is AKI.
# Figures are shown only, not saved.
plot_calibration_curve_with_ci2(aya_test0.iloc[:, [3]].to_numpy().flatten(), calibrated_single_inhos)
plot_calibration_curve_with_ci2(aya_nowon0.iloc[:, [3]].to_numpy().flatten(), calibrated_aya_nowon_pred4)
plot_calibration_curve_with_ci2(aya_kmc0.iloc[:, [3]].to_numpy().flatten(), calibrated_aya_kmc_pred4)


In [None]:
# Display the metrics table accumulated via pd.concat in the cells above.
result

# 4. Multi-Task GBM

In [None]:
def mtgbm3_kmc(lgbmmt3_model_d, dkmc, y1_kmc):
    """Return per-task probabilities of a trained multi-task model for ``dkmc``.

    Parameters
    ----------
    lgbmmt3_model_d : object
        Trained model exposing ``predict``; its output is treated as raw scores.
    dkmc : pandas.DataFrame
        Feature matrix; all of its columns are passed to the model.
    y1_kmc : any
        Unused; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        Sigmoid of the raw scores, with the same shape as the model output
        (one column per task). The width is no longer fixed at 3.
    """
    raw_scores = lgbmmt3_model_d.predict(dkmc)
    # Logistic link: raw score -> probability.
    return 1. / (1. + np.exp(-raw_scores))

def self_metric3(preds, train_data):
    """Custom evaluation metric: ROC AUC of each of the three tasks.

    ``preds`` and the labels from ``train_data.get_label()`` are reshaped to
    ``(num_labels, -1)`` and transposed so that each column is one task. Raw
    scores are passed through a sigmoid before scoring.

    Returns a list of ``(name, value, is_higher_better)`` tuples named
    ``auroc1`` .. ``auroc3``, with values rounded to three decimals.
    """
    label_matrix = train_data.get_label().reshape((num_labels, -1)).transpose()
    score_matrix = preds.reshape((num_labels, -1)).transpose()

    rounded_aucs = []
    for task in range(3):
        task_prob = 1. / (1. + np.exp(-score_matrix[:, task]))
        rounded_aucs.append(round(roc_auc_score(label_matrix[:, task], task_prob), 3))

    return [
        ('auroc{}'.format(position + 1), value, True)
        for position, value in enumerate(rounded_aucs)
    ]

def calculate_gradient(li, pi):
    """Return the negated residual ``-(li - pi)`` of labels ``li`` and predictions ``pi``."""
    residual = li - pi
    return -1 * residual

def calculate_hessian(li, pi):
    """Return an array of ones with the shape and dtype of ``pi`` (``li`` is unused)."""
    return np.ones_like(pi)

def f(corr, weights):
    """Return the sum of the element-wise product of ``corr`` and ``weights``."""
    weighted = corr * weights
    return np.sum(weighted)

def clip_corr(corr):
    """Clip ``corr`` element-wise to the closed interval [0.1, 1]."""
    lower_bound, upper_bound = 0.1, 1
    return np.clip(corr, lower_bound, upper_bound)

def calculate_beta(epoch):
    """Return a step-wise beta that repeats every 100 epochs.

    Within each 100-epoch cycle the value starts at 0.4 and has 0.5 subtracted
    once the second half of the cycle begins; the result is floored at 0.2.
    """
    max_beta, min_beta = 0.4, 0.2
    decay_rate = 0.5
    decay_steps = 100

    half_cycle = decay_steps / 2
    completed_halves = (epoch % decay_steps) // half_cycle
    return np.maximum(min_beta, max_beta - decay_rate * completed_halves)

def apply_threshold(predictions, threshold):
    """Return 1 where ``predictions`` is strictly greater than ``threshold``, else 0."""
    is_positive = predictions > threshold
    return is_positive.astype(int)

def mymse3(preds, train_data, ep=0):
    """Custom multi-task objective: logistic gradients and hessians for three tasks.

    ``preds`` and the labels are reshaped to ``(num_labels, -1)`` and transposed
    so each column is one task; labels are clipped to [0, 1]. ``ep`` is unused.

    Returns
    -------
    grad, hess : per-sample gradient / hessian summed over tasks with weights
        ``[1, 1, 0.01 * 0.2]``.
    grad2 : per-task gradients (unit weights), transposed and flattened to 1-D.
    hess2 : per-task hessians as a 2-D (samples x tasks) array.
    """
    targets = np.clip(train_data.get_label().reshape((num_labels, -1)).transpose(), 0, 1)
    raw_scores = preds.reshape((num_labels, -1)).transpose()
    probs = 1. / (1. + np.exp(-raw_scores))

    residual = probs - targets
    curvature = probs * (1. - probs)

    beta = 0.2
    shared_weights = np.array([1, 1, 0.01 * beta])
    task_weights = np.array([1.0, 1.0, 1.0])

    grad = np.sum(residual * shared_weights, axis=1)
    hess = np.sum(curvature * shared_weights, axis=1)
    per_task_grad = (residual * task_weights).transpose().reshape((-1))
    # NOTE(review): the per-task hessian is returned 2-D while the per-task
    # gradient is flattened task-major -- confirm this is what lightgbmmt expects.
    return grad, hess, per_task_grad, curvature


# Hyperparameters for MTGBM

In [None]:
# Parameter set passed to lgbmmt.train for the 3-task MT-GBM model.
pa_0115 = dict(
    # Task setup
    objective='custom',
    num_labels=3,
    tree_learner='serial2',
    boosting='gbdt',
    # Tree shape and learning rate
    max_depth=16,
    learning_rate=0.01,
    # Row / column subsampling
    bagging_fraction=0.7,
    feature_fraction=0.7,
    verbosity=-1,
    # Regularisation
    lambda_l1=0.7,
    lambda_l2=0.7,
    num_leaves=50,
    min_child_weight=0.3,
    min_data_in_leaf=40,
    metric_freq=9,
    # Reproducibility and runtime
    data_random_seed=SEED,
    num_threads=-1,
    num_boost_round=600,
)


In [None]:
def MTGBM_3TASK(folds_features, folds_targets, features, dtest, y3_test, lgbmmt3_params2, cate):
    """Cross-validate a 3-task MT-GBM model, then fit it on all folds and score ``dtest``.

    For each of the ``FOLDS_NUM`` folds a model is trained on the remaining
    folds and the ROC AUC of every task on the held-out fold is printed. A final
    model is then trained on all folds; one threshold per task is selected with
    ``new_threshold`` from its training-set probabilities.

    Parameters
    ----------
    folds_features : sequence of pandas.DataFrame
        Feature frames, one per fold.
    folds_targets : sequence of pandas.DataFrame
        Target frames, one per fold. Must contain ``'total_aki'``; the columns
        at positions 1 and 2 are used as the two auxiliary tasks.
    features : list
        Feature columns used for training and prediction.
    dtest : pandas.DataFrame
        Test features; must contain ``features``.
    y3_test : pandas.DataFrame
        Not used by the computation; kept for interface compatibility.
    lgbmmt3_params2 : dict
        Parameters forwarded to ``lgbmmt.train``.
    cate : list
        Categorical feature names forwarded to ``lgbmmt.Dataset``.

    Returns
    -------
    tuple
        ``(y_lgbmtsub, lgbmmt3_model, best_thresholds)``: sigmoid-transformed
        test predictions, the final model, and ``[[t0, t1, t2]]`` thresholds.
    """
    foldsn = FOLDS_NUM
    sub_task = [1, 2]
    best_thresholds = [[0.0, 0.0, 0.0]]

    def _stack_labels(targets):
        # Label matrix: 'total_aki' first, then the auxiliary task columns.
        return np.concatenate(
            [targets['total_aki'].values.reshape((-1, 1)), targets.iloc[:, sub_task].values],
            axis=1,
        )

    # --- Cross-validation: report held-out AUC per task for each fold. ---
    for i in range(foldsn):
        other_folds = [j for j in range(foldsn) if j != i]
        X_tr = pd.concat([folds_features[j] for j in other_folds], axis=0)[features]
        targets_tr = pd.concat([folds_targets[j] for j in other_folds], axis=0)
        X_vl = folds_features[i][features]
        targets_vl = folds_targets[i]

        k_train = lgbmmt.Dataset(X_tr, label=_stack_labels(targets_tr), categorical_feature=cate)
        k_valid = lgbmmt.Dataset(X_vl, label=_stack_labels(targets_vl), categorical_feature=cate)

        fold_model = lgbmmt.train(
            lgbmmt3_params2,
            train_set=k_train,
            valid_sets=[k_valid],
            verbose_eval=100,
            fobj=mymse3,
            feval=self_metric3,
        )
        fold_model.set_num_labels(num_labels)

        # The model output is scored directly, as in the original code.
        fold_scores = fold_model.predict(X_vl)
        y_vl2 = targets_vl.iloc[:, sub_task]
        print('ROC AUC {}'.format(roc_auc_score(targets_vl['total_aki'], fold_scores[:, 0])))
        print('ROC AUC {}'.format(roc_auc_score(y_vl2.iloc[:, 0], fold_scores[:, 1])))
        print('ROC AUC {}'.format(roc_auc_score(y_vl2.iloc[:, 1], fold_scores[:, 2])))

    # --- Final model on all folds. ---
    dtrain = pd.concat([folds_features[j] for j in range(foldsn)], axis=0)[features]
    y3_train = pd.concat([folds_targets[j] for j in range(foldsn)], axis=0)
    bayes_dtrain = lgbmmt.Dataset(dtrain, label=y3_train.to_numpy(), categorical_feature=cate)

    lgbmmt3_model = lgbmmt.train(
        lgbmmt3_params2,
        train_set=bayes_dtrain,
        verbose_eval=100,
        fobj=mymse3,
        feval=self_metric3,
    )
    lgbmmt3_model.set_num_labels(num_labels)

    # Thresholds are chosen on the training-set probabilities of the final model.
    fortmp = lgbmmt3_model.predict(dtrain)
    forthval = (1. / (1. + np.exp(-fortmp)))
    for task in range(3):
        best_thresholds[0][task] = new_threshold(y3_train.iloc[:, task], forthval[:, task])

    tmp = lgbmmt3_model.predict(dtest[features])
    y_lgbmtsub = (1. / (1. + np.exp(-tmp)))

    return y_lgbmtsub, lgbmmt3_model, best_thresholds


In [None]:
# Drop the target column at position 1 from every fold, leaving the columns
# passed to the multi-task model; then preview the first fold.
folds_targets3 = [fold.drop(columns=fold.columns[1]) for fold in folds_targets]
folds_targets3[0]

In [None]:
# Same column drop (position 1) for the SNU test targets; preview the result.
aya_test03 = aya_test0.drop(columns=aya_test0.columns[1])
aya_test03

In [None]:
# Train the 3-task MT-GBM model and score the SNU test set.
cate = []

y_pred, model, mt_thold = MTGBM_3TASK(
    folds_features=folds_features,
    folds_targets=folds_targets3,
    features=ft0,
    dtest=adtest0[ft0],
    y3_test=aya_test03,
    lgbmmt3_params2=pa_0115,
    cate=cate,
)

In [None]:
# Drop the target column at position 1 for the Nowon and KMC cohorts, then
# preview both frames.
aya_nowon03 = aya_nowon0.drop(columns=aya_nowon0.columns[1])
aya_kmc03 = aya_kmc0.drop(columns=aya_kmc0.columns[1])
aya_nowon03, aya_kmc03

In [None]:
# MT-GBM probabilities for the Nowon and KMC cohorts (the target frames are
# passed through but not used by mtgbm3_kmc).
y_pred_sub_n = mtgbm3_kmc(lgbmmt3_model_d=model, dkmc=adnowon0[ft0], y1_kmc=aya_nowon03)
y_pred_sub_k = mtgbm3_kmc(lgbmmt3_model_d=model, dkmc=adkmc0[ft0], y1_kmc=aya_kmc03)

In [None]:
# Build an ASA-class baseline score per cohort: fill missing ASA with 2 and
# divide by 5. The source frames are modified in place.

# SNU
# Recode ASA class 6 as 1. NOTE(review): only the SNU frame gets this recode;
# the Nowon and KMC blocks below do not -- confirm this is intentional.
asa_test0.loc[asa_test0['asa'] == 6, 'asa'] = 1
asa_test0['asa'] = np.nan_to_num(asa_test0['asa'], nan=2)
# `scaler` is created but not used in this cell; it does overwrite any
# notebook-level `scaler` defined earlier.
scaler = MinMaxScaler()
asa_reshaped = asa_test0['asa'].values.reshape(-1, 1)
asa_scaled = asa_reshaped / 5
print(np.unique(asa_scaled))

# Nowon
asa_nowon0['asa'] = np.nan_to_num(asa_nowon0['asa'], nan=2)
scaler = MinMaxScaler()
asa_reshaped_n = asa_nowon0['asa'].values.reshape(-1, 1)
asa_scaled_n = asa_reshaped_n / 5
print(np.unique(asa_scaled_n))

# KMC
asa_kmc0['asa'] = np.nan_to_num(asa_kmc0['asa'], nan=2)
scaler = MinMaxScaler()
asa_reshaped_k = asa_kmc0['asa'].values.reshape(-1, 1)
asa_scaled_k = asa_reshaped_k / 5
print(np.unique(asa_scaled_k))


In [None]:
# DeLong comparisons of each model (single-task and MT-GBM) against the scaled
# ASA score, per cohort and per outcome. Label columns 0 / 2 / 3 of the target
# frames are paired with MT-GBM output columns 0 / 1 / 2.
# NOTE(review): every call except the last is a bare expression whose return
# value is discarded; the results are visible only if delong_roc_test prints
# them itself -- confirm, or wrap the calls in print().

# SNU Delong
print("delong pvalue SNU_aki_/ single vs asa:")
delong_roc_test(aya_test0.iloc[:, [0]].astype(int).to_numpy().flatten(), single_aki, asa_scaled.flatten())
print("delong pvalue SNU_aki_/ mtgbm vs asa:")
delong_roc_test(aya_test0.iloc[:, [0]].astype(int).to_numpy().flatten(), y_pred[:, 0:1].flatten(), asa_scaled.flatten())

print("delong pvalue SNU_prf_/ single vs asa:")
delong_roc_test(aya_test0.iloc[:, [2]].astype(int).to_numpy().flatten(), single_prf, asa_scaled.flatten())
print("delong pvalue SNU_prf_/ mtgbm vs asa:")
delong_roc_test(aya_test0.iloc[:, [2]].astype(int).to_numpy().flatten(), y_pred[:, 1:2].flatten(), asa_scaled.flatten())

print("delong pvalue SNU_inhos_/ single vs asa:")
delong_roc_test(aya_test0.iloc[:, [3]].astype(int).to_numpy().flatten(), single_inhos, asa_scaled.flatten())
print("delong pvalue SNU_inhos_/ mtgbm vs asa:")
delong_roc_test(aya_test0.iloc[:, [3]].astype(int).to_numpy().flatten(), y_pred[:, 2:3].flatten(), asa_scaled.flatten())

# Nowon Delong
print("delong pvalue nowon_aki_/ single vs asa:")
delong_roc_test(aya_nowon0.iloc[:, [0]].astype(int).to_numpy().flatten(), aya_nowon_pred, asa_scaled_n.flatten())
print("delong pvalue nowon_aki_/ mtgbm vs asa:")
delong_roc_test(aya_nowon0.iloc[:, [0]].astype(int).to_numpy().flatten(), y_pred_sub_n[:, 0:1].flatten(), asa_scaled_n.flatten())

print("delong pvalue nowon_prf_/ single vs asa:")
delong_roc_test(aya_nowon0.iloc[:, [2]].astype(int).to_numpy().flatten(), aya_nowon_pred3, asa_scaled_n.flatten())
print("delong pvalue nowon_prf_/ mtgbm vs asa:")
delong_roc_test(aya_nowon0.iloc[:, [2]].astype(int).to_numpy().flatten(), y_pred_sub_n[:, 1:2].flatten(), asa_scaled_n.flatten())

print("delong pvalue nowon_inhos_/ single vs asa:")
delong_roc_test(aya_nowon0.iloc[:, [3]].astype(int).to_numpy().flatten(), aya_nowon_pred4, asa_scaled_n.flatten())
print("delong pvalue nowon_inhos_/ mtgbm vs asa:")
delong_roc_test(aya_nowon0.iloc[:, [3]].astype(int).to_numpy().flatten(), y_pred_sub_n[:, 2:3].flatten(), asa_scaled_n.flatten())

# KMC Delong
print("delong pvalue kmc_aki_/ single vs asa:")
delong_roc_test(aya_kmc0.iloc[:, [0]].astype(int).to_numpy().flatten(), aya_kmc_pred, asa_scaled_k.flatten())
print("delong pvalue kmc_aki_/ mtgbm vs asa:")
delong_roc_test(aya_kmc0.iloc[:, [0]].astype(int).to_numpy().flatten(), y_pred_sub_k[:, 0:1].flatten(), asa_scaled_k.flatten())

print("delong pvalue kmc_prf_/ single vs asa:")
delong_roc_test(aya_kmc0.iloc[:, [2]].astype(int).to_numpy().flatten(), aya_kmc_pred3, asa_scaled_k.flatten())
print("delong pvalue kmc_prf_/ mtgbm vs asa:")
delong_roc_test(aya_kmc0.iloc[:, [2]].astype(int).to_numpy().flatten(), y_pred_sub_k[:, 1:2].flatten(), asa_scaled_k.flatten())

print("delong pvalue kmc_inhos_/ single vs asa:")
delong_roc_test(aya_kmc0.iloc[:, [3]].astype(int).to_numpy().flatten(), aya_kmc_pred4, asa_scaled_k.flatten())
print("delong pvalue kmc_inhos_/ mtgbm vs asa:")
delong_roc_test(aya_kmc0.iloc[:, [3]].astype(int).to_numpy().flatten(), y_pred_sub_k[:, 2:3].flatten(), asa_scaled_k.flatten())


In [None]:
# ROC curves on the SNU test set, AKI outcome: MT-GBM (task 0) vs. the
# single-task model.
fpr_aki, tpr_aki, _ = roc_curve(aya_test03.iloc[:, 0], single_aki)
fpr_mt, tpr_mt, _ = roc_curve(aya_test03.iloc[:, 0], y_pred[:, 0])
roc_auc_aki, roc_auc_mt = auc(fpr_aki, tpr_aki), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_aki_str = f'{roc_auc_aki:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_aki, tpr_aki,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_aki_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison _ aki _snu')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_aki_snu.jpg', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.metrics import roc_curve, auc

# Global matplotlib styling (white background, serif font, frameless legend),
# applied in the same key order as before.
mpl.rcParams.update({
    'figure.facecolor': 'white',
    'axes.facecolor': 'white',
    'axes.edgecolor': 'black',
    'axes.labelcolor': 'black',
    'axes.linewidth': 1.0,
    'font.family': 'serif',
    'font.size': 12,
    'legend.fontsize': 10,
    'xtick.color': 'black',
    'ytick.color': 'black',
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.frameon': False,
    'savefig.dpi': 300,
})

# Re-draw the ROC comparison from the fpr/tpr arrays and AUC strings that are
# currently defined in the notebook.
plt.figure(figsize=(6.5, 5))
plt.plot(fpr_mt, tpr_mt, color='navy', lw=2, label=f'MT-GBM Model (AUC = {roc_auc_mt_str})')
plt.plot(fpr_aki, tpr_aki, color='darkorange', lw=2, label=f'Single Prediction Model (AUC = {roc_auc_aki_str})')
plt.plot([0, 1], [0, 1], color='gray', lw=1.5, linestyle='--')  # chance diagonal

plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.02)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison for AKI Prediction', fontsize=14, pad=10)
plt.legend(loc='lower right')
plt.tight_layout()
plt.show()


In [None]:
# ROC curves on the Nowon cohort, AKI outcome: MT-GBM (task 0) vs. the
# single-task model.
fpr_aki, tpr_aki, _ = roc_curve(aya_nowon03.iloc[:, 0], aya_nowon_pred)
fpr_mt, tpr_mt, _ = roc_curve(aya_nowon03.iloc[:, 0], y_pred_sub_n[:, 0])
roc_auc_aki, roc_auc_mt = auc(fpr_aki, tpr_aki), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_aki_str = f'{roc_auc_aki:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_aki, tpr_aki,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_aki_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison aki nowon')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_aki_nowon.jpg', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# ROC curves on the KMC cohort, AKI outcome: MT-GBM (task 0) vs. the
# single-task model.
fpr_aki, tpr_aki, _ = roc_curve(aya_kmc03.iloc[:, 0], aya_kmc_pred)
fpr_mt, tpr_mt, _ = roc_curve(aya_kmc03.iloc[:, 0], y_pred_sub_k[:, 0])
roc_auc_aki, roc_auc_mt = auc(fpr_aki, tpr_aki), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_aki_str = f'{roc_auc_aki:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_aki, tpr_aki,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_aki_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison aki_kmc')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_aki_kmc.jpg', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# ROC curves on the SNU test set, PRF outcome: MT-GBM (task 1) vs. the
# single-task model.
fpr_prf, tpr_prf, _ = roc_curve(aya_test03.iloc[:, 1], single_prf)
fpr_mt, tpr_mt, _ = roc_curve(aya_test03.iloc[:, 1], y_pred[:, 1])
roc_auc_prf, roc_auc_mt = auc(fpr_prf, tpr_prf), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_prf_str = f'{roc_auc_prf:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_prf, tpr_prf,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_prf_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison prf _ snu')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_prf_snu.jpg', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# ROC curves on the Nowon cohort, PRF outcome: MT-GBM (task 1) vs. the
# single-task model. The *_aki variable names are reused for the single model.
fpr_aki, tpr_aki, _ = roc_curve(aya_nowon03.iloc[:, 1], aya_nowon_pred3)
fpr_mt, tpr_mt, _ = roc_curve(aya_nowon03.iloc[:, 1], y_pred_sub_n[:, 1])
roc_auc_aki, roc_auc_mt = auc(fpr_aki, tpr_aki), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_aki_str = f'{roc_auc_aki:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_aki, tpr_aki,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_aki_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison prf nowon')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_prf_nowon.jpg', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# ROC curves on the KMC cohort, PRF outcome: MT-GBM (task 1) vs. the
# single-task model. The *_aki variable names are reused for the single model.
fpr_aki, tpr_aki, _ = roc_curve(aya_kmc03.iloc[:, 1], aya_kmc_pred3)
fpr_mt, tpr_mt, _ = roc_curve(aya_kmc03.iloc[:, 1], y_pred_sub_k[:, 1])
roc_auc_aki, roc_auc_mt = auc(fpr_aki, tpr_aki), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_aki_str = f'{roc_auc_aki:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_aki, tpr_aki,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_aki_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison prf_kmc')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_prf_kmc.jpg', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# ROC curves on the SNU test set, in-hospital mortality: MT-GBM (task 2) vs.
# the single-task model.
fpr_inhos, tpr_inhos, _ = roc_curve(aya_test03.iloc[:, 2], single_inhos)
fpr_mt, tpr_mt, _ = roc_curve(aya_test03.iloc[:, 2], y_pred[:, 2])
roc_auc_inhos, roc_auc_mt = auc(fpr_inhos, tpr_inhos), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_inhos_str = f'{roc_auc_inhos:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_inhos, tpr_inhos,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_inhos_str})',
)
# Disabled ASA baseline curve, kept for reference:
# plt.plot(fpr_asa, tpr_asa, color='green', lw=2, label=f'ASA class (AUC = {roc_auc_asa_str})')
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison inhos snu')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_inhos_mortality_snu.jpg', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# ROC curves on the Nowon cohort, in-hospital mortality: MT-GBM (task 2) vs.
# the single-task model. The *_aki variable names are reused for the single model.
fpr_aki, tpr_aki, _ = roc_curve(aya_nowon03.iloc[:, 2], aya_nowon_pred4)
fpr_mt, tpr_mt, _ = roc_curve(aya_nowon03.iloc[:, 2], y_pred_sub_n[:, 2])
roc_auc_aki, roc_auc_mt = auc(fpr_aki, tpr_aki), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_aki_str = f'{roc_auc_aki:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_aki, tpr_aki,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_aki_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison inhos nowon')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_inhos_mortality_nowon.jpg', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# ROC curves on the KMC cohort, in-hospital mortality: MT-GBM (task 2) vs.
# the single-task model. The *_aki variable names are reused for the single model.
fpr_aki, tpr_aki, _ = roc_curve(aya_kmc03.iloc[:, 2], aya_kmc_pred4)
fpr_mt, tpr_mt, _ = roc_curve(aya_kmc03.iloc[:, 2], y_pred_sub_k[:, 2])
roc_auc_aki, roc_auc_mt = auc(fpr_aki, tpr_aki), auc(fpr_mt, tpr_mt)

# AUCs formatted to four decimals for the legend.
roc_auc_aki_str = f'{roc_auc_aki:.4f}'
roc_auc_mt_str = f'{roc_auc_mt:.4f}'

plt.figure(figsize=(8, 6))
plt.plot(
    fpr_mt, tpr_mt,
    color='navy', lw=2,
    label=f'MT-GBM Model (AUC = {roc_auc_mt_str})',
)
plt.plot(
    fpr_aki, tpr_aki,
    color='darkorange', lw=2,
    label=f'Single Prediction Model (AUC = {roc_auc_aki_str})',
)
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')  # chance diagonal
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison inhos_kmc')
plt.legend(loc='lower right')
plt.savefig('roc_curve_comparison_inhos_mortality_kmc.jpg', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Calibration plots of the uncalibrated MT-GBM AKI probabilities (output
# column 0) against the AKI labels (target column 0) for each cohort.
#snu ci calibration
plot_calibration_curve_with_ci2(aya_test03.iloc[:, [0]].to_numpy().flatten(), y_pred[:, 0:1].flatten())
#nowon ci calibration
plot_calibration_curve_with_ci2(aya_nowon03.iloc[:, [0]].to_numpy().flatten(),  y_pred_sub_n[:, 0:1].flatten())
#kmc ci calibration
plot_calibration_curve_with_ci2(aya_kmc03.iloc[:, [0]].to_numpy().flatten(),  y_pred_sub_k[:, 0:1].flatten())

# DeLong comparison of the single-task AKI model vs. MT-GBM on the same labels.
# NOTE(review): only the final call's return value is displayed by the notebook;
# the first two are discarded unless delong_roc_test prints -- confirm.
#delong
print("delong pvalue SNU:")
delong_roc_test(aya_test03.iloc[:, [0]].astype(int).to_numpy().flatten(),  single_aki,  y_pred[:, 0:1].flatten() )
print("delong pvalue nowon:")
delong_roc_test(aya_nowon03.iloc[:, [0]].astype(int).to_numpy().flatten() , aya_nowon_pred, y_pred_sub_n[:, 0:1].flatten())
print("delong pvalue KMC:")
delong_roc_test(aya_kmc03.iloc[:, [0]].astype(int).to_numpy().flatten() , aya_kmc_pred, y_pred_sub_k[:, 0:1].flatten())

In [None]:
# Spline calibration for AKI of MTGBM

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.calibration import calibration_curve

def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95, prefix=None):
    """Plot a calibration curve with 95% error bars beside a probability histogram.

    Parameters
    ----------
    actual_labels : array-like
        Binary ground-truth labels.
    predicted_probabilities : array-like
        Predicted probabilities, binned uniformly on [0, 1].
    n_bins : int, default 9
        Number of uniform bins requested from ``calibration_curve``.
    confidence : float, default 0.95
        Accepted for interface compatibility; the error bars always use
        z = 1.96 (normal-approximation 95% interval).
    prefix : str or None, default None
        When given, the figure is saved as ``"{prefix}_calibration_curve.png"``
        (300 dpi) before being shown.
    """
    actual_labels = np.array(actual_labels)
    predicted_probabilities = np.array(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )

    # calibration_curve omits empty bins, so its outputs may be shorter than
    # n_bins. Count samples per bin with the same uniform binning rule and keep
    # only the non-empty bins so every plotted point is paired with its own
    # sample size (indexing by range(n_bins) would raise IndexError or misalign).
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities)
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]

    z = 1.96
    x_values = list(mean_predicted_value)
    y_values = list(fraction_of_positives)
    # Half-width of the normal-approximation interval for a binomial proportion.
    y_err = list(z * np.sqrt(fraction_of_positives * (1 - fraction_of_positives) / bin_counts))

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    # Left panel: calibration curve against the identity line.
    plt.subplot(1, 2, 1)
    plt.errorbar(
        x_values, y_values, yerr=y_err, fmt='o', color='tab:blue', ecolor='gray',
        capsize=4, markersize=7, linewidth=2, alpha=0.5, label="Spline Calibration"
    )
    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")
    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    plt.title(f"Spline-Scaled Calibration Curve with 95% CI (Bins: {n_bins})", fontsize=16)
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    # Right panel: distribution of the probabilities being evaluated.
    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()

    if prefix is not None:
        plt.savefig(f"{prefix}_calibration_curve.png", dpi=300)

    plt.show()


# Fit the spline calibrator on the MT-GBM model's own training-set AKI
# probabilities. MTGBM_3TASK does not return its training predictions, so the
# globals `forthval` / `y1_train` still hold the values from the last
# single-task cell that ran; fitting on them would calibrate the MT-GBM scores
# with another model's score distribution. Recompute them from `model` instead.
mt_train_features = pd.concat(
    [folds_features[j] for j in range(len(folds_targets3))], axis=0
)[ft0]
mt_train_labels = pd.concat(folds_targets3, axis=0).iloc[:, 0].to_numpy()
mt_train_pred = mtgbm3_kmc(model, mt_train_features, mt_train_labels)[:, 0]

splinecalib = mli.SplineCalib()
splinecalib.fit(mt_train_pred, mt_train_labels)

# Calibrate the AKI task (output column 0) for each cohort.
calibrated_y_pred = splinecalib.predict(y_pred[:, 0:1].flatten())
calibrated_y_pred_sub_n = splinecalib.predict(y_pred_sub_n[:, 0:1].flatten())
calibrated_y_pred_sub_k = splinecalib.predict(y_pred_sub_k[:, 0:1].flatten())

plot_calibration_curve_with_ci2(aya_test03.iloc[:, [0]].to_numpy().flatten(), calibrated_y_pred, prefix="mtgbm_snu_aki")
plot_calibration_curve_with_ci2(aya_nowon03.iloc[:, [0]].to_numpy().flatten(), calibrated_y_pred_sub_n, prefix="mtgbm_nowon_aki")
plot_calibration_curve_with_ci2(aya_kmc03.iloc[:, [0]].to_numpy().flatten(), calibrated_y_pred_sub_k, prefix="mtgbm_kmc_aki")


In [None]:
## Spline calibration for PRF of MTGBM

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.calibration import calibration_curve

def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95, prefix=None):
    """Plot a binned calibration curve with error bars next to a score histogram.

    Left panel: observed fraction of positives versus mean predicted
    probability for every non-empty uniform-width bin, with a
    normal-approximation binomial confidence interval as error bar and the
    diagonal drawn as the perfect-calibration reference. Right panel: a
    20-bin histogram of the predicted probabilities.

    Args:
        actual_labels: array-like of binary outcome labels.
        predicted_probabilities: array-like of predicted probabilities, one
            per label.
        n_bins: number of uniform-width bins on [0, 1] handed to
            ``calibration_curve``.
        confidence: two-sided confidence level of the error bars; must lie
            strictly between 0 and 1.
        prefix: when not None, the figure is saved to
            ``f"{prefix}_calibration_curve.png"`` at 300 dpi before it is shown.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: if ``confidence`` is not strictly between 0 and 1.
    """
    # Function-scope import so the cell does not depend on an earlier cell.
    from scipy.stats import norm

    if not 0.0 < confidence < 1.0:
        raise ValueError(f"confidence must be in (0, 1), got {confidence!r}")

    actual_labels = np.asarray(actual_labels)
    predicted_probabilities = np.asarray(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )
    bins_length = len(fraction_of_positives)

    # calibration_curve silently drops empty bins, so position i of its output
    # is not necessarily bin i. Count the samples per bin with the same
    # right-closed binning rule and drop the empty bins too, so that every
    # count lines up with its fraction. (Indexing the bin edges by output
    # position would attach the wrong sample size after any empty bin.)
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities.ravel())
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]

    # Two-sided normal quantile for the requested level (about 1.96 for 0.95).
    z = norm.ppf(0.5 + confidence / 2.0)

    x_values = np.asarray(mean_predicted_value, dtype=float)
    y_values = np.asarray(fraction_of_positives, dtype=float)
    # Half-width of the interval: z * sqrt(p * (1 - p) / n) per bin.
    y_err = z * np.sqrt(y_values * (1.0 - y_values) / bin_counts)

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    plt.subplot(1, 2, 1)
    plt.errorbar(
        x_values, y_values, yerr=y_err, fmt='o', color='tab:blue', ecolor='gray',
        capsize=4, markersize=7, linewidth=2, alpha=0.5, label="Spline Calibration"
    )
    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")
    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    # The title reports the level actually used; the default still reads "95%".
    plt.title(
        f"Spline-Scaled Calibration Curve with {confidence * 100:g}% CI (Bins: {bins_length})",
        fontsize=16,
    )
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()

    if prefix is not None:
        plt.savefig(f"{prefix}_calibration_curve.png", dpi=300)

    plt.show()

# Calibrate the MTGBM PRF scores (prediction column 1) of every cohort and plot
# them against the PRF labels.
# NOTE(review): the calibrator is fitted on (forthval, y1_train), the same pair
# the AKI cell uses. Confirm that these are the PRF validation scores/labels
# and not the AKI ones -- TODO confirm.
splinecalib = mli.SplineCalib()
splinecalib.fit(forthval, y1_train)

calibrated_y_pred = splinecalib.predict(y_pred[:, 1:2].flatten())
calibrated_y_pred_sub_n = splinecalib.predict(y_pred_sub_n[:, 1:2].flatten())
calibrated_y_pred_sub_k = splinecalib.predict(y_pred_sub_k[:, 1:2].flatten())

# Labels are taken from column 1 so they belong to the same task as the
# predictions; the DeLong and auc_plot cells pair label column 1 with
# prediction column 1 as well. Column 0 holds the AKI labels.
plot_calibration_curve_with_ci2(aya_test03.iloc[:, [1]].to_numpy().flatten(), calibrated_y_pred, prefix="mtgbm_snu_prf")
plot_calibration_curve_with_ci2(aya_nowon03.iloc[:, [1]].to_numpy().flatten(), calibrated_y_pred_sub_n, prefix="mtgbm_nowon_prf")
plot_calibration_curve_with_ci2(aya_kmc03.iloc[:, [1]].to_numpy().flatten(), calibrated_y_pred_sub_k, prefix="mtgbm_kmc_prf")


In [None]:
# Spline calibration for inhos mortality of MTGBM

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.calibration import calibration_curve

def plot_calibration_curve_with_ci2(actual_labels, predicted_probabilities, n_bins=9, confidence=0.95, prefix=None):
    """Plot a binned calibration curve with error bars next to a score histogram.

    Left panel: observed fraction of positives versus mean predicted
    probability for every non-empty uniform-width bin, with a
    normal-approximation binomial confidence interval as error bar and the
    diagonal drawn as the perfect-calibration reference. Right panel: a
    20-bin histogram of the predicted probabilities.

    Args:
        actual_labels: array-like of binary outcome labels.
        predicted_probabilities: array-like of predicted probabilities, one
            per label.
        n_bins: number of uniform-width bins on [0, 1] handed to
            ``calibration_curve``.
        confidence: two-sided confidence level of the error bars; must lie
            strictly between 0 and 1.
        prefix: when not None, the figure is saved to
            ``f"{prefix}_calibration_curve.png"`` at 300 dpi before it is shown.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: if ``confidence`` is not strictly between 0 and 1.
    """
    # Function-scope import so the cell does not depend on an earlier cell.
    from scipy.stats import norm

    if not 0.0 < confidence < 1.0:
        raise ValueError(f"confidence must be in (0, 1), got {confidence!r}")

    actual_labels = np.asarray(actual_labels)
    predicted_probabilities = np.asarray(predicted_probabilities)

    fraction_of_positives, mean_predicted_value = calibration_curve(
        actual_labels, predicted_probabilities, n_bins=n_bins, strategy='uniform'
    )
    bins_length = len(fraction_of_positives)

    # calibration_curve silently drops empty bins, so position i of its output
    # is not necessarily bin i. Count the samples per bin with the same
    # right-closed binning rule and drop the empty bins too, so that every
    # count lines up with its fraction. (Indexing the bin edges by output
    # position would attach the wrong sample size after any empty bin.)
    bin_edges = np.linspace(0, 1, n_bins + 1)
    bin_ids = np.searchsorted(bin_edges[1:-1], predicted_probabilities.ravel())
    bin_counts = np.bincount(bin_ids, minlength=n_bins)
    bin_counts = bin_counts[bin_counts > 0]

    # Two-sided normal quantile for the requested level (about 1.96 for 0.95).
    z = norm.ppf(0.5 + confidence / 2.0)

    x_values = np.asarray(mean_predicted_value, dtype=float)
    y_values = np.asarray(fraction_of_positives, dtype=float)
    # Half-width of the interval: z * sqrt(p * (1 - p) / n) per bin.
    y_err = z * np.sqrt(y_values * (1.0 - y_values) / bin_counts)

    sns.set_style('whitegrid')
    plt.figure(figsize=(14, 6))

    plt.subplot(1, 2, 1)
    plt.errorbar(
        x_values, y_values, yerr=y_err, fmt='o', color='tab:blue', ecolor='gray',
        capsize=4, markersize=7, linewidth=2, alpha=0.5, label="Spline Calibration"
    )
    plt.plot([0, 1], [0, 1], '--', color='gray', linewidth=2, label="Perfect Calibration")
    plt.xlabel("Predicted Probability", fontsize=14)
    plt.ylabel("Fraction of Positives", fontsize=14)
    # The title reports the level actually used; the default still reads "95%".
    plt.title(
        f"Spline-Scaled Calibration Curve with {confidence * 100:g}% CI (Bins: {bins_length})",
        fontsize=16,
    )
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.legend(loc="upper left", fontsize=12)

    plt.subplot(1, 2, 2)
    plt.hist(predicted_probabilities, bins=20, alpha=0.7, color='tab:blue', edgecolor='black')
    plt.xlabel("Calibrated Probability", fontsize=14)
    plt.ylabel("Count", fontsize=14)
    plt.title("Calibrated Probability Distribution", fontsize=16)

    plt.tight_layout()

    if prefix is not None:
        plt.savefig(f"{prefix}_calibration_curve.png", dpi=300)

    plt.show()

# Calibrate the MTGBM in-hospital mortality scores (prediction column 2) of
# every cohort and plot them against the matching labels.
# NOTE(review): the calibrator is fitted on (forthval, y1_train), the same pair
# the AKI cell uses. Confirm that these are the mortality validation
# scores/labels and not the AKI ones -- TODO confirm.
splinecalib = mli.SplineCalib()
splinecalib.fit(forthval, y1_train)

calibrated_y_pred = splinecalib.predict(y_pred[:, 2:3].flatten())
calibrated_y_pred_sub_n = splinecalib.predict(y_pred_sub_n[:, 2:3].flatten())
calibrated_y_pred_sub_k = splinecalib.predict(y_pred_sub_k[:, 2:3].flatten())

# Labels are taken from column 2 so they belong to the same task as the
# predictions; the DeLong and auc_plot cells pair label column 2 with
# prediction column 2 as well. Column 0 holds the AKI labels.
plot_calibration_curve_with_ci2(aya_test03.iloc[:, [2]].to_numpy().flatten(), calibrated_y_pred, prefix="mtgbm_snu_inhos")
plot_calibration_curve_with_ci2(aya_nowon03.iloc[:, [2]].to_numpy().flatten(), calibrated_y_pred_sub_n, prefix="mtgbm_nowon_inhos")
plot_calibration_curve_with_ci2(aya_kmc03.iloc[:, [2]].to_numpy().flatten(), calibrated_y_pred_sub_k, prefix="mtgbm_kmc_inhos")


In [None]:
# DeLong comparison for the AKI task (label column 0), one call per cohort.
# Call shape is delong_roc_test(actual, preds_a, preds_b): preds_a is the
# comparison model's scores (single_aki / aya_nowon_pred / aya_kmc_pred --
# presumably the single-task model; verify where they are defined) and preds_b
# is MTGBM prediction column 0.
# NOTE(review): a notebook cell echoes only its final bare expression, so unless
# delong_roc_test prints its own result the SNU and Nowon values below are
# computed but never displayed -- confirm.
print("AKI - delong pvalue SNU:")
delong_roc_test(aya_test03.iloc[:, [0]].astype(int).to_numpy().flatten(),  single_aki,  y_pred[:, 0:1].flatten() )
print("AKI - delong pvalue nowon:")
delong_roc_test(aya_nowon03.iloc[:, [0]].astype(int).to_numpy().flatten() , aya_nowon_pred, y_pred_sub_n[:, 0:1].flatten())
print("AKI - delong pvalue KMC:")
delong_roc_test(aya_kmc03.iloc[:, [0]].astype(int).to_numpy().flatten() , aya_kmc_pred, y_pred_sub_k[:, 0:1].flatten())

In [None]:
# DeLong comparison for the PRF task (label column 1), one call per cohort.
# Call shape is delong_roc_test(actual, preds_a, preds_b): preds_a is the
# comparison model's scores (single_prf / aya_nowon_pred3 / aya_kmc_pred3 --
# presumably the single-task model; verify where they are defined) and preds_b
# is MTGBM prediction column 1.
# NOTE(review): a notebook cell echoes only its final bare expression, so unless
# delong_roc_test prints its own result the SNU and Nowon values below are
# computed but never displayed -- confirm.
print("PRF - delong pvalue SNU_prf:")
delong_roc_test(aya_test03.iloc[:, [1]].astype(int).to_numpy().flatten(),  single_prf,  y_pred[:, 1:2].flatten() )
print("PRF - delong pvalue nowon_prf:")
delong_roc_test(aya_nowon03.iloc[:, [1]].astype(int).to_numpy().flatten() , aya_nowon_pred3, y_pred_sub_n[:, 1:2].flatten())
print("PRF - delong pvalue KMC_prf:")
delong_roc_test(aya_kmc03.iloc[:, [1]].astype(int).to_numpy().flatten() , aya_kmc_pred3, y_pred_sub_k[:, 1:2].flatten())


In [None]:
# DeLong comparison for the in-hospital mortality task (label column 2), one
# call per cohort. Call shape is delong_roc_test(actual, preds_a, preds_b):
# preds_a is the comparison model's scores (single_inhos / aya_nowon_pred4 /
# aya_kmc_pred4 -- presumably the single-task model; verify where they are
# defined) and preds_b is MTGBM prediction column 2.
# NOTE(review): a notebook cell echoes only its final bare expression, so unless
# delong_roc_test prints its own result the SNU and Nowon values below are
# computed but never displayed -- confirm.
print("Inhos mortality - delong pvalue SNU_inhos:")
delong_roc_test(aya_test03.iloc[:, [2]].astype(int).to_numpy().flatten(),  single_inhos,  y_pred[:, 2:3].flatten() )
print("Inhos mortality - delong pvalue nowon_inhos:")
delong_roc_test(aya_nowon03.iloc[:, [2]].astype(int).to_numpy().flatten() , aya_nowon_pred4, y_pred_sub_n[:, 2:3].flatten())
print("Inhos mortality - delong pvalue KMC_inhos:")
delong_roc_test(aya_kmc03.iloc[:, [2]].astype(int).to_numpy().flatten() , aya_kmc_pred4, y_pred_sub_k[:, 2:3].flatten())

In [None]:
# Evaluate the MTGBM scores with auc_plot for each task (column 0 = AKI,
# 1 = PRF, 2 = in-hospital mortality) on each cohort (SNU test, Nowon, KMC),
# using the per-task cutoff stored in mt_thold[0].

# --- AKI ---
val_mt1_aki = auc_plot(
    aya_test03.iloc[:, [0]], y_pred[:, 0:1], 'mt_total_aki_snu', mt_thold[0][0]
)
val_mt1_aki_n = auc_plot(
    aya_nowon03.iloc[:, [0]], y_pred_sub_n[:, 0:1], 'mt_total_aki_nowon', mt_thold[0][0]
)
val_mt1_aki_k = auc_plot(
    aya_kmc03.iloc[:, [0]], y_pred_sub_k[:, 0:1], 'mt_total_aki_kmc', mt_thold[0][0]
)

# --- PRF ---
val_mt1_prf = auc_plot(
    aya_test03.iloc[:, [1]], y_pred[:, 1:2], 'mt_total_prf_snu', mt_thold[0][1]
)
val_mt1_prf_n = auc_plot(
    aya_nowon03.iloc[:, [1]], y_pred_sub_n[:, 1:2], 'mt_total_prf_nowon', mt_thold[0][1]
)
val_mt1_prf_k = auc_plot(
    aya_kmc03.iloc[:, [1]], y_pred_sub_k[:, 1:2], 'mt_total_prf_kmc', mt_thold[0][1]
)

# --- In-hospital mortality ---
val_mt1_inhos = auc_plot(
    aya_test03.iloc[:, [2]], y_pred[:, 2:3], 'mt_total_inhos_snu', mt_thold[0][2]
)
val_mt1_inhos_n = auc_plot(
    aya_nowon03.iloc[:, [2]], y_pred_sub_n[:, 2:3], 'mt_total_inhos_nowon', mt_thold[0][2]
)
val_mt1_inhos_k = auc_plot(
    aya_kmc03.iloc[:, [2]], y_pred_sub_k[:, 2:3], 'mt_total_inhos_kmc', mt_thold[0][2]
)

# Append the nine metric frames to the running `result` table one at a time,
# in task-then-cohort order.
for _metrics_frame in (
    val_mt1_aki, val_mt1_aki_n, val_mt1_aki_k,
    val_mt1_prf, val_mt1_prf_n, val_mt1_prf_k,
    val_mt1_inhos, val_mt1_inhos_n, val_mt1_inhos_k,
):
    result = pd.concat([result, _metrics_frame])

# Bare expression so the notebook displays the accumulated table.
result