# Mortality Prediction

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.utils.data
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.tests.test_x13 import dataset
from torch.utils.data import Dataset

# Notebook-wide random seed constant.
# NOTE(review): nothing in the visible cells reads `seed`; RF_evaluation
# hard-codes random_state=1 instead - confirm whether this should be wired in.
seed = 804

In [2]:
class MIMICDATASET(Dataset):
    """Map-style dataset that pairs temporal features, static features and labels by index.

    Parameters:
    - x_t: indexable collection of temporal features (its length defines the dataset size)
    - x_s: indexable collection of static features, indexed in step with ``x_t``
    - y: indexable collection of labels, indexed in step with ``x_t``
    - train: stored as ``self.train``; not used by any method of this class
    - transform: stored as ``self.transform``; not applied by ``__getitem__``
    """

    def __init__(self, x_t, x_s, y, train=None, transform=None):
        super().__init__()
        self.transform = transform
        self.train = train
        self.xt = x_t
        self.xs = x_s
        self.y = y

    def return_data(self):
        """Return the full ``(temporal, static, labels)`` triple as stored."""
        # Labels live in ``self.y``; the previous ``self.label`` was never assigned
        # and raised AttributeError on every call.
        return self.xt, self.xs, self.y

    def __len__(self):
        """Return the number of samples, taken from the temporal features."""
        return len(self.xt)

    def __getitem__(self, idx):
        """Return ``(temporal[idx], static[idx], label[idx])``."""
        sample = self.xt[idx]
        stat = self.xs[idx]
        sample_y = self.y[idx]
        return sample, stat, sample_y

In [3]:
def RF_evaluation(test_S, test_X, test_y):
    """Train a random forest on a stratified split and print held-out accuracy and AUROC.

    Static and temporal features are concatenated column-wise, split with
    ``train_test_split`` (default proportions, stratified on the labels,
    ``random_state=1``), min-max scaled, and fed to a
    ``RandomForestClassifier(random_state=1)``.

    Parameters:
    - test_S: 2-D static feature matrix, one row per sample
    - test_X: 2-D temporal feature matrix with the same number of rows as ``test_S``
    - test_y: 1-D binary labels exposing ``.shape``, one per row

    Returns:
    - None; the data shapes, test accuracy and test AUROC are printed.
    """
    X_all = np.hstack([test_S, test_X])
    y_all = test_y

    print(X_all.shape, y_all.shape)
    Xtr, Xte, ytr, yte = train_test_split(X_all, y_all, stratify=y_all, random_state=1)

    # Fit the scaler on the training rows only. The earlier extra fit on the
    # full matrix was redundant and let held-out rows influence preprocessing.
    scaler = MinMaxScaler()
    Xtr = scaler.fit_transform(Xtr)
    Xte = scaler.transform(Xte)

    clf = RandomForestClassifier(random_state=1)
    clf.fit(Xtr, ytr)

    y_pred = clf.predict(Xte)
    print('Test Accuracy:', metrics.accuracy_score(yte, y_pred))

    # Column 1 of predict_proba is the probability of the second class in clf.classes_.
    y_prob = clf.predict_proba(Xte)[:, 1]
    score = metrics.roc_auc_score(yte, y_prob)
    print('Test AUROC score:', score)

In [4]:
def syn_data(model_name, dataset):
    """Load the synthetic static and temporal arrays saved for one generator.

    Files are read from ``<folder>/<model_name>_static.npy`` and
    ``<folder>/<model_name>_temporal.npy``, where ``<folder>`` is
    ``Synthetic_MIMIC`` or ``Synthetic_eICU`` depending on ``dataset``.

    Parameters:
    - model_name: one of "vae", "my_vae", "medDiff", "flexgen"
    - dataset: "MIMIC" or "eICU"

    Returns:
    - (static_array, temporal_array) as loaded by ``np.load``

    Raises:
    - ValueError: ``model_name`` is not a supported generator
    - KeyError: ``dataset`` is not a known dataset key
    """
    supported = ["vae", "my_vae", "medDiff", "flexgen"]
    if model_name not in supported:
        raise ValueError(f"model_name must be in {supported}")

    directories = {"MIMIC": "Synthetic_MIMIC", "eICU": "Synthetic_eICU"}
    stem = directories[dataset] + "/" + model_name

    # Static file is read first, then the temporal one.
    return np.load(stem + "_static.npy"), np.load(stem + "_temporal.npy")

In [5]:
def MMD(x, y, kernel='rbf', gamma=1.0, device=None):
    """
    PyTorch-based Maximum Mean Discrepancy (MMD), GPU-accelerated when possible.

    The value returned is mean(K_xx) + mean(K_yy) - 2 * mean(K_xy), where each
    mean runs over every entry of the kernel matrix (diagonal included).

    Parameters:
    - x: array-like or tensor of shape (n_samples_x, n_features)
    - y: array-like or tensor of shape (n_samples_y, n_features)
    - kernel: string, type of kernel to use ('linear', 'poly', 'rbf');
      'poly' is (x . y + 1) ** 2
    - gamma: float, parameter for the RBF kernel (ignored by the other kernels)
    - device: optional torch device (or device string); defaults to CUDA when
      it is available and to the CPU otherwise

    Returns:
    - mmd: float, the MMD value

    Raises:
    - ValueError: if `kernel` is not one of the supported names
    """
    # as_tensor accepts numpy arrays, nested lists and existing tensors without
    # the copy/warning torch.tensor() produces for tensor input.
    x = torch.as_tensor(x)
    y = torch.as_tensor(y)

    # matmul/cdist need both operands in the same floating dtype; promote
    # (e.g. float32 vs float64) and fall back to the default float dtype for
    # integer/bool data, which torch.mean cannot reduce.
    dtype = torch.promote_types(x.dtype, y.dtype)
    if not dtype.is_floating_point:
        dtype = torch.get_default_dtype()

    # Previously hard-coded to 'cuda', which crashed on CPU-only machines.
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    x = x.to(device=device, dtype=dtype)
    y = y.to(device=device, dtype=dtype)

    if kernel == 'linear':
        K_xx = torch.matmul(x, x.T)
        K_yy = torch.matmul(y, y.T)
        K_xy = torch.matmul(x, y.T)
    elif kernel == 'poly':
        K_xx = (torch.matmul(x, x.T) + 1) ** 2
        K_yy = (torch.matmul(y, y.T) + 1) ** 2
        K_xy = (torch.matmul(x, y.T) + 1) ** 2
    elif kernel == 'rbf':
        K_xx = torch.exp(-gamma * torch.cdist(x, x, p=2) ** 2)
        K_yy = torch.exp(-gamma * torch.cdist(y, y, p=2) ** 2)
        K_xy = torch.exp(-gamma * torch.cdist(x, y, p=2) ** 2)
    else:
        raise ValueError("Unsupported kernel type. Choose from 'linear', 'poly', or 'rbf'.")

    mmd = torch.mean(K_xx) + torch.mean(K_yy) - 2 * torch.mean(K_xy)

    return mmd.item()


def threshold_finder(orign, syn):
    """Pick the binarisation cut-off for `syn` that minimises MMD against `orign`.

    Cut-offs 0.10, 0.11, ..., 0.89 are tried in order. For each one a copy of
    `syn` is binarised (entries >= cut-off become 1, entries below become 0)
    and scored with ``MMD`` using its default kernel. The first cut-off that
    reaches the lowest score wins.

    Parameters:
    - orign: reference sample passed to ``MMD`` unchanged
    - syn: array supporting ``.copy()`` and boolean-mask assignment; not modified

    Returns:
    - (lowest MMD, cut-off that produced it); (100, 0) when no candidate
      scores below the initial bound of 100
    """
    lowest_mmd, chosen_cut = 100, 0
    for percent in range(10, 90):
        cut = percent / 100

        # Work on a copy so the caller's array is left untouched.
        binarized = syn.copy()
        binarized[binarized >= cut] = 1
        binarized[binarized < cut] = 0

        score = MMD(orign, binarized)
        if not score < lowest_mmd:
            continue
        lowest_mmd, chosen_cut = score, cut
    return lowest_mmd, chosen_cut

## Original

In [17]:
def _load_dense_float32(npz_path):
    """Read 'coords' and 'data' from an .npz archive and return the dense float32 tensor.

    The archive is opened with a context manager so its file handle is closed
    as soon as the two arrays have been copied into tensors.
    """
    with np.load(npz_path) as archive:
        coords = torch.tensor(archive['coords'])
        values = torch.tensor(archive['data'])
    # The dense size is inferred from the largest index in `coords`.
    return torch.sparse_coo_tensor(coords, values).to_dense().to(torch.float32)


print("MIMIC-III")
ori_MIMIC_s = _load_dense_float32('FIDDLE_MIMIC/features/mortality_48h/s.npz')
ori_MIMIC_t = _load_dense_float32('FIDDLE_MIMIC/features/mortality_48h/X.npz')
df_pop = pd.read_csv('FIDDLE_MIMIC/population/mortality_48h.csv')
ori_MIMIC_y = torch.tensor(df_pop["mortality_LABEL"].values).to(torch.float32)

# Collapse dim 1 by averaging so each sample becomes a single feature vector
# (presumably dim 1 is the time axis - TODO confirm).
ori_MIMIC_t = ori_MIMIC_t.mean(dim=1)
RF_evaluation(ori_MIMIC_s, ori_MIMIC_t, ori_MIMIC_y)

print("eICU")
ori_eicu_s = _load_dense_float32('FIDDLE_eICU/features/mortality_48h/s.npz')
ori_eicu_t = _load_dense_float32('FIDDLE_eICU/features/mortality_48h/X.npz')
df_pop = pd.read_csv('FIDDLE_eICU/population/mortality_48h.csv')
ori_eicu_y = torch.tensor(df_pop["mortality_LABEL"].values).to(torch.float32)

ori_eicu_t = ori_eicu_t.mean(dim=1)
RF_evaluation(ori_eicu_s, ori_eicu_t, ori_eicu_y)

MIMIC-III
(8577, 7403) torch.Size([8577])
Test Accuracy: 0.88997668997669
Test AUROC score: 0.8625407212958512
eICU
(77066, 2528) torch.Size([77066])
Test Accuracy: 0.8875278974412207
Test AUROC score: 0.7893212449609718


## VAE

In [11]:
# Evaluate the "vae" synthetic data on both datasets with the random-forest pipeline.
# The labels come from the population CSVs, so this assumes row i of the
# synthetic arrays corresponds to row i of the CSV - TODO confirm.
# NOTE(review): the recorded output of this cell shows accuracy and AUROC of
# exactly 1.0 on both datasets; worth checking for label leakage in the
# synthetic features.
print("MIMIC-III")
test_S, test_X = syn_data("vae", dataset="MIMIC")
df_pop = pd.read_csv('FIDDLE_MIMIC/population/mortality_48h.csv')
ori_MIMIC_y = torch.tensor(df_pop["mortality_LABEL"].values).to(torch.float32)
RF_evaluation(test_S, test_X, ori_MIMIC_y)
print("eICU")
test_S, test_X = syn_data("vae", dataset="eICU")
df_pop = pd.read_csv('FIDDLE_eICU/population/mortality_48h.csv')
ori_eicu_y = torch.tensor(df_pop["mortality_LABEL"].values).to(torch.float32)
RF_evaluation(test_S, test_X, ori_eicu_y)

MIMIC-III
(8577, 7403) torch.Size([8577])
Test Accuracy: 1.0
Test AUROC score: 1.0
eICU
(77066, 2528) torch.Size([77066])
Test Accuracy: 1.0
Test AUROC score: 1.0


## MY_VAE

In [6]:
# Evaluate the "my_vae" synthetic MIMIC data with the random-forest pipeline.
print("MIMIC-III")
test_S, test_X = syn_data("my_vae", dataset="MIMIC")
# Labels: 1000 zeros followed by 1000 ones. This assumes the synthetic file
# holds 2000 rows ordered negatives-then-positives - TODO confirm with the generator.
y = np.concatenate((np.zeros(1000), np.ones(1000)))
RF_evaluation(test_S, test_X, y)

# eICU variant (disabled); it uses 10000 + 10000 labels instead of 1000 + 1000.
# test_S, test_X = syn_data("my_vae", dataset="eICU")
# y = np.concatenate((np.zeros(10000), np.ones(10000)))
# RF_evaluation(test_S, test_X, y)

MIMIC-III
(2000, 7403) (2000,)
Test Accuracy: 0.6
Test AUROC score: 0.6463199999999999
(20000, 2528) (20000,)
Test Accuracy: 1.0
Test AUROC score: 1.0


In [12]:
# Reload the MIMIC features as dense float32 tensors. The .npz archives are
# opened with context managers so their file handles are closed once the
# 'coords' and 'data' arrays have been copied into tensors.
with np.load('FIDDLE_MIMIC/features/mortality_48h/s.npz') as static_npz:
    ori_MIMIC_s = torch.sparse_coo_tensor(
        torch.tensor(static_npz['coords']), torch.tensor(static_npz['data'])
    ).to_dense().to(torch.float32)
with np.load('FIDDLE_MIMIC/features/mortality_48h/X.npz') as temporal_npz:
    ori_MIMIC_t = torch.sparse_coo_tensor(
        torch.tensor(temporal_npz['coords']), torch.tensor(temporal_npz['data'])
    ).to_dense().to(torch.float32)

# Average over dim 1 so the loaded temporal tensor lines up with the
# synthetic temporal array it is compared against below.
ori_MIMIC_t = ori_MIMIC_t.mean(dim=1)
test_S, test_X = syn_data("my_vae", "MIMIC")

# Displayed result: (static MMD, temporal MMD) between the loaded and the
# "my_vae" synthetic features, using MMD's default kernel.
MMD(ori_MIMIC_s.numpy(), test_S), MMD(ori_MIMIC_t.numpy(), test_X)

(0.5280638337135315, 0.6655136942863464)

In [16]:
# Display the first 100 rows and first 10 columns of ori_MIMIC_t for a visual check.
ori_MIMIC_t[:100, :10]

tensor([[0.9583, 0.9375, 0.9375, 0.9375, 0.9167, 0.9375, 0.0833, 0.0000, 0.0000,
         0.0833],
        [0.9792, 0.9583, 0.9792, 0.9792, 0.9792, 0.9583, 0.1250, 0.0000, 0.0000,
         0.1250],
        [0.8958, 0.8750, 0.8958, 0.8958, 0.8750, 0.8750, 0.0833, 0.0000, 0.0208,
         0.0625],
        [0.9167, 0.8958, 0.8958, 0.8542, 0.7708, 0.8958, 0.1042, 0.0000, 0.0000,
         0.1042],
        [0.8750, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.1042, 0.0000, 0.1042,
         0.0000],
        [0.9167, 0.9167, 0.9167, 0.9167, 0.9167, 0.9167, 0.1042, 0.0000, 0.1042,
         0.0000],
        [0.8333, 0.8542, 0.8542, 0.8542, 0.8542, 0.8542, 0.0625, 0.0000, 0.0208,
         0.0417],
        [0.9792, 0.9375, 0.9583, 0.9583, 0.9583, 0.9375, 0.0625, 0.0000, 0.0000,
         0.0625],
        [0.9583, 0.8750, 0.8750, 0.8750, 0.8958, 0.8750, 0.1042, 0.0000, 0.0625,
         0.0417],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.1250, 0.0000, 0.1250,
         0.0000],
        [1

In [15]:
# Display the same 100 x 10 slice of the synthetic temporal array (test_X) for comparison.
test_X[:100, :10]

array([[0.9328672 , 0.91804504, 0.946497  , 0.9486718 , 0.939298  ,
        0.9327442 , 0.09669938, 0.01345006, 0.04744392, 0.0472494 ],
       [0.91591406, 0.9158869 , 0.9604852 , 0.9557722 , 0.9470598 ,
        0.9411216 , 0.08524165, 0.01434674, 0.05591734, 0.03618789],
       [0.9340429 , 0.9151902 , 0.96340686, 0.95398116, 0.91384655,
        0.91236055, 0.09359962, 0.01166431, 0.04597577, 0.03883178],
       [0.9419963 , 0.9122335 , 0.95667154, 0.95735687, 0.92741156,
        0.90471274, 0.08541238, 0.00904276, 0.05153199, 0.03608669],
       [0.95152855, 0.9298215 , 0.9672093 , 0.9563402 , 0.9521994 ,
        0.90420204, 0.10297178, 0.01039687, 0.06603692, 0.04462571],
       [0.93946576, 0.9151437 , 0.9382967 , 0.9436095 , 0.9479887 ,
        0.919246  , 0.08730678, 0.01659382, 0.05713613, 0.03299408],
       [0.9391619 , 0.9178938 , 0.9395542 , 0.921718  , 0.9298537 ,
        0.903115  , 0.11884213, 0.02058709, 0.06253508, 0.03059905],
       [0.9401606 , 0.9499027 , 0.9390171

In [14]:
# Compare per-column means of the first 10 static columns:
# loaded MIMIC data (ori_MIMIC_s) versus the "my_vae" synthetic data (test_S).
test_S, test_X = syn_data("my_vae", "MIMIC")
ori_MIMIC_s[:, :10].mean(axis=0), test_S[:, :10].mean(axis=0)

(tensor([0.3823, 0.3021, 0.1519, 0.1575, 0.0017, 0.0043, 0.1253, 0.8630, 0.0117,
         0.2001]),
 array([0.3733084 , 0.29335558, 0.29125702, 0.22213016, 0.20040773,
        0.25701445, 0.20888934, 0.7676358 , 0.20715517, 0.30511987],
       dtype=float32))

## MedDiff

In [None]:
# Evaluate the "medDiff" synthetic data on both datasets with the random-forest pipeline.
print("MIMIC-III")
test_S, test_X = syn_data("medDiff", "MIMIC")
# Labels: 1000 zeros followed by 1000 ones (assumes 2000 rows ordered
# negatives-then-positives - TODO confirm).
y = np.concatenate((np.zeros(1000), np.ones(1000)))
RF_evaluation(test_S, test_X, y)
print("eICU")
test_S, test_X = syn_data("medDiff", "eICU")
# NOTE(review): the 2000-element `y` built for MIMIC is reused here. The
# commented-out my_vae eICU cell builds 10000 + 10000 labels, so confirm the
# medDiff eICU file really has 2000 rows; otherwise this call fails on a
# length mismatch.
RF_evaluation(test_S, test_X, y)

## FlexGen

In [10]:
# Evaluate the "flexgen" synthetic MIMIC data with the random-forest pipeline.
print("MIMIC-III")
test_S, test_X = syn_data("flexgen", "MIMIC")
# Labels: 1000 zeros followed by 1000 ones (assumes 2000 rows ordered
# negatives-then-positives - TODO confirm).
y = np.concatenate((np.zeros(1000), np.ones(1000)))
RF_evaluation(test_S, test_X, y)
# eICU variant (disabled). NOTE(review): it would reuse the 2000-element `y` above.
# print("eICU")
# test_S, test_X = syn_data("flexgen", "eICU")
# RF_evaluation(test_S, test_X, y)

MIMIC-III
(2000, 7403) (2000,)
Test Accuracy: 0.684
Test AUROC score: 0.7356879999999999


In [13]:
# Displayed result: (static MMD, temporal MMD) between the loaded MIMIC features
# and the "flexgen" synthetic features, using MMD's default kernel.
# Relies on ori_MIMIC_s / ori_MIMIC_t already being defined by an earlier cell.
test_S, test_X = syn_data("flexgen", "MIMIC")
MMD(ori_MIMIC_s.numpy(), test_S), MMD(ori_MIMIC_t.numpy(), test_X)

(0.5156745910644531, 0.6836368441581726)