In [3]:
import numpy as np
import dill
import torch
from Utils.masking_service import make_missing_mask, make_indicating_mask

In [6]:
# Load the saved object from 'data_split.npy'.
# SECURITY: allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code on load -- only use this with a trusted file.
data = np.load('data_split.npy', allow_pickle=True)

In [7]:
# Peek at the 'heart' entry. data[()] indexes with an empty tuple -- presumably
# the loaded object is a 0-d object array wrapping a dict (TODO confirm).
data[()]['heart']

array([[[         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        ...,
        [         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,          nan, ...,          nan,
                  nan,          nan]],

       [[ 37.5       ,  36.88890076,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        ...,


In [22]:
# Pull both cohorts ('heart' and 'no_heart') out of the loaded object in one go.
heart_data_raw, no_heart_data_raw = (
    data[()][cohort_key] for cohort_key in ('heart', 'no_heart')
)

In [23]:
# Shapes of the two raw cohort arrays, shown side by side.
heart_data_raw.shape, no_heart_data_raw.shape

((16054, 10, 48), (36847, 10, 48))

In [12]:
# Total number of samples across both cohorts, derived from the arrays
# themselves instead of re-typing the sizes printed above.
len(heart_data_raw) + len(no_heart_data_raw)

52901

In [24]:
# Convert to float32 tensors, swap axes 1 and 2 (e.g. (N, 10, 48) -> (N, 48, 10))
# and keep only the first four entries of the new last axis.
heart_data_raw = torch.tensor(heart_data_raw, dtype=torch.float32).permute(0, 2, 1)[:, :, :4]
no_heart_data_raw = torch.tensor(no_heart_data_raw, dtype=torch.float32).permute(0, 2, 1)[:, :, :4]

In [25]:
# Shapes after the transpose and the slice down to four features.
heart_data_raw.shape, no_heart_data_raw.shape

(torch.Size([16054, 48, 4]), torch.Size([36847, 48, 4]))

In [26]:
# Number of samples per cohort (length of the leading axis), then a look at the
# first sample of each.
n_heart, n_no_heart = len(heart_data_raw), len(no_heart_data_raw)
heart_data_raw[0], no_heart_data_raw[0]

In [27]:
# shuffling
# Both permutations come from an explicitly seeded generator so the shuffle --
# and with it the train/test/validation split cut from the shuffled tensors --
# is reproducible on a fresh kernel. A dedicated generator leaves torch's
# global RNG state untouched.
# NOTE: this cell rebinds heart_data_raw / no_heart_data_raw, so re-running it
# without re-running the cells above permutes the already shuffled data again.
SHUFFLE_SEED = 0
shuffle_rng = torch.Generator().manual_seed(SHUFFLE_SEED)
perm_heart = torch.randperm(n_heart, generator=shuffle_rng)
perm_no_heart = torch.randperm(n_no_heart, generator=shuffle_rng)
heart_data_raw = heart_data_raw[perm_heart]
no_heart_data_raw = no_heart_data_raw[perm_no_heart]
heart_data_raw.shape, no_heart_data_raw.shape, heart_data_raw[0], no_heart_data_raw[0]

(torch.Size([16054, 48, 4]), torch.Size([36847, 48, 4]))

In [28]:
# Stack both cohorts along the sample axis (no_heart first, then heart).
all_data_raw = torch.cat([no_heart_data_raw, heart_data_raw], dim=0)
all_data_raw.size()

torch.Size([52901, 48, 4])

In [None]:
# check if there are non-unique datapoints
# NaN never compares equal to NaN, so two otherwise identical samples that
# contain missing values would be counted as distinct. For the comparison only,
# map NaN to +inf (assumes no genuine +inf readings in the data -- TODO confirm).
# Displays (number of unique samples, total number of samples).
torch.unique(
    torch.where(
        torch.isnan(all_data_raw),
        torch.full_like(all_data_raw, float('inf')),
        all_data_raw,
    ),
    dim=0,
).shape[0], all_data_raw.shape[0]

In [None]:
# NaN-aware statistics reduced over axes 0 and 1, i.e. one mean and one
# (population) standard deviation per entry of the last axis.
mean_all = all_data_raw.nanmean(dim=(0, 1))
residual_raw = all_data_raw - mean_all
std_all = (residual_raw * residual_raw).nanmean(dim=(0, 1)).sqrt()
mean_all, std_all

In [None]:
# normalizing
# Every tensor is standardised with the SAME statistics (mean_all / std_all of
# the combined data), so heart and no_heart stay on a common scale.
all_data, heart_data, no_heart_data = (
    (raw_values - mean_all) / std_all
    for raw_values in (all_data_raw, heart_data_raw, no_heart_data_raw)
)

In [None]:
# sanity check: the first two values (mean / std of ALL_ICU after normalizing)
# should be near 0 and 1; the last four values show how far the heart and
# no_heart cohorts deviate from the ALL_ICU statistics.
def _nan_mean_std(values):
    """Return (mean, std) of `values` reduced over dims 0 and 1, ignoring NaNs.

    The std is the population form: sqrt of the NaN-aware mean of squared
    deviations from the NaN-aware mean.
    """
    mean = torch.nanmean(values, [0, 1])
    std = torch.sqrt(torch.nanmean((values - mean) * (values - mean), [0, 1]))
    return mean, std

mean_all_temp, std_all_temp = _nan_mean_std(all_data)
mean_heart_temp, std_heart_temp = _nan_mean_std(heart_data)
mean_no_heart_temp, std_no_heart_temp = _nan_mean_std(no_heart_data)
# The first entry used to repeat mean_heart_temp, so the ALL_ICU mean was never shown.
mean_all_temp, std_all_temp, mean_heart_temp, std_heart_temp, mean_no_heart_temp, std_no_heart_temp

In [None]:
# splitting heart
# First 60 % -> train, next 20 % -> test, remaining 20 % -> validation,
# cut along the sample axis of the (already shuffled) heart tensor.
heart_train_end = int(n_heart*0.6)
heart_test_end = int(n_heart * 0.8)
train_set_heart = heart_data[:heart_train_end]
test_set_heart = heart_data[heart_train_end:heart_test_end]
validation_set_heart = heart_data[heart_test_end:]
tuple(part.shape for part in (train_set_heart, test_set_heart, validation_set_heart))

In [None]:
# splitting no_heart
# Same 60 / 20 / 20 cut as for the heart cohort: train, then test, then validation.
no_heart_train_end = int(n_no_heart*0.6)
no_heart_test_end = int(n_no_heart * 0.8)
train_set_no_heart = no_heart_data[:no_heart_train_end]
test_set_no_heart = no_heart_data[no_heart_train_end:no_heart_test_end]
validation_set_no_heart = no_heart_data[no_heart_test_end:]
tuple(part.shape for part in (train_set_no_heart, test_set_no_heart, validation_set_no_heart))

In [None]:
# masking heart
# p is handed to make_indicating_mask and stored as 'percentage_indicating' when saving.
p = 0.1
heart_splits = (train_set_heart, test_set_heart, validation_set_heart)
# Helpers are called in train, test, validation order: all missing masks first,
# then all indicating masks (each built from its missing mask).
heart_missing_masks = [make_missing_mask(split_values) for split_values in heart_splits]
heart_indicating_masks = [make_indicating_mask(mask, p) for mask in heart_missing_masks]
(indicating_mask_train_heart,
 indicating_mask_test_heart,
 indicating_mask_validation_heart) = heart_indicating_masks
# The missing mask does not know about the indicating mask yet: subtract it.
# (Exact mask semantics live in Utils.masking_service -- not visible here.)
(missing_mask_train_heart,
 missing_mask_test_heart,
 missing_mask_validation_heart) = (
    missing - indicating
    for missing, indicating in zip(heart_missing_masks, heart_indicating_masks)
)

In [None]:
# masking no_heart
# Uses the same p that was set in the heart masking cell.
no_heart_splits = (train_set_no_heart, test_set_no_heart, validation_set_no_heart)
# Helpers are called in train, test, validation order: all missing masks first,
# then all indicating masks (each built from its missing mask).
no_heart_missing_masks = [make_missing_mask(split_values) for split_values in no_heart_splits]
no_heart_indicating_masks = [make_indicating_mask(mask, p) for mask in no_heart_missing_masks]
(indicating_mask_train_no_heart,
 indicating_mask_test_no_heart,
 indicating_mask_validation_no_heart) = no_heart_indicating_masks
# The missing mask does not know about the indicating mask yet: subtract it.
# (Exact mask semantics live in Utils.masking_service -- not visible here.)
(missing_mask_train_no_heart,
 missing_mask_test_no_heart,
 missing_mask_validation_no_heart) = (
    missing - indicating
    for missing, indicating in zip(no_heart_missing_masks, no_heart_indicating_masks)
)

In [None]:
# setting NaN to zero
# Done in place. The split tensors are slices of heart_data / no_heart_data, so
# the NaNs are overwritten there as well. Run this only after the masking
# cells -- presumably the masks are derived from the NaN pattern (TODO confirm).
for split_values in (
    train_set_heart,
    test_set_heart,
    validation_set_heart,
    train_set_no_heart,
    test_set_no_heart,
    validation_set_no_heart,
):
    split_values[torch.isnan(split_values)] = 0

In [None]:
# putting together ALL_ICU
# Every tensor is stacked along the sample axis with heart first and no_heart
# second, so values, missing masks and indicating masks stay aligned row by row.

# train
train_set_all = torch.cat([train_set_heart, train_set_no_heart], dim=0)
missing_mask_train_all = torch.cat([missing_mask_train_heart, missing_mask_train_no_heart], dim=0)
indicating_mask_train_all = torch.cat([indicating_mask_train_heart, indicating_mask_train_no_heart], dim=0)

# test
test_set_all = torch.cat([test_set_heart, test_set_no_heart], dim=0)
missing_mask_test_all = torch.cat([missing_mask_test_heart, missing_mask_test_no_heart], dim=0)
indicating_mask_test_all = torch.cat([indicating_mask_test_heart, indicating_mask_test_no_heart], dim=0)

# validation
validation_set_all = torch.cat([validation_set_heart, validation_set_no_heart], dim=0)
missing_mask_validation_all = torch.cat([missing_mask_validation_heart, missing_mask_validation_no_heart], dim=0)
indicating_mask_validation_all = torch.cat([indicating_mask_validation_heart, indicating_mask_validation_no_heart], dim=0)

In [None]:
# saving the data
# Two bundles with the same key layout are written with torch.save:
# 'heart_only_compatible' first, then 'all_icu_compatible'.
# NOTE(review): the description texts contain typos ("first for features",
# "substracting", "standardvariance"); they are kept verbatim so the saved
# metadata does not change.
description_heart = 'Heart data only first for features, normalized featurewise by substracting the mean of ALL_ICU and scaling by the inverse of the standardvariance of ALL_ICU, split compatible with all_icu_compatible'
description_all = 'all icu first four features, normalized featurewise by substracting the mean of ALL_ICU and scaling by the inverse of the standardvariance of ALL_ICU, split compatible with heart_only_compatible'

# saving heart_only first
data_dic_heart = dict(
    train_set=train_set_heart,
    missing_mask_train=missing_mask_train_heart,
    indicating_mask_train=indicating_mask_train_heart,
    test_set=test_set_heart,
    missing_mask_test=missing_mask_test_heart,
    indicating_mask_test=indicating_mask_test_heart,
    validation_set=validation_set_heart,
    missing_mask_validation=missing_mask_validation_heart,
    indicating_mask_validation=indicating_mask_validation_heart,
    percentage_indicating=p,
    description=description_heart,
)
torch.save(data_dic_heart, 'heart_only_compatible')

data_dic_all = dict(
    train_set=train_set_all,
    missing_mask_train=missing_mask_train_all,
    indicating_mask_train=indicating_mask_train_all,
    test_set=test_set_all,
    missing_mask_test=missing_mask_test_all,
    indicating_mask_test=indicating_mask_test_all,
    validation_set=validation_set_all,
    missing_mask_validation=missing_mask_validation_all,
    indicating_mask_validation=indicating_mask_validation_all,
    percentage_indicating=p,
    description=description_all,
)
torch.save(data_dic_all, 'all_icu_compatible')