In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Select TensorFlow's GPU allocator through the environment. This is set ahead
# of the `import tensorflow` below — presumably so TF reads it at
# initialization; TODO confirm it is still honored if the order changes.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

import tensorflow as tf
import sys
import numpy as np
import pandas as pd

In [3]:
# Input root: the MIMIC-IV tables are read from this directory (hosp/ and icu/
# sub-folders are joined onto it when the CSVs are loaded).
mimic_iv_path = "/data/wang/junh/datasets/physionet.org/files/mimiciv/2.2"
# Working root for the multimodal dataset artifacts.
mm_dir = "/data/wang/junh/datasets/multimodal"

# Outputs of this notebook go under <mm_dir>/preprocessing; the directory is
# created up front (no error if it already exists).
output_dir = os.path.join(mm_dir, "preprocessing")
os.makedirs(output_dir, exist_ok=True)

In [4]:
# Hospital admissions: read the table, then parse both admission timestamps.
f_path = os.path.join(mimic_iv_path, "hosp", "admissions.csv.gz")
admissions_df = pd.read_csv(f_path, low_memory=False).assign(
    admittime=lambda adm: pd.to_datetime(adm['admittime']),
    dischtime=lambda adm: pd.to_datetime(adm['dischtime']),
)

# ICU stays: read the table, then parse the stay's in/out timestamps.
icustays_df = pd.read_csv(
    os.path.join(mimic_iv_path, "icu", "icustays.csv.gz"),
    low_memory=False,
).assign(
    intime=lambda stays: pd.to_datetime(stays['intime']),
    outtime=lambda stays: pd.to_datetime(stays['outtime']),
)

In [5]:
# Root folder of the ECG dataset; record paths in record_list.csv are joined
# onto this folder when waveforms are loaded later.
ecg_folder = '/data/wang/junh/datasets/physionet.org/files/mimic-iv-ecg/'

# Index of ECG records, with the acquisition time parsed to datetimes.
records_list_df = pd.read_csv(os.path.join(ecg_folder, 'record_list.csv')).assign(
    ecg_time=lambda records: pd.to_datetime(records['ecg_time'])
)

In [6]:
def calc_time_delta_hrs(icu_intime, charttime):
    """Return the signed elapsed time from ``icu_intime`` to ``charttime`` in hours.

    Both arguments must support subtraction yielding an object with a
    ``total_seconds()`` method (e.g. ``datetime`` / ``pandas.Timestamp``).
    The result is negative when ``charttime`` precedes ``icu_intime``.
    """
    elapsed = charttime - icu_intime
    seconds_per_hour = 3600
    return elapsed.total_seconds() / seconds_per_hour


def match_ecgs_to_icu_stays(icustays, ecg_records):
    """Pair every ICU stay with the ECG records taken during that stay.

    An ECG belongs to a stay when it has the same ``subject_id`` and its
    ``ecg_time`` lies in the closed interval ``[intime, outtime]``.

    Parameters
    ----------
    icustays : pandas.DataFrame
        Needs columns ``subject_id``, ``hadm_id``, ``stay_id``, ``intime``,
        ``outtime`` (the last two as datetimes).
    ecg_records : pandas.DataFrame
        Needs columns ``subject_id``, ``ecg_time`` (datetime) and ``path``.

    Returns
    -------
    pandas.DataFrame
        One row per (stay, ECG) match with columns ``subject_id``, ``hadm_id``,
        ``stay_id``, ``icu_time_delta`` (hours from ICU admission to the ECG),
        ``ecg_time`` and ``path``. Rows are ordered by the stay's position in
        ``icustays`` and then by the ECG's position in ``ecg_records``, with a
        fresh 0..n-1 index. The columns are present even when nothing matches.
    """
    stays = icustays[['subject_id', 'hadm_id', 'stay_id', 'intime', 'outtime']].copy()
    ecgs = ecg_records[['subject_id', 'ecg_time', 'path']].copy()

    # Remember the original row positions: the row order produced by an inner
    # merge has differed between pandas versions, so it is restored explicitly.
    stays['_stay_pos'] = range(len(stays))
    ecgs['_ecg_pos'] = range(len(ecgs))

    # One join on subject replaces re-filtering the whole record list per stay
    # and growing the result with pd.concat one row at a time.
    candidates = stays.merge(ecgs, on='subject_id', how='inner')
    within_stay = (
        (candidates['ecg_time'] >= candidates['intime'])
        & (candidates['ecg_time'] <= candidates['outtime'])
    )
    matched = (
        candidates.loc[within_stay]
        .sort_values(['_stay_pos', '_ecg_pos'])
        .reset_index(drop=True)
    )

    # Vectorized equivalent of calc_time_delta_hrs(intime, ecg_time).
    matched['icu_time_delta'] = (
        (matched['ecg_time'] - matched['intime']).dt.total_seconds() / 3600
    )
    return matched[['subject_id', 'hadm_id', 'stay_id', 'icu_time_delta', 'ecg_time', 'path']]


out_df = match_ecgs_to_icu_stays(icustays_df, records_list_df)

In [7]:
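# NOTE: earlier per-record (unbatched) version of the embedding step, kept for
# reference only; the batched cell further down performs this step.
# import wfdb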

# f_path = '/data/wang/junh/githubs/Multimodal-Transformer/attia_encoder_256.keras'
# encoder = tf.keras.models.load_model(f_path)

# def load_ecg(path, stop_index=4096):
#     rd_record = wfdb.rdrecord(path) 
#     sig = rd_record.p_signal
#     sig = sig[:stop_index, :]
#     return sig

# out_df['embeddings'] = None

# from tqdm import tqdm
# for index, row in tqdm(out_df.iterrows(), total=out_df.shape[0]):
#     curr_ecg_path = os.path.join(ecg_folder, row['path'])
#     wf = load_ecg(curr_ecg_path)
#     out_df.at[index, 'embeddings'] = encoder.predict(wf.reshape(1, -1, 12), verbose=0)

In [None]:
# Sanity check: show which TensorFlow version this kernel is running.
import tensorflow as tf
print(tf.__version__)

2.10.0


In [9]:
import wfdb
import tensorflow as tf
import json
from keras.models import model_from_json
import os
import numpy as np
from tqdm import tqdm
import h5py

# Rebuild the encoder from its saved JSON architecture description.
model_architecture = '/data/wang/junh/githubs/Multimodal-Transformer/attia_encoder_256/config.json'
with open(model_architecture, 'r') as json_file:
    architecture = json.load(json_file)

# model_from_json expects a JSON string, so the parsed config is serialized back.
architecture_str = json.dumps(architecture)
model = model_from_json(architecture_str)

# Load weights manually from the HDF5 file.
# Layout read here: f['layers'][<layer.name>]['vars'][<'0', '1', ...>], one
# dataset per weight, assumed to be stored in the order set_weights() expects.
weights_path = '/data/wang/junh/githubs/Multimodal-Transformer/attia_encoder_256/model.weights.h5'
layers_missing_weights = []
with h5py.File(weights_path, 'r') as f:
    saved_layers = f['layers']
    for layer in model.layers:
        layer_group = saved_layers.get(layer.name)
        if layer_group is None or 'vars' not in layer_group:
            # Nothing stored for this layer. That is fine for weightless layers,
            # but a layer that owns weights would silently keep its initial
            # values, so remember it and fail below.
            if layer.weights:
                layers_missing_weights.append(layer.name)
            continue

        # Navigate to the 'vars' subgroup and read its datasets in numeric key order.
        vars_group = layer_group['vars']
        layer_weights = [vars_group[str(i)][:] for i in range(len(vars_group))]
        layer.set_weights(layer_weights)
        print(f"Weights successfully loaded for layer: {layer.name}")

if layers_missing_weights:
    # Do not report success (or compute embeddings) with partially initialized layers.
    raise RuntimeError(
        f"No saved weights found in {weights_path} for layer(s): "
        + ", ".join(layers_missing_weights)
    )

print("All weights loaded successfully into the model.")

# Function to load ECG
def load_ecg(path, stop_index=4096):
    """Read a WFDB record and return the leading rows of its physical signal.

    Parameters
    ----------
    path : str
        Record path handed to ``wfdb.rdrecord``.
    stop_index : int, default 4096
        Exclusive upper bound of the row slice taken from ``p_signal``.

    Returns
    -------
    The first ``stop_index`` rows of the record's ``p_signal`` with all
    columns kept (presumably samples x leads — TODO confirm). Fewer rows are
    returned if the record is shorter than ``stop_index``.
    """
    return wfdb.rdrecord(path).p_signal[:stop_index, :]

# Prepare for batch processing
batch_size = 32  # You can adjust the batch size depending on your GPU memory
n_leads = 12  # size of the last axis each waveform is reshaped to before stacking
out_df['embeddings'] = None


def _embed_and_store(waveforms, row_labels):
    """Run the encoder on one batch and store each embedding in ``out_df``.

    ``waveforms`` is a list of arrays shaped (1, n_samples, n_leads) and
    ``row_labels`` holds the matching ``out_df`` index labels, in the same order.
    """
    batch_ecgs = np.vstack(waveforms)
    embeddings = model.predict(batch_ecgs, verbose=0)
    for i, idx in enumerate(row_labels):
        out_df.at[idx, 'embeddings'] = embeddings[i]


# Process in batches
ecg_batch = []
batch_indices = []
for index, row in tqdm(out_df.iterrows(), total=out_df.shape[0]):
    curr_ecg_path = os.path.join(ecg_folder, row['path'])
    wf = load_ecg(curr_ecg_path)
    ecg_batch.append(wf.reshape(1, -1, n_leads))
    batch_indices.append(index)

    # When batch is full, process it and start a new one
    if len(ecg_batch) == batch_size:
        _embed_and_store(ecg_batch, batch_indices)
        ecg_batch = []
        batch_indices = []

# Process any remaining ECGs (final, partially filled batch)
if ecg_batch:
    _embed_and_store(ecg_batch, batch_indices)


Weights successfully loaded for layer: conv1d
Weights successfully loaded for layer: batch_normalization
Weights successfully loaded for layer: spatial_dropout1d
Weights successfully loaded for layer: max_pooling1d
Weights successfully loaded for layer: conv1d_1
Weights successfully loaded for layer: batch_normalization_1
Weights successfully loaded for layer: spatial_dropout1d_1
Weights successfully loaded for layer: max_pooling1d_1
Weights successfully loaded for layer: conv1d_2
Weights successfully loaded for layer: batch_normalization_2
Weights successfully loaded for layer: spatial_dropout1d_2
Weights successfully loaded for layer: max_pooling1d_2
Weights successfully loaded for layer: conv1d_3
Weights successfully loaded for layer: batch_normalization_3
Weights successfully loaded for layer: spatial_dropout1d_3
Weights successfully loaded for layer: max_pooling1d_3
Weights successfully loaded for layer: conv1d_4
Weights successfully loaded for layer: batch_normalization_4
Weights

100%|██████████| 72167/72167 [06:51<00:00, 175.38it/s]


In [10]:
# Persist the stay/ECG table, including the embeddings column, as a pickle
# under <mm_dir>/preprocessing.
mm_dir = "/data/wang/junh/datasets/multimodal"
output_dir = os.path.join(mm_dir, "preprocessing")

embeddings_pkl_path = os.path.join(output_dir, "ecg_embeddings_icu.pkl")
out_df.to_pickle(embeddings_pkl_path)