In [2]:
import numpy as np
import pandas as pd
import os
import h5py
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
matplotlib.style.use('ggplot')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Keras
import keras
from keras.models import Sequential
from keras import layers
from keras import optimizers
from keras import backend as K
from keras import regularizers

# Tensorflow
import tensorflow as tf
from tensorflow.python.client import device_lib
# Show which compute devices TensorFlow can see before any model is built.
# NOTE(review): device_lib is imported from the non-public tensorflow.python
# namespace; tf.config.list_physical_devices() is the documented alternative
# (its printed format differs, so it is not swapped in here).
local_devices = device_lib.list_local_devices()
print(local_devices)


from physionet_processing import fetch_data

from physionet_generator import DataGenerator

# Record the framework versions this notebook was executed with.
for caption, module in (('Tensorflow version:', tf), ('Keras version:', keras)):
    print(caption, module.__version__)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2949086928834227690
xla_global_id: -1
]
Tensorflow version: 2.17.0
Keras version: 3.6.0


In [3]:
# Base path for MIMIC dataset.
# The original Windows location stays the default so existing runs behave the
# same; set the DATA_FUSION_ROOT environment variable to point the notebook at
# a copy of the data on another machine without editing this cell.
data_root = os.path.normpath(
    os.environ.get("DATA_FUSION_ROOT") or "C:\\Users\\nazrulsa\\Desktop\\Data-Fusion\\"
)

# The record index is read from the notebook's working directory.
records_file = os.path.join('.', 'records_filtered.csv')

records_df = pd.read_csv(records_file)

# Encode every distinct diagnosis title as an integer class id and attach the
# encoded value to each record in a new 'encoded' column.
label_set = sorted(records_df['icd_title'].unique())
encoder = LabelEncoder().fit(label_set)
label_set_codings = encoder.transform(label_set)
records_df = records_df.assign(encoded=encoder.transform(records_df['icd_title']))

# Sanity output: the label <-> code mapping and the first few encoded rows.
print('Unique labels:', encoder.inverse_transform(label_set_codings))
print('Unique codings:', label_set_codings)
print('Dataset labels:\n', records_df[['subject_id', 'icd_title', 'encoded']].head())


# Hold out 33% of the records for validation; the fixed seed keeps the split
# repeatable across kernel restarts.
# NOTE(review): the split is made per row. If one subject_id owns several
# study_id rows (the head() preview suggests it can), the same patient may end
# up in both partitions -- consider a group-aware split keyed on subject_id.
test_split = 0.33
idx = np.arange(records_df.shape[0])
id_train, id_val = train_test_split(
    idx,
    test_size=test_split,
    shuffle=True,
    random_state=123,
)

# Study ids that belong to each partition, in split order.
partition = {
    'train': records_df['study_id'].iloc[id_train].tolist(),
    'validation': records_df['study_id'].iloc[id_val].tolist(),
}

# Lookup table: study_id -> encoded class label.
labels = {
    study: code
    for study, code in zip(records_df['study_id'], records_df['encoded'])
}


# Spot-check the first few records: resolve each waveform path under data_root
# and look its label up through the study_id -> label table.
# The loop deliberately avoids the name `idx`, which holds the row-index array
# used for the train/validation split, and iterates over head(5) so a file
# with fewer than five records does not raise an IndexError.
preview_rows = records_df.head(5)
for study_id, relative_path in zip(preview_rows['study_id'], preview_rows['path']):
    path = os.path.join(data_root, relative_path)
    label = labels[study_id]
    print(f"Study ID: {study_id}, Path: {path}, Label: {label}")

Unique labels: ['Atrial Fibrillation' 'Myocardial infarction' 'Ventricular tachycardia']
Unique codings: [0 1 2]
Dataset labels:
    subject_id              icd_title  encoded
0    10007058  Myocardial infarction        1
1    10007058  Myocardial infarction        1
2    10007058  Myocardial infarction        1
3    10007058  Myocardial infarction        1
4    10007058  Myocardial infarction        1
Study ID: 41688028, Path: C:\Users\nazrulsa\Desktop\Data-Fusion\files/p1000/p10007058/s41688028/41688028, Label: 1
Study ID: 47527771, Path: C:\Users\nazrulsa\Desktop\Data-Fusion\files/p1000/p10007058/s47527771/47527771, Label: 1
Study ID: 47979034, Path: C:\Users\nazrulsa\Desktop\Data-Fusion\files/p1000/p10007058/s47979034/47979034, Label: 1
Study ID: 40778825, Path: C:\Users\nazrulsa\Desktop\Data-Fusion\files/p1000/p10007058/s40778825/40778825, Label: 1
Study ID: 49692275, Path: C:\Users\nazrulsa\Desktop\Data-Fusion\files/p1000/p10007058/s49692275/49692275, Label: 1
