In [2]:
import pickle
import os
import numpy as np
import psycopg2 as py

np_data = np.load('list_adm_id.npy')

In [13]:
list_adm_id=np_data.tolist()

In [14]:
import math

def split_list(lst, n):
    chunk_size = math.ceil(len(lst) / n)
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

In [15]:
# Dictionary of vitals with itemid sets
vital_itemids = {
    "SpO2": [646, 220277],
    "HR": [211, 220045],
    "RR": [618, 615, 220210, 224690],
    "SBP": [51, 442, 455, 6701, 220179, 220050],
    "DBP": [8368, 8440, 8441, 8555, 220180, 220051],
    "EtCO2": [1817, 228640],
    "Temp_F": [223761, 678],
    "Temp_C": [223762, 676],
    "TGCS": [198, 226755, 227013],
    "CRR": [3348],
    "FiO2": [2981, 3420, 3422, 223835],
    "Glucose": [807, 811, 1529, 3745, 3744, 225664, 220621, 226537],
    "pH": [780, 860, 1126, 1673, 3839, 4202, 4753, 6003, 220274, 220734, 223830, 228243],
}

# Urine output itemids as a separate set since it's fetched from outputevents
urine_output_itemids = [
    43647, 43053, 43171, 43173, 43333, 43347, 43348, 43355, 43365, 
    43373, 43374, 43379, 43380, 43431, 43519, 43522, 43537, 43576, 
    43583, 43589, 43638, 43654, 43811, 43812, 43856, 44706, 45304, 227519
]

# data = []
# for id in range(len(list_adm_id)):
#     hadm_id = list_adm_id[id][0]
#     print(id, hadm_id)
#     vitals = []

#     # Loop through the vital itemids and execute a single query per vital type
#     for vital_name, itemids in vital_itemids.items():
#         itemid_str = ','.join(map(str, itemids))
#         # cur.execute(f"SELECT charttime, valuenum FROM chartevents WHERE hadm_id = %s AND itemid IN ({itemid_str}) ORDER BY charttime", [hadm_id])
#         vitals.append(f"SELECT charttime, valuenum FROM chartevents WHERE hadm_id = {hadm_id} AND itemid IN ({itemid_str}) ORDER BY charttime")

#     # Fetch urine output from outputevents separately
#     urine_itemid_str = ','.join(map(str, urine_output_itemids))
#     # cur.execute(f"SELECT charttime, VALUE FROM outputevents WHERE hadm_id = %s AND itemid IN ({urine_itemid_str}) ORDER BY charttime", [hadm_id])
#     vitals.append(f"SELECT charttime, VALUE FROM outputevents WHERE hadm_id = {hadm_id} AND itemid IN ({urine_itemid_str}) ORDER BY charttime")

#     # Append the vitals for this admission ID to the main data list
#     data.append(vitals)


# Function to process each chunk and save it as a .npy file
def process_chunk(chunk, chunk_index):
    data = []
    for id in range(len(chunk)):
        hadm_id = chunk[id][0]
        print(f"Processing admission {id} in chunk {chunk_index}, hadm_id: {hadm_id}")
        vitals = []

        # Loop through the vital itemids and execute a single query per vital type
        for vital_name, itemids in vital_itemids.items():
            itemid_str = ','.join(map(str, itemids))
            # Here you would execute the query and fetch results
            # cur.execute(f"SELECT charttime, valuenum FROM chartevents WHERE hadm_id = %s AND itemid IN ({itemid_str}) ORDER BY charttime", [hadm_id])
            # For now, we are simulating the query
            vitals.append(f"SELECT charttime, valuenum FROM chartevents WHERE hadm_id = {hadm_id} AND itemid IN ({itemid_str}) ORDER BY charttime")

        # Fetch urine output from outputevents separately
        urine_itemid_str = ','.join(map(str, urine_output_itemids))
        # cur.execute(f"SELECT charttime, VALUE FROM outputevents WHERE hadm_id = %s AND itemid IN ({urine_itemid_str}) ORDER BY charttime", [hadm_id])
        vitals.append(f"SELECT charttime, VALUE FROM outputevents WHERE hadm_id = {hadm_id} AND itemid IN ({urine_itemid_str}) ORDER BY charttime")

        # Append the vitals for this admission ID to the main data list
        data.append(vitals)

    # Save the chunk's data as a .npy file
    np.save(f"chunk_{chunk_index}.npy", np.array(data))
    print(f"Chunk {chunk_index} saved.")


chunks = split_list(list_adm_id, 10)


In [3]:
def load_and_combine_npy_files(output_dir, combined_output_path):
    """
    Load all .npy files from the output directory and combine them into a single file.
    """
    combined_data = []
    for filename in os.listdir(output_dir):
        if filename.endswith('.npy'):
            file_path = os.path.join(output_dir, filename)
            chunk_data = np.load(file_path, allow_pickle=True)
            combined_data.extend(chunk_data)
            print(f"Loaded {filename}")
    
    # Save the combined data
    np.save(combined_output_path, combined_data)
    print(f"Combined data saved to {combined_output_path}")

load_and_combine_npy_files('./vitals_with_item_id', 'combined_vitals_data.npy')

Loaded chunk_0.npy
Loaded chunk_2.npy
Loaded chunk_13.npy
Loaded chunk_7.npy
Loaded chunk_9.npy
Loaded chunk_4.npy
Loaded chunk_11.npy
Loaded chunk_5.npy
Loaded chunk_8.npy
Loaded chunk_1.npy
Loaded chunk_15.npy
Loaded chunk_10.npy
Loaded chunk_14.npy
Loaded chunk_6.npy
Loaded chunk_3.npy
Loaded chunk_12.npy
Combined data saved to combined_vitals_data.npy


In [4]:
# Load the .npy file
npy_data = np.load('combined_vitals_data.npy', allow_pickle=True)

# Save it as a .pkl file
with open('combined_vitals_data.pkl', 'wb') as pkl_file:
    pickle.dump(npy_data, pkl_file)

print("File converted from .npy to .pkl")


File converted from .npy to .pkl
