In [None]:
import gzip
import pandas as pd
import warnings
import gc

## Create and fill "Mean" table (rows and columns)

In [None]:
# Aggregated chart events in long format; later cells read its subject_id,
# hadm_id, Time_Zone, itemid and Mean_Chart columns.
# The paths are raw strings so the Windows backslashes stay literal instead
# of depending on "\E", "\g", "\c" and "\o" being unrecognised escape
# sequences (Python warns about those and intends to reject them).
head_df = pd.read_csv(r'CSV\Exports\groupby\chartevent\o01_icu_chartevent_grouped_mean.csv')

# Row skeleton of the output table (identifier columns only -- TODO confirm).
row_df = pd.read_csv(r'CSV\Exports\o07_chartevent_rows.csv')

# Locations of the gzip-compressed item dictionaries (`d_items`, `d_labitems`).
compressed_chart_items_df = r"..\Datasets\mimic-iv-2_2\icu\d_items.csv.gz"
compressed_lab_items_df = r"..\Datasets\mimic-iv-2_2\hosp\d_labitems.csv.gz"

# Load both dictionaries into DataFrames, decompressing on the fly.
chart_items_df = pd.read_csv(compressed_chart_items_df, compression='gzip')
lab_items_df = pd.read_csv(compressed_lab_items_df, compression='gzip')

In [None]:
"""
Creating the concat column as an index
from the combination of subject_id,
hadm_id and Time_Zone.
"""
# Build the composite key with an explicit "_" between the three parts.
# Gluing the values together with nothing in between is ambiguous: the
# triples (1, 23, 4) and (12, 3, 4) would both become "1234" and be treated
# as the same row.
row_key = (row_df["subject_id"].astype(str)
           + "_" + row_df["hadm_id"].astype(str)
           + "_" + row_df["Time_Zone"].astype(str))

# Use the key as the row index (named "concat"); the three source columns
# stay in the frame as regular columns.
row_df = row_df.set_index(row_key.rename("concat"))

# Build the very same key on the measurements table so that both tables
# share a common lookup value. The separator must match the one used above.
head_key = (head_df["subject_id"].astype(str)
            + "_" + head_df["hadm_id"].astype(str)
            + "_" + head_df["Time_Zone"].astype(str))

# Index the measurements by the composite key as well.
head_df = head_df.set_index(head_key.rename("concat"))

In [None]:
# Work on a real copy so that row_df itself is not modified by the fill
# below. A plain `new_df = row_df` only creates a second name for the same
# frame, so every write would land in row_df too.
new_df = row_df.copy()

# Silence pandas' PerformanceWarning (presumably triggered by the
# column-by-column inserts in the loop below -- TODO confirm).
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

# Spread the long-format measurements into the wide table:
#   row    = composite key (index of head_df)
#   column = itemid rendered as text
#   cell   = Mean_Chart value
# NOTE(review): this is a Python-level loop with one scalar write per
# measurement; a pivot + join would likely be much faster on large inputs.
for record in head_df.itertuples():
    new_df.at[f"{record.Index}", f"{record.itemid}"] = record.Mean_Chart

# Drop rows that received no value in any column after the first three.
# The column list is taken from new_df because row_df no longer gains the
# value columns now that new_df is an independent copy.
new_df = new_df.dropna(subset=new_df.columns[3:], how='all')

In [None]:
# Build human-readable column headers from the chart and lab dictionaries.

# Chart items: "<label> (<unitname>) - Mean".
chart_items_df["header"] = (chart_items_df["label"].astype(str)
                            + " (" + chart_items_df["unitname"].astype(str) + ")"
                            + " - Mean")

# Lab items carry no unit in the header: "<label> - Mean".
lab_items_df["header"] = lab_items_df["label"].astype(str) + " - Mean"

# Only the itemid -> header pairs are needed from here on.
chart_items_df = chart_items_df[["itemid", "header"]]
lab_items_df = lab_items_df[["itemid", "header"]]

# Stack labs first, then charts, into a single lookup table.
combined_df = pd.concat([lab_items_df, chart_items_df], ignore_index=True)

# A missing unitname was stringified to "nan" above; remove the resulting
# "(nan)". regex=False forces a literal match on every pandas version --
# interpreted as a regular expression, "(nan)" is a capture group that would
# delete the letters "nan" anywhere in a header and leave "()" behind.
# NOTE: the spaces on both sides of "(nan)" remain (strip() only trims the
# ends), so such headers contain a double space before "- Mean".
combined_df['header'] = (combined_df['header']
                         .str.replace('(nan)', '', regex=False)
                         .str.strip())

In [None]:
# Replace the itemid column labels with observation names and export.

# Map each itemid (as text, matching the column labels) to its header.
header_mapping = dict(zip(combined_df['itemid'].astype(str), combined_df['header']))

# Translate every column label; a label without a dictionary entry keeps its
# own text instead of silently turning into NaN.
renamed_columns = [header_mapping.get(label, label) for label in new_df.columns]

# The first three columns are the identifiers and are named by position.
renamed_columns[:3] = ["subject_id", "hadm_id", "Time_Zone"]

# Attach the labels to a shallow copy: new_df keeps its itemid headers and
# no cell data is duplicated. Assigning a full label list also avoids
# writing into `columns.values`, which edits the Index buffer in place.
temp_df = new_df.copy(deep=False)
temp_df.columns = renamed_columns

# Write the table to CSV without the composite-key index. The path is a raw
# string so its backslashes are literal rather than invalid escape sequences.
temp_df.to_csv(r'CSV\Exports\datasets\Temp\o01_mean_table.csv', index=False)

# Ask the garbage collector to reclaim unreachable objects.
gc.collect()

## Create and fill "Median" table (rows and columns)

In [None]:
# Aggregated chart events in long format; later cells read its subject_id,
# hadm_id, Time_Zone, itemid and Median_Chart columns.
# The paths are raw strings so the Windows backslashes stay literal instead
# of depending on "\E", "\g", "\c" and "\o" being unrecognised escape
# sequences (Python warns about those and intends to reject them).
head_df = pd.read_csv(r'CSV\Exports\groupby\chartevent\o02_icu_chartevent_grouped_median.csv')

# Row skeleton of the output table (identifier columns only -- TODO confirm).
row_df = pd.read_csv(r'CSV\Exports\o07_chartevent_rows.csv')

# Locations of the gzip-compressed item dictionaries (`d_items`, `d_labitems`).
compressed_chart_items_df = r"..\Datasets\mimic-iv-2_2\icu\d_items.csv.gz"
compressed_lab_items_df = r"..\Datasets\mimic-iv-2_2\hosp\d_labitems.csv.gz"

# Load both dictionaries into DataFrames, decompressing on the fly.
chart_items_df = pd.read_csv(compressed_chart_items_df, compression='gzip')
lab_items_df = pd.read_csv(compressed_lab_items_df, compression='gzip')

In [None]:
"""
Creating the concat column as an index
from the combination of subject_id,
hadm_id and Time_Zone.
"""
# Build the composite key with an explicit "_" between the three parts.
# Gluing the values together with nothing in between is ambiguous: the
# triples (1, 23, 4) and (12, 3, 4) would both become "1234" and be treated
# as the same row.
row_key = (row_df["subject_id"].astype(str)
           + "_" + row_df["hadm_id"].astype(str)
           + "_" + row_df["Time_Zone"].astype(str))

# Use the key as the row index (named "concat"); the three source columns
# stay in the frame as regular columns.
row_df = row_df.set_index(row_key.rename("concat"))

# Build the very same key on the measurements table so that both tables
# share a common lookup value. The separator must match the one used above.
head_key = (head_df["subject_id"].astype(str)
            + "_" + head_df["hadm_id"].astype(str)
            + "_" + head_df["Time_Zone"].astype(str))

# Index the measurements by the composite key as well.
head_df = head_df.set_index(head_key.rename("concat"))

In [None]:
# Work on a real copy so that row_df itself is not modified by the fill
# below. A plain `new_df = row_df` only creates a second name for the same
# frame, so every write would land in row_df too.
new_df = row_df.copy()

# Silence pandas' PerformanceWarning (presumably triggered by the
# column-by-column inserts in the loop below -- TODO confirm).
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

# Spread the long-format measurements into the wide table:
#   row    = composite key (index of head_df)
#   column = itemid rendered as text
#   cell   = Median_Chart value
# NOTE(review): this is a Python-level loop with one scalar write per
# measurement; a pivot + join would likely be much faster on large inputs.
for record in head_df.itertuples():
    new_df.at[f"{record.Index}", f"{record.itemid}"] = record.Median_Chart

# Drop rows that received no value in any column after the first three.
# The column list is taken from new_df because row_df no longer gains the
# value columns now that new_df is an independent copy.
new_df = new_df.dropna(subset=new_df.columns[3:], how='all')

In [None]:
# Build human-readable column headers from the chart and lab dictionaries.

# Chart items: "<label> (<unitname>) - Median".
chart_items_df["header"] = (chart_items_df["label"].astype(str)
                            + " (" + chart_items_df["unitname"].astype(str) + ")"
                            + " - Median")

# Lab items carry no unit in the header: "<label> - Median".
lab_items_df["header"] = lab_items_df["label"].astype(str) + " - Median"

# Only the itemid -> header pairs are needed from here on.
chart_items_df = chart_items_df[["itemid", "header"]]
lab_items_df = lab_items_df[["itemid", "header"]]

# Stack labs first, then charts, into a single lookup table.
combined_df = pd.concat([lab_items_df, chart_items_df], ignore_index=True)

# A missing unitname was stringified to "nan" above; remove the resulting
# "(nan)". regex=False forces a literal match on every pandas version --
# interpreted as a regular expression, "(nan)" is a capture group that would
# delete the letters "nan" anywhere in a header and leave "()" behind.
# NOTE: the spaces on both sides of "(nan)" remain (strip() only trims the
# ends), so such headers contain a double space before "- Median".
combined_df['header'] = (combined_df['header']
                         .str.replace('(nan)', '', regex=False)
                         .str.strip())

In [None]:
# Replace the itemid column labels with observation names and export.

# Map each itemid (as text, matching the column labels) to its header.
header_mapping = dict(zip(combined_df['itemid'].astype(str), combined_df['header']))

# Translate every column label; a label without a dictionary entry keeps its
# own text instead of silently turning into NaN.
renamed_columns = [header_mapping.get(label, label) for label in new_df.columns]

# The first three columns are the identifiers and are named by position.
renamed_columns[:3] = ["subject_id", "hadm_id", "Time_Zone"]

# Attach the labels to a shallow copy: new_df keeps its itemid headers and
# no cell data is duplicated. Assigning a full label list also avoids
# writing into `columns.values`, which edits the Index buffer in place.
temp_df = new_df.copy(deep=False)
temp_df.columns = renamed_columns

# Write the table to CSV without the composite-key index. The path is a raw
# string so its backslashes are literal rather than invalid escape sequences.
temp_df.to_csv(r'CSV\Exports\datasets\Temp\o02_median_table.csv', index=False)

# Ask the garbage collector to reclaim unreachable objects.
gc.collect()

## Create and fill "Min" table (rows and columns)

In [None]:
# Aggregated chart events in long format; later cells read its subject_id,
# hadm_id, Time_Zone, itemid and Min_Chart columns.
# The paths are raw strings so the Windows backslashes stay literal instead
# of depending on "\E", "\g", "\c" and "\o" being unrecognised escape
# sequences (Python warns about those and intends to reject them).
head_df = pd.read_csv(r'CSV\Exports\groupby\chartevent\o03_icu_chartevent_grouped_min.csv')

# Row skeleton of the output table (identifier columns only -- TODO confirm).
row_df = pd.read_csv(r'CSV\Exports\o07_chartevent_rows.csv')

# Locations of the gzip-compressed item dictionaries (`d_items`, `d_labitems`).
compressed_chart_items_df = r"..\Datasets\mimic-iv-2_2\icu\d_items.csv.gz"
compressed_lab_items_df = r"..\Datasets\mimic-iv-2_2\hosp\d_labitems.csv.gz"

# Load both dictionaries into DataFrames, decompressing on the fly.
chart_items_df = pd.read_csv(compressed_chart_items_df, compression='gzip')
lab_items_df = pd.read_csv(compressed_lab_items_df, compression='gzip')

In [None]:
"""
Creating the concat column as an index
from the combination of subject_id,
hadm_id and Time_Zone.
"""
# Build the composite key with an explicit "_" between the three parts.
# Gluing the values together with nothing in between is ambiguous: the
# triples (1, 23, 4) and (12, 3, 4) would both become "1234" and be treated
# as the same row.
row_key = (row_df["subject_id"].astype(str)
           + "_" + row_df["hadm_id"].astype(str)
           + "_" + row_df["Time_Zone"].astype(str))

# Use the key as the row index (named "concat"); the three source columns
# stay in the frame as regular columns.
row_df = row_df.set_index(row_key.rename("concat"))

# Build the very same key on the measurements table so that both tables
# share a common lookup value. The separator must match the one used above.
head_key = (head_df["subject_id"].astype(str)
            + "_" + head_df["hadm_id"].astype(str)
            + "_" + head_df["Time_Zone"].astype(str))

# Index the measurements by the composite key as well.
head_df = head_df.set_index(head_key.rename("concat"))

In [None]:
# Work on a real copy so that row_df itself is not modified by the fill
# below. A plain `new_df = row_df` only creates a second name for the same
# frame, so every write would land in row_df too.
new_df = row_df.copy()

# Silence pandas' PerformanceWarning (presumably triggered by the
# column-by-column inserts in the loop below -- TODO confirm).
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

# Spread the long-format measurements into the wide table:
#   row    = composite key (index of head_df)
#   column = itemid rendered as text
#   cell   = Min_Chart value
# NOTE(review): this is a Python-level loop with one scalar write per
# measurement; a pivot + join would likely be much faster on large inputs.
for record in head_df.itertuples():
    new_df.at[f"{record.Index}", f"{record.itemid}"] = record.Min_Chart

# Drop rows that received no value in any column after the first three.
# The column list is taken from new_df because row_df no longer gains the
# value columns now that new_df is an independent copy.
new_df = new_df.dropna(subset=new_df.columns[3:], how='all')

In [None]:
# Build human-readable column headers from the chart and lab dictionaries.

# Chart items: "<label> (<unitname>) - Min".
chart_items_df["header"] = (chart_items_df["label"].astype(str)
                            + " (" + chart_items_df["unitname"].astype(str) + ")"
                            + " - Min")

# Lab items carry no unit in the header: "<label> - Min".
lab_items_df["header"] = lab_items_df["label"].astype(str) + " - Min"

# Only the itemid -> header pairs are needed from here on.
chart_items_df = chart_items_df[["itemid", "header"]]
lab_items_df = lab_items_df[["itemid", "header"]]

# Stack labs first, then charts, into a single lookup table.
combined_df = pd.concat([lab_items_df, chart_items_df], ignore_index=True)

# A missing unitname was stringified to "nan" above; remove the resulting
# "(nan)". regex=False forces a literal match on every pandas version --
# interpreted as a regular expression, "(nan)" is a capture group that would
# delete the letters "nan" anywhere in a header and leave "()" behind.
# NOTE: the spaces on both sides of "(nan)" remain (strip() only trims the
# ends), so such headers contain a double space before "- Min".
combined_df['header'] = (combined_df['header']
                         .str.replace('(nan)', '', regex=False)
                         .str.strip())

In [None]:
# Replace the itemid column labels with observation names and export.

# Map each itemid (as text, matching the column labels) to its header.
header_mapping = dict(zip(combined_df['itemid'].astype(str), combined_df['header']))

# Translate every column label; a label without a dictionary entry keeps its
# own text instead of silently turning into NaN.
renamed_columns = [header_mapping.get(label, label) for label in new_df.columns]

# The first three columns are the identifiers and are named by position.
renamed_columns[:3] = ["subject_id", "hadm_id", "Time_Zone"]

# Attach the labels to a shallow copy: new_df keeps its itemid headers and
# no cell data is duplicated. Assigning a full label list also avoids
# writing into `columns.values`, which edits the Index buffer in place.
temp_df = new_df.copy(deep=False)
temp_df.columns = renamed_columns

# Write the table to CSV without the composite-key index. The path is a raw
# string so its backslashes are literal rather than invalid escape sequences.
temp_df.to_csv(r'CSV\Exports\datasets\Temp\o03_min_table.csv', index=False)

# Ask the garbage collector to reclaim unreachable objects.
gc.collect()

## Create and fill "Max" table (rows and columns)

In [None]:
# Aggregated chart events in long format; later cells read its subject_id,
# hadm_id, Time_Zone, itemid and Max_Chart columns.
# The paths are raw strings so the Windows backslashes stay literal instead
# of depending on "\E", "\g", "\c" and "\o" being unrecognised escape
# sequences (Python warns about those and intends to reject them).
head_df = pd.read_csv(r'CSV\Exports\groupby\chartevent\o04_icu_chartevent_grouped_max.csv')

# Row skeleton of the output table (identifier columns only -- TODO confirm).
row_df = pd.read_csv(r'CSV\Exports\o07_chartevent_rows.csv')

# Locations of the gzip-compressed item dictionaries (`d_items`, `d_labitems`).
compressed_chart_items_df = r"..\Datasets\mimic-iv-2_2\icu\d_items.csv.gz"
compressed_lab_items_df = r"..\Datasets\mimic-iv-2_2\hosp\d_labitems.csv.gz"

# Load both dictionaries into DataFrames, decompressing on the fly.
chart_items_df = pd.read_csv(compressed_chart_items_df, compression='gzip')
lab_items_df = pd.read_csv(compressed_lab_items_df, compression='gzip')

In [None]:
"""
Creating the concat column as an index
from the combination of subject_id,
hadm_id and Time_Zone.
"""
# Build the composite key with an explicit "_" between the three parts.
# Gluing the values together with nothing in between is ambiguous: the
# triples (1, 23, 4) and (12, 3, 4) would both become "1234" and be treated
# as the same row.
row_key = (row_df["subject_id"].astype(str)
           + "_" + row_df["hadm_id"].astype(str)
           + "_" + row_df["Time_Zone"].astype(str))

# Use the key as the row index (named "concat"); the three source columns
# stay in the frame as regular columns.
row_df = row_df.set_index(row_key.rename("concat"))

# Build the very same key on the measurements table so that both tables
# share a common lookup value. The separator must match the one used above.
head_key = (head_df["subject_id"].astype(str)
            + "_" + head_df["hadm_id"].astype(str)
            + "_" + head_df["Time_Zone"].astype(str))

# Index the measurements by the composite key as well.
head_df = head_df.set_index(head_key.rename("concat"))

In [None]:
# Work on a real copy so that row_df itself is not modified by the fill
# below. A plain `new_df = row_df` only creates a second name for the same
# frame, so every write would land in row_df too.
new_df = row_df.copy()

# Silence pandas' PerformanceWarning (presumably triggered by the
# column-by-column inserts in the loop below -- TODO confirm).
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

# Spread the long-format measurements into the wide table:
#   row    = composite key (index of head_df)
#   column = itemid rendered as text
#   cell   = Max_Chart value
# NOTE(review): this is a Python-level loop with one scalar write per
# measurement; a pivot + join would likely be much faster on large inputs.
for record in head_df.itertuples():
    new_df.at[f"{record.Index}", f"{record.itemid}"] = record.Max_Chart

# Drop rows that received no value in any column after the first three.
# The column list is taken from new_df because row_df no longer gains the
# value columns now that new_df is an independent copy.
new_df = new_df.dropna(subset=new_df.columns[3:], how='all')

In [None]:
# Build human-readable column headers from the chart and lab dictionaries.

# Chart items: "<label> (<unitname>) - Max".
chart_items_df["header"] = (chart_items_df["label"].astype(str)
                            + " (" + chart_items_df["unitname"].astype(str) + ")"
                            + " - Max")

# Lab items carry no unit in the header: "<label> - Max".
lab_items_df["header"] = lab_items_df["label"].astype(str) + " - Max"

# Only the itemid -> header pairs are needed from here on.
chart_items_df = chart_items_df[["itemid", "header"]]
lab_items_df = lab_items_df[["itemid", "header"]]

# Stack labs first, then charts, into a single lookup table.
combined_df = pd.concat([lab_items_df, chart_items_df], ignore_index=True)

# A missing unitname was stringified to "nan" above; remove the resulting
# "(nan)". regex=False forces a literal match on every pandas version --
# interpreted as a regular expression, "(nan)" is a capture group that would
# delete the letters "nan" anywhere in a header and leave "()" behind.
# NOTE: the spaces on both sides of "(nan)" remain (strip() only trims the
# ends), so such headers contain a double space before "- Max".
combined_df['header'] = (combined_df['header']
                         .str.replace('(nan)', '', regex=False)
                         .str.strip())

In [None]:
# Replace the itemid column labels with observation names and export.

# Map each itemid (as text, matching the column labels) to its header.
header_mapping = dict(zip(combined_df['itemid'].astype(str), combined_df['header']))

# Translate every column label; a label without a dictionary entry keeps its
# own text instead of silently turning into NaN.
renamed_columns = [header_mapping.get(label, label) for label in new_df.columns]

# The first three columns are the identifiers and are named by position.
renamed_columns[:3] = ["subject_id", "hadm_id", "Time_Zone"]

# Attach the labels to a shallow copy: new_df keeps its itemid headers and
# no cell data is duplicated. Assigning a full label list also avoids
# writing into `columns.values`, which edits the Index buffer in place.
temp_df = new_df.copy(deep=False)
temp_df.columns = renamed_columns

# Write the table to CSV without the composite-key index. The path is a raw
# string so its backslashes are literal rather than invalid escape sequences.
temp_df.to_csv(r'CSV\Exports\datasets\Temp\o04_max_table.csv', index=False)

# Ask the garbage collector to reclaim unreachable objects.
gc.collect()