In [2]:
import csv
import pandas as pd
import numpy as np

In [3]:
# Load the four per-statistic MIMIC-IV exports (mean / median / min / max).
# Paths are written with forward slashes: Python accepts them on Windows as
# well as on Linux/macOS, whereas the previous backslash-separated paths only
# resolved on Windows.
mimic_export_dir = '../01_MimicIV/CSV/Exports/datasets/whole_set'
mimic_mean_df = pd.read_csv(f'{mimic_export_dir}/o01_final_mean_with_los.csv', low_memory=False)
mimic_median_df = pd.read_csv(f'{mimic_export_dir}/o02_final_median_with_los.csv', low_memory=False)
mimic_min_df = pd.read_csv(f'{mimic_export_dir}/o03_final_min_with_los.csv', low_memory=False)
mimic_max_df = pd.read_csv(f'{mimic_export_dir}/o04_final_max_with_los.csv', low_memory=False)

# Load the four per-statistic eICU exports.
eicu_export_dir = '../02_eICU/CSV/Exports/datasets/whole_set'
# NOTE: `eicu_meam_df` (sic) keeps its original spelling because the eICU
# merge cell further down refers to the frame by that exact name.
eicu_meam_df = pd.read_csv(f'{eicu_export_dir}/o01_final_mean_table.csv', low_memory=False)
eicu_median_df = pd.read_csv(f'{eicu_export_dir}/o02_final_median_table.csv', low_memory=False)
eicu_min_df = pd.read_csv(f'{eicu_export_dir}/o03_final_min_table.csv', low_memory=False)
eicu_max_df = pd.read_csv(f'{eicu_export_dir}/o04_final_max_table.csv', low_memory=False)

In [4]:
# Feature lists naming the columns to keep from each merged table.
# Each file is expected to provide a 'column' field (read by the selection
# cells below). Forward slashes keep the paths usable on non-Windows hosts;
# the previous backslash-separated paths only resolved on Windows.
mimic_columns_to_keep = pd.read_csv('CSV/imports/mimic_features.csv')
eicu_columns_to_keep = pd.read_csv('CSV/imports/eicu_features.csv')

# MIMIC

In [6]:
# Join the four MIMIC aggregation tables on the identifier / demographic /
# outcome columns that every table carries.
mimic_join_keys = ['row_count','subject_id', 'hadm_id', 'Time_Zone', 'gender', 'age', 'language', 'marital_status', 'race', 'hospital_expire_flag', 'los']

merged_mimic_df = mimic_mean_df.merge(mimic_median_df, on=mimic_join_keys, suffixes=('_mean', '_median'))
# The min and max tables are folded in the same way; only a clashing
# right-hand column receives the table-specific suffix.
for extra_table, extra_suffix in ((mimic_min_df, '_min'), (mimic_max_df, '_max')):
    merged_mimic_df = merged_mimic_df.merge(extra_table, on=mimic_join_keys, suffixes=('', extra_suffix))

# Turn a trailing " - <Stat>" in the headers into " (<Stat>)".
for stat_label in ('Mean', 'Median', 'Min', 'Max'):
    merged_mimic_df.columns = merged_mimic_df.columns.str.replace(
        rf'\s*-\s*{stat_label}', f' ({stat_label})', regex=True
    )

# Pull the two outcome columns out and append them again so they sit last.
hospital_expire_flag_column = merged_mimic_df.pop('hospital_expire_flag')
los_column = merged_mimic_df.pop('los')
merged_mimic_df = pd.concat(
    [merged_mimic_df, hospital_expire_flag_column, los_column],
    axis=1,
)

# Re-state the names of the two trailing columns explicitly.
merged_mimic_df.columns = [*merged_mimic_df.columns[:-2], 'hospital_expire_flag', 'los']

In [None]:
# Collapse the three GCS component scores into one total per statistic.
#
# The previous version called DataFrame.apply(axis=1) with a lambda that
# sliced every row twice, i.e. a Python-level loop over the whole table for
# each of the four statistics. `sum(axis=1, min_count=1)` gives the same
# result in one vectorised pass: NaNs are skipped, and a row whose three
# components are all NaN yields NaN instead of 0.
#
# NOTE(review): when only some components are present the total is a partial
# sum (same as before) — confirm that is acceptable for the analysis.
gcs_components = ('GCS - Eye Opening', 'GCS - Verbal Response', 'GCS - Motor Response')
gcs_component_columns = []

for stat_label in ('Mean', 'Median', 'Min', 'Max'):
    stat_columns = [f'{component} ({stat_label})' for component in gcs_components]
    merged_mimic_df[f'GCS ({stat_label})'] = merged_mimic_df[stat_columns].sum(axis=1, min_count=1)
    gcs_component_columns.extend(stat_columns)

# Drop the original GCS component columns; only the totals are kept.
merged_mimic_df = merged_mimic_df.drop(columns=gcs_component_columns)

In [None]:
# Collapse the six Braden sub-scores into one total per statistic.
#
# The previous version used a row-wise DataFrame.apply that sliced each row
# twice per statistic. `sum(axis=1, min_count=1)` is the vectorised
# equivalent: NaNs are skipped, and a row with all six sub-scores missing
# yields NaN instead of 0.
#
# NOTE(review): rows with only some sub-scores present produce a partial sum
# (unchanged from before) — confirm that is intended.
braden_components = (
    'Braden Sensory Perception',
    'Braden Moisture',
    'Braden Activity',
    'Braden Mobility',
    'Braden Nutrition',
    'Braden Friction/Shear',
)
braden_component_columns = []

for stat_label in ('Mean', 'Median', 'Min', 'Max'):
    stat_columns = [f'{component} ({stat_label})' for component in braden_components]
    merged_mimic_df[f'Braden ({stat_label})'] = merged_mimic_df[stat_columns].sum(axis=1, min_count=1)
    braden_component_columns.extend(stat_columns)

# Drop the original Braden component columns; only the totals are kept.
merged_mimic_df = merged_mimic_df.drop(columns=braden_component_columns)

In [None]:
# Header clean-up: every run of spaces and/or commas in a column name is
# collapsed into a single underscore.
sanitized_mimic_headers = merged_mimic_df.columns.str.replace(r'[ ,]+', '_', regex=True)
merged_mimic_df.columns = sanitized_mimic_headers

In [None]:
# Discard the stray second column of the feature list, if it is there.
# `errors='ignore'` keeps this cell re-runnable: the previous in-place drop
# raised KeyError on a second execution (or whenever the CSV had no such
# column), because 'Unnamed: 1' was already gone.
mimic_columns_to_keep = mimic_columns_to_keep.drop(columns=['Unnamed: 1'], errors='ignore')

# Names of the columns to retain, in feature-list order.
columns_to_keep_names = mimic_columns_to_keep['column'].tolist()

# Select only the desired columns (raises KeyError if any listed name is
# missing from the merged table).
mimic_temp = merged_mimic_df[columns_to_keep_names]

In [None]:
# Keep only the first occurrence of every column name.
# The explicit .copy() makes df_mimic_unique an independent frame: the next
# cell writes into it with .loc, and without the copy those writes target a
# slice of mimic_temp and trigger SettingWithCopyWarning.
df_mimic_unique = mimic_temp.loc[:, ~mimic_temp.columns.duplicated()].copy()

In [None]:
# Rescale the MIMIC ionized-calcium columns by a factor of 4 so they are
# comparable with the eICU values.
# NOTE(review): presumably a unit conversion (mmol/L -> mg/dL); the factor is
# kept at exactly 4 as before — confirm the intended precision.
#
# Plain vectorised multiplication replaces the per-element
# `apply(lambda x: x * 4 if pd.notna(x) else x)`: NaN * 4 is still NaN, so
# missing values stay missing. Values are read from mimic_temp (never
# modified), so re-running the cell does not scale twice.
ionized_calcium_scale = 4
for stat_label in ('Max', 'Mean', 'Median', 'Min'):
    calcium_column = f'Ionized_Calcium_({stat_label})'
    df_mimic_unique.loc[:, calcium_column] = mimic_temp[calcium_column] * ionized_calcium_scale

In [None]:
# Work on an independent copy so the column assignments below do not raise
# SettingWithCopyWarning.
df_mimic_unique = df_mimic_unique.copy()

# Three glucose columns exist per statistic (base name plus the '.1' / '.2'
# variants). They are averaged row-wise into one 'Glucose (<Stat>)' column;
# mean(axis=1) skips NaNs, so a row is NaN only when all three are missing.
glucose_sources = {
    stat_label: [f'Glucose_({stat_label}){variant}' for variant in ('', '.1', '.2')]
    for stat_label in ('Max', 'Mean', 'Median', 'Min')
}
for stat_label, source_columns in glucose_sources.items():
    df_mimic_unique[f'Glucose ({stat_label})'] = df_mimic_unique[source_columns].mean(axis=1)

# Remove the twelve source columns, leaving only the merged ones.
merged_away = [name for source_columns in glucose_sources.values() for name in source_columns]
df_mimic_unique = df_mimic_unique.drop(columns=merged_away)

# Show the first rows to verify the result.
print(df_mimic_unique.head())


In [None]:
# Work on an independent copy so the assignments below do not raise
# SettingWithCopyWarning.
df_mimic_unique = df_mimic_unique.copy()

# pH merge: average the pH variants of each statistic into one
# 'pH (<Stat>)' column.
#
# `mean(axis=1)` replaces the former row-wise apply (which sliced every row
# twice). It skips NaNs and already returns NaN when every source value of a
# row is missing, so the explicit all-NaN guard is no longer needed.
#
# NOTE(review): only the Mean statistic has a '.2' variant in these lists;
# Max / Median / Min use the base column, '.1' and '.3'. This asymmetry is
# kept exactly as it was — confirm it matches the feature list.
ph_sources = {
    'Max': ['pH_(Max)', 'pH_(Max).1', 'pH_(Max).3'],
    'Mean': ['pH_(Mean)', 'pH_(Mean).1', 'pH_(Mean).2', 'pH_(Mean).3'],
    'Median': ['pH_(Median)', 'pH_(Median).1', 'pH_(Median).3'],
    'Min': ['pH_(Min)', 'pH_(Min).1', 'pH_(Min).3'],
}
for stat_label, source_columns in ph_sources.items():
    df_mimic_unique[f'pH ({stat_label})'] = df_mimic_unique[source_columns].mean(axis=1)

# Drop original pH columns to keep only the summarized columns.
df_mimic_unique = df_mimic_unique.drop(
    columns=[name for source_columns in ph_sources.values() for name in source_columns]
)

# eICU

In [None]:
# Join the four eICU aggregation tables on the identifier / demographic /
# outcome columns that every table carries.
eicu_join_keys = ['row_count', 'uniquepid', 'patientunitstayid', 'Time_Zone', 'gender', 'age', 'ethnicity', 'unitdischargestatus', 'LOS']

merged_eicu_df = eicu_meam_df.merge(eicu_median_df, on=eicu_join_keys, suffixes=('_mean', '_median'))
# Min and max are folded in the same way; only a clashing right-hand column
# receives the table-specific suffix.
for extra_table, extra_suffix in ((eicu_min_df, '_min'), (eicu_max_df, '_max')):
    merged_eicu_df = merged_eicu_df.merge(extra_table, on=eicu_join_keys, suffixes=('', extra_suffix))

# Pull the two outcome columns out and append them again so they sit last.
unitdischargestatus_column = merged_eicu_df.pop('unitdischargestatus')
los_column = merged_eicu_df.pop('LOS')
merged_eicu_df = pd.concat(
    [merged_eicu_df, unitdischargestatus_column, los_column],
    axis=1,
)

# Re-state the names of the two trailing columns explicitly.
merged_eicu_df.columns = [*merged_eicu_df.columns[:-2], 'unitdischargestatus', 'LOS']

In [None]:
# Discard the stray second column of the feature list, if it is there.
# `errors='ignore'` keeps this cell re-runnable: the previous in-place drop
# raised KeyError on a second execution (or whenever the CSV had no such
# column), because 'Unnamed: 1' was already gone.
eicu_columns_to_keep = eicu_columns_to_keep.drop(columns=['Unnamed: 1'], errors='ignore')

# Names of the columns to retain, in feature-list order.
columns_to_keep_names = eicu_columns_to_keep['column'].tolist()

# Select only the desired columns (raises KeyError if any listed name is
# missing). The copy detaches the selection from merged_eicu_df so later
# cells can assign into it without SettingWithCopyWarning.
eicu_temp = merged_eicu_df[columns_to_keep_names].copy()

In [None]:
"""--------Replace Block----------"""
# Work on an independent copy of eicu_temp so the assignments below do not
# raise SettingWithCopyWarning.
eicu_temp = eicu_temp.copy()

# Columns are replaced with `frame[col] = ...` rather than
# `frame.loc[:, col] = ...`. Since pandas 2.0 the .loc form writes the new
# values into the existing column and keeps its old dtype, so the integer
# conversion of 'age' below would silently leave an object column behind.

# Replace 'Alive' with 0 and 'Expired' with 1 in the 'unitdischargestatus' column.
eicu_temp['unitdischargestatus'] = eicu_temp['unitdischargestatus'].replace({'Alive': 0, 'Expired': 1})

# Replace 'Female' with 'F' and 'Male' with 'M' in the 'gender' column.
eicu_temp['gender'] = eicu_temp['gender'].replace({'Female': 'F', 'Male': 'M'})

# Replace values in the 'ethnicity' column for standardization.
eicu_temp['ethnicity'] = eicu_temp['ethnicity'].replace({
    'African American': 'BLACK/AFRICAN AMERICAN',
    'Caucasian': 'WHITE',
    'Hispanic': 'HISPANIC OR LATINO',
    'Asian': 'ASIAN',
    'Native American': 'AMERICAN INDIAN/ALASKA NATIVE',
    'Other/Unknown': 'UNKNOWN'
})

# The literal '> 89' age marker becomes 90, then the column is cast to
# integer. As before, the cast raises if any age is missing or non-numeric.
eicu_temp['age'] = eicu_temp['age'].replace('> 89', 90).astype(np.int64)

In [None]:
# Header clean-up for both tables: every run of spaces and/or commas in a
# column name is collapsed into a single underscore.
for table in (df_mimic_unique, eicu_temp):
    table.columns = table.columns.str.replace(r'[ ,]+', '_', regex=True)

In [None]:
# Bedside glucose: eICU carries two differently-cased columns per statistic
# ('bedside_glucose_(..)' and 'Bedside_Glucose_(..)'); merge each pair into
# a single column by row-wise mean.

# Work on an independent copy of eicu_temp so the assignments below do not
# raise SettingWithCopyWarning.
eicu_temp = eicu_temp.copy()

# `mean(axis=1)` replaces the former row-wise apply (which sliced each row
# twice). It skips NaNs and returns NaN when both values of a row are
# missing, so the explicit all-NaN guard is no longer needed.
bedside_glucose_sources = {
    stat_label: [f'bedside_glucose_({stat_label})', f'Bedside_Glucose_({stat_label})']
    for stat_label in ('Max', 'Mean', 'Median', 'Min')
}
for stat_label, source_columns in bedside_glucose_sources.items():
    # The merged column is named with a space for now so it does not collide
    # with the lower-case source column that still exists at this point.
    eicu_temp[f'bedside_glucose ({stat_label})'] = eicu_temp[source_columns].mean(axis=1)

# Drop original bedside glucose columns to keep only the summarized ones.
eicu_temp = eicu_temp.drop(
    columns=[name for source_columns in bedside_glucose_sources.values() for name in source_columns]
)

# Normalise headers again: 'bedside_glucose (Max)' -> 'bedside_glucose_(Max)'.
eicu_temp.columns = eicu_temp.columns.str.replace(r'[ ,]+', '_', regex=True)

In [None]:
# Rename eICU header to align with mimics.
#
# `column_eicu_mapping` maps every eICU column name to the MIMIC name it is
# aligned with. It is generated from one base-name pair per measurement and
# expanded over the four statistics, so a statistic can no longer be paired
# with a different one by a copy/paste slip. The hand-written table had three
# such slips, all corrected here:
#   * Non-Invasive BP systolic and diastolic were mapped onto each other;
#   * CRP (Median) and CRP (Min) were swapped;
#   * the reticulocyte-count statistics were rotated (Max -> Mean,
#     Mean -> Median, Median -> Min, Min -> Max).

# Identifier / demographic columns (no statistic suffix).
_eicu_id_columns = {
    'column': 'column',
    'row_count': 'row_count',
    'uniquepid': 'subject_id',
    'patientunitstayid': 'hadm_id',
    'Time_Zone': 'Time_Zone',
    'gender': 'gender',
    'age': 'age',
    'ethnicity': 'race',
}

# eICU measurement base name -> MIMIC measurement base name.
_eicu_measure_names = {
    'Base_Excess': 'Base_Excess',
    'lactate': 'Lactate',
    'paCO2': 'pCO2',
    'Total_CO2': 'Calculated_Total_CO2',
    'BUN': 'BUN',
    'pH': 'pH',
    'paO2': 'pO2',
    'ALT_(SGPT)': 'Alanine_Aminotransferase_(ALT)',
    'alkaline_phos.': 'Alkaline_Phosphatase',
    'anion_gap': 'Anion_Gap',
    'AST_(SGOT)': 'Asparate_Aminotransferase_(AST)',
    'bicarbonate': 'Bicarbonate',
    'chloride': 'Chloride',
    'creatinine': 'Creatinine',
    'glucose': 'Glucose',
    'magnesium': 'Magnesium',
    'phosphate': 'Phosphate',
    'potassium': 'Potassium',
    'sodium': 'Sodium',
    'Hct': 'Hematocrit',
    'Hgb': 'Hemoglobin',
    'PT_-_INR': 'INR(PT)',
    'MCH': 'MCH',
    'MCHC': 'MCHC',
    'MCV': 'MCV',
    'platelets_x_1000': 'Platelet_Count',
    'PT': 'PT',
    'PTT': 'PTT',
    'RDW': 'RDW',
    'RBC': 'Red_Blood_Cells',
    'WBC_x_1000': 'White_Blood_Cells',
    'Heart_Rate': 'Heart_Rate_(bpm)',
    # Fixed: diastolic now maps to diastolic and systolic to systolic.
    'Non-Invasive_BP_Diastolic': 'Non_Invasive_Blood_Pressure_diastolic_(mmHg)',
    'Non-Invasive_BP_Systolic': 'Non_Invasive_Blood_Pressure_systolic_(mmHg)',
    'Non-Invasive_BP_Mean': 'Non_Invasive_Blood_Pressure_mean_(mmHg)',
    'Respiratory_Rate': 'Respiratory_Rate_(insp/min)',
    'O2_Saturation': 'O2_saturation_pulseoxymetry_(%)',
    # NOTE(review): 'CI' is aligned with serum chloride, as before — confirm
    # the eICU 'CI' column really is chloride and not another measurement.
    'CI': 'Chloride_(serum)',
    'calcium': 'Calcium_non-ionized',
    'CPK': 'CK_(CPK)',
    'Temperature_(F)': 'Temperature_Fahrenheit_(F)',
    'Pain_Score': 'Pain_Level',
    'LPM_O2': 'O2_Flow_(L/min)',
    'O2_L/%': 'Inspired_O2_Fraction',
    'ionized_calcium': 'Ionized_Calcium',
    'albumin': 'Albumin',
    'GCS_Total': 'GCS',
    'LDH': 'LDH',
    'ethanol': 'ETOH',
    'Invasive_BP_Systolic': 'Arterial_Blood_Pressure_systolic_(mmHg)',
    'Invasive_BP_Mean': 'Arterial_Blood_Pressure_mean_(mmHg)',
    'serum_osmolality': 'Serum_Osmolality',
    # NOTE(review): eICU troponin-I is aligned with MIMIC Troponin-T, as
    # before — confirm the two are meant to be treated as one feature.
    'troponin_-_I': 'Troponin-T',
    'uric_acid': 'Uric_Acid',
    'ammonia': 'Ammonia',
    # Fixed: Median / Min were swapped.
    'CRP': 'C_Reactive_Protein_(CRP)',
    'fibrinogen': 'Fibrinogen',
    'PA_Systolic': 'Pulmonary_Artery_Pressure_systolic_(mmHg)',
    'PA_Diastolic': 'Pulmonary_Artery_Pressure_diastolic_(mmHg)',
    'PA_Mean': 'Pulmonary_Artery_Pressure_mean_(mmHg)',
    'bedside_glucose': 'Glucose_finger_stick_(range_70-100)',
    # Fixed: the four statistics were rotated by one position.
    'reticulocyte_count': 'Reticulocyte_Count_Automated',
    '-basos': 'Differential-Basos',
    '-eos': 'Differential-Eos',
    '-lymphs': 'Differential-Lymphs',
    '-monos': 'Differential-Monos',
    '-polys': 'Differential-Neuts',
    'haptoglobin': 'Haptoglobin',
    'direct_bilirubin': 'Bilirubin_Direct',
    'free_T4': 'Thyroxine_(T4)_Free',
    'ESR': 'Sedimentation_Rate',
    'CPK-MB_INDEX': 'CK-MB',
    'amylase': 'Amylase',
    'PEEP': 'PEEP_set_(cmH2O)',
    'CVP': 'Central_Venous_Pressure_(mmHg)',
    'total_bilirubin': 'Total_Bilirubin',
    'Invasive_BP_Diastolic': 'Arterial_Blood_Pressure_diastolic_(mmHg)',
}

# Outcome columns (no statistic suffix).
_eicu_outcome_columns = {
    'unitdischargestatus': 'hospital_expire_flag',
    'LOS': 'los',
}

# Final mapping: identifiers, then '<name>_(<Stat>)' for every measurement
# and statistic, then the outcome columns.
column_eicu_mapping = {
    **_eicu_id_columns,
    **{
        f'{eicu_name}_({stat_label})': f'{mimic_name}_({stat_label})'
        for eicu_name, mimic_name in _eicu_measure_names.items()
        for stat_label in ('Max', 'Mean', 'Median', 'Min')
    },
    **_eicu_outcome_columns,
}

# Apply the eICU -> MIMIC header translation.
eicu_temp = eicu_temp.rename(columns=column_eicu_mapping)

# MIMIC temperature headers contain a degree sign ('(°F)'); rename them to
# the plain '(F)' spelling for each of the four statistics.
temperature_rename_mapping = {
    f'Temperature_Fahrenheit_(°F)_({stat_label})': f'Temperature_Fahrenheit_(F)_({stat_label})'
    for stat_label in ('Max', 'Mean', 'Median', 'Min')
}
df_mimic_unique = df_mimic_unique.rename(columns=temperature_rename_mapping)

# eICU subject ids contain dashes: strip them, then cast the remaining
# digits to int64.
eicu_temp['subject_id'] = eicu_temp['subject_id'].str.replace('-', '').astype(np.int64)

In [None]:
# Recode the MIMIC outcome labels to integers: 'Survive' -> 0, 'Death' -> 1.
# Values other than these two labels are left untouched by replace().
mimic_outcome_codes = {'Survive': 0, 'Death': 1}
df_mimic_unique['hospital_expire_flag'] = df_mimic_unique['hospital_expire_flag'].replace(mimic_outcome_codes)

In [None]:
# Render both harmonised tables (MIMIC first, then eICU) for a visual check.
display(df_mimic_unique, eicu_temp)

In [None]:
# Check if mimic and eicu datasets have the same dtype and header names.

# --- Header names -----------------------------------------------------------
mimic_columns = set(df_mimic_unique.columns)
eicu_columns = set(eicu_temp.columns)

# Columns present on one side only.
mimic_not_in_eicu = mimic_columns - eicu_columns
eicu_not_in_mimic = eicu_columns - mimic_columns

# Display columns that are different (sorted so the output is stable
# between runs).
if mimic_not_in_eicu:
    print("Columns in mimic_df but not in eicu_df:")
    print(sorted(mimic_not_in_eicu))

if eicu_not_in_mimic:
    print("\nColumns in eicu_df but not in mimic_df:")
    print(sorted(eicu_not_in_mimic))

if not mimic_not_in_eicu and not eicu_not_in_mimic:
    print("The column names are identical between mimic_df and eicu_df.")

# --- Data types -------------------------------------------------------------
# `mimic_info` / `eicu_info` hold one dtype per column name. They used to be
# referenced without ever being defined, so this cell raised NameError on a
# fresh kernel. Duplicate labels are reduced to their first occurrence so
# each lookup below yields a single dtype.
mimic_info = df_mimic_unique.dtypes
mimic_info = mimic_info[~mimic_info.index.duplicated()]
eicu_info = eicu_temp.dtypes
eicu_info = eicu_info[~eicu_info.index.duplicated()]

# Check if the number of columns is the same.
if len(mimic_info) != len(eicu_info):
    print("Number of columns is different between mimic_df and eicu_df.")

# Compare dtypes for the columns both tables share. Restricting the loop to
# shared names avoids the KeyError the previous version hit whenever the
# counts matched but the names did not.
for column_name in mimic_info.index:
    if column_name not in eicu_info.index:
        continue
    mimic_dtype = mimic_info[column_name]
    eicu_dtype = eicu_info[column_name]
    if mimic_dtype != eicu_dtype:
        print(f"Column '{column_name}' has different data types: mimic_df has '{mimic_dtype}' and eicu_df has '{eicu_dtype}'.")

In [None]:
# Bring 'age' and 'hospital_expire_flag' to the nullable Int64 dtype in both
# tables so their dtypes match.
#
# Columns are replaced with `frame[col] = ...` rather than
# `frame.loc[:, col] = ...`: since pandas 2.0 the .loc form writes the values
# into the existing column and keeps its old dtype, so the conversion would
# have no visible effect on the column's dtype.

# eICU: values that cannot be parsed as numbers become <NA>.
eicu_temp['age'] = pd.to_numeric(eicu_temp['age'], errors='coerce').astype('Int64')
eicu_temp['hospital_expire_flag'] = pd.to_numeric(eicu_temp['hospital_expire_flag'], errors='coerce').astype('Int64')

# MIMIC: plain cast (raises if a value cannot be represented as an integer).
df_mimic_unique['age'] = df_mimic_unique['age'].astype('Int64')
df_mimic_unique['hospital_expire_flag'] = df_mimic_unique['hospital_expire_flag'].astype('Int64')

In [None]:
# Re-position the two outcome columns so they are the last columns of each
# table: pop them off (flag first, then length of stay) and append them.
outcome_labels = ('hospital_expire_flag', 'los')

# MIMIC
hospital_expire_flag_mimic, los_mimic = (df_mimic_unique.pop(label) for label in outcome_labels)
df_mimic_unique = pd.concat(
    [df_mimic_unique, hospital_expire_flag_mimic, los_mimic],
    axis=1,
)

# eICU
hospital_expire_flag_eicu, los_eicu = (eicu_temp.pop(label) for label in outcome_labels)
eicu_temp = pd.concat(
    [eicu_temp, hospital_expire_flag_eicu, los_eicu],
    axis=1,
)

In [None]:
# Export the harmonised MIMIC table to CSV.
# The processed frame is `df_mimic_unique`. The previous code wrote
# `mimic_temp`, the raw column selection taken before the de-duplication,
# calcium rescaling, glucose/pH merging, temperature renaming and outcome
# recoding, so none of that work reached the file.
# Forward slashes keep the path usable on non-Windows hosts.
df_mimic_unique.to_csv('CSV/exports/final/mimic_mean_final.csv', index=False)

In [None]:
# Export the harmonised eICU table to CSV.
# Forward slashes keep the path usable on non-Windows hosts; the previous
# backslash-separated path only resolved on Windows.
eicu_temp.to_csv('CSV/exports/final/eicu_mean_final.csv', index=False)

# Testing Field

In [None]:
# For each table keep only the first column carrying a given name.
mimic_first_occurrence = ~df_mimic_unique.columns.duplicated()
df_mimic_unique = df_mimic_unique.loc[:, mimic_first_occurrence]

eicu_first_occurrence = ~eicu_temp.columns.duplicated()
eicu_temp = eicu_temp.loc[:, eicu_first_occurrence]

In [None]:
# Spot-check missing values for a group of columns chosen by name prefix.
# NOTE: the variables are still called `braden_*`, but the prefix used here
# is 'Temp', so the columns actually inspected are those starting with 'Temp'.
braden_columns = list(filter(lambda name: name.startswith('Temp'), df_mimic_unique.columns))
braden_df = df_mimic_unique.loc[:, braden_columns]

# Number of missing entries in each selected column.
missing_values_count = braden_df.isna().sum()

In [None]:
# Write the headers of the merged eICU table to a one-column CSV
# ('Column_Name'), one header per row.
column_names = merged_eicu_df.columns
columns_df = pd.DataFrame({'Column_Name': column_names})
columns_df.to_csv('column_names.csv', index=False)