# Creating the final lab table using data from lab and customlab tables

In [1]:
import gc
import gzip
import pandas as pd

# Show every column when a DataFrame is displayed (no column truncation)
pd.set_option('display.max_columns', None)

# Unique-admissions CSV.
# Raw string on purpose: in a normal literal '\E' and '\o' are invalid escape
# sequences (SyntaxWarning on Python 3.12+). The resulting path is unchanged.
admissions_path = r'CSV\Exports\o02_eicu_unique_admissions.csv'

# Gzip-compressed customlab table
compressed_file_path = r"..\Datasets\eicu-2_0\customlab.csv.gz"

# Load the customlab table
df = pd.read_csv(compressed_file_path, compression='gzip')

# Load the admissions table
admissions_df = pd.read_csv(admissions_path)

In [2]:
# Inner-join the admissions with the customlab rows on the unit-stay id, so
# only customlab rows belonging to a listed admission are kept
merged_df = pd.merge(admissions_df, df, on='patientunitstayid', how='inner')

# Discard rows whose 'labotheroffset' is negative and renumber the remaining
# rows 0..n-1
non_negative_offset = merged_df['labotheroffset'] >= 0
merged_df = merged_df.loc[non_negative_offset].reset_index(drop=True)

# Columns that are not carried over into the final lab table
columns_to_drop = [
    'diagnosisid', 'activeupondischarge', 'labothervaluetext', 'labothertypeid',
    'customlabid', 'order_of_appearance', 'unitdischargestatus',
    'unitdischargelocation', 'unitdischargeoffset', 'unitdischargetime24',
    'dischargeweight', 'admissionweight', 'unitstaytype', 'unitvisitnumber',
    'unitadmitsource', 'unitadmittime24', 'unittype', 'hospitaldischargestatus',
    'hospitaldischargelocation', 'hospitaldischargeoffset',
    'hospitaldischargetime24', 'hospitaldischargeyear', 'icd9code',
    'diagnosispriority', 'patienthealthsystemstayid', 'gender', 'age',
    'ethnicity', 'hospitalid', 'wardid', 'apacheadmissiondx', 'admissionheight',
    'hospitaladmitoffset', 'hospitaladmitsource', 'diagnosisstring',
    'hospitaladmittime24', 'diagnosisoffset',
]

# Remove those columns, then append a constant 'labmeasurenamesystem' column
# filled with '-'
merged_df = (
    merged_df
    .drop(columns=columns_to_drop)
    .assign(labmeasurenamesystem='-')
)

In [3]:
# Rename the customlab columns (old name -> new name)
column_mapping = {
    'labotheroffset': 'labresultoffset',
    'labothername': 'labname',
    'labotherresult': 'labresult',
}

# Columns not listed in the mapping keep their current names
merged_df = merged_df.rename(columns=column_mapping)

In [4]:
# Combine the lab export with the renamed customlab rows

# Raw string on purpose: in a normal literal '\E' and '\o' are invalid escape
# sequences (SyntaxWarning on Python 3.12+). The resulting path is unchanged.
lab_path = r'CSV\Exports\o03_eicu_lab.csv'
lab_df = pd.read_csv(lab_path)

# Stack the two tables row-wise and renumber the rows 0..n-1; a column present
# in only one of the tables is filled with NaN for the other table's rows
df_combined = pd.concat([lab_df, merged_df], ignore_index=True)

In [5]:
# Write the combined lab + customlab table to CSV, with a header row and
# without the index column.
# Raw string on purpose: in a normal literal '\E' and '\o' are invalid escape
# sequences (SyntaxWarning on Python 3.12+). The resulting path is unchanged.
df_combined.to_csv(r'CSV\Exports\o04_eicu_lab_final.csv', index=False, header=True)

# Ask the garbage collector to run a full collection now. The DataFrames above
# are still referenced by their variables, so they are not released by this.
gc.collect()

0