# Dataset for Lab-9 is ICU_Admissions_Raw.csv

The raw dataset file `ICU_Admissions_Raw.csv` was extracted from the Lock5Data R package using RStudio.

In [5]:
# Step-1: Importing the raw dataset.
# --------------------------------------------------
import pandas as pd

# The path is relative to the notebook's working directory.
# Forward slashes are used on purpose: they resolve on Windows, macOS and
# Linux, whereas a backslash-separated path is only understood on Windows
# (elsewhere the backslashes are treated as part of the file name).
ICU = pd.read_csv('../Data_Extraction/ICU_Admissions_Raw.csv')

# Display the loaded frame (pandas truncates long frames when rendering).
ICU

Unnamed: 0,ID,Status,Age,Sex,Race,Service,Cancer,Renal,Infection,CPR,...,HeartRate,Previous,Type,Fracture,PO2,PH,PCO2,Bicarbonate,Creatinine,Consciousness
0,8,0,27,1,1,0,0,0,1,0,...,88,0,1,0,0,0,0,0,0,1
1,12,0,59,0,1,0,0,0,0,0,...,80,1,1,0,0,0,0,0,0,1
2,14,0,77,0,1,1,0,0,0,0,...,70,0,0,0,0,0,0,0,0,1
3,28,0,54,0,1,0,0,0,1,0,...,103,0,1,1,0,0,0,0,0,1
4,32,0,87,1,1,1,0,0,1,0,...,154,1,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,751,1,69,0,1,0,0,1,0,0,...,81,0,1,0,0,0,0,0,0,3
196,752,1,64,0,1,0,1,0,1,0,...,118,0,1,0,1,0,0,0,1,1
197,789,1,60,0,1,0,0,0,1,0,...,114,1,1,0,0,1,0,1,0,1
198,871,1,60,0,3,1,0,1,1,0,...,55,0,1,0,0,0,0,0,0,2


In [8]:
# Step-2: Removing unwanted variables
# --------------------------------------------------
# Names of the columns to discard from the raw frame.
cols_to_remove = [
    'Race', 'Infection', 'Type', 'Cancer', 'Renal', 'Fracture',
    'PO2', 'PCO2', 'PH', 'Bicarbonate', 'Creatinine',
]

# drop() returns a new frame, so the raw ICU frame is left untouched.
ICU_1 = ICU.drop(cols_to_remove, axis='columns')

# Preview the first rows of the reduced frame.
ICU_1.head()

Unnamed: 0,ID,Status,Age,Sex,Service,CPR,Systolic,HeartRate,Previous,Consciousness
0,8,0,27,1,0,0,142,88,0,1
1,12,0,59,0,0,0,112,80,1,1
2,14,0,77,0,1,0,100,70,0,1
3,28,0,54,0,0,0,142,103,0,1
4,32,0,87,1,1,0,110,154,1,1


In [10]:
# Step-3: Renaming the column names
# --------------------------------------------------
# Original column name -> new upper-case name.
# 'ID' and 'CPR' map to themselves, i.e. they keep their names.
rename_map = dict(
    ID='ID',
    Status='STATUS',
    Age='AGE',
    Sex='SEX',
    Service='SERVICE',
    CPR='CPR',
    Systolic='SYS_BP',
    HeartRate='HEART_RATE',
    Previous='PREV_ICU',
    Consciousness='CONSCIOUS',
)

# rename() returns a new frame; ICU_1 keeps the original column names.
ICU_2 = ICU_1.rename(rename_map, axis='columns')

# Preview the first rows with the new column names.
ICU_2.head()

Unnamed: 0,ID,STATUS,AGE,SEX,SERVICE,CPR,SYS_BP,HEART_RATE,PREV_ICU,CONSCIOUS
0,8,0,27,1,0,0,142,88,0,1
1,12,0,59,0,0,0,112,80,1,1
2,14,0,77,0,1,0,100,70,0,1
3,28,0,54,0,0,0,142,103,0,1
4,32,0,87,1,1,0,110,154,1,1


In [12]:
# Step-4: Removing rows with NA or blank values.
# --------------------------------------------------
# dropna() removes every row that holds at least one missing value.
# Blank CSV fields are only removed here if they were parsed as missing
# values when the file was loaded; dropna() does not treat non-empty
# strings (e.g. whitespace) as missing.
#
# The result is rebound to ICU_2 rather than using inplace=True, so the
# cell does not silently mutate a frame that an earlier cell displayed.
ICU_2 = ICU_2.dropna()

# Preview the first rows after removing incomplete rows.
ICU_2.head()

Unnamed: 0,ID,STATUS,AGE,SEX,SERVICE,CPR,SYS_BP,HEART_RATE,PREV_ICU,CONSCIOUS
0,8,0,27,1,0,0,142,88,0,1
1,12,0,59,0,0,0,112,80,1,1
2,14,0,77,0,1,0,100,70,0,1
3,28,0,54,0,0,0,142,103,0,1
4,32,0,87,1,1,0,110,154,1,1


In [14]:
# Step-5: Recode the numeric codes of the categorical columns as labels
# --------------------------------------------------
# Column name -> {numeric code: human-readable label}.
recode_maps = {
    'STATUS': {0: 'Lived', 1: 'Died'},
    'SEX': {0: 'Male', 1: 'Female'},
    'SERVICE': {0: 'Medical', 1: 'Surgical'},
    'CPR': {0: 'No', 1: 'Yes'},
    'PREV_ICU': {0: 'No', 1: 'Yes'},
    'CONSCIOUS': {1: 'Conscious', 2: 'Deep Stupor', 3: 'Coma'},
}

for column, codes in recode_maps.items():
    # Missing values are ignored by the checks below; map() passes them
    # through as NaN exactly as before.
    present = ICU_2[column].dropna()

    # The column already holds only labels, i.e. this cell was run before.
    # Mapping the labels a second time would turn every value into NaN,
    # so the column is left as it is. This keeps the cell safe to re-run.
    if present.isin(list(codes.values())).all():
        continue

    # Series.map() silently yields NaN for any code that is not a key of
    # the dictionary. Fail loudly instead, so that missing values are not
    # reintroduced and later exported as blank cells.
    unexpected = [value for value in present.unique() if value not in codes]
    if unexpected:
        raise ValueError(
            f"Unexpected code(s) {unexpected} in column {column!r}; "
            f"expected one of {list(codes)}"
        )

    ICU_2[column] = ICU_2[column].map(codes)

# Preview the first rows with the recoded labels.
ICU_2.head()

Unnamed: 0,ID,STATUS,AGE,SEX,SERVICE,CPR,SYS_BP,HEART_RATE,PREV_ICU,CONSCIOUS
0,8,Lived,27,Female,Medical,No,142,88,No,Conscious
1,12,Lived,59,Male,Medical,No,112,80,Yes,Conscious
2,14,Lived,77,Male,Surgical,No,100,70,No,Conscious
3,28,Lived,54,Male,Medical,No,142,103,No,Conscious
4,32,Lived,87,Female,Surgical,No,110,154,Yes,Conscious


In [16]:
# Step-6: Exporting file as CSV to the current working directory
# --------------------------------------------------
# A bare file name is resolved against the current working directory.
output_file_name = 'ICU_Cleaned.csv'

# index=False leaves the DataFrame's row index out of the written file.
ICU_2.to_csv(path_or_buf=output_file_name, index=False)

# Confirmation message naming the file that was written.
print(f"Dataset successfully cleaned and saved to {output_file_name}")

Dataset successfully cleaned and saved to ICU_Cleaned.csv
