# ICU Data Sorter for SET A

### Creation

In [2]:
import pandas as pd
import numpy as np
from os.path import exists
#import sklearn as sk
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

general_desc = ["RecordID","Age","Gender","Height","ICUType","Weight"]
ts_desc = ["Albumin","ALP","ALT","AST","Bilirubin","BUN","Cholesterol",
           "Creatinine","DiasABP","FiO2","GCS","Glucose","HCO3","HCT","HR","K",
          "Lactate","Mg","MAP","MechVent","Na","NIDiasABP","NIMAP","NISysABP",
          "PaCO2","PaO2","pH","Platelets","RespRate","SaO2","SysABP","Temp",
           "TroponinI","TroponinT","Urine","WBC"] #weight as time series ignored
ts_full = []
for each in ts_desc:
    for end in [".count",".min",".mean",".median",".max",".first",".last"]:
        new = each + end
        ts_full.append(new)
        
variables = general_desc + ts_full

ICU_df = pd.DataFrame(columns=variables)


In [60]:
ICU_df

In [62]:
n = 132538
for file in range(10135):
    current_patient = []
    n += 1
    if exists(str(n)+".txt"):
        convert = pd.read_csv(str(n)+".txt")
        for desc in general_desc:#non time series
            temp = convert.loc[convert['Parameter'] == desc,'Value']
            if desc != "Weight":
                current_patient.append(float(temp))
            else:
                temp = convert.loc[convert['Parameter'] == desc,['Value']]
                current_patient.append(float(temp['Value'].iat[0]))
        for desc in ts_desc:#time series variables
            temp = convert.loc[convert['Parameter'] == desc,'Value']
            current_patient.append(int(len(temp.index)))#time series counts
            if len(temp.index) != 0:
                current_patient.append(float(temp.min()))#time series min
            else:
                current_patient.append(-1)
            if len(temp.index) != 0:
                current_patient.append(float(temp.mean()))#time series mean
            else:
                current_patient.append(-1)
            if len(temp.index) != 0:
                current_patient.append(float(temp.median()))#time series median
            else:
                current_patient.append(-1)
            if len(temp.index) != 0:
                current_patient.append(float(temp.max()))#time series max
            else:
                current_patient.append(-1)
            temp = convert.loc[convert['Parameter'] == desc,['Value']]
            if int(len(temp.index)) != 0:
                current_patient.append(float(temp['Value'].iat[0]))#time series first
                current_patient.append(float(temp['Value'].iat[-1]))#time series last
            else:
                current_patient.append(-1)
                current_patient.append(-1)
        ICU_df.loc[len(ICU_df.index)] = current_patient
#print(current_patient)
#print(len(current_patient))
#print(len(ts_full)+len(general_desc))

print("Complete")

Complete


#### UPLOAD

In [63]:
ICU_df.to_csv("ICU.csv",index=False)

### Further Processing

#### Height

In [16]:
ICU_df2 = pd.read_csv('ICU.csv')

In [17]:
HeightEstimate = ICU_df2[["Age","Gender","Height"]]
HeightEstimate = HeightEstimate.replace(-1,np.NaN)

In [18]:
imputer = IterativeImputer(estimator=RandomForestRegressor())
imputer.fit(HeightEstimate)



IterativeImputer(estimator=RandomForestRegressor())

In [19]:
#HeightEstimate
display(pd.DataFrame(imputer.transform(HeightEstimate)))

Unnamed: 0,0,1,2
0,54.0,0.0,160.914990
1,76.0,1.0,175.300000
2,44.0,0.0,165.823486
3,68.0,1.0,180.300000
4,88.0,0.0,147.935393
...,...,...,...
3995,70.0,0.0,162.990689
3996,25.0,1.0,181.953480
3997,44.0,1.0,177.707321
3998,37.0,1.0,172.853510


In [20]:
height = pd.DataFrame(imputer.transform(HeightEstimate))
height = height.round(1)

In [21]:
ICU_df2['Height'] = height[2]

In [59]:
#ICU_df

#### Weight

In [22]:
WeightEstimate = ICU_df2[["Age","Gender","Height","Weight"]]
WeightEstimate = WeightEstimate.replace(-1,np.NaN)
imputer = IterativeImputer(estimator=RandomForestRegressor())
imputer.fit(WeightEstimate)



IterativeImputer(estimator=RandomForestRegressor())

In [23]:
display(pd.DataFrame(imputer.transform(WeightEstimate)))

Unnamed: 0,0,1,2,3
0,54.0,0.0,160.9,98.524919
1,76.0,1.0,175.3,76.000000
2,44.0,0.0,165.8,56.700000
3,68.0,1.0,180.3,84.600000
4,88.0,0.0,147.9,65.692546
...,...,...,...,...
3995,70.0,0.0,163.0,87.000000
3996,25.0,1.0,182.0,166.400000
3997,44.0,1.0,177.7,109.000000
3998,37.0,1.0,172.9,87.400000


In [24]:
weight = pd.DataFrame(imputer.transform(WeightEstimate))
weight = weight.round(1)

In [25]:
ICU_df2['Weight'] = weight[3]

#### In hospital death

In [26]:
ICU_deathstats = pd.read_csv('Outcomes-a.txt')

In [34]:
ICU_deathstats

Unnamed: 0,RecordID,SAPS-I,SOFA,Length_of_stay,Survival,In-hospital_death
0,132539,6,1,5,-1,0
1,132540,16,8,8,-1,0
2,132541,21,11,19,-1,0
3,132543,7,1,9,575,0
4,132545,17,2,4,918,0
...,...,...,...,...,...,...
3995,142665,19,7,10,336,0
3996,142667,8,2,3,-1,0
3997,142670,8,5,11,-1,0
3998,142671,22,10,8,7,1


In [30]:
ICU_df2["Death"] = ICU_deathstats["In-hospital_death"]

In [35]:
ICU_df2

Unnamed: 0,RecordID,Age,Gender,Height,ICUType,Weight,Albumin.count,Albumin.min,Albumin.mean,Albumin.median,...,Urine.first,Urine.last,WBC.count,WBC.min,WBC.mean,WBC.median,WBC.max,WBC.first,WBC.last,Death
0,132539.0,54.0,0.0,160.9,4.0,98.5,0.0,-1.0,-1.0,-1.0,...,900.0,280.0,2.0,9.4,10.300000,10.30,11.2,11.2,9.4,0
1,132540.0,76.0,1.0,175.3,2.0,76.0,0.0,-1.0,-1.0,-1.0,...,770.0,220.0,3.0,7.4,11.266667,13.10,13.3,7.4,13.3,0
2,132541.0,44.0,0.0,165.8,3.0,56.7,2.0,2.3,2.5,2.5,...,100.0,35.0,3.0,3.7,4.700000,4.20,6.2,4.2,6.2,0
3,132543.0,68.0,1.0,180.3,3.0,84.6,1.0,4.4,4.4,4.4,...,600.0,550.0,3.0,7.9,9.400000,8.80,11.5,11.5,7.9,0
4,132545.0,88.0,0.0,147.9,3.0,65.7,1.0,3.3,3.3,3.3,...,140.0,80.0,2.0,3.8,4.300000,4.30,4.8,3.8,4.8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,142665.0,70.0,0.0,163.0,4.0,87.0,0.0,-1.0,-1.0,-1.0,...,150.0,40.0,2.0,11.5,14.500000,14.50,17.5,11.5,17.5,0
3996,142667.0,25.0,1.0,182.0,3.0,166.4,0.0,-1.0,-1.0,-1.0,...,600.0,900.0,3.0,3.0,4.733333,4.70,6.5,6.5,3.0,0
3997,142670.0,44.0,1.0,177.7,3.0,109.0,0.0,-1.0,-1.0,-1.0,...,250.0,20.0,3.0,10.6,11.066667,10.80,11.8,11.8,10.6,0
3998,142671.0,37.0,1.0,172.9,3.0,87.4,1.0,3.1,3.1,3.1,...,12.0,3.0,4.0,11.5,13.025000,12.95,14.7,14.7,11.5,1


#### Upload

In [33]:
ICU_df2.to_csv("ICU2.csv",index=False)