# TensorFlow Testing Field

In [1]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the dataset
data = pd.read_csv("mimic_mean_final.csv")

In [3]:
# Filtering rows where Time_Zone column equals 1
data = data[data['Time_Zone'] == 1]

In [4]:
display(data)

Unnamed: 0,row_count,subject_id,hadm_id,Time_Zone,gender,age,race,Base Excess,Lactate,pCO2,...,Haptoglobin,Bilirubin Direct,Thyroxine (T4) Free,Sedimentation Rate,CK-MB,Amylase,PEEP set (cmH2O),Central Venous Pressure (mmHg),hospital_expire_flag,los
0,1,10004733,27411876,1,M,51,UNKNOWN,0.0,0.8,38.0,...,294.0,,,,,,5.0,3.0,0,8.357373
16,17,10006277,25610553,1,M,88,WHITE,,,,...,,,,,,,,,0,0.869225
32,33,10008100,29402054,1,F,86,WHITE,,,,...,,,,,,,,,0,0.809688
48,49,10017492,27417763,1,M,86,PATIENT DECLINED TO ANSWER,,,,...,,,,,,,,,1,0.798125
64,65,10025463,24470193,1,M,67,WHITE,-4.0,1.2,23.0,...,,,,,,,5.0,,1,0.611944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55712,55713,19992885,20352341,1,M,50,WHITE,1.0,2.5,25.0,...,,1.0,,,,,5.0,,1,7.774468
55728,55729,19994233,29338696,1,F,87,UNKNOWN,,,,...,,,,,3.0,,,,0,0.756493
55744,55745,19997293,26366652,1,M,76,WHITE,,2.5,,...,,,1.2,,6.0,,,,0,2.473252
55760,55761,19999442,26785317,1,M,43,WHITE,0.0,3.0,32.0,...,,,,,4.0,,5.0,,0,6.950370


In [6]:
# Keep only features with numeric values 
numeric_data = data.select_dtypes(include='number')

# Select only the categorical columns
categorical_data = data.select_dtypes(exclude='number')

#  We specify the imputer.
imputer = IterativeImputer(estimator=RandomForestRegressor(), random_state=0)

# Replace missing values with estimated values based on the iterative imputation process.
imputed_data = imputer.fit_transform(numeric_data)

# We convert the imputed data array back into a pandas DataFrame 
imputed_df = pd.DataFrame(imputed_data, columns=numeric_data.columns)

# Merge categorical_data with imputed_df
imputed_full_df = pd.concat([categorical_data.reset_index(drop=True), imputed_df.reset_index(drop=True)], axis=1)

In [24]:
display(imputed_full_df)

Unnamed: 0,gender,race,row_count,subject_id,hadm_id,Time_Zone,age,Base Excess,Lactate,pCO2,...,Haptoglobin,Bilirubin Direct,Thyroxine (T4) Free,Sedimentation Rate,CK-MB,Amylase,PEEP set (cmH2O),Central Venous Pressure (mmHg),hospital_expire_flag,los
0,M,UNKNOWN,1.0,10004733.0,27411876.0,1.0,51.0,0.000000,0.800000,38.000000,...,294.000,1.123,1.0088,81.81,8.960,92.48,5.000,3.000000,0.0,8.357373
1,M,WHITE,17.0,10006277.0,25610553.0,1.0,88.0,0.383333,1.486500,36.225000,...,144.500,1.216,0.9832,47.34,7.540,84.42,5.380,16.239056,0.0,0.869225
2,F,WHITE,33.0,10008100.0,29402054.0,1.0,86.0,0.003333,1.626500,38.559167,...,129.760,0.593,1.0143,54.41,6.690,67.85,5.130,10.592519,0.0,0.809688
3,M,PATIENT DECLINED TO ANSWER,49.0,10017492.0,27417763.0,1.0,86.0,0.057500,1.734667,37.782500,...,134.880,1.051,1.0846,58.74,8.450,115.40,5.755,13.120487,1.0,0.798125
4,M,WHITE,65.0,10025463.0,24470193.0,1.0,67.0,-4.000000,1.200000,23.000000,...,139.895,0.609,1.1043,54.72,16.390,134.23,5.000,13.894583,1.0,0.611944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3482,M,WHITE,55713.0,19992885.0,20352341.0,1.0,50.0,1.000000,2.500000,25.000000,...,100.600,1.000,1.0767,65.49,25.085,124.90,5.000,14.832324,1.0,7.774468
3483,F,UNKNOWN,55729.0,19994233.0,29338696.0,1.0,87.0,0.166667,1.631000,37.739167,...,184.790,0.506,1.0675,47.95,3.000,122.31,5.060,10.511208,0.0,0.756493
3484,M,WHITE,55745.0,19997293.0,26366652.0,1.0,76.0,0.253333,2.500000,37.876667,...,153.290,0.647,1.2000,27.37,6.000,87.43,5.350,37.831495,0.0,2.473252
3485,M,WHITE,55761.0,19999442.0,26785317.0,1.0,43.0,0.000000,3.000000,32.000000,...,158.450,0.436,1.1708,52.01,4.000,74.95,5.000,14.179551,0.0,6.950370


In [26]:
# Export the merged DataFrame to a CSV file
imputed_full_df.to_csv('imputed_mimic_86_features.csv', index=False)