# TensorFlow Testing Field

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm #create process bar
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the dataset
data = pd.read_csv("mimic_mean_final.csv")

In [3]:
# Filtering rows at the specific time zone
data = data[data['Time_Zone'] == 1]

In [4]:
display(data)

Unnamed: 0,row_count,subject_id,hadm_id,Time_Zone,gender,age,race,Base Excess,Lactate,pCO2,...,Haptoglobin,Bilirubin Direct,Thyroxine (T4) Free,Sedimentation Rate,CK-MB,Amylase,PEEP set (cmH2O),Central Venous Pressure (mmHg),hospital_expire_flag,los
0,1,10004733,27411876,1,M,51,UNKNOWN,0.0,0.8,38.0,...,294.0,,,,,,5.0,3.0,0,8.357373
16,17,10006277,25610553,1,M,88,WHITE,,,,...,,,,,,,,,0,0.869225
32,33,10008100,29402054,1,F,86,WHITE,,,,...,,,,,,,,,0,0.809688
48,49,10017492,27417763,1,M,86,PATIENT DECLINED TO ANSWER,,,,...,,,,,,,,,1,0.798125
64,65,10025463,24470193,1,M,67,WHITE,-4.0,1.2,23.0,...,,,,,,,5.0,,1,0.611944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55712,55713,19992885,20352341,1,M,50,WHITE,1.0,2.5,25.0,...,,1.0,,,,,5.0,,1,7.774468
55728,55729,19994233,29338696,1,F,87,UNKNOWN,,,,...,,,,,3.0,,,,0,0.756493
55744,55745,19997293,26366652,1,M,76,WHITE,,2.5,,...,,,1.2,,6.0,,,,0,2.473252
55760,55761,19999442,26785317,1,M,43,WHITE,0.0,3.0,32.0,...,,,,,4.0,,5.0,,0,6.950370


In [5]:
# Separate numeric and categorical columns
numeric_data = data.select_dtypes(include=np.number)
categorical_data = data.select_dtypes(exclude=np.number)

# Initialize the IterativeImputer
imputer = IterativeImputer(estimator=RandomForestRegressor(), random_state=0)

# Define the number of iterations for imputation
num_iterations = 10

# Calculate the total number of imputation steps
total_steps = num_iterations * numeric_data.shape[1]

# Create a progress bar for the imputation process
pbar = tqdm(total=total_steps)

# Perform imputation on the entire numeric dataset
for _ in range(num_iterations):
    imputer.fit(numeric_data)
    imputed_numeric_data = imputer.transform(numeric_data)
    pbar.update(numeric_data.shape[1])  # Update progress bar for each column imputed

# Close the progress bar
pbar.close()

# Convert imputed numeric data to a DataFrame
imputed_numeric_df = pd.DataFrame(imputed_numeric_data, columns=numeric_data.columns)

# Concatenate imputed numeric data with categorical data
imputed_data = pd.concat([categorical_data.reset_index(drop=True), imputed_numeric_df.reset_index(drop=True)], axis=1)

100%|██████████| 840/840 [2:30:00<00:00, 10.71s/it]  


In [6]:
display(imputed_numeric_df)

Unnamed: 0,row_count,subject_id,hadm_id,Time_Zone,age,Base Excess,Lactate,pCO2,Calculated Total CO2,BUN,...,Haptoglobin,Bilirubin Direct,Thyroxine (T4) Free,Sedimentation Rate,CK-MB,Amylase,PEEP set (cmH2O),Central Venous Pressure (mmHg),hospital_expire_flag,los
0,1.0,10004733.0,27411876.0,1.0,51.0,0.000000,0.800000,38.000000,26.000000,44.00,...,294.000,1.023,0.9762,70.77,7.880,88.36,5.000000,3.000000,0.0,8.357373
1,17.0,10006277.0,25610553.0,1.0,88.0,0.074167,1.543667,37.478333,24.666667,15.00,...,156.860,1.191,1.0468,42.05,10.490,194.96,5.685000,13.981688,0.0,0.869225
2,33.0,10008100.0,29402054.0,1.0,86.0,0.018333,1.703900,38.076667,24.852500,17.00,...,141.030,0.697,1.0367,49.05,5.095,72.41,5.100000,9.553650,0.0,0.809688
3,49.0,10017492.0,27417763.0,1.0,86.0,0.078333,1.708500,37.310833,24.721667,81.00,...,126.740,1.850,1.1371,50.98,10.040,149.89,5.806667,15.126702,1.0,0.798125
4,65.0,10025463.0,24470193.0,1.0,67.0,-4.000000,1.200000,23.000000,17.000000,21.92,...,152.370,0.778,1.0326,54.81,15.775,110.11,5.000000,15.832393,1.0,0.611944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3482,55713.0,19992885.0,20352341.0,1.0,50.0,1.000000,2.500000,25.000000,23.000000,16.00,...,105.745,1.000,1.0552,67.92,16.095,112.13,5.000000,15.053774,1.0,7.774468
3483,55729.0,19994233.0,29338696.0,1.0,87.0,0.240000,1.610000,36.634167,25.235000,18.00,...,159.550,0.717,1.0709,49.61,3.000,85.71,5.098333,11.634403,0.0,0.756493
3484,55745.0,19997293.0,26366652.0,1.0,76.0,0.273333,2.500000,37.795833,25.056667,23.00,...,167.340,0.710,1.2000,32.75,6.000,85.70,5.625000,29.873450,0.0,2.473252
3485,55761.0,19999442.0,26785317.0,1.0,43.0,0.000000,3.000000,32.000000,23.000000,12.00,...,165.860,0.262,1.3647,51.91,4.000,64.43,5.000000,12.278863,0.0,6.950370


In [7]:
# Export the merged DataFrame to a CSV file
imputed_data.to_csv('CSV\export\imputed_mimic_time_zone_1_inter_10.csv', index=False)