# TensorFlow Testing Field

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the dataset
data = pd.read_csv("mimic_mean_final.csv")

In [3]:
data = data[['Time_Zone', 'gender', 'age', 'race', 'Lactate', 'los']]

In [8]:
display(data)

Unnamed: 0,Time_Zone,gender,age,race,Lactate,los
0,1,M,51,UNKNOWN,0.8,8.357373
16,1,M,88,WHITE,,0.869225
32,1,F,86,WHITE,,0.809688
48,1,M,86,PATIENT DECLINED TO ANSWER,,0.798125
64,1,M,67,WHITE,1.2,0.611944
...,...,...,...,...,...,...
55712,1,M,50,WHITE,2.5,7.774468
55728,1,F,87,UNKNOWN,,0.756493
55744,1,M,76,WHITE,2.5,2.473252
55760,1,M,43,WHITE,3.0,6.950370


In [5]:
# Filtering rows where Time_Zone column equals 1
data = data[data['Time_Zone'] == 1]

In [6]:
"""
Keep only features with numeric values
because I want to impute only the
"""
numeric_data = data.select_dtypes(include='number')

# Select only the categorical columns
categorical_data = data.select_dtypes(exclude='number')

# We specify the imputer.
imputer = IterativeImputer(estimator=RandomForestRegressor(), random_state=0)

# Replace missing values with estimated values based on the iterative imputation process.
# Wrap the iteration in tqdm to show progress bar
imputed_data = []
for row in tqdm(numeric_data.values, desc="Imputing Values", total=len(numeric_data)):
    imputed_row = imputer.fit_transform(row.reshape(1, -1))
    imputed_data.append(imputed_row.ravel())

# Convert the imputed data array back into a pandas DataFrame
imputed_df = pd.DataFrame(imputed_data, columns=numeric_data.columns)

# Merge categorical_data with imputed_df
imputed_full_df = pd.concat([categorical_data.reset_index(drop=True), imputed_df.reset_index(drop=True)], axis=1)

Imputing Values: 100%|██████████| 3487/3487 [12:52<00:00,  4.51it/s]


In [None]:
# Export the merged DataFrame to a CSV file
imputed_full_df.to_csv('imputed_mimic_86_features.csv', index=False)

In [None]:
"""
Impute without process bar
"""


# Keep only features with numeric values 
numeric_data = data.select_dtypes(include='number')

# Select only the categorical columns
categorical_data = data.select_dtypes(exclude='number')

#  We specify the imputer.
imputer = IterativeImputer(estimator=RandomForestRegressor(), random_state=0)

# Replace missing values with estimated values based on the iterative imputation process.
imputed_data = imputer.fit_transform(numeric_data)

# We convert the imputed data array back into a pandas DataFrame 
imputed_df = pd.DataFrame(imputed_data, columns=numeric_data.columns)

# Merge categorical_data with imputed_df
imputed_full_df = pd.concat([categorical_data.reset_index(drop=True), imputed_df.reset_index(drop=True)], axis=1)