# CIR-03: Hierarchical Imputation Framework

In [1]:
import pandas as pd
import numpy as np
import os
import io
import logging

from tqdm import tqdm

In [2]:
# Initial logger setup: configure the root logger to emit INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Global variable to hold the active file handler (managed by switch_log_file).
# On a fresh kernel this is None. When the cell is re-run, the handler that is
# already attached is kept so it can still be detached and closed on the next
# switch instead of being orphaned on the logger.
current_file_handler = globals().get("current_file_handler")

# Shared record format for every handler attached in this notebook.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# Detach the stream handler left behind by a previous run of this cell;
# otherwise each re-run stacks one more handler and every record is printed
# one more time.
if isinstance(globals().get("stream_handler"), logging.Handler):
    logger.removeHandler(stream_handler)

# Create the stream handler
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

def switch_log_file(filename):
    """Redirect file logging to ``filename``, replacing the previous log file.

    The handler currently stored in the module-level ``current_file_handler``
    (if any) is removed from ``logger`` and closed, and a new
    ``logging.FileHandler`` using the shared ``formatter`` takes its place.
    A confirmation record is logged once the new handler is attached.

    Args:
        filename: Path of the log file to write to. Missing parent
            directories are created.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened. In that case the previously active handler is left
            attached, so file logging is not lost.
    """
    global current_file_handler

    # FileHandler does not create missing directories; make sure the target
    # folder exists so a fresh checkout without it does not fail here.
    log_dir = os.path.dirname(filename)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Open the new file *before* touching the old handler: if this raises,
    # the existing handler stays active and the global stays valid.
    new_file_handler = logging.FileHandler(filename)
    new_file_handler.setFormatter(formatter)

    # If a file handler already exists, remove and close it
    if current_file_handler is not None:
        logger.removeHandler(current_file_handler)
        current_file_handler.close()

    current_file_handler = new_file_handler
    logger.addHandler(current_file_handler)

    logger.info(f"Switched logging to {filename}")

In [6]:
# Point file logging at this notebook's log and emit a marker record.
# NOTE(review): the notebook title says CIR-03 while the log file is named
# CIR-2 — confirm which identifier is intended.
cir_log_path = 'logs/CIR-2.log'
switch_log_file(cir_log_path)
logger.info("This is being logged to CIR-2.log")

2025-05-03 12:33:33,168 - INFO - Switched logging to logs/CIR-2.log
2025-05-03 12:33:33,169 - INFO - This is being logged to CIR-2.log


In [4]:
# Load datasets
data_path = "../04_ANN/CSV/exports/split_set/without_multiple_rows"
# os.listdir returns entries in arbitrary order; sort them so the load order,
# the log output and the key order of `dataframes` are the same on every run.
all_files = sorted(os.listdir(data_path))

logging.info("+++++++++++++++++CIR-2+++++++++++++++++++++++++")
logging.info("Start Loading Dataframes.")

# Load CSVs into a dictionary of dataframes, keyed by the file name without
# its extension and with "-" replaced by "_".
dataframes = {}
for file in all_files:
    if file.endswith(".csv"):
        # Strip only the trailing extension; str.replace would also remove a
        # ".csv" that happens to appear in the middle of the name.
        var_name = file[:-len(".csv")].replace("-", "_")
        logging.info(f"Loading... -> {file}")
        # Every column is cast to float32; a column that cannot be converted
        # to a number makes astype raise here.
        dataframes[var_name] = pd.read_csv(os.path.join(data_path, file)).astype('float32')

# Log loaded datasets
for var_name, df in dataframes.items():
    # Also expose each frame as a notebook-level variable named after its key.
    globals()[var_name] = df
    logging.info(f"{var_name} loaded successfully with shape {df.shape}")
logging.info("Load Complete.")
logging.info("++++++++++++++++++++++++++++++++++++++++++")

2025-05-03 12:32:39,653 - INFO - +++++++++++++++++CIR-2+++++++++++++++++++++++++
2025-05-03 12:32:39,654 - INFO - Start Loading Dataframes.
2025-05-03 12:32:39,655 - INFO - Loading... -> o1_X_external.csv
2025-05-03 12:32:46,797 - INFO - Loading... -> o1_X_test.csv
2025-05-03 12:32:47,362 - INFO - Loading... -> o1_X_train.csv
2025-05-03 12:32:51,496 - INFO - Loading... -> o1_X_validate.csv
2025-05-03 12:32:52,017 - INFO - Loading... -> o1_y_external_los.csv
2025-05-03 12:32:52,065 - INFO - Loading... -> o1_y_external_mortality.csv
2025-05-03 12:32:52,096 - INFO - Loading... -> o1_y_test_los.csv
2025-05-03 12:32:52,106 - INFO - Loading... -> o1_y_test_mortality.csv
2025-05-03 12:32:52,111 - INFO - Loading... -> o1_y_train_los.csv
2025-05-03 12:32:52,151 - INFO - Loading... -> o1_y_train_mortality.csv
2025-05-03 12:32:52,169 - INFO - Loading... -> o1_y_validate_los.csv
2025-05-03 12:32:52,180 - INFO - Loading... -> o1_y_validate_mortality.csv
2025-05-03 12:32:52,184 - INFO - Loading... -