
# Config file creation for categorized analyses

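Because there are many analysis categories, writing each config file by hand would be tedious and error-prone. This notebook streamlines their creation: for a given disease and set of time windows, it generates one JSON config file per category.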

In [25]:
import json
import pandas as pd
import numpy as np

In [26]:
# Names of the settings stored in a config file, grouped by how they vary.
infos = {
    # Same value in every config.
    "lab_records_frequency",  # always 'h'
    "backward_window_time_unit",  # "month"
    # Tunable, but held constant for this issue.
    "backward_window_value",  # 24
    "gap_days",  # 90
    # Differ from one config to the next.
    "selected_analyses_ids",  # depends on the category; taken from the category list
    "positive_diagnoses",  # depends on the disease
    "prediction_window_days",  # varies
}

In [27]:
from pathlib import Path


def find_project_root():
    """Locate the nearest ``src`` directory at or above the current working directory.

    The search starts at ``Path.cwd()`` and walks up through its parents.
    Despite the function's name, the returned path is the ``src`` directory
    itself, not the directory that contains it.

    Returns:
        Path: ``<ancestor>/src`` for the closest ancestor (the working
        directory included) that has a ``src`` sub-directory.

    Raises:
        FileNotFoundError: If the filesystem root is reached without finding
            a ``src`` directory.
    """
    start_dir = Path.cwd()
    for candidate in (start_dir, *start_dir.parents):
        src_dir = candidate / "src"
        # is_dir() rather than exists(): a regular file that happens to be
        # called "src" must not be mistaken for the source directory.
        if src_dir.is_dir():
            return src_dir
    raise FileNotFoundError("Could not find the 'src' directory in the path tree.")

In [28]:
def make_config(
        output_file,
        selected_analyses_ids,
        positive_diagnoses,
        prediction_window_days,
        lab_records_frequency="h",
        backward_window_time_unit="month",
        backward_window_value=24,
        gap_days=90,
):
    """Write one analysis configuration to ``output_file`` as JSON and return it.

    Every argument except ``output_file`` is stored in the configuration under
    a key with the same name as the argument.

    Args:
        output_file: Path of the JSON file to create or overwrite.
        selected_analyses_ids: Value stored under ``"selected_analyses_ids"``.
        positive_diagnoses: Value stored under ``"positive_diagnoses"``.
        prediction_window_days: Value stored under ``"prediction_window_days"``.
        lab_records_frequency: Value stored under ``"lab_records_frequency"``.
        backward_window_time_unit: Value stored under ``"backward_window_time_unit"``.
        backward_window_value: Value stored under ``"backward_window_value"``.
        gap_days: Value stored under ``"gap_days"``.

    Returns:
        dict: The configuration that was written to disk.
    """
    # Keyword order fixes the key order of the JSON output.
    settings = dict(
        selected_analyses_ids=selected_analyses_ids,
        positive_diagnoses=positive_diagnoses,
        lab_records_frequency=lab_records_frequency,
        backward_window_time_unit=backward_window_time_unit,
        backward_window_value=backward_window_value,
        gap_days=gap_days,
        prediction_window_days=prediction_window_days,
    )

    with open(output_file, "w") as config_handle:
        json.dump(settings, config_handle, indent=4)

    return settings

In [29]:
# Resolve the project's "src" directory once; the analyses file is addressed relative to it.
src = find_project_root()
categorized_analyses_file = src / "features/mimic/analyses/categorized/categorized_analyses.json"

# Parsed JSON content; later iterated as (category, analyses codes) pairs.
with categorized_analyses_file.open("r") as f:
    categorized_analyses = json.load(f)

In [71]:

from os import path


def make_config_files(
        disease_identifier,
        diagnoses,
        prediction_window_string,
        prediction_window,
        backward_window_string,
        backward_window_unit,
        backward_window,
        gap_string,
        gap,
        lab_records_frequency="h",
        analyses_by_category=None,
):
    """Write one JSON config file per analysis category for a single setup.

    Files are created relative to the current working directory as
    ``<disease>/<base>/<category>/<base>_<category>_config.json`` where
    ``<base>`` is ``<disease>_B<backward>_G<gap>_P<prediction>``. Missing
    directories are created; existing files are overwritten.

    Args:
        disease_identifier: Short disease label used in directory and file names.
        diagnoses: Stored in each config as ``positive_diagnoses``.
        prediction_window_string: Label for the prediction window used in names.
        prediction_window: Stored in each config as ``prediction_window_days``.
        backward_window_string: Label for the backward window used in names.
        backward_window_unit: Stored as ``backward_window_time_unit``.
        backward_window: Stored as ``backward_window_value`` (expressed in
            ``backward_window_unit``).
        gap_string: Label for the gap used in names.
        gap: Stored in each config as ``gap_days``.
        lab_records_frequency: Stored as ``lab_records_frequency``; defaults
            to ``"h"``, the value that was previously hard-coded.
        analyses_by_category: Mapping of category name to the analyses codes
            for that category. When ``None`` (the default), the notebook-level
            ``categorized_analyses`` mapping is used.
    """
    if analyses_by_category is None:
        # Looked up at call time, so existing calls keep using the
        # notebook-level mapping exactly as before.
        analyses_by_category = categorized_analyses

    # Identical for every category, so it is built once outside the loop.
    base_directory_name = (f"{disease_identifier}_"
                           f"B{backward_window_string}_"
                           f"G{gap_string}_"
                           f"P{prediction_window_string}")
    setup_directory = Path(disease_identifier) / base_directory_name

    for category, analyses_codes in analyses_by_category.items():
        directory = setup_directory / category
        directory.mkdir(parents=True, exist_ok=True)

        file_path = directory / f"{base_directory_name}_{category}_config.json"

        make_config(
            output_file=file_path,
            selected_analyses_ids=analyses_codes,
            positive_diagnoses=diagnoses,
            prediction_window_days=prediction_window,
            lab_records_frequency=lab_records_frequency,
            backward_window_time_unit=backward_window_unit,
            backward_window_value=backward_window,
            gap_days=gap,
        )

In [72]:
# Diagnosis codes passed as `diagnoses` (stored as `positive_diagnoses`) for the CKD configs.
# NOTE(review): these look like a mix of ICD-9 and ICD-10 codes — confirm against the source code list.
ckd_diagnoses = ["5856", "40391", "N186", "I120", "5854", "N184", "I132", "5855", "N185", "40311", "40301", "40493",
                 "I1311", "40492", "40403", "40413"]
# Diagnosis codes passed as `diagnoses` (stored as `positive_diagnoses`) for the T2D configs.
# NOTE(review): same assumption about the coding systems as above — confirm.
t2d_diagnoses = ["25000", "E119", "E1122", "25060", "E1165", "25040", "E1140", "25050", "E1151", "25002", "25080",
                 "E11319", "E1142", "E1121", "E11649", "25062", "E11621", "25042", "25082", "E1169", "E1143", "25052",
                 "25070", "E1152", "E118", "25012", "25092", "E11610", "25072", "E11622", "E1110", "25090", "E11628",
                 "E1139", "E1136", "E1100", "25022", "E113599", "E113299", "O2412", "E11311", "25020", "E113593",
                 "E1159", "E11329", "E1149", "E113293", "E11359", "E1129", "E113499", "E113399", "E1141", "O24113",
                 "E113592", "E113591", "E113513", "O24111", "E113393", "E1144", "25032", "E113291", "E113519",
                 "E113292", "E113219", "E11349", "O24112", "E11641", "E113313", "25010", "25030", "E1101", "E11620",
                 "E113493", "E11351", "E11339", "E113213", "E113319", "E113413", "E11321", "E113492", "O2413",
                 "E1137X9", "E113391", "E11331", "E113311", "E11341", "E113491", "E113542", "E113511", "E11618",
                 "E113532", "E113392", "E113512", "E113521", "E113212", "E113419", "E113553", "E113211"]

CKD config files

In [73]:
# ckd_B24m_G3m_P1y
make_config_files(
    disease_identifier="ckd",
    diagnoses=ckd_diagnoses,
    # Prediction window: 1 year.
    prediction_window_string="1y",
    prediction_window=365,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)

In [74]:
# ckd_B24m_G3m_P2y
make_config_files(
    disease_identifier="ckd",
    diagnoses=ckd_diagnoses,
    # Prediction window: 2 years.
    prediction_window_string="2y",
    prediction_window=730,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)

In [75]:
# ckd_B24m_G3m_P3m
make_config_files(
    disease_identifier="ckd",
    diagnoses=ckd_diagnoses,
    # Prediction window: 3 months.
    prediction_window_string="3m",
    prediction_window=90,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)

In [76]:
# ckd_B24m_G3m_P5y
make_config_files(
    disease_identifier="ckd",
    diagnoses=ckd_diagnoses,
    # Prediction window: 5 years.
    prediction_window_string="5y",
    prediction_window=1825,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)

T2D config files

In [77]:
# t2d_B24m_G3m_P1y
make_config_files(
    disease_identifier="t2d",
    diagnoses=t2d_diagnoses,
    # Prediction window: 1 year.
    prediction_window_string="1y",
    prediction_window=365,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)

In [78]:
# t2d_B24m_G3m_P2y
make_config_files(
    disease_identifier="t2d",
    diagnoses=t2d_diagnoses,
    # Prediction window: 2 years.
    prediction_window_string="2y",
    prediction_window=730,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)

In [79]:
# t2d_B24m_G3m_P3m
make_config_files(
    disease_identifier="t2d",
    diagnoses=t2d_diagnoses,
    # Prediction window: 3 months.
    prediction_window_string="3m",
    prediction_window=90,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)

In [80]:
# t2d_B24m_G3m_P5y
make_config_files(
    disease_identifier="t2d",
    diagnoses=t2d_diagnoses,
    # Prediction window: 5 years.
    prediction_window_string="5y",
    prediction_window=1825,  # days
    # Backward window: 24 months.
    backward_window_string="24m",
    backward_window_unit="month",
    backward_window=24,  # in units of `backward_window_unit` (months, not days)
    # Gap: 3 months.
    gap_string="3m",
    gap=90,  # days
)