# Generate labels: ICU phenotyping (25-label multi-label)

## Purpose
Generate ICU phenotyping labels (multi-label with 25 phenotypes) for each split.

## Inputs
- FEMR database at <BASE>/<split>/extract/
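- Phenotype-to-code mapping (`phenotype_to_codes`, defined in the first code cell: 25 phenotype names, each mapped to a list of `ICD9CM/...` codes)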
- Label output directory <BASE>/<split>/femr_labels/

## Outputs
- Labels at: <BASE>/<split>/femr_labels/mimic_icu_phenotyping/*.csv

## Notes
This task is multi-label (25 binary labels), not a single 25-way softmax classification.


In [None]:
# Mapping from each of the 25 ICU phenotypes to the ICD-9-CM diagnosis codes
# (prefixed with "ICD9CM/") that define it. The dict is passed to
# Mimic_ICUEventStreamPhenotypeLabeler together with the ontology.
#
# NOTE: codes must keep their leading zeros and decimal point
# (e.g. "ICD9CM/020.3"); "ICD9CM/203" is a different, unrelated diagnosis.
phenotype_to_codes = {
  "Acute and unspecified renal failure": [
    "ICD9CM/584.5",
    "ICD9CM/584.6",
    "ICD9CM/584.7",
    "ICD9CM/584.8",
    "ICD9CM/584.9",
    "ICD9CM/586"
  ],
  "Acute cerebrovascular disease": [
    "ICD9CM/346.60",
    "ICD9CM/346.61",
    "ICD9CM/346.62",
    "ICD9CM/346.63",
    "ICD9CM/430",
    "ICD9CM/431",
    "ICD9CM/432.0",
    "ICD9CM/432.1",
    "ICD9CM/432.9",
    "ICD9CM/433.01",
    "ICD9CM/433.11",
    "ICD9CM/433.21",
    "ICD9CM/433.31",
    "ICD9CM/433.81",
    "ICD9CM/433.91",
    "ICD9CM/434.0",
    "ICD9CM/434.00",
    "ICD9CM/434.01",
    "ICD9CM/434.1",
    "ICD9CM/434.10",
    "ICD9CM/434.11",
    "ICD9CM/434.9",
    "ICD9CM/434.90",
    "ICD9CM/434.91",
    "ICD9CM/436"
  ],
  "Acute myocardial infarction": [
    "ICD9CM/410.0",
    "ICD9CM/410.00",
    "ICD9CM/410.01",
    "ICD9CM/410.02",
    "ICD9CM/410.1",
    "ICD9CM/410.10",
    "ICD9CM/410.11",
    "ICD9CM/410.12",
    "ICD9CM/410.2",
    "ICD9CM/410.20",
    "ICD9CM/410.21",
    "ICD9CM/410.22",
    "ICD9CM/410.3",
    "ICD9CM/410.30",
    "ICD9CM/410.31",
    "ICD9CM/410.32",
    "ICD9CM/410.4",
    "ICD9CM/410.40",
    "ICD9CM/410.41",
    "ICD9CM/410.42",
    "ICD9CM/410.5",
    "ICD9CM/410.50",
    "ICD9CM/410.51",
    "ICD9CM/410.52",
    "ICD9CM/410.6",
    "ICD9CM/410.60",
    "ICD9CM/410.61",
    "ICD9CM/410.62",
    "ICD9CM/410.7",
    "ICD9CM/410.70",
    "ICD9CM/410.71",
    "ICD9CM/410.72",
    "ICD9CM/410.8",
    "ICD9CM/410.80",
    "ICD9CM/410.81",
    "ICD9CM/410.82",
    "ICD9CM/410.9",
    "ICD9CM/410.90",
    "ICD9CM/410.91",
    "ICD9CM/410.92"
  ],
  "Cardiac dysrhythmias": [
    "ICD9CM/427.0",
    "ICD9CM/427.1",
    "ICD9CM/427.2",
    "ICD9CM/427.31",
    "ICD9CM/427.32",
    "ICD9CM/427.60",
    "ICD9CM/427.61",
    "ICD9CM/427.69",
    "ICD9CM/427.81",
    "ICD9CM/427.89",
    "ICD9CM/427.9",
    "ICD9CM/785.0",
    "ICD9CM/785.1"
  ],
  "Chronic kidney disease": [
    "ICD9CM/585",
    "ICD9CM/585.1",
    "ICD9CM/585.2",
    "ICD9CM/585.3",
    "ICD9CM/585.4",
    "ICD9CM/585.5",
    "ICD9CM/585.6",
    "ICD9CM/585.9",
    "ICD9CM/792.5",
    "ICD9CM/V42.0",
    "ICD9CM/V45.1",
    "ICD9CM/V45.11",
    "ICD9CM/V45.12",
    "ICD9CM/V56.0",
    "ICD9CM/V56.1",
    "ICD9CM/V56.2",
    "ICD9CM/V56.31",
    "ICD9CM/V56.32",
    "ICD9CM/V56.8"
  ],
  "Chronic obstructive pulmonary disease and bronchiectasis": [
    "ICD9CM/490",
    "ICD9CM/491.0",
    "ICD9CM/491.1",
    "ICD9CM/491.2",
    "ICD9CM/491.20",
    "ICD9CM/491.21",
    "ICD9CM/491.22",
    "ICD9CM/491.8",
    "ICD9CM/491.9",
    "ICD9CM/492.0",
    "ICD9CM/492.8",
    "ICD9CM/494",
    "ICD9CM/494.0",
    "ICD9CM/494.1",
    "ICD9CM/496"
  ],
  "Complications of surgical procedures or medical care": [
    "ICD9CM/276.61",
    "ICD9CM/277.83",
    "ICD9CM/277.88",
    "ICD9CM/285.3",
    "ICD9CM/287.41",
    "ICD9CM/349.0",
    "ICD9CM/349.1",
    "ICD9CM/349.31",
    "ICD9CM/415.11",
    "ICD9CM/429.4",
    "ICD9CM/458.2",
    "ICD9CM/458.21",
    "ICD9CM/458.29",
    "ICD9CM/512.1",
    "ICD9CM/512.2",
    "ICD9CM/518.7",
    "ICD9CM/519.0",
    "ICD9CM/519.00",
    "ICD9CM/519.01",
    "ICD9CM/519.02",
    "ICD9CM/519.09",
    "ICD9CM/530.86",
    "ICD9CM/530.87",
    "ICD9CM/536.40",
    "ICD9CM/536.41",
    "ICD9CM/536.42",
    "ICD9CM/536.49",
    "ICD9CM/539.01",
    "ICD9CM/539.09",
    "ICD9CM/539.81",
    "ICD9CM/539.89",
    "ICD9CM/564.2",
    "ICD9CM/564.3",
    "ICD9CM/564.4",
    "ICD9CM/569.6",
    "ICD9CM/569.62",
    "ICD9CM/569.71",
    "ICD9CM/569.79",
    "ICD9CM/579.3",
    "ICD9CM/596.81",
    "ICD9CM/780.62",
    "ICD9CM/780.63",
    "ICD9CM/780.66",
    "ICD9CM/909.3",
    "ICD9CM/995.24",
    "ICD9CM/995.4",
    "ICD9CM/995.86",
    "ICD9CM/997.0",
    "ICD9CM/997.00",
    "ICD9CM/997.01",
    "ICD9CM/997.02",
    "ICD9CM/997.09",
    "ICD9CM/997.1",
    "ICD9CM/997.2",
    "ICD9CM/997.3",
    "ICD9CM/997.31",
    "ICD9CM/997.32",
    "ICD9CM/997.39",
    "ICD9CM/997.4",
    "ICD9CM/997.41",
    "ICD9CM/997.49",
    "ICD9CM/997.5",
    "ICD9CM/997.60",
    "ICD9CM/997.61",
    "ICD9CM/997.62",
    "ICD9CM/997.69",
    "ICD9CM/997.71",
    "ICD9CM/997.72",
    "ICD9CM/997.79",
    "ICD9CM/997.9",
    "ICD9CM/997.91",
    "ICD9CM/997.99",
    "ICD9CM/998.0",
    "ICD9CM/998.00",
    "ICD9CM/998.01",
    "ICD9CM/998.02",
    "ICD9CM/998.09",
    "ICD9CM/998.1",
    "ICD9CM/998.11",
    "ICD9CM/998.12",
    "ICD9CM/998.13",
    "ICD9CM/998.2",
    "ICD9CM/998.3",
    "ICD9CM/998.30",
    "ICD9CM/998.31",
    "ICD9CM/998.32",
    "ICD9CM/998.33",
    "ICD9CM/998.4",
    "ICD9CM/998.5",
    "ICD9CM/998.51",
    "ICD9CM/998.59",
    "ICD9CM/998.6",
    "ICD9CM/998.7",
    "ICD9CM/998.8",
    "ICD9CM/998.81",
    "ICD9CM/998.82",
    "ICD9CM/998.83",
    "ICD9CM/998.89",
    "ICD9CM/998.9",
    "ICD9CM/999.0",
    "ICD9CM/999.1",
    "ICD9CM/999.2",
    "ICD9CM/999.3",
    "ICD9CM/999.34",
    "ICD9CM/999.39",
    "ICD9CM/999.4",
    "ICD9CM/999.41",
    "ICD9CM/999.42",
    "ICD9CM/999.49",
    "ICD9CM/999.5",
    "ICD9CM/999.51",
    "ICD9CM/999.52",
    "ICD9CM/999.59",
    "ICD9CM/999.6",
    "ICD9CM/999.60",
    "ICD9CM/999.61",
    "ICD9CM/999.62",
    "ICD9CM/999.63",
    "ICD9CM/999.69",
    "ICD9CM/999.7",
    "ICD9CM/999.70",
    "ICD9CM/999.71",
    "ICD9CM/999.72",
    "ICD9CM/999.73",
    "ICD9CM/999.74",
    "ICD9CM/999.75",
    "ICD9CM/999.76",
    "ICD9CM/999.77",
    "ICD9CM/999.78",
    "ICD9CM/999.79",
    "ICD9CM/999.8",
    "ICD9CM/999.80",
    "ICD9CM/999.81",
    "ICD9CM/999.82",
    "ICD9CM/999.83",
    "ICD9CM/999.84",
    "ICD9CM/999.85",
    "ICD9CM/999.88",
    "ICD9CM/999.89",
    "ICD9CM/999.9",
    "ICD9CM/V15.53",
    "ICD9CM/V15.80",
    "ICD9CM/V15.83",
    "ICD9CM/V90.01",
    "ICD9CM/V90.09"
  ],
  "Conduction disorders": [
    "ICD9CM/426.0",
    "ICD9CM/426.10",
    "ICD9CM/426.11",
    "ICD9CM/426.12",
    "ICD9CM/426.13",
    "ICD9CM/426.2",
    "ICD9CM/426.3",
    "ICD9CM/426.4",
    "ICD9CM/426.50",
    "ICD9CM/426.51",
    "ICD9CM/426.52",
    "ICD9CM/426.53",
    "ICD9CM/426.54",
    "ICD9CM/426.6",
    "ICD9CM/426.7",
    "ICD9CM/426.81",
    "ICD9CM/426.82",
    "ICD9CM/426.89",
    "ICD9CM/426.9",
    "ICD9CM/V45.0",
    "ICD9CM/V45.00",
    "ICD9CM/V45.01",
    "ICD9CM/V45.02",
    "ICD9CM/V45.09",
    "ICD9CM/V53.3",
    "ICD9CM/V53.31",
    "ICD9CM/V53.32",
    "ICD9CM/V53.39"
  ],
  "Congestive heart failure; nonhypertensive": [
    "ICD9CM/398.91",
    "ICD9CM/428.0",
    "ICD9CM/428.1",
    "ICD9CM/428.20",
    "ICD9CM/428.21",
    "ICD9CM/428.22",
    "ICD9CM/428.23",
    "ICD9CM/428.30",
    "ICD9CM/428.31",
    "ICD9CM/428.32",
    "ICD9CM/428.33",
    "ICD9CM/428.40",
    "ICD9CM/428.41",
    "ICD9CM/428.42",
    "ICD9CM/428.43",
    "ICD9CM/428.9"
  ],
  "Coronary atherosclerosis and other heart disease": [
    "ICD9CM/411.0",
    "ICD9CM/411.1",
    "ICD9CM/411.8",
    "ICD9CM/411.81",
    "ICD9CM/411.89",
    "ICD9CM/412",
    "ICD9CM/413.0",
    "ICD9CM/413.1",
    "ICD9CM/413.9",
    "ICD9CM/414.0",
    "ICD9CM/414.00",
    "ICD9CM/414.01",
    "ICD9CM/414.06",
    "ICD9CM/414.2",
    "ICD9CM/414.3",
    "ICD9CM/414.4",
    "ICD9CM/414.8",
    "ICD9CM/414.9",
    "ICD9CM/V45.81",
    "ICD9CM/V45.82"
  ],
  "Diabetes mellitus with complications": [
    "ICD9CM/249.01",
    "ICD9CM/249.10",
    "ICD9CM/249.11",
    "ICD9CM/249.20",
    "ICD9CM/249.21",
    "ICD9CM/249.30",
    "ICD9CM/249.31",
    "ICD9CM/249.40",
    "ICD9CM/249.41",
    "ICD9CM/249.50",
    "ICD9CM/249.51",
    "ICD9CM/249.60",
    "ICD9CM/249.61",
    "ICD9CM/249.70",
    "ICD9CM/249.71",
    "ICD9CM/249.80",
    "ICD9CM/249.81",
    "ICD9CM/249.90",
    "ICD9CM/249.91",
    "ICD9CM/250.02",
    "ICD9CM/250.03",
    "ICD9CM/250.10",
    "ICD9CM/250.11",
    "ICD9CM/250.12",
    "ICD9CM/250.13",
    "ICD9CM/250.20",
    "ICD9CM/250.21",
    "ICD9CM/250.22",
    "ICD9CM/250.23",
    "ICD9CM/250.30",
    "ICD9CM/250.31",
    "ICD9CM/250.32",
    "ICD9CM/250.33",
    "ICD9CM/250.40",
    "ICD9CM/250.41",
    "ICD9CM/250.42",
    "ICD9CM/250.43",
    "ICD9CM/250.50",
    "ICD9CM/250.51",
    "ICD9CM/250.52",
    "ICD9CM/250.53",
    "ICD9CM/250.60",
    "ICD9CM/250.61",
    "ICD9CM/250.62",
    "ICD9CM/250.63",
    "ICD9CM/250.70",
    "ICD9CM/250.71",
    "ICD9CM/250.72",
    "ICD9CM/250.73",
    "ICD9CM/250.80",
    "ICD9CM/250.81",
    "ICD9CM/250.82",
    "ICD9CM/250.83",
    "ICD9CM/250.90",
    "ICD9CM/250.91",
    "ICD9CM/250.92",
    "ICD9CM/250.93"
  ],
  "Diabetes mellitus without complication": [
    "ICD9CM/249.00",
    "ICD9CM/250.00",
    "ICD9CM/250.01",
    "ICD9CM/790.2",
    "ICD9CM/790.21",
    "ICD9CM/790.22",
    "ICD9CM/790.29",
    "ICD9CM/791.5",
    "ICD9CM/791.6",
    "ICD9CM/V45.85",
    "ICD9CM/V53.91",
    "ICD9CM/V65.46"
  ],
  "Disorders of lipid metabolism": [
    "ICD9CM/272.0",
    "ICD9CM/272.1",
    "ICD9CM/272.2",
    "ICD9CM/272.3",
    "ICD9CM/272.4"
  ],
  "Essential hypertension": [
    "ICD9CM/401.1",
    "ICD9CM/401.9"
  ],
  "Fluid and electrolyte disorders": [
    "ICD9CM/276.0",
    "ICD9CM/276.1",
    "ICD9CM/276.2",
    "ICD9CM/276.3",
    "ICD9CM/276.4",
    "ICD9CM/276.5",
    "ICD9CM/276.50",
    "ICD9CM/276.51",
    "ICD9CM/276.52",
    "ICD9CM/276.6",
    "ICD9CM/276.69",
    "ICD9CM/276.7",
    "ICD9CM/276.8",
    "ICD9CM/276.9",
    "ICD9CM/995.1"
  ],
  "Gastrointestinal hemorrhage": [
    "ICD9CM/456.0",
    "ICD9CM/456.20",
    "ICD9CM/530.7",
    "ICD9CM/530.82",
    "ICD9CM/531.00",
    "ICD9CM/531.01",
    "ICD9CM/531.20",
    "ICD9CM/531.21",
    "ICD9CM/531.40",
    "ICD9CM/531.41",
    "ICD9CM/531.60",
    "ICD9CM/531.61",
    "ICD9CM/532.00",
    "ICD9CM/532.01",
    "ICD9CM/532.20",
    "ICD9CM/532.21",
    "ICD9CM/532.40",
    "ICD9CM/532.41",
    "ICD9CM/532.60",
    "ICD9CM/532.61",
    "ICD9CM/533.00",
    "ICD9CM/533.01",
    "ICD9CM/533.20",
    "ICD9CM/533.21",
    "ICD9CM/533.40",
    "ICD9CM/533.41",
    "ICD9CM/533.60",
    "ICD9CM/533.61",
    "ICD9CM/534.00",
    "ICD9CM/534.01",
    "ICD9CM/534.20",
    "ICD9CM/534.21",
    "ICD9CM/534.40",
    "ICD9CM/534.41",
    "ICD9CM/534.60",
    "ICD9CM/534.61",
    "ICD9CM/569.3",
    "ICD9CM/578.0",
    "ICD9CM/578.1",
    "ICD9CM/578.9"
  ],
  "Hypertension with complications and secondary hypertension": [
    "ICD9CM/401.0",
    "ICD9CM/402.00",
    "ICD9CM/402.01",
    "ICD9CM/402.10",
    "ICD9CM/402.11",
    "ICD9CM/402.90",
    "ICD9CM/402.91",
    "ICD9CM/403.0",
    "ICD9CM/403.00",
    "ICD9CM/403.01",
    "ICD9CM/403.1",
    "ICD9CM/403.10",
    "ICD9CM/403.11",
    "ICD9CM/403.9",
    "ICD9CM/403.90",
    "ICD9CM/403.91",
    "ICD9CM/404.0",
    "ICD9CM/404.00",
    "ICD9CM/404.01",
    "ICD9CM/404.02",
    "ICD9CM/404.03",
    "ICD9CM/404.1",
    "ICD9CM/404.10",
    "ICD9CM/404.11",
    "ICD9CM/404.12",
    "ICD9CM/404.13",
    "ICD9CM/404.9",
    "ICD9CM/404.90",
    "ICD9CM/404.91",
    "ICD9CM/404.92",
    "ICD9CM/404.93",
    "ICD9CM/405.01",
    "ICD9CM/405.09",
    "ICD9CM/405.11",
    "ICD9CM/405.19",
    "ICD9CM/405.91",
    "ICD9CM/405.99",
    "ICD9CM/437.2"
  ],
  "Other liver diseases": [
    "ICD9CM/570",
    "ICD9CM/571.5",
    "ICD9CM/571.6",
    "ICD9CM/571.8",
    "ICD9CM/571.9",
    "ICD9CM/572.0",
    "ICD9CM/572.1",
    "ICD9CM/572.2",
    "ICD9CM/572.3",
    "ICD9CM/572.4",
    "ICD9CM/572.8",
    "ICD9CM/573.0",
    "ICD9CM/573.4",
    "ICD9CM/573.5",
    "ICD9CM/573.8",
    "ICD9CM/573.9",
    "ICD9CM/782.4",
    "ICD9CM/789.1",
    "ICD9CM/789.5",
    "ICD9CM/789.59",
    "ICD9CM/790.4",
    "ICD9CM/790.5",
    "ICD9CM/794.8",
    "ICD9CM/V42.7"
  ],
  "Other lower respiratory disease": [
    "ICD9CM/513.1",
    "ICD9CM/514",
    "ICD9CM/515",
    "ICD9CM/516.0",
    "ICD9CM/516.1",
    "ICD9CM/516.2",
    "ICD9CM/516.3",
    "ICD9CM/516.30",
    "ICD9CM/516.31",
    "ICD9CM/516.32",
    "ICD9CM/516.33",
    "ICD9CM/516.34",
    "ICD9CM/516.35",
    "ICD9CM/516.36",
    "ICD9CM/516.37",
    "ICD9CM/516.4",
    "ICD9CM/516.5",
    "ICD9CM/516.61",
    "ICD9CM/516.62",
    "ICD9CM/516.63",
    "ICD9CM/516.64",
    "ICD9CM/516.69",
    "ICD9CM/516.8",
    "ICD9CM/516.9",
    "ICD9CM/517.2",
    "ICD9CM/517.8",
    "ICD9CM/518.3",
    "ICD9CM/518.4",
    "ICD9CM/518.89",
    "ICD9CM/519.4",
    "ICD9CM/519.8",
    "ICD9CM/519.9",
    "ICD9CM/782.5",
    "ICD9CM/786.00",
    "ICD9CM/786.01",
    "ICD9CM/786.02",
    "ICD9CM/786.03",
    "ICD9CM/786.04",
    "ICD9CM/786.05",
    "ICD9CM/786.06",
    "ICD9CM/786.07",
    "ICD9CM/786.09",
    "ICD9CM/786.2",
    "ICD9CM/786.3",
    "ICD9CM/786.30",
    "ICD9CM/786.31",
    "ICD9CM/786.39",
    "ICD9CM/786.4",
    "ICD9CM/786.52",
    "ICD9CM/786.6",
    "ICD9CM/786.7",
    "ICD9CM/786.8",
    "ICD9CM/786.9",
    "ICD9CM/793.1",
    "ICD9CM/793.11",
    "ICD9CM/793.19",
    "ICD9CM/794.2",
    "ICD9CM/V12.6",
    "ICD9CM/V12.60",
    "ICD9CM/V12.61",
    "ICD9CM/V12.69",
    "ICD9CM/V42.6"
  ],
  "Other upper respiratory disease": [
    "ICD9CM/470",
    "ICD9CM/471.0",
    "ICD9CM/471.1",
    "ICD9CM/471.8",
    "ICD9CM/471.9",
    "ICD9CM/472.0",
    "ICD9CM/472.1",
    "ICD9CM/472.2",
    "ICD9CM/476.0",
    "ICD9CM/476.1",
    "ICD9CM/477.0",
    "ICD9CM/477.2",
    "ICD9CM/477.8",
    "ICD9CM/477.9",
    "ICD9CM/478.0",
    "ICD9CM/478.1",
    "ICD9CM/478.11",
    "ICD9CM/478.19",
    "ICD9CM/478.20",
    "ICD9CM/478.21",
    "ICD9CM/478.22",
    "ICD9CM/478.24",
    "ICD9CM/478.25",
    "ICD9CM/478.26",
    "ICD9CM/478.29",
    "ICD9CM/478.30",
    "ICD9CM/478.31",
    "ICD9CM/478.32",
    "ICD9CM/478.33",
    "ICD9CM/478.34",
    "ICD9CM/478.4",
    "ICD9CM/478.5",
    "ICD9CM/478.6",
    "ICD9CM/478.70",
    "ICD9CM/478.71",
    "ICD9CM/478.74",
    "ICD9CM/478.75",
    "ICD9CM/478.79",
    "ICD9CM/478.8",
    "ICD9CM/478.9",
    "ICD9CM/519.1",
    "ICD9CM/519.11",
    "ICD9CM/519.19",
    "ICD9CM/519.2",
    "ICD9CM/519.3",
    "ICD9CM/784.1",
    "ICD9CM/784.40",
    "ICD9CM/784.41",
    "ICD9CM/784.42",
    "ICD9CM/784.43",
    "ICD9CM/784.44",
    "ICD9CM/784.49",
    "ICD9CM/784.7",
    "ICD9CM/784.8",
    "ICD9CM/784.9",
    "ICD9CM/784.99",
    "ICD9CM/786.1",
    "ICD9CM/V41.4",
    "ICD9CM/V44.0",
    "ICD9CM/V55.0"
  ],
  "Pleurisy; pneumothorax; pulmonary collapse": [
    "ICD9CM/510.0",
    "ICD9CM/510.9",
    "ICD9CM/511.0",
    "ICD9CM/511.1",
    "ICD9CM/511.8",
    "ICD9CM/511.89",
    "ICD9CM/511.9",
    "ICD9CM/512.0",
    "ICD9CM/512.8",
    "ICD9CM/512.81",
    "ICD9CM/512.82",
    "ICD9CM/512.83",
    "ICD9CM/512.84",
    "ICD9CM/512.89",
    "ICD9CM/518.0",
    "ICD9CM/518.1",
    "ICD9CM/518.2"
  ],
  "Pneumonia": [
    # The next six codes previously appeared as "203", "204", "205", "212",
    # "221" and "322" (leading zeros and decimal point lost), which are
    # unrelated ICD-9-CM categories rather than pneumonia diagnoses.
    "ICD9CM/003.22",
    "ICD9CM/020.3",
    "ICD9CM/020.4",
    "ICD9CM/020.5",
    "ICD9CM/021.2",
    "ICD9CM/022.1",
    "ICD9CM/031.0",
    "ICD9CM/039.1",
    "ICD9CM/052.1",
    "ICD9CM/055.1",
    "ICD9CM/073.0",
    "ICD9CM/083.0",
    "ICD9CM/112.4",
    "ICD9CM/114.0",
    "ICD9CM/114.4",
    "ICD9CM/114.5",
    "ICD9CM/115.05",
    "ICD9CM/115.15",
    "ICD9CM/115.95",
    "ICD9CM/130.4",
    "ICD9CM/136.3",
    "ICD9CM/480.0",
    "ICD9CM/480.1",
    "ICD9CM/480.2",
    "ICD9CM/480.3",
    "ICD9CM/480.8",
    "ICD9CM/480.9",
    "ICD9CM/481",
    "ICD9CM/482.0",
    "ICD9CM/482.1",
    "ICD9CM/482.2",
    "ICD9CM/482.3",
    "ICD9CM/482.30",
    "ICD9CM/482.31",
    "ICD9CM/482.32",
    "ICD9CM/482.39",
    "ICD9CM/482.4",
    "ICD9CM/482.40",
    "ICD9CM/482.41",
    "ICD9CM/482.42",
    "ICD9CM/482.49",
    "ICD9CM/482.8",
    "ICD9CM/482.81",
    "ICD9CM/482.82",
    "ICD9CM/482.83",
    "ICD9CM/482.84",
    "ICD9CM/482.89",
    "ICD9CM/482.9",
    "ICD9CM/483",
    "ICD9CM/483.0",
    "ICD9CM/483.1",
    "ICD9CM/483.8",
    "ICD9CM/484.1",
    "ICD9CM/484.3",
    "ICD9CM/484.5",
    "ICD9CM/484.6",
    "ICD9CM/484.7",
    "ICD9CM/484.8",
    "ICD9CM/485",
    "ICD9CM/486",
    "ICD9CM/513.0",
    "ICD9CM/517.1"
  ],
  "Respiratory failure; insufficiency; arrest": [
    "ICD9CM/517.3",
    "ICD9CM/518.5",
    "ICD9CM/518.51",
    "ICD9CM/518.52",
    "ICD9CM/518.53",
    "ICD9CM/518.81",
    "ICD9CM/518.82",
    "ICD9CM/518.83",
    "ICD9CM/518.84",
    "ICD9CM/799.1",
    "ICD9CM/V46.1",
    "ICD9CM/V46.11",
    "ICD9CM/V46.12",
    "ICD9CM/V46.13",
    "ICD9CM/V46.14",
    "ICD9CM/V46.2"
  ],
  "Septicemia (except in labor)": [
    "ICD9CM/003.1",
    "ICD9CM/020.2",
    "ICD9CM/022.3",
    "ICD9CM/036.2",
    "ICD9CM/038.0",
    "ICD9CM/038.1",
    "ICD9CM/038.10",
    "ICD9CM/038.11",
    "ICD9CM/038.12",
    "ICD9CM/038.19",
    "ICD9CM/038.2",
    "ICD9CM/038.3",
    "ICD9CM/038.40",
    "ICD9CM/038.41",
    "ICD9CM/038.42",
    "ICD9CM/038.43",
    "ICD9CM/038.44",
    "ICD9CM/038.49",
    "ICD9CM/038.8",
    "ICD9CM/038.9",
    "ICD9CM/054.5",
    "ICD9CM/449",
    "ICD9CM/771.81",
    "ICD9CM/790.7",
    "ICD9CM/995.91",
    "ICD9CM/995.92"
  ],
  "Shock": [
    "ICD9CM/785.50",
    "ICD9CM/785.51",
    "ICD9CM/785.52",
    "ICD9CM/785.59"
  ]
}

In [None]:
import os
import json
import pandas as pd
import random
from multiprocessing import Pool, Manager
from loguru import logger
from typing import List, Dict

# Core femr and ehrshot imports
import femr.datasets
from femr.labelers import LabeledPatients, Label
from ehrshot.labelers.mimic import Mimic_ICUEventStreamMortalityLabeler, Mimic_ICUEventStreamPhenotypeLabeler

In [None]:
# --- Please modify your configuration here ---

# Name of the labeling task; labels are written to a subdirectory with this name.
TASK_NAME = "mimic_icu_phenotyping"

# Location of the FEMR database extract to read patients from.
PATH_TO_FEMR_DATABASE = "/root/autodl-tmp/femr/held_out/extract"

# Parent directory under which the task's label files are written.
PATH_TO_OUTPUT_DIR = "/root/autodl-tmp/femr/held_out/femr_labels/"

# Degree of parallelism used when applying the labeler and when sampling.
NUM_PROCESSES = 15

# True  -> keep a single randomly chosen ICU-stay label per patient.
# False -> keep a label for every ICU stay.
IS_SAMPLE_ONE_LABEL_PER_PATIENT = False

# --- End of configuration ---

In [None]:
# Derive the task-specific output directory and label file from the configuration.
PATH_TO_TASK_OUTPUT_DIR = os.path.join(PATH_TO_OUTPUT_DIR, TASK_NAME)
PATH_TO_OUTPUT_FILE = os.path.join(PATH_TO_TASK_OUTPUT_DIR, "labeled_patients.csv")
path_to_log_file = os.path.join(PATH_TO_TASK_OUTPUT_DIR, 'info.log')

# Make sure the output directory exists before anything is written into it.
os.makedirs(PATH_TO_TASK_OUTPUT_DIR, exist_ok=True)

# Start each run with a fresh log file, then attach it as an INFO-level sink.
if os.path.exists(path_to_log_file):
    os.remove(path_to_log_file)
logger.add(path_to_log_file, level="INFO")

# Record the effective run configuration at the top of the log.
for _config_line in (
    f"Task: {TASK_NAME}",
    f"FEMR Database Path: {PATH_TO_FEMR_DATABASE}",
    f"Output Directory: {PATH_TO_TASK_OUTPUT_DIR}",
    f"Sample one label per patient: {IS_SAMPLE_ONE_LABEL_PER_PATIENT}",
    f"Number of threads: {NUM_PROCESSES}",
):
    logger.info(_config_line)

In [None]:
def save_labeled_patients_to_csv(labeled_patients: LabeledPatients, path_to_csv: str):
    """Write every label in ``labeled_patients`` to a CSV file, one row per label.

    Args:
        labeled_patients: Mapping-like object yielding ``(patient_id, labels)``
            pairs from ``.items()`` and exposing a ``labeler_type`` attribute.
            Each label must provide ``.time`` and ``.value``.
        path_to_csv: Destination path of the CSV file.

    The output columns are ``patient_id``, ``prediction_time``, ``value`` and
    ``label_type``; rows are sorted by the first three and written without the
    DataFrame index.
    """
    column_names = ['patient_id', 'prediction_time', 'value', 'label_type']
    records = [
        (patient_id, label.time, label.value, labeled_patients.labeler_type)
        for patient_id, patient_labels in labeled_patients.items()
        for label in patient_labels
    ]
    frame = pd.DataFrame(records, columns=column_names)
    frame = frame.sort_values(column_names[:3])
    frame.to_csv(path_to_csv, index=False)
    logger.success(f"Successfully saved {len(frame)} labels to {path_to_csv}")

# Worker used only when IS_SAMPLE_ONE_LABEL_PER_PATIENT is enabled.
def process_patient_ids_for_sampling(args):
    """Sample at most one adult label per patient for a subset of patient IDs.

    The single tuple argument makes the function usable with ``Pool.imap``.

    Args:
        args: Tuple ``(pid_subset, labeled_patients_dict, path_to_database)``:
            the patient IDs to process, the dict form of the labeled patients
            (rebuilt here with ``LabeledPatients.from_dict``), and the path of
            the FEMR database to open in this process.

    Returns:
        Dict mapping each processed patient ID to a list holding zero or one
        label. Patients that have no events in the database are omitted.
    """
    pid_subset, labeled_patients_dict, path_to_database = args
    database = femr.datasets.PatientDatabase(path_to_database)
    labeled_patients = LabeledPatients.from_dict(labeled_patients_dict)

    local_results = {}
    for pid in pid_subset:
        # Look the patient up once; the original code indexed the database twice.
        events = database[pid].events
        if not events:
            continue
        # NOTE(review): treats the first event as the birth event — confirm
        # that this holds for the extract being used.
        birth_year = events[0].start.year

        # Keep only labels whose prediction time falls at age >= 18 (by year).
        labels = labeled_patients.get_labels_from_patient_idx(pid)
        valid_labels = [label for label in labels if (label.time.year - birth_year) >= 18]

        # A private generator seeded with the patient ID draws the same sample
        # as random.seed(pid) + random.choice, without touching global RNG state.
        rng = random.Random(int(pid))
        local_results[pid] = [rng.choice(valid_labels)] if valid_labels else []

    return local_results

In [None]:
# Load PatientDatabase and Ontology
logger.info("Start | Loading PatientDatabase and Ontology")
database = femr.datasets.PatientDatabase(PATH_TO_FEMR_DATABASE)
ontology = database.get_ontology()
logger.info("Finish | Loading PatientDatabase and Ontology")

# Initialize the ICU phenotyping labeler with the ontology and the
# phenotype -> ICD-9-CM code mapping defined in the first cell
labeler = Mimic_ICUEventStreamPhenotypeLabeler(ontology, phenotype_to_codes)

# Apply the labeler to the database (the labeler is given the database path,
# not the already-open `database` handle)
logger.info("Start | Applying labeler to all patients")
labeled_patients = labeler.apply(
    path_to_patient_database=PATH_TO_FEMR_DATABASE,
    num_threads=NUM_PROCESSES,
)
logger.info("Finish | Applying labeler")

In [None]:
# Optional: Randomly sample one label per patient
if IS_SAMPLE_ONE_LABEL_PER_PATIENT:
    # tqdm is not imported by the import cell at the top of this notebook, so
    # bring it into scope here; fall back to a pass-through when it is missing.
    try:
        from tqdm import tqdm
    except ImportError:
        def tqdm(iterable, **kwargs):
            return iterable

    logger.info("Start | Sampling one label per patient")
    pids = list(labeled_patients.keys())
    # Round-robin split so every worker receives a similar number of patients.
    pid_subsets = [pids[i::NUM_PROCESSES] for i in range(NUM_PROCESSES)]

    # We pass a dictionary representation of labeled_patients to avoid pickling issues
    labeled_patients_dict = labeled_patients.to_dict()

    with Pool(NUM_PROCESSES) as pool:
        results_list = list(tqdm(
            pool.imap(
                process_patient_ids_for_sampling,
                [(subset, labeled_patients_dict, PATH_TO_FEMR_DATABASE) for subset in pid_subsets]
            ),
            total=len(pid_subsets),
            desc="Sampling labels"
        ))

    # Combine results from all processes
    combined_results = {k: v for d in results_list for k, v in d.items()}
    labeled_patients = LabeledPatients(combined_results, labeler_type=labeler.get_labeler_type())
    logger.info("Finish | Sampling one label per patient")

# Force labels to be minute-level resolution for FEMR compatibility
logger.info("Start | Adjusting label timestamps to minute-level resolution")
for patient_id, labels in labeled_patients.items():
    # Only existing keys are reassigned, so the mapping does not change size
    # while it is being iterated.
    new_labels = [
        Label(time=label.time.replace(second=0, microsecond=0), value=label.value)
        for label in labels
    ]
    labeled_patients[patient_id] = new_labels
logger.info("Finish | Adjusting label timestamps")

# Save the final labeled patients object using the LabeledPatients.save method
logger.info(f"Saving final labeled patients to CSV format at {PATH_TO_OUTPUT_FILE}")
labeled_patients.save(PATH_TO_OUTPUT_FILE)

In [None]:
# Final logging of statistics
logger.info("--- Final Label Statistics ---")
num_patients_total = labeled_patients.get_num_patients(is_include_empty_labels=True)
num_patients_with_labels = labeled_patients.get_num_patients(is_include_empty_labels=False)
num_labels = labeled_patients.get_num_labels()
# label_values is kept available for interactive inspection; no positive-rate
# statistic is logged here because this task is multi-label.
_, label_values, _ = labeled_patients.as_numpy_arrays()

logger.info(f"Total # of patients in database: {num_patients_total}")
logger.info(f"Total # of patients with at least one label: {num_patients_with_labels}")
logger.info(f"Total # of labels (ICU stays): {num_labels}")

# The emoji in this message had been corrupted by a wrong text decoding.
logger.success("🎉 Done! 🎉")

In [None]:
# Notebook inspection cell: display the arrays returned by as_numpy_arrays().
labeled_patients.as_numpy_arrays()

In [None]:
# Notebook inspection cell (same call as the preceding cell).
labeled_patients.as_numpy_arrays()

# My Labeller

In [None]:
from datetime import timedelta
from femr.datasets import PatientDatabase

# Event-code prefixes that mark the start and end of an ICU stay, and the set
# of event codes treated as a death event, used by summarize_icu_from_femr.
ICU_ADMIT_PREFIX = "MIMIC/ICU_ADMISSION"
ICU_DISCHARGE_PREFIX = "MIMIC/ICU_DISCHARGE"
DEATH_CODES = {"SNOMED/419620001"}

# tqdm is optional: only a missing package should trigger the fallback, so any
# other failure while importing it is allowed to surface.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable, **kwargs):
        """Pass-through stand-in for tqdm: return the iterable unchanged."""
        return iterable

def summarize_icu_from_femr(path_to_db: str):
    """Count patients and ICU episodes in a FEMR patient database.

    Each patient's events are sorted by start time. An ICU episode is an event
    whose code starts with ``ICU_ADMIT_PREFIX``, paired with the first later
    event whose code starts with ``ICU_DISCHARGE_PREFIX``. Admissions with no
    later discharge are not counted as episodes. Scanning resumes after the
    discharge, so admissions inside an already paired episode are skipped.

    Args:
        path_to_db: Path passed to ``PatientDatabase``.

    Returns:
        Dict with four counters:
            ``n_patients_total``: number of patient IDs scanned.
            ``n_patients_any_icu``: patients with at least one ICU admission event.
            ``n_icu_episodes``: admission/discharge pairs found.
            ``n_episodes_ge24_no_early_death``: episodes lasting at least 24
            hours with no ``DEATH_CODES`` event between admission and
            discharge at a time in (admission, admission + 24h].
    """

    def _code_has_prefix(event, prefix):
        # Events may lack a string code; those never match a prefix.
        code = getattr(event, "code", None)
        return isinstance(code, str) and code.startswith(prefix)

    database = PatientDatabase(path_to_db)

    try:
        patient_ids = list(database)  # iterating the database yields patient IDs
    except TypeError:
        # Database is not iterable: fall back to positional indices.
        patient_ids = list(range(len(database)))

    patients_with_icu = 0
    episode_count = 0
    eligible_episode_count = 0

    for patient_id in tqdm(patient_ids, desc="Scanning ICU", mininterval=0.5):
        timeline = sorted(database[patient_id].events, key=lambda ev: ev.start)
        total = len(timeline)

        if any(_code_has_prefix(ev, ICU_ADMIT_PREFIX) for ev in timeline):
            patients_with_icu += 1

        cursor = 0
        while cursor < total:
            admission = timeline[cursor]
            if not _code_has_prefix(admission, ICU_ADMIT_PREFIX):
                cursor += 1
                continue

            # Index of the first discharge event after this admission, if any.
            discharge_idx = next(
                (
                    idx
                    for idx in range(cursor + 1, total)
                    if _code_has_prefix(timeline[idx], ICU_DISCHARGE_PREFIX)
                ),
                None,
            )
            if discharge_idx is None:
                # No matching discharge: this admission is not a countable stay.
                cursor += 1
                continue

            episode_count += 1
            admit_time = admission.start
            discharge_time = timeline[discharge_idx].start

            if (discharge_time - admit_time).total_seconds() >= 24 * 3600:
                prediction_time = admit_time + timedelta(hours=24)
                died_early = any(
                    getattr(ev, "code", None) in DEATH_CODES
                    and admit_time < ev.start <= prediction_time
                    for ev in timeline[cursor + 1:discharge_idx]
                )
                if not died_early:
                    eligible_episode_count += 1

            # Resume scanning after the discharge that closed this episode.
            cursor = discharge_idx + 1

    return {
        "n_patients_total": len(patient_ids),
        "n_patients_any_icu": patients_with_icu,  # patients with any ICU admission
        "n_icu_episodes": episode_count,  # paired admission -> discharge ICU stays
        "n_episodes_ge24_no_early_death": eligible_episode_count,
    }


In [None]:
# Scan the configured FEMR extract and print the ICU summary counters.
stats = summarize_icu_from_femr(PATH_TO_FEMR_DATABASE)
print(stats)