# Ceiling Data Processing Pipeline


In [None]:
import pandas as pd
import os
import sys
from config import Config as paths

# Put the parent of the current working directory on the import path (once),
# so the `data_cleaning` imports that follow can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from data_cleaning.cleaners.episode.clean_data_ceiling import CeilingCleaner
from data_cleaning.renaming import rename_columns, generate_and_save_rename_columns_json

## Load Data


In [None]:
# Read the two raw source tables; the outpatient file is read first, then the
# inpatient file, from the parquet locations configured on `paths`.
data_multi_outpatient, data_multi_inpatient = (
    pd.read_parquet(parquet_source)
    for parquet_source in (paths.MELIOR_OV_MULTI, paths.MELIOR_SV_MULTI)
)

## Generate Rename Columns File


In [None]:
# One rename file per source table, inpatient first.
# NOTE(review): presumably the helper derives a column-rename mapping from the
# frame and writes it to the given JSON path — confirm in data_cleaning.renaming.
inpatient_rename_json = (
    f"{paths.RENAME_FILES_PATH_CEILING}/ceiling_inpatient_rename_columns.json"
)
generate_and_save_rename_columns_json(data_multi_inpatient, inpatient_rename_json)

outpatient_rename_json = (
    f"{paths.RENAME_FILES_PATH_CEILING}/ceiling_outpatient_rename_columns.json"
)
generate_and_save_rename_columns_json(data_multi_outpatient, outpatient_rename_json)


## Rename Columns


In [None]:
# Apply the per-table rename files to the raw frames, inpatient first.
# NOTE(review): presumably rename_columns returns a frame whose columns follow
# the mapping stored in the JSON file — confirm in data_cleaning.renaming.
inpatient_mapping_path = (
    f"{paths.RENAME_FILES_PATH_CEILING}/ceiling_inpatient_rename_columns.json"
)
data_multi_inpatient_renamed = rename_columns(
    data_multi_inpatient, inpatient_mapping_path
)

outpatient_mapping_path = (
    f"{paths.RENAME_FILES_PATH_CEILING}/ceiling_outpatient_rename_columns.json"
)
data_multi_outpatient_renamed = rename_columns(
    data_multi_outpatient, outpatient_mapping_path
)

## Concatenate Inpatient and Outpatient Data

In [None]:
# Stack the renamed inpatient rows on top of the renamed outpatient rows.
# ignore_index=True already gives the result a fresh 0..n-1 index, so the
# previous trailing reset_index(drop=True) was a redundant extra copy.
ceiling_data = pd.concat(
    [data_multi_inpatient_renamed, data_multi_outpatient_renamed],
    ignore_index=True,
)

## Add Episode IDs to Ceiling Data


In [None]:
# Load the reference table from the configured parquet file; the merge below
# reads its patient_id, hosp_id, sample_date and episode_id columns.
reference_data = pd.read_parquet(paths.REFERENCE_DATA_PATH)

In [None]:
# Distinct (patient, hospital stay, sample date, episode) combinations from the
# reference table.
episode_key_columns = ["patient_id", "hosp_id", "sample_date", "episode_id"]
episode_lookup = reference_data[episode_key_columns].drop_duplicates()

# Right join on hosp_id: every reference row is kept, including rows that have
# no matching ceiling record (their ceiling columns come out as missing).
ceiling_data_with_episodes = ceiling_data.merge(
    episode_lookup, how="right", on="hosp_id"
)


In [None]:
def set_ceiling_level(df, ceiling_col='ceiling_level'):
    """Return a copy of *df* with a numeric ``ceiling_decision`` column.

    The free-text column ``ceiling_col`` is lower-cased and searched for
    keywords; each keyword maps to a decision code:

    * ``"hlr"``          -> 1
    * ``"ntensivvård"``  -> 2
    * ``"palliativ"``    -> 3

    When a text matches several keywords, the highest code wins. Rows that
    match no keyword (or whose text is missing) are left without a decision.

    Args:
        df: Input frame; it is not modified.
        ceiling_col: Name of the text column to classify.

    Returns:
        A new DataFrame with ``ceiling_decision`` set on the matching rows.
    """
    df = df.copy()

    # Lower-case once instead of once per keyword.
    level_text = df[ceiling_col].str.lower()

    # Applied in ascending code order so that a later, more restrictive match
    # overwrites an earlier one. Previously "hlr" was applied after
    # "ntensivvård", so a text naming both ended up as 1 instead of 2.
    keyword_to_decision = (
        ("hlr", 1),
        ("ntensivvård", 2),
        ("palliativ", 3),
    )
    for keyword, decision in keyword_to_decision:
        matches = level_text.str.contains(keyword, na=False)
        df.loc[matches, "ceiling_decision"] = decision

    return df




In [None]:
# Classify every ceiling record. set_ceiling_level works on its own copy of the
# input, so no additional .copy() is needed here.
ceiling_data_with_decision = set_ceiling_level(ceiling_data_with_episodes)

# Drop rows without a ceiling date. The explicit copy makes the column
# assignment below operate on an independent frame rather than on a slice.
ceiling_data_with_decision = ceiling_data_with_decision[
    ceiling_data_with_decision["ceiling_date"].notna()
].copy()

# Parse the dates and strip any timezone information.
ceiling_data_with_decision["ceiling_date"] = pd.to_datetime(
    ceiling_data_with_decision["ceiling_date"]
).dt.tz_localize(None)

# Keep only ceiling decisions dated strictly before the sample date.
ceiling_data_with_decision = ceiling_data_with_decision[
    ceiling_data_with_decision["ceiling_date"]
    < ceiling_data_with_decision["sample_date"]
].copy()

# Attach a readable label to each decision code.
ceiling_data_with_decision.loc[
    ceiling_data_with_decision["ceiling_decision"] == 1, "ceiling_decision_text"
] = 'No CPR'
ceiling_data_with_decision.loc[
    ceiling_data_with_decision["ceiling_decision"] == 2, "ceiling_decision_text"
] = 'No CPR or ICU'
ceiling_data_with_decision.loc[
    ceiling_data_with_decision["ceiling_decision"] == 3, "ceiling_decision_text"
] = 'Palliative'

# Keep the worst (highest-coded) decision per episode. Decisions are sorted in
# descending order with missing codes last, so the first row of each episode is
# the most restrictive one. The earlier ascending sort kept the lowest code.
ceiling_data_with_decision = ceiling_data_with_decision.sort_values(
    ["episode_id", "ceiling_decision"],
    ascending=[True, False],
    na_position="last",
).drop_duplicates(subset=["episode_id"], keep="first")


## Summarize Episodes


In [None]:
# Reduce the per-episode result to the three columns that get stored.
summary_columns = ["episode_id", "ceiling_date", "ceiling_decision_text"]
ceiling_data_summary = ceiling_data_with_decision[summary_columns]

## Save Processed Data


In [None]:
# Create the output directory if needed. exist_ok=True replaces the separate
# exists() check, which could race with another process creating the directory.
os.makedirs(paths.STORE_CEILING_DATA_PATH, exist_ok=True)

# Write the per-episode summary as a parquet file in that directory.
ceiling_data_summary.to_parquet(
    f"{paths.STORE_CEILING_DATA_PATH}/ceiling_data_summary.parquet"
)