In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot and display defaults for the whole notebook.
# NOTE(review): set_style() is called without a style name, which appears to
# re-apply the style that is already active rather than select a new one —
# confirm whether a specific theme (e.g. "whitegrid") was intended.
sns.set_style()
plt.rcParams.update({"figure.figsize": (12, 6)})
# None removes the cap on how many columns pandas prints for a frame.
pd.options.display.max_columns = None

# Column-name lookup tables used when loading and reshaping the discharge data.

# Every PROCEDIMIENTO1..PROCEDIMIENTO30 column is mapped to str; the mapping is
# handed to read_csv as its `dtype` argument.
PROCEDURE_COLUMNS = dict.fromkeys((f"PROCEDIMIENTO{n}" for n in range(1, 31)), str)

# Service columns in journey order: admission (0), transfers 1-9, discharge (10).
# The key order is also used as the ordered category list for sorting.
SERVICE_COLUMNS = {
    column: position
    for position, column in enumerate(
        ["SERVICIOINGRESO", *(f"SERVICIOTRASLADO{n}" for n in range(1, 10)), "SERVICIOALTA"]
    )
}

# Date columns in the same positional order as SERVICE_COLUMNS, so that the
# n-th date column pairs with the n-th service column.
DATE_COLUMNS = {
    column: position
    for position, column in enumerate(
        ["FECHA_INGRESO", *(f"FECHATRASLADO{n}" for n in range(1, 10)), "FECHAALTA"]
    )
}
# Raw service names grouped under the bed category they are reported as.
_SERVICES_BY_BED_TYPE = {
    "Basicos": [
        "AREA MEDICA ADULTO CUIDADOS BASICOS",
    ],
    "Medias": [
        "AREA MEDICA ADULTO CUIDADOS MEDIOS",
        "AREA MEDICO-QUIRURGICO CUIDADOS MEDIOS",
        "AREA MÉDICA",
        "AREA QUIRÚRGICA",
        "CIRUGÍA CARDIOVASCULAR",
    ],
    "UCI": [
        "UNIDAD DE CUIDADOS INTENSIVOS (UCI) (INDIFERENCIADO)",
        "UNIDAD DE CUIDADOS INTENSIVOS ADULTO",
        "UNIDAD DE CUIDADOS INTENSIVOS CARDIOLOGÍA",
    ],
    "AMBULATORIO": [
        "UNIDAD DE RECUPERACIÓN DE PABELLONES (CENTRAL Y CMA)",
    ],
    "UTI": [
        "UNIDAD DE TRATAMIENTO INTERMEDIO (UTI) (INDIFERENCIADO) ADULTO",
        "UNIDAD DE TRATAMIENTO INTERMEDIO CARDIOVASCULAR",
        "UNIDAD DE TRATAMIENTO INTERMEDIO CIRUGÍA ADULTO",
        "UNIDAD DE TRATAMIENTO INTERMEDIO MEDICINA ADULTO",
        "UNIDAD DE TRATAMIENTOS INTERMEDIOS MEDICINA",
    ],
}

# Flat lookup: raw service name -> bed category. Keys must match the raw text
# exactly (accents included); names that are not listed here have no mapping.
BED_TYPES = {
    service_name: bed_type
    for bed_type, service_names in _SERVICES_BY_BED_TYPE.items()
    for service_name in service_names
}

# Diagnosis codes kept in the "most relevant" summary; they are matched against
# DIAGNOSTICO1 by exact string equality. One row per leading letter.
RELEVANT_DIAGNOSES = [
    "C33", "C34.0", "C34.1", "C34.2", "C34.3", "C38.1", "C38.4", "C45.0", "C78.0", "C78.2",
    "D14.3", "D38.1",
    "E84.8",
    "I05.1", "I08.0", "I08.1", "I34.0", "I35.0", "I35.1", "I35.2",
    "I42.0", "I45.6", "I47.2", "I49.5", "I71.0", "I71.2",
    "J39.8", "J47", "J67.9", "J84.1", "J84.8", "J84.9",
    "J86.0", "J86.9", "J90", "J93.1", "J95.5", "J98.0",
    "M34.8",
    "Q21.1", "Q23.1", "Q67.6",
    "T82.0", "T82.1",
    "Z45.0",
]

In [12]:
# Read the processed discharge table (semicolon-separated). The procedure
# columns are forced to str via PROCEDURE_COLUMNS; all other columns are left
# to pandas' type inference.
# NOTE(review): the recorded cell output echoes this line, which is how Python
# prints a warning raised here — possibly a mixed-dtype warning; confirm and
# pin the affected column dtypes if so.
df_procesada = pd.read_csv(
    "../data/processed/df_procesada.csv",
    sep=";",
    dtype=PROCEDURE_COLUMNS,
)

  df_procesada = pd.read_csv("../data/processed/df_procesada.csv", sep=";", dtype=PROCEDURE_COLUMNS)


In [13]:
# Keep only the rows of the Torax hospital (COD_HOSPITAL 112103). reset_index()
# turns the previous row label into an "index" column, which identifies each
# discharge record through the reshaping steps.
df_torax = df_procesada.loc[df_procesada["COD_HOSPITAL"] == 112103].reset_index()

# Wide -> long: one row per (discharge, service column). The descriptive
# columns are repeated on every row so they survive the reshape.
service_long = df_torax.melt(
    id_vars=["index", "DIAGNOSTICO1", "ANIO_EGRESO", "IR_29301_SEVERIDAD", "CIP_ENCRIPTADO"],
    value_vars=list(SERVICE_COLUMNS),
    var_name="service_type",
    value_name="service",
)

# Same reshape for the date columns. DATE_COLUMNS is listed in the same order
# as SERVICE_COLUMNS, so both long frames have their rows in matching order.
date_long = df_torax.melt(
    id_vars=["index"],
    value_vars=list(DATE_COLUMNS),
    var_name="date_type",
    value_name="date",
)

# Build the per-discharge journey table:
#   1. pair each date row with the service row at the same position (both long
#      frames share the same row index), keeping a single "index" column;
#   2. make service_type an ordered categorical following SERVICE_COLUMNS, so
#      sorting gives admission -> transfers -> discharge;
#   3. sort by discharge and step, drop rows with any missing value (this is
#      what removes unused transfer slots), and renumber the rows;
#   4. parse the date strings into datetimes.
patient_journey = (
    date_long.merge(service_long, how="inner", left_index=True, right_index=True)
    .drop(columns=["index_y"])
    .rename(columns={"index_x": "index"})
    .assign(
        service_type=lambda journey: pd.Categorical(
            journey["service_type"], categories=SERVICE_COLUMNS.keys(), ordered=True
        )
    )
    .sort_values(["index", "service_type"])
    .dropna()
    .reset_index(drop=True)
    .assign(date=lambda journey: pd.to_datetime(journey["date"]))
)

def _time_until_next_step(journey):
    """Return, for each row, the time elapsed until the next row of the same discharge.

    The per-discharge diff leaves NaT on the first row of every group, so after
    shifting the whole column up by one the last row of each discharge gets NaT.
    This relies on `journey` being sorted so that each discharge is contiguous.
    """
    return journey.groupby("index")["date"].diff().shift(-1)


# Time spent in each service = date of the next step minus date of this step.
patient_journey["length_of_stay"] = _time_until_next_step(patient_journey)

# Repair negative stays: the row *after* each negative stay has its date pushed
# forward by one year, then the stays are recomputed.
# NOTE(review): presumably this corrects a wrong year on the later date —
# confirm. The repair is a single pass, so a stay that is still negative after
# the shift is not corrected again.
has_negative_stay = patient_journey["length_of_stay"] < pd.Timedelta(0)
negative_indices = patient_journey.index[has_negative_stay] + 1
patient_journey.loc[negative_indices, "date"] += pd.offsets.DateOffset(years=1)
patient_journey["length_of_stay"] = _time_until_next_step(patient_journey)

# A step that starts and ends on the same date is counted as one full day.
has_zero_stay = patient_journey["length_of_stay"] == pd.Timedelta(0)
zero_indices = patient_journey.index[has_zero_stay]
patient_journey.loc[zero_indices, "length_of_stay"] += pd.Timedelta(days=1)

# Collapse raw service names into bed categories; names without an entry in
# BED_TYPES are kept as they are.
patient_journey["service"] = patient_journey["service"].replace(BED_TYPES)

# One row per year, diagnosis, severity and bed category with total stay,
# distinct patients and distinct discharges.
_summary_keys = ["ANIO_EGRESO", "DIAGNOSTICO1", "IR_29301_SEVERIDAD", "service"]
summary = (
    patient_journey.groupby(_summary_keys)
    .agg(
        dias_estada_totales=("length_of_stay", "sum"),
        n_pacientes=("CIP_ENCRIPTADO", "nunique"),
        n_egresos=("index", "nunique"),
    )
    .reset_index()
)

# Share of the stay of each row within its (year, diagnosis) total. The
# denominator spans every severity and bed category of that diagnosis-year.
_stay_per_year_and_diagnosis = summary.groupby(["ANIO_EGRESO", "DIAGNOSTICO1"])[
    "dias_estada_totales"
].transform("sum")
summary["porcentaje_ocupacion"] = summary["dias_estada_totales"] / _stay_per_year_and_diagnosis

# Average stay per distinct patient within each row.
summary["dias_estada_por_paciente"] = summary["dias_estada_totales"] / summary["n_pacientes"]

# Subset restricted to the diagnoses listed in RELEVANT_DIAGNOSES.
summary_relevant = summary[summary["DIAGNOSTICO1"].isin(RELEVANT_DIAGNOSES)]

In [3]:
# Write both summaries to one workbook, one sheet each, without the row index.
# NOTE(review): this cell needs `summary` and `summary_relevant` from the
# summary cell; its recorded execution count is out of sequence, so re-run the
# notebook top to bottom before trusting the exported file.
_sheets = {
    "mas_relevantes": summary_relevant,
    "todos": summary,
}
with pd.ExcelWriter("../data/interim/resumen_ocupacion_por_diagnostico.xlsx") as writer:
    for _sheet_name, _frame in _sheets.items():
        _frame.to_excel(writer, sheet_name=_sheet_name, index=False)

In [118]:
# Total length of stay per year and diagnosis, with one column per bed
# category; combinations that never occur are filled with a zero duration.
_stay_by_service = patient_journey.groupby(["ANIO_EGRESO", "DIAGNOSTICO1", "service"])[
    "length_of_stay"
].sum()
resumen_dias_de_estada = _stay_by_service.unstack(fill_value=pd.Timedelta(0))

# Total length of stay per year and diagnosis across all bed categories.
total_dias_de_estada = resumen_dias_de_estada.sum(axis=1)

# Row-wise share of each bed category in that total.
porcentaje_de_ocupacion = resumen_dias_de_estada.div(total_dias_de_estada, axis=0)

In [119]:
# Show the share table: rows are (year, diagnosis), columns are bed categories.
porcentaje_de_ocupacion

Unnamed: 0_level_0,service,AMBULATORIO,Basicos,Medias,PENSIONADO ADULTO,UCI,UTI
ANIO_EGRESO,DIAGNOSTICO1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019.0,A08.4,0.0,0.0,0.600000,0.0,0.400000,0.000000
2019.0,A09.9,0.0,0.0,0.326531,0.0,0.061224,0.612245
2019.0,A15.0,0.0,0.0,0.775229,0.0,0.004587,0.220183
2019.0,A15.2,0.0,0.0,0.654867,0.0,0.000000,0.345133
2019.0,A15.3,0.0,0.0,0.740000,0.0,0.000000,0.260000
...,...,...,...,...,...,...,...
2022.0,Z47.0,0.0,0.0,0.541667,0.0,0.000000,0.458333
2022.0,Z51.1,0.0,0.0,0.914530,0.0,0.000000,0.085470
2022.0,Z51.2,0.0,0.0,0.375000,0.0,0.000000,0.625000
2022.0,Z51.4,0.0,0.0,1.000000,0.0,0.000000,0.000000
