In [1]:
import sys
import os

# Resolve the parent of the current working directory and register it on
# sys.path (once) so that modules located there can be imported.
project_root = os.path.abspath('..')
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
 
# Enable IPython's autoreload extension (mode 2) so that edits made to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import polars as pl

# Notebook-wide display settings. The seaborn theme is applied before the
# figure-size override, keeping the original ordering of the two calls.
sns.set_theme(palette="pastel")
plt.rcParams.update({"figure.figsize": (12, 6)})
# Do not truncate columns when pandas renders a DataFrame.
pd.options.display.max_columns = None

# Diagnosis codes (written without the dot, four characters each) used by the
# DIAGNOSTICO1 filters further down. Laid out one three-character category per
# row; the order of the elements is unchanged.
# fmt: off
DIAGS_PEDIATRICOS = [
    "Q200", "Q201", "Q202", "Q203", "Q204", "Q205", "Q206", "Q208", "Q209",
    "Q210", "Q211", "Q212", "Q213", "Q214", "Q218", "Q219",
    "Q220", "Q221", "Q222", "Q223", "Q224", "Q225", "Q226", "Q228", "Q229",
    "Q230", "Q231", "Q232", "Q233", "Q234", "Q238", "Q239",
    "Q240", "Q241", "Q242", "Q243", "Q244", "Q245", "Q246", "Q248", "Q249",
    "Q250", "Q251", "Q252", "Q253", "Q254", "Q255", "Q256", "Q257", "Q258", "Q259",
    "Q260", "Q261", "Q262", "Q263", "Q264", "Q265", "Q268", "Q269",
    "Q288", "Q289",
    "Q336", "Q339",
]
# fmt: on

# Diagnosis dictionary; joined later through its "Código" column.
diccionario_cie = pl.read_excel("../data/external/CIE-10 - sin_puntos_y_X.xlsx")

# Facility dictionary read from sheet "Anexo 4". The sheet is reshaped in
# pandas: the row at position 2 supplies the column labels, data starts at
# position 3, and only the columns at positions 0 and 6 are kept (presumably
# the facility code and "Nombre Oficial" -- verify against the workbook).
# The code column is converted to integer before going back to polars, where
# it is cast to Int64 so it can be used as a join key against COD_HOSPITAL.
diccionario_recintos = pl.from_dataframe(
    pl.read_excel(
        "../data/external/Esquema_Registro-2023.xlsx",
        sheet_name="Anexo 4",
    )
    .to_pandas()
    .pipe(lambda hoja: hoja.set_axis(hoja.iloc[2], axis=1))
    .iloc[3:, [0, 6]]
    .astype({"Código nuevo Establecimiento": int})
).with_columns(pl.col("Código nuevo Establecimiento").cast(pl.Int64))

In [11]:
# Per (ANIO_EGRESO, COD_HOSPITAL) summary of the rows whose DIAGNOSTICO1 is in
# DIAGS_PEDIATRICOS and whose TIPO_ACTIVIDAD is "HOSPITALIZACIÓN".
# The query is lazy up to .collect(); the result is then joined with the
# facility dictionary (polars' default join strategy) and handed to pandas.
df_congenitos_agrupados = (
    pl.scan_csv("../data/processed/df_procesada.csv", separator=";", dtypes={"DIAGNOSTICO1": str})
    # Normalise the code: remove the first "." and right-pad with "X" to 4 characters.
    .with_columns(pl.col("DIAGNOSTICO1").str.replace(".", "", literal=True).str.ljust(4, "X"))
    .filter(pl.col("DIAGNOSTICO1").is_in(DIAGS_PEDIATRICOS))
    .filter(pl.col("TIPO_ACTIVIDAD") == "HOSPITALIZACIÓN")
    # Flag rows that carry a value in FECHAINTERV1.
    .with_columns(tiene_int_q=pl.col("FECHAINTERV1").is_not_null())
    .group_by(["ANIO_EGRESO", "COD_HOSPITAL"])
    .agg(
        n_egresos=pl.col("DIAGNOSTICO1").count(),
        dias_estada_totales=pl.col("ESTANCIA").sum(),
        promedio_peso_grd=pl.col("IR_29301_PESO").mean(),
        n_int_q=pl.col("tiene_int_q").sum(),
    )
    .collect()
    .join(diccionario_recintos, left_on="COD_HOSPITAL", right_on="Código nuevo Establecimiento")
    .to_pandas()
)

In [19]:
# Wide table: one row per "Nombre Oficial", one column per (metric, year),
# ordered by the 2023 value of n_egresos, largest first.
# NOTE(review): aggfunc="sum" is also applied to promedio_peso_grd (a mean) and
# missing facility/year combinations are shown as 0 -- this is only harmless if
# each facility name has a single row per year; confirm.
# NOTE(review): n_int_q is computed upstream but not included in `values`.
congenitas_agrupadas = df_congenitos_agrupados.pivot_table(
    index=["Nombre Oficial"],
    columns="ANIO_EGRESO",
    values=["n_egresos", "dias_estada_totales", "promedio_peso_grd"],
    aggfunc="sum",
    fill_value=0,
).sort_values(by=("n_egresos", 2023), ascending=False)

In [20]:
# Render the pivoted summary as the cell output.
congenitas_agrupadas

Unnamed: 0_level_0,dias_estada_totales,dias_estada_totales,dias_estada_totales,dias_estada_totales,dias_estada_totales,n_egresos,n_egresos,n_egresos,n_egresos,n_egresos,promedio_peso_grd,promedio_peso_grd,promedio_peso_grd,promedio_peso_grd,promedio_peso_grd
ANIO_EGRESO,2019,2020,2021,2022,2023,2019,2020,2021,2022,2023,2019,2020,2021,2022,2023
Nombre Oficial,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
Hospital de Niños Dr. Luis Calvo Mackenna,6074,5556,6093,5245,5446,602,441,503,516,456,3.506834,3.922691,3.829926,3.645792,4.075217
"Hospital Clínico de Niños Dr. Roberto del Río (Santiago, Independencia)",4538,4614,4831,3049,2643,434,361,377,348,310,3.783153,4.509283,3.818979,3.944264,3.956896
Instituto Nacional de Enfermedades Respiratorias y Cirugía Torácica,508,601,784,847,699,113,63,132,123,126,2.566710,2.851738,2.813847,2.385359,2.435549
"Complejo Hospitalario Dr. Sótero del Río (Santiago, Puente Alto)",379,1015,999,1246,1017,45,48,65,70,107,1.421171,3.268763,2.673557,2.432766,2.004679
Hospital Dr. Hernán Henríquez Aravena (Temuco),705,636,571,924,761,83,36,46,68,97,1.951401,1.608769,1.757676,1.870722,2.036566
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Hospital San Luis (Buin),16,15,12,0,0,3,3,3,0,0,0.546367,0.255867,0.445333,0.000000,0.000000
Hospital de San Carlos,0,9,24,2,0,0,3,4,1,0,0.000000,0.786233,1.391900,0.567000,0.000000
Hospital Dr. Abraham Godoy Peña (Lautaro),0,31,0,35,0,0,1,0,3,0,0.000000,1.010900,0.000000,0.407300,0.000000
Hospital de Villarrica,10,0,17,9,0,3,0,4,2,0,0.638267,0.000000,0.641650,0.842400,0.000000


In [3]:
# Same selection as the per-hospital summary (DIAGNOSTICO1 in DIAGS_PEDIATRICOS,
# TIPO_ACTIVIDAD == "HOSPITALIZACIÓN"), but additionally broken down by
# diagnosis: one row per (ANIO_EGRESO, COD_HOSPITAL, DIAGNOSTICO1).
# The result stays a polars DataFrame.
df_congenitos = (
    pl.scan_csv("../data/processed/df_procesada.csv", separator=";", dtypes={"DIAGNOSTICO1": str})
    # Normalise the code: remove the first "." and right-pad with "X" to 4 characters.
    .with_columns(pl.col("DIAGNOSTICO1").str.replace(".", "", literal=True).str.ljust(4, "X"))
    .filter(pl.col("DIAGNOSTICO1").is_in(DIAGS_PEDIATRICOS))
    .filter(pl.col("TIPO_ACTIVIDAD") == "HOSPITALIZACIÓN")
    # Flag rows that carry a value in FECHAINTERV1.
    .with_columns(tiene_int_q=pl.col("FECHAINTERV1").is_not_null())
    .group_by(["ANIO_EGRESO", "COD_HOSPITAL", "DIAGNOSTICO1"])
    .agg(
        n_egresos=pl.col("DIAGNOSTICO1").count(),
        dias_estada_totales=pl.col("ESTANCIA").sum(),
        promedio_peso_grd=pl.col("IR_29301_PESO").mean(),
        n_int_q=pl.col("tiene_int_q").sum(),
    )
    .collect()
)

In [4]:
# Per-diagnosis metrics derived from df_congenitos:
#   promedio_dias_estada -> dias_estada_totales / n_egresos for the row
#   total_egresos        -> sum of n_egresos within each (year, diagnosis)
#   ranking_n_egresos    -> rank of the row's n_egresos within each
#                           (year, diagnosis), highest first, ties share the
#                           lowest rank (method="min")
# The diagnosis and facility dictionaries are then joined on.
metricas_congenitas_grd = (
    df_congenitos
    .sort(["ANIO_EGRESO", "DIAGNOSTICO1", "n_egresos"], descending=True)
    .with_columns(
        promedio_dias_estada=pl.col("dias_estada_totales") / pl.col("n_egresos"),
        total_egresos=pl.col("n_egresos").sum().over(["ANIO_EGRESO", "DIAGNOSTICO1"]),
        ranking_n_egresos=(
            pl.col("n_egresos")
            .rank(method="min", descending=True)
            .over(["ANIO_EGRESO", "DIAGNOSTICO1"])
        ),
    )
    .join(diccionario_cie, left_on="DIAGNOSTICO1", right_on="Código")
    .join(diccionario_recintos, left_on="COD_HOSPITAL", right_on="Código nuevo Establecimiento")
)

In [23]:
# Write both result tables into a single workbook, one sheet each. The context
# manager takes care of closing the writer when the block ends.
with pd.ExcelWriter("../data/interim/resumen_congenitas_pais_GRD.xlsx") as writer:
    # Sheet 1: the pivoted per-facility summary (already a pandas DataFrame).
    congenitas_agrupadas.to_excel(excel_writer=writer, sheet_name="resumen_total")
    # Sheet 2: the per-diagnosis metrics, converted from polars to pandas first.
    metricas_congenitas_grd.to_pandas().to_excel(excel_writer=writer, sheet_name="por_diagnostico")

In [50]:
# Export the per-diagnosis metrics on their own to a separate workbook, using
# the polars writer. The call's return value is what the cell displays.
# NOTE(review): the same table is also written to the "por_diagnostico" sheet of
# resumen_congenitas_pais_GRD.xlsx -- confirm both files are needed.
metricas_congenitas_grd.write_excel("../data/interim/resumen_congenitas_GRD.xlsx")

<xlsxwriter.workbook.Workbook at 0x210cb7b3190>

In [None]:
# Yearly totals per hospital for the facilities listed in CODIGOS_PEDIATRICOS,
# restricted to TIPO_ACTIVIDAD == "HOSPITALIZACIÓN" (all diagnoses).
# NOTE(review): CODIGOS_PEDIATRICOS is not defined in any cell visible in this
# notebook, and this cell has never been executed -- define it (e.g. next to
# DIAGS_PEDIATRICOS) before running, otherwise this raises NameError.
df_pediatricos = (
    pl.scan_csv("../data/processed/df_procesada.csv", separator=";")
    .filter(pl.col("COD_HOSPITAL").is_in(CODIGOS_PEDIATRICOS))
    .filter(pl.col("TIPO_ACTIVIDAD") == "HOSPITALIZACIÓN")
    .group_by(["ANIO_EGRESO", "COD_HOSPITAL"])
    .agg(
        n_egresos=pl.col("DIAGNOSTICO1").count(),
        dias_estada_totales=pl.col("ESTANCIA").sum(),
        promedio_peso_grd=pl.col("IR_29301_PESO").mean(),
    )
    # Ascending order by hospital, then year.
    .sort(["COD_HOSPITAL", "ANIO_EGRESO", "n_egresos"], descending=False)
    .collect()
    .to_pandas()
)