In [3]:
import pandas as pd
import numpy as np
import polars as pl

import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults.
# NOTE(review): set_style() is called without a style name — confirm whether a
# specific seaborn style (e.g. "whitegrid") was intended here.
sns.set_style()
# Default figure size (width, height) for every figure created afterwards.
plt.rcParams.update({"figure.figsize": (12, 6)})

In [13]:
# Load only the four columns this analysis uses from the processed,
# ';'-separated CSV, and normalise the diagnosis code in the same pipeline:
# remove the first literal "." and right-pad with "X" up to 4 characters.
df = pl.read_csv(
    "../data/processed/df_procesada.csv",
    columns=["ESTANCIA", "COD_HOSPITAL", "DIAGNOSTICO1", "ANIO_EGRESO"],
    separator=";",
).with_columns(
    pl.col("DIAGNOSTICO1")
    .str.replace(".", "", literal=True)  # literal=True: "." is a plain dot, not a regex wildcard
    .str.pad_end(4, "X")
)

In [17]:
# One row per (ANIO_EGRESO, COD_HOSPITAL, DIAGNOSTICO1) with:
#   - n_egresos:           number of rows in the group
#   - dias_estada_totales: sum of ESTANCIA over the group
#   - ranking_egresos:     position of the hospital by n_egresos within its
#                          (ANIO_EGRESO, DIAGNOSTICO1) partition; 1 = most rows,
#                          ties share the lowest rank (method="min").
ranking_egresos_nacionales_grd = (
    df.group_by("ANIO_EGRESO", "COD_HOSPITAL", "DIAGNOSTICO1")
    .agg(
        n_egresos=pl.len(),
        dias_estada_totales=pl.col("ESTANCIA").sum(),
    )
    # Every sort key is descending, so the largest n_egresos comes first
    # within each year/diagnosis pair.
    .sort("ANIO_EGRESO", "DIAGNOSTICO1", "n_egresos", descending=True)
    .with_columns(
        ranking_egresos=pl.col("n_egresos")
        .rank(method="min", descending=True)
        .over("ANIO_EGRESO", "DIAGNOSTICO1")
    )
)

In [21]:
# External ranking table used as the comparison baseline. The file is
# ';'-separated and is decoded as Latin-1 rather than UTF-8.
ranking_egresos_nacionales_deis = pl.read_csv(
    "../data/external/ranking_nacional_egresos.csv",
    separator=";",
    encoding="latin-1",
)

In [25]:
# Pair each GRD row with its DEIS counterpart. The three key columns are named
# differently on each side, hence left_on / right_on (matched positionally).
# Inner join: only key combinations present in both tables are kept.
# NOTE(review): assumes the key columns have compatible dtypes on both sides —
# confirm against the DEIS file schema.
comparacion_grd_deis = ranking_egresos_nacionales_grd.join(
    other=ranking_egresos_nacionales_deis,
    how="inner",
    left_on=["ANIO_EGRESO", "COD_HOSPITAL", "DIAGNOSTICO1"],
    right_on=["ANO_EGRESO", "ESTABLECIMIENTO_SALUD", "DIAG1"],
)