# Analisis de Egresos Hospitalarios

In [47]:
import polars as pl
import glob

DICT_VARIABLES = {
    'ESTABLECIMIENTO_SALUD': pl.Int32,
    'GLOSA_ESTABLECIMIENTO_SALUD': pl.Categorical,
    'PERTENENCIA_ESTABLECIMIENTO_SALUD': pl.Categorical,
    'SEREMI': pl.Int8,
    'SERVICIO_DE_SALUD': pl.Int8,
    'SEXO': pl.Int8,
    'EDAD_CANT': pl.Int8,
    'TIPO_EDAD': pl.Int8,
    'EDAD_A_OS': pl.Int8,
    'PUEBLO_ORIGINARIO': pl.Int8,
    'PAIS_ORIGEN': pl.Int16,
    'GLOSA_COMUNA_RESIDENCIA': pl.Categorical,
    'REGION_RESIDENCIA': pl.Categorical,
    'GLOSA_REGION_RESIDENCIA': pl.Categorical,
    'PREVISION': pl.Int8,
    'BENEFICIARIO': pl.Categorical,
    'MODALIDAD': pl.Int8,
    'PROCEDENCIA': pl.Int8,
    'ANO_EGRESO': pl.Int16,
    'FECHA_EGRESO': pl.Date,
    'AREA_FUNCIONAL_EGRESO': pl.Int16,
    'DIAS_ESTADA': pl.Int16,
    'CONDICION_EGRESO': pl.Int8,
    'INTERV_Q': pl.Int8,
    'CODIGO_INTERV_Q_PPAL': pl.Int32,
    'PROCED': pl.Categorical,
    'CODIGO_PROCED_PPAL': pl.Categorical,
    'GLOSA_PROCED_PPAL': pl.Categorical
}

HOSPITAL_A_ANALIZAR = 112103 # Este es el Torax

In [74]:
def obtener_diagnosticos_unicos_de_hospital(df, hospital_a_analizar):
    diags_hospital = (df.filter(pl.col('ESTABLECIMIENTO_SALUD') == hospital_a_analizar)
                      .select(pl.col('DIAG1')).unique())

    return diags_hospital


def obtener_metricas_egresos(df, agrupar_por):
    metricas_agregadas = (df.groupby(agrupar_por).agg(
        [pl.col('DIAG1').count().alias('n_egresos'),
         pl.col('DIAS_ESTADA').mean().alias('dias_estada_promedio'),
         pl.col('INTERV_Q').sum().alias('n_int_q'),
         pl.col('CONDICION_EGRESO').sum().alias('n_muertos')]
    )
    )

    return metricas_agregadas


def remapear_int_q_y_muertes(df):
    tmp = df.with_columns([
        pl.col('INTERV_Q').map_dict({2: 0}, default=pl.col('INTERV_Q')).alias('INTERV_Q'),
        pl.col('CONDICION_EGRESO').map_dict({1: 0, 2: 1}, default=pl.col('CONDICION_EGRESO'))
        .alias('CONDICION_EGRESO')
    ])

    return tmp


In [83]:
with pl.StringCache():
    df_nacional = pl.scan_csv('input/utf-8/*.csv', separator=';')
    diags_torax = (obtener_diagnosticos_unicos_de_hospital(df_nacional, HOSPITAL_A_ANALIZAR)
                  .collect(streaming=True)).to_series()
    df = df_nacional.filter(pl.col('DIAG1').is_in(diags_torax))
    df = remapear_int_q_y_muertes(df)

    agrupacion_anio = ['ANO_EGRESO', 'ESTABLECIMIENTO_SALUD', 
                       'GLOSA_ESTABLECIMIENTO_SALUD', 'DIAG1']
    
    metricas = obtener_metricas_egresos(df, agrupacion_anio).collect(streaming=True)

In [86]:
resultados_metricas

ANO_EGRESO,ESTABLECIMIENTO_SALUD,GLOSA_ESTABLECIMIENTO_SALUD,DIAG1,n_egresos,dias_estada_promedio,n_int_q,n_muertos
i64,i64,str,str,u32,f64,i64,i64
2005,105103,"""Hospital Dr. H…","""D174""",1,2.0,1,0
2007,122202,"""Clínica Aleman…","""E876""",2,1.0,0,0
2007,113130,"""Hospital Dr. E…","""J159""",44,6.113636,0,0
2012,113180,"""Hospital El Pi…","""I871""",1,17.0,0,0
2014,109201,"""Clínica Dávila…","""J159""",182,8.692308,12,7
2014,116100,"""Hospital San J…","""N23X""",13,3.384615,1,0
2015,128109,"""Hospital Provi…","""J157""",7,4.0,0,0
2018,118107,"""Hospital Clori…","""K297""",1,2.0,0,0
2005,113150,"""Hospital San L…","""D143""",1,1.0,1,0
2007,120106,"""Hospital de Sa…","""J180""",5,5.2,0,0


In [6]:
import pandas as pd
import numpy as np

import geopandas as gpd
import geopy

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [25]:
locator = Nominatim(user_agent='javier', timeout=20)
rgeocode = RateLimiter(locator.reverse, min_delay_seconds=0.001)

def location_info(x):
    data = locator.geocode(x).raw
    data_converted = pd.json_normalize(data).squeeze()
    return data_converted

In [3]:
df = pd.read_excel('output/distribucion_sociodemografica/distribucion_sociodemografica.xlsx')

In [43]:
df['region_pais'] = 'Region ' + df['GLOSA_REGION_RESIDENCIA'] + ', Chile'
df['comuna_region_pais'] = df['GLOSA_COMUNA_RESIDENCIA'] + ', ' + df['region_pais']

In [47]:
regiones_unicas = pd.Series(df['region_pais'].unique()).apply(location_info)

AttributeError: 'NoneType' object has no attribute 'raw'

In [52]:
sorted(df['GLOSA_REGION_RESIDENCIA'].unique())

['De Aisén del Gral. C. Ibáñez del Campo',
 'De Aisï¿½n del Gral. C. Ibï¿½ï¿½ez del Campo',
 'De Antofagasta',
 'De Arica y Parinacota',
 'De Atacama',
 'De Coquimbo',
 'De La Araucanía',
 'De La Araucanï¿½a',
 'De Los Lagos',
 'De Los Ríos',
 'De Los Rï¿½os',
 'De Magallanes y de La Antártica Chilena',
 'De Magallanes y de La Antï¿½rtica Chilena',
 'De Tarapacá',
 'De Tarapacï¿½',
 'De Valparaíso',
 'De Valparaï¿½so',
 'De Ñuble',
 'De ï¿½uble',
 'Del Bíobío',
 'Del Bï¿½obï¿½o',
 "Del Libertador B. O'Higgins",
 'Del Maule',
 'Ignorada',
 'Metropolitana de Santiago']

In [None]:
with open('input/Egresos_Hospitalarios_2014.csv', encoding='latin-1') as f:
    df = pl.read_csv(f.read())

In [58]:
df = pl.read_csv('input/Egresos_Hospitalarios_2014.csv', separator=';', dtypes=DICT_VARIABLES,
                 use_pyarrow=False)

In [107]:
with open('input/Egresos_Hospitalarios_2014.csv', encoding='utf-8') as f:
    df = pl.read_csv(f.read().encode('utf-8'), separator=';', dtypes=DICT_VARIABLES)

In [108]:
df

ID_PACIENTE,ESTABLECIMIENTO_SALUD,GLOSA_ESTABLECIMIENTO_SALUD,PERTENENCIA_ESTABLECIMIENTO_SALUD,SEREMI,SERVICIO_DE_SALUD,SEXO,FECHA_NACIMIENTO,EDAD_CANT,TIPO_EDAD,EDAD_A_OS,PUEBLO_ORIGINARIO,PAIS_ORIGEN,GLOSA_PAIS_ORIGEN,COMUNA_RESIDENCIA,GLOSA_COMUNA_RESIDENCIA,REGION_RESIDENCIA,GLOSA_REGION_RESIDENCIA,PREVISION,BENEFICIARIO,MODALIDAD,PROCEDENCIA,ANO_EGRESO,FECHA_EGRESO,AREA_FUNCIONAL_EGRESO,DIAS_ESTADA,CONDICION_EGRESO,DIAG1,GLOSA_DIAG1,DIAG2,GLOSA_DIAG2,INTERV_Q,CODIGO_INTERV_Q_PPAL,GLOSA_INTERV_Q_PPAL,PROCED,CODIGO_PROCED_PPAL,GLOSA_PROCED_PPAL
str,i32,cat,cat,i8,i8,i8,str,i8,i8,i8,i8,i16,str,i64,cat,cat,cat,i8,cat,i8,i8,i16,date,i16,i16,i8,str,str,str,str,i8,i32,str,str,str,str
"""NA""",112101,"""Hospital Dr. L…","""Pertenecientes…",,12,2,"""2014-12-08""",2,3,0,96,152,"""Chile""",13122,"""Pe�alol�n""","""13""","""Metropolitana …",99,,,1,2014,2014-12-10,324,2,1,"""P221""","""TAQUIPNEA TRAN…",,,2,,,,,
"""NA""",112101,"""Hospital Dr. L…","""Pertenecientes…",,12,1,"""2014-12-08""",5,3,0,96,152,"""Chile""",15101,"""Arica""","""15""","""De Arica y Par…",99,,,1,2014,2014-12-13,152,5,1,"""P221""","""TAQUIPNEA TRAN…",,,2,,,,,
"""NA""",112101,"""Hospital Dr. L…","""Pertenecientes…",,12,1,"""2014-12-05""",5,3,0,96,152,"""Chile""",13122,"""Pe�alol�n""","""13""","""Metropolitana …",99,,,5,2014,2014-12-10,152,2,1,"""P598""","""ICTERICIA NEON…",,,2,,,,,
"""NA""",112101,"""Hospital Dr. L…","""Pertenecientes…",,12,1,"""2014-12-09""",13,3,0,96,152,"""Chile""",13115,"""Lo Barnechea""","""13""","""Metropolitana …",99,,,1,2014,2014-12-22,324,13,1,"""P590""","""ICTERICIA NEON…",,,2,,,,,
"""NA""",112101,"""Hospital Dr. L…","""Pertenecientes…",,12,2,"""2014-12-07""",4,3,0,96,152,"""Chile""",13122,"""Pe�alol�n""","""13""","""Metropolitana …",99,,,5,2014,2014-12-11,152,2,1,"""P598""","""ICTERICIA NEON…",,,2,,,,,
"""NA""",112101,"""Hospital Dr. L…","""Pertenecientes…",,12,2,"""2014-12-09""",2,3,0,96,152,"""Chile""",13122,"""Pe�alol�n""","""13""","""Metropolitana …",99,,,5,2014,2014-12-11,152,2,1,"""Q185""","""MICROSTOMIA""",,,2,,,,,
"""NA""",107224,"""Hospital Cl�ni…","""No Pertenecien…",5,,2,"""2014-01-22""",2,3,0,96,152,"""Chile""",5804,"""Villa Alemana""","""05""","""De Valpara�so""",2,,,5,2014,2014-01-25,330,1,1,"""P599""","""ICTERICIA NEON…",,,2,,,,,
"""NA""",107224,"""Hospital Cl�ni…","""No Pertenecien…",5,,1,"""2014-01-14""",2,3,0,96,152,"""Chile""",5502,"""Calera""","""05""","""De Valpara�so""",1,"""C""",2,5,2014,2014-01-26,311,10,1,"""P239""","""NEUMONIA CONGE…",,,2,,,,,
"""NA""",101100,"""Hospital Dr. J…","""Pertenecientes…",,1,2,"""1928-01-21""",86,1,86,96,152,"""Chile""",15101,"""Arica""","""15""","""De Arica y Par…",1,"""B""",1,1,2014,2014-02-19,110,4,1,"""I509""","""INSUFICIENCIA …",,,2,,,,,
"""NA""",101100,"""Hospital Dr. J…","""Pertenecientes…",,1,2,"""1961-08-09""",52,1,52,96,152,"""Chile""",15101,"""Arica""","""15""","""De Arica y Par…",1,"""A""",1,1,2014,2014-02-19,120,18,1,"""K802""","""CALCULO DE LA …",,,1,1802028,"""Colecistectom�…",,,


In [77]:
import chardet

In [104]:
text = open('input/Egresos_Hospitalarios_2001.csv', 'rb').read(1000)
print(chardet.detect(text))

{'encoding': 'ISO-8859-1', 'confidence': 0.73, 'language': ''}


In [None]:
with open('input/Egresos_Hospitalarios_2014.csv', encoding=)