In [1]:
import pandas as pd 
from pathlib import Path


# Project folders, resolved relative to the notebook's working directory.
# Path.cwd() yields the same absolute directory as os.path.abspath('.') and
# avoids the `os` module, which this notebook never imports (the original
# expression raised NameError on a fresh kernel).
raw_data_folder = Path.cwd().parent / "raw_data"
data_folder = Path.cwd().parent / "data"

def load_sisab() -> pd.DataFrame:
    """Read the consolidated SISAB CSV stored under ``data_folder``.

    Returns:
        The file contents as a DataFrame, parsed as comma-separated values.
    """
    csv_path = data_folder / "SISAB/SISAB-consolidado.csv"
    return pd.read_csv(csv_path, sep=",")


def load_ibge() -> pd.DataFrame:
    """Read the consolidated IBGE population CSV stored under ``data_folder``.

    Returns:
        The file contents as a DataFrame, parsed as comma-separated values.
    """
    csv_path = data_folder / "IBGE/populacao_consolidada.csv"
    return pd.read_csv(csv_path, sep=",")
 
def ibge_code_dict(ibge_df: pd.DataFrame) -> dict:
    r"""Map each IBGE code without its check digit to the full code.

    SISAB does not carry the check digit (DV) of the IBGE code. This function
    returns a ``{\d{6}: \d{7}}`` dictionary that lets the DV be attached to
    each code.

    Args:
        ibge_df: frame with a "Cód." column holding the full IBGE codes,
            either as strings or as integers.

    Returns:
        A dict whose keys are the codes with the last digit removed and whose
        values are the corresponding full codes, in the same type as the
        column's values.
    """
    codes = ibge_df["Cód."].values
    # Strings drop their last character; numeric codes drop their last digit
    # through integer division, because slicing an integer raises TypeError.
    return {
        (code[:-1] if isinstance(code, str) else code // 10): code
        for code in codes
    }




# SISAB x IBGE

In [2]:
# Load the IBGE population table.
ibge = load_ibge()

In [3]:
# Load the consolidated SISAB records.
sisab = load_sisab()

In [4]:
# Lookup from the IBGE code without its last digit to the full code.
mapa = {
    codigo // 10: codigo
    for codigo in ibge["Cód."]
}

def add_dv_ibge(mapa: dict, cod: int) -> int:
    """Return the full IBGE code registered in ``mapa`` for ``cod``.

    Args:
        mapa: lookup from a code without check digit to the full code.
        cod: code to look up.

    Returns:
        The value stored under ``cod``.

    Raises:
        KeyError: if ``cod`` is not a key of ``mapa``.
    """
    codigo_completo = mapa[cod]
    return codigo_completo

In [5]:
# Replace each SISAB code with the full IBGE code found in `mapa`.
sisab["Ibge"] = sisab["Ibge"].apply(lambda codigo: add_dv_ibge(mapa, codigo))

In [6]:
# Integer-divide `referencia` by 100 to keep only its leading (year) digits.
sisab["referencia"] // 100

0         2021
1         2021
2         2021
3         2021
4         2021
          ... 
496540    2019
496541    2019
496542    2019
496543    2019
496544    2019
Name: referencia, Length: 496545, dtype: int64

In [7]:
# Index the IBGE table by municipality code so rows can be fetched with .loc.
# The guard plus rebinding (instead of inplace=True) keeps this cell safe to
# re-run: a second in-place set_index would raise KeyError because "Cód." is
# no longer a column once it has become the index.
if ibge.index.name != "Cód.":
    ibge = ibge.set_index("Cód.")


In [8]:
# Look up the IBGE municipality name for every SISAB row. Series.map aligns the
# codes against the index of `ibge` in one vectorized pass, instead of running
# a Python-level .loc lookup per row. Codes absent from `ibge` would show up as
# NaN rather than raising KeyError.
sisab["Ibge"].map(ibge["Município"])

0                          Alvorada (RS)
1                           Coroatá (MA)
2                           Guaxupé (MG)
3               São Miguel do Oeste (SC)
4            Oliveira dos Brejinhos (BA)
                       ...              
496540                     Gravatal (SC)
496541                   Centenário (RS)
496542          São Domingos do Sul (RS)
496543    Balneário Arroio do Silva (SC)
496544         Conceição do Castelo (ES)
Name: Ibge, Length: 496545, dtype: object

In [9]:
# Title-case the SISAB municipality names for comparison with the IBGE names.
sisab["Municipio"].str.title()

0                          Alvorada
1                           Coroatá
2                           Guaxupé
3               São Miguel Do Oeste
4            Oliveira Dos Brejinhos
                    ...            
496540                     Gravatal
496541                   Centenário
496542          São Domingos Do Sul
496543    Balneário Arroio Do Silva
496544         Conceição Do Castelo
Name: Municipio, Length: 496545, dtype: object

In [10]:
def match_populacao_referencia(row):
    """Fetch from ``ibge`` the population matching a SISAB row.

    Args:
        row: record with an "Ibge" code and a "referencia" value whose
            integer division by 100 gives the year.

    Returns:
        The value of ``ibge`` at the row's code and the year column; rows
        from 2022 read the "2021" column instead.
    """
    municipality_code = row["Ibge"]
    year = row["referencia"] // 100
    # 2022 is redirected to the 2021 column.
    year_column = str(2021 if year == 2022 else year)
    return ibge.loc[municipality_code, year_column]

In [11]:
# Attach to every SISAB row the population of its municipality and year.
sisab["populacao"] = sisab.apply(match_populacao_referencia, axis="columns")

In [12]:
# Display the SISAB frame, which now includes the "populacao" column.
sisab

Unnamed: 0,Uf,Ibge,Municipio,Asma,Desnutrição,Diabetes,DPOC,Hipertensão arterial,Obesidade,Pré-natal,...,Reabilitação,D.Transmissíveis - Dengue,Doenças transmissíveis - DST,D.Transmissíveis - Hanseníase,D.Transmissíveis - Tuberculose,Rast. câncer de mama,Rast. câncer do colo do útero,Rast. risco cardiovascular,referencia,populacao
0,RS,4300604,ALVORADA,310,59,974,61,1954,200,970,...,37,0,139,0,6,12,20,10,202105,212352
1,MA,2103604,COROATÁ,10,61,387,18,786,62,417,...,266,0,6,7,3,4,60,74,202105,65788
2,MG,3128709,GUAXUPÉ,3,4,118,14,357,30,125,...,184,0,1,0,0,1,2,0,202105,52234
3,SC,4217204,SÃO MIGUEL DO OESTE,32,9,165,20,714,103,223,...,12,0,16,0,2,6,5,60,202105,41246
4,BA,2923209,OLIVEIRA DOS BREJINHOS,7,1,71,3,305,12,158,...,17,2,8,1,1,2,85,2,202105,21797
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496540,SC,4206207,GRAVATAL,9,2,20,4,22,0,61,...,34,0,2,0,0,4,20,0,201904,11501
496541,RS,4305116,CENTENÁRIO,0,0,5,0,7,6,0,...,10,0,0,0,0,0,0,0,201904,2891
496542,RS,4318051,SÃO DOMINGOS DO SUL,0,0,4,0,4,1,0,...,0,0,0,0,0,0,0,0,201904,3074
496543,SC,4201950,BALNEÁRIO ARROIO DO SILVA,2,2,32,1,74,3,86,...,1,0,0,0,0,6,17,0,201904,13071


In [13]:
# Export a 20-row sample of the SC records, sorted by municipality and
# reference, as a JSON test fixture. random_state pins the sample so that
# Restart & Run All regenerates the same file.
(
    sisab.query("Uf == 'SC'")
    .sample(n=20, random_state=42)
    .sort_values(by=["Municipio", "referencia"])
    .to_json("testSC.json", orient="split", force_ascii=False, index=False)
)

In [14]:
# List the column labels of the SISAB frame.
sisab.columns

Index(['Uf', 'Ibge', 'Municipio', 'Asma', 'Desnutrição', 'Diabetes', 'DPOC',
       'Hipertensão arterial', 'Obesidade', 'Pré-natal', 'Puericultura',
       'Puerpério (até 42 dias)', 'Saúde sexual e reprodutiva', 'Tabagismo',
       'Usuário de álcool', 'Usuário de outras drogas', 'Saúde mental',
       'Reabilitação', 'D.Transmissíveis - Dengue',
       'Doenças transmissíveis - DST', 'D.Transmissíveis - Hanseníase',
       'D.Transmissíveis - Tuberculose', 'Rast. câncer de mama',
       'Rast. câncer do colo do útero', 'Rast. risco cardiovascular',
       'referencia', 'populacao'],
      dtype='object')

In [15]:
# 20-row sample of the SC records, sorted by municipality and reference.
# random_state pins the sample so downstream cells see the same rows on
# every run.
ordenado = (
    sisab.query("Uf == 'SC'")
    .sample(n=20, random_state=42)
    .sort_values(by=["Municipio", "referencia"])
)

In [16]:
# Key the sampled rows by IBGE code and show them as {column: {code: value}}.
# set_index is chained without inplace=True so `ordenado` is left untouched
# and the cell can be re-run; the in-place version raised KeyError on a
# second run because "Ibge" had already been moved into the index.
ordenado.set_index("Ibge").to_dict(orient="dict")

{'Uf': {4201257: 'SC',
  4201703: 'SC',
  4202008: 'SC',
  4203204: 'SC',
  4204152: 'SC',
  4205209: 'SC',
  4206306: 'SC',
  4206900: 'SC',
  4207304: 'SC',
  4209508: 'SC',
  4209805: 'SC',
  4210704: 'SC',
  4211405: 'SC',
  4211702: 'SC',
  4212270: 'SC',
  4215406: 'SC',
  4217758: 'SC',
  4216701: 'SC',
  4216255: 'SC',
  4218301: 'SC'},
 'Municipio': {4201257: 'APIÚNA',
  4201703: 'ASCURRA',
  4202008: 'BALNEÁRIO CAMBORIÚ',
  4203204: 'CAMBORIÚ',
  4204152: 'CELSO RAMOS',
  4205209: 'ERVAL VELHO',
  4206306: 'GUABIRUBA',
  4206900: 'IBIRAMA',
  4207304: 'IMBITUBA',
  4209508: 'LAURENTINO',
  4209805: 'LEOBERTO LEAL',
  4210704: 'MATOS COSTA',
  4211405: 'NOVA ERECHIM',
  4211702: 'ORLEANS',
  4212270: 'PASSOS MAIA',
  4215406: 'SALTO VELOSO',
  4217758: 'SUL BRASIL',
  4216701: 'SÃO JOSÉ DO CEDRO',
  4216255: 'SÃO JOÃO DO OESTE',
  4218301: 'TRÊS BARRAS'},
 'Asma': {4201257: 22,
  4201703: 0,
  4202008: 28,
  4203204: 60,
  4204152: 0,
  4205209: 2,
  4206306: 6,
  4206900: 13,

In [17]:
# Take the first distinct municipality name in the sorted sample.
muni = ordenado["Municipio"].unique()[0]

In [18]:
# All SC records of the selected municipality, in chronological order.
# `@muni` passes the name to query() as a variable instead of splicing it into
# the expression text, so names containing an apostrophe no longer break the
# query string.
apiuna = sisab.query("Uf == 'SC' and Municipio == @muni").sort_values("referencia")

In [19]:
# Show every column from the fourth onward as {column: [values...]}.
colunas_indicadores = sisab.columns[3:]
apiuna[colunas_indicadores].to_dict(orient="list")

{'Asma': [2,
  8,
  26,
  22,
  16,
  42,
  50,
  22,
  38,
  14,
  11,
  4,
  7,
  6,
  12,
  25,
  75,
  11,
  9,
  16,
  3,
  0,
  2,
  2,
  5,
  1,
  7,
  15,
  15,
  5,
  3,
  8,
  6,
  2,
  3,
  0,
  2,
  3,
  5,
  2,
  2,
  7,
  3,
  5,
  4,
  1,
  6,
  8,
  2,
  13,
  8,
  1,
  8,
  7,
  11,
  7,
  11,
  1,
  4,
  8,
  6,
  5,
  12,
  11,
  12,
  3,
  5,
  8,
  13,
  5,
  2,
  1,
  26,
  26,
  40,
  41,
  35,
  26,
  17],
 'Desnutrição': [0,
  2,
  14,
  20,
  4,
  8,
  12,
  10,
  16,
  3,
  2,
  0,
  0,
  2,
  1,
  12,
  3,
  4,
  4,
  4,
  0,
  0,
  0,
  0,
  2,
  3,
  0,
  4,
  2,
  4,
  2,
  4,
  4,
  2,
  3,
  3,
  0,
  4,
  3,
  1,
  1,
  3,
  2,
  6,
  4,
  2,
  3,
  5,
  0,
  0,
  4,
  8,
  5,
  3,
  2,
  4,
  3,
  2,
  4,
  5,
  7,
  2,
  4,
  3,
  2,
  5,
  7,
  3,
  6,
  3,
  10,
  6,
  6,
  7,
  3,
  2,
  21,
  12,
  13],
 'Diabetes': [0,
  60,
  156,
  94,
  142,
  170,
  174,
  192,
  185,
  41,
  65,
  13,
  14,
  35,
  49,
  72,
  39,
  58,
  66,
  46,
  24,
  

In [20]:
# Display the records selected for the chosen municipality.
apiuna

Unnamed: 0,Uf,Ibge,Municipio,Asma,Desnutrição,Diabetes,DPOC,Hipertensão arterial,Obesidade,Pré-natal,...,Reabilitação,D.Transmissíveis - Dengue,Doenças transmissíveis - DST,D.Transmissíveis - Hanseníase,D.Transmissíveis - Tuberculose,Rast. câncer de mama,Rast. câncer do colo do útero,Rast. risco cardiovascular,referencia,populacao
76175,SC,4201257,APIÚNA,2,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,201512,10322
91659,SC,4201257,APIÚNA,8,2,60,8,114,42,40,...,34,0,0,0,0,4,4,14,201602,10432
358428,SC,4201257,APIÚNA,26,14,156,24,398,150,104,...,280,12,0,0,2,50,32,146,201603,10432
266695,SC,4201257,APIÚNA,22,20,94,14,322,116,110,...,293,6,4,0,0,18,20,90,201604,10432
197813,SC,4201257,APIÚNA,16,4,142,20,348,166,110,...,373,0,0,0,0,62,58,166,201605,10432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352160,SC,4201257,APIÚNA,40,3,513,26,1110,13,239,...,5,17,1,2,1,14,58,111,202205,10951
403815,SC,4201257,APIÚNA,41,2,369,18,902,8,209,...,4,5,0,0,3,3,89,102,202206,10951
228426,SC,4201257,APIÚNA,35,21,348,20,783,4,171,...,8,2,1,1,2,0,18,89,202207,10951
331779,SC,4201257,APIÚNA,26,12,258,22,610,3,237,...,3,0,1,0,0,0,71,89,202208,10951


# Topojson

In [21]:
import json

# Load the SC TopoJSON. The encoding is stated explicitly because the file
# holds non-ASCII municipality names and open() otherwise falls back to the
# platform's default encoding.
with open(data_folder / "geodata" / "SC.topojson", encoding="utf-8") as fp:
    toposc = json.load(fp)

In [22]:
import numpy as np

In [23]:
# Stack the "centroide" property of every geometry into one array.
centroides = np.array(
    [
        geometria["properties"]["centroide"]
        for geometria in toposc["objects"]["foo"]["geometries"]
    ]
)

In [24]:
# Median of each centroid coordinate; column 0 goes to x and column 1 to y.
mediana_x, mediana_y = np.median(centroides, axis=0)

In [25]:
# Show the two median centroid coordinates.
mediana_x, mediana_y

(-50.171262922837194, -27.09106991226981)

In [26]:
# Inspect a single geometry entry (index 23) to see the fields it carries.
toposc["objects"]["foo"]["geometries"][23]

{'type': 'MultiPolygon',
 'arcs': [[[125, 126, -122, 127]]],
 'properties': {'codarea': '4204756',
  'centroide': [-53.10079867642443, -26.97589624460521],
  'municipio': 'Cunhataí'}}

In [27]:
# Column labels from the fourth onward, as a NumPy array.
sisab.columns[3:].to_numpy()

array(['Asma', 'Desnutrição', 'Diabetes', 'DPOC', 'Hipertensão arterial',
       'Obesidade', 'Pré-natal', 'Puericultura',
       'Puerpério (até 42 dias)', 'Saúde sexual e reprodutiva',
       'Tabagismo', 'Usuário de álcool', 'Usuário de outras drogas',
       'Saúde mental', 'Reabilitação', 'D.Transmissíveis - Dengue',
       'Doenças transmissíveis - DST', 'D.Transmissíveis - Hanseníase',
       'D.Transmissíveis - Tuberculose', 'Rast. câncer de mama',
       'Rast. câncer do colo do útero', 'Rast. risco cardiovascular',
       'referencia', 'populacao'], dtype=object)

```js
 L.topoJson(null, {
      style: function (feature) {
        return {
          color: "#000",
          opacity: 1,
          weight: 1,
          fillColor: '#35495d',
          fillOpacity: 0.8
        }
      },
      onEachFeature: function (feature, layer) {
        layer.bindPopup(`<p>${feature.properties.codarea}</p><b>${feature.properties.municipio}</b>`)
      }
    }).addTo(GLOBAL_MAP)
```

In [28]:
# Distinct state abbreviations present in SISAB, in alphabetical order.
UFs = np.sort(sisab["Uf"].unique())
UFs

array(['AC', 'AL', 'AM', 'AP', 'BA', 'CE', 'DF', 'ES', 'GO', 'MA', 'MG',
       'MS', 'MT', 'PA', 'PB', 'PE', 'PI', 'PR', 'RJ', 'RN', 'RO', 'RR',
       'RS', 'SC', 'SE', 'SP', 'TO'], dtype=object)