### Código para buscar os dados do CNES

O código abaixo pega os dados direto do FTP do CNES e os transforma em um DataFrame do Pandas.

A função read_dbc_geopandas é baseada na função read_dbc, porém ela usa o geopandas para fazer a leitura do DBF, pois o dbfread não estava conseguindo abrir os DBFs do CNES.



In [72]:
from datetime import datetime
from tempfile import NamedTemporaryFile
import geopandas as gpd
import os
from ftplib import FTP
from pysus.utilities.readdbc import read_dbc, dbc2dbf
from dbfread import DBF
import pandas as pd
from pysus.online_data import CACHEPATH


# Catalogue of the CNES file groups handled by download().
# Each key is the two-letter group prefix used in the FTP directory and file
# names; each value is [description, first available month, first available
# year]. Every entry must carry all three items, because download() reads
# indexes 1 and 2 to build the earliest available date.
group_dict = {
    "LT": ["Leitos - A partir de Out/2005", 10, 2005],
    "ST": ["Estabelecimentos - A partir de Ago/2005", 8, 2005],
    "DC": ["Dados Complementares - A partir de Ago/2005", 8, 2005],
    # The start year was missing here, which made download('EQ', ...) fail
    # with an IndexError; the description itself states Ago/2005.
    "EQ": ["Equipamentos - A partir de Ago/2005", 8, 2005],
    "SR": ["Serviço Especializado - A partir de Ago/2005", 8, 2005],
    "HB": ["Habilitação - A partir de Mar/2007", 3, 2007],
    "PF": ["Profissional - A partir de Ago/2005", 8, 2005],
    # NOTE(review): the description says Abr (April) but the month is 5 --
    # confirm against the FTP listing which of the two is correct.
    "EP": ["Equipes - A partir de Abr/2007", 5, 2007],
    "IN": ["Incentivos - A partir de Nov/2007", 11, 2007],
    "RC": ["Regra Contratual - A partir de Mar/2007", 3, 2007],
    "EE": ["Estabelecimento de Ensino - A partir de Mar/2007", 3, 2007],
    "EF": ["Estabelecimento Filantrópico - A partir de Mar/2007", 3, 2007],
    "GM": ["Gestão e Metas - A partir de Jun/2007", 6, 2007],
}

def download(group: str, state: str, year: int, month: int, cache: bool=True) -> pd.DataFrame:
    """
    Download CNES records for a group, state, year and month as a DataFrame.

    A previously cached parquet file is returned without opening any network
    connection. Otherwise the .dbc file is fetched from the DATASUS FTP
    server, converted to a DataFrame and, when ``cache`` is true, stored in
    the cache directory.

    :param group: two-letter CNES group code, one of the ``group_dict`` keys:
        LT - Hospital beds - from Oct/2005
        ST - Establishments - from Aug/2005
        DC - Complementary data - from Aug/2005
        EQ - Equipment - from Aug/2005
        SR - Specialized service - from Aug/2005
        HB - Accreditation - from Mar/2007
        PF - Professional - from Aug/2005
        EP - Teams - from Apr/2007
        IN - Incentives - from Nov/2007
        RC - Contractual rule - from Mar/2007
        EE - Teaching establishment - from Mar/2007
        EF - Philanthropic establishment - from Mar/2007
        GM - Management and goals - from Jun/2007
    :param state: 2 letter state code (case-insensitive)
    :param year: 4 digit integer
    :param month: 1 to 12
    :param cache: when true, save the downloaded data as parquet in the
        cache directory; an existing cache file is read either way
    :return: DataFrame with the records of the requested file
    :raises KeyError: if ``group`` is not a key of ``group_dict``
    :raises ValueError: if the month is invalid or the requested date is
        earlier than the first month available for the group
    """
    state = state.upper()
    year2 = str(year)[-2:]
    month = str(month).zfill(2)
    input_date = datetime(int(year), int(month), 1)
    entry = group_dict[group]
    available_date = datetime(entry[2], entry[1], 1)
    if input_date < available_date:
        # Report the group and its first available date; the bare start
        # month number alone told the caller nothing useful.
        raise ValueError(
            f"CNES does not contain data for group {group} before "
            f"{available_date:%m/%Y} ({entry[0]})"
        )

    fname = '{}{}{}{}.dbc'.format(group, state, year2.zfill(2), month)
    cachefile = os.path.join(CACHEPATH, 'CNES_' + fname.split('.')[0] + '_.parquet')
    # Check the cache before touching the network so cached requests work
    # offline and do not leave an FTP session open.
    if os.path.exists(cachefile):
        return pd.read_parquet(cachefile)

    # The context manager sends QUIT and closes the socket even when the
    # download or the conversion fails.
    with FTP('ftp.datasus.gov.br') as ftp:
        ftp.login()
        ftp.cwd('dissemin/publicos/CNES/200508_/Dados/{}/'.format(group))
        df = _fetch_file(fname, ftp, 'DBC')
    if cache:
        df.to_parquet(cachefile)
    return df


def _fetch_file(fname, ftp, ftype):
    """
    Retrieve ``fname`` over an open FTP session and load it as a DataFrame.

    The file is written to the current working directory under its remote
    name and is always removed again, whether or not the download and the
    parsing succeed.

    :param fname: name of the remote file in the FTP session's current
        directory; also used as the local file name
    :param ftp: connected object exposing ``retrbinary`` (e.g. ftplib.FTP)
    :param ftype: 'DBC' or 'DBF', selects the reader used for the file
    :return: Pandas Dataframe with the file contents
    :raises ValueError: if ``ftype`` is neither 'DBC' nor 'DBF'
    :raises Exception: if the file cannot be retrieved
    """
    if ftype not in ('DBC', 'DBF'):
        # Fail before downloading instead of hitting an unbound variable later.
        raise ValueError("Unsupported file type: {}".format(ftype))
    try:
        try:
            # Close (and flush) the local file before anything reads it.
            with open(fname, 'wb') as local_file:
                ftp.retrbinary('RETR {}'.format(fname), local_file.write)
        except Exception as err:
            # Keep the original cause attached for debugging.
            raise Exception("File {} not available".format(fname)) from err
        if ftype == 'DBC':
            df = read_dbc_geopandas(fname, encoding='iso-8859-1')
        else:
            dbf = DBF(fname, encoding='iso-8859-1')
            df = pd.DataFrame(list(dbf))
    finally:
        # Never leave a partial or already-parsed download behind.
        if os.path.exists(fname):
            os.unlink(fname)
    return df


def read_dbc_geopandas(filename,encoding='utf-8'):
    """
    Opens a DATASUS .dbc file and return its contents as a pandas
    Dataframe, using geopandas

    The .dbc file is first converted with ``dbc2dbf`` into a temporary .dbf
    file, which is then read with ``geopandas.read_file``. The "geometry"
    column of the result is dropped. Both temporary files are removed
    before returning, even when the conversion or the read fails.

    :param filename: .dbc filename
    :param encoding: encoding of the data
    :return: Pandas Dataframe.
    """
    # Reserve a unique temporary name and close the handle right away so
    # that other code can freely create files next to it.
    with NamedTemporaryFile(delete=False) as tf:
        placeholder = tf.name
    out = placeholder + '.dbf'
    try:
        dbc2dbf(filename, out)
        dbf = gpd.read_file(out, encoding=encoding).drop("geometry",axis=1)
        df = pd.DataFrame(dbf)
    finally:
        # Remove the placeholder and the converted .dbf; the latter used to
        # be left behind on every call.
        for path in (placeholder, out):
            if os.path.exists(path):
                os.unlink(path)

    return df

In [74]:
# Example call: fetch the 'ST' group for state 'AM', December 2020.
download('ST','AM', 2020, 12)

Unnamed: 0,CNES,CODUFMUN,COD_CEP,CPF_CNPJ,PF_PJ,NIV_DEP,CNPJ_MAN,COD_IR,REGSAUDE,MICR_REG,DISTRSAN,DISTRADM,VINC_SUS,TPGESTAO,ESFERA_A,RETENCAO,ATIVIDAD,NATUREZA,CLIENTEL,TP_UNID,TURNO_AT,NIV_HIER,TP_PREST,CO_BANCO,CO_AGENC,C_CORREN,CONTRATM,DT_PUBLM,CONTRATE,DT_PUBLE,ALVARA,DT_EXPED,ORGEXPED,AV_ACRED,CLASAVAL,DT_ACRED,AV_PNASS,DT_PNASS,GESPRG1E,GESPRG1M,...,AP02CV07,AP03CV01,AP03CV02,AP03CV03,AP03CV04,AP03CV05,AP03CV06,AP03CV07,AP04CV01,AP04CV02,AP04CV03,AP04CV04,AP04CV05,AP04CV06,AP04CV07,AP05CV01,AP05CV02,AP05CV03,AP05CV04,AP05CV05,AP05CV06,AP05CV07,AP06CV01,AP06CV02,AP06CV03,AP06CV04,AP06CV05,AP06CV06,AP06CV07,AP07CV01,AP07CV02,AP07CV03,AP07CV04,AP07CV05,AP07CV06,AP07CV07,ATEND_PR,DT_ATUAL,COMPETEN,NAT_JUR
0,2016451,130002,69540000,00000000000000,3,3,04628335000100,,002,,,,1,M,,,04,,03,02,03,,99,,,,,,,,,,,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,202010,202012,1244
1,2017768,130002,69540000,00697295006804,3,3,00697295000105,,002,,,,1,E,,,04,,03,05,06,,99,,,,,,,,002/2013,20130102,2,,,,,,0,0,...,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,202101,202012,1023
2,6570275,130002,69540000,00000000000000,3,3,04628335000100,,,,,,1,M,,,04,,03,68,03,,99,,,,,,,,,,2,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,202010,202012,1244
3,6721265,130002,69540000,00000000000000,3,3,04628335000100,,,,,,1,M,,,04,,,50,03,,99,,,,,,,,,,2,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,202010,202012,1244
4,6834132,130002,69540000,00000000000000,3,3,04628335000100,,02,,,,1,M,,,04,,01,02,03,,99,,,,,,,,47/2011,20110628,2,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,202010,202012,1244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2582,7057202,130440,69180000,00000000000000,3,3,04502571000185,,,,,,1,M,,,04,,03,01,03,,99,,,,,,,,,,,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,202012,202012,1244
2583,7070292,130440,69180000,00000000000000,3,3,04502571000185,,,,,,1,M,,,04,,01,50,01,,99,,,,,,,,,,,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,201907,202012,1244
2584,7070306,130440,69180000,00000000000000,3,3,04502571000185,,,,,,1,M,,,04,,03,68,03,,99,,,,,,,,,,,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,201907,202012,1244
2585,9622934,130440,69180000,00000000000000,3,3,04502571000185,,,,,,1,M,,,04,,03,02,03,,99,,,,,,,,,,,,,,,,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,202012,202012,1244
