From e5c471b41c827a5f2789ee6dc7e04d33a1fcd449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20B=2E=20Vacaro?= Date: Mon, 15 Jul 2024 16:49:46 -0300 Subject: [PATCH] docs(ibge): include pop and censo ibge documentation (#197) * docs(ibge): include pop and censo ibge documentation * Include total cases by 100k hab on SINAN example * include documentation for utilities --- docs/source/databases/SINAN.ipynb | 2 +- docs/source/databases/Utilities.ipynb | 156 ++ docs/source/tutorials/IBGE_data.ipynb | 1077 +++++++- .../tutorials/Preprocessing SINAN.ipynb | 2230 +++++++++-------- pysus/online_data/SINAN.py | 1 - pysus/utilities/brasil.py | 17 +- 6 files changed, 2279 insertions(+), 1204 deletions(-) create mode 100644 docs/source/databases/Utilities.ipynb diff --git a/docs/source/databases/SINAN.ipynb b/docs/source/databases/SINAN.ipynb index 4cc4130..d94abe2 100644 --- a/docs/source/databases/SINAN.ipynb +++ b/docs/source/databases/SINAN.ipynb @@ -1209,7 +1209,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.8" }, "vscode": { "interpreter": { diff --git a/docs/source/databases/Utilities.ipynb b/docs/source/databases/Utilities.ipynb new file mode 100644 index 0000000..95bf895 --- /dev/null +++ b/docs/source/databases/Utilities.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c153a255-ad53-4b27-b689-4c119ea8cc52", + "metadata": {}, + "source": [ + "## Utilities module\n", + "\n", + "Some helper functions that are used throughout the package: " + ] + }, + { + "cell_type": "markdown", + "id": "c5c639e6-fa54-482a-a91d-20a8bbe05206", + "metadata": {}, + "source": [ + "### brasil" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "451830fc-04af-4003-8e70-c71d61a57ac5", + "metadata": {}, + "outputs": [], + "source": [ + "from pysus.utilities import brasil" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": 
"76a37da8-7b41-4565-83e2-e23bfbeae5bb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'BR': 'Brasil',\n", + " 'AC': 'Acre',\n", + " 'AL': 'Alagoas',\n", + " 'AP': 'Amapá',\n", + " 'AM': 'Amazonas',\n", + " 'BA': 'Bahia',\n", + " 'CE': 'Ceará',\n", + " 'ES': 'Espírito Santo',\n", + " 'GO': 'Goiás',\n", + " 'MA': 'Maranhão',\n", + " 'MT': 'Mato Grosso',\n", + " 'MS': 'Mato Grosso do Sul',\n", + " 'MG': 'Minas Gerais',\n", + " 'PA': 'Pará',\n", + " 'PB': 'Paraíba',\n", + " 'PR': 'Paraná',\n", + " 'PE': 'Pernambuco',\n", + " 'PI': 'Piauí',\n", + " 'RJ': 'Rio de Janeiro',\n", + " 'RN': 'Rio Grande do Norte',\n", + " 'RS': 'Rio Grande do Sul',\n", + " 'RO': 'Rondônia',\n", + " 'RR': 'Roraima',\n", + " 'SC': 'Santa Catarina',\n", + " 'SP': 'São Paulo',\n", + " 'SE': 'Sergipe',\n", + " 'TO': 'Tocantins',\n", + " 'DF': 'Distrito Federal'}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "brasil.UFs" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "965a2323-066c-45af-83f7-b20ece735089", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{1: 'Janeiro',\n", + " 2: 'Fevereiro',\n", + " 3: 'Março',\n", + " 4: 'Abril',\n", + " 5: 'Maio',\n", + " 6: 'Junho',\n", + " 7: 'Julho',\n", + " 8: 'Agosto',\n", + " 9: 'Setembro',\n", + " 10: 'Outubro',\n", + " 11: 'Novembro',\n", + " 12: 'Dezembro'}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "brasil.MONTHS" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "573f2f20-f038-4384-b6f2-558bad80f276", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Rio de Janeiro'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get municipality name by IBGE's geocode \n", + "# https://www.ibge.gov.br/explica/codigos-dos-municipios.php\n", + 
"brasil.get_city_name_by_geocode(3304557)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorials/IBGE_data.ipynb b/docs/source/tutorials/IBGE_data.ipynb index 018a1e5..0d3901b 100644 --- a/docs/source/tutorials/IBGE_data.ipynb +++ b/docs/source/tutorials/IBGE_data.ipynb @@ -28,6 +28,7 @@ "source": [ "import pandas as pd\n", "from pysus.online_data import IBGE\n", + "from pysus.ftp.utils import zfill_year\n", "\n", "import ipywidgets as widgets\n", "%matplotlib inline" @@ -98,25 +99,25 @@ " 1\n", " CL\n", " Cadastro Central de Empresas\n", - " [{'id': '1732', 'nome': 'Dados gerais das empr...\n", + " [{'id': '1685', 'nome': 'Unidades locais, empr...\n", " \n", " \n", " 2\n", " CA\n", " Censo Agropecuário\n", - " [{'id': '1278', 'nome': 'Agroindústria rural n...\n", + " [{'id': '1005', 'nome': 'Número de estabelecim...\n", " \n", " \n", " 3\n", " ME\n", " Censo Comum do Mercosul, Bolívia e Chile\n", - " [{'id': '2059', 'nome': 'Domicílios e Populaçã...\n", + " [{'id': '1221', 'nome': 'População residente, ...\n", " \n", " \n", " 4\n", " CD\n", " Censo Demográfico\n", - " [{'id': '1301', 'nome': 'Área e Densidade demo...\n", + " [{'id': '102', 'nome': 'Mulheres de 10 anos ou...\n", " \n", " \n", " ...\n", @@ -125,38 +126,38 @@ " ...\n", " \n", " \n", - " 62\n", + " 63\n", " VS\n", " Produção da Extração Vegetal e da Silvicultura\n", - " [{'id': '5930', 'nome': 'Área total existente ...\n", + " [{'id': '289', 'nome': 'Quantidade produzida e...\n", " \n", " \n", - " 63\n", + " 64\n", " PO\n", " Produção de Ovos de Galinha\n", - " [{'id': '915', 'nome': 
'Número de informantes,...\n", + " [{'id': '6672', 'nome': 'Quantidade de ovos pr...\n", " \n", " \n", - " 64\n", + " 65\n", " IO\n", " Produto Interno Bruto dos Municípios\n", - " [{'id': '599', 'nome': 'Índice de Gini do prod...\n", + " [{'id': '21', 'nome': 'Produto interno bruto a...\n", " \n", " \n", - " 65\n", + " 66\n", " XE\n", " Projeção da População\n", - " [{'id': '7362', 'nome': 'Esperança de vida ao ...\n", + " [{'id': '7358', 'nome': 'População, por sexo e...\n", " \n", " \n", - " 66\n", + " 67\n", " SI\n", " Sistema Nacional de Pesquisa de Custos e Índic...\n", - " [{'id': '33', 'nome': 'Custo de projeto m², po...\n", + " [{'id': '2062', 'nome': 'Preços medianos, por ...\n", " \n", " \n", "\n", - "

67 rows × 3 columns

\n", + "

68 rows × 3 columns

\n", "" ], "text/plain": [ @@ -167,26 +168,26 @@ "3 ME Censo Comum do Mercosul, Bolívia e Chile \n", "4 CD Censo Demográfico \n", ".. .. ... \n", - "62 VS Produção da Extração Vegetal e da Silvicultura \n", - "63 PO Produção de Ovos de Galinha \n", - "64 IO Produto Interno Bruto dos Municípios \n", - "65 XE Projeção da População \n", - "66 SI Sistema Nacional de Pesquisa de Custos e Índic... \n", + "63 VS Produção da Extração Vegetal e da Silvicultura \n", + "64 PO Produção de Ovos de Galinha \n", + "65 IO Produto Interno Bruto dos Municípios \n", + "66 XE Projeção da População \n", + "67 SI Sistema Nacional de Pesquisa de Custos e Índic... \n", "\n", " agregados \n", "0 [{'id': '8418', 'nome': 'Áreas urbanizadas, Lo... \n", - "1 [{'id': '1732', 'nome': 'Dados gerais das empr... \n", - "2 [{'id': '1278', 'nome': 'Agroindústria rural n... \n", - "3 [{'id': '2059', 'nome': 'Domicílios e Populaçã... \n", - "4 [{'id': '1301', 'nome': 'Área e Densidade demo... \n", + "1 [{'id': '1685', 'nome': 'Unidades locais, empr... \n", + "2 [{'id': '1005', 'nome': 'Número de estabelecim... \n", + "3 [{'id': '1221', 'nome': 'População residente, ... \n", + "4 [{'id': '102', 'nome': 'Mulheres de 10 anos ou... \n", ".. ... \n", - "62 [{'id': '5930', 'nome': 'Área total existente ... \n", - "63 [{'id': '915', 'nome': 'Número de informantes,... \n", - "64 [{'id': '599', 'nome': 'Índice de Gini do prod... \n", - "65 [{'id': '7362', 'nome': 'Esperança de vida ao ... \n", - "66 [{'id': '33', 'nome': 'Custo de projeto m², po... \n", + "63 [{'id': '289', 'nome': 'Quantidade produzida e... \n", + "64 [{'id': '6672', 'nome': 'Quantidade de ovos pr... \n", + "65 [{'id': '21', 'nome': 'Produto interno bruto a... \n", + "66 [{'id': '7358', 'nome': 'População, por sexo e... \n", + "67 [{'id': '2062', 'nome': 'Preços medianos, por ... 
\n", "\n", - "[67 rows x 3 columns]" + "[68 rows x 3 columns]" ] }, "execution_count": 2, @@ -251,108 +252,108 @@ " \n", " \n", " 0\n", - " 479\n", - " Chefes de domicílios particulares permanentes ...\n", + " 305\n", + " População residente em domicílios particulares...\n", " \n", " \n", " 1\n", - " 798\n", - " Domicílios particulares ocupados por sexo da p...\n", + " 319\n", + " Média de moradores por domicílio particular pe...\n", " \n", " \n", " 2\n", - " 482\n", - " Domicílios particulares permanentes por sexo d...\n", + " 472\n", + " População residente por idade, forma de declar...\n", " \n", " \n", " 3\n", - " 580\n", - " Domicílios particulares permanentes, População...\n", + " 473\n", + " Pessoas de 4 anos ou mais de idade por grupos ...\n", " \n", " \n", " 4\n", - " 579\n", - " Domicílios particulares permanentes, População...\n", + " 475\n", + " População residente por grupos de idade, sexo ...\n", " \n", " \n", " 5\n", - " 797\n", - " Domicílios recenseados por espécie de domicílio\n", + " 476\n", + " Pessoas de 4 anos ou mais de idade por frequên...\n", " \n", " \n", " 6\n", - " 319\n", - " Média de moradores por domicílio particular pe...\n", + " 477\n", + " Pessoas de 4 anos ou mais que frequentam escol...\n", " \n", " \n", " 7\n", - " 553\n", - " Média de moradores por domicílio particular pe...\n", + " 478\n", + " Pessoas não residentes no município de residên...\n", " \n", " \n", " 8\n", - " 481\n", - " Média de moradores por domicílio particular pe...\n", + " 479\n", + " Chefes de domicílios particulares permanentes ...\n", " \n", " \n", " 9\n", - " 552\n", - " Média de moradores por domicílio particular pe...\n", + " 480\n", + " População residente por relação com o chefe do...\n", " \n", " \n", " 10\n", - " 476\n", - " Pessoas de 4 anos ou mais de idade por frequên...\n", + " 481\n", + " Média de moradores por domicílio particular pe...\n", " \n", " \n", " 11\n", - " 473\n", - " Pessoas de 4 anos ou mais de idade por grupos ...\n", + " 482\n", 
+ " Domicílios particulares permanentes por sexo d...\n", " \n", " \n", " 12\n", - " 477\n", - " Pessoas de 4 anos ou mais que frequentam escol...\n", + " 484\n", + " Pessoas não residentes no município de residên...\n", " \n", " \n", " 13\n", - " 484\n", - " Pessoas não residentes no município de residên...\n", + " 552\n", + " Média de moradores por domicílio particular pe...\n", " \n", " \n", " 14\n", - " 478\n", - " Pessoas não residentes no município de residên...\n", + " 553\n", + " Média de moradores por domicílio particular pe...\n", " \n", " \n", " 15\n", - " 794\n", - " População recenseada por situação do domicílio...\n", + " 579\n", + " Domicílios particulares permanentes, População...\n", " \n", " \n", " 16\n", - " 793\n", - " População residente\n", + " 580\n", + " Domicílios particulares permanentes, População...\n", " \n", " \n", " 17\n", - " 305\n", - " População residente em domicílios particulares...\n", + " 793\n", + " População residente\n", " \n", " \n", " 18\n", - " 475\n", - " População residente por grupos de idade, sexo ...\n", + " 794\n", + " População recenseada por situação do domicílio...\n", " \n", " \n", " 19\n", - " 472\n", - " População residente por idade, forma de declar...\n", + " 797\n", + " Domicílios recenseados por espécie de domicílio\n", " \n", " \n", " 20\n", - " 480\n", - " População residente por relação com o chefe do...\n", + " 798\n", + " Domicílios particulares ocupados por sexo da p...\n", " \n", " \n", "\n", @@ -360,27 +361,27 @@ ], "text/plain": [ " id nome\n", - "0 479 Chefes de domicílios particulares permanentes ...\n", - "1 798 Domicílios particulares ocupados por sexo da p...\n", - "2 482 Domicílios particulares permanentes por sexo d...\n", - "3 580 Domicílios particulares permanentes, População...\n", - "4 579 Domicílios particulares permanentes, População...\n", - "5 797 Domicílios recenseados por espécie de domicílio\n", - "6 319 Média de moradores por domicílio particular pe...\n", - "7 553 Média de 
moradores por domicílio particular pe...\n", - "8 481 Média de moradores por domicílio particular pe...\n", - "9 552 Média de moradores por domicílio particular pe...\n", - "10 476 Pessoas de 4 anos ou mais de idade por frequên...\n", - "11 473 Pessoas de 4 anos ou mais de idade por grupos ...\n", - "12 477 Pessoas de 4 anos ou mais que frequentam escol...\n", - "13 484 Pessoas não residentes no município de residên...\n", - "14 478 Pessoas não residentes no município de residên...\n", - "15 794 População recenseada por situação do domicílio...\n", - "16 793 População residente\n", - "17 305 População residente em domicílios particulares...\n", - "18 475 População residente por grupos de idade, sexo ...\n", - "19 472 População residente por idade, forma de declar...\n", - "20 480 População residente por relação com o chefe do..." + "0 305 População residente em domicílios particulares...\n", + "1 319 Média de moradores por domicílio particular pe...\n", + "2 472 População residente por idade, forma de declar...\n", + "3 473 Pessoas de 4 anos ou mais de idade por grupos ...\n", + "4 475 População residente por grupos de idade, sexo ...\n", + "5 476 Pessoas de 4 anos ou mais de idade por frequên...\n", + "6 477 Pessoas de 4 anos ou mais que frequentam escol...\n", + "7 478 Pessoas não residentes no município de residên...\n", + "8 479 Chefes de domicílios particulares permanentes ...\n", + "9 480 População residente por relação com o chefe do...\n", + "10 481 Média de moradores por domicílio particular pe...\n", + "11 482 Domicílios particulares permanentes por sexo d...\n", + "12 484 Pessoas não residentes no município de residên...\n", + "13 552 Média de moradores por domicílio particular pe...\n", + "14 553 Média de moradores por domicílio particular pe...\n", + "15 579 Domicílios particulares permanentes, População...\n", + "16 580 Domicílios particulares permanentes, População...\n", + "17 793 População residente\n", + "18 794 População recenseada por situação 
do domicílio...\n", + "19 797 Domicílios recenseados por espécie de domicílio\n", + "20 798 Domicílios particulares ocupados por sexo da p..." ] }, "execution_count": 3, @@ -444,8 +445,8 @@ " \n", " \n", " 0\n", - " 7362\n", - " Esperança de vida ao nascer e Taxa de mortalid...\n", + " 7358\n", + " População, por sexo e idade\n", " \n", " \n", " 1\n", @@ -454,18 +455,18 @@ " \n", " \n", " 2\n", - " 7358\n", - " População, por sexo e idade\n", + " 7362\n", + " Esperança de vida ao nascer e Taxa de mortalid...\n", " \n", " \n", " 3\n", - " 7365\n", - " Proporção de pessoas, por grupo de idade\n", + " 7363\n", + " Taxa específica de fecundidade, por grupo de i...\n", " \n", " \n", " 4\n", - " 7363\n", - " Taxa específica de fecundidade, por grupo de i...\n", + " 7365\n", + " Proporção de pessoas, por grupo de idade\n", " \n", " \n", "\n", @@ -473,11 +474,11 @@ ], "text/plain": [ " id nome\n", - "0 7362 Esperança de vida ao nascer e Taxa de mortalid...\n", + "0 7358 População, por sexo e idade\n", "1 7360 Indicadores implícitos na projeção da população\n", - "2 7358 População, por sexo e idade\n", - "3 7365 Proporção de pessoas, por grupo de idade\n", - "4 7363 Taxa específica de fecundidade, por grupo de i..." 
+ "2 7362 Esperança de vida ao nascer e Taxa de mortalid...\n", + "3 7363 Taxa específica de fecundidade, por grupo de i...\n", + "4 7365 Proporção de pessoas, por grupo de idade" ] }, "execution_count": 4, @@ -517,12 +518,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c70f902c99ee451393f7ef18e1699499", + "model_id": "4b34f6337bfd460196a6967f849b9866", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Dropdown(description='Number:', index=18, options=(('Chefes de domicílios particulares permanentes por grupos …" + "Dropdown(description='Number:', index=4, options=(('População residente em domicílios particulares permanentes…" ] }, "metadata": {}, @@ -1942,13 +1943,869 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%%\n" } }, + "source": [ + "## Retrieving population and census data from IBGE" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function get_population in module pysus.online_data.IBGE:\n", + "\n", + "get_population(year: int, source: Literal['POP', 'censo', 'POPTCU', 'projpop'] = 'POPTCU', censo_data: Literal['ALF', 'ESCA', 'ESCB', 'IDOSO', 'RENDA'] = 'ALF') -> pandas.core.frame.DataFrame\n", + " Get population data from IBGE as shared by DATASUS\n", + " :param year: year of the data\n", + " :param source: \n", + " \"POP\" - 1992-presente: Estimativas populacionais estratificadas por \n", + " idade e sexo.\n", + " \"censo\" - 1991, 2000 e 2010: Censos Demográficos\n", + " \"POPTCU\" - 1992-presente: Estimativas populacionais enviadas para o TCU,\n", + " estratificadas por idade e sexo pelo MS/SGEP/Datasus.\n", + " \"projpop\": Estimativas preliminares para os anos intercensitários dos \n", + " totais populacionais, estratificadas por idade e sexo pelo \n", + " MS/SGEP/Datasus.\n", + " :param censo_data: \n", + " 
\"ALF\": Censo Demográfico\n", + " \"ESCA\": Censo Escolar da Educação Básica\n", + " \"ESCB\": Censo Escolar da Educação Superior\n", + " \"IDOSO\": População de pessoas com 65 anos ou mais\n", + " \"RENDA\": População de pessoas de acordo com a renda familiar\n", + " :return: DataFrame with population data\n", + "\n" + ] + } + ], + "source": [ + "help(IBGE.get_population)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sources:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### POP\n", + "Population estimates stratified by age and sex" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Available years for `POP`:\n", + "def get_available_years(source):\n", + " return sorted(set([zfill_year(f.name[-2:]) for f in IBGE.ibge.get_files(source=source)]))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('The years available for source `POP` are: [1992, 1993, 1994, 1995, 1997, '\n", + " '1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, '\n", + " '2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]')\n" + ] + } + ], + "source": [ + "from pprint import pprint\n", + "pprint(f\"The years available for source `POP` are: {get_available_years('POP')}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MUNIC_RESANOPOPULACAO
0110001199234768
1110002199261737
211000319928633
3110004199272462
4110005199223280
............
496952219019923361
497052220019929135
497152220519925384
497252222019922942
497353001019921639035
\n", + "

4974 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " MUNIC_RES ANO POPULACAO\n", + "0 110001 1992 34768\n", + "1 110002 1992 61737\n", + "2 110003 1992 8633\n", + "3 110004 1992 72462\n", + "4 110005 1992 23280\n", + "... ... ... ...\n", + "4969 522190 1992 3361\n", + "4970 522200 1992 9135\n", + "4971 522205 1992 5384\n", + "4972 522220 1992 2942\n", + "4973 530010 1992 1639035\n", + "\n", + "[4974 rows x 3 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "IBGE.get_population(1992, source=\"POP\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### censo\n", + "Demographic Censuses of 1991, 2000 and 2010. `censo` is subdivided in 5 categories in which can be selected using the attribute `censo_data`, please see the help() text for more information about `censo_data`, its default value is `ALF`." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
MUNCODANOCORRACASITUACAONUMRENDADENRENDADENCRIRENDNUMPOBRESNUMPOBRESXNUMCRIPOBNUMCRIPOBXNUMDESOCUPDENDESOCUPNUMTRABINFDENTRABINF
011000120101I6000724.8810482261848312804164591418144511531127
111000120102I378314.66107918653918214446185541680
211000120103I31515.7895046120006508
311000120104I4791228.10119443348580230482096116030449061591583
411000120105I68893.154971824502501651070670108
................................................
2539553001020102I203004614.5819026731396414981437311574452210289119202181716766
2539653001020103I63474517.864238083086538190123115501669243014724001
2539753001020104I1409083997.912366073150012799629847411217139581568856613649720144469
2539853001020105I8884195.0968221140102129719567345389223589
2539953001020100I972433.65853167410330141121000101
\n", + "

25400 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " MUNCOD ANO CORRACA SITUACAO NUMRENDA DENRENDA DENCRIREND \\\n", + "0 110001 2010 1 I 6000724.88 10482 2618 \n", + "1 110001 2010 2 I 378314.66 1079 186 \n", + "2 110001 2010 3 I 31515.78 95 0 \n", + "3 110001 2010 4 I 4791228.10 11944 3348 \n", + "4 110001 2010 5 I 68893.15 497 182 \n", + "... ... ... ... ... ... ... ... \n", + "25395 530010 2010 2 I 203004614.58 190267 31396 \n", + "25396 530010 2010 3 I 63474517.86 42380 8308 \n", + "25397 530010 2010 4 I 1409083997.9 1236607 315001 \n", + "25398 530010 2010 5 I 8884195.09 6822 1140 \n", + "25399 530010 2010 0 I 972433.65 853 167 \n", + "\n", + " NUMPOBRES NUMPOBRESX NUMCRIPOB NUMCRIPOBX NUMDESOCUP DENDESOCUP \\\n", + "0 4831 2804 1645 914 181 4451 \n", + "1 539 182 144 46 18 554 \n", + "2 46 12 0 0 0 65 \n", + "3 5802 3048 2096 1160 304 4906 \n", + "4 450 250 165 107 0 67 \n", + "... ... ... ... ... ... ... \n", + "25395 41498 14373 11574 4522 10289 119202 \n", + "25396 6538 1901 2311 550 1669 24301 \n", + "25397 279962 98474 112171 39581 56885 661364 \n", + "25398 1021 297 195 67 345 3892 \n", + "25399 410 330 141 121 0 0 \n", + "\n", + " NUMTRABINF DENTRABINF \n", + "0 153 1127 \n", + "1 16 80 \n", + "2 0 8 \n", + "3 159 1583 \n", + "4 0 108 \n", + "... ... ... 
\n", + "25395 1817 16766 \n", + "25396 472 4001 \n", + "25397 9720 144469 \n", + "25398 23 589 \n", + "25399 0 101 \n", + "\n", + "[25400 rows x 15 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "IBGE.get_population(2010, source=\"censo\", censo_data=\"RENDA\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### POPTCU (default)\n", + "\n", + "Population estimates sent to TCU (Federal Court of Accounts - Brazil), stratified by age and sex by MS/SGEP/Datasus" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('The years available for source `POPTCU` are: [1992, 1993, 1994, 1995, 1997, '\n", + " '1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, '\n", + " '2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]')\n" + ] + } + ], + "source": [ + "pprint(f\"The years available for source `POPTCU` are: {get_available_years('POPTCU')}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MUNIC_RESANOPOPULACAO
01100015202122516
111000232021111148
2110003120215067
31100049202186416
41100056202116088
............
55655222005202114088
5566522205420219002
5567522220320216451
5568522230220215941
5569530010820213094325
\n", + "

5570 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " MUNIC_RES ANO POPULACAO\n", + "0 1100015 2021 22516\n", + "1 1100023 2021 111148\n", + "2 1100031 2021 5067\n", + "3 1100049 2021 86416\n", + "4 1100056 2021 16088\n", + "... ... ... ...\n", + "5565 5222005 2021 14088\n", + "5566 5222054 2021 9002\n", + "5567 5222203 2021 6451\n", + "5568 5222302 2021 5941\n", + "5569 5300108 2021 3094325\n", + "\n", + "[5570 rows x 3 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "IBGE.get_population(2021)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### projpop\n", + "\n", + "Preliminary estimates for the intercensal years of population totals, stratified by age and sex by MS/SGEP/Datasus." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('The years available for source `projpop` are: [1925, 1926, 1927, 1928, 1929, '\n", + " '1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, '\n", + " '1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, '\n", + " '1954, 1955, 1956, 1957, 1958, 1959, 1960, 2000, 2001, 2002, 2003, 2004, '\n", + " '2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, '\n", + " '2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]')\n" + ] + } + ], + "source": [ + "pprint(f\"The years available for source `projpop` are: {get_available_years('projpop')}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ANOFXETARIASEXOPOPULACAO
02024A0000M1326675
12024A0101M1339007
22024A0202M1353165
32024A0303M1367593
42024A0404M1382395
...............
1772024A8686F203662
1782024A8787F178219
1792024A8888F154388
1802024A8989F134069
1812024A9099F627780
\n", + "

182 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ANO FXETARIA SEXO POPULACAO\n", + "0 2024 A0000 M 1326675\n", + "1 2024 A0101 M 1339007\n", + "2 2024 A0202 M 1353165\n", + "3 2024 A0303 M 1367593\n", + "4 2024 A0404 M 1382395\n", + ".. ... ... ... ...\n", + "177 2024 A8686 F 203662\n", + "178 2024 A8787 F 178219\n", + "179 2024 A8888 F 154388\n", + "180 2024 A8989 F 134069\n", + "181 2024 A9099 F 627780\n", + "\n", + "[182 rows x 4 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "IBGE.get_population(2024, \"projpop\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] } @@ -1970,7 +2827,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.8" }, "latex_envs": { "LaTeX_envs_menu_present": true, diff --git a/docs/source/tutorials/Preprocessing SINAN.ipynb b/docs/source/tutorials/Preprocessing SINAN.ipynb index 52c9787..d2337f7 100644 --- a/docs/source/tutorials/Preprocessing SINAN.ipynb +++ b/docs/source/tutorials/Preprocessing SINAN.ipynb @@ -21,16 +21,6 @@ "execution_count": 2, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/bida/Projetos/InfoDengue/PySUS/pysus/online_data/SINAN.py:50: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.\n", - "\n", - "\n", - " df = pd.read_csv(\n" - ] - }, { "data": { "text/html": [ @@ -522,7 +512,8 @@ " CHAGBR18.dbc,\n", " CHAGBR19.dbc,\n", " CHAGBR20.dbc,\n", - " CHAGBR21.dbc]" + " CHAGBR21.dbc,\n", + " CHAGBR22.dbc]" ] }, "execution_count": 6, @@ -542,445 +533,12 @@ } }, "source": [ - "We can also check when it was last updated for every disease, and if the table is preliminary or final." + "We can see, that we have data from 2000 to present. 
Now we can download it:" ] }, { "cell_type": "code", "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2022-09-05T17:20:52.815560Z", - "start_time": "2022-09-05T17:20:52.314900Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
folderdatefile_sizefile_name
0/dissemin/publicos/SINAN/DADOS/FINAIS2023-01-16 14:15:0028326ACBIBR06.dbc
1/dissemin/publicos/SINAN/DADOS/FINAIS2023-01-16 14:15:00673314ACBIBR07.dbc
2/dissemin/publicos/SINAN/DADOS/FINAIS2023-01-16 14:15:001048406ACBIBR08.dbc
3/dissemin/publicos/SINAN/DADOS/FINAIS2023-01-16 14:15:001493392ACBIBR09.dbc
4/dissemin/publicos/SINAN/DADOS/FINAIS2023-01-16 14:15:001632311ACBIBR10.dbc
...............
753/dissemin/publicos/SINAN/DADOS/PRELIM2023-03-09 16:37:00169214VARCBR22.dbc
754/dissemin/publicos/SINAN/DADOS/PRELIM2023-08-10 10:28:00127626VARCBR23.dbc
755/dissemin/publicos/SINAN/DADOS/PRELIM2023-06-26 11:25:0026007974VIOLBR21.dbc
756/dissemin/publicos/SINAN/DADOS/PRELIM2023-06-26 11:25:0032269105VIOLBR22.dbc
757/dissemin/publicos/SINAN/DADOS/PRELIM2023-08-23 10:11:00384075ZIKABR23.dbc
\n", - "

758 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " folder date file_size \\\n", - "0 /dissemin/publicos/SINAN/DADOS/FINAIS 2023-01-16 14:15:00 28326 \n", - "1 /dissemin/publicos/SINAN/DADOS/FINAIS 2023-01-16 14:15:00 673314 \n", - "2 /dissemin/publicos/SINAN/DADOS/FINAIS 2023-01-16 14:15:00 1048406 \n", - "3 /dissemin/publicos/SINAN/DADOS/FINAIS 2023-01-16 14:15:00 1493392 \n", - "4 /dissemin/publicos/SINAN/DADOS/FINAIS 2023-01-16 14:15:00 1632311 \n", - ".. ... ... ... \n", - "753 /dissemin/publicos/SINAN/DADOS/PRELIM 2023-03-09 16:37:00 169214 \n", - "754 /dissemin/publicos/SINAN/DADOS/PRELIM 2023-08-10 10:28:00 127626 \n", - "755 /dissemin/publicos/SINAN/DADOS/PRELIM 2023-06-26 11:25:00 26007974 \n", - "756 /dissemin/publicos/SINAN/DADOS/PRELIM 2023-06-26 11:25:00 32269105 \n", - "757 /dissemin/publicos/SINAN/DADOS/PRELIM 2023-08-23 10:11:00 384075 \n", - "\n", - " file_name \n", - "0 ACBIBR06.dbc \n", - "1 ACBIBR07.dbc \n", - "2 ACBIBR08.dbc \n", - "3 ACBIBR09.dbc \n", - "4 ACBIBR10.dbc \n", - ".. ... \n", - "753 VARCBR22.dbc \n", - "754 VARCBR23.dbc \n", - "755 VIOLBR21.dbc \n", - "756 VIOLBR22.dbc \n", - "757 ZIKABR23.dbc \n", - "\n", - "[758 rows x 4 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pysus.online_data import FTP_Inspect\n", - "lu = FTP_Inspect('SINAN').last_update_df()\n", - "lu" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2022-09-05T17:21:04.411472Z", - "start_time": "2022-09-05T17:21:04.370232Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
folderdatefile_sizefile_name
80/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:0041075CHAGBR00.dbc
81/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:0047675CHAGBR01.dbc
82/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:0069415CHAGBR02.dbc
83/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:0090539CHAGBR03.dbc
84/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:0086820CHAGBR04.dbc
85/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:00223289CHAGBR05.dbc
86/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:00135953CHAGBR06.dbc
87/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:0011660CHAGBR07.dbc
88/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:0011004CHAGBR08.dbc
89/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:0017913CHAGBR09.dbc
90/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:0013470CHAGBR10.dbc
91/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:0017109CHAGBR11.dbc
92/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:00264167CHAGBR12.dbc
93/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:00245977CHAGBR13.dbc
94/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:00261115CHAGBR14.dbc
95/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:00188615CHAGBR15.dbc
96/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:00244387CHAGBR16.dbc
97/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:00215819CHAGBR17.dbc
98/dissemin/publicos/SINAN/DADOS/FINAIS2022-03-28 11:18:00254336CHAGBR18.dbc
99/dissemin/publicos/SINAN/DADOS/FINAIS2021-11-23 12:21:00285962CHAGBR19.dbc
536/dissemin/publicos/SINAN/DADOS/PRELIM2022-02-09 15:16:00189312CHAGBR20.dbc
537/dissemin/publicos/SINAN/DADOS/PRELIM2023-04-20 12:17:00282009CHAGBR21.dbc
\n", - "
" - ], - "text/plain": [ - " folder date file_size \\\n", - "80 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 41075 \n", - "81 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 47675 \n", - "82 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 69415 \n", - "83 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 90539 \n", - "84 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 86820 \n", - "85 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 223289 \n", - "86 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 135953 \n", - "87 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 11660 \n", - "88 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 11004 \n", - "89 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 17913 \n", - "90 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 13470 \n", - "91 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 17109 \n", - "92 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 264167 \n", - "93 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 245977 \n", - "94 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 261115 \n", - "95 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 188615 \n", - "96 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 244387 \n", - "97 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 215819 \n", - "98 /dissemin/publicos/SINAN/DADOS/FINAIS 2022-03-28 11:18:00 254336 \n", - "99 /dissemin/publicos/SINAN/DADOS/FINAIS 2021-11-23 12:21:00 285962 \n", - "536 /dissemin/publicos/SINAN/DADOS/PRELIM 2022-02-09 15:16:00 189312 \n", - "537 /dissemin/publicos/SINAN/DADOS/PRELIM 2023-04-20 12:17:00 282009 \n", - "\n", - " file_name \n", - "80 CHAGBR00.dbc \n", - "81 CHAGBR01.dbc \n", - "82 CHAGBR02.dbc \n", - "83 CHAGBR03.dbc \n", - "84 CHAGBR04.dbc \n", - "85 CHAGBR05.dbc \n", - "86 CHAGBR06.dbc \n", - "87 CHAGBR07.dbc \n", - "88 CHAGBR08.dbc \n", - "89 CHAGBR09.dbc \n", - "90 
CHAGBR10.dbc \n", - "91 CHAGBR11.dbc \n", - "92 CHAGBR12.dbc \n", - "93 CHAGBR13.dbc \n", - "94 CHAGBR14.dbc \n", - "95 CHAGBR15.dbc \n", - "96 CHAGBR16.dbc \n", - "97 CHAGBR17.dbc \n", - "98 CHAGBR18.dbc \n", - "99 CHAGBR19.dbc \n", - "536 CHAGBR20.dbc \n", - "537 CHAGBR21.dbc " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "lu[lu.file_name.str.startswith('CHAG')]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "We can see, that we have data in final form, from 2000 until 2019, and preliminary data for 2020. Now we can download it:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2022-09-05T17:21:11.840622Z", @@ -995,7 +553,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "CHAGBR19.parquet: 100%|████████████████████████████████████████████████████████████████████████████████▉| 8.44k/8.44k [00:00<00:00, 19.4kB/s]\n" + "100%|███████████████████████████████████████████████████████████████████| 286k/286k [00:00<00:00, 266MB/s]\n" ] }, { @@ -1030,16 +588,16 @@ " ID_UNIDADE\n", " DT_SIN_PRI\n", " ...\n", - " DT_OBITO\n", - " CON_PROVAV\n", - " CON_OUTRA\n", - " CON_LOCAL\n", - " TPAUTOCTO\n", - " COUFINF\n", - " COPAISINF\n", - " COMUNINF\n", - " DOENCA_TRA\n", " DT_ENCERRA\n", + " DT_DIGITA\n", + " DT_TRANSUS\n", + " DT_TRANSDM\n", + " DT_TRANSSM\n", + " DT_TRANSRM\n", + " DT_TRANSRS\n", + " DT_TRANSSE\n", + " NU_LOTE_V\n", + " NU_LOTE_H\n", " \n", " \n", " \n", @@ -1056,16 +614,16 @@ " 2019639\n", " 2019-03-01\n", " ...\n", + " 20190513\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " 5\n", " \n", - " 2\n", - " 1\n", - " 16\n", - " 1\n", - " 160030\n", - " 2\n", - " 20190513\n", " \n", " \n", " 1\n", @@ -1080,16 +638,16 @@ " 2022192\n", " 2019-08-18\n", " ...\n", + " 20191002\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
" \n", - " 5\n", " \n", - " 2\n", - " 2\n", - " 16\n", - " 1\n", - " 160060\n", - " 2\n", - " 20191002\n", " \n", " \n", " 2\n", @@ -1104,16 +662,16 @@ " 2022192\n", " 2019-02-28\n", " ...\n", + " 20190325\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " 5\n", " \n", - " 2\n", - " 1\n", - " 16\n", - " 1\n", - " 160030\n", " \n", - " 20190325\n", " \n", " \n", " 3\n", @@ -1128,19 +686,19 @@ " 2020653\n", " 2019-09-09\n", " ...\n", + " 20191107\n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " 5\n", " \n", - " 2\n", - " 1\n", - " 16\n", - " 1\n", - " 160030\n", - " 2\n", - " 20191107\n", - " \n", - " \n", - " 4\n", + " \n", + " \n", + " \n", + " \n", + " 4\n", " 2\n", " B571\n", " 2019-09-10\n", @@ -1152,16 +710,16 @@ " 2020971\n", " 2019-08-28\n", " ...\n", + " 20191108\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " 5\n", " \n", - " 2\n", - " 1\n", - " 16\n", - " 1\n", - " 160060\n", - " 2\n", - " 20191108\n", " \n", " \n", " ...\n", @@ -1200,16 +758,16 @@ " 5740592\n", " 2019-09-04\n", " ...\n", + " 20191022\n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " 0\n", " \n", " \n", - " 20191022\n", " \n", " \n", " 4478\n", @@ -1224,16 +782,16 @@ " 2348489\n", " 2019-08-26\n", " ...\n", + " 20191008\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " 2\n", " \n", - " 2\n", - " 1\n", - " 26\n", - " 1\n", - " 261390\n", - " 2\n", - " 20191008\n", " \n", " \n", " 4479\n", @@ -1248,16 +806,16 @@ " 2711443\n", " 2019-01-05\n", " ...\n", + " 20190212\n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " 0\n", " \n", " \n", - " 20190212\n", " \n", " \n", " 4480\n", @@ -1272,16 +830,16 @@ " 5276403\n", " 2019-07-03\n", " ...\n", + " 20190903\n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " 0\n", " \n", " \n", - " 20190903\n", " \n", " \n", " 4481\n", @@ -1296,20 +854,20 @@ " 5844916\n", " 2019-08-04\n", " ...\n", + " 20200124\n", + " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", - " 0\n", " \n", " \n", - " 20200124\n", " \n", " \n", "\n", - "

4482 rows × 99 columns

\n", + "

4482 rows × 108 columns

\n", "" ], "text/plain": [ @@ -1326,36 +884,49 @@ "4480 2 B571 2019-07-03 201927 2019 26 260820 \n", "4481 2 B571 2019-11-08 201945 2019 26 261220 \n", "\n", - " ID_REGIONA ID_UNIDADE DT_SIN_PRI ... DT_OBITO CON_PROVAV CON_OUTRA \\\n", - "0 2019639 2019-03-01 ... 5 \n", - "1 2022192 2019-08-18 ... 5 \n", - "2 2022192 2019-02-28 ... 5 \n", - "3 2020653 2019-09-09 ... 5 \n", - "4 2020971 2019-08-28 ... 5 \n", - "... ... ... ... ... ... ... ... \n", - "4477 1501 5740592 2019-09-04 ... \n", - "4478 1506 2348489 2019-08-26 ... 2 \n", - "4479 1501 2711443 2019-01-05 ... \n", - "4480 1498 5276403 2019-07-03 ... \n", - "4481 1502 5844916 2019-08-04 ... \n", + " ID_REGIONA ID_UNIDADE DT_SIN_PRI ... DT_ENCERRA DT_DIGITA DT_TRANSUS \\\n", + "0 2019639 2019-03-01 ... 20190513 \n", + "1 2022192 2019-08-18 ... 20191002 \n", + "2 2022192 2019-02-28 ... 20190325 \n", + "3 2020653 2019-09-09 ... 20191107 \n", + "4 2020971 2019-08-28 ... 20191108 \n", + "... ... ... ... ... ... ... ... \n", + "4477 1501 5740592 2019-09-04 ... 20191022 \n", + "4478 1506 2348489 2019-08-26 ... 20191008 \n", + "4479 1501 2711443 2019-01-05 ... 20190212 \n", + "4480 1498 5276403 2019-07-03 ... 20190903 \n", + "4481 1502 5844916 2019-08-04 ... 20200124 \n", "\n", - " CON_LOCAL TPAUTOCTO COUFINF COPAISINF COMUNINF DOENCA_TRA DT_ENCERRA \n", - "0 2 1 16 1 160030 2 20190513 \n", - "1 2 2 16 1 160060 2 20191002 \n", - "2 2 1 16 1 160030 20190325 \n", - "3 2 1 16 1 160030 2 20191107 \n", - "4 2 1 16 1 160060 2 20191108 \n", - "... ... ... ... ... ... ... ... \n", - "4477 0 20191022 \n", - "4478 2 1 26 1 261390 2 20191008 \n", - "4479 0 20190212 \n", - "4480 0 20190903 \n", - "4481 0 20200124 \n", + " DT_TRANSDM DT_TRANSSM DT_TRANSRM DT_TRANSRS DT_TRANSSE NU_LOTE_V \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... ... ... ... ... 
\n", + "4477 \n", + "4478 \n", + "4479 \n", + "4480 \n", + "4481 \n", "\n", - "[4482 rows x 99 columns]" + " NU_LOTE_H \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "4477 \n", + "4478 \n", + "4479 \n", + "4480 \n", + "4481 \n", + "\n", + "[4482 rows x 108 columns]" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1384,7 +955,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": { "collapsed": false, "jupyter": { @@ -1399,7 +970,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "DENGBR20.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████| 2.40M/2.40M [02:24<00:00, 16.6kB/s]\n" + "100%|████████████████████████████████████████████████████████████████| 73.8M/73.8M [00:00<00:00, 71.6GB/s]\n" ] } ], @@ -1419,12 +990,12 @@ } }, "source": [ - "The cases of dengue where downloaded to multiple chunks to the directory above" + "The cases of dengue were downloaded to multiple chunks to the directory above" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": { "collapsed": false, "jupyter": { @@ -1441,7 +1012,7 @@ "50" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1453,7 +1024,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": { "collapsed": false, "jupyter": { @@ -1470,7 +1041,7 @@ "1495117" ] }, - "execution_count": 12, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1483,10 +1054,6 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%% md\n" } @@ -1498,433 +1065,35 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2022-09-05T17:38:01.818095Z", - "start_time": 
"2022-09-05T17:36:54.877226Z" + "end_time": "2022-09-05T17:21:42.746132Z", + "start_time": "2022-09-05T17:21:42.660877Z" }, "pycharm": { "name": "#%%\n" - }, - "scrolled": true + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TP_NOTID_AGRAVODT_NOTIFICSEM_NOTNU_ANOSG_UF_NOTID_MUNICIPID_REGIONAID_UNIDADEDT_SIN_PRI...LACO_NPLASMATICOEVIDENCIAPLAQ_MENORCON_FHDCOMPLICATP_SISTEMANDUPLIC_NCS_FLXRETFLXRECEBI
02A90202004242020353554101352204645820200420...21
12A90202004302020353511101354734088520200428...20
22A90202003312020353514401573795922220200328...20
32A90202004212020353522701337207983620200421...20
42A90202005162020353502801336205932020200511...21
..................................................................
14951122A90202003312020313106201449002331020200330...21
14951132A90202004162020313132401459314793220200407...20
14951142A90202004142020313106201449002799520200413...21
14951152A90202002142020313116601472216030720200211...21
14951162A90202003162020313106201449965384820200316...21
\n", - "

1495117 rows × 119 columns

\n", - "
" - ], - "text/plain": [ - " TP_NOT ID_AGRAVO DT_NOTIFIC SEM_NOT NU_ANO SG_UF_NOT ID_MUNICIP \\\n", - "0 2 A90 20200424 2020 35 355410 \n", - "1 2 A90 20200430 2020 35 351110 \n", - "2 2 A90 20200331 2020 35 351440 \n", - "3 2 A90 20200421 2020 35 352270 \n", - "4 2 A90 20200516 2020 35 350280 \n", - "... ... ... ... ... ... ... ... \n", - "1495112 2 A90 20200331 2020 31 310620 \n", - "1495113 2 A90 20200416 2020 31 313240 \n", - "1495114 2 A90 20200414 2020 31 310620 \n", - "1495115 2 A90 20200214 2020 31 311660 \n", - "1495116 2 A90 20200316 2020 31 310620 \n", - "\n", - " ID_REGIONA ID_UNIDADE DT_SIN_PRI ... LACO_N PLASMATICO EVIDENCIA \\\n", - "0 1352 2046458 20200420 ... \n", - "1 1354 7340885 20200428 ... \n", - "2 1573 7959222 20200328 ... \n", - "3 1337 2079836 20200421 ... \n", - "4 1336 2059320 20200511 ... \n", - "... ... ... ... ... ... ... ... \n", - "1495112 1449 0023310 20200330 ... \n", - "1495113 1459 3147932 20200407 ... \n", - "1495114 1449 0027995 20200413 ... \n", - "1495115 1472 2160307 20200211 ... \n", - "1495116 1449 9653848 20200316 ... \n", - "\n", - " PLAQ_MENOR CON_FHD COMPLICA TP_SISTEMA NDUPLIC_N CS_FLXRET FLXRECEBI \n", - "0 2 1 \n", - "1 2 0 \n", - "2 2 0 \n", - "3 2 0 \n", - "4 2 1 \n", - "... ... ... ... ... ... ... ... \n", - "1495112 2 1 \n", - "1495113 2 0 \n", - "1495114 2 1 \n", - "1495115 2 1 \n", - "1495116 2 1 \n", - "\n", - "[1495117 rows x 119 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from glob import glob\n", - "for i, f in enumerate(glob(f\"{fn}/*.parquet\")):\n", - " if i == 0:\n", - " df2 = pd.read_parquet(f)\n", - " else:\n", - " df2 = pd.concat([df2, pd.read_parquet(f)], ignore_index=True)\n", - "df2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "## Decoding the age in SINAN tables\n", - "In SINAN the age comes encoded. 
PySUS can decode the age column `NU_IDADE_N` into any of these units: years, months, days, or hours." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "ExecuteTime": { - "end_time": "2022-09-05T17:21:42.746132Z", - "start_time": "2022-09-05T17:21:42.660877Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "\u001b[0;31mSignature:\u001b[0m \u001b[0mdecodifica_idade_SINAN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mType:\u001b[0m vectorize\n", - "\u001b[0;31mString form:\u001b[0m \n", - "\u001b[0;31mFile:\u001b[0m ~/micromamba/envs/pysus/lib/python3.11/site-packages/numpy/__init__.py\n", - "\u001b[0;31mDocstring:\u001b[0m \n", - "Em tabelas do SINAN frequentemente a idade é representada como um inteiro que precisa ser parseado\n", - "para retornar a idade em uma unidade cronológica padrão.\n", - ":param unidade: unidade da idade: 'Y': anos, 'M' meses, 'D': dias, 'H': horas\n", - ":param idade: inteiro ou sequencia de inteiros codificados.\n", - ":return:\n", - "\u001b[0;31mClass docstring:\u001b[0m\n", - "vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False,\n", - " signature=None)\n", - "\n", - "Generalized function class.\n", + "Returns an object that acts like pyfunc, but takes arrays as input.\n", "\n", "Define a vectorized function which takes a nested sequence of objects or\n", "numpy arrays as inputs and returns a single numpy array or a tuple of numpy\n", @@ -1938,8 +1107,9 @@ "\n", "Parameters\n", "----------\n", - "pyfunc : callable\n", + "pyfunc : callable, optional\n", " A python function or method.\n", + " Can be omitted to produce a decorator with keyword arguments.\n", "otypes : str or list of dtypes, optional\n", " The output data type. 
It must be specified as either a string of\n", " typecode characters or a list of data type specifiers. There should\n", @@ -1971,8 +1141,9 @@ "\n", "Returns\n", "-------\n", - "vectorized : callable\n", - " Vectorized function.\n", + "out : callable\n", + " A vectorized function if ``pyfunc`` was provided,\n", + " a decorator otherwise.\n", "\n", "See Also\n", "--------\n", @@ -2068,9 +1239,21 @@ " [0., 1., 2., 1., 0., 0.],\n", " [0., 0., 1., 2., 1., 0.],\n", " [0., 0., 0., 1., 2., 1.]])\n", - "\u001b[0;31mCall docstring:\u001b[0m \n", - "Return arrays with the results of `pyfunc` broadcast (vectorized) over\n", - "`args` and `kwargs` not in `excluded`." + "\n", + "Decorator syntax is supported. The decorator can be called as\n", + "a function to provide keyword arguments.\n", + ">>>@np.vectorize\n", + "...def identity(x):\n", + "... return x\n", + "...\n", + ">>>identity([0, 1, 2])\n", + "array([0, 1, 2])\n", + ">>>@np.vectorize(otypes=[float])\n", + "...def as_float(x):\n", + "... return x\n", + "...\n", + ">>>as_float([0, 1, 2])\n", + "array([0., 1., 2.])" ] }, "metadata": {}, @@ -2095,7 +1278,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2022-09-05T17:21:57.041703Z", @@ -2112,110 +1295,9 @@ "text": [ "\n", "RangeIndex: 4482 entries, 0 to 4481\n", - "Data columns (total 99 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 TP_NOT 4482 non-null string \n", - " 1 ID_AGRAVO 4482 non-null string \n", - " 2 DT_NOTIFIC 4482 non-null datetime64[ns]\n", - " 3 SEM_NOT 4482 non-null string \n", - " 4 NU_ANO 4482 non-null string \n", - " 5 SG_UF_NOT 4482 non-null string \n", - " 6 ID_MUNICIP 4482 non-null int64 \n", - " 7 ID_REGIONA 3633 non-null float64 \n", - " 8 ID_UNIDADE 4482 non-null int64 \n", - " 9 DT_SIN_PRI 4482 non-null datetime64[ns]\n", - " 10 SEM_PRI 4482 non-null string \n", - " 11 DT_NASC 4447 non-null datetime64[ns]\n", - " 12 
NU_IDADE_N 4482 non-null string \n", - " 13 CS_SEXO 4482 non-null string \n", - " 14 CS_GESTANT 4482 non-null string \n", - " 15 CS_RACA 4482 non-null string \n", - " 16 CS_ESCOL_N 4482 non-null string \n", - " 17 SG_UF 4482 non-null string \n", - " 18 ID_MN_RESI 4482 non-null int64 \n", - " 19 ID_RG_RESI 3682 non-null float64 \n", - " 20 ID_PAIS 4482 non-null int64 \n", - " 21 NDUPLIC_N 4482 non-null string \n", - " 22 DT_INVEST 4329 non-null datetime64[ns]\n", - " 23 ID_OCUPA_N 4482 non-null string \n", - " 24 ANT_UF_1 4482 non-null string \n", - " 25 MUN_1 4482 non-null string \n", - " 26 ANT_UF_2 4482 non-null string \n", - " 27 MUN_2 4482 non-null string \n", - " 28 ANT_UF_3 4482 non-null string \n", - " 29 MUN_3 4482 non-null string \n", - " 30 PRESENCA 4482 non-null string \n", - " 31 PARASITO 4482 non-null string \n", - " 32 HISTORIA 4482 non-null string \n", - " 33 CONTROLE 4482 non-null string \n", - " 34 MANIPULA 4482 non-null string \n", - " 35 MAECHAGA 4482 non-null string \n", - " 36 ORAL 4482 non-null string \n", - " 37 ASSINTOMA 4482 non-null string \n", - " 38 EDEMA 4482 non-null string \n", - " 39 MENINGOE 4482 non-null string \n", - " 40 POLIADENO 4482 non-null string \n", - " 41 FEBRE 4482 non-null string \n", - " 42 HEPATOME 4482 non-null string \n", - " 43 SINAIS_ICC 4482 non-null string \n", - " 44 ARRITMIAS 4482 non-null string \n", - " 45 ASTENIA 4482 non-null string \n", - " 46 ESPLENOM 4482 non-null string \n", - " 47 CHAGOMA 4482 non-null string \n", - " 48 OUTRO_SIN 4482 non-null string \n", - " 49 OUTRO_ESP 4482 non-null string \n", - " 50 DT_COL_DIR 2693 non-null datetime64[ns]\n", - " 51 EXAME 4482 non-null string \n", - " 52 MICRO_HEMA 4482 non-null string \n", - " 53 OUTRO 4482 non-null string \n", - " 54 DT_COL_IND 155 non-null datetime64[ns]\n", - " 55 XENODIAG 4482 non-null string \n", - " 56 HEMOCULT 4482 non-null string \n", - " 57 DT_COL_S1 3300 non-null datetime64[ns]\n", - " 58 DT_COL_S2 790 non-null datetime64[ns]\n", - " 
59 ELI_IGM_S1 4482 non-null string \n", - " 60 ELI_IGG_S1 4482 non-null string \n", - " 61 ELI_IGM_S2 4482 non-null string \n", - " 62 ELI_IGG_S2 4482 non-null string \n", - " 63 HEM_IGM_S1 4482 non-null string \n", - " 64 HEM_IGG_S1 4482 non-null string \n", - " 65 HEM_IGM_S2 4482 non-null string \n", - " 66 HEM_IGG_S2 4482 non-null string \n", - " 67 IMU_IGM_S1 4482 non-null string \n", - " 68 TIT_IGM_S1 4482 non-null string \n", - " 69 IMU_IGM_S2 4482 non-null string \n", - " 70 TIT_IGM_S2 4482 non-null string \n", - " 71 IMU_IGG_S1 4482 non-null string \n", - " 72 TIT_IGG_S1 4482 non-null string \n", - " 73 IMU_IGG_S2 4482 non-null string \n", - " 74 TIT_IGG_S2 4482 non-null string \n", - " 75 RESUL_HIS 4482 non-null string \n", - " 76 RES_HIST 4482 non-null string \n", - " 77 ESPECIFICO 4482 non-null string \n", - " 78 SINTOMATIC 4482 non-null string \n", - " 79 DROGA 4482 non-null string \n", - " 80 TEMPO 4482 non-null string \n", - " 81 CON_TRIAT 4482 non-null string \n", - " 82 BIOSSEG 4482 non-null string \n", - " 83 FISCALIZA 4482 non-null string \n", - " 84 MED_OUTRO 4482 non-null string \n", - " 85 OUTRO_DES 4482 non-null string \n", - " 86 CLASSI_FIN 4482 non-null string \n", - " 87 CRITERIO 4482 non-null string \n", - " 88 EVOLUCAO 4482 non-null string \n", - " 89 DT_OBITO 54 non-null datetime64[ns]\n", - " 90 CON_PROVAV 4482 non-null string \n", - " 91 CON_OUTRA 4482 non-null string \n", - " 92 CON_LOCAL 4482 non-null string \n", - " 93 TPAUTOCTO 4482 non-null string \n", - " 94 COUFINF 4482 non-null string \n", - " 95 COPAISINF 4482 non-null string \n", - " 96 COMUNINF 4482 non-null string \n", - " 97 DOENCA_TRA 4482 non-null string \n", - " 98 DT_ENCERRA 4366 non-null datetime64[ns]\n", - "dtypes: datetime64[ns](10), float64(2), int64(4), string(83)\n", - "memory usage: 3.4 MB\n" + "Columns: 108 entries, TP_NOT to NU_LOTE_H\n", + "dtypes: Float64(2), Int64(4), datetime64[ns](16), string(86)\n", + "memory usage: 3.7 MB\n" ] } ], @@ -2244,7 +1326,7 
@@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2022-09-05T17:21:59.409288Z", @@ -2358,7 +1440,7 @@ "[4482 rows x 2 columns]" ] }, - "execution_count": 16, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -2382,7 +1464,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2022-09-05T17:22:02.591827Z", @@ -2397,6 +1479,988 @@ "df['SG_UF_NOT'] = df.SG_UF_NOT.astype(int)\n", "df[df.SG_UF_NOT==31].to_csv('chagas_SP_2018_mod.csv',sep=';',compression='zip')" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Combining SINAN and IBGE databases\n", + "\n", + "We could also use different sources to enrich the analysis. In the example below, population data will be used to calculate how many dengue cases per 100k inhabitants there are in each geocode:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from pysus.online_data import IBGE" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MUNIC_RESANOPOPULACAO
0110001201024422
1110002201090354
211000320106309
3110004201078601
4110005201017030
............
5560522200201012549
556152220520107371
556252222020104742
556352223020105145
556453001020102562963
\n", + "

5565 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " MUNIC_RES ANO POPULACAO\n", + "0 110001 2010 24422\n", + "1 110002 2010 90354\n", + "2 110003 2010 6309\n", + "3 110004 2010 78601\n", + "4 110005 2010 17030\n", + "... ... ... ...\n", + "5560 522200 2010 12549\n", + "5561 522205 2010 7371\n", + "5562 522220 2010 4742\n", + "5563 522230 2010 5145\n", + "5564 530010 2010 2562963\n", + "\n", + "[5565 rows x 3 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop = IBGE.get_population(source=\"POP\", year=2010)\n", + "pop" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████| 30.5M/30.5M [00:00<00:00, 31.1GB/s]\n" + ] + } + ], + "source": [ + "df = sinan.download(sinan.get_files('DENG', 2010)).to_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TP_NOTID_AGRAVODT_NOTIFICSEM_NOTNU_ANOSG_UF_NOTID_MUNICIPID_REGIONAID_UNIDADEDT_SIN_PRI...PLASMATICOEVIDENCIAPLAQ_MENORCON_FHDCOMPLICAHOSPITALIZDT_INTERNAUFMUNICIPIOTP_SISTEMA
02A902010-03-19201011201051510760157927016262010-03-14...1
12A902010-03-26201012201051510760157927016262010-03-21...1
22A902010-03-12201010201051510760157927016262010-03-07...1
32A902010-03-23201012201051510760157927016262010-03-18...1
42A902010-03-12201010201051510760157927016262010-03-07...1
..................................................................
13812492A902010-01-12201002201029291770140625333752009-12-13...0.0000000001
13812502A902010-01-07201001201029291770140625333752010-01-07...0.0000000001
13812512A902010-01-07201001201029291770140625333752010-01-04...0.0000000001
13812522A902010-01-17201003201029291770140625333752010-01-16...0.0000000001
13812532A902010-01-14201002201029291770140625333751982-04-05...0.0000000001
\n", + "

1381254 rows × 66 columns

\n", + "
" + ], + "text/plain": [ + " TP_NOT ID_AGRAVO DT_NOTIFIC SEM_NOT NU_ANO SG_UF_NOT ID_MUNICIP \\\n", + "0 2 A90 2010-03-19 201011 2010 51 510760 \n", + "1 2 A90 2010-03-26 201012 2010 51 510760 \n", + "2 2 A90 2010-03-12 201010 2010 51 510760 \n", + "3 2 A90 2010-03-23 201012 2010 51 510760 \n", + "4 2 A90 2010-03-12 201010 2010 51 510760 \n", + "... ... ... ... ... ... ... ... \n", + "1381249 2 A90 2010-01-12 201002 2010 29 291770 \n", + "1381250 2 A90 2010-01-07 201001 2010 29 291770 \n", + "1381251 2 A90 2010-01-07 201001 2010 29 291770 \n", + "1381252 2 A90 2010-01-17 201003 2010 29 291770 \n", + "1381253 2 A90 2010-01-14 201002 2010 29 291770 \n", + "\n", + " ID_REGIONA ID_UNIDADE DT_SIN_PRI ... PLASMATICO EVIDENCIA \\\n", + "0 1579 2701626 2010-03-14 ... \n", + "1 1579 2701626 2010-03-21 ... \n", + "2 1579 2701626 2010-03-07 ... \n", + "3 1579 2701626 2010-03-18 ... \n", + "4 1579 2701626 2010-03-07 ... \n", + "... ... ... ... ... ... ... \n", + "1381249 1406 2533375 2009-12-13 ... \n", + "1381250 1406 2533375 2010-01-07 ... \n", + "1381251 1406 2533375 2010-01-04 ... \n", + "1381252 1406 2533375 2010-01-16 ... \n", + "1381253 1406 2533375 1982-04-05 ... \n", + "\n", + " PLAQ_MENOR CON_FHD COMPLICA HOSPITALIZ DT_INTERNA UF \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... ... ... ... .. \n", + "1381249 0.000000000 \n", + "1381250 0.000000000 \n", + "1381251 0.000000000 \n", + "1381252 0.000000000 \n", + "1381253 0.000000000 \n", + "\n", + " MUNICIPIO TP_SISTEMA \n", + "0 1 \n", + "1 1 \n", + "2 1 \n", + "3 1 \n", + "4 1 \n", + "... ... ... \n", + "1381249 1 \n", + "1381250 1 \n", + "1381251 1 \n", + "1381252 1 \n", + "1381253 1 \n", + "\n", + "[1381254 rows x 66 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ID_MUNICIPTOTAL_CASES
191431062071056
415852087048397
386750027042761
6112004038333
314735434036107
.........
36394208101
39725103951
24213160201
15332905151
37894314801
\n", + "

4307 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " ID_MUNICIP TOTAL_CASES\n", + "1914 310620 71056\n", + "4158 520870 48397\n", + "3867 500270 42761\n", + "61 120040 38333\n", + "3147 354340 36107\n", + "... ... ...\n", + "3639 420810 1\n", + "3972 510395 1\n", + "2421 316020 1\n", + "1533 290515 1\n", + "3789 431480 1\n", + "\n", + "[4307 rows x 2 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Grouping all the cases by geocode\n", + "tot_cases = df[['ID_MUNICIP']].groupby('ID_MUNICIP').size().reset_index(name='TOTAL_CASES').sort_values(by='TOTAL_CASES', ascending=False)\n", + "tot_cases" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ID_MUNICIPTOTAL_CASESPOPULACAO
0310620710562375444
1520870483971301892
250027042761787204
312004038333335796
435434036107605114
............
4302420810120315
430351039513125
430431602014135
4305290515113666
4306431480130881
\n", + "

4307 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " ID_MUNICIP TOTAL_CASES POPULACAO\n", + "0 310620 71056 2375444\n", + "1 520870 48397 1301892\n", + "2 500270 42761 787204\n", + "3 120040 38333 335796\n", + "4 354340 36107 605114\n", + "... ... ... ...\n", + "4302 420810 1 20315\n", + "4303 510395 1 3125\n", + "4304 316020 1 4135\n", + "4305 290515 1 13666\n", + "4306 431480 1 30881\n", + "\n", + "[4307 rows x 3 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.merge(tot_cases, pop, left_on='ID_MUNICIP', right_on='MUNIC_RES')\n", + "df = df[['ID_MUNICIP', 'TOTAL_CASES', 'POPULACAO']]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ID_MUNICIPTOTAL_CASESPOPULACAOCASES_PER_100K
340411380630459213719.512195
708510719255219911596.180082
31200403833333579611415.561829
5431133036173232111190.866619
2727063072727043410324.559162
...............
388441191521171661.706980
41674312401594361.682482
41794322501613451.630125
39634119501932791.072053
418043224011255070.796768
\n", + "

4307 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ID_MUNICIP TOTAL_CASES POPULACAO CASES_PER_100K\n", + "340 411380 630 4592 13719.512195\n", + "708 510719 255 2199 11596.180082\n", + "3 120040 38333 335796 11415.561829\n", + "54 311330 3617 32321 11190.866619\n", + "27 270630 7272 70434 10324.559162\n", + "... ... ... ... ...\n", + "3884 411915 2 117166 1.706980\n", + "4167 431240 1 59436 1.682482\n", + "4179 432250 1 61345 1.630125\n", + "3963 411950 1 93279 1.072053\n", + "4180 432240 1 125507 0.796768\n", + "\n", + "[4307 rows x 4 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['CASES_PER_100K'] = (df['TOTAL_CASES'].astype(int) / df['POPULACAO'].astype(int)) * 100000\n", + "df = df.sort_values(by='CASES_PER_100K', ascending=False)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pysus.utilities.brasil import get_city_name_by_geocode\n", + "from pysus.preprocessing.decoders import calculate_digit\n", + "import matplotlib.pyplot as plt\n", + "\n", + "df = df.sort_values(by='CASES_PER_100K', ascending=True).head(20)\n", + "# Get city name\n", + "df['MUNICIP_NAME'] = df['ID_MUNICIP'].apply(lambda x: get_city_name_by_geocode(int(str(x) + str(calculate_digit(x)))))\n", + "\n", + "plt.figure(figsize=(10, 12))\n", + "bars = plt.barh(df['MUNICIP_NAME'], df['CASES_PER_100K'], color='skyblue')\n", + "plt.ylabel('City')\n", + "plt.xlabel('Cases per 100k inhabitants')\n", + "plt.title('Cases per 100k inhabitants by city')\n", + "\n", + "# Adding labels with CASES_PER_100K values\n", + "for bar in bars:\n", + " plt.text(bar.get_width(), bar.get_y() + bar.get_height()/2, f'{int(bar.get_width())}', \n", + " va='center', ha='left', color='black')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] } ], "metadata": { @@ -2415,7 +2479,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.8" }, "latex_envs": { "LaTeX_envs_menu_present": true, diff --git a/pysus/online_data/SINAN.py b/pysus/online_data/SINAN.py index e209b9b..83901fe 100644 --- a/pysus/online_data/SINAN.py +++ b/pysus/online_data/SINAN.py @@ -54,7 +54,6 @@ def metadata_df(disease_code: str) -> pd.DataFrame: header=0, sep=",", quotechar='"', - error_bad_lines=False, ) return df.iloc[:, 1:] diff --git a/pysus/utilities/brasil.py b/pysus/utilities/brasil.py index 6fbf1cc..e2655c9 100644 --- a/pysus/utilities/brasil.py +++ b/pysus/utilities/brasil.py @@ -2,6 +2,13 @@ from pathlib import Path from typing import Union +with open( + f"{Path(__file__).parent}/municipios.json", 'r', encoding='utf-8-sig' +) as muns: + MUNICIPALITIES = json.loads(muns.read()) + +MUN_BY_GEOCODE = {mun["geocodigo"]: mun["municipio"] for mun in 
MUNICIPALITIES} + UFs = { "BR": "Brasil", @@ -57,12 +64,4 @@ def get_city_name_by_geocode(geocode: Union[str, int]): :return: City name """ - with open(f"{Path(__file__).parent}/municipios.json") as muns: - _mun_decoded = muns.read().encode().decode("utf-8-sig") - municipalities = json.loads(_mun_decoded) - - mun_by_geocode = { - mun["geocodigo"]: mun["municipio"] for mun in municipalities - } - - return mun_by_geocode[int(geocode)] + return MUN_BY_GEOCODE[int(geocode)]