# Visualização Indicador Incidência

In [1]:
import numpy as np
import pandas as pd

import folium
import json
import os
import imageio
import time
import fileinput

from selenium import webdriver
from branca.colormap import linear
from branca.colormap import LinearColormap
from pathlib import Path
from PIL import Image

### Para criar a visualização da série para os anos de 2007 a 2019 - LV:

- 1. ler base de indicadores `../data/indicadores/visceral/2_taxa_geral_deteccao_lv_BR.csv`;
    - 1.1 fazer limpeza, preparar dados para visualização.
- 2. ler base de geolocalização `../data/geojsBR.json`:
    - 2.1 fazer reparos necessários (adicionar 5 municípios faltantes);
- 3. adicionar o código IBGE de 7 dígitos correto ao dataframe (SINAN utiliza apenas 6 dígitos)
- 4. gerar um arquivo/mapa html para cada ano com folium utilizando o arquivo de geolocalização e os indicadores.
- 5. com o webdriver criar snapshots (png) de cada mapa e salvar as imagens.
- 6. com PIL.Image cortar as imagens png (preparação)
- 7. com imageio criar um arquivo gif com as imagens anteriormente preparadas mostrando a evolução a cada ano dos indicadores.

In [2]:
# Load the LV detection-rate indicators, lower-casing municipality names and
# storing the 6-digit ids with the pandas 'string' dtype.
df = pd.read_csv(
    '../data/indicadores/visceral/2_taxa_geral_deteccao_lv_BR.csv'
).assign(
    municipio=lambda frame: frame['municipio'].str.lower(),
    id=lambda frame: frame['id'].astype('string'),
)
df

Unnamed: 0,id,municipio,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,110001,alta floresta d'oeste,0,0,0,0,0,0,0,0,0,0,0,0,0
1,110037,alto alegre dos parecis,0,0,0,0,0,0,0,0,0,0,0,0,0
2,110040,alto paraíso,0,0,0,0,0,0,0,0,0,0,0,0,0
3,110034,alvorada d'oeste,0,0,0,0,0,0,0,0,0,0,0,0,0
4,110002,ariquemes,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5651,522120,tocantinópolis (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5652,522208,wanderlândia (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5653,522210,xambioá (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5654,520000,município ignorado - go,0,0,0,0,0,0,0,0,0,0,0,0,0


In [3]:
# Years covered by the series (2007 through 2019), kept as strings because
# they are used as DataFrame column labels.
anos = [str(ano) for ano in range(2007, 2020)]

## Dealing Data
***
60 municípios foram transferidos de Goiás para Tocantins.
Observa-se que não tem nenhum registro de caso para eles, visto que os casos são a partir do ano 2007, quando já tinham sido transferidos para Tocantins. Estes se iniciam com o código 52, de Goiás.
Existem os registros dos mesmos municípios transferidos, agora com registros de casos, já com o código 17, Tocantins. Alguns tiveram os nomes sutilmente atualizados.

- código UF Goiás: 52
- código UF Tocantins: 17

```
aurora do norte = aurora do tocantins
axixá de goiás = axixá do tocantins
colinas de goiás = colinas do tocantins
conceição do norte = conceição do tocantins
dois irmãos = dois irmãos do tocantins
itaporã de goiás = itaporã do tocantins
miracema do norte = miracema do tocantins
paraíso do norte de goiás = paraíso do tocantins
pindorama de goiás = pindorama do tocantins
ponte alta do norte = ponte alta do tocantins
sítio novo de goiás = sítio novo do tocantins
```

Ação: remover esses registros, pois não interessam para a análise. Esses municípios, que estão como sendo de Goiás e que não têm registro de casos, já foram transferidos para Tocantins. 
***

In [4]:
# Rows whose municipality name carries the "(transf. p/to)" suffix:
# print their shape, then display them.
mask_transf = df['municipio'].str.endswith('(transf. p/to)')
print(df[mask_transf].shape)
df[mask_transf]

(60, 15)


Unnamed: 0,id,municipio,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
5594,520040,almas (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5595,520070,alvorada (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5596,520100,ananás (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5597,520190,araguacema (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5598,520200,araguaçu (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5599,520210,araguaína (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5600,520220,araguatins (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5601,520230,arapoema (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5602,520240,arraias (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0
5603,520255,augustinópolis (transf. p/to),0,0,0,0,0,0,0,0,0,0,0,0,0


In [5]:
# Does any "(transf. p/to)" row have a non-zero total across the years?
linhas_transf = df.loc[df['municipio'].str.endswith('(transf. p/to)'), anos]
linhas_transf.sum(axis=1).any()

False

In [6]:
# Remove the "(transf. p/to)" rows and renumber the index from zero.
indices_transf = df.index[df['municipio'].str.endswith('(transf. p/to)')]
df = df.drop(index=indices_transf).reset_index(drop=True)

In [7]:
# "Ignored municipality" placeholder rows: one per UF (26 in total),
# identified by an id ending in '0000'.
df[df['id'].str.endswith('0000')]

Unnamed: 0,id,municipio,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
52,110000,município ignorado - ro,0,0,0,0,0,0,0,0,0,0,0,0,0
75,120000,município ignorado - ac,0,0,0,0,0,0,0,0,0,0,0,0,0
138,130000,município ignorado - am,0,0,0,0,0,0,0,0,0,0,0,0,0
154,140000,município ignorado - rr,0,0,0,0,0,0,0,0,0,0,0,0,0
299,150000,município ignorado - pa,0,0,0,0,0,0,0,0,0,0,0,0,0
316,160000,município ignorado - ap,0,0,0,0,0,0,0,0,0,0,0,0,0
456,170000,município ignorado - to,0,0,0,0,0,0,0,0,0,0,0,0,0
674,210000,município ignorado - ma,0,0,0,0,0,0,0,0,0,0,0,0,0
899,220000,município ignorado - pi,0,0,0,0,0,0,0,0,0,0,0,0,0
1084,230000,município ignorado - ce,0,0,0,0,0,0,0,0,0,0,0,0,0


In [8]:
# Check whether any placeholder row has recorded cases; if none does,
# these rows can be discarded.
linhas_ignoradas = df.loc[df['id'].str.endswith('0000'), anos]
linhas_ignoradas.sum(axis=1).any()

False

In [9]:
# Remove the placeholder rows (ids ending in '0000') and renumber the index.
indices_ignorados = df.index[df['id'].str.endswith('0000')]
df = df.drop(index=indices_ignorados).reset_index(drop=True)

In [10]:
# Small name fix: 'parati' is rewritten as 'paraty'.
df['municipio'] = df['municipio'].replace('parati', 'paraty')

***
Neste ponto vamos carregar o arquivo de geolocalização dos municípios brasileiros. Baixar e salvar em `../data/geojsBR.json` (caminho lido pelo código a seguir).

Neste arquivo foi verificada a ausência de 5 municípios:

```
- "id": "1504752", "name": "Mojuí dos Campos"
- "id": "2206720", "name": "Nazária"
- "id": "4212650", "name": "Pescaria Brava"
- "id": "4220000", "name": "Balneário Rincão"
- "id": "4314548", "name": "Pinto Bandeira"
```

Ação: copiei a geolocalização deles a partir do repositório do `eduardo-veras` e colei no arquivo baixado `geojsBR.json`.

- Fonte: [geolocalização json - via datalivre](https://raw.githubusercontent.com/datalivre/Conjunto-de-Dados/master/br_states.json)
- Fonte: [eduardo-veras](https://github.com/eduardo-veras/kml-brasil/tree/master/lib/2010/municipios)
***

In [11]:
# Load the municipality GeoJSON. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open.
with open('../data/geojsBR.json', encoding='utf-8') as geojson_file:
    geolocation = json.load(geojson_file)

In [12]:
# Compare the dataframe shape with the number of GeoJSON features: both
# sources are expected to hold the same number of municipalities.
df.shape, len(geolocation['features'])

((5570, 15), 5570)

In [13]:
# Each entry of geolocation['features'] describes one municipality.
# Collect every id (as a string) and name, then build an id -> name lookup
# used by the checks that follow.
ids = [str(feature['properties'].get('id')) for feature in geolocation['features']]
names = [feature['properties'].get('name') for feature in geolocation['features']]

municipio_codes = dict(zip(ids, names))

In [14]:
# Do both sources hold the same number of distinct municipality ids?
df_mun = set(df['id'])
geo_mun = set(municipio_codes)
(len(df_mun), len(geo_mun))

(5570, 5570)

In [15]:
# Is there any municipality name present in one source but not the other?
# Note: some municipalities share a name, and sets ignore repetitions.

df_mun = set(df['municipio'])
geo_mun = {nome.lower() for nome in municipio_codes.values()}
(df_mun - geo_mun, geo_mun - df_mun)

(set(), set())

*** 
Para criarmos a visualização dos indicadores no mapa, usando folium e o arquivo de geolocalização, precisamos que os ids do arquivo `geojsBR.json` sejam iguais ao do dataframe. E não são. O SINAN considera o código IBGE sem o dígito verificador, ou seja, 6 dígitos apenas. No geojson temos 7 dígitos! Isso deve ser consertado.
Antes vamos fazer algumas checagens.
***

Estratégia:

- iterar os ids do dataframe e os do json verificando se o primeiro está contido no segundo. Se sim, adiciona-o em uma nova coluna de ids de 7 dígitos.

- para isso deve-se garantir que o dataframe e json estejam na mesma ordem.

```
a=['101','291','387','443']
b=['10', '29', '38', '44']
result = np.array([True if i.startswith(j) else False for i, j in zip(a, b)])
if result.all():
    id_cv = a
```

In [16]:
# Put both sources in ascending id order so they can be compared row by row.
df = df.sort_values('id').reset_index(drop=True)
municipio_codes = {codigo: municipio_codes[codigo] for codigo in sorted(municipio_codes)}

In [17]:
# Attach the 7-digit GeoJSON code to each dataframe row as column 'id_cv'.
# Both sequences are paired by position, so every 7-digit code must start
# with the 6-digit id found at the same position.
codigos_7 = list(municipio_codes)
codigos_6 = df['id'].tolist()

# zip() would silently truncate to the shorter sequence; fail loudly instead.
if len(codigos_7) != len(codigos_6):
    raise ValueError(
        f'Municipality count mismatch: {len(codigos_7)} GeoJSON codes vs '
        f'{len(codigos_6)} dataframe ids'
    )

divergentes = [
    (codigo_7, codigo_6)
    for codigo_7, codigo_6 in zip(codigos_7, codigos_6)
    if not codigo_7.startswith(codigo_6)
]
# Stop here rather than continue without 'id_cv', which would only surface
# later as an unrelated KeyError.
if divergentes:
    raise ValueError(
        f'{len(divergentes)} GeoJSON codes do not match the dataframe ids at the '
        f'same position (first pairs: {divergentes[:5]})'
    )

df['id_cv'] = codigos_7

In [18]:
# Display the frame after the 'id_cv' assignment step.
df

Unnamed: 0,id,municipio,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,id_cv
0,110001,alta floresta d'oeste,0,0,0,0,0,0,0,0,0,0,0,0,0,1100015
1,110002,ariquemes,0,0,0,0,0,0,0,0,0,0,0,0,0,1100023
2,110003,cabixi,0,0,0,0,0,0,0,0,0,0,0,0,0,1100031
3,110004,cacoal,0,0,0,0,0,0,0,0,0,0,0,0,0,1100049
4,110005,cerejeiras,0,0,0,0,0,0,0,0,0,0,0,0,0,1100056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,522200,vianópolis,0,0,0,8,0,0,0,0,0,0,0,0,0,5222005
5566,522205,vicentinópolis,0,0,0,0,0,0,0,0,0,0,0,0,0,5222054
5567,522220,vila boa,0,0,0,0,0,0,0,0,0,0,0,0,0,5222203
5568,522230,vila propício,0,0,0,0,0,0,0,0,0,0,0,0,0,5222302


In [19]:
# Column dtypes of the working frame.
df.dtypes

id           string
municipio    object
2007         object
2008         object
2009         object
2010         object
2011         object
2012         object
2013         object
2014         object
2015         object
2016         object
2017         object
2018         object
2019         object
id_cv        object
dtype: object

In [20]:
# Convert the yearly columns to float, replacing ',' with '.' in the text
# before parsing.
# Casting to str first keeps the cell re-runnable and tolerant of columns
# that pandas already parsed as numbers, where the `.str` accessor would raise.
# regex=False makes the comma a literal replacement regardless of the pandas
# default for `str.replace`.
df[anos] = (
    df[anos]
    .astype(str)
    .apply(lambda coluna: coluna.str.replace(',', '.', regex=False))
    .astype(float)
)
df.tail()

Unnamed: 0,id,municipio,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,id_cv
5565,522200,vianópolis,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5222005
5566,522205,vicentinópolis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5222054
5567,522220,vila boa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5222203
5568,522230,vila propício,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5222302
5569,530010,brasília,0.1,0.2,0.2,0.1,0.3,0.3,0.1,0.0,0.1,0.2,0.0,0.1,0.0,5300108


In [21]:
# Summary statistics, transposed so that each described column becomes a row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
2007,5570.0,1.639372,8.072111,0.0,0.0,0.0,0.0,187.5
2008,5570.0,1.603411,7.31832,0.0,0.0,0.0,0.0,192.2
2009,5570.0,1.834919,8.323076,0.0,0.0,0.0,0.0,142.3
2010,5570.0,1.85833,7.797508,0.0,0.0,0.0,0.0,179.3
2011,5570.0,2.212765,11.409832,0.0,0.0,0.0,0.0,341.9
2012,5570.0,1.625386,8.90927,0.0,0.0,0.0,0.0,465.5
2013,5570.0,1.773285,7.438335,0.0,0.0,0.0,0.0,223.6
2014,5570.0,1.972729,8.368433,0.0,0.0,0.0,0.0,215.3
2015,5570.0,1.855691,6.67746,0.0,0.0,0.0,0.0,111.1
2016,5570.0,1.803321,6.850773,0.0,0.0,0.0,0.0,144.9


In [22]:
# TODO (point to improve): the [min, max] range is wide, which makes colour
# variation on the map hard to see. Standardise the data? Transform it?
# Which transformation?

# One colour scale shared by all years, spanning the global minimum and
# maximum of the yearly columns.
taxa_min = df[anos].values.min()
taxa_max = df[anos].values.max()

colormap = linear.YlOrRd_09.scale(taxa_min, taxa_max)
colormap.caption = 'Taxa de incidência de LV no Brasil'
colormap

In [23]:
# First turn the new 'id_cv' column into the index, so rows can be looked up
# by that code with df.loc[...].
df = df.set_index('id_cv')

In [24]:
# Render one choropleth map per year and save each one as a standalone HTML file.

# Make sure the target folder exists before the first save.
os.makedirs('../output/mapas_html', exist_ok=True)

for ano in anos:
    # Year label placed on top of the map so each saved page shows its year.
    title_html = f'''
    <h1 style="font-size:32px; position: absolute; left: 50vh; top: 49vh; z-index:999">{ano}</h1>
    '''

    mapa = folium.Map(
        width=800, height=550,
        location=[-15.77972, -47.92972],
        zoom_start=4
    )
    mapa.get_root().html.add_child(folium.Element(title_html))

    # Build the id -> rate lookup once per year instead of running a
    # df.loc lookup for every feature.
    taxas_do_ano = df[ano].to_dict()

    folium.GeoJson(
        geolocation,
        # `taxas` is bound as a default argument so the function keeps this
        # iteration's data even if it is called after the loop variable
        # has moved on (closures bind late).
        style_function=lambda feature, taxas=taxas_do_ano: {
            'fillColor': colormap(taxas[feature['properties']['id']]),
            'color': 'black',
            'weight': 0.2,
        }
    ).add_to(mapa)

    colormap.add_to(mapa)

    mapa.save(f'../output/mapas_html/{ano}.html')
    

In [25]:
# Remove the zoom control from every saved map by flipping the
# `zoomControl` option inside the generated HTML.
# The file is read and written explicitly as UTF-8 rather than with the
# platform default encoding, and without redirecting stdout.
for ano in anos:
    html_path = Path(f'../output/mapas_html/{ano}.html')
    html = html_path.read_text(encoding='utf-8')
    html_path.write_text(
        html.replace('zoomControl: true', 'zoomControl: false'),
        encoding='utf-8',
    )


In [26]:
# Open each saved HTML map in Chrome and store a PNG screenshot of it.
delay = 5  # seconds to wait before taking the screenshot

base_dir = os.path.abspath("..")
# Make sure the screenshot folder exists before the first save.
os.makedirs(f'{base_dir}/output/mapas_png', exist_ok=True)

for ano in anos:
    tmpurl = f'file://{base_dir}/output/mapas_html/{ano}.html'

    # NOTE(review): `executable_path` is kept as in the original; confirm it
    # is still accepted by the installed Selenium version.
    browser = webdriver.Chrome(executable_path='../venv/bin/chromedriver')
    try:
        browser.get(tmpurl)

        # Give the map tiles some time to load
        time.sleep(delay)
        browser.save_screenshot(f'{base_dir}/output/mapas_png/{ano}.png')
    finally:
        # Always shut the browser down, even when loading or the screenshot
        # fails, so no Chrome process is left running.
        browser.quit()

In [27]:
# Crop every screenshot in place to the same region.
box = (100, 0, 600, 600)  # (left, upper, right, lower) in pixels

for ano in anos:
    png_path = f'{os.path.abspath("..")}/output/mapas_png/{ano}.png'
    # Close the source file before overwriting it; crop() returns a separate
    # image object that is saved after the handle is released.
    with Image.open(png_path) as image:
        cropped_image = image.crop(box)
    cropped_image.save(png_path)

In [28]:
# Assemble the animated GIF: read every PNG in the screenshot folder in
# sorted path order and write them as consecutive frames.
image_path = Path(f'{os.path.abspath("..")}/output/mapas_png')
images = sorted(image_path.glob('*.png'))
image_list = [imageio.imread(png_file) for png_file in images]

imageio.mimwrite(f'{os.path.abspath("..")}/output/incidencia.gif', image_list, fps=1)

Referência:

- https://towardsdatascience.com/how-to-make-an-animated-gif-map-in-python-using-folium-and-imageio-91d3fc60d084