# Covid no Estado do Ceará

In [1]:
# importando as bibliotecas necessárias

import pandas as pd
import numpy as np
import requests
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import iplot
import datetime as dt
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

* COVID-19 case data: https://brasil.io/home/
* Municipality data (IBGE code, latitude/longitude): https://github.com/kelvins/Municipios-Brasileiros

In [2]:
# Download every page of the Ceará (state=CE) case data from the brasil.io API.
# Each page is requested exactly once and its parsed payload is reused; the
# per-page frames are gathered in a list and concatenated a single time.
url = 'https://brasil.io/api/dataset/covid19/caso/data/?page=1&state=CE'

pages = []
while url is not None:
    req = requests.get(url)
    # Any non-200 answer ends the pagination; pages collected so far are kept.
    if req.status_code != 200:
        print('Stopping: status', req.status_code, 'returned by', url)
        break

    print('Collecting data from: ', url)

    payload = req.json()
    pages.append(pd.DataFrame(payload['results']))
    # 'next' carries the URL of the following page and is None on the last one.
    url = payload['next']

# Row labels restart on every page (no ignore_index), exactly as before.
data = pd.concat(pages) if pages else pd.DataFrame()

data.info()

Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=1&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=2&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=3&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=4&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=5&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=6&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=7&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=8&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=9&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=10&state=CE
Collecting data from:  https://brasil.io/api/dataset/covid19/caso/data/?page=11&state=CE
Collecting data from:  https:/

In [3]:
# Normalise column types: IBGE code as a number, date as a 'YYYY-MM-DD' string.
data['city_ibge_code'] = pd.to_numeric(data['city_ibge_code'])
data['date'] = pd.to_datetime(data['date']).dt.strftime("%Y-%m-%d")

# Rename 'Itapajé' to 'Itapagé'. The result is assigned back to the column:
# an in-place replace on a selected column is not guaranteed to reach `data`
# (it does not under pandas copy-on-write).
data['city'] = data['city'].replace('Itapajé', 'Itapagé')

# Rows without a city name are set apart as `data_ce`. The explicit copy makes
# it an independent frame, so later edits to it never act on a slice of `data`.
data_ce = data[data['city'].isna()].copy()
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16567 entries, 0 to 566
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   city                            16442 non-null  object 
 1   city_ibge_code                  16450 non-null  float64
 2   confirmed                       16567 non-null  int64  
 3   confirmed_per_100k_inhabitants  16389 non-null  float64
 4   date                            16567 non-null  object 
 5   death_rate                      16567 non-null  float64
 6   deaths                          16567 non-null  int64  
 7   estimated_population_2019       16450 non-null  float64
 8   is_last                         16567 non-null  bool   
 9   order_for_place                 16567 non-null  int64  
 10  place_type                      16567 non-null  object 
 11  state                           16567 non-null  object 
dtypes: bool(1), float64(4), int64(3), 

In [4]:
# Municipality reference table read from a local CSV; .info() lists its
# columns and dtypes.
municipios_csv = 'data/municipios.csv'
cities_lat_long = pd.read_csv(municipios_csv)
cities_lat_long.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5570 entries, 0 to 5569
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   codigo_ibge  5570 non-null   int64  
 1   nome         5570 non-null   object 
 2   latitude     5570 non-null   float64
 3   longitude    5570 non-null   float64
 4   capital      5570 non-null   int64  
 5   codigo_uf    5570 non-null   int64  
dtypes: float64(2), int64(3), object(1)
memory usage: 239.4+ KB


In [5]:
# Preview the first two rows of data_ce (the rows that have no city name).
data_ce.head(2)

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
0,,23.0,146972,1609.40369,2020-07-18,0.0488,7178,9132078.0,True,125,state,CE
186,,23.0,146064,1599.46071,2020-07-17,0.0491,7166,9132078.0,False,124,state,CE


In [6]:
# Preview the first two rows of the full frame.
data.head(2)

Unnamed: 0,city,city_ibge_code,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,order_for_place,place_type,state
0,,23.0,146972,1609.40369,2020-07-18,0.0488,7178,9132078.0,True,125,state,CE
1,Abaiara,2300101.0,33,281.16214,2020-07-17,0.0303,1,11737.0,True,81,city,CE


In [7]:
# Attach the municipality reference columns to every case row by matching
# `city_ibge_code` against `codigo_ibge`. The inner join keeps only rows whose
# code exists in both frames.
data = pd.merge(
    data,
    cities_lat_long,
    left_on='city_ibge_code',
    right_on='codigo_ibge',
    how='inner',
)
data.columns

Index(['city', 'city_ibge_code', 'confirmed', 'confirmed_per_100k_inhabitants',
       'date', 'death_rate', 'deaths', 'estimated_population_2019', 'is_last',
       'order_for_place', 'place_type', 'state', 'codigo_ibge', 'nome',
       'latitude', 'longitude', 'capital', 'codigo_uf'],
      dtype='object')

In [8]:
# Show the (rows, columns) shape of both frames.
print(data.shape, data_ce.shape)

(16325, 18) (125, 12)


In [9]:
# Remove the columns that are not used further on. The result is rebound to
# the same name instead of dropping in place: `data_ce` was created by
# filtering `data`, and mutating such a slice in place raises pandas'
# SettingWithCopyWarning.
data = data.drop(columns=['order_for_place', 'place_type', 'codigo_ibge', 'nome', 'capital', 'codigo_uf'])
data_ce = data_ce.drop(columns=['order_for_place', 'place_type'])
print(data.shape, data_ce.shape)

(16325, 12) (125, 10)


In [10]:
# List the column labels of data_ce.
data_ce.columns

Index(['city', 'city_ibge_code', 'confirmed', 'confirmed_per_100k_inhabitants',
       'date', 'death_rate', 'deaths', 'estimated_population_2019', 'is_last',
       'state'],
      dtype='object')

In [11]:
# List the column labels of data.
data.columns

Index(['city', 'city_ibge_code', 'confirmed', 'confirmed_per_100k_inhabitants',
       'date', 'death_rate', 'deaths', 'estimated_population_2019', 'is_last',
       'state', 'latitude', 'longitude'],
      dtype='object')

Abaixo eu crio um DataFrame com o código do IBGE e a respectiva cidade, e em seguida eu junto com os dados anteriores.
Fiz isso porque, ao tentar plotar as informações, o gráfico ficava todo preto: por algum motivo o Código do IBGE do arquivo Json e o do DataFrame fornecido pelo brasil.io não eram iguais.

No arquivo Json o Código do IBGE é uma string, e por mais que eu convertesse os valores do DataFrame para string, ainda assim aparentemente eles não eram iguais (mesmo eu claramente vendo que eram iguais). Provavelmente isso acontece porque a coluna `city_ibge_code` é `float64`, de modo que a conversão para string gera valores como '2300101.0' em vez de '2300101'. Comprovei a diferença ao tentar fazer inner join nos dois datasets pelo Código do IBGE: o retorno era um DataFrame vazio. Quando tentei fazer o mesmo com os nomes das cidades, funcionou.

Então decidi fazer inner join com os nomes das cidades e excluir a coluna do Código do IBGE do DataFrame inicial, e a partir de então utilizar o Código do IBGE que extraí do arquivo Json, que funcionou.

Outros pontos que foram corrigidos:
* O município de Itapagé está escrito como 'Itapajé' no dataset do brasil.io.

In [12]:
import json

# Load the municipality GeoJSON. The context manager closes the file handle
# as soon as parsing finishes (the bare open() call used to leave it open).
with open('data/geojs-23-mun.json') as geojson_file:
    ce_cities = json.load(geojson_file)

# Collect the id and name stored in each feature's properties.
id_cidade = [feature['properties']['id'] for feature in ce_cities['features']]
nome_cidade = [feature['properties']['name'] for feature in ce_cities['features']]

# Lookup table: one row per feature, pairing its id with its name.
ce = pd.DataFrame({'ibge_code': id_cidade, 'city': nome_cidade})

In [13]:
# Show the (rows, columns) shape of the lookup table built from the GeoJSON.
ce.shape

(184, 2)

In [14]:
# Bring in the GeoJSON `ibge_code` by joining on the city name, then discard
# the original `city_ibge_code` column. The key is named explicitly so the join
# cannot silently change if the two frames ever share another column; the
# outer join also keeps cities that appear in only one of the two frames.
data = data.merge(ce, how='outer', on='city').drop(columns='city_ibge_code')

In [15]:
# Preview the first rows of the merged frame.
data.head()

Unnamed: 0,city,confirmed,confirmed_per_100k_inhabitants,date,death_rate,deaths,estimated_population_2019,is_last,state,latitude,longitude,ibge_code
0,Abaiara,33,281.16214,2020-07-17,0.0303,1,11737.0,True,CE,-7.34588,-39.0416,2300101
1,Abaiara,33,281.16214,2020-07-16,0.0303,1,11737.0,False,CE,-7.34588,-39.0416,2300101
2,Abaiara,29,247.08188,2020-07-15,0.0345,1,11737.0,False,CE,-7.34588,-39.0416,2300101
3,Abaiara,27,230.04175,2020-07-14,0.037,1,11737.0,False,CE,-7.34588,-39.0416,2300101
4,Abaiara,27,230.04175,2020-07-13,0.037,1,11737.0,False,CE,-7.34588,-39.0416,2300101


# Mapa Confirmados

* Geodata Ceará on: https://github.com/tbrugz/geodata-br

In [16]:
# Columns carried into the per-city snapshot.
columns = [
    'ibge_code', 'city', 'latitude', 'longitude',
    'deaths', 'death_rate',
    'confirmed', 'confirmed_per_100k_inhabitants',
]

# Keep the rows flagged `is_last`; rows where the flag is missing count as
# True and are kept too. Remaining missing values are replaced with 0.
is_latest = data['is_last'].fillna(True)
current_data = data.loc[is_latest, columns].fillna(0)

In [17]:
# Preview the first rows of the per-city snapshot.
current_data.head()

Unnamed: 0,ibge_code,city,latitude,longitude,deaths,death_rate,confirmed,confirmed_per_100k_inhabitants
0,2300101,Abaiara,-7.34588,-39.0416,1,0.0303,33,281.16214
81,2300150,Acarape,-4.22083,-38.7055,14,0.0173,807,5405.58644
171,2300200,Acaraú,-2.88769,-40.1183,49,0.0236,2072,3307.73774
262,2300309,Acopiara,-6.08911,-39.448,20,0.0526,380,700.20269
357,2300408,Aiuaba,-6.57122,-40.1178,1,0.0833,12,68.96948


In [32]:
!pip install psutil

Collecting psutil
  Downloading psutil-5.7.2-cp38-cp38-win32.whl (239 kB)
Installing collected packages: psutil
Successfully installed psutil-5.7.2


In [38]:
# Choropleth of confirmed cases per municipality, coloured on a log2 scale.
# The +1 offset keeps municipalities with zero confirmed cases finite
# (log2(0) is -inf; zeros are possible because missing values were filled
# with 0) and matches the offset used by the deaths map.
max_confirmed = int(current_data['confirmed'].max())

fig = px.choropleth(current_data,
                    geojson=ce_cities,
                    color=np.log2(current_data['confirmed'] + 1),
                    color_continuous_scale='YlOrRd',
                    locations="city",
                    featureidkey="properties.name",
                    projection="mercator",
                    hover_data=['confirmed', 'deaths'],
                    title='Casos de Covid-19 no Ceará',
                    width=700
                    )

fig.update_geos(fitbounds="locations",
                visible=False)

# Colour-bar ticks: round case counts plus the observed maximum, placed with
# the same log2(x + 1) transform as the colours so labels line up with values.
val = [1,10,100,1000,10000, max_confirmed]
val_log = np.log2(np.array(val) + 1)

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  coloraxis_colorbar=dict(title="Casos Confirmados",
                                          tickvals=val_log,
                                          ticktext=["","10", "100", "1000", "10000", str(max_confirmed)],
                                          lenmode="pixels", len=200))

fig.write_html('maps/confirmed_cases.html')
fig.show()

In [19]:
# Choropleth of deaths per municipality, coloured by log2(deaths + 1) so that
# municipalities with zero deaths stay finite.
max_deaths = int(current_data['deaths'].max())

fig = px.choropleth(current_data,
                    geojson=ce_cities,
                    color=np.log2(current_data['deaths']+1),
                    color_continuous_scale='OrRd',
                    locations="city",
                    featureidkey="properties.name",
                    projection="mercator",
                    hover_data=['confirmed', 'deaths'],
                    title='Mortes por Covid-19 no Ceará',
                    width=700
                    )

fig.update_geos(fitbounds="locations",
                visible=False)

# Colour-bar ticks must go through the same log2(x + 1) transform as the
# colours; plain log2(x) placed every label slightly below the value it names.
val = [1,10,100,1000, max_deaths]
val_log = np.log2(np.array(val) + 1)

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  coloraxis_colorbar=dict(title="Óbitos Confirmados",
                                          tickvals=val_log,
                                          ticktext=["","10", "100", "1000", str(max_deaths)],
                                          lenmode="pixels", len=200))

fig.write_html('maps/deaths.html')
fig.show()

In [42]:
# Rank the municipalities by confirmed cases, highest first.
df = current_data.sort_values(by='confirmed', ascending = False)

# Ranking positions 1..20 (the top entry is skipped), put back in ascending
# order for plotting.
# NOTE(review): the skipped entry is presumably the capital, given the chart
# title — confirm.
interior_top = df.iloc[1:21].sort_values(by = 'confirmed')

bar_marker = dict(color = 'rgba(255, 51, 51, 0.9)',
                  line=dict(color='rgb(35, 32, 33)', width=1.5))

trace = go.Bar(x = interior_top.confirmed,
               y = interior_top.city,
               orientation='h',
               marker = bar_marker,
               hoverinfo = 'x')

layout = dict(
    title='Casos Confirmados em cidades do Interior',
    yaxis=dict(title='Cidades'),
    xaxis=dict(title='Número de casos confirmados'),
)
fig = go.Figure(data = trace, layout=layout)
fig.write_html('graphs/casos_confirmados_interior.html')
iplot(fig)
    

In [21]:
# Rank the municipalities by deaths, highest first.
df = current_data.sort_values(by='deaths', ascending = False)

# Ranking positions 1..20 (the top entry is skipped), put back in ascending
# order for plotting.
# NOTE(review): the skipped entry is presumably the capital, given the chart
# title — confirm.
interior_top = df.iloc[1:21].sort_values(by='deaths')

bar_marker = dict(color = 'rgba(255, 51, 51, 0.9)',
                  line=dict(color='rgb(35, 32, 33)', width=1.5))

trace = go.Bar(x = interior_top.deaths,
               y = interior_top.city,
               orientation='h',
               marker = bar_marker,
               hoverinfo = 'x')

layout = dict(
    title='Óbitos em cidades do interior',
    yaxis=dict(title='Cidades'),
    xaxis=dict(title='Número de óbitos'),
)
fig = go.Figure(data = trace, layout=layout)
fig.write_html('graphs/deaths.html')
iplot(fig)

In [22]:
# Top 20 municipalities by confirmed cases per 100k inhabitants.
# The ranking is computed here from `current_data`. Reusing `df` made the chart
# depend on whichever sort an earlier cell had left behind (deaths), so it
# showed the rate of the 20 municipalities with the most deaths instead.
top_per_100k = (
    current_data
    .sort_values(by='confirmed_per_100k_inhabitants', ascending=False)
    .iloc[0:20]
    # Ascending order for plotting.
    .sort_values(by='confirmed_per_100k_inhabitants')
)

trace = go.Bar(x = top_per_100k.confirmed_per_100k_inhabitants,
               y = top_per_100k.city,
               orientation='h',
               marker = dict(color = 'rgba(255, 51, 51, 0.9)',
                             line=dict(color='rgb(35, 32, 33)', width=1.5)),
               hoverinfo = 'x')

layout = {
    'title': 'Casos confirmados a cada 100 mil habitantes',
    'yaxis': {'title': 'Cidades'},
    'xaxis': {'title': 'Casos por 100mil hab.'}
}
fig = go.Figure(data = trace, layout=layout)

fig.write_html('graphs/Cases_per_100k_hab.html')
iplot(fig)

# Preparar Séries Temporais

In [23]:
# Wide tables with one row per city and one column per date; combinations
# with no value are filled with 0 and everything is cast to int.
ts_total_casos = (
    data.pivot_table(index='city', columns='date', values='confirmed')
    .fillna(0)
    .astype('int')
)

ts_total_obitos = (
    data.pivot_table(index='city', columns='date', values='deaths')
    .fillna(0)
    .astype('int')
)

# data_ce ordered by date, with the date as index.
ce_by_date = data_ce.sort_values(by='date')
ts_ce = ce_by_date.set_index('date')

In [26]:
# Bar chart of the `confirmed` column of ts_ce over its date index.
trace = go.Bar(
    x=ts_ce.index,
    y=ts_ce['confirmed'],
    name='Casos confirmados',
    marker={'color': 'rgba(0, 0, 255, 0.8)'},
)

layout = go.Layout(
    title='Série Temporal de casos confirmados no Ceará',
    barmode='relative',
)
fig = go.Figure(data=trace, layout=layout)
fig.write_html('graphs/ts_casos_ce.html')
fig.show()

In [27]:
# Bar chart of the `deaths` column of ts_ce over its date index; each bar
# also carries its value as text.
obitos_ce = ts_ce['deaths']
trace = go.Bar(
    x=ts_ce.index,
    y=obitos_ce,
    name='Óbitos confirmados',
    marker={'color': 'rgba(255, 0, 0, 0.8)'},
    text=obitos_ce,
)

layout = go.Layout(
    title='Série Temporal de óbitos por COVID-19 no Ceará',
    barmode='relative',
)
fig = go.Figure(data=trace, layout=layout)
fig.write_html('graphs/ts_obitos_ce.html')
fig.show()

In [28]:
# Fortaleza time series.
# Order both wide tables by their value on 2020-06-10, highest first.
ts_total_casos = ts_total_casos.sort_values(by='2020-06-10', ascending=False)
ts_total_obitos = ts_total_obitos.sort_values(by='2020-06-10', ascending=False)

# Bar chart of Fortaleza's row across all date columns.
trace = go.Bar(
    x=ts_total_casos.columns,
    y=ts_total_casos.loc['Fortaleza'],
    name='Casos Confirmados',
    marker={'color': 'rgba(0, 0, 255, 0.8)'},
)

layout = go.Layout(
    title='Série Temporal de casos confirmados em Fortaleza',
    barmode='relative',
)
fig = go.Figure(data=trace, layout=layout)
fig.write_html('graphs/ts_casos_fortal.html')
fig.show()

In [29]:
# Bar chart of Fortaleza's row of the deaths table across all date columns.
trace = go.Bar(
    x=ts_total_obitos.columns,
    y=ts_total_obitos.loc['Fortaleza'],
    name='Número de Óbitos',
    marker={'color': 'rgba(255, 0, 0, 0.8)'},
)

layout = go.Layout(
    title='Série Temporal de óbitos por COVID-19 em Fortaleza',
    barmode='relative',
)
fig = go.Figure(data=trace, layout=layout)
fig.write_html('graphs/ts_obitos_fortal.html')
fig.show()

In [30]:

# One line per city for rows 1..5 of ts_total_casos (row 0 is skipped).
# NOTE(review): relies on the row order left by an earlier sort of the table,
# with the skipped first row presumably being Fortaleza — confirm.
traces = [
    go.Scatter(x=ts_total_casos.columns,
               y=ts_total_casos.loc[nome_municipio],
               name=nome_municipio)
    for nome_municipio in ts_total_casos.index[1:6]
]

layout = go.Layout(title='Séries Temporais do municípios com mais casos confirmados')
fig = go.Figure(data=traces, layout=layout)
fig.write_html('graphs/ts_casos_interior.html')
fig.show()