## Análises COVID-19 

Serão analisadas as séries temporais sobre a contaminação do vírus COVID-19 pelo mundo.

In [26]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

Agora, vamos analisar os dados. É importante já dizer no comando pd.read_csv quais são as colunas que serão "parseadas" como datas. O pandas
possui métodos robustos para trabalhar com esse tipo de informação.

In [27]:


# Carregar o arquivo CSV
caminho_arquivo = 'covid_19_data.csv'
df = pd.read_csv(caminho_arquivo, parse_dates=['ObservationDate'])

# Converter apenas os valores que não estão no formato correto
df['Last Update'] = df['Last Update'].apply(lambda x: pd.to_datetime(x) if isinstance(x, str) else x)

# Exibir o DataFrame atualizado
df


Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,2020-01-22,Anhui,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
1,2,2020-01-22,Beijing,Mainland China,2020-01-22 17:00:00,14.0,0.0,0.0
2,3,2020-01-22,Chongqing,Mainland China,2020-01-22 17:00:00,6.0,0.0,0.0
3,4,2020-01-22,Fujian,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
4,5,2020-01-22,Gansu,Mainland China,2020-01-22 17:00:00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
26708,26709,2020-05-19,Wyoming,US,2020-05-20 02:32:19,776.0,10.0,0.0
26709,26710,2020-05-19,Xinjiang,Mainland China,2020-05-20 02:32:19,76.0,3.0,73.0
26710,26711,2020-05-19,Yukon,Canada,2020-05-20 02:32:19,11.0,0.0,11.0
26711,26712,2020-05-19,Yunnan,Mainland China,2020-05-20 02:32:19,185.0,2.0,183.0


In [28]:
# Conferir os tipos de cada coluna

df.dtypes

SNo                         int64
ObservationDate    datetime64[ns]
Province/State             object
Country/Region             object
Last Update        datetime64[ns]
Confirmed                 float64
Deaths                    float64
Recovered                 float64
dtype: object

Nomes de colunas não devem ter letras maiúsculas e nem caracteres especiais. Será implementada uma função para fazer a limpeza dos nomes
dessas colunas.

In [29]:
import re

def corrige_colunas(col_name):
    """
    Função para corrigir nomes de colunas removendo barras e espaços e convertendo para minúsculas.

    Args:
    col_name (str): Nome da coluna a ser corrigido.

    Returns:
    str: Nome da coluna corrigido, sem barras, espaços e em minúsculas.
    """
    # Substitui barras (/) e espaços por uma string vazia, e converte para minúsculas
    return re.sub(r"[/| ]", "", col_name).lower()



In [30]:
corrige_colunas("AdgE/P ou")        #teste

'adgepou'

In [31]:
# Corrigir todas as colunas do df

df.columns = [corrige_colunas(col) for col in df.columns]

In [32]:
df

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
0,1,2020-01-22,Anhui,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
1,2,2020-01-22,Beijing,Mainland China,2020-01-22 17:00:00,14.0,0.0,0.0
2,3,2020-01-22,Chongqing,Mainland China,2020-01-22 17:00:00,6.0,0.0,0.0
3,4,2020-01-22,Fujian,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
4,5,2020-01-22,Gansu,Mainland China,2020-01-22 17:00:00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
26708,26709,2020-05-19,Wyoming,US,2020-05-20 02:32:19,776.0,10.0,0.0
26709,26710,2020-05-19,Xinjiang,Mainland China,2020-05-20 02:32:19,76.0,3.0,73.0
26710,26711,2020-05-19,Yukon,Canada,2020-05-20 02:32:19,11.0,0.0,11.0
26711,26712,2020-05-19,Yunnan,Mainland China,2020-05-20 02:32:19,185.0,2.0,183.0


## Brasil

Selecionar apenas os dados do Brasil para investigar

In [33]:
df.loc[df.countryregion == 'Brazil']

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
82,83,2020-01-23,,Brazil,2020-01-23 17:00:00,0.0,0.0,0.0
2455,2456,2020-02-26,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2559,2560,2020-02-27,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2668,2669,2020-02-28,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2776,2777,2020-02-29,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
...,...,...,...,...,...,...,...,...
24850,24851,2020-05-15,,Brazil,2020-05-16 02:32:19,220291.0,14962.0,84970.0
25227,25228,2020-05-16,,Brazil,2020-05-17 02:32:32,233511.0,15662.0,89672.0
25604,25605,2020-05-17,,Brazil,2020-05-18 02:32:21,241080.0,16118.0,94122.0
25981,25982,2020-05-18,,Brazil,2020-05-19 02:32:18,255368.0,16853.0,100459.0


In [34]:
brasil = df.loc[(df.countryregion == 'Brazil') & (df.confirmed > 0)]

In [35]:
brasil

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
2455,2456,2020-02-26,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2559,2560,2020-02-27,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2668,2669,2020-02-28,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2776,2777,2020-02-29,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
2903,2904,2020-03-01,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
...,...,...,...,...,...,...,...,...
24850,24851,2020-05-15,,Brazil,2020-05-16 02:32:19,220291.0,14962.0,84970.0
25227,25228,2020-05-16,,Brazil,2020-05-17 02:32:32,233511.0,15662.0,89672.0
25604,25605,2020-05-17,,Brazil,2020-05-18 02:32:21,241080.0,16118.0,94122.0
25981,25982,2020-05-18,,Brazil,2020-05-19 02:32:18,255368.0,16853.0,100459.0


### Casos Confirmados

In [36]:
# Gráfico da evolução de casos confirmados

px.line(brasil, 'observationdate', 'confirmed', title= 'Casos Confirmados no Brasil')

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

### Novos Casos por Dia

In [37]:
# Técnica de Programação Funcional

# Cria uma nova coluna 'novoscasos' no DataFrame 'brasil'
brasil['novoscasos'] = list(map(
    # Define uma função lambda para calcular o número de novos casos
    lambda x: 0 if (x==0) else brasil['confirmed'].iloc[x] - brasil['confirmed'].iloc[x-1],
    # Cria um array com os índices das linhas do DataFrame 'brasil'
    np.arange(brasil.shape[0])
))



# Explicação:
# - np.arange(brasil.shape[0]) cria um array de índices que vai de 0 até o número de linhas do DataFrame 'brasil' - 1.
# - Para cada índice x, a função lambda calcula o número de novos casos:
#   - Se x é 0 (primeira linha), o valor é 0, pois não há um dia anterior para comparar.
#   - Para os demais índices, calcula a diferença entre o número de casos confirmados no dia x e no dia anterior (x-1).
# - map aplica a função lambda a cada índice do array gerado por np.arange.
# - list transforma o resultado de map em uma lista.
# - A lista resultante é atribuída à nova coluna 'novoscasos' no DataFrame 'brasil'.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [38]:
# Visualizando

px.line(brasil, x='observationdate', y='novoscasos', title='Novos casos por dia')

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

### Mortes

In [45]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook

# Ordena os dados por data para garantir que o gráfico seja plotado corretamente
brasil_sorted = brasil.sort_values(by='observationdate')

# Cria uma fonte de dados para o Bokeh
source = ColumnDataSource(brasil_sorted)

# Configura a saída para o notebook (opcional, se estiver usando Jupyter Notebook)
output_notebook()

# Cria a figura Bokeh
p = figure(height=400, width=800, title='Mortes por COVID-19 no Brasil', x_axis_type='datetime')

# Adiciona uma linha ao gráfico
p.line(x='observationdate', y='deaths', source=source, line_width=2, line_color='red', legend_label='Mortes')

# Adiciona marcadores
p.circle(x='observationdate', y='deaths', source=source, size=8, fill_color='white', line_color='red', legend_label='Mortes')

# Adiciona ferramenta de hover para exibir informações ao passar o mouse
hover = HoverTool(tooltips=[('Data', '@observationdate{%F}'), ('Mortes', '@deaths')],
                formatters={'@observationdate': 'datetime'}, mode='vline')
p.add_tools(hover)

# Configurações de layout
p.legend.location = 'top_left'
p.xaxis.axis_label = 'Data de Observação'
p.yaxis.axis_label = 'Número de Mortes'

# Exibe o gráfico
show(p)




### Taxa de Crescimento

taxa_crescimento = (presente/passado)ˆ(1/n) - 1

In [48]:
def taxa_crescimento(data, variable, data_inicio=None, data_fim=None):
    # Se data início for None, define como a primeira data disponível
    if data_inicio is None:
        data_inicio = data.observationdate.loc[data[variable] > 0].min()
    else:
        data_inicio = pd.to_datetime(data_inicio)

    if data_fim is None:
        data_fim = data.observationdate.iloc[-1]
    else:
        data_fim = pd.to_datetime(data_fim)

    # Define os valores do presente e passado
    passado = data.loc[data.observationdate == data_inicio, variable].values[0]
    presente = data.loc[data.observationdate == data_fim, variable].values[0]

    # Define o número de pontos no tempo que vamos avaliar
    n = (data_fim - data_inicio).days

    # Calcular a taxa
    taxa = (presente / passado) ** (1 / n) - 1

    return taxa * 100



In [52]:
# Taxa de crescimento médio do COVID no Brasil em todo o período

taxa = taxa_crescimento(brasil, 'confirmed') 

print(f'A Taxa de Crescimento do COVID no Brasil é de {taxa: .2f}% ao dia')

A Taxa de Crescimento do COVID no Brasil é de  16.27% ao dia
