In [145]:
# Importando pacotes importantes
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

(26713, 8)

In [146]:
# Load the dataset from the GitHub-hosted CSV, parsing the two date
# columns as datetimes while reading.
url = 'https://github.com/neylsoncrepalde/projeto_eda_covid/blob/master/covid_19_data.csv?raw=true'
df = pd.read_csv(
    url,
    parse_dates=['ObservationDate', 'Last Update'],
)
# Show (rows, columns) as the cell output.
df.shape

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,2020-01-22,Anhui,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
1,2,2020-01-22,Beijing,Mainland China,2020-01-22 17:00:00,14.0,0.0,0.0
2,3,2020-01-22,Chongqing,Mainland China,2020-01-22 17:00:00,6.0,0.0,0.0
3,4,2020-01-22,Fujian,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
4,5,2020-01-22,Gansu,Mainland China,2020-01-22 17:00:00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
26708,26709,2020-05-19,Wyoming,US,2020-05-20 02:32:19,776.0,10.0,0.0
26709,26710,2020-05-19,Xinjiang,Mainland China,2020-05-20 02:32:19,76.0,3.0,73.0
26710,26711,2020-05-19,Yukon,Canada,2020-05-20 02:32:19,11.0,0.0,11.0
26711,26712,2020-05-19,Yunnan,Mainland China,2020-05-20 02:32:19,185.0,2.0,183.0


In [147]:
# Display the loaded DataFrame as the cell output
df

SNo                         int64
ObservationDate    datetime64[ns]
Province/State             object
Country/Region             object
Last Update        datetime64[ns]
Confirmed                 float64
Deaths                    float64
Recovered                 float64
dtype: object

In [148]:
# Inspect the dtype of each column
df.dtypes

In [149]:
# Implementando a função para corrigir o nome das colunas
import re

def corrige_colunas(col_name):
    """Return a normalized version of a column label.

    Every "/", "|" and space character is removed and the result is
    lower-cased, e.g. "Province/State" -> "provincestate".

    Note: inside the character class ``[/| ]`` the ``|`` is a literal pipe,
    not an alternation operator, so pipes are stripped as well.

    Parameters
    ----------
    col_name : str
        Original column label.

    Returns
    -------
    str
        The label without separators, in lower case.
    """
    separators = re.compile(r"[/| ]")
    stripped = separators.sub("", col_name)
    return stripped.lower()

In [150]:
# Normalize every column label by passing it through corrige_colunas
df.columns = list(map(corrige_colunas, df.columns))

Index(['sno', 'observationdate', 'provincestate', 'countryregion',
       'lastupdate', 'confirmed', 'deaths', 'recovered'],
      dtype='object')

In [151]:
# Show the column labels of df after the renaming step
df.columns

### Brasil

In [152]:
# List the distinct values present in the countryregion column
df.countryregion.unique()

In [153]:
# Select the rows for Brazil that have at least one confirmed case.
# .copy() makes df_brasil an independent DataFrame instead of a slice of df,
# so new columns can be assigned to it without SettingWithCopyWarning and
# without any ambiguity about whether df itself is affected.
df_brasil = df.loc[
    (df.countryregion == 'Brazil') &
    (df.confirmed > 0)
].copy()

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
2455,2456,2020-02-26,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2559,2560,2020-02-27,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2668,2669,2020-02-28,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2776,2777,2020-02-29,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
2903,2904,2020-03-01,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
...,...,...,...,...,...,...,...,...
24850,24851,2020-05-15,,Brazil,2020-05-16 02:32:19,220291.0,14962.0,84970.0
25227,25228,2020-05-16,,Brazil,2020-05-17 02:32:32,233511.0,15662.0,89672.0
25604,25605,2020-05-17,,Brazil,2020-05-18 02:32:21,241080.0,16118.0,94122.0
25981,25982,2020-05-18,,Brazil,2020-05-19 02:32:18,255368.0,16853.0,100459.0


In [154]:
# Display the Brazil subset as the cell output
df_brasil

### Casos Confirmados

In [155]:
# Line chart of the evolution of confirmed cases over the observation dates
px.line(
    df_brasil,
    x='observationdate',
    y='confirmed',
    labels={
        'observationdate': 'Data',
        'confirmed': 'Número de casos confirmados',
    },
    title='Casos confirmados no Brasil',
)

### Novos casos por dia

In [133]:
# New cases per day: difference between consecutive values of 'confirmed'.
# Series.diff() yields NaN for the first row (there is no previous row);
# that NaN is replaced by 0 so the first day reports no new cases.
# df_brasil only holds rows with confirmed > 0, so no other NaN is expected.
# assign() returns a new DataFrame, which keeps the cell idempotent and avoids
# writing into a slice of df (SettingWithCopyWarning).
df_brasil = df_brasil.assign(
    novoscasos=df_brasil['confirmed'].diff().fillna(0)
)

KeyError: 'novoscasos'