In [1]:
import pandas as pd

### Estudo Covid-19 PANDEMIA

In [2]:
# Lendo o arquivo CSV e informando que há uma coluna do tipo Data-Tempo
file = 'covid_19_clean_complete.csv'
df = pd.read_csv(file, sep=',', parse_dates=['Date'])

In [3]:
# Ignorando os warnings
import warnings
warnings.filterwarnings('ignore')

In [4]:
df.head(10)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0
5,,Antigua and Barbuda,17.0608,-61.7964,2020-01-22,0,0,0
6,,Argentina,-38.4161,-63.6167,2020-01-22,0,0,0
7,,Armenia,40.0691,45.0382,2020-01-22,0,0,0
8,Australian Capital Territory,Australia,-35.4735,149.0124,2020-01-22,0,0,0
9,New South Wales,Australia,-33.8688,151.2093,2020-01-22,0,0,0


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19836 entries, 0 to 19835
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Province/State  6080 non-null   object        
 1   Country/Region  19836 non-null  object        
 2   Lat             19836 non-null  float64       
 3   Long            19836 non-null  float64       
 4   Date            19836 non-null  datetime64[ns]
 5   Confirmed       19836 non-null  int64         
 6   Deaths          19836 non-null  int64         
 7   Recovered       19836 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(3), object(2)
memory usage: 1.2+ MB


In [6]:
# Criando a coluna Casos Ativos = Casos Confirmados - Mortos - Casos Recuperados
df['Active Cases'] = df['Confirmed'] - df['Deaths'] - df['Recovered']

In [7]:
# Subistituindo Mainland China por China na coluna Country/Region
df['Country/Region'] = df['Country/Region'].replace('Mainland China', 'China')

In [8]:
# Preechendo Missing Values
df[['Province/State']] = df[['Province/State']].fillna('')
df[['Confirmed','Deaths','Recovered','Active Cases']]=df[['Confirmed','Deaths','Recovered','Active Cases']].fillna('')

In [9]:
# Convertendo DataTypes
df['Recovered']=df['Recovered'].astype(int)

In [10]:
# Cabeçalho após tratamento
df.head(10)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active Cases
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0
5,,Antigua and Barbuda,17.0608,-61.7964,2020-01-22,0,0,0,0
6,,Argentina,-38.4161,-63.6167,2020-01-22,0,0,0,0
7,,Armenia,40.0691,45.0382,2020-01-22,0,0,0,0
8,Australian Capital Territory,Australia,-35.4735,149.0124,2020-01-22,0,0,0,0
9,New South Wales,Australia,-33.8688,151.2093,2020-01-22,0,0,0,0


#### Examinando Dados Temporais

In [11]:
df.Date.describe()

count                   19836
unique                     76
top       2020-01-25 00:00:00
freq                      261
first     2020-01-22 00:00:00
last      2020-04-06 00:00:00
Name: Date, dtype: object

#### Agrupando Dados

In [12]:
# Obtem o número de casos fonfirmados, mortes, recuperados e ativos agrupado por data e por região
df_agrupado = df.groupby(['Date','Country/Region'])['Confirmed','Deaths','Recovered','Active Cases'].sum().reset_index()

In [13]:
df_agrupado

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active Cases
0,2020-01-22,Afghanistan,0,0,0,0
1,2020-01-22,Albania,0,0,0,0
2,2020-01-22,Algeria,0,0,0,0
3,2020-01-22,Andorra,0,0,0,0
4,2020-01-22,Angola,0,0,0,0
...,...,...,...,...,...,...
13979,2020-04-06,Vietnam,245,0,95,150
13980,2020-04-06,West Bank and Gaza,254,1,24,229
13981,2020-04-06,Western Sahara,4,0,0,4
13982,2020-04-06,Zambia,39,1,5,33


In [14]:
# Odena o DataFrame por mais casos confirmados
df_agrupado.sort_values(by='Confirmed', ascending=False)

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active Cases
13971,2020-04-06,US,366614,10783,19581,336250
13787,2020-04-05,US,337072,9619,17448,310005
13603,2020-04-04,US,308850,8407,14652,285791
13419,2020-04-03,US,275586,7087,9707,258792
13235,2020-04-02,US,243453,5926,9001,228526
...,...,...,...,...,...,...
8756,2020-03-09,Mauritania,0,0,0,0
4967,2020-02-17,Zimbabwe,0,0,0,0
8754,2020-03-09,Mali,0,0,0,0
4968,2020-02-18,Afghanistan,0,0,0,0


In [15]:
# Obtem o número de casos confirmados, mortes, recuperados e ativos agrupando por região.
df_group_paises = df.groupby('Country/Region')['Confirmed','Deaths','Recovered','Active Cases'].sum().reset_index()

In [16]:
# Ordena por países com mais casos fonfirmados
df_group_paises.sort_values('Confirmed', ascending=False)

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active Cases
36,China,4683417,165756,2847170,1670491
171,US,2831915,64777,98681,2668457
84,Italy,1942859,206052,266247,1470560
156,Spain,1472568,123441,295573,1053554
65,Germany,1142172,12016,217756,912400
...,...,...,...,...,...
103,Malawi,19,0,0,19
166,Timor-Leste,16,0,0,16
181,Western Sahara,8,0,0,8
143,Sao Tome and Principe,4,0,0,4


In [17]:
# Agrupa quantidade de casos recuperados, mortes e ativos por data
temp = df.groupby('Date')['Recovered','Deaths','Active Cases'].sum().reset_index()

In [18]:
# Remodela o dataframe com variável e valor para ter quantidades de recuperados, mortos e ativos
temp = temp.melt(id_vars='Date', value_vars=['Recovered','Deaths','Active Cases'],
                var_name='Case',value_name='Count')

In [19]:
temp

Unnamed: 0,Date,Case,Count
0,2020-01-22,Recovered,28
1,2020-01-23,Recovered,30
2,2020-01-24,Recovered,36
3,2020-01-25,Recovered,39
4,2020-01-26,Recovered,52
...,...,...,...
223,2020-04-02,Active Cases,751644
224,2020-04-03,Active Cases,813507
225,2020-04-04,Active Cases,889225
226,2020-04-05,Active Cases,945742


In [20]:
# Habilita modo offline do plot
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

In [21]:
# Definindo o renderizador
import plotly.io as pio
pio.renderers

Renderers configuration
-----------------------
    Default renderer: 'plotly_mimetype+notebook_connected'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery']

In [22]:
# Cores
recuperados = '#21bf73'
mortes = '#ff2e63'
ativos = '#fe9801'

In [23]:
import plotly.express as px
fig = px.area(temp,
             x='Date',
             y='Count',
             color='Case',
             height=600,
             title='Casos ao longo do tempo',
             color_discrete_sequence=[recuperados, mortes, ativos])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [24]:
import numpy as np

### Casos ao longo do Tempo (Mapa com animação)

In [25]:
# Mapa de Choropleth é um mapa composto por polígonos coloridos,
# É usado para representar variações espaciais de uma quantidade
fig2 = px.choropleth(df_agrupado,                                                     # casos agrupados por país
                    locations = 'Country/Region',                                     # definindo as regiões do mapa
                    locationmode = 'country names',                                   # define o modo de localização para todas as regiões
                    color = np.log(df_agrupado['Confirmed']),                         # define a cor pela quantidade de casos confirmados
                    hover_name = 'Country/Region',                                    # Define o nome interativo com o nome da região
                    hover_data = ['Confirmed', 'Deaths'] ,                            # Define o texto interativo com o número de confirmados e mortos
                    animation_frame = df_agrupado['Date'].dt.strftime('%d-%m-%Y'),    # Define o animate frame com as datas
                    title = 'Casos ao longo do Tempo',                                # Define o título
                    color_continuous_scale = px.colors.sequential.Magenta)            # Define a paleta de cores            
fig2.update_layout(autosize=False, width=800, height=600)                             # Define o tamnho da figura
fig2.show()                                                                           # Exibe a figura

### Mortes ao longo do Tempo (Mapa com animação)

In [26]:
fig2 = px.choropleth(df_agrupado,                                                     # casos agrupados por país
                    locations = 'Country/Region',                                     # definindo as regiões do mapa
                    locationmode = 'country names',                                   # define o modo de localização para todas as regiões
                    color = np.log(df_agrupado['Deaths']),                            # define a cor pela quantidade de Mortos
                    hover_name = 'Country/Region',                                    # Define o nome interativo com o nome da região
                    hover_data = ['Confirmed', 'Deaths'] ,                            # Define o texto interativo com o número de confirmados e mortos
                    animation_frame = df_agrupado['Date'].dt.strftime('%d-%m-%Y'),    # Define o animate frame com as datas
                    title = 'Mortes ao longo do Tempo',                                # Define o título
                    color_continuous_scale = px.colors.sequential.Magenta)            # Define a paleta de cores            
fig2.update_layout(autosize=False, width=800, height=600)                              # Define o tamnho da figura
fig2.show()   

In [27]:
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

In [28]:
# Filtra os dados considerando o ultimo dia da base de dados
completo = df[df['Date'] == max(df['Date'])]

In [29]:
# Imprime as 5 primeiras linhas
completo.head(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active Cases
19575,,Afghanistan,33.0,65.0,2020-04-06,367,11,18,338
19576,,Albania,41.1533,20.1683,2020-04-06,377,21,116,240
19577,,Algeria,28.0339,1.6596,2020-04-06,1423,173,90,1160
19578,,Andorra,42.5063,1.5218,2020-04-06,525,21,31,473
19579,,Angola,-11.2027,17.8739,2020-04-06,16,2,2,12


In [30]:
# Plota painel
fig3 = px.treemap(completo.sort_values(by='Confirmed', ascending=False).reset_index(drop=True),
                 path=['Country/Region','Province/State'],
                 values='Confirmed',
                 height=600,
                 title='Número de Casos Confirmados',
                 color_discrete_sequence = px.colors.qualitative.Dark2)
fig3.data[0].textinfo = 'label+text+value'
fig3.show()
# Plota painel
fig3 = px.treemap(completo.sort_values(by='Confirmed', ascending=False).reset_index(drop=True),
                 path=['Country/Region','Province/State'],
                 values='Deaths',
                 height=600,
                 title='Número de Mortes Confirmadas',
                 color_discrete_sequence = px.colors.qualitative.Dark2)
fig3.data[0].textinfo = 'label+text+value'
fig3.show()

### Pico de Casos Confirmados e Mortes

In [31]:
# Por casos confirmados
fig4 = px.line(df_agrupado,
              x = 'Date',
              y = 'Confirmed',
              color = 'Country/Region',
              height = 600,
              title='Casos Confirmados',
              color_discrete_sequence = px.colors.qualitative.Dark2)
fig4.show()
# Por mortes confirmadas
fig4 = px.line(df_agrupado,
              x = 'Date',
              y = 'Deaths',
              color = 'Country/Region',
              height = 600,
              title='Mortes Confirmadas',
              color_discrete_sequence = px.colors.qualitative.Dark2)
fig4.show()

### Gráfico com Folium

In [32]:
import folium

In [33]:
# Filtra os dados considerando o ultimo dia da base de dados
temp = df[df['Date'] == max(df['Date'])]

In [34]:
mapa = folium.Map(location=[0,0], tiles='cartodbpositron',
                 min_zoom=1, max_zoom=4, zoom_start=1)

for i in range(0, len(temp)):
    folium.Circle(
        location=[temp.iloc[i]['Lat'], temp.iloc[i]['Long']],
        color = 'crimson', fill='crimson',
        tooltip = '<li><bold>Country : '+str(temp.iloc[i]['Country/Region'])+
                  '<li><bold>Province : '+str(temp.iloc[i]['Province/State'])+
                  '<li><bold>Confirmed : '+str(temp.iloc[i]['Confirmed'])+
                  '<li><bold>Deaths : '+str(temp.iloc[i]['Deaths']),
        radius=int(temp.iloc[i]['Confirmed'])**1.1).add_to(mapa)
mapa

#### Pequena demonstração da pandemia global
##### Por: Leandro M. Torres