In [35]:
import pandas as pd
df = pd.read_csv('covid_19_clean_complete.csv',parse_dates=['Date']) # usa-se parse_dates=['Date'] para colocoar a coluna do tipo datetime
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0


In [36]:
import warnings

In [37]:
warnings.filterwarnings('ignore')

In [38]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25938 entries, 0 to 25937
Data columns (total 8 columns):
Province/State    7920 non-null object
Country/Region    25938 non-null object
Lat               25938 non-null float64
Long              25938 non-null float64
Date              25938 non-null datetime64[ns]
Confirmed         25938 non-null int64
Deaths            25938 non-null int64
Recovered         25938 non-null int64
dtypes: datetime64[ns](1), float64(2), int64(3), object(2)
memory usage: 1.6+ MB


In [39]:
# caso ativo = casos confirmado - Mortes - casos recoperados
df['Active'] = df['Confirmed'] - df['Deaths'] - df['Recovered']

In [40]:
# subistituindo Mailand china por china
df['Country/Region'] = df['Country/Region'].replace('Mainland China','China')

In [41]:
# missing value
df['Province/State'] = df['Province/State'].fillna(' ') 
df[['Confirmed','Deaths','Recovered','Active']] = df[['Confirmed','Deaths','Recovered','Active']].fillna(0)

In [42]:
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0


In [43]:
df.Date.describe()

count                   25938
unique                     99
top       2020-04-09 00:00:00
freq                      262
first     2020-01-22 00:00:00
last      2020-04-29 00:00:00
Name: Date, dtype: object

In [44]:
# obtem o numero de caso confirmado agrupado, Mortes, Recuperados, ativo agrupado por data e região
df_agrupado = df.groupby(['Date','Country/Region'])['Confirmed','Deaths','Recovered','Active'].sum().reset_index() # por causa do indece


In [45]:
# ordena dataframe por mais casos confirmados
df_agrupado.sort_values(by = 'Confirmed',ascending = False)

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active
18301,2020-04-29,US,1039909,60967,120720,858222
18116,2020-04-28,US,1012582,58355,115936,838291
17931,2020-04-27,US,988197,56259,111424,820514
17746,2020-04-26,US,965785,54881,106988,803916
17561,2020-04-25,US,938154,53755,100372,784027
...,...,...,...,...,...,...
5769,2020-02-22,Chad,0,0,0,0
5770,2020-02-22,Chile,0,0,0,0
5772,2020-02-22,Colombia,0,0,0,0
5773,2020-02-22,Congo (Brazzaville),0,0,0,0


In [46]:
df_agrupado_paises = df.groupby(['Country/Region'])['Confirmed','Deaths','Recovered','Active'].sum().reset_index() # por causa do indece



In [47]:
df_agrupado_paises.sort_values(by = 'Confirmed',ascending = False)

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
171,US,19536787,977901,1613848,16945038
36,China,6604490,259447,4481528,1863515
84,Italy,5934150,732733,1336289,3865128
156,Spain,5884956,578196,2142687,3164073
65,Germany,4368674,112523,2136071,2120080
...,...,...,...,...,...
155,South Sudan,156,0,0,156
129,Papua New Guinea,143,0,0,143
181,Western Sahara,136,0,35,101
143,Sao Tome and Principe,104,0,8,96


In [48]:
temp = df.groupby(['Date'])['Deaths','Recovered','Active'].sum().reset_index() # por causa do indece
temp


Unnamed: 0,Date,Deaths,Recovered,Active
0,2020-01-22,17,28,510
1,2020-01-23,18,30,606
2,2020-01-24,26,35,880
3,2020-01-25,42,38,1354
4,2020-01-26,56,51,2011
...,...,...,...,...
94,2020-04-25,202868,796131,1898620
95,2020-04-26,206568,842917,1922873
96,2020-04-27,211167,869418,1961174
97,2020-04-28,217153,902901,1996339


In [49]:
temp = temp.melt(id_vars = 'Date', value_vars = ['Recovered','Deaths','Active'],
                var_name = 'Case',value_name = 'count' )

In [50]:
temp

Unnamed: 0,Date,Case,count
0,2020-01-22,Recovered,28
1,2020-01-23,Recovered,30
2,2020-01-24,Recovered,35
3,2020-01-25,Recovered,38
4,2020-01-26,Recovered,51
...,...,...,...
292,2020-04-25,Active,1898620
293,2020-04-26,Active,1922873
294,2020-04-27,Active,1961174
295,2020-04-28,Active,1996339


In [51]:
from plotly.offline import plot,iplot,init_notebook_mode
init_notebook_mode(connected = True)

In [52]:
import plotly.io as pio
pio.renderers
pio.renderers.default = 'colab'



In [53]:
# cores
recuperados = '#21bf73'
mortes = '#ff2e63'
ativos = '#fe9801'

In [54]:
import plotly.express as px
fig = px.area(temp,
             x = 'Date',
             y='count',
             color = 'Case',
             height = 600,
             title = 'Casos sobre o tempo',
             color_discrete_sequence = [recuperados,mortes,ativos])
fig.update_layout(xaxis_rangeslider_visible = True)
fig.show()
            

In [55]:
import numpy as np

In [56]:
fig = px.choropleth(df_agrupado,
                    locations = 'Country/Region',
                    locationmode = 'country names',
                    color = np.log(df_agrupado['Confirmed']),
                    hover_name = ('Country/Region'),
                    hover_data = ['Confirmed','Deaths'],
                    animation_frame=df_agrupado['Date'].dt.strftime('%d-%m-%y'),
                    title = 'Casos ao longo do tempo',
                    color_continuous_scale = px.colors.sequential.Magenta)
fig.update_layout(autosize = False, width=1200, height=800)
fig.show()



In [57]:
fig = px.choropleth(df_agrupado,
                    locations = 'Country/Region',
                    locationmode = 'country names',
                    color = np.log(df_agrupado['Deaths']),
                    hover_name = ('Country/Region'),
                    hover_data = ['Confirmed','Deaths'],
                    animation_frame=df_agrupado['Date'].dt.strftime('%d-%m-%y'),
                    title = 'Casos ao longo do tempo',
                    color_continuous_scale = px.colors.sequential.Magenta) # paleta de cores sequencial
fig.update_layout(autosize = False, width=1200, height=800)
fig.show()




In [58]:
!pip install plotly==4.5.2



In [59]:
import plotly.figure_factory as ff
from  plotly.subplots import make_subplots
import plotly.express as px


In [68]:
# filtra os dados considerado os ultimos dias de base de dados
complete = df[df['Date']== max(df['Date'])]
complete

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
25676,,Afghanistan,33.000000,65.000000,2020-04-29,1939,60,252,1627
25677,,Albania,41.153300,20.168300,2020-04-29,766,30,455,281
25678,,Algeria,28.033900,1.659600,2020-04-29,3848,444,1702,1702
25679,,Andorra,42.506300,1.521800,2020-04-29,743,42,423,278
25680,,Angola,-11.202700,17.873900,2020-04-29,27,2,7,18
...,...,...,...,...,...,...,...,...,...
25933,Saint Pierre and Miquelon,France,46.885200,-56.315900,2020-04-29,1,0,0,1
25934,,South Sudan,6.877000,31.307000,2020-04-29,34,0,0,34
25935,,Western Sahara,24.215500,-12.885800,2020-04-29,6,0,5,1
25936,,Sao Tome and Principe,0.186360,6.613081,2020-04-29,8,0,4,4


In [61]:
# plot painel
fig = px.treemap(complete.sort_values(by = 'Confirmed', ascending= False).reset_index(drop= True),
              path=['Country/Region','Province/State'],
              values = 'Confirmed',
              height = 600,
              title = 'Número de infetados Confirmado',
              color_discrete_sequence = px.colors.qualitative.Dark2)
fig.data[0].textinfo = 'label + text + value'
fig.show()

fig = px.treemap(complete.sort_values(by = 'Deaths', ascending= False).reset_index(drop= True),
              path=['Country/Region','Province/State'],
              values = 'Deaths',
              height = 600,
              title = 'Número de Mortes Confirmado',


              color_discrete_sequence = px.colors.qualitative.Dark2)
fig.data[0].textinfo = 'label + text + value'
fig.show()





In [62]:
fig = px.line(df_agrupado,
              x = 'Date',
              y = 'Confirmed',
              color = 'Country/Region',
              height = 600,
              title = 'Casos Confirmados',
              color_discrete_sequence = px.colors.qualitative.Dark2)
fig.show()

fig = px.line(df_agrupado,
              x = 'Date',
              y = 'Deaths',
              color = 'Country/Region',
              height = 600,
              title = 'Número de Mortos',
              color_discrete_sequence = px.colors.qualitative.Dark2)
fig.show()

In [63]:
# obtem ultio dia da base de dados
temp = df[df['Date'] == max(df['Date'])]

In [64]:
!pip install folium



In [65]:
import folium

In [66]:
m = folium.Map(location=[0,0], tiles = 'cartodbpositron',
        min_zoom = 1, max_zoom = 4, zoom_start = 1)

In [67]:
for i in range(0, len(temp)):
    folium.Circle(
    location = [temp.iloc[i]['Lat'], temp.iloc[i]['Long']],
    color = 'crimson', fill = 'crimson',  
    tooltip = '<li> <bold> Country :' + str(temp.iloc[i]['Country/Region'])+
              '<li> <bold> province :' + str(temp.iloc[i]['Province/State'])+
              '<li> <bold> Confirmed :' + str(temp.iloc[i]['Confirmed'])+
              '<li> <bold> Deaths :' + str(temp.iloc[i]['Deaths']),
    radius = int(temp.iloc[i]['Confirmed'])** 1.1).add_to(m)
m