In [48]:
import pandas as pd
import numpy as np
import time as time
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)
import plotly.io as pio
pio.renderers
pio.renderers.default = "colab"
import plotly.express as px
import plotly.graph_objects as go

In [49]:
df = pd.read_csv('covid_19_clean_complete.csv', delimiter=',', parse_dates=['Date'])

In [50]:
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0


In [51]:
df['Active'] = df['Confirmed'] - df['Deaths'] - df['Recovered']

In [52]:
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0


In [53]:
# Trocando Mainland China por China
df['Country/Region'] = df['Country/Region'].replace('Mainland China', 'China')

In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19836 entries, 0 to 19835
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Province/State  6080 non-null   object        
 1   Country/Region  19836 non-null  object        
 2   Lat             19836 non-null  float64       
 3   Long            19836 non-null  float64       
 4   Date            19836 non-null  datetime64[ns]
 5   Confirmed       19836 non-null  int64         
 6   Deaths          19836 non-null  int64         
 7   Recovered       19836 non-null  int64         
 8   Active          19836 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 1.4+ MB


In [55]:
# Preenchendo missing values
df[['Province/State']] = df[['Province/State']].fillna('')

In [56]:
df.isna().sum()

Province/State    0
Country/Region    0
Lat               0
Long              0
Date              0
Confirmed         0
Deaths            0
Recovered         0
Active            0
dtype: int64

In [57]:
df_group = df.groupby(['Date', 'Country/Region'])['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()

In [58]:
df_group.sort_values(by='Confirmed', ascending=False)

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active
13971,2020-04-06,US,366614,10783,19581,336250
13787,2020-04-05,US,337072,9619,17448,310005
13603,2020-04-04,US,308850,8407,14652,285791
13419,2020-04-03,US,275586,7087,9707,258792
13235,2020-04-02,US,243453,5926,9001,228526
...,...,...,...,...,...,...
8756,2020-03-09,Mauritania,0,0,0,0
4967,2020-02-17,Zimbabwe,0,0,0,0
8754,2020-03-09,Mali,0,0,0,0
4968,2020-02-18,Afghanistan,0,0,0,0


In [59]:
df_group_country = df.groupby('Country/Region')['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
df_group_country

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,Afghanistan,3031,70,94,2867
1,Albania,4143,200,792,3151
2,Algeria,11826,1028,1048,9750
3,Andorra,5536,140,151,5245
4,Angola,112,18,10,84
...,...,...,...,...,...
179,Vietnam,4344,0,1316,3028
180,West Bank and Gaza,2552,12,335,2205
181,Western Sahara,8,0,0,8
182,Zambia,425,5,12,408


In [173]:
# Dados agrupados por data 
df_group_date = df.groupby('Date')['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
df_group_date

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active
0,2020-01-22,555,17,28,510
1,2020-01-23,654,18,30,606
2,2020-01-24,941,26,36,879
3,2020-01-25,1434,42,39,1353
4,2020-01-26,2118,56,52,2010
...,...,...,...,...,...
71,2020-04-02,1013155,52983,208528,751644
72,2020-04-03,1095915,58787,223621,813507
73,2020-04-04,1197403,64606,243572,889225
74,2020-04-05,1272113,69374,256997,945742


In [193]:
df_group_date = df.groupby('Date')._add_numeric_operations(sum)
df_group_date

TypeError: _add_numeric_operations() takes 1 positional argument but 2 were given

In [170]:
df_group_date_melt = df_group_date.melt(id_vars='Date', value_vars=['Recovered', 'Deaths', 'Active'], var_name='Case', value_name='Count')
df_group_date_melt.loc[df_group_date_melt['Case'] == 'Active', 'Case'] = 'Ativos'
df_group_date_melt

Unnamed: 0,Date,Case,Count
0,2020-01-22,Recovered,28
1,2020-01-23,Recovered,30
2,2020-01-24,Recovered,36
3,2020-01-25,Recovered,39
4,2020-01-26,Recovered,52
...,...,...,...
223,2020-04-02,Ativos,751644
224,2020-04-03,Ativos,813507
225,2020-04-04,Ativos,889225
226,2020-04-05,Ativos,945742


In [63]:
fig = px.area(df_group_date_melt,
              x="Date",
              y="Count",
              color="Case",
              height=600,
              title='Casos ao longo do tempo')

fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [64]:
df_group.head()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active
0,2020-01-22,Afghanistan,0,0,0,0
1,2020-01-22,Albania,0,0,0,0
2,2020-01-22,Algeria,0,0,0,0
3,2020-01-22,Andorra,0,0,0,0
4,2020-01-22,Angola,0,0,0,0


In [162]:
df_group = df_group[df_group['Country/Region'] == 'Brazil']
df_group

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active
23,2020-01-22,Brazil,0,0,0,0
207,2020-01-23,Brazil,0,0,0,0
391,2020-01-24,Brazil,0,0,0,0
575,2020-01-25,Brazil,0,0,0,0
759,2020-01-26,Brazil,0,0,0,0
...,...,...,...,...,...,...
13087,2020-04-02,Brazil,8044,324,127,7593
13271,2020-04-03,Brazil,9056,359,127,8570
13455,2020-04-04,Brazil,10360,445,127,9788
13639,2020-04-05,Brazil,11130,486,127,10517


In [165]:
fig = px.choropleth(df_group,
                    locations='Country/Region',
                    locationmode='country names',
                    color=np.log(df_group['Confirmed']),
                    hover_name='Country/Region',
                    hover_data=['Confirmed', 'Deaths'],
                    animation_frame=df_group['Date'].dt.strftime('%d-%m-%Y'),
                    title='Casos ao longo do tmepo',
                    color_continuous_scale= px.colors.sequential.Reds,
                    labels={'Confirmed': 'Confirmados', 'animation_frame': 'Data', 'Deaths': 'Mortes', 'Country/Region': 'País'}                 
                    )

fig.update_layout(autosize=False, 
                  width=1200, 
                  height=800,
                  geo = dict(
                            showframe = True,
                            showcoastlines = True,
                            projection = dict(type = 'equirectangular'),
                            bgcolor= "rgba(255,255,255,1)",
                            lakecolor="rgba(135,206,250,1)",
                            oceancolor = "rgba(135,206,250,0.5)",
                            rivercolor = "rgba(135,206,250,0.5)",
                            landcolor = "rgba(255,255,255,1)",
                            showlakes= True,
                            countrycolor= '#ff0000',
                            coastlinecolor= '#ff0000',
                            framecolor= '#ff0000',
                            showocean=True,
                            showsubunits=None
                          ),
                     hoverlabel = dict(),
                     geo_scope='south america'
                 )



fig.show()