In [42]:
import pandas as pd
import numpy as np
import pycountry 
import plotly_express as px
import plotly.graph_objs as go


In [43]:
df_confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
df_deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

In [44]:
df_confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/12/21,5/13/21,5/14/21,5/15/21,5/16/21,5/17/21,5/18/21,5/19/21,5/20/21,5/21/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,62718,63045,63355,63412,63484,63598,63819,64122,64575,65080
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,131845,131890,131939,131978,132015,132032,132071,132095,132118,132153
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,124682,124889,125059,125194,125311,125485,125693,125896,126156,126434
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,13470,13470,13510,13510,13510,13555,13569,13569,13569,13569
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,29405,29695,30030,30354,30637,30787,31045,31438,31661,31909


In [45]:
df_confirmed.columns

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       ...
       '5/12/21', '5/13/21', '5/14/21', '5/15/21', '5/16/21', '5/17/21',
       '5/18/21', '5/19/21', '5/20/21', '5/21/21'],
      dtype='object', length=490)

Converting date columns into a seperate date row using pd.melt func

In [46]:
dates = df_confirmed.columns[4:]

df_confirmed_change = df_confirmed.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Confirmed'
)

In [47]:
df_confirmed_change

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed
0,,Afghanistan,33.939110,67.709953,1/22/20,0
1,,Albania,41.153300,20.168300,1/22/20,0
2,,Algeria,28.033900,1.659600,1/22/20,0
3,,Andorra,42.506300,1.521800,1/22/20,0
4,,Angola,-11.202700,17.873900,1/22/20,0
...,...,...,...,...,...,...
133645,,Vietnam,14.058324,108.277199,5/21/21,4941
133646,,West Bank and Gaza,31.952200,35.233200,5/21/21,304968
133647,,Yemen,15.552727,48.516388,5/21/21,6632
133648,,Zambia,-13.133897,27.849332,5/21/21,92920


In [48]:
df_deaths_change = df_deaths.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Deaths'
)

In [49]:
df_deaths_change

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Deaths
0,,Afghanistan,33.939110,67.709953,1/22/20,0
1,,Albania,41.153300,20.168300,1/22/20,0
2,,Algeria,28.033900,1.659600,1/22/20,0
3,,Andorra,42.506300,1.521800,1/22/20,0
4,,Angola,-11.202700,17.873900,1/22/20,0
...,...,...,...,...,...,...
133645,,Vietnam,14.058324,108.277199,5/21/21,41
133646,,West Bank and Gaza,31.952200,35.233200,5/21/21,3452
133647,,Yemen,15.552727,48.516388,5/21/21,1302
133648,,Zambia,-13.133897,27.849332,5/21/21,1266


In [50]:
df_recovered_change = df_recovered.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Recovered'
)

In [51]:
df_recovered_change

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Recovered
0,,Afghanistan,33.939110,67.709953,1/22/20,0
1,,Albania,41.153300,20.168300,1/22/20,0
2,,Algeria,28.033900,1.659600,1/22/20,0
3,,Andorra,42.506300,1.521800,1/22/20,0
4,,Angola,-11.202700,17.873900,1/22/20,0
...,...,...,...,...,...,...
126355,,Vietnam,14.058324,108.277199,5/21/21,2689
126356,,West Bank and Gaza,31.952200,35.233200,5/21/21,296901
126357,,Yemen,15.552727,48.516388,5/21/21,3179
126358,,Zambia,-13.133897,27.849332,5/21/21,91019


In [52]:
df_recovered_change = df_recovered_change[df_recovered_change['Country/Region']!='Canada']

In [73]:
full_table = df_confirmed_change.merge(
  right=df_deaths_change, 
  how='left',
  on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
)
# Merging full_table and recovered_df_long
full_table = full_table.merge(
  right=df_recovered_change, 
  how='left',
  on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
)

In [74]:
full_table.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.93911,67.709953,1/22/20,0,0,0.0
1,,Albania,41.1533,20.1683,1/22/20,0,0,0.0
2,,Algeria,28.0339,1.6596,1/22/20,0,0,0.0
3,,Andorra,42.5063,1.5218,1/22/20,0,0,0.0
4,,Angola,-11.2027,17.8739,1/22/20,0,0,0.0


### Data Cleaning

In [75]:
full_table['Date'] = pd.to_datetime(full_table['Date'])

In [76]:
full_table.isnull().sum()

Province/State    91854
Country/Region        0
Lat                 972
Long                972
Date                  0
Confirmed             0
Deaths                0
Recovered         10206
dtype: int64

In [77]:
full_table['Recovered'] = full_table['Recovered'].fillna(0)

In [78]:
full_table.isnull().sum()

Province/State    91854
Country/Region        0
Lat                 972
Long                972
Date                  0
Confirmed             0
Deaths                0
Recovered             0
dtype: int64

In [79]:
full_table[full_table['Country/Region']=='MS Zaandam'].head(20)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
172,,MS Zaandam,0.0,0.0,2020-01-22,0,0,0.0
447,,MS Zaandam,0.0,0.0,2020-01-23,0,0,0.0
722,,MS Zaandam,0.0,0.0,2020-01-24,0,0,0.0
997,,MS Zaandam,0.0,0.0,2020-01-25,0,0,0.0
1272,,MS Zaandam,0.0,0.0,2020-01-26,0,0,0.0
1547,,MS Zaandam,0.0,0.0,2020-01-27,0,0,0.0
1822,,MS Zaandam,0.0,0.0,2020-01-28,0,0,0.0
2097,,MS Zaandam,0.0,0.0,2020-01-29,0,0,0.0
2372,,MS Zaandam,0.0,0.0,2020-01-30,0,0,0.0
2647,,MS Zaandam,0.0,0.0,2020-01-31,0,0,0.0


In [80]:
ship_rows = full_table['Province/State'].str.contains('Grand Princess') | full_table['Province/State'].str.contains('Diamond Princess') | full_table['Country/Region'].str.contains('Diamond Princess') | full_table['Country/Region'].str.contains('MS Zaandam')
full_ship = full_table[ship_rows]

In [81]:
full_table = full_table[~(ship_rows)]

In [82]:
full_table

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.939110,67.709953,2020-01-22,0,0,0.0
1,,Albania,41.153300,20.168300,2020-01-22,0,0,0.0
2,,Algeria,28.033900,1.659600,2020-01-22,0,0,0.0
3,,Andorra,42.506300,1.521800,2020-01-22,0,0,0.0
4,,Angola,-11.202700,17.873900,2020-01-22,0,0,0.0
...,...,...,...,...,...,...,...,...
133645,,Vietnam,14.058324,108.277199,2021-05-21,4941,41,2689.0
133646,,West Bank and Gaza,31.952200,35.233200,2021-05-21,304968,3452,296901.0
133647,,Yemen,15.552727,48.516388,2021-05-21,6632,1302,3179.0
133648,,Zambia,-13.133897,27.849332,2021-05-21,92920,1266,91019.0


Data Aggregation

In [83]:
#Active cases = Confirmed - Deaths - recovered
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths'] - full_table['Recovered']

In [84]:
full_table[full_table['Active']!= 0]

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
58,Anhui,China,31.825700,117.226400,2020-01-22,1,0,0.0,1.0
59,Beijing,China,40.182400,116.414200,2020-01-22,14,0,0.0,14.0
60,Chongqing,China,30.057200,107.874000,2020-01-22,6,0,0.0,6.0
61,Fujian,China,26.078900,117.987400,2020-01-22,1,0,0.0,1.0
63,Guangdong,China,23.341700,113.424400,2020-01-22,26,0,0.0,26.0
...,...,...,...,...,...,...,...,...,...
133645,,Vietnam,14.058324,108.277199,2021-05-21,4941,41,2689.0,2211.0
133646,,West Bank and Gaza,31.952200,35.233200,2021-05-21,304968,3452,296901.0,4615.0
133647,,Yemen,15.552727,48.516388,2021-05-21,6632,1302,3179.0,2151.0
133648,,Zambia,-13.133897,27.849332,2021-05-21,92920,1266,91019.0,635.0


Aggregate data Country-wise and then group them by date

In [85]:
full_grouped= full_table.groupby(['Date', 'Country/Region'])['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()

  full_grouped= full_table.groupby(['Date', 'Country/Region'])['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()


In [89]:
full_grouped

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active
0,2020-01-22,Afghanistan,0,0,0.0,0.0
1,2020-01-22,Albania,0,0,0.0,0.0
2,2020-01-22,Algeria,0,0,0.0,0.0
3,2020-01-22,Andorra,0,0,0.0,0.0
4,2020-01-22,Angola,0,0,0.0,0.0
...,...,...,...,...,...,...
92335,2021-05-21,Vietnam,4941,41,2689.0,2211.0
92336,2021-05-21,West Bank and Gaza,304968,3452,296901.0,4615.0
92337,2021-05-21,Yemen,6632,1302,3179.0,2151.0
92338,2021-05-21,Zambia,92920,1266,91019.0,635.0


In [90]:
full

Unnamed: 0,Date,Province/State,Confirmed,Deaths,Recovered,Active
0,2020-01-22,Alberta,0,0,0.0,0.0
1,2020-01-22,Anguilla,0,0,0.0,0.0
2,2020-01-22,Anhui,1,0,0.0,1.0
3,2020-01-22,Aruba,0,0,0.0,0.0
4,2020-01-22,Australian Capital Territory,0,0,0.0,0.0
...,...,...,...,...,...,...
40819,2021-05-21,Western Australia,1016,9,1003.0,4.0
40820,2021-05-21,Xinjiang,980,3,977.0,0.0
40821,2021-05-21,Yukon,84,2,0.0,82.0
40822,2021-05-21,Yunnan,351,2,324.0,25.0


In [91]:
temp = full_grouped.groupby(['Country/Region','Date', ])['Confirmed', 'Deaths', 'Recovered']
temp = temp.sum().diff().reset_index()

mask = temp['Country/Region'] != temp['Country/Region'].shift(1)
temp.loc[mask, 'Confirmed'] = np.nan
temp.loc[mask, 'Deaths'] = np.nan
temp.loc[mask, 'Recovered'] = np.nan
# renaming columns
temp.columns = ['Country/Region', 'Date', 'New cases', 'New deaths', 'New recovered']
# merging new values
full_grouped = pd.merge(full_grouped, temp, on=['Country/Region','Date'])
# filling na with 0
full_grouped = full_grouped.fillna(0)
# fixing data types
cols = ['New cases', 'New deaths', 'New recovered']
full_grouped[cols] = full_grouped[cols].astype('int')
# 
full_grouped['New cases'] = full_grouped['New cases'].apply(lambda x: 0 if x<0 else x)


  temp = full_grouped.groupby(['Country/Region','Date', ])['Confirmed', 'Deaths', 'Recovered']


In [92]:
full_grouped

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered
0,2020-01-22,Afghanistan,0,0,0.0,0.0,0,0,0
1,2020-01-22,Albania,0,0,0.0,0.0,0,0,0
2,2020-01-22,Algeria,0,0,0.0,0.0,0,0,0
3,2020-01-22,Andorra,0,0,0.0,0.0,0,0,0
4,2020-01-22,Angola,0,0,0.0,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...
92335,2021-05-21,Vietnam,4941,41,2689.0,2211.0,132,2,2
92336,2021-05-21,West Bank and Gaza,304968,3452,296901.0,4615.0,436,4,1258
92337,2021-05-21,Yemen,6632,1302,3179.0,2151.0,19,1,20
92338,2021-05-21,Zambia,92920,1266,91019.0,635.0,166,1,64


In [93]:
def get_country_code(name):
    try:
        return pycountry.countries.lookup(name).alpha_3
    except:
        None
full_grouped['iso_code'] = full_grouped['Country/Region'].apply(get_country_code)


In [94]:
full_grouped

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,iso_code
0,2020-01-22,Afghanistan,0,0,0.0,0.0,0,0,0,AFG
1,2020-01-22,Albania,0,0,0.0,0.0,0,0,0,ALB
2,2020-01-22,Algeria,0,0,0.0,0.0,0,0,0,DZA
3,2020-01-22,Andorra,0,0,0.0,0.0,0,0,0,AND
4,2020-01-22,Angola,0,0,0.0,0.0,0,0,0,AGO
...,...,...,...,...,...,...,...,...,...,...
92335,2021-05-21,Vietnam,4941,41,2689.0,2211.0,132,2,2,VNM
92336,2021-05-21,West Bank and Gaza,304968,3452,296901.0,4615.0,436,4,1258,
92337,2021-05-21,Yemen,6632,1302,3179.0,2151.0,19,1,20,YEM
92338,2021-05-21,Zambia,92920,1266,91019.0,635.0,166,1,64,ZMB


In [96]:
full_grouped.to_csv('pages/covid.csv')

In [42]:
#feature=['Confirmed','Recovered','Deaths']
color = px.colors.qualitative.Set1
full_grouped = full_grouped[full_grouped["Country/Region"]=='Albania']
temp = full_grouped.groupby(['Date']).agg({'Confirmed': "sum"}).reset_index()
temp1 = full_grouped.groupby(['Date']).agg({'Deaths': "sum"}).reset_index()
        
fig = go.Figure()
fig.add_trace(go.Line(
            x=temp['Date'],
            y=temp['Confirmed'],
            marker=dict(color=color[2]),
            hovertemplate='Date: %{x} <br>Count: %{y:,.2f}',))
fig.add_trace(go.Line(
            x=temp1['Date'],
            y=temp1['Deaths'],
            marker=dict(color=color[2]),
            hovertemplate='Date: %{x} <br>Count: %{y:,.2f}',))
fig


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [None]:
if(graph_type=="Total Count"):
    #barplot to show the changes in the covid 19 cases
        st.subheader('Changes in the covid cases over the world')
        fig = plot_snapshot_numbers(df, px.colors.qualitative.D3)
        st.plotly_chart(fig)

    if(graph_type=="Comparison of countries"):
        st.subheader('Top 10 countries with the highest Covid 19 cases')
        fig = plot_top_countries(df, px.colors.qualitative.D3)
        st.plotly_chart(fig)

        st.subheader('Timeline Comparision of covid 19 growth rate for various countries')
        #multiselect countries
        subset_data = df
        country_name_input = st.multiselect('Select Country Names', df.groupby('Country/Region').count().reset_index()['Country/Region'].tolist())
        # by country name
        if len(country_name_input) > 0:
            subset_data = df[df['Country/Region'].isin(country_name_input)]
    
    
            total_cases_graph  =alt.Chart(subset_data).transform_filter(
            alt.datum.Confirmed > 0  
            ).mark_line().encode(
            x=alt.X('yearquarter(Date)', type='nominal', title='Date'),
            y=alt.Y('sum(Confirmed):Q',  title='Confirmed cases'),
            color='Country/Region',
            tooltip = 'sum(Confirmed)',
            ).properties(
            width=850,
            height=500
            ).configure_axis(
            labelFontSize=17,
            titleFontSize=20
            )

            st.altair_chart(total_cases_graph)

In [None]:
def plot_map(df):
    """
    Function plots a hovering map 
    param df: dataframe
    return map
    """
    fig = px.choropleth(df,                             # Input Dataframe
                     locations="iso_code",           # identify country code column
                     color="Confirmed",                 # identify representing column
                     hover_name="Country/Region",        # identify hover name
                     animation_frame="Date",        # identify date column
                     projection="natural earth",        # select projection
                     color_continuous_scale = 'blues',  # select prefer color scale
                     range_color=[0,10000000]              # select range of dataset
                     )
    fig.update_layout(title="Use the slider to observe the rate of increase of the Covid 19 cases") 

In [None]:
fig = plot_map(full_grouped) 