In [1]:
import numpy as np
import pandas as pd
from pandas_summary import DataFrameSummary

import matplotlib as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 

init_notebook_mode(connected=True)
%matplotlib inline 

PLOT_PATH = '/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/html-graphs/'

# Loading Data 

In [2]:
# Root of the CSSE COVID-19 data directory. Defined once so the location only
# has to be changed in a single place.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
CSSE_DATA_PATH = '/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/data/COVID-19/csse_covid_19_data/'
TIME_SERIES_PATH = CSSE_DATA_PATH + 'csse_covid_19_time_series/'

# Global time series, one CSV per metric.
ts_covid_confirmed = pd.read_csv(TIME_SERIES_PATH + 'time_series_covid19_confirmed_global.csv')
ts_covid_deaths = pd.read_csv(TIME_SERIES_PATH + 'time_series_covid19_deaths_global.csv')
ts_covid_recovered = pd.read_csv(TIME_SERIES_PATH + 'time_series_covid19_recovered_global.csv')

# Single daily report snapshot.
daily_report = pd.read_csv(CSSE_DATA_PATH + 'csse_covid_19_daily_reports/04-02-2020.csv')

# Functions

# Preprocessing

In [3]:
def sum_country_region(df):
    """Collapse a frame to one row per 'Country/Region' by summing.

    Only numeric columns are summed; non-numeric columns (for example a
    province name column) are dropped explicitly via ``numeric_only=True``
    instead of relying on the pandas-version-dependent default of
    ``GroupBy.sum``.

    Note that *every* numeric column is summed, so coordinate columns such as
    'Lat'/'Long' (if present) are added up across the grouped rows as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Country/Region' column.

    Returns
    -------
    pandas.DataFrame
        One row per country, with 'Country/Region' kept as a regular column
        followed by the summed numeric columns.
    """
    return df.groupby('Country/Region', as_index=False).sum(numeric_only=True)


def header_to_row(df):
    """Reshape a wide frame (one column per date) into long format.

    'Country/Region', 'Lat' and 'Long' are kept as identifier columns; every
    other column header becomes a value in 'Date' and its cell a value in
    'Value'. The result is ordered by 'Value', largest first, and keeps the
    row labels assigned by the melt.
    """
    long_df = pd.melt(
        df,
        id_vars=['Country/Region', 'Lat', 'Long'],
        var_name='Date',
        value_name='Value',
    )
    return long_df.sort_values(by='Value', ascending=False)


def top_10_countries(df, n=10):
    """Return the names of the countries with the largest summed 'Value'.

    Only the 'Value' column is aggregated, so other columns of ``df``
    (for example a string 'Date' column) are neither summed nor required.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Country/Region' and 'Value' columns.
    n : int, default 10
        Number of countries to return.

    Returns
    -------
    list
        Country names ordered by total 'Value', largest first; shorter than
        ``n`` when ``df`` contains fewer countries.
    """
    totals = df.groupby('Country/Region', as_index=False)['Value'].sum()
    ranked = totals.sort_values(by='Value', ascending=False).head(n)

    return ranked['Country/Region'].tolist()


# Statistics

In [4]:
# Transformation recovered: one row per (country, date)
transformed_covid_recov = header_to_row(sum_country_region(ts_covid_recovered))

# Top 10 recovered countries, kept under its own name so the result is not
# immediately overwritten by the confirmed list below
top_10_recov_countries = top_10_countries(transformed_covid_recov)

# Transformation confirmed: one row per (country, date)
transformed_covid_conf = header_to_row(sum_country_region(ts_covid_confirmed))

# Top 10 confirmed countries; `countries` ends up holding this list, as before
countries = top_10_countries(transformed_covid_conf)

In [12]:
# Per-country totals of the long recovered frame, largest first.
# numeric_only=True keeps the string 'Date' column out of the sum, so the
# result has the same columns regardless of the pandas version's default.
# NOTE(review): 'Value' here is a sum over all dates of the melted series.
top_recov = (
    transformed_covid_recov
    .groupby('Country/Region', as_index=False)
    .sum(numeric_only=True)
    .sort_values(by='Value', ascending=False)
)

transformed_covid_recov

Unnamed: 0,Country/Region,Lat,Long,Date,Value
13249,China,1083.3367,3684.4197,4/4/20,76946
13068,China,1083.3367,3684.4197,4/3/20,76760
12887,China,1083.3367,3684.4197,4/2/20,76565
12706,China,1083.3367,3684.4197,4/1/20,76405
12525,China,1083.3367,3684.4197,3/31/20,76206
...,...,...,...,...,...
4468,Nigeria,9.0820,8.6753,2/15/20,0
4469,North Macedonia,41.6086,21.7453,2/15/20,0
4470,Norway,60.4720,8.4689,2/15/20,0
4471,Oman,21.0000,57.0000,2/15/20,0


In [6]:
# Per-country totals of the long confirmed frame, largest first.
# numeric_only=True keeps the string 'Date' column out of the sum, so the
# result has the same columns regardless of the pandas version's default.
# NOTE(review): 'Value' here is a sum over all dates of the melted series.
top_conf = (
    transformed_covid_conf
    .groupby('Country/Region', as_index=False)
    .sum(numeric_only=True)
    .sort_values(by='Value', ascending=False)
)

top_conf.head(10)

Unnamed: 0,Country/Region,Lat,Long,Value
36,China,80166.9158,272647.0578,4516642
169,US,2744.6748,-7082.7546,2128229
84,Italy,3182.0,888.0,1681364
154,Spain,2960.0,-296.0,1204247
65,Germany,3774.0,666.0,938675
80,Iran,2368.0,3922.0,734551
61,France,3291.3128,8586.5974,684540
173,United Kingdom,19982.2126,-35736.4278,310945
90,"Korea, South",2664.0,9472.0,297163
159,Switzerland,3464.5468,608.835,231843


In [7]:
# Names of the ten countries with the largest confirmed totals
top_10_conf_list = top_10_countries(top_conf)

# Keep only the recovered totals that belong to those ten countries
in_top_10_conf = top_recov['Country/Region'].isin(top_10_conf_list)
tmp_1 = top_recov.loc[in_top_10_conf]
tmp_1

Unnamed: 0,Country/Region,Lat,Long,Value
36,China,80166.9158,272647.0578,2692653
80,Iran,2368.0,3922.0,242549
84,Italy,3182.0,888.0,221595
154,Spain,2960.0,-296.0,217056
65,Germany,3774.0,666.0,160356
61,France,3291.3128,8586.5974,106320
90,"Korea, South",2664.0,9472.0,78011
169,US,2744.6748,-7082.7546,61652
159,Switzerland,3464.5468,608.835,27308
173,United Kingdom,19982.2126,-35736.4278,2835


In [8]:
# Combine confirmed and recovered totals in one frame.
# The recovered totals are matched by country name instead of by row label:
# the row labels of `top_conf` and `tmp_1` come from two independent groupbys
# and only coincide when both datasets list exactly the same countries.
recovered_by_country = tmp_1.set_index('Country/Region')['Value']
top_conf['recovered'] = top_conf['Country/Region'].map(recovered_by_country)

# Share of recovered relative to confirmed, in percent (NaN for countries
# that are not part of `tmp_1`).
top_conf['recovered_ratio'] = top_conf['recovered'] / top_conf['Value'] * 100

top_conf.head(10)

Unnamed: 0,Country/Region,Lat,Long,Value,recovered,recovered_ratio
36,China,80166.9158,272647.0578,4516642,2692653.0,59.616259
169,US,2744.6748,-7082.7546,2128229,61652.0,2.896869
84,Italy,3182.0,888.0,1681364,221595.0,13.179478
154,Spain,2960.0,-296.0,1204247,217056.0,18.024209
65,Germany,3774.0,666.0,938675,160356.0,17.083229
80,Iran,2368.0,3922.0,734551,242549.0,33.020035
61,France,3291.3128,8586.5974,684540,106320.0,15.531598
173,United Kingdom,19982.2126,-35736.4278,310945,2835.0,0.911737
90,"Korea, South",2664.0,9472.0,297163,78011.0,26.251922
159,Switzerland,3464.5468,608.835,231843,27308.0,11.778661


In [9]:
# Compact view: country and recovered ratio of the first ten rows,
# renumbered from 0 and rounded to one decimal.
(
    top_conf
    .drop(columns=['Lat', 'Long', 'Value', 'recovered'])
    .head(10)
    .reset_index(drop=True)
    .round(1)
)

Unnamed: 0,Country/Region,recovered_ratio
0,China,59.6
1,US,2.9
2,Italy,13.2
3,Spain,18.0
4,Germany,17.1
5,Iran,33.0
6,France,15.5
7,United Kingdom,0.9
8,"Korea, South",26.3
9,Switzerland,11.8


# Plots

## Recovered

In [26]:
# Aggregate the recovered series per country, then reshape it to long format
transformed_covid_recov = (
    ts_covid_recovered
    .pipe(sum_country_region)
    .pipe(header_to_row)
)

# The ten countries with the largest recovered totals
countries = top_10_countries(transformed_covid_recov)

### Top 10 Countries

In [27]:
fig = go.Figure()

# One line per country. sort_index() undoes the by-value ordering applied in
# header_to_row, putting the rows back into the order produced by the melt.
for country in countries:
    is_country = transformed_covid_recov['Country/Region'] == country
    recov_country = transformed_covid_recov.loc[is_country].sort_index()

    trace = go.Scatter(
        x=recov_country['Date'],
        y=recov_country['Value'],
        name=country,
        opacity=0.8,
    )
    fig.add_trace(trace)

# Figure title
fig.update_layout(title_text="Top 10 recovered countries")

# Reversed xaxis
#fig['layout']['xaxis']['autorange'] = "reversed"

fig.show()

# Also write the figure to a standalone HTML file
plot(fig, filename=PLOT_PATH + 'Top 10 recovered countries.html', auto_open=False)

'/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/html-graphs/Top 10 recovered countries.html'

### Germany

In [13]:
fig = go.Figure()
countries = ['Germany']

# Same plot as for the top 10, restricted to the single listed country
for country in countries:
    is_country = transformed_covid_recov['Country/Region'] == country
    recov_country = transformed_covid_recov.loc[is_country].sort_index()

    trace = go.Scatter(
        x=recov_country['Date'],
        y=recov_country['Value'],
        name=country,
        opacity=0.8,
    )
    fig.add_trace(trace)

# Figure title
fig.update_layout(title_text="Recovered in Germany")

# Reversed xaxis
#fig['layout']['xaxis']['autorange'] = "reversed"

fig.show()
# plot(fig, filename=PLOT_PATH + 'Recovered in Germany.html', auto_open=False)

In [14]:
# Recovered rows for Germany, still in the by-value order of the long frame
is_germany = transformed_covid_recov['Country/Region'] == 'Germany'
recov_country = transformed_covid_recov.loc[is_germany]

## Confirmed

## Confirmed cases

In [15]:
# Aggregate the confirmed series per country, then reshape it to long format
transformed_covid_conf = (
    ts_covid_confirmed
    .pipe(sum_country_region)
    .pipe(header_to_row)
)

# The ten countries with the largest confirmed totals
countries = top_10_countries(transformed_covid_conf)

In [16]:
fig = go.Figure()

# One line per country. sort_index() undoes the by-value ordering applied in
# header_to_row, putting the rows back into the order produced by the melt.
for country in countries:
    is_country = transformed_covid_conf['Country/Region'] == country
    conf_country = transformed_covid_conf.loc[is_country].sort_index()

    trace = go.Scatter(
        x=conf_country['Date'],
        y=conf_country['Value'],
        name=country,
        opacity=0.8,
    )
    fig.add_trace(trace)

# Figure title
fig.update_layout(title_text="Top 10 countries with confirmed cases")

# Reversed xaxis
#fig['layout']['xaxis']['autorange'] = "reversed"

fig.show()
# Also write the figure to a standalone HTML file
plot(fig, filename=PLOT_PATH + 'Top 10 countries with confirmed cases.html', auto_open=False)

'/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/html-graphs/Top 10 countries with confirmed cases.html'

# All cases by country

In [29]:
# Per-country long-format frames for the three metrics
transformed_covid_conf = ts_covid_confirmed.pipe(sum_country_region).pipe(header_to_row)
transformed_covid_recov = ts_covid_recovered.pipe(sum_country_region).pipe(header_to_row)
transformed_covid_death = ts_covid_deaths.pipe(sum_country_region).pipe(header_to_row)


In [30]:
# Tag every row of each long frame with the metric it belongs to
for frame, tag in (
    (transformed_covid_conf, 'confirmed'),
    (transformed_covid_recov, 'recovered'),
    (transformed_covid_death, 'death'),
):
    frame['label'] = tag

In [35]:
fig = go.Figure()

countries = ['Germany']

for country in countries:
    # Rows of the current country from each metric frame, put back into the
    # row order produced by the melt (header_to_row sorts by value).
    conf_country, recov_country, death_country = (
        frame[frame['Country/Region'] == country].sort_index()
        for frame in (transformed_covid_conf, transformed_covid_recov, transformed_covid_death)
    )

    # Legend entries
    confirmed = 'confirmed in '+ country
    recovered = 'recovered in '+ country
    death = 'death in '+ country

    # One trace per metric, added in the order confirmed, recovered, death
    for series, legend in (
        (conf_country, confirmed),
        (recov_country, recovered),
        (death_country, death),
    ):
        fig.add_trace(go.Scatter(
            x=series['Date'],
            y=series['Value'],
            name=legend,
            opacity=0.8))

# Figure title
fig.update_layout(title_text="Germany cases")

fig.show()

# Also write the figure to a standalone HTML file
plot(fig, filename=PLOT_PATH + 'Germany cases.html', auto_open=False)

'/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/html-graphs/Germany cases.html'

In [36]:
fig = go.Figure()

countries = ['Vietnam']

for country in countries:
    # Rows of the current country from each metric frame, put back into the
    # row order produced by the melt (header_to_row sorts by value).
    conf_country, recov_country, death_country = (
        frame[frame['Country/Region'] == country].sort_index()
        for frame in (transformed_covid_conf, transformed_covid_recov, transformed_covid_death)
    )

    # Legend entries
    confirmed = 'confirmed in '+ country
    recovered = 'recovered in '+ country
    death = 'death in '+ country

    # One trace per metric, added in the order confirmed, recovered, death
    for series, legend in (
        (conf_country, confirmed),
        (recov_country, recovered),
        (death_country, death),
    ):
        fig.add_trace(go.Scatter(
            x=series['Date'],
            y=series['Value'],
            name=legend,
            opacity=0.8))

# Figure title
fig.update_layout(title_text="Vietnam cases")

fig.show()

# Also write the figure to a standalone HTML file
plot(fig, filename=PLOT_PATH + 'Vietnam cases.html', auto_open=False)

'/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/html-graphs/Vietnam cases.html'

# USA

In [2]:
# Directory holding the CSSE time-series CSVs. Defined once so the location
# only has to be changed in a single place.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
US_TIME_SERIES_PATH = '/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/data/COVID-19/csse_covid_19_data/csse_covid_19_time_series/'

# US time series, one CSV per metric.
ts_covid_confirmed_usa = pd.read_csv(US_TIME_SERIES_PATH + 'time_series_covid19_confirmed_US.csv')
ts_covid_deaths_usa = pd.read_csv(US_TIME_SERIES_PATH + 'time_series_covid19_deaths_US.csv')


In [13]:
def sum_province_state(df):
    """Collapse a frame to one row per 'Province_State' by summing.

    Only numeric columns are summed; non-numeric columns are dropped
    explicitly via ``numeric_only=True`` instead of relying on the
    pandas-version-dependent default of ``GroupBy.sum``. Every numeric column
    is summed, including identifier-like or coordinate columns if present.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Province_State' column.

    Returns
    -------
    pandas.DataFrame
        One row per state, with 'Province_State' kept as a regular column
        followed by the summed numeric columns.
    """
    return df.groupby('Province_State', as_index=False).sum(numeric_only=True)


def header_to_row_USA(df):
    """Reshape a wide per-state frame (one column per date) into long format.

    The columns 'UID', 'code3', 'FIPS', 'Lat' and 'Long_' are discarded,
    'Province_State' is kept as the identifier, and every remaining column
    header becomes a value in 'Date' with its cell in 'Value'. The result is
    ordered by 'Value', largest first.

    The input frame is left untouched: the columns are dropped on a copy, so
    the function can be called repeatedly on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Province_State' and the five columns listed above.

    Returns
    -------
    pandas.DataFrame
        Long frame with the columns 'Province_State', 'Date' and 'Value'.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from ``df``.
    """
    trimmed = df.drop(columns=['UID', 'code3', 'FIPS', 'Lat', 'Long_'])
    return trimmed.melt(id_vars=['Province_State'],
                        var_name='Date', value_name='Value').sort_values(by='Value', ascending=False)


def top_10_province_state(df, n=10):
    """Return the names of the states with the largest summed 'Value'.

    Only the 'Value' column is aggregated, so other columns of ``df``
    (for example a string 'Date' column) are neither summed nor required.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Province_State' and 'Value' columns.
    n : int, default 10
        Number of states to return.

    Returns
    -------
    list
        State names ordered by total 'Value', largest first; shorter than
        ``n`` when ``df`` contains fewer states.
    """
    totals = df.groupby('Province_State', as_index=False)['Value'].sum()
    ranked = totals.sort_values(by='Value', ascending=False).head(n)

    return ranked['Province_State'].tolist()

In [14]:
# Aggregate the US confirmed series per state, then reshape it to long format
transformed_covid_conf_usa = (
    ts_covid_confirmed_usa
    .pipe(sum_province_state)
    .pipe(header_to_row_USA)
)

# The ten states with the largest confirmed totals
states = top_10_province_state(transformed_covid_conf_usa)

In [15]:
# Totals per (state, date) pair; show the ten largest
state_date_totals = transformed_covid_conf_usa.groupby(['Province_State', 'Date'], as_index=False).sum()
state_date_totals.sort_values(by='Value', ascending=False).head(10)

Unnamed: 0,Province_State,Date,Value
2663,New York,4/2/20,92506
2662,New York,4/1/20,83948
2655,New York,3/31/20,75833
2654,New York,3/30/20,66663
2652,New York,3/29/20,59648
2651,New York,3/28/20,52410
2650,New York,3/27/20,44876
2649,New York,3/26/20,37877
2648,New York,3/25/20,30841
2647,New York,3/24/20,25681


In [17]:
fig = go.Figure()

# One line per state. sort_index() undoes the by-value ordering applied in
# header_to_row_USA, putting the rows back into the order produced by the melt.
for state in states:
    is_state = transformed_covid_conf_usa['Province_State'] == state
    conf_states = transformed_covid_conf_usa.loc[is_state].sort_index()

    trace = go.Scatter(
        x=conf_states['Date'],
        y=conf_states['Value'],
        name=state,
        opacity=0.8,
    )
    fig.add_trace(trace)

# Figure title
fig.update_layout(title_text="Confirmed cases in USA by States")

# Reversed xaxis
#fig['layout']['xaxis']['autorange'] = "reversed"

fig.show()
# Also write the figure to a standalone HTML file
plot(fig, filename=PLOT_PATH + 'Confirmed cases in USA by States.html', auto_open=False)

'/Users/andang/Documents/workspace/private/python/COVID-19-Dashboard/html-graphs/Confirmed cases in USA by States.html'

In [9]:
# All rows recorded for New York on 3/30/20
is_new_york = transformed_covid_conf_usa['Province_State'] == 'New York'
is_march_30 = transformed_covid_conf_usa['Date'] == '3/30/20'
transformed_covid_conf_usa.loc[is_new_york & is_march_30]

Unnamed: 0,Province_State,Date,Value
223067,New York,3/30/20,37453
223096,New York,3/30/20,9326
223066,New York,3/30/20,7344
223088,New York,3/30/20,5791
223080,New York,3/30/20,2511
...,...,...,...
223079,New York,3/30/20,0
223086,New York,3/30/20,0
223098,New York,3/30/20,0
224437,New York,3/30/20,0


In [70]:
# `conf_states` is whatever the state-plot loop assigned on its last iteration
conf_states['Province_State'].values

NameError: name 'unique' is not defined