# Dependencies

In [5]:
pip install plotly

Collecting plotly
[?25l  Downloading https://files.pythonhosted.org/packages/6d/ae/e9f208f59a074f17147295dff2111afec1b0fb9a2285d052e7eed12467c9/plotly-4.5.3-py2.py3-none-any.whl (7.1MB)
[K     |████████████████████████████████| 7.1MB 642kB/s eta 0:00:01
Collecting retrying>=1.3.3
  Downloading https://files.pythonhosted.org/packages/44/ef/beae4b4ef80902f22e3af073397f079c96969c69b2c7d52a57ea9ae61c9d/retrying-1.3.3.tar.gz
Building wheels for collected packages: retrying
  Building wheel for retrying (setup.py) ... [?25ldone
[?25h  Created wheel for retrying: filename=retrying-1.3.3-cp37-none-any.whl size=11429 sha256=f30b222e3b12ba843405dddb1b95017454f62323a76a5ad709deb9216ba315f4
  Stored in directory: /home/jovyan/.cache/pip/wheels/d7/a9/33/acc7b709e2a35caa7d4cae442f6fe6fbf2c43f80823d46460c
Successfully built retrying
Installing collected packages: retrying, plotly
Successfully installed plotly-4.5.3 retrying-1.3.3
Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install datetime

Collecting datetime
[?25l  Downloading https://files.pythonhosted.org/packages/73/22/a5297f3a1f92468cc737f8ce7ba6e5f245fcfafeae810ba37bd1039ea01c/DateTime-4.3-py2.py3-none-any.whl (60kB)
[K     |████████████████████████████████| 61kB 1.5MB/s eta 0:00:01
Collecting zope.interface
[?25l  Downloading https://files.pythonhosted.org/packages/40/e7/7e1060c2826d6b8c25bebbd1c96177f4b85ecfe3e2478d31acf9eaca3a11/zope.interface-4.7.1-cp37-cp37m-manylinux2010_x86_64.whl (169kB)
[K     |████████████████████████████████| 174kB 4.6MB/s eta 0:00:01
Installing collected packages: zope.interface, datetime
Successfully installed datetime-4.3 zope.interface-4.7.1
Note: you may need to restart the kernel to use updated packages.


In [147]:
import os
import pandas as pd
import numpy as np
import plotly
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime as dd
import datetime
pd.set_option('display.max_columns', 5000)

In [149]:
go.Figure()

# Helper Functions

### Automate data pull through git 
Since the data is used in a public repo, I want to 
automate this so I don't have to git pull everyday
    
I followed this guide:
[automate git pulls](https://setapp.com/how-to/show-hidden-files-on-mac)
    
Data pulls from:
[data](https://github.com/CSSEGISandData/COVID-19)

### date_parse
Not all the last_updated timestamps are the same format\
input: str that represents the timestamp\
returns: str that is a date

In [136]:
def date_parse(date_string):
    if len(date_string)>= 18:
        date_string = date_string[0:10]
        date_string = dd.strptime(date_string, '%Y-%m-%d').date()
    else:
        date_string = date_string.split(" ")[0]
        try:
            date_string = dd.strptime(date_string,'%m/%d/%Y').date()
        except:
            date_string = dd.strptime(date_string,'%m/%d/%y').date()
    return date_string

### csv_concat:
    input: str that represents the folder
    returns: df that is a concat of all csvs in that folder


In [137]:
def csv_concat():
    path = '/home/jovyan/work/github/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/'
    files = os.listdir(path)
    df_list = []
    for csv in files:
        if ('README' in csv or '.gitignore' in csv):
            continue
        
        df = pd.read_csv(path + csv)
        df_list.append(df)
    final = pd.concat(df_list, sort=False)[['Country/Region',
                                            'Confirmed',
                                            'Deaths',
                                            'Recovered', 
                                            'Last Update']]

    final['Last Update'] = final['Last Update'].apply(lambda x : date_parse(x))

    final = final.rename(columns = {'Country/Region':'country',
                                    'Confirmed':'confirmed',
                                    'Deaths':'deaths',
                                    'Recovered':'recovered', 
                                    'Last Update':'last_updated'})
    return final
raw_data = csv_concat()
raw_data

Unnamed: 0,country,confirmed,deaths,recovered,last_updated
0,Mainland China,65187.0,2615.0,20969.0,2020-02-26
1,Mainland China,1347.0,7.0,851.0,2020-02-26
2,Mainland China,1271.0,19.0,1033.0,2020-02-26
3,South Korea,1261.0,12.0,22.0,2020-02-26
4,Mainland China,1205.0,1.0,867.0,2020-02-26
...,...,...,...,...,...
62,US,1.0,0.0,0.0,2020-02-01
63,US,1.0,0.0,0.0,2020-02-01
64,US,1.0,0.0,0.0,2020-02-01
65,US,1.0,0.0,0.0,2020-02-01


#### Raw data grouped by country, and day

In [138]:
daily_country_data = raw_data.groupby(['last_updated','country']).agg({'confirmed':['sum'],
                                                      'deaths':['sum'],
                                                      'recovered':['sum']}).reset_index()
daily_country_data.columns = ['last_updated','country','confirmed','deaths','recovered']

daily_country_data

Unnamed: 0,last_updated,country,confirmed,deaths,recovered
0,2020-01-22,Hong Kong,0.0,0.0,0.0
1,2020-01-22,Japan,2.0,0.0,0.0
2,2020-01-22,Macau,1.0,0.0,0.0
3,2020-01-22,Mainland China,547.0,17.0,28.0
4,2020-01-22,South Korea,1.0,0.0,0.0
...,...,...,...,...,...
928,2020-03-07,Togo,1.0,0.0,0.0
929,2020-03-07,UK,206.0,2.0,18.0
930,2020-03-07,US,311.0,16.0,5.0
931,2020-03-07,United Arab Emirates,45.0,0.0,7.0


#### Raw data grouped by day

In [139]:
daily_data = data.groupby(['last_updated']).agg({'confirmed':['sum'],
                                                      'deaths':['sum'],
                                                      'recovered':['sum']}).reset_index()
daily_data.columns = ['last_updated','confirmed','deaths','recovered']

daily_data

Unnamed: 0,last_updated,confirmed,deaths,recovered
0,2020-01-22,555.0,17.0,28.0
1,2020-01-23,653.0,18.0,30.0
2,2020-01-24,941.0,26.0,36.0
3,2020-01-25,1438.0,42.0,39.0
4,2020-01-26,2118.0,56.0,52.0
...,...,...,...,...
41,2020-03-03,94682.0,3176.0,48705.0
42,2020-03-04,93948.0,3248.0,50728.0
43,2020-03-05,97144.0,3346.0,53877.0
44,2020-03-06,103222.0,3463.0,56712.0


# Cases Confirmed, Death Toll, Recovered Count

In [148]:
fig = go.Figure()
fig.add_scatter(x=daily_data['last_updated'], y=daily_data['confirmed'], name = 'confirmed')
fig.add_scatter(x=daily_data['last_updated'], y=daily_data['deaths'], name = 'deaths')
fig.add_scatter(x=daily_data['last_updated'], y=daily_data['recovered'], name = 'recovered')

fig.update_layout(
    title = 'Covid-19 Overall Tracker',
    xaxis_title = 'Day',
    yaxis_title = 'People')

fig.show()

# Metric by Country

In [142]:
daily_country_data # this is the raw data grouped by day for each country

def metric_df(metric):
    final_df = daily_country_data[['last_updated','country', metric]]
    final_df = final_df.pivot(index = 'last_updated', columns = 'country', values = metric)
    return final_df.fillna(0)
metric_df('confirmed')

country,Azerbaijan,Afghanistan,Algeria,Andorra,Argentina,Armenia,Australia,Austria,Azerbaijan,Bahrain,Belarus,Belgium,Bhutan,Bosnia and Herzegovina,Brazil,Cambodia,Cameroon,Canada,Chile,Colombia,Costa Rica,Croatia,Czech Republic,Denmark,Dominican Republic,Ecuador,Egypt,Estonia,Faroe Islands,Finland,France,French Guiana,Georgia,Germany,Gibraltar,Greece,Hong Kong,Hungary,Iceland,India,Indonesia,Iran,Iraq,Ireland,Israel,Italy,Ivory Coast,Japan,Jordan,Kuwait,Latvia,Lebanon,Liechtenstein,Lithuania,Luxembourg,Macau,Mainland China,Malaysia,Malta,Martinique,Mexico,Monaco,Morocco,Nepal,Netherlands,New Zealand,Nigeria,North Ireland,North Macedonia,Norway,Oman,Others,Pakistan,Palestine,Peru,Philippines,Poland,Portugal,Qatar,Romania,Russia,Saint Barthelemy,San Marino,Saudi Arabia,Senegal,Serbia,Singapore,Slovakia,Slovenia,South Africa,South Korea,Spain,Sri Lanka,Sweden,Switzerland,Taiwan,Thailand,Togo,Tunisia,UK,US,Ukraine,United Arab Emirates,Vatican City,Vietnam
last_updated,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,547.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,639.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,916.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,5.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1399.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,7.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0
2020-01-26,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2062.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,4.0,8.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-03,0.0,0.0,5.0,0.0,1.0,0.0,35.0,21.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,47.0,1.0,0.0,0.0,9.0,5.0,6.0,0.0,7.0,0.0,4.0,0.0,0.0,204.0,0.0,0.0,196.0,0.0,0.0,100.0,0.0,11.0,0.0,0.0,2336.0,32.0,2.0,12.0,2502.0,0.0,293.0,5.0,0.0,0.0,0.0,0.0,0.0,3.0,30.0,80566.0,36.0,0.0,0.0,15.0,0.0,2.0,0.0,24.0,0.0,0.0,0.0,0.0,32.0,12.0,2118.0,15.0,0.0,0.0,0.0,0.0,2.0,7.0,3.0,0.0,0.0,10.0,0.0,2.0,0.0,220.0,0.0,0.0,0.0,5186.0,165.0,0.0,21.0,56.0,84.0,0.0,0.0,0.0,51.0,116.0,5.0,54.0,0.0,0.0
2020-03-04,0.0,0.0,24.0,0.0,2.0,0.0,66.0,29.0,0.0,52.0,24.0,23.0,0.0,0.0,8.0,0.0,0.0,12.0,1.0,0.0,0.0,20.0,8.0,20.0,0.0,10.0,0.0,0.0,4.0,0.0,285.0,0.0,0.0,262.0,4.0,9.0,105.0,2.0,26.0,28.0,0.0,2922.0,70.0,12.0,15.0,3089.0,0.0,331.0,0.0,0.0,4.0,13.0,4.0,0.0,0.0,0.0,80031.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0,6.0,0.0,0.0,0.0,56.0,15.0,0.0,0.0,0.0,0.0,0.0,2.0,5.0,32.0,4.0,0.0,12.0,16.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,5621.0,222.0,0.0,35.0,90.0,0.0,0.0,0.0,4.0,85.0,74.0,0.0,0.0,0.0,0.0
2020-03-05,0.0,0.0,0.0,0.0,0.0,0.0,68.0,41.0,12.0,55.0,0.0,50.0,0.0,4.0,0.0,0.0,0.0,38.0,12.0,0.0,0.0,0.0,12.0,0.0,0.0,39.0,3.0,3.0,0.0,12.0,377.0,0.0,12.0,482.0,0.0,31.0,105.0,4.0,34.0,30.0,0.0,3513.0,0.0,0.0,16.0,3858.0,0.0,360.0,0.0,116.0,0.0,16.0,0.0,0.0,0.0,0.0,80414.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,82.0,0.0,0.0,0.0,0.0,87.0,48.0,0.0,0.0,4.0,0.0,0.0,0.0,8.0,0.0,6.0,4.0,0.0,42.0,15.0,0.0,0.0,117.0,0.0,2.0,1.0,6088.0,259.0,0.0,94.0,114.0,44.0,47.0,0.0,0.0,115.0,186.0,0.0,58.0,0.0,0.0
2020-03-06,0.0,0.0,34.0,0.0,2.0,0.0,40.0,55.0,0.0,60.0,0.0,109.0,2.0,0.0,26.0,0.0,2.0,47.0,0.0,1.0,1.0,11.0,18.0,46.0,4.0,0.0,30.0,20.0,0.0,30.0,653.0,0.0,0.0,670.0,0.0,45.0,107.0,0.0,43.0,31.0,8.0,4747.0,40.0,36.0,42.0,4636.0,0.0,420.0,0.0,0.0,0.0,44.0,0.0,0.0,4.0,20.0,81348.0,83.0,0.0,0.0,12.0,0.0,0.0,0.0,128.0,4.0,0.0,0.0,6.0,108.0,0.0,1392.0,12.0,16.0,2.0,5.0,10.0,13.0,0.0,9.0,26.0,0.0,0.0,0.0,0.0,2.0,130.0,2.0,14.0,2.0,6593.0,400.0,0.0,101.0,214.0,90.0,48.0,1.0,0.0,163.0,202.0,0.0,0.0,2.0,0.0


In [143]:
country_recovered = metric_df('recovered')
country_confirmed = metric_df('confirmed')
country_deaths = metric_df('deaths')

In [144]:
def plot(data):
    fig = go.Figure()
    for country in list(data.columns):
        try:
            fig.add_scatter(x=data.index, y=data[country], name = country)
        except:
            print(country)
        


In [146]:
plot(country_confirmed)