### Read And Understand World Data

In [1]:
import pandas as pd

In [70]:
# Download the OWID COVID-19 dataset and its codebook (one row per dataset column) from GitHub.
# Both reads hit the network every time this cell runs.
full_world_df = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv')
cols_world_df = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-codebook.csv')

In [3]:
# Dimensions (rows, columns) of the world dataset.
full_world_df.shape

(47116, 41)

In [4]:
# Show the codebook: each row names a dataset column with its description and source.
cols_world_df

Unnamed: 0,column,description,source
0,iso_code,ISO 3166-1 alpha-3 – three-letter country codes,International Organization for Standardization
1,continent,Continent of the geographical location,Our World in Data
2,location,Geographical location,Our World in Data
3,date,Date of observation,Our World in Data
4,total_cases,Total confirmed cases of COVID-19,European Centre for Disease Prevention and Con...
5,new_cases,New confirmed cases of COVID-19,European Centre for Disease Prevention and Con...
6,new_cases_smoothed,New confirmed cases of COVID-19 (7-day smoothed),European Centre for Disease Prevention and Con...
7,total_deaths,Total deaths attributed to COVID-19,European Centre for Disease Prevention and Con...
8,new_deaths,New deaths attributed to COVID-19,European Centre for Disease Prevention and Con...
9,new_deaths_smoothed,New deaths attributed to COVID-19 (7-day smoot...,European Centre for Disease Prevention and Con...


Line Graph Columns = [total_cases, new_cases, total_deaths, new_deaths, total_tests, new_tests, tests_per_case, positive_rate]\
    - Line Graph Options: location -- Dropdown, per capita or total -- Radio Buttons

In [5]:
# Preview the first five rows of the world data.
full_world_df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,ABW,North America,Aruba,2020-03-13,2.0,2.0,,0.0,0.0,,...,35973.781,,,11.62,,,,,76.29,
1,ABW,North America,Aruba,2020-03-19,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
2,ABW,North America,Aruba,2020-03-20,4.0,2.0,0.286,0.0,0.0,0.0,...,35973.781,,,11.62,,,,,76.29,
3,ABW,North America,Aruba,2020-03-21,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
4,ABW,North America,Aruba,2020-03-22,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,


### Read And Understand United States Data

In [6]:
# Download the daily per-state CSV from the covidtracking.com API (network access required).
full_usa_df = pd.read_csv('https://api.covidtracking.com/v1/states/daily.csv')

In [7]:
# Preview the first five rows of the raw US data.
full_usa_df.head()

Unnamed: 0,date,state,positive,negative,pending,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,...,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,20200929,AK,8674.0,442382.0,,451056.0,49.0,,,,...,451056,0,0,afd7a94cb0564b8c08d0c7d5a35034687ec529f9,0,0,0,0,0,
1,20200929,AL,153554.0,983163.0,,1119712.0,773.0,17182.0,,1802.0,...,1136717,16,91,f5cbe33a7d909f209064f42c91980edc96c86a63,0,0,0,0,0,
2,20200929,AR,82755.0,927902.0,,1007905.0,491.0,5354.0,231.0,,...,1010657,21,106,9674fdb795a3ec95f1a8018f9c8115611270bd2a,0,0,0,0,0,
3,20200929,AS,0.0,1571.0,,1571.0,,,,,...,1571,0,0,713455ea3b0b8f55ade2e9e1ed521e7e7e43ff81,0,0,0,0,0,
4,20200929,AZ,218184.0,1239906.0,,1453528.0,540.0,22047.0,119.0,,...,1458090,9,28,aff6e79d8385633b46e9e237ccc3ec4b39f3138e,0,0,0,0,0,


In [8]:
# Dimensions (rows, columns) of the raw US data.
full_usa_df.shape

(11746, 54)

### Preprocessing United States Data

In [9]:
# Columns of the raw US frame that the dashboard keeps; every other column is dropped.
imp_features = [
    'date',
    'state',
    'death',
    'deathIncrease',
    'lastUpdateEt',
    'negative',
    'onVentilatorCurrently',
    'positive',
    'positiveIncrease',
    'recovered',
    'totalTestResults',
    'totalTestResultsIncrease',
]

In [10]:
# Keep only the columns listed in imp_features and give the metric columns display-friendly
# names. The rename is chained onto the selection (instead of rename(..., inplace=True) on
# the selected subset) so a brand-new frame is produced and pandas has no reason to emit
# SettingWithCopyWarning.
full_usa_df = full_usa_df[imp_features].rename(
    columns={
        'death': 'Total Deaths',
        'deathIncrease': 'New Deaths',
        'negative': 'Total Negative Cases',
        'onVentilatorCurrently': 'Total Currently On Ventilator',
        'positive': 'Total Positive Cases',
        'positiveIncrease': 'New Positive Cases',
        'recovered': 'Total Recovered',
        'totalTestResults': 'Total Test Results',
        'totalTestResultsIncrease': 'New Test Results',
    }
)

In [11]:
# Maps full US state names (plus the District of Columbia) to their two-letter abbreviations.
# Dict taken from this gist: https://gist.github.com/rogerallen/1583593
# Territories are not listed here, so codes such as 'AS' are absent from the values.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

# Keep only rows whose state code appears in us_state_abbrev (the 50 states plus DC).
# The explicit .copy() makes the result an independent frame, so writing a column into
# full_usa_df afterwards does not raise pandas' SettingWithCopyWarning.
full_usa_df = full_usa_df[full_usa_df['state'].isin(us_state_abbrev.values())].copy()

# Flip the dictionary: two-letter abbreviation -> full state name
us_state_abbrev_dict = {abbrev: name for name, abbrev in us_state_abbrev.items()}

In [12]:
def get_state_df(df, state):
    """Return the rows of `df` whose 'state' column equals `state`.

    The original index labels are kept; an unknown `state` yields an empty frame.
    """
    is_requested_state = df['state'] == state
    return df[is_requested_state]

In [13]:
# Metric columns offered in the choropleth dropdown; the US callback also draws one
# line graph per entry, in this order.
dropdownoptions = [
    'Total Positive Cases',
    'New Positive Cases',
    'Total Deaths',
    'New Deaths',
    'Total Currently On Ventilator',
    'Total Negative Cases',
    'Total Recovered',
    'Total Test Results',
    'New Test Results',
]

In [14]:
import datetime
import time

# Parse the YYYYMMDD-encoded 'date' column into pandas timestamps. `assign` builds a new
# frame instead of writing a column into full_usa_df, which was produced by filtering
# another frame and would otherwise be a candidate for SettingWithCopyWarning.
full_usa_df = full_usa_df.assign(date=pd.to_datetime(full_usa_df['date'], format='%Y%m%d'))

def convert_to_unixtime(dt):
    """Return `dt` as whole seconds since the Unix epoch.

    `dt` must provide `timetuple()`; `time.mktime` interprets that tuple in the
    machine's local timezone.
    """
    local_time_tuple = dt.timetuple()
    epoch_seconds = time.mktime(local_time_tuple)
    return int(epoch_seconds)

def convert_to_datetime(dt):
    """Format a Unix timestamp as a 'YYYY-MM-DD HH:MM:SS' string in local time.

    `dt` is passed through `int()` first, so numeric strings and floats are accepted
    (floats are truncated).
    """
    epoch_seconds = int(dt)
    local_moment = datetime.datetime.fromtimestamp(epoch_seconds)
    return local_moment.strftime('%Y-%m-%d %H:%M:%S')

In [15]:
# Preview the US data after column selection, renaming, state filtering and date parsing.
full_usa_df.head()

Unnamed: 0,date,state,Total Deaths,New Deaths,lastUpdateEt,Total Negative Cases,Total Currently On Ventilator,Total Positive Cases,New Positive Cases,Total Recovered,Total Test Results,New Test Results
0,2020-09-29,AK,56.0,0,9/29/2020 03:59,442382.0,7.0,8674.0,125,4324.0,451056.0,6265
1,2020-09-29,AL,2517.0,16,9/29/2020 11:00,983163.0,,153554.0,571,64583.0,1119712.0,3366
2,2020-09-29,AR,1350.0,21,9/29/2020 00:00,927902.0,98.0,82755.0,706,74440.0,1007905.0,10207
4,2020-09-29,AZ,5632.0,9,9/29/2020 00:00,1239906.0,64.0,218184.0,674,35125.0,1453528.0,4816
5,2020-09-29,CA,15640.0,32,9/29/2020 02:59,13806120.0,,807425.0,2162,,14613545.0,128693


In [16]:
# Alias, not a copy: usa_df and full_usa_df refer to the same DataFrame object.
usa_df = full_usa_df

In [17]:
# Most recent date present in the US data.
usa_df.date.max()

Timestamp('2020-09-29 00:00:00')

In [18]:
# Snapshot of the US data restricted to the rows dated on the latest available day.
usa_table_df = usa_df.loc[usa_df['date'].eq(usa_df['date'].max())]

### Preprocessing World Data

In [72]:
# Raw OWID metric columns of interest (totals, daily values, smoothed values and their
# per-capita variants).
important_cols = [
    'total_cases', 'new_cases', 'new_cases_smoothed',
    'total_deaths', 'new_deaths', 'new_deaths_smoothed',
    'total_tests', 'new_tests', 'new_tests_smoothed',
    'total_cases_per_million', 'new_cases_per_million', 'new_cases_smoothed_per_million',
    'total_deaths_per_million', 'new_deaths_per_million', 'new_deaths_smoothed_per_million',
    'total_tests_per_thousand', 'new_tests_per_thousand', 'new_tests_smoothed_per_thousand',
]

# Display names used by the dashboard. lg_cols and tb_cols below are positionally aligned
# with this list: entry i of either one is renamed to MAIN_cols[i].
MAIN_cols = [
    'Total Cases', 'New Cases',
    'Total Deaths', 'New Deaths',
    'Total Tests', 'New Tests',
    'Total Cases Per Million', 'New Cases Per Million',
    'Total Deaths Per Million', 'New Deaths Per Million',
    'Total Tests Per Thousand', 'New Tests Per Thousand',
]

# Source columns for the line graphs: the "new" metrics use the smoothed series.
lg_cols = [
    'total_cases', 'new_cases_smoothed',
    'total_deaths', 'new_deaths_smoothed',
    'total_tests', 'new_tests_smoothed',
    'total_cases_per_million', 'new_cases_smoothed_per_million',
    'total_deaths_per_million', 'new_deaths_smoothed_per_million',
    'total_tests_per_thousand', 'new_tests_smoothed_per_thousand',
]

# Source columns for the table: the "new" metrics use the unsmoothed series.
tb_cols = [
    'total_cases', 'new_cases',
    'total_deaths', 'new_deaths',
    'total_tests', 'new_tests',
    'total_cases_per_million', 'new_cases_per_million',
    'total_deaths_per_million', 'new_deaths_per_million',
    'total_tests_per_thousand', 'new_tests_per_thousand',
]

# The first six display names are the absolute metrics, the last six the per-capita ones.
total_line_graph_cols = MAIN_cols[:6]
per_capita_line_graph_cols = MAIN_cols[6:]

# Column order of the frames fed to the line graphs and the table.
MAIN_total_cols = ['Date', 'Country', *MAIN_cols]

In [79]:
# Only use countries that recorded enough data. (The 'date'/'location' columns are renamed
# further down in this cell; no forward fill of NA values is performed here.)
def get_important_countries(countries_list, df=None, min_rows=250):
    """Return the entries of `countries_list` that have more than `min_rows` rows of data.

    Rows are counted once up front instead of re-filtering the whole frame for every
    country, so the cost no longer grows with len(countries_list) * len(df).

    Parameters
    ----------
    countries_list : iterable
        Candidate values of the 'location' column; the output preserves their order.
    df : pandas.DataFrame, optional
        Frame with a 'location' column. Defaults to the module-level `full_world_df`.
    min_rows : int, optional
        A country is kept only when its row count is strictly greater than this value.

    Returns
    -------
    list
        The qualifying countries. Names that never occur in `df` are left out.
    """
    if df is None:
        df = full_world_df
    # A plain dict lookup avoids any label-vs-position ambiguity of Series.get.
    rows_per_country = df['location'].value_counts().to_dict()
    return [
        country
        for country in countries_list
        if rows_per_country.get(country, 0) > min_rows
    ]

countries_list = full_world_df['location'].unique()
imp_countries = get_important_countries(countries_list)

# Filter to the selected countries and rename in one chained expression, so world_df is a
# fresh frame. Calling rename(..., inplace=True) on a filtered selection, as was done
# before, can raise pandas' SettingWithCopyWarning.
world_df = (
    full_world_df
    .loc[full_world_df['location'].isin(imp_countries)]
    .rename(columns={'date': 'Date', 'location': 'Country'})
)

In [80]:
# Create DF for Line Graphs: relabel the lg_cols columns with their MAIN_cols display names
# (paired by position), then keep only Date, Country and those metrics.
line_world_df = world_df.rename(columns=dict(zip(lg_cols, MAIN_cols)))[MAIN_total_cols]

def get_country_df(country):
    """Return the rows of the module-level `line_world_df` whose 'Country' equals `country`."""
    is_requested_country = line_world_df['Country'] == country
    return line_world_df[is_requested_country]

In [81]:
# Create DF for World Table: relabel the tb_cols columns with their MAIN_cols display names
# (paired by position) and keep only Date, Country and those metrics...
world_table_df = world_df.rename(columns=dict(zip(tb_cols, MAIN_cols)))[MAIN_total_cols]
# ...then restrict the table to the rows carrying the largest 'Date' value.
world_table_df = world_table_df.loc[world_table_df['Date'].eq(world_table_df['Date'].max())]

In [82]:
# Number of rows left in the table after restricting it to the latest date.
len(world_table_df)

66

In [83]:
# Preview the first 15 rows of the world table.
world_table_df.head(15)

Unnamed: 0,Date,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Tests,New Tests,Total Cases Per Million,New Cases Per Million,Total Deaths Per Million,New Deaths Per Million,Total Tests Per Thousand,New Tests Per Thousand
471,2020-09-30,Afghanistan,39254.0,15.0,1458.0,2.0,,,1008.366,0.385,37.453,0.051,,
1540,2020-09-30,United Arab Emirates,93090.0,995.0,416.0,3.0,9602111.0,98412.0,9412.157,100.603,42.061,0.303,970.852,9.95
2048,2020-09-30,Armenia,49901.0,327.0,958.0,1.0,,,16840.047,110.352,323.295,0.337,,
2518,2020-09-30,Australia,27063.0,19.0,882.0,7.0,7589332.0,26010.0,1061.299,0.745,34.588,0.275,297.622,1.02
2793,2020-09-30,Austria,44607.0,722.0,796.0,6.0,1617987.0,31409.0,4952.811,80.165,88.382,0.666,179.649,3.487
3068,2020-09-30,Azerbaijan,40119.0,58.0,590.0,2.0,,,3956.831,5.72,58.19,0.197,,
3526,2020-09-30,Belgium,117021.0,15.0,10001.0,0.0,3198628.0,27182.0,10097.056,1.294,862.928,0.0,275.991,2.345
4799,2020-09-30,Bahrain,70422.0,574.0,246.0,1.0,1429432.0,11303.0,41386.168,337.333,144.571,0.588,840.06,6.643
5475,2020-09-30,Belarus,78260.0,314.0,828.0,6.0,1792426.0,7315.0,8282.077,33.23,87.625,0.635,189.688,0.774
6339,2020-09-30,Brazil,4777522.0,32058.0,142921.0,863.0,6421441.0,,22476.173,150.819,672.381,4.06,30.21,


### Dash Application

In [112]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px

In [113]:
# External CSS applied to the whole app.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# The Dash application object; the callbacks further down are registered on it.
covidapp = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Page layout: a title followed by two tabs ('World' and 'United States').
# Every component id declared here is referenced by an Input/Output of a callback below.
covidapp.layout = html.Div([
    html.H1('Covid Dashboard', style={'text-align':'center'}),
    
    dcc.Tabs([
        
        # ---- World tab: country dropdown + Total/Per Capita switch driving six line graphs ----
        dcc.Tab(label='World', children=[
    
        html.Div([
            html.P('Select Country: '),
        
            # Options come from imp_countries.
            # NOTE(review): the default assumes 'United States' is present in imp_countries — confirm.
            dcc.Dropdown(
            id='country',
            options=[{'label': i, 'value': i} for i in imp_countries],
            value='United States'
            )
        
        ],style={'width':'40%', 'display':'inline-block'}),
        
        html.Div([
            html.P('Select Metric: '),
        
            # The world callback compares this value against 'Total' to pick the column set.
            dcc.RadioItems(
            id='total_percapita',
            options=[{'label': i, 'value': i} for i in ['Total', 'Per Capita']],
            value='Total',
            labelStyle={'display':'inline-block','text-align':'justify'}
            )
        ],style={'width':'40%','display': 'inline-block', 'float':'right'}),

        # One graph per metric, each followed by a small source caption.
        dcc.Graph(id='totalcases_line'),
        html.P('Source: European Centre for Disease Prevention and Control', style={'fontSize':10,'text-align':'center'}),
        dcc.Graph(id='newcases_line'),
        html.P('Source: European Centre for Disease Prevention and Control', style={'fontSize':10,'text-align':'center'}),
        dcc.Graph(id='totaldeaths_line'),
        html.P('Source: European Centre for Disease Prevention and Control', style={'fontSize':10,'text-align':'center'}),
        dcc.Graph(id='newdeaths_line'),
        html.P('Source: European Centre for Disease Prevention and Control', style={'fontSize':10,'text-align':'center'}),
        dcc.Graph(id='totaltests_line'),
        html.P('Source: National government reports', style={'fontSize':10,'text-align':'center'}),
        dcc.Graph(id='newtests_line'),
        html.P('Source: National government reports', style={'fontSize':10,'text-align':'center'})
    
        ]),
    
        # ---- United States tab: choropleth (metric dropdown + date slider) and per-state line graphs ----
        dcc.Tab(label='United States', children=[
            
            html.Div([
                # Metric used to colour the choropleth.
                dcc.Dropdown(
                id='choropleth_dropdown',
                options=[{'label': i, 'value': i} for i in dropdownoptions],
                value='Total Positive Cases'
                )
            ]),
            
            dcc.Graph(id='choropleth'),
            
            html.Div([
                # Slider values are Unix timestamps produced by convert_to_unixtime;
                # one step is 86400 seconds, i.e. one day. It starts on the latest date.
                dcc.Slider(
                    id='choropleth_slider',
                    min=convert_to_unixtime(usa_df['date'].min()),
                    max=convert_to_unixtime(usa_df['date'].max()),
                    value=convert_to_unixtime(usa_df['date'].max()),
                    step=86400
                )
            ]),
            # Filled by the US callback with the human-readable selected date.
            html.Div(id='selected_date'),
            
            html.Div([
                # Lists full state names (the values of the abbreviation -> name mapping).
                dcc.Dropdown(
                    id='state',
                    options=[{'label': i, 'value': i} for i in us_state_abbrev_dict.values()],
                    value='New Jersey'
                )
            ]),

            # One graph per entry of dropdownoptions, in the same order.
            dcc.Graph(id='total_positive_line'),
            dcc.Graph(id='new_positive_line'),
            dcc.Graph(id='total_death_line'),
            dcc.Graph(id='new_death_line'),
            dcc.Graph(id='total_ventilator_line'),
            dcc.Graph(id='total_negative_line'),
            dcc.Graph(id='total_recovered_line'),
            dcc.Graph(id='total_test_line'),
            dcc.Graph(id='new_test_line')
            
        ])
        
    ])
])

@covidapp.callback(
    [Output(graph_id, 'figure') for graph_id in (
        'totalcases_line',
        'newcases_line',
        'totaldeaths_line',
        'newdeaths_line',
        'totaltests_line',
        'newtests_line',
    )],
    [Input('country', 'value'),
     Input('total_percapita', 'value')])
def update_world_graphs(country, total_percapita):
    """Redraw the six World-tab line graphs for the selected country.

    Parameters
    ----------
    country : value of the 'country' dropdown.
    total_percapita : value of the 'total_percapita' radio items; 'Total' selects the
        absolute metrics, any other value selects the per-capita metrics.

    Returns
    -------
    tuple
        Six plotly line figures, one per column of the selected metric list, in the
        order of the callback outputs.
    """
    country_df = get_country_df(country)

    # Both metric lists hold six display names, matching the six graph outputs.
    if total_percapita == 'Total':
        metric_columns = total_line_graph_cols
    else:
        metric_columns = per_capita_line_graph_cols

    return tuple(
        px.line(country_df, x='Date', y=metric, title=f'{country} {metric}')
        for metric in metric_columns
    )
        
        
@covidapp.callback(
    [Output('choropleth', 'figure'),
     Output('selected_date', 'children'),
     Output('total_positive_line', 'figure'),
     Output('new_positive_line', 'figure'),
     Output('total_death_line', 'figure'),
     Output('new_death_line', 'figure'),
     Output('total_ventilator_line', 'figure'),
     Output('total_negative_line', 'figure'),
     Output('total_recovered_line', 'figure'),
     Output('total_test_line', 'figure'),
     Output('new_test_line', 'figure')],
    [Input('choropleth_dropdown', 'value'),
     Input('choropleth_slider', 'value'),
     Input('state', 'value')])
def update_USA_graphs(choropleth_dropdown, choropleth_slider, state):
    """Redraw the United States tab: choropleth, selected-date label and per-state graphs.

    Parameters
    ----------
    choropleth_dropdown : metric column used to colour the choropleth.
    choropleth_slider : Unix timestamp chosen on the date slider.
    state : full state name chosen in the 'state' dropdown.

    Returns
    -------
    tuple
        The choropleth figure, the 'Selected Date: ...' string, then one line figure per
        entry of `dropdownoptions`, in the order of the callback outputs.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When `state` is not a key of `us_state_abbrev` (for example None after the
        dropdown is cleared). The page is then left as it is, instead of the callback
        failing with a KeyError.
    """
    if state not in us_state_abbrev:
        raise dash.exceptions.PreventUpdate

    # Choropleth: the slider delivers seconds since the epoch; normalize() drops the
    # time-of-day so the value compares equal to the dates stored in usa_df.
    dt = pd.to_datetime(convert_to_datetime(choropleth_slider)).normalize()
    day_df = usa_df.loc[usa_df['date'] == dt]
    choropleth = px.choropleth(day_df, locations='state', locationmode='USA-states', color=choropleth_dropdown, projection='albers usa')

    # Line graphs: one per metric for the selected state (usa_df stores abbreviations).
    state_df = get_state_df(usa_df, us_state_abbrev[state])
    usa_line_graphs = [
        px.line(state_df, x='date', y=option, title=f'{state} {option}')
        for option in dropdownoptions
    ]

    date_selection_string = f'Selected Date: {dt.month_name()} {dt.day}, {dt.year}'

    return (choropleth, date_selection_string, *usa_line_graphs)
    
# Start the Dash server in debug mode, but only when this code runs as the main module.
# NOTE(review): use_reloader=False is presumably there because the auto-reloader cannot
# restart a notebook kernel — confirm before changing it.
if __name__ == '__main__':
    covidapp.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
