# COVID-19 Dashboard
#### Nina Lindsay-     Student Number: 200752370
<hr>

In [1]:
#Importing required libraries and functions, configuring matplotlib styles and to show inline:

import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API
from time import sleep
from datetime import datetime

%matplotlib inline
plt.style.use('seaborn-bright')
plt.rcParams['figure.dpi'] = 100

This notebook shows cumulative cases and deaths by specimen date expressed as a rate per 100 000 people for each of the four UK regions: England, Scotland, Wales and Northern Ireland.

#### Key metrics used:
<ul>
<li>Cases and Deaths by Specimen Date: This records the date on which the test was taken or on which the death occurred, irrespective of reporting date. Previously reported data is therefore continually updated as new reports of cases and deaths on previous specimen dates emerge.</li>
<li>Population Sizes used are Office for National Statistics 2019 mid-year estimates.</li>
</ul>

Full details on reporting sources and COVID death count methodologies for the four regions can be found on the [gov.uk website](https://coronavirus.data.gov.uk/details/about-data#daily-and-cumulative-numbers-of-cases).
<hr>

In [2]:
#function to convert imported dates data into datetime pandas object:
def parse_date(datestring):
    """Parse a 'YYYY-MM-DD' date string with pd.to_datetime and return the result."""
    iso_day_format = "%Y-%m-%d"
    parsed = pd.to_datetime(datestring, format=iso_day_format)
    return parsed

#function to wrangle data/convert from JSON files to dfs:            
def wrangle_data():
    """Build the cumulative case-rate and death-rate data frames from the saved JSON files.

    Reads ``timeseries<nation>.json`` (england, scotland, wales, northernireland) from the
    current working directory and (re)binds three module-level globals:

    * ``dataindex``   - daily DatetimeIndex from the earliest to the latest date found in any file;
    * ``cumcasesdf``  - 'cumCasesBySpecimenDateRate' values, one column per region;
    * ``cumdeathsdf`` - 'cumDeaths28DaysByDeathDateRate' values, one column per region.

    Both frames are indexed by ``dataindex`` and hold float64 values. A value that is
    None in the file, or a date that a region has no record for, becomes 0.0. If a file
    contains the same date more than once, the first record for that date is used.

    Returns:
        tuple: ``(cumcasesdf, cumdeathsdf)`` - the same objects that are stored in the globals.

    Raises:
        ValueError: if one of the files contains no records.
    """
    global dataindex, cumcasesdf, cumdeathsdf

    # (file-name suffix, column label) for each region, in column order
    nations = [
        ('england', 'England'),
        ('scotland', 'Scotland'),
        ('wales', 'Wales'),
        ('northernireland', 'Northern Ireland'),
    ]
    cases_metric = 'cumCasesBySpecimenDateRate'
    deaths_metric = 'cumDeaths28DaysByDeathDateRate'

    cases_by_region = {}
    deaths_by_region = {}

    for file_suffix, label in nations:
        filename = "timeseries{}.json".format(file_suffix)
        with open(filename, "rt") as INFILE:
            records = json.load(INFILE)['data']
        if not records:
            raise ValueError("No records found in {}".format(filename))

        dates = pd.to_datetime([record['date'] for record in records], format="%Y-%m-%d")
        # keep only the first record seen for each date
        first_seen = ~dates.duplicated(keep='first')

        for metric, store in ((cases_metric, cases_by_region), (deaths_metric, deaths_by_region)):
            # dtype=float turns None into NaN, which is then replaced by 0.0
            values = pd.Series([record[metric] for record in records], index=dates, dtype=float)
            store[label] = values.loc[first_seen].fillna(0.0)

    # Daily index covering every date reported by any region
    first_day = min(series.index.min() for series in cases_by_region.values())
    last_day = max(series.index.max() for series in cases_by_region.values())
    dataindex = pd.date_range(first_day, last_day, freq='D')

    columns = [label for _, label in nations]
    # reindex() inserts the dates a region has no record for; those gaps are filled with 0.0
    cumcasesdf = pd.DataFrame(cases_by_region, columns=columns).reindex(dataindex).fillna(0.0)
    cumdeathsdf = pd.DataFrame(deaths_by_region, columns=columns).reindex(dataindex).fillna(0.0)

    return cumcasesdf, cumdeathsdf


In [3]:
#function to call API data and save output as updated JSONs
    #(It is noted that these updated JSON files will not cause the deployed canned data to update when run on a binder, 
    #but the functionality is preserved should the user wish to download the code locally.)
    
def access_api(button):
    """Download fresh data from the UK COVID19 API, save it as JSON, then rebuild and redraw.

    Registered as the click callback of ``apibutton``.

    Steps:
      1. For each of the four nations, request the date, cumCasesBySpecimenDateRate and
         cumDeaths28DaysByDeathDateRate fields, pausing 0.5 s between requests.
      2. Once all four requests have returned, overwrite the ``timeseries<nation>.json``
         files in the current working directory.
      3. Whether or not steps 1-2 succeeded, call wrangle_data() and the two refresh
         functions so the graphs are rebuilt from whatever JSON files are on disk.
      4. Disable ``apibutton`` and relabel it 'Unavailable'.

    Any ordinary exception raised in steps 1-2 is reported with print() and not re-raised.

    Args:
        button: the widget passed in by the click event; not used by this function.
    """
    # (areaName sent to the API, file-name suffix) kept side by side so the two spellings
    # of Northern Ireland cannot drift apart
    nations = (
        ('england', 'england'),
        ('scotland', 'scotland'),
        ('wales', 'wales'),
        ('northern ireland', 'northernireland'),
    )

    #selecting desired data through filters and structures parameters of api:
    structure = {
        "date": "date",
        "cumCasesBySpecimenDateRate": "cumCasesBySpecimenDateRate",
        "cumDeaths28DaysByDeathDateRate": "cumDeaths28DaysByDeathDateRate"
    }

    #exception handling for failed API call
    try:
        timeserieslist = []
        for area_name, _ in nations:
            filters = [
                'areaType=nation',
                'areaName={}'.format(area_name)
            ]
            api = Cov19API(filters=filters, structure=structure)
            timeserieslist.append(api.get_json())
            sleep(0.5)

        #saving data outputs as JSON files for future use
        #(only reached when every request above has succeeded)
        for (_, file_suffix), timeseries in zip(nations, timeserieslist):
            with open("timeseries{}.json".format(file_suffix), "wt") as OUTF:
                json.dump(timeseries, OUTF)

    except Exception as error:
        # 'except Exception' rather than a bare 'except' so that KeyboardInterrupt and
        # SystemExit still propagate; the cause is printed to make failures diagnosable.
        print('There was an error accessing the API. Data has not been updated.')
        print('({}: {})'.format(type(error).__name__, error))

    #call wrangle_data() function to update data frames used within the graphs,
    #call the refresh functions to simulate user widget interaction and force plot redraw
    wrangle_data()
    refresh_casesgraph()
    refresh_deathsgraph()

    #following API call attempt; update button display to be disabled with 'Unavailable' text
    #(apibutton is only mutated here, never rebound, so no 'global' statement is needed)
    apibutton.icon="check"
    apibutton.description='Unavailable'
    apibutton.disabled = True
    apibutton.tooltip='Data has already been refreshed or is unavailable'

#### Use the refresh button below to call the UK COVID19 API and refresh the data shown in the graphs:

In [4]:
#Initial load:

#Build the plotting data frames from the 'canned' JSON files before any widget is shown:
wrangle_data()

#Refresh button: a click runs access_api(), which re-downloads the data and redraws the graphs:
apibutton = wdg.Button(
    description='Refresh data',
    icon='download',
    tooltip='Click to download current .gov.uk data',
    button_style='',
    disabled=False,
)
apibutton.on_click(access_api)

display(apibutton)


Button(description='Refresh data', icon='download', style=ButtonStyle(), tooltip='Click to download current .g…

<br>The two graphs below show the cumulative deaths and cumulative cases per 100 000 people for each of the four regions (England, Scotland, Wales and Northern Ireland). <br>
Individual region plotlines can be isolated by clicking the multi-select widget above each dashboard. Multiple regions can be selected by holding 'CTRL' whilst clicking the desired regions. <br>
You are also able to narrow the date range shown within each graph using the slider above each plot. Click and drag the slider to select the start date from which the data will be shown.

<hr>

## Graph 1: Cumulative Deaths per 100 000 People

In [10]:
#Region picker for the cumulative deaths graph; all four regions are selected initially
deathslines = wdg.SelectMultiple(
    description='Regions:',
    options=['England', 'Scotland', 'Wales', 'Northern Ireland'],
    value=['England', 'Scotland', 'Wales', 'Northern Ireland'],
    rows=4,
    disabled=False,
)

#Start-date slider for the cumulative deaths graph:
#one option per day, from the first date in the data up to the current date
sliderdataindex = pd.date_range(start=dataindex[0], end=parse_date(datetime.now()), freq='D')

deathsdate = wdg.SelectionSlider(
    description='Start Date:',
    options=[(day.strftime(' %d %b %Y '), day) for day in sliderdataindex],
    index=len(sliderdataindex) - 1,
    orientation='horizontal',
    layout={'width': '500px'},
    style={'description_width': 'initial'},
)

#Region picker and date slider side by side
deathscontrols = wdg.HBox([deathslines, deathsdate])


def cumdeaths_graph(graphlines, daterange):
    """Plot the selected columns of the current cumdeathsdf from a start date onwards.

    Args:
        graphlines: the selected region names (column labels of cumdeathsdf) to draw.
        daterange: the start date; rows of cumdeathsdf from this date onwards are plotted.

    Instead of plotting, an explanatory message is printed when no region is selected
    or when the day after ``daterange`` is not in the index of cumdeathsdf.
    """
    start_date = daterange
    nlines=len(graphlines)
    # The range check uses the start date passed in as an argument rather than reading the
    # slider widget directly, so the function depends only on its inputs and cumdeathsdf.
    if nlines>0 and (start_date + pd.Timedelta(1, unit='D')) in cumdeathsdf.index:
        cumdeathsdf.loc[start_date:].plot(figsize = (9,5),
                                          title = 'Cumulative Deaths per 100 000 People', y=list(graphlines))
    else:
        # no region selected, or the day after the start date is not in the data: print a message instead
        print("The graph could not be generated.")
        print("(Ensure you have selected at least one region and that your date selection includes more than one day of available data.)")

        
        
def refresh_deathsgraph():
    """Force a redraw of the cumulative deaths graph.

    Briefly assigns a different selection to the deathslines widget and then restores the
    original one, imitating a user interaction.
    """
    selected = deathslines.value
    first_only = (deathslines.options[0],)
    # choose a temporary selection that is guaranteed to differ from the current one
    placeholder = (deathslines.options[1],) if selected == first_only else first_only
    deathslines.value = placeholder  # forces the redraw
    deathslines.value = selected
        
#Connect the two controls to cumdeaths_graph: the region picker supplies 'graphlines' and the
#date slider supplies 'daterange'. Then display the controls together with the output area.

output=wdg.interactive_output(cumdeaths_graph, {'graphlines': deathslines, 'daterange': deathsdate})
display(deathscontrols, output)
#Move the slider from its initial position (the last option) to the first date in the data.
#NOTE(review): presumably assigned after display() so that the change triggers a redraw - confirm.
deathsdate.value = dataindex[0]
#Fix the height of the output area.
#NOTE: the name 'output' is rebound by the cumulative cases cell further down.
output.layout.height = '500px'

HBox(children=(SelectMultiple(description='Regions:', index=(0, 1, 2, 3), options=('England', 'Scotland', 'Wal…

Output()

<hr>

## Graph 2: Cumulative Cases per 100 000 People

In [11]:
#Region picker for the cumulative cases graph; all four regions are selected initially
caseslines = wdg.SelectMultiple(
    description='Regions:',
    options=['England', 'Scotland', 'Wales', 'Northern Ireland'],
    value=['England', 'Scotland', 'Wales', 'Northern Ireland'],
    rows=4,
    disabled=False,
)

#Start-date slider for the cumulative cases graph:
#one option per day, from the first date in the data up to the current date
sliderdataindex = pd.date_range(start=dataindex[0], end=parse_date(datetime.now()), freq='D')

casesdate = wdg.SelectionSlider(
    description='Start Date:',
    options=[(day.strftime(' %d %b %Y '), day) for day in sliderdataindex],
    index=len(sliderdataindex) - 1,
    orientation='horizontal',
    layout={'width': '500px'},
    style={'description_width': 'initial'},
)

#Region picker and date slider side by side
casescontrols = wdg.HBox([caseslines, casesdate])


def cumcases_graph(graphlines, daterange):
    """Plot the selected columns of the current cumcasesdf from a start date onwards.

    Args:
        graphlines: the selected region names (column labels of cumcasesdf) to draw.
        daterange: the start date; rows of cumcasesdf from this date onwards are plotted.

    Instead of plotting, an explanatory message is printed when no region is selected
    or when the day after ``daterange`` is not in the index of cumcasesdf.
    """
    start_date = daterange
    nlines=len(graphlines)
    # The range check uses the start date passed in as an argument rather than reading the
    # slider widget directly, so the function depends only on its inputs and cumcasesdf.
    if nlines>0 and (start_date + pd.Timedelta(1, unit='D')) in cumcasesdf.index:
        cumcasesdf.loc[start_date:].plot(figsize = (9,5),
                                         title = 'Cumulative Cases per 100 000 People', y=list(graphlines))
    else:
        # no region selected, or the day after the start date is not in the data: print a message instead
        print("The graph could not be generated.")
        print("(Ensure you have selected at least one region and that your date selection includes more than one day of available data.)")

        
def refresh_casesgraph():
    """Force a redraw of the cumulative cases graph.

    Briefly assigns a different selection to the caseslines widget and then restores the
    original one, imitating a user interaction.
    """
    selected = caseslines.value
    first_only = (caseslines.options[0],)
    # choose a temporary selection that is guaranteed to differ from the current one
    placeholder = (caseslines.options[1],) if selected == first_only else first_only
    caseslines.value = placeholder  # forces the redraw
    caseslines.value = selected

#Connect the two controls to cumcases_graph: the region picker supplies 'graphlines' and the
#date slider supplies 'daterange'. Then display the controls together with the output area.
output=wdg.interactive_output(cumcases_graph, {'graphlines': caseslines, 'daterange': casesdate})
display(casescontrols, output)
#Move the slider from its initial position (the last option) to the first date in the data.
#NOTE(review): presumably assigned after display() so that the change triggers a redraw - confirm.
casesdate.value = dataindex[0]
#Fix the height of the output area.
output.layout.height = '500px'

HBox(children=(SelectMultiple(description='Regions:', index=(0, 1, 2, 3), options=('England', 'Scotland', 'Wal…

Output()

(C) **Nina Lindsay**, data from [Public Health England](https://www.gov.uk/government/organisations/public-health-england), content available under the [Open Government License v3.0](https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/)