(C) Otto Balfour-Oatts 2020. [ec20783@qmul.ac.uk]

# COVID 19 Data from Public Health England displayed in graphs:

This is a simple dashboard created using data taken from Public Health England on the UK's Coronavirus statistics.

The first graph explores the relationships between New Deaths, New Cases, Cumulative Deaths and Cumulative Cases. To select a single datatype, click on the relevant datatype in the selection list to the right of the graph. To compare multiple datatypes, shift + click on the ones you would like to focus on.

The second graph explores the relationship between Male and Female cases split by their age range.

Click the 'Refresh Data' button to retrieve the most up-to-date information from PHE.

In [1]:
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API

In [2]:
%matplotlib inline
# make figures larger
plt.rcParams['figure.dpi'] = 100

In [3]:
### GETS THE DATA FOR THE TIMECUMCASES GRAPH
# Restrict the query to the nation-level record for England.
filters = ['areaType=nation', 'areaName=England']
# Maps the keys used throughout this notebook (left) to the metric names
# requested from the API (right).
structure = {
    'date' : 'date',
    'newCases' : 'newCasesByPublishDate',
    'cumCases' : 'cumCasesByPublishDate',
    'newDeaths' : 'newDeaths28DaysByDeathDate',
    'cumDeaths' : 'cumDeaths28DaysByDeathDate',
}
# `api` is kept at module level; `timecumcases` holds the response that is
# later written to timecumcases.json.
api = Cov19API(filters=filters, structure=structure)
timecumcases = api.get_json()

In [4]:
### GETS THE DATA FOR THE MF CASES GRAPH ###
# NOTE: this rebinds the module-level `filters` and `structure` used for the
# cases/deaths query; the `api` object built from the earlier values is not
# reassigned here.
filters = ['areaType=nation', 'areaName=England']
structure = {
    'date' : 'date',
    'males' : 'maleCases',
    'females' : 'femaleCases',
}
# `mf_cases` holds the response that is later written to mf_cases.json.
mf_api = Cov19API(filters=filters, structure=structure)
mf_cases = mf_api.get_json()

In [5]:
### OPENS TIMECUMCASES JSON
# Loads the saved cases/deaths response into `data`.
# NOTE(review): timecumcases.json is only written further down this notebook,
# so this presumably relies on a copy already being on disk - confirm.
with open ('timecumcases.json', 'rt') as INFILEtcc:
    data = json.load(INFILEtcc)

In [6]:
### CREATING THE DATALIST
datalist = data['data']
#print(datalist)

In [7]:
### SORTING BY DATE
dates = [dictionary['date'] for dictionary in datalist]
dates.sort()

In [8]:
def parse_date(datestring):
    """ Return *datestring*, written as YYYY-MM-DD, as a pandas datetime object """
    iso_day_format = '%Y-%m-%d'
    return pd.to_datetime(datestring, format=iso_day_format)

In [9]:
### SHOWS THE FORMAT OF THE DATE LIST
startdate = parse_date(dates[0])
enddate = parse_date(dates[-1])
#print(startdate, ' to ', enddate)

In [10]:
### OPENS MF_CASES JSON
# NOTE: rebinds `data`, which previously held the cases/deaths JSON.
# NOTE(review): mf_cases.json is only written further down this notebook,
# so this presumably relies on a copy already being on disk - confirm.
with open ('mf_cases.json', 'rt') as INFILEmf:
    data = json.load(INFILEmf)

In [11]:

# Take the age/sex breakdown from the record at position 1 of the response.
# NOTE(review): why position 1 rather than 0 is not evident here - confirm.
datadic = data['data'][1]

males = datadic['males']
females = datadic['females']

# Age band labels, read from the male breakdown
ageranges = [entry['age'] for entry in males]
#print(ageranges)

In [12]:
def min_age(agerange):
    """ Return the lower bound of an age band label as an int, e.g. '90+' -> 90 """
    # Drop any '+' and keep only the text before the first underscore
    lower_bound, _, _ = agerange.replace('+', '').partition('_')
    return int(lower_bound)

ageranges.sort(key = min_age)
#print(ageranges)

In [13]:
mf_casesdf = pd.DataFrame(index = ageranges, columns = ['males', 'females', 'total'])
#print (mf_casesdf)

In [14]:
index = pd.date_range(startdate, enddate, freq='D')
timecumcasesdf = pd.DataFrame(index = index, columns = ['newCases', 'cumCases', 'newDeaths', 'cumDeaths'])

def wrangle_data(rawdata):
    """Parameters: rawdata - data from json file or API call; either the whole
    response (a dict with a 'data' list) or that list of per-day records.
    Returns a new dataframe indexed by every day between the earliest and
    latest date in rawdata, with float columns newCases, cumCases, newDeaths
    and cumDeaths. Missing days and missing/None values are 0.0."""
    columns = ['newCases', 'cumCases', 'newDeaths', 'cumDeaths']
    # Accept the full response as well as its list of records
    entries = rawdata['data'] if isinstance(rawdata, dict) else rawdata
    if not entries:
        return pd.DataFrame(columns=columns, dtype=float)

    dates = [pd.to_datetime(entry['date'], format='%Y-%m-%d') for entry in entries]
    # Build the index from the data being wrangled, so refreshed data whose
    # dates fall outside a previously built range cannot raise a KeyError
    day_index = pd.date_range(min(dates), max(dates), freq='D')
    # A float frame stays numeric (and therefore plottable) without relying
    # on fillna to downcast object columns
    frame = pd.DataFrame(index=day_index, columns=columns, dtype=float)

    for date, entry in zip(dates, entries):
        for column in columns:
            # Only fill empty cells: the first record seen for a date wins
            if pd.isna(frame.loc[date, column]):
                value = entry.get(column)
                frame.loc[date, column] = float(value) if value is not None else 0.0
    frame.fillna(0.0, inplace=True)
    return frame


timecumcasesdf = wrangle_data(datalist)
#timecumcasesdf


In [15]:
### SAVES DATA FOR BOTH GRAPHS AS JSON FILES
# Writes the two API responses held in `timecumcases` and `mf_cases` to the
# same file names that the loading cells read.
with open('timecumcases.json', 'wt') as OUTFILEtcc:
    json.dump(timecumcases, OUTFILEtcc)
with open('mf_cases.json', 'wt') as OUTFILEmf:
    json.dump(mf_cases, OUTFILEmf)

In [16]:
def mf_wrangle(rawdata):
    """ Fill the module-level mf_casesdf from the module-level males and females lists.

    Parameters: rawdata - accepted but not read by this function.
    Returns a tuple (mf_casesdf, ageband), where ageband is the 'age' value
    of the last entry processed. """
    for sex, entries in (('males', males), ('females', females)):
        for entry in entries:
            ageband = entry['age']
            mf_casesdf.loc[ageband, sex] = entry['value']
    # Row-wise sum of the two sexes
    mf_casesdf['total'] = mf_casesdf['males'] + mf_casesdf['females']
    return mf_casesdf, ageband

wrangled_mf = mf_wrangle(mf_casesdf)
#print(wrangled_mf)

In [17]:
# Button to access API
def access_api(button=None):
    """ Accesses the PHE API. Returns raw data in the same format as data loaded from the "canned" JSON file.

    Parameters: button - the widget that triggered the call when this is used
    as a click callback. It is not read, and is optional so the function can
    also be called directly. """
    print("I'm downloading data from the API...")
    # Re-query the module-level Cov19API client built for the cases/deaths
    # series; its response is what gets saved to timecumcases.json.
    rawdata = api.get_json()
    print("... all done.")
    return rawdata


In [18]:
# Printout from this function will be lost in Voila unless captured in an
# output widget - therefore, we give feedback to the user by changing the 
# appearance of the button
def api_button_callback(button):
    """ Button callback - it must take the button as its parameter.
    Accesses API, wrangles data, and rebinds the global dataframe
    timecumcasesdf that the cases/deaths graph plots from. On failure (an
    exception or no data) the button is relabelled "Unavailable" and the
    existing dataframe is left untouched. """
    global timecumcasesdf
    try:
        apidata = access_api(button)
    except Exception:
        # Output from a callback is lost in Voila unless captured, so the
        # failure is reported through the button's appearance instead.
        apidata = None
    if not apidata:
        apibutton.icon = "unlink"
        apibutton.description = "Unavailable"
        return
    # wrangle the data and overwrite the dataframe for plotting
    timecumcasesdf = wrangle_data(apidata)
    # The graph won't refresh until a widget value changes, so flip the scale
    # selector to its other option and back to force a redraw.
    # NOTE: relies on the `scale` widget having been created by the time the
    # button is clicked; only the cases/deaths graph is refreshed.
    current = scale.value
    other = next(option for option in scale.options if option != current)
    scale.value = other
    scale.value = current
    apibutton.icon = "check"
    # apibutton.disabled=True


apibutton = wdg.Button(description = 'Refresh Data',
                       disabled = False,
                       button_style = 'info',
                       tooltip = 'Click to download the current Public Health england data',
                       icon = 'download'
                       )

# register the callback with the button
apibutton.on_click(api_button_callback)

# used to display the widgets
display(apibutton)

Button(button_style='info', description='Refresh Data', icon='download', style=ButtonStyle(), tooltip='Click t…

In [19]:
### SAVES DATA FOR BOTH GRAPHS AS JSON FILES
# Same statements as the earlier save cell: rewrites both JSON files from the
# `timecumcases` and `mf_cases` variables.
with open('timecumcases.json', 'wt') as OUTFILEtcc:
    json.dump(timecumcases, OUTFILEtcc)
with open('mf_cases.json', 'wt') as OUTFILEmf:
    json.dump(mf_cases, OUTFILEmf)


# Cases / Deaths Graph

In [20]:
#timecumcasesdf = pd.read_pickle('timecumcasesdf.pkl')

series = wdg.SelectMultiple(
    options = [
        'newCases', 'cumCases', 'newDeaths','cumDeaths'], 
    value = [
        'newCases', 'cumCases', 'newDeaths','cumDeaths'], 
    rows = 3, 
    description = 'Stats: ',
    disabled = False
)

scale = wdg.RadioButtons(
    options = ['linear', 'log'],
    # defaults to linear
    # layout = {'width' : 'max content'}, if the items names are long
    description = 'Scale',
    disabled = False
)

# HBox = horizontal box, VBox = vertical box
controls = wdg.VBox([series, scale])

def timeseries_graph(gcols, gscale):
    """ Plot the selected columns of the global timecumcasesdf against time.

    Parameters: gcols - the column names to plot; when empty a prompt is
    printed instead of a graph.
    gscale - 'linear' (in any capitalisation) for a linear y axis; any other
    value gives a logarithmic y axis. """
    # Compare case-insensitively: testing against 'Linear' never matched a
    # lower-case 'linear' selection, so every plot came out logarithmic.
    logscale = gscale.lower() != 'linear'
    if len(gcols) > 0:
        timecumcasesdf[list(gcols)].plot(logy = logscale)
        plt.title('Cases/Deaths Graph')
        plt.xlabel('Time')
        plt.ylabel('Number of cases')
    else:
        # nothing selected - show a prompt instead of an empty plot
        print('Click to start data for graph...')
        print('(CNTRL-Click to select more than one category)')

graph = wdg.interactive_output(timeseries_graph, 
                               {'gcols': series, 'gscale': scale})

ctrls = wdg.VBox([series, scale, apibutton])

form = wdg.HBox([graph, ctrls])

display(form)

HBox(children=(Output(), VBox(children=(SelectMultiple(description='Stats: ', index=(0, 1, 2, 3), options=('ne…

# Male / Female Cases Graph

In [21]:
#mf_casesdf = pd.read_pickle('mfcasesdf.pkl')

agecols = wdg.SelectMultiple(
    options = ['males', 'females', 'total'], 
    value = ['males', 'females'],
    rows = 3,
    description = 'Sex',
    disabled = False
)

def age_graph(graphcolumns):
    """ Callback for the sex selector: bar-chart the chosen columns of the
    global mf_casesdf, or print a prompt when nothing is selected. """
    if len(graphcolumns) == 0:
        # no column chosen - tell the user how to pick one
        print('Click to start data for graph...')
        print('(CNTRL-Click to select more than one category)')
        return
    selected = list(graphcolumns)
    mf_casesdf.plot(kind='bar', y=selected)
    plt.title('Male/Female Cases')
    plt.xlabel('Age Range')
    plt.ylabel('Number of cases')
        
# keep calling age_graph(graphcolumns = value_of_agecols)
# capture output in variable output
output = wdg.interactive_output(age_graph, {'graphcolumns': agecols})

form2 = wdg.VBox([agecols, apibutton])

# Use a dedicated name for the layout: assigning it to `display` would shadow
# the display() function that other cells call, breaking them when re-run.
mf_form = wdg.HBox([output, form2])
display(mf_form)

HBox(children=(Output(), VBox(children=(SelectMultiple(description='Sex', index=(0, 1), options=('males', 'fem…

*Based on UK Government [data](https://coronavirus.data.gov.uk/) published by [Public Health England](https://www.gov.uk/government/organisations/public-health-england).*