# DIY Covid-19 Dashboard

In [1]:
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import time
from uk_covid19 import Cov19API

In [2]:
%matplotlib inline
# make figures larger: every figure created below is rendered at 100 dots per inch
plt.rcParams['figure.dpi'] = 100

# 1) Data by nations

- The graphs display daily new cases and new deaths for each nation (England, Scotland, Wales and Northern Ireland).
- You can switch nations and scales (linear or log) by selecting options. You can also select multiple nations.
- By clicking the button below, you can download the latest UK government data and refresh the graphs. The button can be pressed only once.

In [3]:
def _read_json(path):
    """Return the parsed contents of the JSON file at ``path``."""
    with open(path, "rt") as handle:
        return json.load(handle)


# Saved data for each nation, read from the JSON files that sit next to the notebook.
dataEngland = _read_json("England.json")
dataScotland = _read_json("Scotland.json")
dataWales = _read_json("Wales.json")
dataNorthernIreland = _read_json("Northern Ireland.json")

In [4]:
# Turn the raw JSON data of one nation into a daily time series
def creatDF(nationData):
    """Build a daily DataFrame of new cases and new deaths for one nation.

    Parameters
    ----------
    nationData : dict
        Must hold a ``"data"`` list. Each entry is a dict with a ``'date'``
        string in ``YYYY-MM-DD`` format and, optionally, ``'newCase'`` and
        ``'newDeath'`` values (numbers or ``None``).

    Returns
    -------
    pandas.DataFrame
        Float columns ``newCase`` and ``newDeath``, indexed by every calendar
        day from the earliest to the latest date in the data. Days or values
        that are missing are reported as 0.0. An empty data list yields an
        empty frame with the same two columns.
    """
    columns = ['newCase', 'newDeath']
    datalist = nationData["data"]

    if not datalist:
        # nothing to derive a date range from
        return pd.DataFrame(index=pd.DatetimeIndex([]), columns=columns, dtype=float)

    dates = sorted(dictionary['date'] for dictionary in datalist)
    start_date = pd.to_datetime(dates[0], format="%Y-%m-%d")
    end_date = pd.to_datetime(dates[-1], format="%Y-%m-%d")

    index = pd.date_range(start_date, end_date, freq='D')
    # float dtype up front so the result can be plotted without relying on
    # fillna() converting an object column
    df = pd.DataFrame(index=index, columns=columns, dtype=float)

    for entry in datalist:  # each entry is a dictionary with date, cases and deaths
        date = pd.to_datetime(entry['date'], format="%Y-%m-%d")
        for column in columns:
            raw_value = entry.get(column)
            # Only fill a cell that is still empty: dates may be duplicated,
            # possibly with data for different columns in each entry. A None
            # value leaves the cell empty so a later duplicate can still fill it.
            if raw_value is not None and pd.isna(df.loc[date, column]):
                # use .loc with index,column in a single set of [ ]
                df.loc[date, column] = float(raw_value)

    # fill in any remaining "holes" due to missing dates or None values
    return df.fillna(0.0)

In [5]:
def CombineNationsDF(England, Scotland, Wales, NorthernIreland):
    """Combine the raw data of the four nations into one DataFrame.

    Each argument is the raw data dict of the nation it is named after, in the
    form accepted by ``creatDF``. Every nation's ``newCase`` / ``newDeath``
    columns get the nation name appended, giving the columns::

        newCaseEngland, newDeathEngland, newCaseScotland, newDeathScotland,
        newCaseNorthernIreland, newDeathNorthernIreland, newCaseWales, newDeathWales

    Only dates present for all four nations are kept (inner join on the index).
    """
    # (column suffix, raw data) in the column order the graphs expect.
    # Each payload is paired with its own nation's suffix.
    nations = (
        ('England', England),
        ('Scotland', Scotland),
        ('NorthernIreland', NorthernIreland),
        ('Wales', Wales),
    )

    # Name the columns per nation *before* combining, so the result does not
    # depend on the automatic _x/_y suffixes of repeated merges.
    frames = [creatDF(raw).add_suffix(suffix) for suffix, raw in nations]

    nationsDF = pd.concat(frames, axis=1, join='inner')
    return nationsDF

In [6]:
# Combined four-nation DataFrame built from the JSON data loaded above;
# the nation graph functions below read this module-level variable.
nationsDF = CombineNationsDF(dataEngland, dataScotland, dataWales, dataNorthernIreland)

In [7]:
def access_api(nation):
    """Download the daily new cases and deaths of one nation through Cov19API.

    Parameters
    ----------
    nation : str
        Area name to query, e.g. ``"England"`` or ``"Northern Ireland"``.

    Returns
    -------
    The JSON result of ``Cov19API.get_json()``; its entries use the keys
    ``date``, ``newCase`` and ``newDeath`` requested in ``structure``.
    """
    filters = [
        'areaType=nation',
        # plain key=value pair: no quote characters around the name
        'areaName=' + nation,
    ]
    # output key -> API metric name
    structure = {
        "date": "date",
        "newCase": "newCasesByPublishDate",
        "newDeath": "newDeaths28DaysByPublishDate",
    }

    api = Cov19API(filters=filters, structure=structure)
    data = api.get_json()
    # pause for a second after each request
    time.sleep(1)
    return data

In [8]:
# our API access callback. This will be called by the button
def api_button_callback(button):
    """Download fresh data for the four nations and redraw the nation graphs.

    Parameters
    ----------
    button : the widget that was clicked, as passed by ``on_click``. It is not
        used directly; the module-level ``apibutton`` is updated instead.
    """
    # The graph functions read the module-level nationsDF. Without this
    # declaration the assignment below would only create a local variable and
    # the graphs would be redrawn with the old data.
    global nationsDF

    # start message
    print("I'm downloading and updating data from the API...")

    # download data via API
    dataEngland = access_api("England")
    dataScotland = access_api("Scotland")
    dataWales = access_api("Wales")
    dataNorthernIreland = access_api("Northern Ireland")

    # rebuild the combined data frame from the downloaded data
    nationsDF = CombineNationsDF(dataEngland, dataScotland, dataWales, dataNorthernIreland)

    # redraw the graphs so they pick up the new data
    refresh_graph()

    # end message; the button is disabled so it can only be used once
    print("...all done.")
    apibutton.icon = "check"
    apibutton.button_style = "info"
    apibutton.description = "Success"
    apibutton.disabled = True
    
    
# Button that triggers the download; see the ipywidgets Button docs for the parameters
apibutton = wdg.Button(
    description='Refresh data',
    tooltip='Click to download current Public Health England data',
    icon='download',        # FontAwesome name without the `fa-` prefix
    button_style='danger',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# run api_button_callback each time the button is clicked
apibutton.on_click(api_button_callback)

# display() is the IPython function that generalises print for notebooks;
# here it renders the widget
display(apibutton)

Button(button_style='danger', description='Refresh data', icon='download', style=ButtonStyle(), tooltip='Click…

## a) New cases by nations

In [9]:
# nationsDF columns that can be shown in the cases graph; all are selected at start
_case_columns = ['newCaseEngland', 'newCaseScotland', 'newCaseNorthernIreland', 'newCaseWales']

nations_cases = wdg.SelectMultiple(
    description='Nations:',
    options=_case_columns,
    value=_case_columns,
    rows=4,
    disabled=False,
)

# y-axis scale selector; the first option ('linear') is the initial choice
scale1 = wdg.RadioButtons(
    description='Scale:',
    options=['linear', 'log'],
    disabled=False,
)

def nations_graph1(gcols, gscale):
    """Plot the selected new-case columns of nationsDF.

    gcols  -- sequence of nationsDF column names to draw
    gscale -- 'linear' for a linear y axis; any other value gives a log y axis

    With no columns selected, a short hint is printed instead of a graph.
    """
    if len(gcols) == 0:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return

    use_log_axis = gscale != 'linear'
    nationsDF[list(gcols)].plot(logy=use_log_axis)

# keep calling nations_graph1(gcols=value_of_nations_cases, gscale=value_of_scale1)
# whenever a control changes; capture its output in variable graph
graph=wdg.interactive_output(nations_graph1, {'gcols': nations_cases, 'gscale': scale1})

# controls stacked vertically, shown to the right of the graph
ctrls=wdg.VBox([nations_cases, scale1])
display(wdg.HBox([graph, ctrls]))

HBox(children=(Output(), VBox(children=(SelectMultiple(description='Nations:', index=(0, 1, 2, 3), options=('n…

## b) New deaths by nations

In [10]:
# nationsDF columns that can be shown in the deaths graph; all are selected at start
_death_columns = ['newDeathEngland', 'newDeathScotland', 'newDeathNorthernIreland', 'newDeathWales']

nations_deaths = wdg.SelectMultiple(
    description='Nations:',
    options=_death_columns,
    value=_death_columns,
    rows=4,
    disabled=False,
)

# y-axis scale selector; the first option ('linear') is the initial choice
scale2 = wdg.RadioButtons(
    description='Scale:',
    options=['linear', 'log'],
    disabled=False,
)

def nations_graph2(gcols, gscale):
    """Plot the selected new-death columns of nationsDF.

    gcols  -- sequence of nationsDF column names to draw
    gscale -- 'linear' for a linear y axis; any other value gives a log y axis

    With no columns selected, a short hint is printed instead of a graph.
    """
    if len(gcols) == 0:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return

    use_log_axis = gscale != 'linear'
    nationsDF[list(gcols)].plot(logy=use_log_axis)
            
# keep calling nations_graph2(gcols=value_of_nations_deaths, gscale=value_of_scale2)
# whenever a control changes; capture its output in variable graph
graph=wdg.interactive_output(nations_graph2, {'gcols': nations_deaths, 'gscale': scale2})

# controls stacked vertically, shown to the right of the graph
ctrls=wdg.VBox([nations_deaths, scale2])
display(wdg.HBox([graph, ctrls]))

HBox(children=(Output(), VBox(children=(SelectMultiple(description='Nations:', index=(0, 1, 2, 3), options=('n…

In [11]:
def refresh_graph():
    """Force both nation graphs to redraw after the data have been updated.

    The graphs only redraw when a widget value changes, so each selection
    widget is briefly set to a different value and then set back. This is a
    bit of a gimmick tied to the nations_cases and nations_deaths widgets.
    """
    def _toggle(selector):
        selected = selector.value
        first_only = tuple([selector.options[0]])
        if selected == first_only:
            # the first option alone is already selected, so use the second one
            temporary = tuple([selector.options[1]])
        else:
            temporary = first_only
        selector.value = temporary  # forces the redraw
        selector.value = selected   # now we can change it back

    _toggle(nations_cases)
    _toggle(nations_deaths)

# 2) Monthly data

- The graphs display monthly new cases and infection rate per 100k people by age groups.
- The age groups set by the UK government are too detailed (19 age groups), so these graphs combine them into five broader age groups.
- The graphs display monthly data from February to October of 2020. The data is fixed at the time this dashboard was created, so there is no button to refresh it.

In [12]:
# Load the saved age-band data; the cells below read its entries through data['data']
with open("age.json", "rt") as INFILE:
    data=json.load(INFILE)

In [13]:
def getSpecificdate(datalist, date):
    """Return the entry of ``datalist`` whose ``"date"`` equals ``date``.

    If several entries share the date, the last one wins. If none matches,
    an empty dict is returned.
    """
    matches = [record for record in datalist if record["date"] == date]
    return matches[-1] if matches else {}

In [14]:
def min_age(agerange):
    """Return the lower bound of an age-range label as an int.

    The label is the text before the first underscore, with any '+' removed
    (so '90+' gives 90).
    """
    lower_bound = agerange.replace('+', '').partition('_')[0]
    return int(lower_bound)

In [15]:
def createDF(monthJson):
    """Build a per-age-band DataFrame from one dated entry of the age data.

    monthJson must provide 'male' and 'female' lists; each item is a dict with
    the keys 'age', 'value' and 'rate'. The rows are the module-level
    ``ageranges`` list, which therefore has to exist before this is called.

    Returns a DataFrame with male/female value and rate columns, plus
    total_value (male + female) and total_rate (mean of the two rates).
    """
    entries_by_sex = {'male': monthJson['male'], 'female': monthJson['female']}
    frame = pd.DataFrame(
        index=ageranges,
        columns=['male_value', 'male_rate', 'female_value', 'female_rate',
                 'total_value', 'total_rate'],
    )

    for sex, entries in entries_by_sex.items():
        for record in entries:  # each record is a dictionary
            band = record['age']  # row to fill
            frame.loc[band, sex + '_value'] = record['value']
            frame.loc[band, sex + '_rate'] = record['rate']

    frame['total_value'] = frame['male_value'] + frame['female_value']
    frame['total_rate'] = (frame['male_rate'] + frame['female_rate']) / 2

    return frame

In [16]:
# get the age ranges from the entry dated 1 November 2020
DataNov1st = getSpecificdate(data['data'], "2020-11-01")
males = DataNov1st['male']
females = DataNov1st['female']
# each entry of males is a dictionary; order the labels by their lower age bound
ageranges = sorted((entry['age'] for entry in males), key=min_age)

In [17]:
# DF for storing monthly data: one row per age range and no columns yet;
# the columns are added month by month in the loop below
monthlyDF=pd.DataFrame(index=ageranges, columns=[]) 

In [18]:
# the list of months to get data for; the order matters because a month's
# position in this list is used to work out the date of its data entry
month_list = ["Feb", "Mar","Apr", "May","Jun", "Jul","Aug", "Sep", "Oct"]

In [19]:
# Columns stored for every month, in the order they are added to monthlyDF
metric_columns = ['male_value', 'male_rate', 'female_value', 'female_rate',
                  'total_value', 'total_rate']

# Step 1: collect the cumulative figures recorded for each month.
cumulative_by_month = {}
for list_index, month in enumerate(month_list):
    # The first month in the list ("Feb") uses the entry dated 2020-03-01, the
    # next one 2020-04-01 and so on, hence the +3 (zero-padded to two digits).
    snapshot_date = "2020-{:02d}-01".format(list_index + 3)

    # get the data for the month
    data_for_month = getSpecificdate(data['data'], snapshot_date)
    if not data_for_month:
        raise KeyError("no entry dated " + snapshot_date + " in the age data")

    # float dtype so missing values become NaN and the subtraction below is numeric
    snapshot = pd.DataFrame(index=monthlyDF.index, columns=metric_columns, dtype=float)
    for sex in ('male', 'female'):
        for entry in data_for_month[sex]:  # each entry is a dictionary
            ageband = entry['age']  # our index position
            snapshot.loc[ageband, sex + '_value'] = entry['value']
            snapshot.loc[ageband, sex + '_rate'] = entry['rate']

    snapshot['total_value'] = snapshot['male_value'] + snapshot['female_value']
    snapshot['total_rate'] = (snapshot['male_rate'] + snapshot['female_rate']) / 2

    cumulative_by_month[month] = snapshot

# Step 2: store the figures that arose during each month.
# Every entry is cumulative, so the previous month's *cumulative* figures are
# subtracted. They are kept separately in cumulative_by_month because the
# columns already stored in monthlyDF hold monthly differences.
for list_index, month in enumerate(month_list):
    monthly = cumulative_by_month[month]
    if list_index > 0:  # the first month in the list (Feb) has no previous month
        prev_month = month_list[list_index - 1]
        monthly = monthly - cumulative_by_month[prev_month]

    # whole-column assignment (aligned on the age-range index), so the values
    # are written to monthlyDF itself and never to a temporary copy of a row
    for column in metric_columns:
        monthlyDF[column + '_' + month] = monthly[column]

In [20]:
# Label every row of monthlyDF with a broader age group, by row position:
# rows 0-3, 4-7, 8-11 and 12-15 get the first four labels, all later rows the last one.
age_group_labels = ("0_to_19", "20_to_39", "40_to_59", "60_to_79", "80+")
for position, ageband in enumerate(monthlyDF.index):
    group_number = min(position // 4, len(age_group_labels) - 1)
    monthlyDF.at[ageband, 'age_group'] = age_group_labels[group_number]

## a) Monthly New Cases by Age Groups

In [21]:
# keep the total_value column of every month in month_list, plus the age group label
total_value_columns = ["total_value_" + month_name for month_name in month_list]
total_monthlyDF = monthlyDF[total_value_columns + ["age_group"]]

In [22]:
# add up the rows that share an age group, then relabel the columns with the
# month names (month_list is in the same order as the selected columns)
total_monthlyDF = total_monthlyDF.groupby("age_group").sum()
total_monthlyDF.columns = month_list

In [23]:
# age groups that can be shown in the monthly cases graph; all are selected at start
_case_age_groups = ['0_to_19', '20_to_39', '40_to_59', '60_to_79', '80+']

monthly_cases = wdg.SelectMultiple(
    description='Age range:',
    options=_case_age_groups,
    value=_case_age_groups,
    rows=5,
    disabled=False,
)

def monthly_graph(gcols):
    """Plot the monthly new cases of the selected age groups.

    gcols -- sequence of age group labels; each becomes one line, with the
             months of total_monthlyDF along the x axis.

    With nothing selected, a short hint is printed instead of a graph.
    """
    if len(gcols) == 0:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return

    # transpose so the months become the index and the age groups the columns
    by_month = total_monthlyDF.T
    by_month[list(gcols)].plot()
            
# keep calling monthly_graph(gcols=value_of_monthly_cases) whenever the
# selection changes; capture its output in variable graph
graph=wdg.interactive_output(monthly_graph, {'gcols': monthly_cases})


# graph on the left, selection widget on the right
ctrls=wdg.VBox([monthly_cases])
form=wdg.HBox([graph, ctrls])
display(form)

HBox(children=(Output(), VBox(children=(SelectMultiple(description='Age range:', index=(0, 1, 2, 3, 4), option…

##   b) Monthly Infection Rates by Age Groups

In [24]:
# keep the total_rate column of every month in month_list, plus the age group label
total_rate_columns = ["total_rate_" + month_name for month_name in month_list]
rate_monthlyDF = monthlyDF[total_rate_columns + ["age_group"]]
# average the rates of the rows that share an age group (plain, unweighted mean)
rate_monthlyDF = rate_monthlyDF.groupby("age_group").mean()
rate_monthlyDF.columns = month_list

In [25]:
# Selection widget for the infection-rate graph; all age groups are selected at start.
# NOTE: this re-binds the name monthly_cases, which the monthly cases section also uses.
_rate_age_groups = ['0_to_19', '20_to_39', '40_to_59', '60_to_79', '80+']

monthly_cases = wdg.SelectMultiple(
    description='Age range:',
    options=_rate_age_groups,
    value=_rate_age_groups,
    rows=5,
    disabled=False,
)

def monthly_graph(gcols):
    """Plot the monthly infection rates of the selected age groups.

    gcols -- sequence of age group labels; each becomes one line, with the
             months of rate_monthlyDF along the x axis.

    With nothing selected, a short hint is printed instead of a graph.

    NOTE: this re-binds the name monthly_graph, which the monthly cases
    section also defines.
    """
    if len(gcols) == 0:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return

    # transpose so the months become the index and the age groups the columns
    by_month = rate_monthlyDF.T
    by_month[list(gcols)].plot()
            
# keep calling monthly_graph(gcols=value_of_monthly_cases) whenever the
# selection changes; capture its output in variable graph
graph=wdg.interactive_output(monthly_graph, {'gcols': monthly_cases})


# graph on the left, selection widget on the right
ctrls=wdg.VBox([monthly_cases])
form=wdg.HBox([graph, ctrls])
display(form)

HBox(children=(Output(), VBox(children=(SelectMultiple(description='Age range:', index=(0, 1, 2, 3, 4), option…

[DIY Covid-19 Dashboard Kit](https://github.com/Hinoshita/DIY-Covid19-Dashboard.git) (C) HInoshita, 2020. All rights reserved.

**Data source:** *Based on UK Government [data](https://coronavirus.data.gov.uk/) published by [Public Health England](https://www.gov.uk/government/organisations/public-health-england).*