[DIY Disease Tracking Dashboard Kit](https://github.com/fsmeraldi/diy-covid19dash) (C) Fabrizio Smeraldi, 2020,2024 ([f.smeraldi@qmul.ac.uk](mailto:f.smeraldi@qmul.ac.uk) - [web](http://www.eecs.qmul.ac.uk/~fabri/)). This notebook is released under the [GNU GPLv3.0 or later](https://www.gnu.org/licenses/).

# DIY Disease Tracking Dashboard

This is a simple dashboard based on UKHSA data, displaying the daily COVID-19 admissions across different NHS regions.

The development of this dashboard is Chengkai's independent work, reusing code provided in the guidelines by Fabrizio. 

Special thanks to Fabrizio and the TAs for providing the foundational code and guidance.

In [46]:
# Tutorial code reused for class APIwrapper
import requests
import time

class APIwrapper:
    """ Paged client for a single metric endpoint of the UKHSA dashboard API.

    An instance is bound to one theme/sub_theme/topic/geography_type/geography/
    metric combination. Pages are fetched with get_page (one page per call) or
    get_all_pages (everything in one go). Requests are paced so that all
    instances together issue at most about three requests per second. """
    # class variables shared among all instances
    _access_point="https://api.ukhsa-dashboard.data.gov.uk"
    _last_access=0.0 # time of last api access
    _min_interval=0.33 # minimum seconds between two requests (max 3 requests/second)
    _timeout=30 # seconds to wait for the server before giving up on a request

    def __init__(self, theme, sub_theme, topic, geography_type, geography, metric):
        """ Init the APIwrapper object, constructing the endpoint from the structure
        parameters """
        # build the path with all the required structure parameters. You do not need to edit this line,
        # parameters will be replaced by the actual values when you instantiate an object of the class!
        url_path=(f"/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/" +
                  f"{geography_type}/geographies/{geography}/metrics/{metric}")
        # our starting API endpoint
        self._start_url=APIwrapper._access_point+url_path
        # filters and page size used by the previous get_page call; the initial
        # values never match a real call, so the first call starts from page 1
        self._filters=None
        self._page_size=-1
        # url of the next page to fetch; None once the last page has been read
        self._next_url=self._start_url
        # will contain the number of items
        self.count=None

    def get_page(self, filters=None, page_size=5):
        """ Access the API and download the next page of data. Sets the count
        attribute to the total number of items available for this query. Changing
        filters or page_size will cause get_page to restart from page 1. Rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (between 1 and 365); use the default value
        for debugging your structure and filters.

        filters is an optional dictionary of query parameters; entries whose
        value is None are not sent. Returns the list found under the 'results'
        key of the response, or [] once the last page has already been fetched.
        Raises ValueError if page_size is out of range, and a requests exception
        if the request times out or the server answers with an error status. """
        # Check page size is within range
        if page_size>365:
            raise ValueError("Max supported page size is 365")
        if page_size<1:
            raise ValueError("Page size must be at least 1")
        # work on a private copy, so that later changes to the caller's
        # dictionary cannot go unnoticed by the restart check below
        filters={} if filters is None else dict(filters)
        # restart from first page if page or filters have changed
        if filters!=self._filters or page_size!=self._page_size:
            self._filters=filters
            self._page_size=page_size
            self._next_url=self._start_url
        # signal the end of data condition
        if self._next_url is None:
            return [] # we already fetched the last page
        # simple rate limiting to avoid bans
        deltat=time.time()-APIwrapper._last_access
        if deltat<APIwrapper._min_interval:
            time.sleep(APIwrapper._min_interval-deltat)
        # record the time *after* any sleep: this is when the request really
        # goes out, and it is what the next call must be spaced from
        APIwrapper._last_access=time.time()
        # build parameter dictionary by removing all the None
        # values from filters and adding page_size
        parameters={x: y for x, y in filters.items() if y is not None}
        parameters['page_size']=page_size
        # the page parameter is already included in _next_url.
        # This is the API access. Fail early on a stalled connection or an
        # HTTP error status instead of trying to decode an error page.
        response=requests.get(self._next_url, params=parameters,
                              timeout=APIwrapper._timeout)
        response.raise_for_status()
        # the .json() method decodes the response into Python object (dictionaries,
        # lists; 'null' values are translated as None).
        payload=response.json()
        # update url so we'll fetch the next page
        self._next_url=payload['next']
        self.count=payload['count']
        # data are in the nested 'results' list
        return payload['results']

    def get_all_pages(self, filters=None, page_size=365):
        """ Access the API and download all available data pages of data. Sets the count
        attribute to the total number of items available for this query. API access rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (maximum 365), and controls the trade-off
        between time to load a page and number of pages; the default should work well
        in most cases. The number of items returned should in any case be equal to
        the count attribute. """
        # always start from the first page, so that a repeated call (or a call
        # made after some get_page calls) still returns the complete data set
        self._next_url=self._start_url
        data=[] # build up all data here
        while True:
            # use get_page to do the job, including the pacing
            next_page=self.get_page(filters, page_size)
            if not next_page:
                break # we are done
            data.extend(next_page)
        return data

In [47]:
# Query structure for daily COVID-19 admissions, broken down by NHS Region.
# The "geography" entry (the individual region) is added before each request.
structure = dict(
    theme="infectious_disease",
    sub_theme="respiratory",
    topic="COVID-19",
    geography_type="NHS Region",
    metric="COVID-19_healthcare_admissionByDay",
)

In [48]:
# Download every page of admissions data for the London region
structure.update(geography="London")
api = APIwrapper(**structure)
Ldnadmissions = api.get_all_pages()

In [49]:
# Download every page of admissions data for the East of England region
structure.update(geography="East of England")
api = APIwrapper(**structure)
EEadmissions = api.get_all_pages()

In [50]:
# Download every page of admissions data for the Midlands region
structure.update(geography="Midlands")
api = APIwrapper(**structure)
MLadmissions = api.get_all_pages()

In [51]:
# Download every page of admissions data for the North East and Yorkshire region
structure.update(geography="North East and Yorkshire")
api = APIwrapper(**structure)
NEadmissions = api.get_all_pages()

In [52]:
# Download every page of admissions data for the North West region
structure.update(geography="North West")
api = APIwrapper(**structure)
NWadmissions = api.get_all_pages()

In [53]:
# Download every page of admissions data for the South East region
structure.update(geography="South East")
api = APIwrapper(**structure)
SEadmissions = api.get_all_pages()

In [54]:
# Download every page of admissions data for the South West region
structure.update(geography="South West")
api = APIwrapper(**structure)
SWadmissions = api.get_all_pages()

In [55]:
# Save the data into json files
import json
# Each region's downloaded records go to a JSON file named after the region.
# The pairs are listed in the order in which the files are written.
for _outname, _records in (
    ("East of England.json", EEadmissions),
    ("London.json", Ldnadmissions),
    ("Midlands.json", MLadmissions),
    ("North East and Yorkshire.json", NEadmissions),
    ("North West.json", NWadmissions),
    ("South East.json", SEadmissions),
    ("South West.json", SWadmissions),
):
    with open(_outname, "wt") as OUTF:
        json.dump(_records, OUTF)

In [56]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

%matplotlib inline
# Default resolution (dots per inch) for every figure created from here on
plt.rcParams.update({'figure.dpi': 100})

In [57]:
# Reload each region's records from its JSON file, so that the rest of the
# notebook works from the data on disk.
with open("East of England.json", "rt") as INFILE:
    EEadmissions=json.load(INFILE)
with open("London.json", "rt") as INFILE:
    Ldnadmissions=json.load(INFILE)
with open("Midlands.json", "rt") as INFILE:
    MLadmissions=json.load(INFILE)
# NEadmissions and SWadmissions must carry exactly these names: they are the
# ones read when the per-date dictionary is assembled further down.
with open("North East and Yorkshire.json", "rt") as INFILE:
    NEadmissions=json.load(INFILE)
with open("North West.json", "rt") as INFILE:
    NWadmissions=json.load(INFILE)
with open("South West.json", "rt") as INFILE:
    SWadmissions=json.load(INFILE)
with open("South East.json", "rt") as INFILE:
    SEadmissions=json.load(INFILE)

In [58]:
# Regroup the per-region record lists into a single nested lookup:
#     data[date][geography][metric] -> metric_value
data = {}
_record_fields = ('date', 'geography', 'metric', 'metric_value')
for records in (EEadmissions, Ldnadmissions, MLadmissions, NEadmissions,
                NWadmissions, SEadmissions, SWadmissions):
    for record in records:
        # read all four fields first, so nothing is stored for an incomplete record
        day, region_name, metric_name, reading = [record[field] for field in _record_fields]
        # create the date and geography levels on first sight, then store the value
        data.setdefault(day, {}).setdefault(region_name, {})[metric_name] = reading

In [59]:
# All date strings that occur in the data, in ascending order
dates = sorted(data)

In [60]:
# Tutorial code reused for converting a date string into a pandas datetime object
def parse_date(datestring):
    """ Turn a 'YYYY-MM-DD' date string into the corresponding pandas datetime.

    The string is handed to pd.to_datetime together with an explicit
    year-month-day format, and the result of that call is returned. """
    year_month_day = "%Y-%m-%d"
    return pd.to_datetime(datestring, format=year_month_day)

In [61]:
# First and last entries of the dates list, converted to pandas datetimes
startdate, enddate = parse_date(dates[0]), parse_date(dates[-1])

In [62]:
# One row per calendar day from startdate to enddate, and one column per
# region; the dataframe is created without any values in it.
index = pd.date_range(start=startdate, end=enddate, freq='D')
timeseriesdf = pd.DataFrame(
    columns=[
        'East England',
        'London',
        'Midlands',
        'North East',
        'North West',
        'South East',
        'South West',
    ],
    index=index,
)

In [63]:
# Maps each dataframe column label to the geography name used in the data
geography = {'East England': 'East of England',
             'London': 'London',
             'Midlands': 'Midlands',
             'North East': 'North East and Yorkshire',
             'North West': 'North West',
             'South East': 'South East',
             'South West': 'South West'}

# Copy the admissions figures from the nested dictionary into the dataframe,
# one cell per (date, region).
for date, regions in data.items():
    pd_date = parse_date(date) # convert to Pandas format
    for column, geo_name in geography.items():
        # do not assume all values are there for every date - if a value is not available, insert a 0.0
        value = regions.get(geo_name, {}).get('COVID-19_healthcare_admissionByDay', 0.0)
        # this is the way you access a specific location in the dataframe - use .loc
        # and put index,column in a single set of [ ]. The row is addressed with
        # the parsed date, which is the same kind of value the index holds.
        timeseriesdf.loc[pd_date, column] = value

## Download current data

Clicking the button below will allow you to download the latest available COVID-19 admission data directly from UKHSA. This ensures you always have access to the most up-to-date information for analysis.

In [64]:
def access_api(button):
    """ Button callback: download the admissions data of every NHS region
    again and overwrite the region's JSON file with it.

    The button argument is supplied by the click handler and is not used.
    When all regions have been saved, apibutton gets a check icon and the
    description "Done". Any exception raised along the way is caught and
    reported with print, so a failed download does not propagate. """
    query = dict(
        theme="infectious_disease",
        sub_theme="respiratory",
        topic="COVID-19",
        geography_type="NHS Region",
        metric="COVID-19_healthcare_admissionByDay",
    )

    # (geography name, output file) pairs, in download order
    targets = [
        ("East of England", "East of England.json"),
        ("London", "London.json"),
        ("Midlands", "Midlands.json"),
        ("North East and Yorkshire", "North East and Yorkshire.json"),
        ("North West", "North West.json"),
        ("South East", "South East.json"),
        ("South West", "South West.json"),
    ]

    # Everything that can fail sits inside the try block, so that an error is
    # reported to the user rather than raised out of the callback.
    try:
        for region_name, out_path in targets:
            query["geography"] = region_name
            records = APIwrapper(**query).get_all_pages()
            # the file is only opened once the whole download has succeeded
            with open(out_path, "wt") as out_file:
                json.dump(records, out_file)
        apibutton.icon = "check"
        apibutton.description = "Done"
    except Exception as e:
        print(f"An error occured: {e}")

# Create and display button
# The tooltip names UKHSA, the service the data is actually downloaded from.
apibutton = wdg.Button(
    description="Refresh data",
    disabled=False,
    tooltip="Click to download current UKHSA data",
    icon="download"
)
# run access_api whenever the button is clicked
apibutton.on_click(access_api)
display(apibutton)


Button(description='Refresh data', icon='download', style=ButtonStyle(), tooltip='Click to download current Pu…

## Graphs and Analysis

The graph below gives a visual representation of trends in COVID-19 admissions across NHS regions. It allows users to explore and analyse data by selecting one or more areas for comparison. This interactive feature shows how different regions have been affected over time, making it easier to identify patterns and regional differences in admissions.

In [65]:
from ipywidgets import interactive

In [66]:
# Multiple-selection list offering the dataframe's column labels (the
# regions); London is selected when the widget is first shown.
region_selector = wdg.SelectMultiple(
    description='NHS Regions:',
    options=timeseriesdf.columns.unique(),
    value=['London'],
    disabled=False,
)

# Function to update the plot based on selected regions
def update_plot(selected_regions):
    """ Draw the admissions time series of the selected regions in a new figure.

    selected_regions is a sequence of column labels of timeseriesdf; one line
    is plotted per label, against the dataframe's index. With an empty
    selection the axes, title and grid are still drawn, only without lines
    and without a legend. """
    plt.figure(figsize=(10, 6))
    # Plot each selected region
    for region in selected_regions:
        plt.plot(timeseriesdf.index, timeseriesdf[region], label=region)

    plt.title('COVID-19 Healthcare Admissions by Region')
    plt.xlabel('Date')
    plt.ylabel('Admissions')
    # a legend needs at least one labelled line; asking for one on an empty
    # plot only produces a "No artists with labels found" warning
    if selected_regions:
        plt.legend(title='NHS Regions')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Link the widget to the plot update function
# Tie the region selector to update_plot: the widget's current value is
# passed to the function as its selected_regions argument.
interactive_plot = interactive(
    update_plot,
    selected_regions=region_selector,
)

# Show the selector together with the plot it controls
display(interactive_plot)

interactive(children=(SelectMultiple(description='NHS Regions:', index=(1,), options=('East England', 'London'…