[DIY Disease Tracking Dashboard Kit](https://github.com/fsmeraldi/diy-covid19dash) (C) Fabrizio Smeraldi, 2020,2024 ([f.smeraldi@qmul.ac.uk](mailto:f.smeraldi@qmul.ac.uk) - [web](http://www.eecs.qmul.ac.uk/~fabri/)). This notebook is released under the [GNU GPLv3.0 or later](https://www.gnu.org/licenses/).

This chart shows the weekly number of Covid-19 related deaths recorded in each of London’s 32 boroughs and the City of London between December 2019 and December 2023.
Each line represents one borough, allowing for comparison of how mortality trends evolved across different areas during this period.

Following the severe impact of early 2021, most boroughs saw a decline in Covid-related deaths as vaccination rates increased and public health measures took effect. However, small fluctuations remained, reflecting local outbreaks and seasonal variations.

In [1]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import time
import json
import os

In [2]:
%matplotlib inline
# Render matplotlib figures at 100 dots per inch. The intent is to make the
# figures embedded in the notebook larger (NOTE(review): this depends on the
# backend's own default dpi - confirm it is lower than 100).
plt.rcParams['figure.dpi'] = 100

In [None]:
import requests
import time

class APIwrapper:
    """ Paginated, rate-limited client for one metric endpoint of the UKHSA
    dashboard API. An instance is bound to a single combination of structure
    parameters (theme, sub_theme, topic, geography_type, geography, metric);
    pages are then pulled with get_page or get_all_pages. """
    # class variables shared among all instances
    _access_point="https://api.ukhsa-dashboard.data.gov.uk"
    _last_access=0.0 # time of last api access
    _min_interval=0.33 # minimum spacing between requests (max 3 requests/second)
    _timeout=30 # seconds to wait for the server before giving up

    def __init__(self, theme, sub_theme, topic, geography_type, geography, metric):
        """ Init the APIwrapper object, constructing the endpoint from the structure
        parameters """
        # build the path with all the required structure parameters. You do not need to edit this line,
        # parameters will be replaced by the actual values when you instantiate an object of the class!
        url_path=(f"/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/" +
                  f"{geography_type}/geographies/{geography}/metrics/{metric}")
        # our starting API endpoint
        self._start_url=APIwrapper._access_point+url_path
        self._filters=None
        self._page_size=-1
        # url of the next page to fetch; None once the last page has been read
        self._next_url=self._start_url
        # will contain the number of items
        self.count=None

    def get_page(self, filters=None, page_size=5):
        """ Access the API and download the next page of data. Sets the count
        attribute to the total number of items available for this query. Changing
        filters or page_size will cause get_page to restart from page 1. Rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (maximum 365); use the default value
        for debugging your structure and filters.

        filters is a dictionary of query parameters (entries whose value is None
        are ignored); omitting it is the same as passing an empty dictionary.
        Returns the list found under the 'results' key of the response, or an
        empty list once the last page has already been fetched. Raises ValueError
        if page_size exceeds 365 and requests.HTTPError if the server answers
        with an error status. """
        # a None default avoids sharing one mutable dictionary between calls
        if filters is None:
            filters={}
        # Check page size is within range
        if page_size>365:
            raise ValueError("Max supported page size is 365")
        # restart from first page if page or filters have changed
        if filters!=self._filters or page_size!=self._page_size:
            # keep a copy, so that a later in-place change to the caller's
            # dictionary is still detected as a change of filters
            self._filters=dict(filters)
            self._page_size=page_size
            self._next_url=self._start_url
        # signal the end of data condition
        if self._next_url is None:
            return [] # we already fetched the last page
        # simple rate limiting to avoid bans
        deltat=time.time()-APIwrapper._last_access
        if deltat<APIwrapper._min_interval:
            time.sleep(APIwrapper._min_interval-deltat)
        # record the time at which the request is actually sent (i.e. after
        # any sleep), otherwise the next call underestimates the time it must wait
        APIwrapper._last_access=time.time()
        # build parameter dictionary by removing all the None
        # values from filters and adding page_size
        parameters={x: y for x, y in filters.items() if y is not None}
        parameters['page_size']=page_size
        # the page parameter is already included in _next_url.
        # This is the API access. Response is a dictionary with various keys.
        reply=requests.get(self._next_url, params=parameters,
                           timeout=APIwrapper._timeout)
        # fail with a clear HTTP error rather than a confusing KeyError below
        reply.raise_for_status()
        # the .json() method decodes the response into Python object (dictionaries,
        # lists; 'null' values are translated as None).
        response=reply.json()
        # update url so we'll fetch the next page
        self._next_url=response['next']
        self.count=response['count']
        # data are in the nested 'results' list
        return response['results']

    def get_all_pages(self, filters=None, page_size=365):
        """ Access the API and download all available data pages of data. Sets the count
        attribute to the total number of items available for this query. API access rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (maximum 365), and controls the trade-off
        between time to load a page and number of pages; the default should work well
        in most cases. The number of items returned should in any case be equal to
        the count attribute. """
        if filters is None:
            filters={}
        data=[] # build up all data here
        while True:
            # use get_page to do the job, including the pacing
            next_page=self.get_page(filters, page_size)
            if not next_page:
                break # we are done
            data.extend(next_page)
        return data

In [None]:
# Structure parameters shared by every query; "geography" and "metric" are
# filled in inside the download loop below
london_structure={"theme": "infectious_disease",
           "sub_theme": "respiratory",
           "topic": "COVID-19",
           "geography_type": "Lower%20Tier%20Local%20Authority"}

# URL-encoded names of the areas to download; each is used both as the
# "geography" path component and as the prefix of the saved file name
borough_list=["Barking%20and%20Dagenham","Barnet","Bexley","Brent","Bromley","Camden","City%20of%20London","Croydon","Ealing","Enfield","Greenwich","Hackney","Hammersmith%20and%20Fulham","Haringey",
    "Harrow","Havering","Hillingdon","Hounslow","Islington","Kensington%20and%20Chelsea","Kingston%20upon%20Thames","Lambeth","Lewisham","Merton","Newham","Redbridge","Richmond%20upon%20Thames",
    "Southwark","Sutton","Tower%20Hamlets","Waltham%20Forest","Wandsworth","Westminster"]

# Metrics to download for every area
metric_list=["COVID-19_cases_casesByDay", "COVID-19_deaths_ONSByWeek"]

# The files are written into data/; create the directory if it is missing so
# that a fresh checkout does not fail with FileNotFoundError on the first write
os.makedirs('data', exist_ok=True)

# One API query and one JSON file (data/<borough>_<metric>.json) per
# borough/metric combination
for b in borough_list:
    for m in metric_list:
        london_structure["geography"]=b
        london_structure["metric"]=m
        api=APIwrapper(**london_structure)
        ls=api.get_all_pages()
        # the two numbers should match if every page was retrieved
        print(f"Data points expected: {api.count}")
        print(f"Data points retrieved: {len(ls)}")

        with open(('data/' + b + '_' + m + '.json'), 'wt') as OUTF:
            json.dump(ls, OUTF)



Use this visualisation to:

- Identify boroughs with higher or lower mortality trends over time
- Observe overall declines following key public health interventions
- Compare how different parts of London were affected across pandemic phases

In [3]:
# List the names of the entries in the data/ directory. Note that this only
# collects names: no JSON file is opened or parsed by this line.
jsondata=os.listdir('data/')

In [4]:

# Area names, URL-encoded exactly as they appear in the saved file names
borough_list=["Barking%20and%20Dagenham","Barnet","Bexley","Brent","Bromley","Camden","City%20of%20London","Croydon","Ealing","Enfield","Greenwich","Hackney","Hammersmith%20and%20Fulham","Haringey",
    "Harrow","Havering","Hillingdon","Hounslow","Islington","Kensington%20and%20Chelsea","Kingston%20upon%20Thames","Lambeth","Lewisham","Merton","Newham","Redbridge","Richmond%20upon%20Thames",
    "Southwark","Sutton","Tower%20Hamlets","Waltham%20Forest","Wandsworth","Westminster"]
# Metric whose files are read below
metric_list=["COVID-19_deaths_ONSByWeek"]

# Maps every borough name to a list of (metric, metric_value, date) tuples,
# one tuple per record found in that borough's file
death_borough={}

for b in borough_list:
    # read the whole file first, so it is closed before the records are processed
    with open('data/' + b + '_COVID-19_deaths_ONSByWeek.json' , 'rt') as INFILE:
        records = json.load(INFILE)

    # keep only the three fields used further down; a field missing from a
    # record comes out as None
    death_borough[b] = [
        (record.get('metric'), record.get('metric_value'), record.get('date'))
        for record in records
    ]

#Data is stored as a dictionary: every borough is a key, and its value is a list of
#(metric, metric_value, date) tuples

#Removing duplicate weeks: drop every record that is identical to the one just before it.
#A repeated record anywhere in the list (not only at the very start) would otherwise
#give that borough more entries than there are weeks.
for b in death_borough:
    unique_records = []
    for record in death_borough[b]:
        if not unique_records or record != unique_records[-1]:
            unique_records.append(record)
    death_borough[b] = unique_records


#Collecting every distinct date string found in any borough's records (the date is
#the third field of each tuple), in ascending order
dates = sorted({record[2] for records in death_borough.values() for record in records})


#Helper turning the date strings found in the data into pandas datetime objects
def parse_date(datestring):
    """ Parse a 'YYYY-MM-DD' date string and return the matching pandas datetime object """
    iso_day_format = "%Y-%m-%d"
    return pd.to_datetime(datestring, format=iso_day_format)

#Converting start and end dates to panda type
startdate=parse_date(dates[0])
enddate=parse_date(dates[-1])
print('Covid death data from' ,startdate, 'to', enddate)

#Define the DataFrame: one row per week (Mondays) between the first and last date,
#one column per borough
index=pd.date_range(startdate, enddate, freq='W-MON')

boroughtimeseriesdf=pd.DataFrame(index=index)

#Adding one column per borough. Each value is placed on the row of its own date
#rather than by position in the list, so a borough with a missing, repeated or
#out-of-order week cannot shift the remaining values or cause a length mismatch.
for x, y in death_borough.items():
    weekly=pd.Series([t[1] for t in y],
                     index=pd.DatetimeIndex([parse_date(t[2]) for t in y]),
                     dtype="float64")
    # a date may be present more than once; keep a single record per date
    weekly=weekly[~weekly.index.duplicated()]
    # records whose date is not one of the weekly rows cannot be placed; say so
    # instead of dropping them silently
    unplaced=weekly.index.difference(index)
    if len(unplaced)>0:
        print(f"Warning: {len(unplaced)} record(s) for {x} do not match a week in the index and are not plotted")
    boroughtimeseriesdf[x]=weekly.reindex(index)

#fill in any remaining "holes" due to missing dates
boroughtimeseriesdf.fillna(0.0, inplace=True)

#Creating interactive plot
from urllib.parse import unquote

#Creating widget to choose which boroughs are plotted. Options are (label, value)
#pairs: the label shown to the user is the readable name ("Barking and Dagenham"),
#while the value stays the URL-encoded name used everywhere else in the notebook.
series=wdg.SelectMultiple(
    options=[(unquote(name), name) for name in borough_list],
    value=borough_list, # everything selected to start with
    rows=5,
    description='Boroughs:',
    disabled=False
)

#Creating widget to set start date
start_scale=wdg.DatePicker(
    description='Start Date',
    value=startdate,
    disabled=False
)
#Creating widget to set end date
end_scale=wdg.DatePicker(
    description='End Date',
    value=enddate,
    disabled=False
)

#Creating button that puts both date pickers back to the full date range
reset_button = wdg.Button(
    description='Reset dates',
    tooltip='Click to reset dates',
    icon='check', # (FontAwesome names without the `fa-` prefix)
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    disabled=False
)


def reset(x):
    """ Click handler: set the start and end date pickers back to startdate and
    enddate. The argument supplied by the button is not used. """
    for picker, full_range_date in ((start_scale, startdate), (end_scale, enddate)):
        picker.value = full_range_date


reset_button.on_click(reset)

#All the controls side by side in a single row
controls = wdg.HBox([series, start_scale, end_scale, reset_button])


def timeseries_graph(gcols, start_gscale, end_gscale):
    """ Plot the selected columns of boroughtimeseriesdf as one line per borough.

    gcols is the collection of column names to plot; start_gscale and end_gscale
    are used as the left and right limits of the x axis. Nothing is drawn when
    gcols is empty. """
    # local import: keeps the function usable regardless of which cells ran before
    from urllib.parse import unquote

    print("Click to select data for graph")
    print("(CTRL-Click to select more than one category)")
    if not gcols:
        # pandas cannot plot a frame without columns, so with nothing selected
        # just leave the usage hints on screen
        return
    # column names are URL-encoded; decode them so the legend shows
    # "Tower Hamlets" rather than "Tower%20Hamlets"
    selected=boroughtimeseriesdf[list(gcols)].rename(columns=unquote)
    ax=selected.plot(figsize=(13,5))
    ax.set_xlim(xmin=start_gscale, xmax=end_gscale)
    ax.set_title('Covid deaths across the London Boroughs')
    # small font and a legend placed outside the axes: up to 33 lines are drawn
    ax.legend(prop={'size': 7}, bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()  # adjusts plot to fit labels and legend
    plt.show() # important - graphs won't update if this is missing

# Connect each argument of timeseries_graph to the widget providing its value:
# the function is called with gcols=series.value, start_gscale=start_scale.value
# and end_gscale=end_scale.value, and its output is captured in the widget graph
graph_inputs = {
    'gcols': series,
    'start_gscale': start_scale,
    'end_gscale': end_scale,
}
graph = wdg.interactive_output(timeseries_graph, graph_inputs)

display(controls, graph)


# NOTE: the wrangling code above runs inline in this cell. Putting it into a function would allow you
# to call it again after refreshing the data through the API, as well as calling it directly on the
# JSON data when the dashboard starts.


Covid death data from 2019-12-30 00:00:00 to 2023-12-25 00:00:00


HBox(children=(SelectMultiple(description='Boroughs:', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14…

Output()

Data source: Office for National Statistics (ONS) weekly Covid-19 mortality data, retrieved through the UKHSA data dashboard API.
Note: Values represent registered deaths involving Covid-19 and may be subject to revision.