# ONS Charts

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries, NumPy and Matplotlib, plus the project modules `common_core` and `ons_core` (project directory, shared constants, ONS data loading, etc.)

In [1]:
import os
from datetime import date, datetime, timedelta

import unittest

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tck

import common_core
import ons_core

## NumPy Helper Functions

Helper functions for shifting registration data, estimating occurrences, measuring estimation errors and calculating 5 year averages

In [2]:
def shiftRegistrations(data):
    """Average each pair of neighbouring values and finish with a zero.

    Taking the mean of adjacent periods moves the series left by half a period.
    The final period has no right-hand neighbour, so a zero is appended in its place.
    """

    # Equal weights over a two-value window; "valid" keeps only the complete windows
    pairWeights = np.full(2, 0.5)
    midpoints = np.convolve(data, pairWeights, mode="valid")

    # The trailing zero stands in for the final, incomplete window
    return np.concatenate((midpoints, [0]))

In [3]:
class TestShiftRegistrations(unittest.TestCase):
    """Unit tests for the shiftRegistrations function"""

    def testShift(self):
        """Each output is the mean of two adjacent inputs and the last output is zero"""

        actual = shiftRegistrations(np.arange(6))
        expected = np.array([0.5, 1.5, 2.5, 3.5, 4.5, 0])

        # Reports the mismatching elements on failure and also rejects a shape mismatch
        np.testing.assert_array_equal(actual, expected)

In [4]:
def getEstimatedOccurrences(cache, verbose=common_core.verbose):
    """Estimate total occurrences for each area and patch the gaps in cache.

    England and Wales is the reference area. For every other region or nation found in
    cache, the area's share of the reference non-COVID registrations (both shifted left by
    half a period) is applied to the reference non-COVID occurrences, and the area's own
    COVID occurrences are added on top.

    Weeks after the last populated total_occurrences value are overwritten in cache with
    the estimate. Returns a dict mapping area name to its complete array of estimates.
    The verbose argument is accepted but not used here.
    """

    referenceArea = common_core.ENGLAND_WALES
    reference = cache[referenceArea]

    # Reference non-COVID registrations, shifted left by half a period
    refRegistrations = shiftRegistrations(reference[ons_core.TOTAL_REGISTRATIONS] -
                                          reference[ons_core.COVID_REGISTRATIONS])

    # Reference non-COVID occurrences, used without any shift
    refOccurrences = reference[ons_core.TOTAL_OCCURRENCES] - reference[ons_core.COVID_OCCURRENCES]

    # The share is only calculated where the reference registrations are non-zero
    usable = refRegistrations != 0

    estimates = {}

    for areaName in common_core.regionNames + common_core.nationNames:
        if areaName == referenceArea or areaName not in cache:
            continue

        areaData = cache[areaName]

        # Area non-COVID registrations, shifted in the same way as the reference
        areaRegistrations = shiftRegistrations(areaData[ons_core.TOTAL_REGISTRATIONS] -
                                               areaData[ons_core.COVID_REGISTRATIONS])

        # Share of the reference registrations; stays at zero where the reference is zero
        share = np.divide(areaRegistrations, refRegistrations,
                          out=np.zeros_like(areaRegistrations), where=usable)

        areaEstimate = refOccurrences * share + areaData[ons_core.COVID_OCCURRENCES]

        # First week after the last one that has total_occurrences populated
        populated = np.where(areaData[ons_core.TOTAL_OCCURRENCES] > 0)[0]
        firstGap = populated[-1] + 1

        # Patch everything from that week onwards using the estimates
        areaData[firstGap:][ons_core.TOTAL_OCCURRENCES] = areaEstimate[firstGap:]

        # Keep the full set of estimates for testing purposes
        estimates[areaName] = areaEstimate

    return estimates

In [5]:
def calculateErrors(cache, estimates, verbose=common_core.verbose):
    """Print the percentage mean absolute error of the estimates, per area and year.

    For each region and nation in cache (other than England and Wales) and each year from
    2010 to 2017, the estimated occurrences are compared with the total occurrences held in
    cache. The yearly figures are printed, followed by their simple average.

    Earlier revisions also computed the same metric for raw and for shifted registrations,
    but those results were overwritten before being used, so only the estimate-based figure
    is calculated here. The verbose argument is accepted but not used.
    """

    masterArea = common_core.ENGLAND_WALES

    # The evaluation window is the same for every area
    years = range(2010, 2018)

    for areaName in common_core.regionNames + common_core.nationNames:
        if areaName not in cache or areaName == masterArea:
            continue

        print(f"{areaName}:")

        areaData = cache[areaName]
        weekEnded = areaData[ons_core.WEEK_ENDED]
        totalPctMAE = 0

        for year in years:
            startDate = f"{year}-01-01"
            stopDate = f"{year}-12-31"

            # First week ending on or after 1 January, last week ending before 31 December
            startIdx = np.where(weekEnded >= startDate)[0][0]
            stopIdx = np.where(weekEnded < stopDate)[0][-1]

            # NOTE(review): the slice end is exclusive, so the week at stopIdx is left out
            # of the comparison - confirm this is intended
            actual = areaData[startIdx:stopIdx][ons_core.TOTAL_OCCURRENCES]
            estimated = estimates[areaName][startIdx:stopIdx]

            pctMAE = 100 * np.average(np.abs(actual - estimated) / actual)

            totalPctMAE += pctMAE

            print(f"{year} = {pctMAE:.2f}%")

        print(f"Avg  = {totalPctMAE / len(years):.2f}%")
        print()

In [6]:
def calculateAverages(cache, areaName, verbose=common_core.verbose):
    """Build per-week minimum, maximum and average occurrences over the previous 5 years.

    Years are located by the rows where the week number is 1. From the sixth such row
    onwards, each week is compared with the same offset from week 1 in each of the five
    preceding years. Rows before the sixth week-1 row are returned as zeros.

    Returns the tuple (minimums, maximums, averages). The verbose argument is not used.
    """

    areaData = cache[areaName]
    yearStarts = np.where(areaData[ons_core.WEEK_NUMBER] == 1)[0]

    lows = np.array([], dtype="u4")
    highs = np.array([], dtype="u4")
    means = np.array([])

    finalYear = len(yearStarts) - 1

    for pos in range(5, len(yearStarts)):

        # The final year runs to the end of the data, all others to the next week 1
        if pos < finalYear:
            span = yearStarts[pos + 1] - yearStarts[pos]
        else:
            span = len(areaData) - yearStarts[pos]

        # One row per preceding year, oldest first, each covering the same number of weeks
        priorYears = [
            areaData[yearStarts[pos - back]:yearStarts[pos - back] + span][ons_core.TOTAL_OCCURRENCES]
            for back in range(5, 0, -1)
        ]
        grid = np.vstack(priorYears)

        lows = np.append(lows, grid.min(axis=0))
        highs = np.append(highs, grid.max(axis=0))
        means = np.append(means, np.average(grid, axis=0))

    # Zero padding for the rows that do not have five earlier years to draw upon
    padding = yearStarts[5]

    minimums = np.concatenate((np.zeros(padding, dtype="u4"), lows))
    maximums = np.concatenate((np.zeros(padding, dtype="u4"), highs))
    averages = np.concatenate((np.zeros(padding), means))

    return minimums, maximums, averages

## Plot Data

Simple plots of ONS data

In [7]:
def prunePoints(x_points, y_points):
    '''Trim both arrays to the span between the first and last positive y value.

    Values inside that span are kept as they are, even when they are not positive.
    Two empty slices are returned when no y value is greater than zero.
    '''

    positive = np.nonzero(y_points > 0)[0]

    # Nothing to show
    if len(positive) == 0:
        return x_points[0:0], y_points[0:0]

    window = slice(positive[0], positive[-1] + 1)

    return x_points[window], y_points[window]
    

def updateAxisLabels(ax, xTickLabels, xMin, xMax, yMin, yMax):
    '''Update axis limits, ticks and labels.

    ax          -- axes to update
    xTickLabels -- one label per x position, starting at position 0
    xMin, xMax  -- smallest and largest x position that has been plotted
    yMin        -- accepted for symmetry but not used
    yMax        -- largest y value that has been plotted; the axis runs from 0 to 110% of it
    '''

    # Every label is shown until there are 104 or more, then only every (count // 52)th
    tickInterval = len(xTickLabels) // 52
    if tickInterval == 0:
        tickInterval = 1

    # Keep a fifth of the margin that matplotlib currently has either side of the data
    ax.set_xlim(xmin=xMin - (xMin - ax.get_xlim()[0]) / 5, xmax=xMax + (ax.get_xlim()[1] - xMax) / 5)
    ax.set_xticks(np.arange(0, len(xTickLabels), tickInterval))

    # Change the x-axis to show the actual dates
    ax.set_xticklabels(xTickLabels[::tickInterval], rotation=90)

    # Roughly ten ticks on the y-axis, rounded down to whole thousands or hundreds when large
    yMax *= 1.1
    if yMax > 10000:
        tickInterval = yMax // 10 // 1000 * 1000
    elif yMax > 1000:
        tickInterval = yMax // 10 // 100 * 100
    else:
        tickInterval = yMax // 10

    # A maximum below 10 gives an interval of zero, which np.arange cannot step by
    if tickInterval <= 0:
        tickInterval = 1

    # Update y-axis to use desired interval
    ax.set_ylim(ymin=0, ymax=yMax)
    ax.set_yticks(np.arange(0, yMax, tickInterval))

    # Ensure thousands are shown using commas
    ax.get_yaxis().set_major_formatter(tck.FuncFormatter(lambda x, p: format(int(x), ',')))

    # Assume all charts show weekly deaths
    ax.set_ylabel('Number of weekly deaths')

    
def addTextLegend(ax):
    """Stamp the chart with its creation date and author, then draw the legend"""

    createdOn = datetime.now().strftime("%d %b")
    credit = f'Created {createdOn}\n@Mike_aka_Logiqx'

    # Bottom-left corner, positioned in axes coordinates
    placement = {
        "transform": ax.transAxes,
        "horizontalalignment": 'left',
        "verticalalignment": 'bottom',
    }
    ax.text(0.01, 0.04, credit, **placement)

    ax.legend(loc='upper left', ncol=1)

In [8]:
def plotHistory(cache, estimates, minimums, maximums, averages, areaName, ax, verbose=common_core.verbose):
    '''Plot weekly deaths since January 2010 against the range of the previous 5 years.

    cache     -- per-area data, indexed by area name
    minimums, maximums, averages -- arrays aligned with the rows of cache[areaName]
    areaName  -- area to plot
    ax        -- axes to draw on

    The estimates and verbose arguments are accepted but not used.
    '''

    areaData = cache[areaName]

    startDate = "2010-01-08"
    startIdx = np.where(areaData[ons_core.WEEK_ENDED] == startDate)[0][0]

    # Last row with registrations; used as an exclusive end so that row is not plotted
    stopIdx = np.where(areaData[ons_core.TOTAL_REGISTRATIONS] > 0)[0][-1]

    # Total occurrences
    y_points = areaData[ons_core.TOTAL_OCCURRENCES][startIdx:stopIdx]
    x_points = np.arange(len(y_points))
    x_points, y_points = prunePoints(x_points, y_points)
    xMin, xMax = x_points.min(), x_points.max()
    yMin, yMax = y_points.min(), y_points.max()
    ax.plot(x_points, y_points, label = "Total deaths - all causes", color="navy")

    # Average of past 5 years
    y_points = averages[startIdx:stopIdx]
    x_points = np.arange(len(y_points))
    x_points, y_points = prunePoints(x_points, y_points)
    ax.plot(x_points, y_points, label = "Average of past 5 years", color="navy", linestyle="dotted")

    # Max + min of past 5 years
    # Both edges of the band use the positions that survive pruning of the maximums, so
    # they always have the same length and stay aligned with the x positions
    y2_points = maximums[startIdx:stopIdx]
    x_points, y2_points = prunePoints(np.arange(len(y2_points)), y2_points)
    y1_points = minimums[startIdx:stopIdx][x_points]
    ax.fill_between(x_points, y1_points, y2_points, label = "Max + min of past 5 years", color="lavender")

    # Chart titles and labels
    ax.set_title(f"Weekly Deaths in {areaName} since January 2010")

    # Add note about date of occurrence
    # The day number comes from .day because the "%-d" directive is not available on all platforms
    lastWeek = datetime.strptime(areaData[ons_core.WEEK_ENDED][stopIdx - 1], "%Y-%m-%d")
    lastDate = f"{lastWeek:%A} {lastWeek.day} {lastWeek:%B %Y}"
    # NOTE(review): the sentence below looks like it is missing the word "on" - confirm before changing
    textStr = f'All figures are based date of occurrence\nShowing deaths up to {lastDate}'
    ax.text(0.5, 0.98, textStr, transform=ax.transAxes, horizontalalignment='center', verticalalignment='top')

    # Source can go in a different place, depending on the chart
    textStr = 'Source: Office for National Statistics'
    ax.text(0.99, 0.04, textStr, transform=ax.transAxes, horizontalalignment='right', verticalalignment='bottom')

    # Update the x-axis and y-axis
    weekDates = [datetime.strptime(weekEnded, "%Y-%m-%d")
                 for weekEnded in areaData[ons_core.WEEK_ENDED][startIdx:stopIdx]]
    xTickLabels = [f"{weekDate.day} {weekDate:%b %y}" for weekDate in weekDates]
    updateAxisLabels(ax, xTickLabels, xMin, xMax, yMin, yMax)

    # Add text and legend
    addTextLegend(ax)

In [9]:
def plotLatest(cache, covid, estimates, minimums, maximums, averages, areaName, ax, verbose=common_core.verbose):
    '''Plot weekly deaths since January 2020, split by the involvement of COVID-19.

    cache     -- per-area data, indexed by area name
    covid     -- data providing the COVID-19 registrations and underlying-cause counts
    minimums, maximums, averages -- arrays aligned with the rows of cache[areaName]
    areaName  -- area to plot
    ax        -- axes to draw on

    The estimates and verbose arguments are accepted but not used.
    '''

    areaData = cache[areaName]

    startDate = "2020-01-03"
    startIdx = np.where(areaData[ons_core.WEEK_ENDED] == startDate)[0][0]

    # Last row with registrations; used as an exclusive end so that row is not plotted
    stopIdx = np.where(areaData[ons_core.TOTAL_REGISTRATIONS] > 0)[0][-1]

    covidStartIdx = np.where(covid[ons_core.WEEK_ENDED] == startDate)[0][0]
    covidStopIdx = np.where(covid[ons_core.COVID_REGISTRATIONS] > 0)[0][-1] + 1

    # Determine ratios where COVID-19 is the underlying cause (zero where nothing was registered)
    # NOTE(review): the multiplication further down needs this array to be the same length
    # as the area slice - confirm that the two stop indices always stay in step
    covidUnderlying = covid[covidStartIdx:covidStopIdx][ons_core.COVID_UNDERLYING].astype(np.float64)
    covidRegistrations = covid[covidStartIdx:covidStopIdx][ons_core.COVID_REGISTRATIONS].astype(np.float64)
    underlying = np.divide(covidUnderlying, covidRegistrations,
                           out=np.zeros_like(covidUnderlying), where=covidRegistrations != 0)

    totalOccurrences = areaData[ons_core.TOTAL_OCCURRENCES][startIdx:stopIdx]
    covidOccurrences = areaData[ons_core.COVID_OCCURRENCES][startIdx:stopIdx]
    underlyingOccurrences = covidOccurrences * underlying

    # Axis limits are taken from the total occurrences
    x_points, y_points = prunePoints(np.arange(len(totalOccurrences)), totalOccurrences)
    xMin, xMax = x_points.min(), x_points.max()
    yMin, yMax = y_points.min(), y_points.max()

    # Solid lines, drawn in this order
    series = [
        ("Total deaths - all causes", "navy", totalOccurrences),
        ("COVID-19 is not underlying", "royalblue", totalOccurrences - underlyingOccurrences),
        ("COVID-19 is not mentioned", "deepskyblue", totalOccurrences - covidOccurrences),
        ("COVID-19 is mentioned", "darkorange", covidOccurrences),
        ("COVID-19 is underlying", "red", underlyingOccurrences),
    ]
    for label, color, values in series:
        x_points, y_points = prunePoints(np.arange(len(values)), values)
        ax.plot(x_points, y_points, label = label, color=color)

    # Average of past 5 years
    y_points = averages[startIdx:stopIdx]
    x_points = np.arange(len(y_points))
    x_points, y_points = prunePoints(x_points, y_points)
    ax.plot(x_points, y_points, label = "Average of past 5 years", color="navy", linestyle="dotted")

    # Max + min of past 5 years
    # Both edges of the band use the positions that survive pruning of the maximums, so
    # they always have the same length and stay aligned with the x positions
    y2_points = maximums[startIdx:stopIdx]
    x_points, y2_points = prunePoints(np.arange(len(y2_points)), y2_points)
    y1_points = minimums[startIdx:stopIdx][x_points]
    ax.fill_between(x_points, y1_points, y2_points, label = "Max + min of past 5 years", color="lavender")

    # Chart titles and labels
    ax.set_title(f"Weekly Deaths in {areaName} since January 2020")

    # Add note about date of occurrence
    # The day number comes from .day because the "%-d" directive is not available on all platforms
    lastWeek = datetime.strptime(areaData[ons_core.WEEK_ENDED][stopIdx - 1], "%Y-%m-%d")
    lastDate = f"{lastWeek:%A} {lastWeek.day} {lastWeek:%B %Y}"
    # NOTE(review): the sentence below looks like it is missing the word "on" - confirm before changing
    textStr = f'All figures are based date of occurrence\nShowing deaths up to {lastDate}'
    ax.text(0.5, 0.98, textStr, transform=ax.transAxes, horizontalalignment='center', verticalalignment='top')

    # Source can go in a different place, depending on the chart
    textStr = 'Source: Office for National Statistics'
    ax.text(0.99, 0.98, textStr, transform=ax.transAxes, horizontalalignment='right', verticalalignment='top')

    # Update the x-axis and y-axis
    weekDates = [datetime.strptime(weekEnded, "%Y-%m-%d")
                 for weekEnded in areaData[ons_core.WEEK_ENDED][startIdx:stopIdx]]
    xTickLabels = [f"{weekDate.day} {weekDate:%b %y}" for weekDate in weekDates]
    updateAxisLabels(ax, xTickLabels, xMin, xMax, yMin, yMax)

    # Add text and legend
    addTextLegend(ax)

In [10]:
def plotDelays(cache, areaName, ax, verbose=common_core.verbose):
    '''Plot occurrences alongside registrations since January 2020, as held in cache.

    cache     -- per-area data, indexed by area name
    areaName  -- area to plot
    ax        -- axes to draw on

    The verbose argument is accepted but not used.
    '''

    # "extra" extends a series by that many rows beyond the common stop index
    fields = [
        {
            "name": ons_core.TOTAL_OCCURRENCES,
            "label": "Total occurrences",
            "color": "navy",
            "linestyle": "solid",
            "extra" : 0
        },
        {
            "name": ons_core.TOTAL_REGISTRATIONS,
            "label": "Total registrations",
            "color": "cornflowerblue",
            "linestyle": "dotted",
            "extra" : 1
        },
        {
            "name": ons_core.COVID_OCCURRENCES,
            "label": "COVID-19 occurrences",
            "color": "red",
            "linestyle": "solid",
            "extra" : 0
        },
        {
            "name": ons_core.COVID_REGISTRATIONS,
            "label": "COVID-19 registrations",
            "color": "lightcoral",
            "linestyle": "dotted",
            "extra" : 1
        }
    ]

    areaData = cache[areaName]

    startDate = "2020-01-03"
    startIdx = np.where(areaData[ons_core.WEEK_ENDED] == startDate)[0][0]
    stopIdx = np.where(areaData[ons_core.TOTAL_REGISTRATIONS] > 0)[0][-1]

    # Starting values that any plotted point will replace
    xMin = yMin = stopIdx
    xMax = yMax = 0

    # Counts direct from the ONS
    for field in fields:
        y_points = areaData[field["name"]][startIdx:stopIdx + field["extra"]]
        x_points = np.arange(len(y_points))
        x_points, y_points = prunePoints(x_points, y_points)
        ax.plot(x_points, y_points, label = field["label"], color=field["color"], linestyle=field["linestyle"])
        if len(x_points) > 0:
            xMin = min(xMin, min(x_points))
            yMin = min(yMin, min(y_points))
            xMax = max(xMax, max(x_points))
            yMax = max(yMax, max(y_points))

    # Chart titles and labels
    ax.set_title(f"Weekly Deaths in {areaName} since January 2020")

    # Add note about date of registration
    # The day number comes from .day because the "%-d" directive is not available on all platforms
    lastWeek = datetime.strptime(areaData[ons_core.WEEK_ENDED][stopIdx], "%Y-%m-%d")
    lastDate = f"{lastWeek:%A} {lastWeek.day} {lastWeek:%B %Y}"
    textStr = f'Showing deaths registered up to {lastDate}'
    ax.text(0.5, 0.98, textStr, transform=ax.transAxes, horizontalalignment='center', verticalalignment='top')

    # Source can go in a different place, depending on the chart
    textStr = 'Source: Office for National Statistics'
    ax.text(0.99, 0.98, textStr, transform=ax.transAxes, horizontalalignment='right', verticalalignment='top')

    # Update the x-axis and y-axis
    weekDates = [datetime.strptime(weekEnded, "%Y-%m-%d")
                 for weekEnded in areaData[ons_core.WEEK_ENDED][startIdx:stopIdx + 1]]
    xTickLabels = [f"{weekDate.day} {weekDate:%b %y}" for weekDate in weekDates]
    updateAxisLabels(ax, xTickLabels, xMin, xMax, yMin, yMax)

    # Add text and legend
    addTextLegend(ax)

In [11]:
def legacy(cache):
    """Record of date ranges used for legacy testing; nothing is calculated or returned"""

    # Check heatwave of August 2003 - https://www.eurosurveillance.org/content/10.2807/esm.10.07.00558-en
    heatwave = ('2003-07-01', '2003-09-01')

    # Show reg delays (the stop value is a date object rather than a string)
    regDelays = (date(2010, 1, 1).strftime('%Y-%m-%d'), date(2020, 1, 8))

    # Same range as error check
    errorCheck = ('2014-01-01', '2018-12-31')

    # Only the last range is kept under the working names
    startDate, stopDate = errorCheck


def plotAreas(reportErrors = False, verbose=common_core.verbose):
    '''Create, display and save the three weekly deaths charts for every area.

    reportErrors -- when true, print the estimation errors before plotting
    verbose      -- passed on to the loading, estimation and plotting functions

    One PNG per area is written to docs/weekly-deaths beneath the project directory.
    '''

    # Load all of the data from disk
    cache = ons_core.loadCsvFiles(ons_core.ONS_DEATHS, "weekly", verbose = False)

    # Honour the caller's verbose setting rather than the module-wide default
    covid = ons_core.loadCsvFile(ons_core.ONS_DEATHS, "weekly", "nation", "england_wales_covid", verbose=verbose)

    # Estimate the number of occurrences where necessary
    estimates = getEstimatedOccurrences(cache, verbose=verbose)

    # Shift COVID-19 registrations left so they can be used for occurrences
    covid[ons_core.COVID_REGISTRATIONS] = shiftRegistrations(covid[ons_core.COVID_REGISTRATIONS])
    covid[ons_core.COVID_UNDERLYING] = shiftRegistrations(covid[ons_core.COVID_UNDERLYING])

    # Evaluation of estimation errors
    if reportErrors:
        calculateErrors(cache, estimates, verbose=verbose)

    # All images go to the same folder; create it if this is the first run
    imgPath = os.path.join(common_core.projdir, "docs", "weekly-deaths")
    os.makedirs(imgPath, exist_ok=True)

    url = 'https://logiqx.github.io/covid-stats/weekly-deaths'

    for areaName in common_core.nationNames + common_core.regionNames:
        if areaName not in cache:
            continue

        minimums, maximums, averages = calculateAverages(cache, areaName, verbose=verbose)

        fig, axs = plt.subplots(3, figsize=(16, 18), dpi=150)

        plotHistory(cache, estimates, minimums, maximums, averages, areaName, axs[0], verbose=verbose)
        plotLatest(cache, covid, estimates, minimums, maximums, averages, areaName, axs[1], verbose=verbose)
        plotDelays(cache, areaName, axs[2], verbose=verbose)

        textStr = f'All code and data used to create these charts can be found at {url}'
        fig.text(0.5, 0.05, textStr, horizontalalignment='center', verticalalignment='bottom')

        plt.subplots_adjust(hspace=0.4)

        # Save before showing, since some backends discard the figure once it has been shown
        imgName = common_core.getSafeName(areaName) + ".png"
        fig.savefig(os.path.join(imgPath, imgName), bbox_inches='tight')

        # pyplot.show() does not take a figure; it displays all open figures
        plt.show()

        # Release the figure so that they do not accumulate across areas
        plt.close(fig)

## Automated Testing

In [12]:
if __name__ == '__main__':

    # Run the unit tests defined in this file. A replacement argv is supplied so that
    # unittest does not read sys.argv (its first element is treated as the program name),
    # and exit=False stops unittest from calling sys.exit() once the tests have finished.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

.
----------------------------------------------------------------------
Ran 1 test in 0.002s

OK


## Interactive Testing

In [13]:
if __name__ == '__main__':

    # Create, show and save the charts for every area, without the estimation error
    # report and with verbose output switched off
    plotAreas(reportErrors = False, verbose = False)