# ONS Core

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries, plus determination of projdir, basic printable class, etc.

In [1]:
import os
from datetime import date, datetime, timedelta

import numpy as np

import unittest

import common_core

## Configuration

Age bands used in the ONS data (latest and legacy groupings)

In [2]:
# Current ONS age bands: under 1, 1-4, then five-year bands up to 90+
ageDemographics = [
    '<1', '1-4', '5-9', '10-14', '15-19',
    '20-24', '25-29', '30-34', '35-39', '40-44',
    '45-49', '50-54', '55-59', '60-64', '65-69',
    '70-74', '75-79', '80-84', '85-89', '90+',
]

# Legacy ONS age bands (fewer, wider groupings)
legacyAgeDemographics = [
    '01-14',
    '15-44',
    '45-64',
    '65-74',
    '75-84',
    '85+',
]

## Constants

Named constants for text strings, so that values are not hard-coded throughout the code; this avoids clutter and the silent errors caused by mistyped strings.

In [3]:
# Bounds of the week index
minWeek = date(1970, 1, 2)  # Fri 2 Jan 1970
maxWeek = date(date.today().year, 12, 31)  # 31 Dec of the current year

In [4]:
# Weeks have an end date and a number, always ending on Fridays
# NOTE(review): presumably used as column / key names for weekly data - confirm against callers
WEEK_ENDED = "week_ended"
WEEK_NUMBER = "week_number"

# Internal names used by the cache
# NOTE(review): the cache structure that uses these names is not visible in this section
WEEK_COL_NOS = "week_col_nos"
WEEK_NUMBERS = "week_numbers"
WEEK_ENDINGS = "week_endings"
WEEK_OFFSETS = "week_offsets"

# Deaths are reported by registration date and occurrence date
# (totals and COVID-specific counts each have a registration and an occurrence variant)
TOTAL_REGISTRATIONS = "total_registrations"
TOTAL_OCCURRENCES = "total_occurrences"
COVID_REGISTRATIONS = "covid_registrations"
COVID_OCCURRENCES = "covid_occurrences"
COVID_UNDERLYING = "covid_underlying"

In [5]:
# Identifiers for the ONS deaths datasets
# NOTE(review): presumably folder / dataset names (e.g. the "folder" argument of loadCsvFiles) - confirm
ONS_DEATHS = "ons-deaths"
ONS_LAHB_DEATHS = "ons-lahb-deaths"
ONS_EWM_DEATHS = "ons-ewm-deaths"

## Date Helper Functions

Functions to calculate week endings, etc.

In [6]:
def getFirstFriday(year):
    '''Return the date of the first Friday falling on or after Jan 2nd of the given year'''

    # Week 1 ends on/after Jan 2nd, so Jan 2nd is the earliest candidate
    jan2 = datetime.strptime(f"{year}-01-02", '%Y-%m-%d').date()

    # weekday() is 0 for Monday, so Friday is 4; step forward to the next Friday (0-6 days)
    daysToFriday = (4 - jan2.weekday()) % 7

    return jan2 + timedelta(days=daysToFriday)

In [7]:
class TestGetFirstFriday(unittest.TestCase):
    '''Unit tests for getFirstFriday'''

    def test(self):
        '''Check the first Friday for a range of consecutive years'''

        # Expected day of January for each year
        # Covers 12 years, rather than a full 28 year cycle
        expectedDays = {
            2010: 8,
            2011: 7,
            2012: 6,
            2013: 4,
            2014: 3,
            2015: 2,
            2016: 8,
            2017: 6,
            2018: 5,
            2019: 4,
            2020: 3,
            2021: 8,
        }

        for year, day in expectedDays.items():
            self.assertEqual(getFirstFriday(year), date(year, 1, day))

In [8]:
def getWeekEnded(year, weekNo):
    '''Return the Friday ending week weekNo of the given year, as a "YYYY-MM-DD" string'''

    # Week 1 ends on the first Friday, so week N ends N - 1 weeks after it
    firstFriday = getFirstFriday(year)
    friday = firstFriday + timedelta(weeks=weekNo - 1)

    return friday.strftime("%Y-%m-%d")

In [9]:
class TestGetWeekEnded(unittest.TestCase):
    '''Unit tests for getWeekEnded'''

    def _checkWeek(self, weekNo, expectedByYear):
        '''Assert the week ended date of weekNo for every (year, expected) pair'''

        for year, expected in expectedByYear:
            self.assertEqual(getWeekEnded(year, weekNo), expected)

    def testWeek1(self):
        '''Week 1 ends on the first Friday of the year'''

        self._checkWeek(1, [
            (2010, '2010-01-08'),
            (2011, '2011-01-07'),
            (2012, '2012-01-06'),
            (2013, '2013-01-04'),
            (2014, '2014-01-03'),
            (2015, '2015-01-02'),
            (2016, '2016-01-08'),
            (2017, '2017-01-06'),
            (2018, '2018-01-05'),
            (2019, '2019-01-04'),
            (2020, '2020-01-03'),
            (2021, '2021-01-08'),
        ])

    def testWeek52(self):
        '''Week 52 ends on a Friday late in December'''

        self._checkWeek(52, [
            (2010, '2010-12-31'),
            (2011, '2011-12-30'),
            (2012, '2012-12-28'),
            (2013, '2013-12-27'),
            (2014, '2014-12-26'),
            (2015, '2015-12-25'),
            (2016, '2016-12-30'),
            (2017, '2017-12-29'),
            (2018, '2018-12-28'),
            (2019, '2019-12-27'),
            (2020, '2020-12-25'),
            (2021, '2021-12-31'),
        ])

    def testWeek53(self):
        '''Week 53 ends on Jan 1st of the following year'''

        self._checkWeek(53, [
            (2009, '2010-01-01'),
            (2015, '2016-01-01'),
            (2020, '2021-01-01'),
        ])

In [10]:
def checkWeekNo(weekEnded, weekNo):
    '''Check that the week number matches the week ended date

    weekEnded is a "YYYY-MM-DD" string and weekNo is the week number within the year.

    Returns True when getWeekEnded() for that year and week number gives weekEnded,
    otherwise raises RuntimeError.
    '''

    year = int(weekEnded[:4])

    # Jan 1st cannot be week 1 and is week 53 of the previous year
    # The month and day are the LAST five characters ("MM-DD") of the date string
    if weekEnded[-5:] == '01-01':
        year -= 1

    expected = getWeekEnded(year, weekNo)
    if expected != weekEnded:
        raise RuntimeError(
            f"Week {weekNo} of {year} ends on {expected}, not {weekEnded}")

    return True

In [11]:
class TestCheckWeekNo(unittest.TestCase):
    '''Unit tests for checkWeekNo'''

    def testGood(self):
        '''A date paired with its correct week number is accepted'''

        outcome = checkWeekNo('2020-09-18', 38)

        self.assertEqual(outcome, True)

    def testBad(self):
        '''Week numbers either side of the correct one are rejected'''

        for wrongWeekNo in (37, 39):
            with self.assertRaises(RuntimeError):
                checkWeekNo('2020-09-18', wrongWeekNo)

## Load CSV Files

Load CSV data into cache

In [12]:
def loadCsvFile(folder, period, areaType, areaName, verbose=common_core.verbose):
    '''Load the CSV data for a single area, returning None when no file exists for it'''

    # Files live under <dataDir>/<folder>/csv/<period>/<areaType>/<safe area name>.csv
    areaDir = os.path.join(common_core.dataDir, folder, "csv", period, areaType)
    csvFn = os.path.join(areaDir, common_core.getSafeName(areaName) + ".csv")

    # A missing file is reported as None rather than raising
    if not os.path.exists(csvFn):
        return None

    return common_core.loadCsvIntoArray(csvFn, verbose = verbose)


def loadCsvFiles(folder, period, verbose=common_core.verbose):
    '''Load the CSV data for every nation and region into a dict keyed by area name

    Areas for which loadCsvFile() returns None are left out of the result.
    '''

    # Nations are loaded first, then regions
    areaGroups = (
        ("nation", common_core.nationNames),
        ("region", common_core.regionNames),
    )

    cache = {}

    for areaType, areaNames in areaGroups:
        for areaName in areaNames:
            data = loadCsvFile(folder, period, areaType, areaName, verbose = verbose)
            if data is not None:
                cache[areaName] = data

    return cache

## NumPy Helper Functions

Useful functionality such as moving average or rolling sum

In [13]:
def shiftRegistrations(data):
    """Shift registration data left by half a period

    Each output value is the mean of the corresponding input value and the one after it.
    The final value has no successor so it is set to zero.

    data is a 1-D sequence of numbers; the result is an array of the same length.
    """

    values = np.asarray(data)

    # With fewer than two values there is no pair to average. np.convolve would swap its
    # operands (returning too many values) or reject empty input, so handle this directly.
    if values.size < 2:
        return np.zeros(values.size)

    # Final value is invalid (so not included in the convolution result) and needs to be zero
    averaged = np.convolve(values, np.array([0.5, 0.5]), mode="valid")

    return np.append(averaged, 0)

In [14]:
class TestShiftRegistrations(unittest.TestCase):
    '''Unit tests for shiftRegistrations'''

    def testShift(self):
        '''Adjacent values are averaged and the final value is zero'''

        expected = np.array([0.5, 1.5, 2.5, 3.5, 4.5, 0])
        shifted = shiftRegistrations(np.arange(6))

        matches = (shifted == expected).all()

        self.assertEqual(matches, True)

## Automated Testing

In [15]:
if __name__ == "__main__":

    # Supply a placeholder argv (only the program name, which unittest ignores) so the
    # arguments of the hosting process are not parsed, and pass exit=False so that the
    # interpreter keeps running after the tests - both needed when run from a notebook
    unittest.main(exit=False, argv=['first-arg-is-ignored'])

.......
----------------------------------------------------------------------
Ran 7 tests in 0.017s

OK
