# ONS Convert Daily

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard python libraries plus determination of projdir, basic printable class, etc

In [1]:
import os
from datetime import datetime, timedelta

import csv
from xlrd import open_workbook

import common_core
import ons_core

## Daily Deaths - User Requested Data

Load daily occurrence data into the cache as a means of converting from XLSX to CSV.

In [2]:
def processDailySheet(cache, sheet, verbose=common_core.verbose):
    '''Append the daily occurrences found in a worksheet to the cache.

    Rows are skipped until the Year / Month / Day / Region / Deaths header
    row has been seen. The rows that follow are read until the first row
    whose first cell is empty. Each row is stored as [YYYY-MM-DD, deaths]
    in the list held under its area code. The verbose flag is accepted for
    signature consistency but is not used here.
    '''

    expectedTitles = ("Year", "Month", "Day", "Region", "Deaths")
    inDataRows = False

    for rowIdx in range(sheet.nrows):
        if not inDataRows:
            # Primitive header detection - every title must sit in its column
            inDataRows = all(sheet.cell(rowIdx, colIdx).value == title
                             for colIdx, title in enumerate(expectedTitles))
            continue

        # A blank first cell marks the end of the data
        if sheet.cell(rowIdx, 0).value == "":
            break

        # Pull the individual fields out of the current row
        year = int(sheet.cell(rowIdx, 0).value)
        month = int(sheet.cell(rowIdx, 1).value)
        day = int(sheet.cell(rowIdx, 2).value)
        areaCode = sheet.cell(rowIdx, 3).value
        deaths = int(sheet.cell(rowIdx, 4).value)

        # Translate non-standard nation codes such as W99999999
        if areaCode in common_core.nationMappings:
            areaCode = common_core.nationMappings[areaCode]

        # Create the list for this area on first use, then store the row
        cache.setdefault(areaCode, []).append([f"{year:04}-{month:02}-{day:02}", deaths])


def validateDailyCache(cache, areaCode, verbose=common_core.verbose):
    '''Sort one area's records in place and insist on consecutive dates.

    Raises RuntimeError naming the first record whose date is not exactly
    one day after the record before it.
    '''

    if verbose:
        print(f"Validating daily cache for {areaCode}...")

    # Chronological order - the ISO date strings sort correctly as text
    records = cache[areaCode]
    records.sort()

    oneDay = timedelta(days=1)
    previous = None

    for record in records:
        current = datetime.strptime(record[0], '%Y-%m-%d')

        # Anything other than a one-day step is a gap or a duplicate
        if previous is not None and current - previous != oneDay:
            raise RuntimeError(f"Non-contiguous dates for {areaCode} - {record[0]}")

        previous = current


def loadDailyDeaths(verbose=common_core.verbose):
    '''Read every raw daily workbook into a cache keyed by area code.

    Only worksheets whose name starts with "regional" (case-insensitive)
    are processed. Each area in the resulting cache is then sorted and
    checked for contiguous dates before the cache is returned.
    '''

    rawPath = os.path.join(common_core.dataDir, ons_core.ONS_DEATHS, "raw", "daily")

    cache = {}

    for baseName in os.listdir(rawPath):
        # Skip the "~$" lock files created while a spreadsheet is open
        if baseName.startswith("~$"):
            continue

        fileName = os.path.join(rawPath, baseName)
        if verbose:
            print(f"Loading {baseName}...")
        workbook = open_workbook(fileName)

        regionalSheets = (ws for ws in workbook.sheets()
                          if ws.name.lower().startswith("regional"))
        for ws in regionalSheets:
            processDailySheet(cache, ws, verbose=verbose)

    for areaCode in cache:
        validateDailyCache(cache, areaCode, verbose=verbose)

    return cache

In [3]:
def saveDailyArea(cache, areaType, areaName, areaCode, verbose=common_core.verbose):
    '''Save the cached daily occurrences for one area to a CSV file.

    Nothing is written when areaCode is not present in the cache.
    Otherwise the rows held under areaCode are written, preceded by a
    header row, to <dataDir>/<ONS_DEATHS>/csv/daily/<areaType>/<name>.csv
    where <name> is whatever common_core.getSafeName returns for areaName.
    '''

    # Areas that were never loaded simply produce no file
    if areaCode not in cache:
        return

    if verbose:
        print(f"Saving {areaName}...")

    # Ensure CSV path exists (exist_ok avoids a check-then-create race)
    csvPath = os.path.join(common_core.dataDir, ons_core.ONS_DEATHS, "csv", "daily", areaType)
    os.makedirs(csvPath, exist_ok=True)

    # Determine safe filename
    csvFn = os.path.join(csvPath, common_core.getSafeName(areaName) + ".csv")

    # newline='' hands line-ending control to the csv module; without it the
    # writer's "\r\n" terminator is translated to "\r\r\n" on Windows
    with open(csvFn, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)

        colNames = ["date", ons_core.TOTAL_OCCURRENCES]
        writer.writerow(colNames)

        writer.writerows(cache[areaCode])


def saveDailyDeaths(cache, verbose=common_core.verbose):
    '''Write a CSV for every known nation and region that has cached data.'''

    # Nations are written first, then regions; the label selects the sub-folder
    areaGroups = (
        ("nation", common_core.nations),
        ("region", common_core.regions),
    )

    for areaType, areas in areaGroups:
        for areaCode in areas:
            saveDailyArea(cache, areaType, areas[areaCode], areaCode, verbose=verbose)

## Daily Deaths - Excess Winter Mortality Bulletins

Load daily occurrence data into the cache as a means of converting from XLSX to CSV.

In [4]:
def processDailyEwmSheet(cache, nationName, sheet, verbose=common_core.verbose):
    '''Append the daily occurrences found in an EWM worksheet to the cache.

    Rows are skipped until the "Date" / "Number of daily deaths" header row
    has been seen. The rows that follow are read until the first row whose
    first cell is empty. Each row is stored as [YYYY-MM-DD, deaths] in the
    list held under nationName. The verbose flag is accepted for signature
    consistency but is not used here.

    Supported date layouts are day/month/year (2 or 4 digit year) and
    "day abbreviated-month 2-digit-year". Any parsing failure is reported
    on stdout and the original exception is re-raised.
    '''

    headerFound = False

    for rowNo in range(sheet.nrows):
        if not headerFound:
            # Primitive check to determine presence of header
            headerFound = (sheet.cell(rowNo, 0).value == "Date" and
                           sheet.cell(rowNo, 1).value == "Number of daily deaths")
            continue

        value = sheet.cell(rowNo, 0).value

        # Stop at the first empty row
        if value == "":
            break

        # Date format can vary from year to year in the EWM spreadsheets
        try:
            if "/" in value:
                # Pick the year directive from the year field itself so that
                # unpadded dates such as 1/8/2018 are handled as well
                yearText = value.rsplit("/", 1)[-1]
                yearCode = '%Y' if len(yearText) == 4 else '%y'
                ymd = datetime.strptime(value, '%d/%m/' + yearCode)
            elif " " in value:
                ymd = datetime.strptime(value, '%d %b %y')
            else:
                raise RuntimeError(f"Unsupported date format {value}")
        except Exception:
            # Report the offending cell, then let the error propagate
            print(f"Error trying to parse {value}")
            raise

        deaths = int(sheet.cell(rowNo, 1).value)

        # Create the list for this nation on first use, then store the row
        cache.setdefault(nationName, []).append([ymd.strftime("%Y-%m-%d"), deaths])


def validateDailyEwmCache(cache, nationName, verbose=common_core.verbose):
    '''Sort one nation's records in place and insist on consecutive dates.

    Raises RuntimeError naming the first record whose date does not follow
    the preceding record by exactly one day.
    '''

    if verbose:
        print(f"Validating daily cache for {nationName}...")

    # Put the series into chronological order
    series = cache[nationName]
    series.sort()

    # Walk the series, comparing each date with the one before it
    for position, entry in enumerate(series):
        today = datetime.strptime(entry[0], '%Y-%m-%d')

        if position > 0 and today != yesterday + timedelta(days=1):
            raise RuntimeError(f"Non-contiguous dates for {nationName} - {entry[0]}")

        yesterday = today


def loadDailyEwmDeaths(verbose=common_core.verbose):
    '''Read every raw EWM workbook into a cache keyed by nation name.

    The nation is taken from the file name, which must contain either
    common_core.ENGLAND or common_core.WALES; otherwise RuntimeError is
    raised. Every worksheet of each workbook is processed, and each nation
    in the resulting cache is sorted and checked for contiguous dates.
    '''

    rawPath = os.path.join(common_core.dataDir, ons_core.ONS_EWM_DEATHS, "raw", "daily")

    cache = {}

    for baseName in os.listdir(rawPath):
        # Skip the "~$" lock files created while a spreadsheet is open
        if baseName.startswith("~$"):
            continue

        # England is tested first, then Wales
        for candidate in (common_core.ENGLAND, common_core.WALES):
            if candidate in baseName:
                nationName = candidate
                break
        else:
            raise RuntimeError(f"Unrecognised filename {baseName}")

        fileName = os.path.join(rawPath, baseName)
        if verbose:
            print(f"Loading {baseName}...")
        workbook = open_workbook(fileName)

        for ws in workbook.sheets():
            processDailyEwmSheet(cache, nationName, ws, verbose=verbose)

    for nationName in cache:
        validateDailyEwmCache(cache, nationName, verbose=verbose)

    return cache

In [5]:
def saveDailyEwmNation(cache, nationName, verbose=common_core.verbose):
    '''Save the cached EWM daily occurrences for one nation to a CSV file.

    Nothing is written when nationName is not present in the cache.
    Otherwise the rows held under nationName are written, preceded by a
    header row, to <dataDir>/<ONS_EWM_DEATHS>/csv/daily/nation/<name>.csv
    where <name> is whatever common_core.getSafeName returns for nationName.
    '''

    # Nations that were never loaded simply produce no file
    if nationName not in cache:
        return

    if verbose:
        print(f"Saving {nationName}...")

    # Ensure CSV path exists (exist_ok avoids a check-then-create race)
    csvPath = os.path.join(common_core.dataDir, ons_core.ONS_EWM_DEATHS, "csv", "daily", "nation")
    os.makedirs(csvPath, exist_ok=True)

    # Determine safe filename
    csvFn = os.path.join(csvPath, common_core.getSafeName(nationName) + ".csv")

    # newline='' hands line-ending control to the csv module; without it the
    # writer's "\r\n" terminator is translated to "\r\r\n" on Windows
    with open(csvFn, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)

        colNames = ["date", ons_core.TOTAL_OCCURRENCES]
        writer.writerow(colNames)

        writer.writerows(cache[nationName])


def saveDailyEwmDeaths(cache, verbose=common_core.verbose):
    '''Write an EWM CSV for each known nation name that has cached data.'''

    # saveDailyEwmNation silently skips names that are absent from the cache
    for name in common_core.nationNames:
        saveDailyEwmNation(cache, name, verbose=verbose)

## Interactive Usage

In [6]:
if __name__ == '__main__':

    verbose = True

    # Each stage is a (loader, saver) pair and the stages run in this order
    stages = (
        # One-off conversion of daily occurrences (1970-01-01 to 2018-12-31)
        (loadDailyDeaths, saveDailyDeaths),
        # One-off conversion of daily EWM occurrences (2018-08-01 to 2020-07-31)
        (loadDailyEwmDeaths, saveDailyEwmDeaths),
    )

    for loadFn, saveFn in stages:
        cache = loadFn(verbose=verbose)
        saveFn(cache, verbose=verbose)
        # Blank line separates the output of consecutive stages
        if verbose:
            print()

    # All done!
    if verbose:
        print("All done!")

Loading Daily death occurrences, 2015-2018.xlsx...
Loading Daily death occurrences, 1970-2014.xlsx...
Validating daily cache for E12000001...
Validating daily cache for E12000002...
Validating daily cache for E12000003...
Validating daily cache for E12000004...
Validating daily cache for E12000005...
Validating daily cache for E12000006...
Validating daily cache for E12000007...
Validating daily cache for E12000008...
Validating daily cache for E12000009...
Validating daily cache for W92000004...
Saving Wales...
Saving North East...
Saving North West...
Saving Yorkshire and The Humber...
Saving East Midlands...
Saving West Midlands...
Saving East of England...
Saving London...
Saving South East...
Saving South West...

Loading Figure_3b__The_number_of_daily_winter_deaths_in_Wales_2018_2019.xls...
Loading Figure_3a__The_number_of_daily_winter_deaths_in_England_2019_2020.xls...
Loading Figure_3b__The_number_of_daily_winter_deaths_in_Wales_2019_2020.xls...
Loading Figure_3a__The_number_of