# ONS Local Authority + Health Board Deaths

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries plus the project modules (determination of projdir, basic printable class, etc.)

In [6]:
import os
from datetime import date, datetime, timedelta
import time

import csv
from xlrd import open_workbook

import ons_core
import ons_download
import common_core

## Configuration

Folder names for the ONS lookup and deaths data, plus the codes of the nations used below

In [7]:
ONS_LOOKUPS = "ons-lookups"
ONS_LAHB_DEATHS = "ons-lahb-deaths"

NATION_CODE_ENGLAND_WALES = 'K04000001'
NATION_CODE_ENGLAND = 'E92000001'
NATION_CODE_WALES = 'W92000004'

## Convert Spreadsheets

In [8]:
def getFirstFriday(year):
    '''Return the date of the Friday that ends week 1 of the given year'''

    FRIDAY = 4  # value of date.weekday() for a Friday

    # Week 1 ends on/after Jan 2nd, so that is the earliest candidate
    candidate = datetime.strptime(f"{year}-01-02", '%Y-%m-%d').date()

    # Step straight to the next Friday (zero days when Jan 2nd is a Friday)
    daysToFriday = (FRIDAY - candidate.weekday()) % 7

    return candidate + timedelta(days=daysToFriday)


def getWeekEnded(year, weekNo):
    '''Return the Friday on which the given week number of a year ends'''

    # Week 1 ends on the first Friday of the year...
    firstFriday = getFirstFriday(year)

    # ...and every later week ends a whole number of weeks after it
    return firstFriday + timedelta(weeks=weekNo - 1)

In [9]:
# Keep workbooks that were loaded by an earlier execution of this code
# (e.g. when the notebook cell is re-run); only create the cache when the
# name does not exist yet. Catch NameError only, so that any other problem
# is not silently hidden.
try:
    workbookCache
except NameError:
    workbookCache = {}


def loadLocalAuthorityLookup(verbose=common_core.verbose):
    '''Load local authority lookup into dictionary

    Reads local_authority_region_2020_12.csv from the "csv" folder of the
    ONS lookups directory under common_core.dataDir. The first row is
    treated as a header. In every other row the first column becomes the
    key and the second column the value (both stripped of whitespace);
    callers in this file use the key as an area code and the value as a
    region code.

    Returns the resulting dictionary.

    Raises RuntimeError if a row refers to a region that is not present
    in common_core.regions.
    '''

    localAuthorityLookup = {}

    csvPath = os.path.join(common_core.dataDir, ONS_LOOKUPS, "csv")
    csvFn = os.path.join(csvPath, "local_authority_region_2020_12.csv")

    if verbose:
        print(f"Loading {csvPath}...")

    # newline='' leaves newline handling to the csv module, as its documentation requires
    with open(csvFn, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')

        # Skip header
        next(reader)

        # Process records
        for row in reader:
            # The csv reader yields an empty list for a blank line (e.g. at the end of the file)
            if not row:
                continue

            localAuthority = row[0].strip()
            region = row[1].strip()

            if region not in common_core.regions:
                raise RuntimeError(f"Unrecognised region {region}")

            localAuthorityLookup[localAuthority] = region

    return localAuthorityLookup


def loadWorkbook(partName, verbose=common_core.verbose):
    '''Return the workbook for partName, opening it only if it is not cached

    partName is the path of the spreadsheet relative to common_core.dataDir
    and is also the key used in workbookCache. When verbose is true,
    progress (and the time taken to open the file) is printed.
    '''

    # Cache hit - nothing to open
    if partName in workbookCache:
        if verbose:
            print(f"Using cached {partName}...")
        return workbookCache[partName]

    path = os.path.join(common_core.dataDir, partName)

    if verbose:
        print(f"Loading {partName}...")
        began = time.time()
        opened = open_workbook(path)
        elapsed = time.time() - began
        print(f"Took {elapsed:.2f} seconds...")
    else:
        opened = open_workbook(path)

    # Remember the workbook for subsequent calls
    workbookCache[partName] = opened

    return opened


def _findLahbColumns(sheet, sheetName):
    '''Locate the heading row and the columns needed from an "All data" sheet

    Returns (headerRowNo, columns) where headerRowNo is the row holding the
    'Area code' heading and columns maps each required heading to its
    column number. Raises RuntimeError if any required heading is missing.
    '''

    wanted = ('Area code', 'Geography type', 'Cause of death', 'Week number', 'Number of deaths')
    columns = {}
    headerRowNo = None

    # Headings are looked for in the first five rows only (fewer if the sheet is shorter);
    # if a heading appears more than once then the last occurrence wins
    for rowNo in range(min(5, sheet.nrows)):
        for colNo in range(sheet.ncols):
            value = sheet.cell(rowNo, colNo).value
            if value in wanted:
                columns[value] = colNo
                if value == 'Area code':
                    headerRowNo = rowNo

    missing = [heading for heading in wanted if heading not in columns]
    if missing:
        raise RuntimeError(f"Could not find column(s) {missing} in {sheetName}")

    return headerRowNo, columns


def updateRegionData(regionData, category, partName, localAuthorityLookup, verbose=common_core.verbose):
    '''Process workbook for local authority data

    Reads the "<Category> - All data" sheet of the workbook held in
    workbookCache for partName (so the workbook must have been loaded
    beforehand) and adds the number of deaths of every 'Local Authority' row
    to regionData[regionCode][year][weekNumber][metric].

    - regionCode is NATION_CODE_WALES for area codes starting with 'W',
      otherwise the value found in localAuthorityLookup for the area code
    - year is 2020 if partName contains '2020', otherwise 2021
    - metric is "total_<category>" for "All causes" and "covid_<category>"
      for "COVID 19"

    'Health Board' rows are not counted, although their area code is still
    validated. regionData is updated in place and also returned. The
    verbose parameter is accepted but not used by this function.

    Raises RuntimeError if the sheet or one of its required headings cannot
    be found, or if a row holds an unrecognised area code, geography type
    or cause of death. Raises KeyError if partName is not in workbookCache.
    '''

    workbook = workbookCache[partName]

    year = 2020 if '2020' in partName else 2021

    sheetName = f'{category.capitalize()} - All data'
    found = False

    for sheet in workbook.sheets():
        if sheet.name != sheetName:
            continue

        headerRowNo, columns = _findLahbColumns(sheet, sheetName)
        colNoAreaCode = columns['Area code']
        colNoGeographyType = columns['Geography type']
        colNoCauseDeath = columns['Cause of death']
        colWeekNumber = columns['Week number']
        colNumDeaths = columns['Number of deaths']

        for rowNo in range(headerRowNo + 1, sheet.nrows):
            # The area code is validated for every row, including rows that are skipped below
            areaCode = sheet.cell(rowNo, colNoAreaCode).value
            if areaCode.startswith('W'):
                regionCode = NATION_CODE_WALES
            elif areaCode in localAuthorityLookup:
                regionCode = localAuthorityLookup[areaCode]
            else:
                raise RuntimeError(f"Unrecognised area code {areaCode}")

            geographyType = sheet.cell(rowNo, colNoGeographyType).value
            if geographyType == 'Health Board':
                # Only local authority rows are counted
                continue
            if geographyType != 'Local Authority':
                raise RuntimeError(f"Unrecognised geography type {geographyType}")

            causeDeath = sheet.cell(rowNo, colNoCauseDeath).value
            if causeDeath == "All causes":
                metric = f"total_{category}"
            elif causeDeath == "COVID 19":
                metric = f"covid_{category}"
            else:
                raise RuntimeError(f"Unrecognised cause of death {causeDeath}")

            weekNumber = int(sheet.cell(rowNo, colWeekNumber).value)
            numDeaths = int(sheet.cell(rowNo, colNumDeaths).value)

            # Create the nested region / year / week dictionaries on first use
            weekData = regionData.setdefault(regionCode, {}).setdefault(year, {}).setdefault(weekNumber, {})
            weekData[metric] = weekData.get(metric, 0) + numDeaths

        found = True

    if not found:
        raise RuntimeError(f"Could not find {sheetName}")

    return regionData
    

def saveAreaData(regionType, regions, regionData):
    '''Save region data in CSV format

    Writes one CSV file per area code of regionData that is also a key of
    regions; other area codes are ignored. Files are written to
    <common_core.dataDir>/<ONS_LAHB_DEATHS>/csv/weekly/<regionType>/ and are
    named after common_core.getSafeName() of the area name in regions.

    Each file holds a header followed by one row per year and week number
    (in ascending order): the Friday on which the week ended, the week
    number, and the total / COVID registrations and occurrences.

    Raises KeyError if a week does not hold all four metrics.
    '''

    csvPath = os.path.join(common_core.dataDir, ONS_LAHB_DEATHS, "csv", "weekly", regionType)

    colNames = ["week_ended", "week_number",
                "total_registrations", "total_occurrences",
                "covid_registrations", "covid_occurrences"]

    for regionCode in regionData:
        if regionCode not in regions:
            continue

        regionName = regions[regionCode]

        # Only create the folder once there is something to write into it
        os.makedirs(csvPath, exist_ok=True)

        csvFn = os.path.join(csvPath, common_core.getSafeName(regionName) + '.csv')

        # Save data to CSV - newline='' leaves newline handling to the csv module
        with open(csvFn, 'w', newline='') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow(colNames)

            for year in sorted(regionData[regionCode]):
                for weekNumber in sorted(regionData[regionCode][year]):
                    weekData = regionData[regionCode][year][weekNumber]

                    # The date depends on the year as well as the week number, so it is
                    # calculated for every row rather than carried over from the previous
                    # row / area (the same week number occurs in more than one year)
                    weekEnded = getWeekEnded(year, weekNumber)

                    colValues = [weekEnded, weekNumber,
                                 weekData[ons_core.TOTAL_REGISTRATIONS],
                                 weekData[ons_core.TOTAL_OCCURRENCES],
                                 weekData[ons_core.COVID_REGISTRATIONS],
                                 weekData[ons_core.COVID_OCCURRENCES]]

                    writer.writerow(colValues)

                        
def calculateNationData(regionData):
    '''Aggregate the regional figures into national figures

    Every region is added to the combined England + Wales entry and to its
    own nation: Wales when the region code is NATION_CODE_WALES, otherwise
    England. The result has the same year / week number / metric layout as
    regionData, keyed by nation code.
    '''

    metrics = (ons_core.TOTAL_REGISTRATIONS, ons_core.TOTAL_OCCURRENCES,
               ons_core.COVID_REGISTRATIONS, ons_core.COVID_OCCURRENCES)

    nationData = {}

    for regionCode, regionYears in regionData.items():
        ownNation = NATION_CODE_WALES if regionCode == NATION_CODE_WALES else NATION_CODE_ENGLAND

        # The combined entry always comes first, followed by the region's own nation
        targets = (NATION_CODE_ENGLAND_WALES, ownNation)

        for nationCode in targets:
            nationData.setdefault(nationCode, {})

        for year in sorted(regionYears):
            for nationCode in targets:
                nationData[nationCode].setdefault(year, {})

            for weekNumber in sorted(regionYears[year]):
                regionWeek = regionYears[year][weekNumber]

                for nationCode in targets:
                    # Weeks start from zero for all four metrics
                    nationWeek = nationData[nationCode][year].setdefault(weekNumber, dict.fromkeys(metrics, 0))

                    for metric in metrics:
                        nationWeek[metric] += regionWeek[metric]

    return nationData


def main(verbose=common_core.verbose):
    '''Download the workbooks, aggregate the deaths and save them as CSV files'''

    lookup = loadLocalAuthorityLookup(verbose=verbose)

    regionTotals = {}
    for partName in ons_download.downloadLocalAuthorityDeaths(verbose=verbose):
        # Make sure the workbook is in the cache before its sheets are processed
        loadWorkbook(partName, verbose=verbose)

        for category in ('registrations', 'occurrences'):
            regionTotals = updateRegionData(regionTotals, category, partName, lookup, verbose=verbose)

    # National figures are derived before anything is written
    nationTotals = calculateNationData(regionTotals)

    saveAreaData("region", common_core.regions, regionTotals)
    saveAreaData("nation", common_core.nations, nationTotals)

    if verbose:
        print('All done!')

## Running Interactively

In [10]:
# Only do the processing when executed directly (not when imported as a module),
# always with progress messages enabled
if __name__ == '__main__':

    main(verbose = True)

Loading /home/jovyan/work/covid-stats/data/ons-lookups/csv...
Skipping download of ons-lahb-deaths/raw/weekly/lahbtables2021week9.xlsx...
Skipping download of ons-lahb-deaths/raw/weekly/lahbtablesweek01to532020datawk92021.xlsx...
Using cached ons-lahb-deaths/raw/weekly/lahbtables2021week9.xlsx...
Using cached ons-lahb-deaths/raw/weekly/lahbtablesweek01to532020datawk92021.xlsx...
All done!
