# ONS Core

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries, plus determination of projdir, a basic printable class, etc.

In [1]:
import os
from datetime import datetime, timedelta

import csv
from xlrd import open_workbook

import common_core

## Configuration

Data to download from the ONS website

In [2]:
# Area names come from common_core: the 4 nations in the UK and the 9 regions in England
nationNames, regionNames = common_core.nationNames, common_core.regionNames

# Pair each area type with its list of names, giving a single list of all area types
areas = [
    ("nation", nationNames),
    ("region", regionNames),
]

In [3]:
# The latest ONS age bands, youngest first (one decade per line after the under-10s)
ageDemographics = [
    '<1', '1-4', '5-9',
    '10-14', '15-19',
    '20-24', '25-29',
    '30-34', '35-39',
    '40-44', '45-49',
    '50-54', '55-59',
    '60-64', '65-69',
    '70-74', '75-79',
    '80-84', '85-89',
    '90+',
]

# The legacy ONS age bands, which are much broader
legacyAgeDemographics = ['01-14', '15-44', '45-64', '65-74', '75-84', '85+']

In [4]:
# ONS dataset page for the weekly provisional figures on deaths registered in England and Wales
deathsUrl = "https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales"
# Folder for the raw ONS deaths files; handed to the downloader, and convertDeaths reads from the same location
deathsPath = os.path.join(common_core.projdir, "data", "ons-deaths", "raw")

# (name, file name regex) pairs handed to WebDownload.downloadFiles.
# The regex is a raw string so that "\." is an explicit regex escape rather than an
# invalid Python string escape (which newer Python versions warn about).
# It matches anything ending in ".xls" or ".xlsx".
deathsFiles = [
    ("weekly", r".*\.xlsx?$")
]

## Download Data

Download spreadsheets by parsing the HTML for suitable links

In [5]:
def downloadDeaths(skipExisting=common_core.skipExisting, verbose=common_core.verbose):
    '''Download the files matching deathsFiles from deathsUrl into deathsPath.

    skipExisting and verbose are passed straight through to common_core.WebDownload.
    Returns whatever downloadFiles returns (used by the caller as the list of part names).
    '''
    downloader = common_core.WebDownload(skipExisting=skipExisting, verbose=verbose)
    return downloader.downloadFiles(deathsPath, deathsUrl, deathsFiles)

## Convert to CSV

In [6]:
# Headings used to locate the week number row and the week ending row in a worksheet
WEEK_NUMBER, WEEK_ENDED = "Week number", "Week ended"

def findRowNos(sheet, headings, aliases):
    '''Find rows with the specified headings. Also check for possible aliases.

    Only the first three columns of each row are searched (fewer if the sheet is
    narrower than that). Matching is case-insensitive and compares the whole cell
    text. If a heading matches in several rows, the last matching row is recorded.

    sheet    -- worksheet exposing nrows, ncols and cell(rowNo, colNo).value
    headings -- the heading strings to look for
    aliases  -- mapping of heading to a list of alternative spellings

    Returns a dict of heading -> row number; headings that were not found are absent.
    '''
    # Clamp to the sheet width so that narrow sheets do not raise IndexError
    searchCols = range(min(3, sheet.ncols))

    rowNos = {}
    for heading in headings:
        # Everything this heading may appear as, lower-cased for comparison
        accepted = [heading.lower()]
        if heading in aliases:
            accepted.extend(alias.lower() for alias in aliases[heading])

        for rowNo in range(sheet.nrows):
            for colNo in searchCols:
                cellValue = sheet.cell(rowNo, colNo).value

                # Non-text cells (numbers, dates, blanks stored as numbers) can never match
                if isinstance(cellValue, str) and cellValue.lower() in accepted:
                    rowNos[heading] = rowNo
                    # This only ends the column scan; later rows are still examined
                    break

    return rowNos


def getWeekCols(sheet, rowNo):
    '''Return the columns that hold week 1, 2, 3... in the specified row.

    The row is scanned left to right and a column is accepted only when its value
    equals the next week number expected, so out-of-sequence numbers are skipped.
    '''
    columns = []
    for colNo in range(sheet.ncols):
        # The next week wanted is always one more than the number of weeks found so far
        if sheet.cell(rowNo, colNo).value == len(columns) + 1:
            columns.append(colNo)

    return columns


def getWeekEndings(sheet, rowNo, colNos):
    '''Return the week ending dates found in the given columns of the specified row.

    Text cells are parsed using the format '%d-%b-%y'. Any other cell is treated as a
    day count and converted by adding (value - 2) days to 1 January 1900
    (presumably a spreadsheet date serial - confirm against the source files).
    '''
    def toDate(value):
        if isinstance(value, str):
            return datetime.strptime(value, '%d-%b-%y')
        return datetime(1900, 1, 1) + timedelta(days=value - 2)

    return [toDate(sheet.cell(rowNo, colNo).value) for colNo in colNos]


def getWeeklyDeaths(sheet, rowNo, colNos):
    '''Return the values in the given columns of the specified row, each converted with int().'''
    return [int(sheet.cell(rowNo, colNo).value) for colNo in colNos]


def processSheet(sheet):
    '''Parse the specified worksheet for weekly deaths.

    Locates the week number row, the week ended row and one row per region, then
    reads the weekly values for every region. At present the values are only
    validated; nothing is stored or returned.

    Raises KeyError if a required heading is not found in the sheet and
    AssertionError if the sheet does not contain 52 or 53 weeks.
    '''
    headings = [WEEK_NUMBER, WEEK_ENDED] + common_core.regionNames
    rowNos = findRowNos(sheet, headings, common_core.regionAliases)

    # Columns are identified from the week number row and reused for every other row
    weekCols = getWeekCols(sheet, rowNos[WEEK_NUMBER])
    assert len(weekCols) in (52, 53), "Number of weeks was not 52 or 53"

    weekEndings = getWeekEndings(sheet, rowNos[WEEK_ENDED], weekCols)
    assert len(weekEndings) == len(weekCols), "Number of week endings did not match number of weeks"

    for regionName in common_core.regionNames:
        weeklyDeaths = getWeeklyDeaths(sheet, rowNos[regionName], weekCols)
        assert len(weeklyDeaths) == len(weekCols), f"{regionName} did not have the correct number of weeks"


def convertDeaths(partNames):
    '''Open each named spreadsheet and process its weekly figures worksheets.

    partNames are paths relative to the raw ONS deaths folder under the project
    directory. Only sheets whose name starts with "weekly figures" (ignoring case)
    are passed to processSheet.
    '''
    for partName in partNames:
        workbookPath = os.path.join(common_core.projdir, "data", "ons-deaths", "raw", partName)
        print(f"Parsing {partName}...")
        book = open_workbook(workbookPath)

        for sheet in book.sheets():
            # Skip anything that is not a weekly figures sheet
            if not sheet.name.lower().startswith("weekly figures"):
                continue
            processSheet(sheet)

In [7]:
# Download the spreadsheets using the default settings and keep the returned part names
partNames = downloadDeaths()

# Parse each of the spreadsheets named by the download step
convertDeaths(partNames)

Parsing weekly/publishedweek532020.xlsx...
Parsing weekly/publishedweek522019.xls...
Parsing weekly/publishedweek522018withupdatedrespiratoryrow.xls...
Parsing weekly/publishedweek522017.xls...
Parsing weekly/publishedweek522016.xls...
Parsing weekly/publishedweek2015.xls...
Parsing weekly/publishedweek2014.xls...
Parsing weekly/publishedweek2013.xls...
Parsing weekly/publishedweek2012.xls...
Parsing weekly/publishedweek2011.xls...
Parsing weekly/publishedweek2010.xls...
