# ONS Population

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries plus determination of projdir, basic printable class, etc.

In [1]:
import os
from datetime import date

import unittest

import csv
import numpy as np

import common_core

## Configuration

Name of the data folder containing the ONS population estimates

In [2]:
# Name of the sub-folder under common_core.dataDir used by getCsvPath()
ONS_POPULATION = "ons-population"

## Common Functions

In [3]:
def getCsvPath():
    """Return the folder containing the ONS population CSV files (persons)"""

    # <dataDir>/<ONS_POPULATION>/csv/persons
    return os.path.join(common_core.dataDir, ONS_POPULATION, "csv", "persons")

## Population Class

In [4]:
class Population(common_core.Printable):
    """Population class containing UK estimates at detailed level

    self.years maps a year to a dictionary with two entries:
      "raw"   - NumPy record array read from that year's CSV file
      "areas" - index keyed by area code; each entry holds the area's "raw"
                record and its "age_bands" totals keyed by band label
    """

    def __init__(self):
        """Basic initialisation - no years are loaded yet"""

        self.years = {}


    def calcRange(self, data, start, stop):
        """Calculate total across a range of ages

        data  - object indexable by age as a string (e.g. data["17"])
        start - first age to include
        stop  - first age to exclude

        Returns a new array; the columns of data are left unmodified.
        """

        # Copy the first column so the in-place additions below do not alter data
        result = np.copy(data[str(start)])

        for age in range(start + 1, stop):
            result += data[str(age)]

        return result


    def _storeBand(self, areas, codes, label, totals):
        """Store one total per area under the given age band label"""

        for code, total in zip(codes, totals):
            areas[code]["age_bands"][label] = total


    def indexYear(self, year, verbose=common_core.verbose):
        """Index population data in memory

        Builds self.years[year]["areas"] from self.years[year]["raw"], adding
        5 year age bands ("00_04" ... "85_89", "90") and the NHS age bands
        ("00_05", "06_17", "18_64", "65_84", "85").

        The verbose parameter is accepted for consistency with the other
        methods but is not used here.
        """

        data = self.years[year]["raw"]
        codes = [record["code"] for record in data]

        # Index individual regions
        areas = {}
        self.years[year]["areas"] = areas
        for code, record in zip(codes, data):
            areas[code] = {"raw": record, "age_bands": {}}

        # Create index for 5 year age bands
        for age in range(0, 90, 5):
            combined = self.calcRange(data, age, age + 5)
            self._storeBand(areas, codes, f"{age:02d}_{age + 4:02d}", combined)

        # Finish 5 year age bands with the over 90s (single "90" column in the data)
        self._storeBand(areas, codes, "90", data["90"])

        # Create index for NHS age bands (upper bound is inclusive)
        for first, last in [(0, 5), (6, 17), (18, 64), (65, 84)]:
            combined = self.calcRange(data, first, last + 1)
            self._storeBand(areas, codes, f"{first:02d}_{last:02d}", combined)

        # Finish NHS age bands with the over 85s
        for area in areas.values():
            bands = area["age_bands"]
            bands["85"] = bands["85_89"] + bands["90"]


    def loadYear(self, year, verbose=common_core.verbose):
        """Load population data into memory

        Reads "<year>.csv" from the folder given by getCsvPath(), stores the
        records in self.years[year]["raw"] and then builds the indices.

        Any failure is reported with a "Failed to load" message and re-raised.
        """

        fileName = os.path.join(getCsvPath(), str(year) + ".csv")
        partName = common_core.getPartName(fileName)

        # Columns holding text; every other column is read as an unsigned integer
        textTypes = {"code": "U9", "name": "U40", "geography": "U30"}

        try:
            if verbose:
                print(f"Loading {partName}...")

            # newline="" is what the csv module expects so it can handle line endings itself
            with open(fileName, "r", newline="") as f:
                reader = csv.reader(f, delimiter=",")

                dtype = []
                converters = {}

                for i, colName in enumerate(next(reader)):
                    colName = colName.lower()
                    if colName in textTypes:
                        dtype.append((colName, textTypes[colName]))
                    else:
                        dtype.append((colName, "u4"))
                        # Empty cells are treated as zero
                        converters[i] = lambda s: int(s or 0)

                # Since genfromtxt does not support quote characters, continue to use the CSV reader
                data = np.genfromtxt(("\t".join(i) for i in reader), dtype=dtype, converters=converters, delimiter="\t")

            # A file with a single data row yields a 0-d array which cannot be iterated
            data = np.atleast_1d(data)

            # Store the data
            self.years[year] = {"raw": data}

            # Create indices
            self.indexYear(year, verbose)

        # General catch all (KeyboardInterrupt / SystemExit pass straight through)
        except Exception:
            print(f"Failed to load {partName}")
            raise


    def loadYears(self, minYear=None, maxYear=None, limit=100, verbose=common_core.verbose):
        """Load data from CSV files by scanning folder

        minYear - earliest year to load (default 1900)
        maxYear - latest year to load (default is the current year)
        limit   - only the most recent 'limit' files are considered, before the
                  year range is applied; None means no limit and zero or less
                  means no files

        Only files named "<year>.csv" are considered; anything else in the
        folder is ignored.
        """

        if minYear is None:
            minYear = 1900

        if maxYear is None:
            maxYear = date.today().year

        # os.listdir returns entries in arbitrary order, so sort the years explicitly
        years = sorted(
            int(stem)
            for stem, ext in map(os.path.splitext, os.listdir(getCsvPath()))
            if ext == ".csv" and stem.isdecimal()
        )

        # Note that years[-0:] would select everything, hence the explicit check
        if limit is not None:
            years = years[-limit:] if limit > 0 else []

        for year in years:
            if minYear <= year <= maxYear:
                self.loadYear(year, verbose)


    def getPopulation(self, code, ageBand, year=None):
        """Get population for a specific year, code and age band

        When no year is given, the most recent loaded year is used.
        Raises KeyError if the year, code or age band is unknown.
        """

        if year is None:
            year = max(self.years)

        return self.years[year]["areas"][code]["age_bands"][ageBand]

In [5]:
class TestPopulation(unittest.TestCase):
    '''Class to test population class'''

    def setUp(self):
        '''Start every test with a fresh, empty population object'''

        self.population = Population()


    def checkPopulation2019(self, ageBand, expected):
        '''Load 2019 and compare the K04000001 population for an age band'''

        year = 2019
        self.population.loadYear(year)

        actual = self.population.getPopulation("K04000001", ageBand, year)
        self.assertEqual(actual, expected)


    def test_load_limited_years(self):
        '''Test loading with a limit of 5 files'''

        self.population.loadYears(limit=5)

        self.assertEqual(len(self.population.years), 5)


    def test_load_specific_years(self):
        '''Test loading a specific range of years'''

        minYear, maxYear = 2010, 2014
        self.population.loadYears(minYear=minYear, maxYear=maxYear)

        self.assertEqual([*self.population.years], [*range(minYear, maxYear + 1)])


    def test_ew_00_04_2019(self):
        '''Test 2019 population - 0-4 (ONS + PHE)'''

        self.checkPopulation2019("00_04", 3465179)


    def test_ew_00_05_2019(self):
        '''Test 2019 population - 0-5 (NHS)'''

        self.checkPopulation2019("00_05", 4191496)


    def test_ew_65_84_2019(self):
        '''Test 2019 population - 65-84 (NHS)'''

        self.checkPopulation2019("65_84", 9534647)


    def test_ew_80_84_2019(self):
        '''Test 2019 population - 80-84 (ONS + PHE)'''

        self.checkPopulation2019("80_84", 1529682)


    def test_ew_85_2019(self):
        '''Test 2019 population - 85+ (NHS)'''

        self.checkPopulation2019("85", 1481445)


    def test_ew_90_2019(self):
        '''Test 2019 population - 90+ (ONS + PHE)'''

        self.checkPopulation2019("90", 547789)

## Running Interactively

In [6]:
if __name__ == "__main__":
    # argv is supplied explicitly so unittest does not parse the host process's
    # command line (the first element is only used as the program name);
    # exit=False stops unittest calling sys.exit() once the tests have run
    unittest.main(argv=["first-arg-is-ignored"], exit=False)

........
----------------------------------------------------------------------
Ran 8 tests in 4.240s

OK
