# PHE Convert

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries, plus the project modules that handle determination of projdir, the basic printable class, etc.

In [1]:
import ast
import csv
import json
import os

import common_core
import phe_core

## Configuration

Demographic fields are a special case and need to be expanded into multiple columns.

In [2]:
# Keys picked out of each demographic item when it is expanded into columns.
# Values are emitted in the order listed here, not the order found in the item.
demographicFields = [
    # newCasesBySpecimenDateAgeDemographics + newDeaths28DaysByDeathDateAgeDemographics
    'cases',
    'deaths',
    'rollingRate',
    # cumAdmissionsByAge
    'rate',
    'value',
]

## Area Convert Class

Read data from the API and prepare it for analysis.

Supports the overview, nations, regions, NHS regions and LTLAs.

In [3]:
class AreaConvert(common_core.Printable):
    """Convert the raw dashboard CSV for a single area into tidy per-category CSVs.

    The raw file holds every downloaded column for the area. Each call to
    convert() picks out the columns relevant to one category and expands any
    demographics column into a pair of columns per age band.
    """

    def __init__(self, areaType, areaName):
        """Initialise the area object

        areaType -- area type used as a folder name and to decide which data applies
        areaName -- display name of the area, also used to derive the CSV filename
        """

        self.areaType = areaType
        self.areaName = areaName

        self.safeName = common_core.getSafeName(areaName)
        self.csvName = self.safeName + '.csv'


    def convert(self, category, period = "daily"):
        """Prepare data for analysis

        Reads <dataDir>/<dashboard>/raw/<period>/<areaType>/<csvName> and writes
        <dataDir>/<dashboard>/csv/<period>/<category>/<areaType>/<csvName>.

        category -- one of "cases", "patients", "deaths" or "ons"
        period -- sub-folder of the raw / csv data, "daily" by default

        Raises ValueError for an unsupported category or an empty raw file.
        Any other failure is reported on stdout and then re-raised.
        """

        # The header gets two names per age band (see getColNames), so every
        # data row must contribute exactly two values per age band as well
        valuesPerDemographic = 2

        def getColNos(row, category):
            """Get column numbers relevant to the category"""

            if category == "cases":
                structure = phe_core.casesStructure
            elif category == "patients":
                structure = phe_core.patientsStructure
            elif category == "deaths":
                structure = phe_core.deathsStructure
            elif category == "ons":
                structure = phe_core.onsStructure
            else:
                raise ValueError(f"Unsupported category - {category}") 

            # Always include the first two columns (date and areaName)
            colNos = [0, 1]

            # Other columns are dependent on the category and the fields defined in its "structure"
            for colNo, colName in enumerate(row):
                if colName not in structure:
                    continue

                # Demographic columns are only used for some areas, e.g. age
                # demographics are not available for deaths within "ltla"
                if category == "cases" and colName.endswith("Demographics"):
                    wanted = self.areaName == common_core.ENGLAND or self.areaType in ["region", "ltla"]
                elif category == "patients" and colName == "cumAdmissionsByAge":
                    wanted = self.areaName == common_core.ENGLAND or self.areaType == "nhsregion"
                elif category == "deaths" and colName.endswith("Demographics"):
                    wanted = self.areaName == common_core.ENGLAND or self.areaType == "region"
                else:
                    wanted = True

                if wanted:
                    colNos.append(colNo)

            return colNos


        def getSpecialColNo(row, colNos):
            """Get position of special column - e.g. demographics

            Only the first special column is reported; -1 indicates not present.
            """

            for colNo in colNos:
                if row[colNo].endswith("Demographics") or row[colNo] == "cumAdmissionsByAge":
                    return colNo

            return -1


        def getColNames(row, colNos):
            """Get column names relevant to the category"""

            # Regular columns keep their name, special columns expand to two names per age band
            colNames = []

            for colNo in colNos:
                # Demographics are provided as a Python structure
                if row[colNo].endswith("Demographics"):
                    # Only use the demographic fields that are actually required for cases and deaths
                    for ageDemographic in phe_core.ageDemographics:
                        colNames.append(f"{category}{ageDemographic}")
                        colNames.append(f"{category}RollingRate{ageDemographic}")
                elif row[colNo] == "cumAdmissionsByAge":
                    # Only use the demographic fields that are actually required for admissions
                    for ageDemographic in phe_core.admissionDemographics:
                        # Standardise format (zero padded "NN_NN") to match cases and deaths
                        if "_" in ageDemographic:
                            parts = ageDemographic.split("_")
                            ageDemographic = f"{int(parts[0]):02}_{int(parts[2]):02}"

                        colNames.append(f"cumAdmissionsRollingRate{ageDemographic}")
                        colNames.append(f"cumAdmissions{ageDemographic}")
                else:
                    colNames.append(row[colNo])

            return colNames


        def getRowValues(row, colNos, specialColNo, demographics):
            """Get row values for specific field numbers"""

            tidyRow = []
            for colNo in colNos:
                if colNo != specialColNo:
                    # Copy value of regular column
                    tidyRow.append(row[colNo])
                    continue

                # The special column holds a Python literal (list of dicts).
                # NOTE: this used to be eval(); literal_eval parses the same
                # literals without executing arbitrary code from the data file.
                if row[colNo].startswith('['):
                    items = ast.literal_eval(row[colNo])
                else:
                    items = []

                # Pick out the age demographics that are required
                for demographic in demographics:
                    values = []
                    for item in items:
                        if item["age"] == demographic:
                            values = [item[field] for field in demographicFields if field in item]
                            break

                    # Some records do not include the age demographics (or lack a
                    # field); pad with blanks so the columns stay aligned with the header
                    values.extend([""] * (valuesPerDemographic - len(values)))
                    tidyRow.extend(values)

            return tidyRow


        # Report any failure, then let it propagate
        try:
            # Determine raw filename
            rawPath = os.path.join(common_core.dataDir, phe_core.PHE_DASHBOARD, "raw", period, self.areaType)
            rawFn = os.path.join(rawPath, self.csvName)

            # Ensure the CSV path exists
            csvPath = os.path.join(common_core.dataDir, phe_core.PHE_DASHBOARD, "csv", period, category, self.areaType)
            os.makedirs(csvPath, exist_ok=True)

            csvFn = os.path.join(csvPath, self.csvName)
            partName = common_core.getPartName(csvFn)

            print(f"Saving {partName}...")

            # Demographics vary for different categories
            if category == "patients":
                demographics = phe_core.admissionDemographics
            else:
                demographics = phe_core.ageDemographics

            # Read and tidy the raw data BEFORE touching the output file, so a
            # missing or malformed raw file cannot truncate an existing CSV.
            # newline='' is what the csv module expects for its file objects.
            with open(rawFn, 'r', newline='') as f:
                reader = csv.reader(f, delimiter = ',')

                # First row is column names
                header = next(reader, None)
                if header is None:
                    raise ValueError(f"Raw file is empty - {rawFn}")

                colNos = getColNos(header, category)
                specialColNo = getSpecialColNo(header, colNos)
                colNames = getColNames(header, colNos)

                # Subsequent rows are actual data (blank lines are skipped)
                rows = [getRowValues(row, colNos, specialColNo, demographics) for row in reader if row]

            # PHE publish data in reverse chronological order. Sort on the two
            # leading columns only; later columns can mix blanks and numbers,
            # which cannot be compared with each other.
            rows.sort(key=lambda tidyRow: tidyRow[:2])

            # Generate the CSV from the tidied data
            with open(csvFn, 'w', newline='') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerow(colNames)
                writer.writerows(rows)

        except Exception:
            print(f"Failed to convert {period} {category} for {self.areaName}")
            raise


    def convertDaily(self):
        """Convert daily data to simple CSV prior to analysis"""

        # There is no daily case / death data for the "nhsregion" area type
        if self.areaType in ["overview", "nation", "region", "ltla"]:
            self.convert("cases")
            self.convert("deaths")

        # Patient data is only available for "nation" and "nhsregion", not "ltla"
        if self.areaType in ["overview", "nation", "nhsregion"]:
            self.convert("patients")


    def convertWeekly(self):
        """Convert weekly data to simple CSV prior to analysis"""

        # Weekly "ons" data is converted for every area type except "nhsregion"
        if self.areaType in ["overview", "nation", "region", "ltla"]:
            self.convert("ons", "weekly")

## Interactive Testing

In [4]:
if __name__ == '__main__':

    # Run the daily pass over every area first, then the weekly pass,
    # announcing each pass with its banner before any area is converted
    for banner, action in (
        ("--- Converting daily dashboard data ---", AreaConvert.convertDaily),
        ("\n--- Converting weekly dashboard data ---", AreaConvert.convertWeekly),
    ):
        print(banner)

        # phe_core.areas yields (area type, list of area names) pairs
        for areaType, areaNames in phe_core.areas:
            for areaName in areaNames:
                area = AreaConvert(areaType, areaName)
                action(area)

    print("\nAll done!")

--- Converting daily dashboard data ---
Saving phe-dashboard/csv/daily/cases/overview/united_kingdom.csv...
Saving phe-dashboard/csv/daily/deaths/overview/united_kingdom.csv...
Saving phe-dashboard/csv/daily/patients/overview/united_kingdom.csv...
Saving phe-dashboard/csv/daily/cases/nation/england.csv...
Saving phe-dashboard/csv/daily/deaths/nation/england.csv...
Saving phe-dashboard/csv/daily/patients/nation/england.csv...
Saving phe-dashboard/csv/daily/cases/nation/scotland.csv...
Saving phe-dashboard/csv/daily/deaths/nation/scotland.csv...
Saving phe-dashboard/csv/daily/patients/nation/scotland.csv...
Saving phe-dashboard/csv/daily/cases/nation/wales.csv...
Saving phe-dashboard/csv/daily/deaths/nation/wales.csv...
Saving phe-dashboard/csv/daily/patients/nation/wales.csv...
Saving phe-dashboard/csv/daily/cases/nation/northern_ireland.csv...
Saving phe-dashboard/csv/daily/deaths/nation/northern_ireland.csv...
Saving phe-dashboard/csv/daily/patients/nation/northern_ireland.csv...
Savi