# PHE Download

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

In [1]:
import os, sys

# Project root: the parent of the first sys.path entry, with symlinks resolved.
# NOTE(review): presumably sys.path[0] is the notebook's own directory - confirm.
projdir = os.path.realpath(os.path.join(sys.path[0], os.pardir))

In [2]:
class Printable:
    """Base class giving instances a readable text form: the class, then the instance attributes."""

    def __repr__(self):
        """Return the class followed by the instance's attribute dictionary."""
        return f"{self.__class__}: {self.__dict__}"

    def __str__(self):
        """Return the same text that ``__repr__`` produces."""
        return f"{self.__class__}: {self.__dict__}"

In [3]:
import requests
import json
import csv

# Base URL of the coronavirus.data.gov.uk v1 data API; queried by Area.download
ENDPOINT = "https://api.coronavirus.data.gov.uk/v1/data"

class Area(Printable):
    """An area whose age-demographic data is downloaded from the API at ENDPOINT.

    Instance fields:
      areaType - sent as the ``areaType`` filter and used as the output sub-directory
      areaName - sent as the ``areaName`` filter
      csvName  - output file name derived from areaName (lower case, spaces replaced
                 by underscores, linking words dropped)
    """

    # Linking words dropped from file names, e.g. "East of England" -> east_england.csv
    _SKIP_WORDS = ('of', 'and', 'the')

    def __init__(self, areaType, areaName):
        """Initialise the area object"""

        self.areaType = areaType
        self.areaName = areaName

        # Drop linking words only when they are whole words. Plain substring removal
        # would also mangle names that merely end in one of them, e.g.
        # "Westmorland and Furness" would lose the "and" of "Westmorland".
        # The final word is always kept so the name can never end up empty.
        words = areaName.lower().replace(' ', '_').split('_')
        kept = [word for word in words[:-1] if word not in self._SKIP_WORDS]
        self.csvName = '_'.join(kept + words[-1:]) + '.csv'


    @staticmethod
    def _flattenRows(rows):
        """Yield the header row followed by the flattened data rows.

        ``rows`` is an iterable of CSV rows (lists of strings). The first row is the
        header; its last column names the nested demographics field and is replaced
        by one column per metric and age band, named ``<metric>_<age>``. Those names
        are taken from the first data row only, and the header is emitted just before
        that row, so input without data rows yields nothing.
        """

        # Local import keeps this block self-contained; ast is part of the standard library
        import ast

        colNames = []
        for rowNo, row in enumerate(rows):
            if rowNo == 0:
                colNames = row[:-1]
                continue

            tidyRow = []
            for field in row:
                # startswith() is safe for empty fields, unlike field[0]
                if not field.startswith('['):
                    tidyRow.append(field)
                    continue

                # The nested field is a Python literal (list of dicts). It comes from the
                # network, so parse it with literal_eval rather than executing it with eval.
                for item in ast.literal_eval(field):
                    age = item['age']
                    for colName, value in item.items():
                        if colName == 'age':
                            continue
                        tidyRow.append(value)
                        if rowNo == 1:
                            colNames.append(colName + '_' + age)

            if rowNo == 1:
                yield colNames
            yield tidyRow


    def download(self, dirName, demographics):
        """Download demographic data - cases or deaths

        dirName      - sub-directory name for the output ("cases" or "deaths" in this file)
        demographics - name of the API metric to request

        Writes the response unchanged under data/phe-dashboard/raw/... and a flattened
        CSV under data/phe-dashboard/csv/..., both relative to projdir.

        Raises RuntimeError when the API does not answer with HTTP 200. Any exception
        is reported with a short message and then re-raised.
        """

        try:
            filters = [
                f"areaType={self.areaType}",
                f"areaName={self.areaName}"
            ]

            structure = {
                "date": "date",
                "areaName": "areaName",
                demographics: demographics,
            }

            api_params = {
                "filters": ";".join(filters),
                "structure": json.dumps(structure, separators=(",", ":")),
                "format": "csv"
            }

            # Download the raw data (hybrid of CSV and Python)
            response = requests.get(ENDPOINT, params=api_params, timeout=10)
            if response.status_code != 200:
                # Explicit raise: an assert would be stripped when running with -O
                raise RuntimeError(
                    f"Failed request for {demographics} ({self.areaName}): {response.text}"
                )

            # Ensure the raw path exists
            rawPath = os.path.join(projdir, "data", "phe-dashboard", "raw", dirName, "demographics", self.areaType)
            os.makedirs(rawPath, exist_ok=True)

            # Save the raw data exactly as received (no newline or encoding translation)
            rawFn = os.path.join(rawPath, self.csvName)
            with open(rawFn, 'wb') as f:
                f.write(response.content)

            # Ensure the CSV path exists
            csvPath = os.path.join(projdir, "data", "phe-dashboard", "csv", dirName, "demographics", self.areaType)
            os.makedirs(csvPath, exist_ok=True)

            # Generate the CSV from raw data; newline='' is what the csv module expects
            csvFn = os.path.join(csvPath, self.csvName)
            with open(csvFn, 'w', encoding='utf-8', newline='') as csvFile:
                with open(rawFn, 'r', encoding='utf-8', newline='') as rawFile:
                    reader = csv.reader(rawFile, delimiter=',')
                    csv.writer(csvFile).writerows(self._flattenRows(reader))

        # General catch all - report which download failed, then let the error propagate
        except Exception:
            print(f"Failed to retrieve {dirName} for {self.areaName}")
            raise


    def downloadCases(self):
        """Download cases by specimen date age demographics"""

        self.download("cases", "newCasesBySpecimenDateAgeDemographics")


    def downloadDeaths(self):
        """Download deaths within 28 days of positive test by date of death age demographics"""

        self.download("deaths", "newDeaths28DaysByDeathDateAgeDemographics")

In [4]:
regionNames = [
    "North West",
    "North East",
    "Yorkshire and The Humber",
    "West Midlands",
    "East Midlands",
    "East of England",
    "London",
    "South East",
]

ltlaNames = [
    "Dorset",
    "Bournemouth, Christchurch and Poole",
    "Stevenage",
    "Welwyn Hatfield",
    "North Hertfordshire",
    "East Hertfordshire",
    "Sandwell",
    "Dudley",
    "Birmingham",
    "Derbyshire Dales",
    "North East Derbyshire",
    "High Peak",
    "Sheffield",
    "Croydon",
]


def _downloadArea(areaType, name, *, withDeaths):
    """Announce one area, download its cases (and optionally deaths), and return the Area."""
    print(f"Downloading {name}...")
    target = Area(areaType, name)
    target.downloadCases()
    if withDeaths:
        target.downloadDeaths()
    return target


# England as a whole: cases and deaths
areaName = "England"
area = _downloadArea("nation", areaName, withDeaths=True)

# Each English region: cases and deaths
for regionName in regionNames:
    area = _downloadArea("region", regionName, withDeaths=True)

# Each selected LTLA area: cases only
for ltlaName in ltlaNames:
    area = _downloadArea("ltla", ltlaName, withDeaths=False)

print("All done!")

Downloading England...
Downloading North West...
Downloading North East...
Downloading Yorkshire and The Humber...
Downloading West Midlands...
Downloading East Midlands...
Downloading East of England...
Downloading London...
Downloading South East...
Downloading Dorset...
Downloading Bournemouth, Christchurch and Poole...
Downloading Stevenage...
Downloading Welwyn Hatfield...
Downloading North Hertfordshire...
Downloading East Hertfordshire...
Downloading Sandwell...
Downloading Dudley...
Downloading Birmingham...
Downloading Derbyshire Dales...
Downloading North East Derbyshire...
Downloading High Peak...
Downloading Sheffield...
Downloading Croydon...
All done!
