# PHE Download

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## PHE Core

Import library for working with PHE data

In [1]:
import os
import time

import requests
import json

import common_core
import phe_core

## Configuration

In [2]:
# Base URL of the dashboard data API; AreaDownload.download() sends its GET requests here
ENDPOINT = "https://api.coronavirus.data.gov.uk/v1/data"

In [3]:
# Folder name for surveillance data beneath common_core.dataDir
PHE_SURVEILLANCE = "phe-surveillance"
# <dataDir>/phe-surveillance/raw -- handed to WebDownload.downloadFiles() by downloadSurveillance()
surveillancePath = os.path.join(common_core.dataDir, PHE_SURVEILLANCE, "raw")

# Page on gov.uk for the combined flu and COVID-19 surveillance reports
surveillanceUrl = "https://www.gov.uk/government/statistics/national-flu-and-covid-19-surveillance-reports"
# Pairs of (label, filename regex) passed to WebDownload.downloadFiles().
# Raw strings keep "\." as a regex escape without triggering Python's
# invalid-escape-sequence warning; the resulting pattern text is unchanged.
# NOTE(review): the first element looks like a frequency / sub-folder label - confirm in common_core
surveillanceFiles = [
    ("weekly", r"Weekly_Influenza_and_COVID19_report_data_.*\.xlsx$")
]

# Page on gov.uk for the older COVID-19-only surveillance reports
legacySurveillanceUrl = "https://www.gov.uk/government/publications/national-covid-19-surveillance-reports"
# Same (label, filename regex) layout as surveillanceFiles, matching the legacy file names
legacySurveillanceFiles = [
    ("weekly", r"Weekly_COVID19_report_data_.*\.xlsx$")
]

## Area Download

Class to download data via the API.

Supports the UK overview, nations, regions, NHS regions and lower-tier local authorities (LTLAs).

In [4]:
class AreaDownload(common_core.Printable):
    """Download dashboard data for one area via the API at ENDPOINT.

    An area is identified by its API area type (the code below recognises
    'overview', 'nation', 'region', 'nhsregion' and 'ltla') and its name.
    Each download is written as a CSV file under the raw dashboard data
    directory.
    """

    def __init__(self, areaType, areaName):
        """Initialise the area object

        areaType -- API area type, sent as the 'areaType' filter
        areaName -- API area name, sent as the 'areaName' filter
        """

        self.areaType = areaType
        self.areaName = areaName

        # File-system friendly version of the area name, used for the CSV file
        self.safeName = common_core.getSafeName(areaName)
        self.csvName = self.safeName + '.csv'


    def download(self, period = "daily"):
        """Download data from PHE dashboard

        period -- "weekly" requests the ONS structure; any other value
                  (default "daily") requests cases / deaths / patients.
                  The value is also used as a directory name in the
                  output path.

        The response is saved to
        <dataDir>/<PHE_DASHBOARD>/raw/<period>/<areaType>/<safeName>.csv

        Raises AssertionError if the API does not answer with HTTP 200, and
        re-raises the last request error if every attempt fails. Any failure
        is reported on stdout before being propagated.
        """

        # Catch all exceptions
        try:
            filters = [
                f"areaType={self.areaType}",
                f"areaName={self.areaName}"
            ]

            # Columns always requested; metrics are added per area type below
            structure = {
                "date": "date",
                "areaName": "areaName"
            }

            if period == "weekly":
                if self.areaType in ['overview', 'nation', 'region', 'ltla']:
                    structure.update(phe_core.onsStructure)
            else:
                if self.areaType in ['overview', 'nation', 'region', 'ltla']:
                    structure.update(phe_core.casesStructure)
                    structure.update(phe_core.deathsStructure)
                if self.areaType in ['overview', 'nation', 'nhsregion']:
                    structure.update(phe_core.patientsStructure)

            api_params = {
                "filters": str.join(";", filters),
                "structure": json.dumps(structure, separators=(",", ":")),
                "format": "csv"
            }

            dirName = os.path.join(common_core.dataDir, phe_core.PHE_DASHBOARD, "raw", period, self.areaType)
            fileName = os.path.join(dirName, self.csvName)
            # Path relative to the data directory, for a shorter progress message
            partName = fileName.replace(common_core.dataDir, "")[1:]

            print(f"Downloading {partName}...")

            # Allow up to 3 attempts
            maxAttempts = 3
            response = None
            lastError = None
            for attempt in range(maxAttempts):
                try:
                    response = requests.get(ENDPOINT, params=api_params, timeout=10)
                    break
                # Exception (not a bare except) so Ctrl-C / SystemExit are not swallowed and retried
                except Exception as error:
                    lastError = error
                    # Pause before the next attempt; no point waiting after the final one
                    if attempt < maxAttempts - 1:
                        time.sleep(1)

            # Every attempt failed: surface the real request error rather than
            # an UnboundLocalError on 'response'
            if response is None:
                raise lastError

            # Raised explicitly so the check is not stripped by "python -O";
            # AssertionError is kept so the exception type seen by callers is unchanged
            if response.status_code != 200:
                raise AssertionError(f"Failed request for {self.areaName}: {response.status_code} {response.text}")

            # exist_ok avoids the check-then-create race
            os.makedirs(dirName, exist_ok=True)

            with open(fileName, 'w') as f:
                f.write(response.content.decode())

        # General catch all to report exceptions then abort
        except:
            print(f"Failed to download {period} data for {self.areaName}")
            raise


    def downloadDaily(self):
        """Download daily data for analysis"""

        # Daily data is available for all area types
        self.download()


    def downloadWeekly(self):
        """Download weekly data for analysis"""

        # ONS data is not available for 'nhsregion'
        if self.areaType in ['overview', 'nation', 'region', 'ltla']:
            self.download("weekly")

## Download Surveillance Data

Simple function to download COVID-19 and Flu surveillance data

In [5]:
def downloadSurveillance(skipExisting=common_core.skipExisting, verbose=common_core.verbose):
    """Download the current and legacy surveillance files into surveillancePath.

    skipExisting and verbose are forwarded unchanged to common_core.WebDownload.
    Returns the result of the first downloadFiles() call extended in place
    with the result of the legacy one.
    """
    downloader = common_core.WebDownload(skipExisting=skipExisting, verbose=verbose)

    # Current flu + COVID-19 reports first, then the older COVID-19-only reports
    downloaded = downloader.downloadFiles(surveillancePath, surveillanceUrl, surveillanceFiles)
    legacyDownloaded = downloader.downloadFiles(surveillancePath, legacySurveillanceUrl, legacySurveillanceFiles)

    downloaded += legacyDownloaded
    return downloaded

## Interactive Testing

In [6]:
if __name__ == '__main__':

    # One pass per period: (banner to print, AreaDownload method to call)
    dashboardPasses = (
        ("--- Downloading daily dashboard data ---", AreaDownload.downloadDaily),
        ("\n--- Downloading weekly dashboard data ---", AreaDownload.downloadWeekly),
    )

    for banner, runDownload in dashboardPasses:
        print(banner)
        # A fresh AreaDownload is built for every area on every pass
        for areaType, areaNames in phe_core.areas:
            for areaName in areaNames:
                area = AreaDownload(areaType, areaName)
                runDownload(area)

    print("\n--- Downloading surveillance data ---")
    downloadSurveillance()

    print("\nAll done!")

--- Downloading daily dashboard data ---
Downloading phe-dashboard/raw/daily/overview/united_kingdom.csv...
Downloading phe-dashboard/raw/daily/nation/england.csv...
Downloading phe-dashboard/raw/daily/nation/scotland.csv...
Downloading phe-dashboard/raw/daily/nation/wales.csv...
Downloading phe-dashboard/raw/daily/nation/northern_ireland.csv...
Downloading phe-dashboard/raw/daily/region/north_east.csv...
Downloading phe-dashboard/raw/daily/region/north_west.csv...
Downloading phe-dashboard/raw/daily/region/yorkshire_humber.csv...
Downloading phe-dashboard/raw/daily/region/east_midlands.csv...
Downloading phe-dashboard/raw/daily/region/west_midlands.csv...
Downloading phe-dashboard/raw/daily/region/east_england.csv...
Downloading phe-dashboard/raw/daily/region/london.csv...
Downloading phe-dashboard/raw/daily/region/south_east.csv...
Downloading phe-dashboard/raw/daily/region/south_west.csv...
Downloading phe-dashboard/raw/daily/nhsregion/north_east_yorkshire.csv...
Downloading phe-das