# NHS Core

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Imports

Standard Python libraries and BeautifulSoup, plus determination of projdir, basic printable class, etc

In [1]:
import os
import csv

import urllib.request
import re
from bs4 import BeautifulSoup

import common_core

## Runtime parameters

The "lazy" option skips files that already exist locally

In [2]:
# When True, downloads are skipped for files that already exist locally;
# this flag is passed through to downloadFiles() by the download wrappers.
lazy = True

## Configuration

Data to download from the NHS statistical work area

In [3]:
# Only using England for now due to the availability of age demographics, etc
nationNames = ["England"]

# All 7 NHS regions in England
nhsRegionNames = ["North West", "North East and Yorkshire", "Midlands",
                  "East of England", "London", "South East", "South West"]

# Combine all of these area types into a single list of (areaType, areaNames) pairs.
# Built from nationNames (rather than a second literal) so the list of nations
# is defined in exactly one place.
areas = [("nation", nationNames), ("nhsregion", nhsRegionNames)]

In [4]:
# NHS England pages that are scanned for spreadsheet links:
# deathUrl is used by downloadDeaths() and admissionUrl by downloadAdmissions()
deathUrl = "https://www.england.nhs.uk/statistics/statistical-work-areas/covid-19-daily-deaths/"
admissionUrl = "https://www.england.nhs.uk/statistics/statistical-work-areas/covid-19-hospital-activity/"

In [5]:
# Each entry is (subfolder, regex). The regex is matched against link URLs with
# re.search() and the subfolder becomes part of the local download path.
# Raw strings are used so that "\." reaches the regex engine unchanged instead
# of being treated as an (invalid) Python string escape.
deathFiles = [
    ("daily", r"COVID-19-total-announced-deaths-.*-20[0-9][0-9]\.xlsx$"),
    ("weekly", r"COVID-19-total-announced-deaths-.*-weekly-file\.xlsx$")
]

admissionFiles = [
    ("daily", r"COVID-19-daily-admissions-and-beds.*\.xlsx$"),
    ("weekly", r"Weekly-covid-admissions-and-beds-publication.*\.xlsx$"),
    ("monthly", r"Covid-Publication-.*\.xlsx$")
]

In [6]:
# Five-year age bands from '00_04' up to '85_89', followed by an open-ended '90+' band
ageDemographics = [f"{lower:02d}_{lower + 4:02d}" for lower in range(0, 90, 5)]
ageDemographics.append('90+')

## Download Functions

Download spreadsheets by parsing the HTML for suitable links

In [7]:
def downloadFile(url, fileName, lazy=False):
    """Download a binary file from the URL provided

    Parameters:
        url: address of the file to fetch
        fileName: local path to write; missing parent folders are created
        lazy: when True, do nothing if fileName already exists

    The data is streamed to "<fileName>.part" and only renamed to fileName once
    the transfer has completed, so an interrupted download never leaves a
    truncated file that a later lazy run would skip.

    Raises whatever urllib or the file system raises; the partial file is
    removed before the exception propagates.
    """

    if lazy and os.path.exists(fileName):
        print(f"Skipping {os.path.basename(url)}...")
        return

    print(f"Downloading {os.path.basename(url)}...")

    # Ensure raw path exists; a bare file name has no folder part to create
    filePath = os.path.dirname(fileName)
    if filePath:
        os.makedirs(filePath, exist_ok=True)

    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla'})

    partName = fileName + ".part"
    try:
        # Context managers close both the response and the file, even on error
        with urllib.request.urlopen(req, timeout=60) as response, open(partName, "wb") as outfile:
            chunk = response.read(4096)
            while chunk:
                outfile.write(chunk)
                chunk = response.read(4096)

        # Publish the finished download under its real name
        os.replace(partName, fileName)
    except BaseException:
        # Do not leave a half-written file behind
        if os.path.exists(partName):
            os.remove(partName)
        raise


def downloadFiles(url, patterns, category, lazy=False):
    """Download every file linked from a web page whose URL matches a pattern

    Parameters:
        url: page to fetch and scan for <a> links
        patterns: sequence of (subfolder, regex) pairs; each regex is applied
            to the link URL with re.search()
        category: final folder name of the download path
        lazy: passed through to downloadFile() to skip files that already exist

    Matching files are saved to
    <projdir>/data/nhs-statistics/raw/<subfolder>/<category>/<basename of URL>.
    A link that matches several patterns is downloaded once per matching pattern.
    """

    from urllib.parse import urljoin

    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla'})

    # The page is parsed while the response is open; the context manager
    # closes the connection even if parsing fails
    with urllib.request.urlopen(req, timeout=15) as response:
        soup = BeautifulSoup(response, "lxml")

    for anchor in soup.find_all("a"):
        href = anchor.get("href")

        # Anchors without an href (e.g. named anchors) cannot be downloaded
        if not href:
            continue

        # Resolve relative links against the page; absolute links are unchanged
        fileUrl = urljoin(url, href)

        for pattern in patterns:
            if re.search(pattern[1], fileUrl):
                filePath = os.path.join(common_core.projdir, "data", "nhs-statistics", "raw", pattern[0], category)
                fileName = os.path.join(filePath, os.path.basename(fileUrl))
                downloadFile(fileUrl, fileName, lazy=lazy)

In [8]:
def downloadDeaths():
    """Download the spreadsheets linked from deathUrl that match deathFiles

    Files are stored under the "deaths" category and the module-level "lazy"
    setting decides whether files that already exist are skipped.
    """
    downloadFiles(deathUrl, deathFiles, "deaths", lazy)

In [9]:
def downloadAdmissions():
    """Download the spreadsheets linked from admissionUrl that match admissionFiles

    Files are stored under the "admissions" category and the module-level "lazy"
    setting decides whether files that already exist are skipped.
    """
    downloadFiles(admissionUrl, admissionFiles, "admissions", lazy)