# PHE Charts

Created by Michael George (AKA Logiqx)

Website: https://logiqx.github.io/covid-stats/

## Common Libraries

Import libraries for working with PHE data

In [1]:
import os
from datetime import date, datetime

import csv
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tck

import common_core
import phe_core
import ons_population

## Area Class

Supports a single area - nation, region, ltla, etc

- Load data into ndarray from CSV
- Plot data

In [2]:
# Source of the data - quoted in the footer of each figure
CORONAVIRUS_URL = "coronavirus.data.gov.uk"

# Location of the published images, code and data - quoted in the footer of each figure
GITHUB_URL = "https://logiqx.github.io/covid-stats/daily-data"

# First date to be plotted - matched against the "date" column of the loaded data
START_DATE = "2020-08-31"

# Number of additional days to drop from the end of each series, keyed by category
# These are applied on top of the 4 days in the PHE data + 3 days relating to MA calculation
# - cases: apply 2 day "lag"
# - admissions: no additional "lag"
# - deaths: apply 5 day "lag"

categoryLags = \
{
    "cases": 2,
    "admissions": 0,
    "deaths": 5
}

In [3]:
class Area(common_core.Printable):
    """A single area (nation, region, ltla, etc) together with its PHE data.

    Data is loaded from CSV files into numpy structured arrays which are held in
    self.data[period][dataType] and can subsequently be plotted using matplotlib.
    """

    def __init__(self, areaType, areaName, areaCode=None):
        """Initialise the area object

        areaType -- type of area, also used as a directory name for CSV files and images
        areaName -- name of the area, used in chart titles and to derive file names
        areaCode -- optional code, passed to the population lookup when calculating rates
        """

        # Loaded data, keyed by period and then by data type
        self.data = {}

        self.areaType = areaType
        self.areaName = areaName
        self.areaCode = areaCode

        self.safeName = common_core.getSafeName(areaName)
        self.csvName = self.safeName + '.csv'


    def load(self, period, dataType):
        """Load demographic data (e.g. cases, patients or deaths) from CSV into a structured ndarray

        The result is stored in self.data[period][dataType].
        A message is printed and the exception is re-raised if the file cannot be loaded.
        """

        fileName = os.path.join(common_core.dataDir, phe_core.PHE_DASHBOARD, "csv", period, dataType, self.areaType, self.csvName)

        try:
            with open(fileName, 'r') as f:
                reader = csv.reader(f, delimiter = ',')

                # The header row determines the dtype and converter of every column
                dtype = []
                converters = {}
                colNames = next(reader)

                for i, colName in enumerate(colNames):
                    if colName == "date":
                        dtype.append((colName, "U10"))
                    elif colName == "areaName":
                        dtype.append((colName, "U40"))
                    elif "RollingRate" in colName:
                        dtype.append((colName, "f8"))
                        # Blank values are treated as zero
                        converters[i] = lambda s: float(s or 0)
                    else:
                        dtype.append((colName, "u4"))
                        # Blank values are treated as zero
                        converters[i] = lambda s: int(s or 0)

                # The header has already been consumed so only data rows remain
                data = np.genfromtxt(f, dtype=dtype, converters=converters, delimiter=",")

            # Ensure period is present in data arrays
            if period not in self.data:
                self.data[period] = {}

            # Store the data
            self.data[period][dataType] = data

        # General catch all - report which file failed, then let the error propagate
        except Exception:
            print(f"Failed to load {dataType} for {self.areaName}")
            raise


    def loadDaily(self):
        """Load PHE daily data from CSV files into ndarrays

        The data types that are loaded depend on the type of area.
        """

        period = "daily"

        if self.areaType == "nation":
            self.load(period, "cases")
            self.load(period, "patients")
            self.load(period, "deaths")

        elif self.areaType == "nhsregion":
            self.load(period, "patients")

        else:
            self.load(period, "cases")
            self.load(period, "deaths")


    def prepCharts(self, data, category, lag):
        """Prepare data for matplotlib

        data -- structured ndarray as stored by load()
        category -- "cases", "admissions" or "deaths"
        lag -- number of additional days to ignore at the end of each series

        Returns a tuple of (charts, dates) where charts maps each chart title to a dict of
        series (keyed by custom demographic) and dates is a list of formatted date strings.

        Population figures are taken from the module-level "population" object, which must
        have been created before this method is called.
        """

        chart1 = f'Daily {category}'
        chart2 = f'Daily {category} per 100,000'
        chart3 = f'Daily {category} as % of max'

        charts = {
            chart1: {},
            chart2: {},
            chart3: {}
        }

        # Locate the first row to be plotted - fail with a clear message if it is missing
        startIdxs = np.where(data["date"] == START_DATE)[0]
        if len(startIdxs) == 0:
            raise IndexError(f"Start date {START_DATE} not found in {category} data for {self.areaName}")
        minIdx = startIdxs[-1]
        maxIdx = 0

        # Different categories have different age demographics - NHS could be better!
        if category == "admissions":
            demographics = phe_core.nhsDemographics
            customDemographicKeys = [*demographics.keys()]
        else:
            demographics = phe_core.customDemographics12
            customDemographicKeys = [*demographics.keys()]
            # The under 25s have very few deaths so % of max is very noisy - exclude the age group!
            if category == "deaths":
                customDemographicKeys = customDemographicKeys[1:]

        for customDemographicKey in customDemographicKeys:

            ageDemographics = demographics[customDemographicKey]

            # Nothing to aggregate - avoid re-using the counts of the previous demographic
            if len(ageDemographics) == 0:
                continue

            # Create grid of all age demographics in the custom demographic
            grid = []
            numPersons = 0

            for ageDemographic in ageDemographics:
                if category == "admissions":
                    # Admissions are cumulative so convert them to daily counts
                    # The columns are unsigned so use signed arithmetic to avoid wrap-around on any decrease
                    cumCounts = data["cumAdmissions" + ageDemographic].astype(np.int64)
                    counts = np.append([0], cumCounts[1:] - cumCounts[:-1])
                else:
                    counts = data[category + ageDemographic]

                grid.append(counts)

                numPersons += population.getPopulation(self.areaCode, ageDemographic)

            # Calculate the totals
            if len(grid) > 1:
                counts = np.sum(np.vstack(grid), axis = 0)
            else:
                counts = grid[0]

            # Calculate moving average
            counts = common_core.movingAverage(counts)

            # This is the daily rate but can multiply by 7 to get the weekly rate
            rates = 100000 * counts / numPersons

            # Only interested in non-zero values
            nonZeroIdx = np.nonzero(counts)[0]

            # It is possible that there were no significant values for this age band
            if len(nonZeroIdx) == 0:
                continue

            # Ignore last X days of counts due to data lag
            lastIdx = nonZeroIdx[-1] - lag

            # Discard trailing zeros
            # NOTE(review): the extra 2 is presumably related to the moving average window - confirm
            endIdx = lastIdx - 2

            # Skip series with nothing left to plot - also prevents a negative index wrapping around
            if endIdx <= minIdx:
                continue

            counts = counts[minIdx:endIdx]
            rates = rates[minIdx:endIdx]

            # TODO
            # - check that demographics add up to total
            # - confirm last data points are legit - i.e. check delays

            # First chart is just the centered moving average
            charts[chart1][customDemographicKey] = counts

            # Second chart is rates per 100,000
            charts[chart2][customDemographicKey] = rates

            # Third chart is the % of maximum
            charts[chart3][customDemographicKey] = counts / np.max(counts) * 100

            # Remember maximum index
            if lastIdx > maxIdx:
                maxIdx = lastIdx

        # Re-format dates - the day is formatted separately because "%-d" is not portable
        # Note that the dates run up to maxIdx so can extend beyond the end of the series
        dates = []
        for yyyymmdd in np.array(data["date"])[minIdx:maxIdx + 1]:
            parsedDate = datetime.strptime(yyyymmdd, "%Y-%m-%d")
            dates.append(f"{parsedDate.day} {parsedDate:%b %y}")

        return charts, dates


    def setColorCycler(self, ax, numColors=10):
        """Set the colour cycler for the axis"""

        # Choose colour cycler - viridis, plasma, inferno, magma, cividis, etc.
        ax.set_prop_cycle(plt.cycler("color", plt.cm.inferno(np.linspace(0.9, 0.1, numColors))))


    def plotCharts(self, axs, period, dataType, charts, dates):
        """Plot each chart on its own axis

        axs -- sequence of axes, paired in order with the entries of charts
        period, dataType -- not used for plotting, retained for the benefit of existing callers
        charts -- dict mapping chart title to a dict of series, as returned by prepCharts()
        dates -- formatted dates used to label the x-axis, as returned by prepCharts()
        """

        # Pair each axis with a chart - any surplus axes or charts are simply ignored
        for ax, (axTitle, plots) in zip(axs, charts.items()):

            # Set the color cycler before plotting any data series
            if plots:
                self.setColorCycler(ax, numColors=len(plots))

            # Plot the data series, remembering the longest so that none are clipped
            numPoints = 0
            for plotName in plots:
                y_points = plots[plotName]
                x_points = np.arange(len(y_points))
                ax.plot(x_points, y_points, label=plotName)
                numPoints = max(numPoints, len(y_points))

            # Add title, axis labels and legend
            ax.set_title(axTitle)

            # Change the x-axis to show the actual dates
            tickInterval = 7
            ax.set_xlim(xmin=-2, xmax=numPoints + 1)
            ax.set_xticks(np.arange(0, len(dates), tickInterval))
            ax.set_xticklabels(dates[::tickInterval], rotation=90)

            # Ensure thousands are shown using commas, without truncating fractional ticks
            ax.set_ylim(ymin=0)
            ax.get_yaxis().set_major_formatter(tck.FuncFormatter(lambda x, p: format(x, ',.10g')))

            # Add a standard legend - there is nothing to describe if no series were plotted
            if plots:
                ax.legend(loc="upper left", borderaxespad=1, fontsize="small")


    def plotCategory(self, period, axs, category):
        """Plot data for a generic category

        Admissions are derived from the "patients" data whereas other categories have their own data.
        """

        if category == "admissions":
            data = self.data[period]["patients"]
        else:
            data = self.data[period][category]

        lag = categoryLags[category]

        charts, dates = self.prepCharts(data, category, lag=lag)

        self.plotCharts(axs, period, category, charts, dates)


    def setHeader(self, fig):
        """Set the header / title"""

        fig.suptitle(f"COVID-19 in {self.areaName} since Sept 2020", y=0.96,
                     fontsize="large", fontweight="bold")

        textStr = 'All plots are shown as a 7 day centered moving average.'
        fig.text(0.5, 0.94, textStr, horizontalalignment='center', verticalalignment='top')

        # The day is formatted separately because "%-d" is not portable
        today = date.today()
        lastRefresh = f"{today:%A} {today.day} {today:%B %Y}"
        textStr = f"Last refreshed on {lastRefresh}."
        fig.text(0.5, 0.92, textStr, horizontalalignment='center', verticalalignment='top')


    def setFooter(self, fig):
        """Set the footer - external links, etc."""

        textStr = f"This data was retrieved via the API at {CORONAVIRUS_URL} and plotted by @Mike_aka_Logiqx"
        fig.text(0.5, 0.05, textStr, horizontalalignment='center', verticalalignment='top')

        textStr = f"Further images plus the code and data can be found at {GITHUB_URL}"
        fig.text(0.5, 0.03, textStr, horizontalalignment='center', verticalalignment='top')


    def saveImage(self, fig, suffix=None):
        """Save the image, display it and close the figure

        The image is written to docs/daily-data/<areaType>/<safe name>[_<suffix>].png
        beneath the project directory.
        """

        baseName = common_core.getSafeName(self.areaName)
        if suffix:
            baseName += "_" + suffix
        baseName += ".png"

        partName = os.path.join("docs", "daily-data", self.areaType, baseName)
        fileName = os.path.join(common_core.projdir, partName)

        # Create the directory if required - no error if it already exists
        os.makedirs(os.path.dirname(fileName), exist_ok=True)

        # Save before showing so that the result does not depend on how the backend handles show()
        print(f"Saving {partName}...")
        fig.savefig(fileName, bbox_inches='tight', facecolor='w')

        plt.show()
        plt.close(fig)


    def createFigure(self, nrows, ncols, figsize=(16, 18), dpi=150):
        """Create a new figure with standard header and footer

        Returns a tuple of (fig, axs) as provided by plt.subplots().
        """

        fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi)

        fig.subplots_adjust(hspace=0.4)

        self.setHeader(fig)
        self.setFooter(fig)

        return fig, axs


    def plotDetail(self, period, category):
        """Plot the three charts for a single category and save them as one image"""

        fig, axs = self.createFigure(nrows=3, ncols=1, figsize=(12, 18))

        self.plotCategory(period, axs, category)

        self.saveImage(fig, suffix=category)


    def plotCases(self, period):
        """Plot data for daily cases"""

        self.plotDetail(period, "cases")


    def plotAdmissions(self, period):
        """Plot data for daily admissions"""

        self.plotDetail(period, "admissions")


    def plotDeaths(self, period):
        """Plot data for daily deaths"""

        self.plotDetail(period, "deaths")


    def plotSummary(self, period):
        """Plot data for cases, hospitalisations and deaths - one row of charts per category"""

        fig, axs = self.createFigure(nrows=3, ncols=3)

        self.plotCategory(period, axs[0], "cases")
        self.plotCategory(period, axs[1], "admissions")
        self.plotCategory(period, axs[2], "deaths")

        self.saveImage(fig)

## Areas Class

Combines multiple areas - nation, region, ltla, etc

In [4]:
class Areas(common_core.Printable):
    """Collection of Area objects of a single type, keyed by area name"""

    def __init__(self, areaType):
        """Initialise the areas object - no areas are present until they are added"""

        self.areaType = areaType
        self.areas = {}


    def addArea(self, areaName, areaCode=None):
        """Add a new area which can then be loaded from disk"""

        self.areas[areaName] = Area(self.areaType, areaName, areaCode=areaCode)


    def loadArea(self, areaName):
        """Load PHE data for a single area which has previously been added"""

        area = self.areas[areaName]
        area.loadDaily()


    def plotAreas(self, period="daily"):
        """Plot charts for the areas of interest

        Nations get the detailed charts plus the summary whereas regions only get the summary.
        Nothing is plotted for any other type of area.
        """

        if self.areaType == "nation":
            for areaName in ["England"]:
                area = self.areas[areaName]
                for plot in (area.plotCases, area.plotAdmissions, area.plotDeaths, area.plotSummary):
                    plot(period)

        elif self.areaType == "region":
            for areaName in ["North East"]:
                area = self.areas[areaName]
                area.plotSummary(period)

In [5]:
print("Loading data...")

# Population figures are required when calculating rates per 100,000
population = ons_population.Population()
population.loadYears(limit=1)

# Overview - no area codes are available
overview = Areas(areaType="overview")
for overviewName in phe_core.overviewNames:
    overview.addArea(overviewName)
    overview.loadArea(overviewName)

# Nations - the area code is found by reversing the code -> name lookup
nations = Areas(areaType="nation")
nationLookup = {name: code for code, name in common_core.nations.items()}
for nationName in phe_core.nationNames:
    nationCode = nationLookup[nationName]
    nations.addArea(nationName, areaCode=nationCode)
    nations.loadArea(nationName)

# Regions
regions = Areas(areaType="region")
for regionCode, regionName in common_core.regions.items():
    regions.addArea(regionName, areaCode=regionCode)
    regions.loadArea(regionName)

# NHS regions - no area codes are available
nhsRegions = Areas(areaType="nhsregion")
for nhsRegionName in common_core.nhsRegionNames:
    nhsRegions.addArea(nhsRegionName)
    nhsRegions.loadArea(nhsRegionName)

# LTLAs - these are added but their data is not currently loaded
ltlas = Areas(areaType="ltla")
for ltlaName in phe_core.ltlaNames:
    ltlas.addArea(ltlaName)

# Transfer patient data from NHS regions to standard regions
for areaName, nhsAreaName in common_core.nhsRegionMappings.items():
    periods = nhsRegions.areas[nhsAreaName].data

    for period, periodData in periods.items():
        for dataType in periodData:
            regions.areas[areaName].data[period][dataType] = periodData[dataType]

print("\nAll done!")

Loading data...

All done!


## Draw Charts

In [6]:
# Plot and save the charts for the nations using the default "daily" period
# Note that plotAreas() currently only plots England when the area type is "nation"
nations.plotAreas()