In [5]:
from pathlib import Path
from urllib.error import HTTPError
import datetime

from IPython.display import display
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import re
import requests
import xmltodict

In [41]:
def get_england_deaths_df(date):
    """Download NHS England's announced-deaths workbook and return cumulative deaths.

    Parameters
    ----------
    date : datetime.date
        Publication date of the workbook. It selects both the ``YYYY/MM``
        upload folder and the ``dd-Month-YYYY`` suffix of the file name.

    Returns
    -------
    pandas.DataFrame
        One row per area (index named ``area_name``) and one datetime column
        per day from 9 March 2020 onwards, holding the running total of deaths.

    Notes
    -----
    The caller in this notebook catches ``urllib.error.HTTPError`` around this
    function; presumably that is what ``pd.read_excel`` raises when no workbook
    has been published for ``date`` -- TODO confirm.
    """
    # The folder used to be hard-coded to "2020/04", which only works for
    # workbooks uploaded in April 2020. Assumes the upload folder follows the
    # publication month (WordPress-style uploads path) -- TODO confirm.
    url = (
        "https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/"
        f"{date:%Y/%m}/COVID-19-all-announced-deaths-{date:%d-%B-%Y}.xlsx"
    )

    deaths_df = pd.read_excel(
        url,
        sheet_name="COVID19 total deaths by region",
        header=15,
        index_col=1,
    )

    deaths_df = (
        deaths_df
        # discard the spacer rows/columns that are empty throughout
        .dropna(how="all", axis="index")
        .dropna(how="all", axis="columns")
        # keep only the per-day columns
        .drop(["Up to 01-Mar-20", "Awaiting verification", "Total"], axis="columns")
        .astype(int)
    )

    deaths_df.index = deaths_df.index.rename("area_name").str.strip()
    deaths_df.columns = pd.to_datetime(deaths_df.columns)

    # normalise the capitalisation of the two region names that differ
    deaths_df = deaths_df.rename(index={
        "East Of England": "East of England",
        "North East And Yorkshire": "North East and Yorkshire",
    })

    # daily counts -> running totals along the date axis
    deaths_df = deaths_df.cumsum(axis=1)

    start_date = datetime.date(2020, 3, 9)
    return deaths_df.loc[:, pd.to_datetime(start_date):]

In [42]:
# Source of the deaths figures:
# https://www.england.nhs.uk/statistics/statistical-work-areas/covid-19-daily-deaths/
one_day = datetime.timedelta(days=1)
today = datetime.date.today()

try:
    england_deaths_df = get_england_deaths_df(today)
except HTTPError:
    # Nothing could be fetched for today's date, so fall back to the
    # workbook dated one day earlier.
    yesterday = today - one_day
    england_deaths_df = get_england_deaths_df(yesterday)

In [43]:
# Show the table fetched above as this cell's output.
england_deaths_df

Unnamed: 0_level_0,2020-03-09,2020-03-10,2020-03-11,2020-03-12,2020-03-13,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,...,2020-04-09,2020-04-10,2020-04-11,2020-04-12,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18
area_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
England,23,24,31,40,55,78,106,152,191,250,...,9599,10259,10956,11590,12192,12772,13366,13886,14279,14397
East of England,6,6,6,6,7,9,10,11,12,17,...,1003,1076,1163,1260,1335,1391,1468,1526,1579,1592
London,4,4,8,10,16,29,38,54,73,97,...,2828,2977,3133,3265,3407,3522,3634,3742,3803,3824
Midlands,5,5,6,11,16,20,25,40,47,59,...,2000,2124,2260,2389,2499,2612,2755,2836,2915,2935
North East and Yorkshire,0,0,0,0,0,0,2,5,6,9,...,1083,1187,1275,1355,1436,1534,1612,1703,1771,1806
North West,2,2,2,4,6,7,11,14,16,20,...,1187,1280,1394,1497,1599,1714,1813,1925,1982,1996
South East,6,7,8,8,9,10,17,25,32,41,...,1064,1140,1215,1287,1355,1414,1472,1514,1561,1572
South West,0,0,1,1,1,3,3,3,5,7,...,434,475,516,537,561,585,612,640,668,672


In [122]:
# data taken from PHE's tracker:
# https://coronavirus.data.gov.uk
blob_url = "https://publicdashacc.blob.core.windows.net/publicdata?restype=container&comp=list"
covid_data_url_stem = "https://c19pub.azureedge.net/{latest_blob}"

# The container listing is XML; keep only the names of the "data_*" blobs.
blob_listing = xmltodict.parse(requests.get(blob_url).text)
blobs = [
    blob["Name"]
    for blob in blob_listing["EnumerationResults"]["Blobs"]["Blob"]
    if blob["Name"].startswith("data_")
]

# Choose the blob whose first run of digits compares highest as a string
# (presumably a timestamp embedded in the blob name -- TODO confirm).
date_pattern = re.compile(r"\d+")
latest_blob = max(blobs, key=lambda blob: date_pattern.search(blob)[0])

covid_data = requests.get(covid_data_url_stem.format(latest_blob=latest_blob)).json()
country_data = covid_data["countries"]
england_regions_data = covid_data["regions"]

# One daily, forward-filled frame of cumulative confirmed cases per region.
england_regional_data = {
    region["name"]["value"]: {
        #"daily_new_cases": region.get("dailyConfirmedCases", []),
        "daily_total_cases": (
            pd.DataFrame(
                region.get("dailyTotalConfirmedCases", [{"date": None, "value": None}])
            )
            # pandas 2 rejects the unit-less "datetime64" dtype, so name the unit
            .astype({"date": "datetime64[ns]"})
            .set_index("date")
            .asfreq("D", method="ffill")
        ),
        #"daily_new_deaths": region.get("dailyDeaths", []),
        #"daily_total_deaths": region.get("dailyTotalDeaths", []),
    }
    for region in england_regions_data.values()
}

# Empty area-by-date frame. This used to be indexed by `idx`, a name that is
# never assigned in the visible notebook, so the cell only ran with leftover
# kernel state; `date_index` is the index that is actually built here.
date_index = pd.date_range(start="2020-01-30", end=datetime.date.today(), freq="D", name="date")
areas = list(england_regional_data)
data_breakdown = ["daily_new_cases", "daily_total_cases"]
columns = areas
df = pd.DataFrame(index=date_index, columns=columns).T

england_regional_data["West Midlands"]
# Scotland cases https://www.gov.scot/publications/trends-in-number-of-people-in-hospital-with-confirmed-or-suspected-covid-19/


# url = "https://fingertips.phe.org.uk/documents/Historic%20COVID-19%20Dashboard%20Data.xlsx"
# swni_deaths_df = pd.read_excel(
#     url, 
#     sheet_name="UK Deaths",
#     header=7,
#     index_col=0
# )

# swni_deaths_df = swni_deaths_df.fillna(method="bfill", axis=1)
# swni_deaths_df[["Deaths", "UK"]] = swni_deaths_df[["Deaths", "UK"]].fillna(method="bfill")
# swni_deaths_df = (
#     swni_deaths_df
#     .fillna(method="ffill", axis=1, limit=1)
#     .fillna(0.)
#     .astype(int)
#     .drop(columns=["Deaths", "UK", "England"])
#     .T
# )
# swni_deaths_df.insert(0, pd.to_datetime(datetime.date(2020, 3, 9)), 0)
# deaths_df = pd.concat([swni_deaths_df, england_deaths_df])
# deaths_df.index = deaths_df.index.rename("area_name")
# columns_index = pd.MultiIndex.from_product([deaths_df.columns, ["deaths"]], names=[None, None])
# deaths_df.columns = columns_index

{'daily_total_cases':             value
 date             
 2020-02-12      1
 2020-02-13      1
 2020-02-14      1
 2020-02-15      1
 2020-02-16      1
 ...           ...
 2020-04-14   8676
 2020-04-15   9040
 2020-04-16   9330
 2020-04-17   9417
 2020-04-18   9427
 
 [67 rows x 1 columns]}

In [2]:
# data taken from PHE's tracker:
# https://www.arcgis.com/apps/opsdashboard/index.html#/f94c3c90da5b4e9f9a0b19484dd4bb14
url = "https://fingertips.phe.org.uk/documents/Historic%20COVID-19%20Dashboard%20Data.xlsx"

# NOTE(review): the output saved with this cell is
# "XLRDError: No sheet named <'NHS Regions'>", so the sheet names below may no
# longer match the published workbook -- confirm before relying on this cell.

# Open the workbook once and read both sheets from it, rather than fetching
# the same URL for every sheet.
with pd.ExcelFile(url) as workbook:
    england_regional_cases_df = pd.read_excel(
        workbook,
        sheet_name="NHS Regions",
        header=7,
        index_col=1,
    )

    country_cases_df = pd.read_excel(
        workbook,
        sheet_name="Countries",
        header=9,
        index_col=1,
    )

# Strip the labels first so the rows to drop can be addressed by exact name.
country_cases_df.index = country_cases_df.index.rename("area_name").str.strip()
country_cases_df = country_cases_df.drop(index=["England", "UK"])

cases_df = pd.concat([england_regional_cases_df, country_cases_df])
cases_df = (
    cases_df
    .drop(index=[np.nan], columns=["Area Code"])
    # fill gaps from the next column to the right; `bfill` replaces the
    # deprecated `fillna(method="bfill")` spelling
    .bfill(axis=1)
    .astype(int)
)
cases_df.index = cases_df.index.rename("area_name").str.strip()

# Add a second column level naming the measure carried by each column.
index = pd.MultiIndex.from_product([cases_df.columns, ["confirmed_cases"]])
cases_df.columns = index

XLRDError: No sheet named <'NHS Regions'>

In [3]:
# NOTE(review): in the visible part of this notebook `deaths_df` is only
# assigned inside commented-out code, so on a fresh kernel this cell presumably
# fails unless it is defined elsewhere -- confirm.

# Join cases and deaths on the area name, then move the second column level
# into the row index so that rows are keyed by (area, measure).
input_df = (
    cases_df
    .merge(deaths_df, on="area_name")
    .stack(level=1)
    # carry the previous column's value into gaps; `ffill` replaces the
    # deprecated `fillna(method="ffill")` spelling
    .ffill(axis=1)
    .astype(int)
)

# UK rows are the column-wise sums over the four home nations.
home_nations_data = input_df.loc[["England", "Scotland", "Wales", "Northern Ireland"]]
uk_confirmed_cases = home_nations_data.xs("confirmed_cases", level=1).sum()
uk_deaths = home_nations_data.xs("deaths", level=1).sum()
input_df.loc[("UK", "confirmed_cases"), :] = uk_confirmed_cases
input_df.loc[("UK", "deaths"), :] = uk_deaths

# Cast again after adding the UK rows (presumably because the row insertion
# can change the dtype -- TODO confirm).
input_df = input_df.astype(int)
input_df

NameError: name 'cases_df' is not defined

In [4]:
# Order the rows by their value in the latest column, largest first.
latest_column = max(input_df.columns)
input_df = input_df.sort_values(by=latest_column, ascending=False)

NameError: name 'input_df' is not defined

In [None]:
# Population figures for the NHS England regions come from:
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/clinicalcommissioninggroupmidyearpopulationestimates
nhs_england_region_populations = {
    "London": 8_908_081,
    "North West": 7_012_947,
    "North East and Yorkshire": 8_566_925,
    "Midlands": 10_537_679,
    "East of England": 6_493_188,
    "South East": 8_852_361,
    "South West": 5_605_997,
}

# Population figures for Scotland, Wales and Northern Ireland come from:
# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
home_nation_populations = {
    "England": 55_977_178,
    "Wales": 3_136_400,
    "Scotland": 5_454_000,
    "Northern Ireland": 1_876_000,
}

# Single lookup table keyed by area name: regions first, then the nations.
regional_populations = {**nhs_england_region_populations, **home_nation_populations}

# The UK entry is the total over the four home nations.
countries = ["England", "Scotland", "Wales", "Northern Ireland"]
uk_population = 0
for country in countries:
    uk_population += regional_populations[country]
regional_populations["UK"] = uk_population

In [None]:
def _area_scatter(area_data, measure, area, visible):
    """Return a line trace of one measure for one area, using the shared hover text."""
    return go.Scatter(
        x=area_data["date"],
        y=area_data["measure", measure],
        name=area,
        visible=visible,
        text=area_data["text"],
        hovertemplate="%{text}<extra></extra>",
    )


def _headline_annotation(label, as_of_text, value_text):
    """Return the top-left annotation dict quoting the latest UK-wide figure."""
    return dict(
        xref="paper",
        yref="paper",
        x=0.05,
        y=0.95,
        text=f"Total UK {label} as of {as_of_text}:<br>{value_text}",
        # Nested form rather than the `font_color` shorthand: these dicts are
        # also embedded in the buttons' `args` (presumably passed to the
        # browser without plotly.py expanding the shorthand -- TODO confirm).
        font={"color": "#000"},
        showarrow=False,
        align="left",
    )


fig = go.Figure()

areas = input_df.index.droplevel(1).unique()

# Per-area visibility, in trace order. Built from the area names so that the
# buttons below and the initial state cannot disagree, whatever the row order.
# "UK" and "England" start deselected on the count charts to avoid
# overshadowing the other lines; the rate charts start with only "UK" shown.
aggregate_areas = {"UK", "England"}
count_visibility = ["legendonly" if area in aggregate_areas else True for area in areas]
rate_visibility = [True if area == "UK" else "legendonly" for area in areas]
hidden = [False] * len(areas)

confirmed_cases_traces = []
deaths_traces = []
prevalence_traces = []
case_fatality_rate_traces = []

# add a line to the figure for each area
for area, initially_visible in zip(areas, count_visibility):
    area_data = input_df.T[[area]].reset_index().rename(columns={area: "measure", "index": "date"})
    area_data.columns = area_data.columns.rename(names=[None, None])

    area_data["measure", "delta_cases"] = area_data["measure", "confirmed_cases"].diff().fillna(0).astype(int)
    area_data["measure", "delta_deaths"] = area_data["measure", "deaths"].diff().fillna(0).astype(int)
    area_data["measure", "prevalence"] = area_data["measure", "confirmed_cases"] / regional_populations[area]
    area_data["measure", "case_fatality_rate"] = (
        area_data["measure", "deaths"] / area_data["measure", "confirmed_cases"]
    )

    # hover text shared by every trace of this area
    area_data["text"] = (
        area_data["date"].dt.strftime("%d %B") + " | " + f"{area}<br>Cases: "
        + area_data["measure", "confirmed_cases"].apply("{:,}".format)
        + " (" + area_data["measure", "delta_cases"].apply("{:+,}".format) + ")<br>Deaths: "
        + area_data["measure", "deaths"].apply("{:,}".format)
        + " (" + area_data["measure", "delta_deaths"].apply("{:+,}".format) + ")"
    )

    # the cases view is the one shown first; every other view starts hidden
    confirmed_cases_traces.append(
        _area_scatter(area_data, "confirmed_cases", area, visible=initially_visible)
    )
    deaths_traces.append(_area_scatter(area_data, "deaths", area, visible=False))
    prevalence_traces.append(_area_scatter(area_data, "prevalence", area, visible=False))
    case_fatality_rate_traces.append(
        _area_scatter(area_data, "case_fatality_rate", area, visible=False)
    )

# add all the traces; the button masks below rely on this order
for trace in confirmed_cases_traces + deaths_traces + prevalence_traces + case_fatality_rate_traces:
    fig.add_trace(trace)

# add a vertical line showing when the UK went into "lockdown"...
fig.add_shape(
    type="line",
    xref="x",
    yref="paper",
    x0=datetime.date(2020, 3, 23),
    y0=0,
    x1=datetime.date(2020, 3, 23),
    y1=0.9,
    opacity=0.8,
)

# ...and then annotate it with some text
lockdown_annotation = dict(
    xref="x",
    yref="paper",
    x=datetime.date(2020, 3, 23),
    y=0.7,
    text="Lockdown commenced 23/03/2020",
    font={"color": "#000"},
    showarrow=True,
    align="center",
    arrowhead=2,
    arrowsize=1,
    arrowwidth=1,
    arrowcolor="#636363",
    ax=-20,
    ay=0,
    xanchor="right",
    opacity=0.8,
)

total_cases = input_df.loc["UK", "confirmed_cases"].iloc[-1]
total_deaths = input_df.loc["UK", "deaths"].iloc[-1]
latest_prevalence = total_cases / regional_populations["UK"]
latest_case_fatality_rate = total_deaths / total_cases

# Day of month without zero padding. The previous "%-d" directive is
# platform-specific, and its format spec also began with a stray space.
as_of = input_df.columns[-1]
as_of_text = f"{as_of.day} {as_of:%B %Y}"

total_cases_annotation = _headline_annotation("Cases", as_of_text, f"{total_cases:,}")
total_deaths_annotation = _headline_annotation("Deaths", as_of_text, f"{total_deaths:,}")
total_prevalence_annotation = _headline_annotation(
    "Period Prevalence", as_of_text, f"{latest_prevalence:.2%}"
)
total_case_fatality_annotation = _headline_annotation(
    "Case Fatality Rate", as_of_text, f"{latest_case_fatality_rate:.1%}"
)

fig.update_layout(
    updatemenus=[{
        "type": "buttons",
        "direction": "right",
        "x": 0.505,
        "y": 1.125,
        # each button shows one group of traces and swaps title, axis and annotation
        "buttons": [
            {
                "label": "Cases",
                "method": "update",
                "args": [
                    {"visible": count_visibility + hidden * 3},
                    {
                        "title": "Confirmed Covid-19 Cases Per Region",
                        # tick format stated explicitly so this view matches the
                        # initial layout after another view has been shown
                        "yaxis": {"title": "Number of Confirmed Cases", "tickformat": ",d"},
                        "annotations": [lockdown_annotation] + [total_cases_annotation],
                    }
                ]
            },
            {
                "label": "Deaths",
                "method": "update",
                "args": [
                    {"visible": hidden + count_visibility + hidden * 2},
                    {
                        "title": "Covid-19 Deaths Per Region",
                        "yaxis": {"title": "Number of Deaths", "tickformat": ",d"},
                        "annotations": [lockdown_annotation] + [total_deaths_annotation],
                    }
                ]
            },
            {
                "label": "Prevalence",
                "method": "update",
                "args": [
                    {"visible": hidden * 2 + rate_visibility + hidden},
                    {
                        "title": "Covid-19 Prevalence Per Region",
                        "yaxis": {"title": "Prevalence", "tickformat": ".2%"},
                        "annotations": [lockdown_annotation] + [total_prevalence_annotation],
                    }
                ],
            },
            {
                "label": "Case Fatality Rate",
                "method": "update",
                "args": [
                    {"visible": hidden * 3 + rate_visibility},
                    {
                        "title": "Covid-19 Case Fatality Rate Per Region",
                        "yaxis": {"title": "Case Fatality Rate", "tickformat": ".1%"},
                        "annotations": [lockdown_annotation] + [total_case_fatality_annotation],
                    }
                ],
            }
        ]
    }],
    title={
        "text": "Confirmed Covid-19 Cases Per Region",
        "x": 0.45,
    },
    xaxis={
        "title": "Date",
        "tickformat": '%d %b',
        "tickangle": -45,
    },
    yaxis={
        "title": "Number of Confirmed Cases",
        "tickformat": ",d",
    },
    hovermode="closest",
    annotations=[lockdown_annotation, total_cases_annotation],
)

fig

In [None]:
fig = go.Figure(
    layout={
        "title": {
            "text": "Daily Number of New Covid-19 Cases Per Region",
            "x": 0.5
        },
        "xaxis": {
            "title": "Date",
            "tickformat": '%d %b',
            "tickangle": -45
        },
        "yaxis": {
            "title": "Daily Number of New Cases",
            "tickformat": ',d',
        },
        "legend": {
            "x": 0,
            "y": 1,
            "bgcolor": "RGBA(0,0,0,0)"
        },
        "hovermode": "closest"
    }
)

new_cases_traces = []
new_deaths_traces = []

areas = input_df.index.droplevel(1).unique()

# Per-area visibility, in trace order: only the "UK" bars are drawn, the other
# areas can be switched on from the legend. Built from the area names so the
# buttons below and the initial state agree whatever the row order is.
uk_only_visibility = [True if area == "UK" else "legendonly" for area in areas]
hidden = [False] * len(areas)

for area, initially_visible in zip(areas, uk_only_visibility):
    area_data = input_df.T[[area]].reset_index().rename(columns={area: "measure", "index": "date"})
    area_data.columns = area_data.columns.rename(names=[None, None])

    # day-on-day increase of the cumulative series, with negative steps clipped to zero
    area_data["measure", "new_cases"] = (
        area_data["measure", "confirmed_cases"].diff().fillna(0.).astype(int).clip(lower=0)
    )
    # change in the daily figure compared with the previous day
    area_data["measure", "delta_new_cases"] = (
        area_data["measure", "new_cases"].diff().fillna(0).astype(int)
    )
    area_data["measure", "new_deaths"] = (
        area_data["measure", "deaths"].diff().fillna(0.).astype(int).clip(lower=0)
    )
    area_data["measure", "delta_new_deaths"] = (
        area_data["measure", "new_deaths"].diff().fillna(0).astype(int)
    )

    # hover text shared by both bar traces of this area
    area_data["text"] = (
        area_data["date"].dt.strftime("%d %B") + " | " + f"{area}<br>New cases: "
        + area_data["measure", "new_cases"].apply("{:,}".format)
        + " (" + area_data["measure", "delta_new_cases"].apply("{:+,}".format) + ")<br>New deaths: "
        + area_data["measure", "new_deaths"].apply("{:,}".format)
        + " (" + area_data["measure", "delta_new_deaths"].apply("{:+,}".format) + ")"
    )

    new_cases_traces.append(
        go.Bar(
            x=area_data["date"],
            y=area_data["measure", "new_cases"],
            name=area,
            visible=initially_visible,
            text=area_data["text"],
            hovertemplate="%{text}<extra></extra>"
        )
    )

    new_deaths_traces.append(
        go.Bar(
            x=area_data["date"],
            y=area_data["measure", "new_deaths"],
            name=area,
            # the deaths view starts hidden; the "New Deaths" button reveals it
            visible=False,
            text=area_data["text"],
            hovertemplate="%{text}<extra></extra>"
        )
    )

# the button masks below rely on this order: all case bars, then all death bars
for trace in new_cases_traces + new_deaths_traces:
    fig.add_trace(trace)

# add a vertical line showing when the UK went into "lockdown"...
fig.add_shape(
    type="line",
    xref="x",
    yref="paper",
    x0=datetime.date(2020, 3, 23),
    y0=0,
    x1=datetime.date(2020, 3, 23),
    y1=0.9,
    opacity=0.8,
)

# ...and then annotate it with some text
fig.add_annotation(
    xref="x",
    yref="paper",
    x=datetime.date(2020, 3, 23),
    y=0.45,
    text="Lockdown commenced 23/03/2020",
    font_color="#000",
    showarrow=True,
    align="center",
    arrowhead=2,
    arrowsize=1,
    arrowwidth=1,
    arrowcolor="#636363",
    ax=-20,
    ay=0,
    xanchor="right",
    opacity=0.8,
)

fig.update_layout(
    updatemenus=[{
        "type": "buttons",
        "direction": "right",
        "x": 0.225,
        "y": 1.125,
        "buttons": [
            {
                "label": "New Cases",
                "method": "update",
                "args": [
                    {"visible": uk_only_visibility + hidden},
                    {
                        "title": "Daily Number of New Covid-19 Cases Per Region",
                        "yaxis": {"title": "Number of New Confirmed Cases", "tickformat": ",d"},
                    }
                ]
            },
            {
                "label": "New Deaths",
                "method": "update",
                "args": [
                    # boolean True for the UK trace; this mask previously
                    # carried the string "True"
                    {"visible": hidden + uk_only_visibility},
                    {
                        "title": "Daily Number of New Covid-19 Deaths Per Region",
                        "yaxis": {"title": "Number of New Deaths", "tickformat": ",d"},
                    }
                ]
            }
        ]
    }]
)

fig