In [104]:
import urllib
import urllib.request
from datetime import date, timedelta
from functools import reduce

import altair as alt
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as soup

In [105]:
# Configuration

## Jupyter options
# Disable pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
pd.set_option('mode.chained_assignment', None)

## Global variables
today = date.today()
# Derived from `today` (rather than a second date.today() call) so the two
# values can never disagree if the cell happens to run across midnight.
yesterday = today - timedelta(days=1)
# Relative directory the downloaded spreadsheet is written to as CSV.
import_directory = 'Imports/'

In [106]:
## Defining the trust codes relevant to MNA
# Every list below holds trust codes that are matched against the `code`
# column with `.isin(...)` further down the notebook, so list order does not
# affect which rows are selected.
### All
# Used to cut the national spreadsheet down to the trusts this notebook reports on.
our_trust_codes = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RNA', # The Dudley Group NHS Foundation Trust
    'RL4', # The Royal Wolverhampton NHS Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RXW', # Shrewsbury And Telford Hospital NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
    'RJE', # University Hospitals Of North Midlands NHS Trust
    'RBK', # Walsall Healthcare NHS Trust
    'RWP', # Worcestershire Acute Hospitals NHS Trust
    'R1A', # Worcestershire Health And Care NHS Trust
    'RL1', # The Robert Jones And Agnes Hunt Orthopaedic Hospital NHS Foundation Trust
    'RLY', # North Staffordshire Combined Healthcare NHS Trust
    'R1D', # Shropshire Community Health NHS Trust
    'RRE', # Midlands Partnership NHS Foundation Trust
]

### Groups
# Sub-groups of the codes above; each one is passed to get_latest_deaths()
# and/or used to filter the chart data.

# Express & Star group (one of the two inputs to trustcodes_mna).
trustcodes_expressandstar = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RNA', # The Dudley Group NHS Foundation Trust
    'RL4', # The Royal Wolverhampton NHS Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
    'RJE', # University Hospitals Of North Midlands NHS Trust
    'RBK', # Walsall Healthcare NHS Trust
    'RLY', # North Staffordshire Combined Healthcare NHS Trust
    'RRE', # Midlands Partnership NHS Foundation Trust
]

# Shropshire Star group (the other input to trustcodes_mna).
trustcodes_shropshirestar = [
    'RL1', # The Robert Jones And Agnes Hunt Orthopaedic Hospital NHS Foundation Trust
    'RXW', # Shrewsbury And Telford Hospital NHS Trust
    'R1D', # Shropshire Community Health NHS Trust
]

# Black Country and Birmingham trusts.
trustcodes_blackcountryandbirmingham = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RNA', # The Dudley Group NHS Foundation Trust
    'RL4', # The Royal Wolverhampton NHS Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
    'RBK', # Walsall Healthcare NHS Trust
]

# Sandwell and Birmingham trusts (all of these also appear in the Black Country and Birmingham list).
trustcodes_sandwellandbirmingham = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
]

# Worcestershire trusts (present in our_trust_codes but in neither newspaper group above).
trustcodes_worcestershire = [
    'RWP', # Worcestershire Acute Hospitals NHS Trust
    'R1A', # Worcestershire Health And Care NHS Trust
]

# Staffordshire trusts.
trustcodes_staffordshire = [
    'RJE', # University Hospitals Of North Midlands NHS Trust
    'RLY', # North Staffordshire Combined Healthcare NHS Trust
    'RRE', # Midlands Partnership NHS Foundation Trust
]

# Single-trust groups, kept as lists so they can be used with `.isin(...)` like the others.
trustcodes_wolverhampton = [
    'RL4', # The Royal Wolverhampton NHS Trust
]

trustcodes_walsall = [
    'RBK', # Walsall Healthcare NHS Trust
]

trustcodes_dudley = [
    'RNA', # The Dudley Group NHS Foundation Trust
]

# MNA group: every Express & Star code followed by any Shropshire Star code
# not already listed. dict.fromkeys keeps the first occurrence of each code
# and preserves insertion order, so the result is de-duplicated and ordered.
trustcodes_mna = list(
    dict.fromkeys([*trustcodes_expressandstar, *trustcodes_shropshirestar])
)

In [107]:
# Find URL
# Scrape the NHS England daily-deaths page for the first link to an .xlsx file.
page_url = 'https://www.england.nhs.uk/statistics/statistical-work-areas/covid-19-daily-deaths/'

# Use the response as a context manager so the connection is always closed.
with urllib.request.urlopen(page_url) as page:
    html = soup(page, 'html.parser')

# Quoted attribute value: matches any <a> whose href contains ".xlsx" without
# relying on a backslash escape inside a normal (non-raw) Python string.
xlsx_link = html.select_one('a[href*=".xlsx"]')
if xlsx_link is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError('No .xlsx link found on ' + page_url)
xlsx_url = xlsx_link.get('href')

In [108]:
xlsx_url

'https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/2020/07/COVID-19-total-announced-deaths-7-July-2020.xlsx'

In [109]:
# Convert to CSV
# Download the spreadsheet and keep a local CSV copy of the per-trust sheet.
csv_filename = import_directory + 'nhsdeathsbytrust.csv'

xlsx = pd.read_excel(
    xlsx_url,
    header=15,  # 0-indexed row of the sheet that is used as the column header row
    sheet_name='Tab4 Deaths by trust'
)

# index=False: the RangeIndex carries no information and would otherwise be
# read back from the CSV as a spurious 'Unnamed: 0' column.
xlsx.to_csv(csv_filename, index=False)

In [110]:
# Convert CSV to Pandas DataFrame
# Reading the CSV back means every column label (including the date columns)
# is a plain string, which the regex filter below relies on.
csv = pd.read_csv(csv_filename)

## Filtering down to our_trusts
our_trusts = csv[csv.Code.isin(our_trust_codes)] # filter to our trusts
our_trusts = our_trusts.filter(regex='(^Code$|^Name$|[0-9]{4}-.+|^Up to.01-Mar-20$)', axis=1) # filter to only the required columns
our_trusts = our_trusts.rename(columns={'Code': 'code', 'Name' : 'name'}) # clean up column names
our_trusts = pd.melt(our_trusts, id_vars=['code','name'], var_name='date of death', value_name='deaths') # un-pivot the date columns

# Map the "Up to 01-Mar-20" bucket onto the day before 1 March. The pattern is
# the same one used in the column filter above (any single character between
# "to" and "01"), so a label that passed the filter is always rewritten here.
our_trusts['date of death'] = our_trusts['date of death'].str.replace(
    r'^Up to.01-Mar-20$', '2020-02-29 00:00:00', regex=True
)
our_trusts['date of death'] = pd.to_datetime(our_trusts['date of death']) # clean all dates to YYYY-MM-DD

our_trusts['deaths'] = our_trusts['deaths'].astype(int)

## Adding cumulative deaths
# Select the 'deaths' column explicitly: an unselected groupby().cumsum()
# returns a frame covering every non-key column (or fails on the string and
# datetime columns), which cannot be assigned to a single column.
# NOTE(review): the running total follows row order, i.e. it assumes the date
# columns in the spreadsheet are already in chronological order - confirm.
our_trusts['cumulative deaths'] = our_trusts.groupby('name')['deaths'].cumsum()

## Cleaning float
our_trusts['deaths'] = our_trusts['deaths'].convert_dtypes(convert_integer=True)
our_trusts['cumulative deaths'] = our_trusts['cumulative deaths'].convert_dtypes(convert_integer=True)

In [111]:
our_trusts

Unnamed: 0,code,name,date of death,deaths,cumulative deaths
0,RYW,BIRMINGHAM COMMUNITY HEALTHCARE NHS FOUNDATION...,2020-02-29,0,0
1,RRE,MIDLANDS PARTNERSHIP NHS FOUNDATION TRUST,2020-02-29,0,0
2,RLY,NORTH STAFFORDSHIRE COMBINED HEALTHCARE NHS TRUST,2020-02-29,0,0
3,RXK,SANDWELL AND WEST BIRMINGHAM HOSPITALS NHS TRUST,2020-02-29,0,0
4,RXW,SHREWSBURY AND TELFORD HOSPITAL NHS TRUST,2020-02-29,0,0
...,...,...,...,...,...
1930,RRK,UNIVERSITY HOSPITALS BIRMINGHAM NHS FOUNDATION...,2020-07-06,0,958
1931,RJE,UNIVERSITY HOSPITALS OF NORTH MIDLANDS NHS TRUST,2020-07-06,0,350
1932,RBK,WALSALL HEALTHCARE NHS TRUST,2020-07-06,0,225
1933,RWP,WORCESTERSHIRE ACUTE HOSPITALS NHS TRUST,2020-07-06,0,275


In [112]:
def get_latest_deaths(code_group=False, days=10):
    """Return the latest day's figures plus the change in cumulative deaths.

    Reads the module-level ``our_trusts`` frame and keeps only the rows for
    the most recent ``date of death``. A ``diff`` column is added holding the
    difference between each trust's cumulative deaths on that day and on the
    day ``days`` days earlier.

    Parameters
    ----------
    code_group : list of str or False, optional
        Trust codes to restrict the result to. When given (and non-empty) the
        matching rows are summed into a single row for the latest date. When
        falsy (the default) one row per trust is returned.
    days : int, optional
        How many days before the latest date the comparison day lies.
        Defaults to 10, the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Per-trust rows with the original columns plus ``diff``; or, when
        ``code_group`` is given, one row with the columns ``date of death``,
        ``deaths``, ``cumulative deaths`` and ``diff``.

    Notes
    -----
    If ``our_trusts`` has no rows for the comparison day, ``diff`` is missing
    for the per-trust rows.
    """
    df = our_trusts.copy()

    last_day = df['date of death'].max()
    comparison_day = last_day - timedelta(days=days)

    df = df[(df['date of death'] == last_day) | (df['date of death'] == comparison_day)]
    # diff() subtracts the previous row within each trust, so make the
    # chronological order explicit; mergesort is stable and keeps the
    # original order of trusts within a date.
    df = df.sort_values('date of death', kind='mergesort')
    df['diff'] = df.groupby('code')['cumulative deaths'].diff().convert_dtypes(convert_integer=True)
    df = df[df['date of death'] == last_day]

    if code_group:
        df = df[df.code.isin(code_group)]
        # Sum only the numeric measures; summing the whole frame would also
        # try to aggregate the string columns 'code' and 'name'.
        df = df.groupby('date of death', as_index=False)[['deaths', 'cumulative deaths', 'diff']].sum()

    return df

In [113]:
get_latest_deaths()

Unnamed: 0,code,name,date of death,deaths,cumulative deaths,diff
1920,RYW,BIRMINGHAM COMMUNITY HEALTHCARE NHS FOUNDATION...,2020-07-06,0,37,0
1921,RRE,MIDLANDS PARTNERSHIP NHS FOUNDATION TRUST,2020-07-06,0,11,0
1922,RLY,NORTH STAFFORDSHIRE COMBINED HEALTHCARE NHS TRUST,2020-07-06,0,6,0
1923,RXK,SANDWELL AND WEST BIRMINGHAM HOSPITALS NHS TRUST,2020-07-06,0,377,1
1924,RXW,SHREWSBURY AND TELFORD HOSPITAL NHS TRUST,2020-07-06,0,166,1
1925,R1D,SHROPSHIRE COMMUNITY HEALTH NHS TRUST,2020-07-06,0,15,1
1926,RNA,THE DUDLEY GROUP NHS FOUNDATION TRUST,2020-07-06,0,263,0
1927,RL1,THE ROBERT JONES AND AGNES HUNT ORTHOPAEDIC HO...,2020-07-06,0,5,0
1928,RRJ,THE ROYAL ORTHOPAEDIC HOSPITAL NHS FOUNDATION ...,2020-07-06,0,15,0
1929,RL4,THE ROYAL WOLVERHAMPTON NHS TRUST,2020-07-06,0,286,1


In [114]:
get_latest_deaths(trustcodes_staffordshire)

Unnamed: 0,date of death,deaths,cumulative deaths,diff
0,2020-07-06,0,367,7


In [115]:
get_latest_deaths(trustcodes_worcestershire)

Unnamed: 0,date of death,deaths,cumulative deaths,diff
0,2020-07-06,0,303,2


In [116]:
get_latest_deaths(trustcodes_sandwellandbirmingham)

Unnamed: 0,date of death,deaths,cumulative deaths,diff
0,2020-07-06,0,1387,4


In [117]:
get_latest_deaths(trustcodes_blackcountryandbirmingham)

Unnamed: 0,date of death,deaths,cumulative deaths,diff
0,2020-07-06,0,2161,5


In [118]:
get_latest_deaths(trustcodes_expressandstar)

Unnamed: 0,date of death,deaths,cumulative deaths,diff
0,2020-07-06,0,2528,12


# Shropshire Star

In [119]:
get_latest_deaths(trustcodes_shropshirestar)

Unnamed: 0,date of death,deaths,cumulative deaths,diff
0,2020-07-06,0,186,2


# Charts

## Deaths on date died by trust

In [120]:
# Working copy of the tidy data; the later chart cells start from this frame.
chart_doddbt = our_trusts.copy()

# Daily deaths, one coloured line per trust.
alt.Chart(chart_doddbt).mark_line().encode(
    alt.X('date of death'),
    alt.Y('deaths'),
    alt.Color('name'),
)

In [121]:
# Horizontal bars of deaths per trust, faceted into one column per date of death.
alt.Chart(chart_doddbt).properties(width=220).mark_bar().encode(
    alt.Column('date of death'),
    alt.X('deaths'),
    alt.Y('name'),
    alt.Color('name'),
)

### Cumulative

In [122]:
# Running total of deaths, one coloured line per trust.
alt.Chart(chart_doddbt).mark_line().encode(
    alt.X('date of death'),
    alt.Y('cumulative deaths'),
    alt.Color('name'),
)

## Deaths on date died Shropshire

In [123]:
# Daily deaths summed across the Shropshire Star trusts.
chart_dodds = (
    chart_doddbt
    .loc[chart_doddbt['code'].isin(trustcodes_shropshirestar)]
    .groupby('date of death', as_index=False)['deaths']
    .sum()
)
alt.Chart(chart_dodds).mark_bar().encode(
    alt.X('date of death'),
    alt.Y('deaths'),
)

### Cumulative

In [124]:
# Cumulative deaths summed across the Shropshire Star trusts, drawn as an area.
chart_dodds = (
    chart_doddbt
    .loc[chart_doddbt['code'].isin(trustcodes_shropshirestar)]
    .groupby('date of death', as_index=False)['cumulative deaths']
    .sum()
)
alt.Chart(chart_dodds).mark_area().encode(
    alt.X('date of death'),
    alt.Y('cumulative deaths'),
)

### Cumulative by trust

In [125]:
# Cumulative deaths for each Shropshire Star trust, one line per trust.
is_shropshire_trust = chart_doddbt['code'].isin(trustcodes_shropshirestar)
chart_dodds = chart_doddbt.loc[is_shropshire_trust].copy()

alt.Chart(chart_dodds).mark_line().encode(
    alt.X('date of death'),
    alt.Y('cumulative deaths'),
    alt.Color('name'),
).configure_legend(orient='bottom', labelLimit=0)

## Deaths on date died Express & Star

In [126]:
# Daily deaths summed across the Express & Star trusts.
chart_dodde = (
    chart_doddbt
    .loc[chart_doddbt['code'].isin(trustcodes_expressandstar)]
    .groupby('date of death', as_index=False)['deaths']
    .sum()
)
alt.Chart(chart_dodde).mark_bar().encode(
    alt.X('date of death'),
    alt.Y('deaths'),
)

### Cumulative

In [127]:
# Cumulative deaths summed across the Express & Star trusts, drawn as an area.
chart_dodde = (
    chart_doddbt
    .loc[chart_doddbt['code'].isin(trustcodes_expressandstar)]
    .groupby('date of death', as_index=False)['cumulative deaths']
    .sum()
)
alt.Chart(chart_dodde).mark_area().encode(
    alt.X('date of death'),
    alt.Y('cumulative deaths'),
)

### Cumulative by trust

In [128]:
# Cumulative deaths for each Express & Star trust, one line per trust.
is_express_trust = chart_doddbt['code'].isin(trustcodes_expressandstar)
chart_dodde = chart_doddbt.loc[is_express_trust].copy()

# Explicit ordering of the colour scale / legend entries for the listed trusts.
express_legend_order = [
    'UNIVERSITY HOSPITALS BIRMINGHAM NHS FOUNDATION TRUST',
    'SANDWELL AND WEST BIRMINGHAM HOSPITALS NHS TRUST',
    'THE ROYAL WOLVERHAMPTON NHS TRUST',
    'THE DUDLEY GROUP NHS FOUNDATION TRUST',
    'UNIVERSITY HOSPITALS OF NORTH MIDLANDS NHS TRUST',
    'BIRMINGHAM COMMUNITY HEALTHCARE NHS FOUNDATION TRUST',
]

alt.Chart(chart_dodde).mark_line().encode(
    alt.X('date of death'),
    alt.Y('cumulative deaths'),
    alt.Color('name', sort=express_legend_order),
).configure_legend(direction='vertical', orient='bottom', labelLimit=0)

## Deaths on date died MNA

In [129]:
# Daily deaths summed across every trust in the combined MNA group.
chart_doddmna = (
    chart_doddbt
    .loc[chart_doddbt['code'].isin(trustcodes_mna)]
    .groupby('date of death', as_index=False)['deaths']
    .sum()
)
alt.Chart(chart_doddmna).mark_bar().encode(
    alt.X('date of death'),
    alt.Y('deaths'),
)

In [130]:
# Cumulative deaths summed across every trust in the combined MNA group, drawn as an area.
chart_doddmna = (
    chart_doddbt
    .loc[chart_doddbt['code'].isin(trustcodes_mna)]
    .groupby('date of death', as_index=False)['cumulative deaths']
    .sum()
)
alt.Chart(chart_doddmna).mark_area().encode(
    alt.X('date of death'),
    alt.Y('cumulative deaths'),
)