In [2]:
import pandas as pd
from datetime import date, timedelta
import urllib
import numpy as np
import altair as alt
from functools import reduce

In [3]:
# Configuration

## Jupyter options
# None switches off pandas' chained-assignment check for the whole notebook.
pd.set_option('mode.chained_assignment', None)

## Global variables
today = date.today()
yesterday = today - timedelta(days=1)
import_directory = 'Imports/'  # daily CSV copies of the NHS sheet are written here

## NHS File Format
# NOTE(review): DIRECTORY and DATE look like placeholders to be filled in with
# the two strftime formats below - confirm against wherever this is consumed.
nhs_file_format = (
    'https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/'
    'DIRECTORY/COVID-19-total-announced-deaths-DATE.xlsx'
)
nhs_file_date_format = '%-d-%B-%Y'
nhs_file_directory_format = '%Y/%m'

In [4]:
## Defining the trust codes relevant to MNA
### All
# Every trust the notebook tracks; used to filter the NHS sheet down to "our" rows.
our_trust_codes = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RNA', # The Dudley Group NHS Foundation Trust
    'RL4', # The Royal Wolverhampton NHS Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RXW', # Shrewsbury And Telford Hospital NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
    'RJE', # University Hospitals Of North Midlands NHS Trust
    'RBK', # Walsall Healthcare NHS Trust
    'RWP', # Worcestershire Acute Hospitals NHS Trust
    'R1A', # Worcestershire Health And Care NHS Trust
    'RL1', # The Robert Jones And Agnes Hunt Orthopaedic Hospital NHS Foundation Trust
    'RLY', # North Staffordshire Combined Healthcare NHS Trust
    'R1D', # Shropshire Community Health NHS Trust
    'RRE', # Midlands Partnership NHS Foundation Trust
]

### Groups
# Each group below is a subset of our_trust_codes used to aggregate results.
trustcodes_expressandstar = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RNA', # The Dudley Group NHS Foundation Trust
    'RL4', # The Royal Wolverhampton NHS Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
    'RJE', # University Hospitals Of North Midlands NHS Trust
    'RBK', # Walsall Healthcare NHS Trust
    'RLY', # North Staffordshire Combined Healthcare NHS Trust
    'RRE', # Midlands Partnership NHS Foundation Trust
]

trustcodes_shropshirestar = [
    'RL1', # The Robert Jones And Agnes Hunt Orthopaedic Hospital NHS Foundation Trust
    'RXW', # Shrewsbury And Telford Hospital NHS Trust
    'R1D', # Shropshire Community Health NHS Trust
]

trustcodes_blackcountryandbirmingham = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RNA', # The Dudley Group NHS Foundation Trust
    'RL4', # The Royal Wolverhampton NHS Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
    'RBK', # Walsall Healthcare NHS Trust
]

trustcodes_sandwellandbirmingham = [
    'RYW', # Birmingham Community Healthcare NHS Foundation Trust
    'RXK', # Sandwell And West Birmingham Hospitals NHS Trust
    'RRJ', # The Royal Orthopaedic Hospital NHS Foundation Trust
    'RRK', # University Hospitals Birmingham NHS Foundation Trust
]

trustcodes_worcestershire = [
    'RWP', # Worcestershire Acute Hospitals NHS Trust
    'R1A', # Worcestershire Health And Care NHS Trust
]

trustcodes_staffordshire = [
    'RJE', # University Hospitals Of North Midlands NHS Trust
    'RLY', # North Staffordshire Combined Healthcare NHS Trust
    'RRE', # Midlands Partnership NHS Foundation Trust
]

# Single-trust groups
trustcodes_wolverhampton = ['RL4']  # The Royal Wolverhampton NHS Trust
trustcodes_walsall = ['RBK']        # Walsall Healthcare NHS Trust
trustcodes_dudley = ['RNA']         # The Dudley Group NHS Foundation Trust

# create an MNA group from the de-duplicated sum of E&S and SS
# (dict.fromkeys keeps the first occurrence of each code, in list order)
trustcodes_mna = list(
    dict.fromkeys(trustcodes_expressandstar + trustcodes_shropshirestar)
)

In [5]:
# First report date to probe. The original note, written on 2020-04-15, says
# this was the first day/file to exist at that time.
start_date = date(year=2020, month=4, day=4)
# Last report date to probe: the day the notebook is run.
end_date = date.today()

def get_real_url(date):
    """Find the URL of the NHS deaths workbook published for ``date``.

    Candidate URLs are built from a fixed base path, a ``YYYY/MM/`` directory,
    one of two known file-name stems, the date written like ``4-April-2020``
    and an optional numeric revision suffix. They are tried highest suffix
    first (``-2``, ``-1``, then none), and within each suffix the "total" stem
    before the "all" stem. The first URL that can be opened wins.

    Parameters
    ----------
    date : datetime.date
        The publication day to look for. (The parameter name shadows the
        imported ``date`` class inside this function only.)

    Returns
    -------
    str or bool
        The first URL that opens successfully, or ``False`` when none does.
    """
    # Import the submodules explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.request`` is loaded.
    import http.client
    import urllib.error
    import urllib.request

    # builder variables
    ## how many suffixes to try ( highest so far is 1 — 20200422 )
    nhs_max_suffix = 2

    ## suffixes, highest revision first: '-2.xlsx', '-1.xlsx', '.xlsx'
    suffixes = [
        f'-{n}.xlsx' if n else '.xlsx'
        for n in range(nhs_max_suffix, -1, -1)
    ]

    ## filenames so far
    filenames = ['COVID-19-total-announced-deaths-', 'COVID-19-all-announced-deaths-']

    ## domain and initial path
    file_start = 'https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/'

    ## directory part of the path
    directory = date.strftime('%Y/%m/')

    ## date part of the file name, day without a leading zero.
    ## Built by hand because the '%-d' strftime directive is not portable.
    file_date = f"{date.day}-{date.strftime('%B-%Y')}"

    # testing
    for suffix in suffixes:
        for name in filenames:
            url = file_start + directory + name + file_date + suffix
            try:
                # The context manager closes the response; only reachability matters.
                with urllib.request.urlopen(url):
                    return url
            except (OSError, http.client.HTTPException):
                # URLError/HTTPError (e.g. 404) and socket errors are OSError
                # subclasses: treat them as "this candidate does not exist".
                continue

    return False
        
# Probe every day from start_date to end_date (inclusive) and keep only the
# days for which a file exists, as {'date': ..., 'url': ...} dictionaries.
#
# This is built as a list rather than a lazy filter/map chain: a lazy chain can
# only be consumed once, so re-running a later cell that iterates `data` would
# silently see no days at all.
data = []
for interval in range((end_date - start_date).days + 1):
    report_date = start_date + timedelta(days=interval)
    report_url = get_real_url(report_date)
    if report_url != False:  # get_real_url returns False when no file was found
        data.append({
            'date': report_date,
            'url': report_url
        })

In [6]:
def map_nhsdeaths_to_dataframe(x):
    """Load one day's NHS workbook and attach a tidy per-trust table to ``x``.

    Parameters
    ----------
    x : dict
        Must contain ``'date'`` (used via ``strftime`` for the CSV file name)
        and ``'url'`` (passed to ``pd.read_excel``).

    Returns
    -------
    dict
        The same ``x`` object, mutated in place, with a new ``'dataframe'``
        key. That frame holds one row per trust in ``our_trust_codes`` and per
        date column, with the columns ``code``, ``name``, ``date of death``,
        ``deaths`` and ``cumulative deaths``.

    Side effects
    ------------
    Writes the raw sheet to
    ``<import_directory>nhsdeathsbytrust-YYYYMMDD.csv`` (creating the directory
    when it is missing) and reads it back.
    """
    import os

    # Make sure the archive directory exists so to_csv does not fail on a fresh checkout.
    if import_directory:
        os.makedirs(import_directory, exist_ok=True)

    csv_filename = import_directory + 'nhsdeathsbytrust-' + x['date'].strftime('%Y%m%d') + '.csv'
    excel = pd.read_excel(
        x['url'],
        header=15,
        sheet_name='COVID19 total deaths by trust'
    )
    excel.to_csv(csv_filename)
    # NOTE(review): presumably the CSV round-trip is what turns the sheet's date
    # column headers into the 'YYYY-MM-DD ...' strings matched below - confirm.
    csv = pd.read_csv(csv_filename)

    ## Filtering down to our_trusts
    our_trusts = csv[csv.Code.isin(our_trust_codes)] # filter to our trusts
    our_trusts = our_trusts.filter(regex='(^Code$|^Name$|[0-9]{4}-.+|^Up to.01-Mar-20$)', axis=1) # filter to only the required columns
    our_trusts = our_trusts.rename(columns={'Code': 'code', 'Name' : 'name'}) # clean up column names
    our_trusts = pd.melt(our_trusts, id_vars=['code','name'], var_name='date of death', value_name='deaths') # un-pivot the date columns

    # Use the same pattern as the column filter above so that whatever single
    # character sits between "to" and "01" in the header is handled here too.
    our_trusts['date of death'] = our_trusts['date of death'].str.replace(
        r'^Up to.01-Mar-20$', '2020-02-29 00:00:00', regex=True
    )
    our_trusts['date of death'] = pd.to_datetime(our_trusts['date of death']) # clean all dates to YYYY-MM-DD

    ## Adding cumulative deaths
    # Select the 'deaths' column explicitly: cumsum over the whole frame would
    # also be attempted on the string and datetime columns.
    # Rows follow the sheet's column order after the melt; this assumes the
    # date columns are in chronological order - TODO confirm.
    our_trusts['cumulative deaths'] = our_trusts.groupby('name')['deaths'].cumsum()

    ## Cleaning float
    our_trusts['deaths'] = our_trusts['deaths'].convert_dtypes(convert_integer=True)
    our_trusts['cumulative deaths'] = our_trusts['cumulative deaths'].convert_dtypes(convert_integer=True)

    ## Add dataframe as a new column
    x['dataframe'] = our_trusts

    return x

def map_add_reported_day(x):
    """Stamp every row of ``x['dataframe']`` with the report date ``x['date']``.

    The frame is modified in place (a ``'date reported'`` column is set) and
    the same ``x`` dictionary is returned.
    """
    report_frame = x['dataframe']
    report_frame['date reported'] = x['date']
    return x

# Column order of the combined table.
historical_columns = ['code','name','date of death','date reported','deaths','cumulative deaths']

# Load each available day, stamp it with its report date, and collect the frames.
daily_frames = [
    map_add_reported_day(map_nhsdeaths_to_dataframe(day_data))['dataframe']
    for day_data in data
]

# Concatenate once at the end: DataFrame.append no longer exists in pandas 2.x,
# and appending one frame at a time re-copies all earlier rows on every step.
if daily_frames:
    our_trusts_historical = (
        pd.concat(daily_frames, ignore_index=True)
        .reindex(columns=historical_columns)
    )
else:
    # No files were found: keep the expected (empty) schema.
    our_trusts_historical = pd.DataFrame(columns=historical_columns)

In [7]:
def get_latest_deaths(code_group=False):
    """Summarise the newest report and how it changed from the previous day.

    Total deaths per trust are taken from the most recent ``'date reported'``
    in ``our_trusts_historical`` and compared with the totals reported one
    calendar day earlier.

    Parameters
    ----------
    code_group : list of str or False, optional
        Trust codes to aggregate. When falsy (the default) one row per trust
        is returned; otherwise the matching trusts are summed into one row.

    Returns
    -------
    pandas.DataFrame
        Without ``code_group``: columns ``code``, ``name``, ``date reported``,
        ``deaths`` and ``reporting diff``, sorted by ``reporting diff``
        descending. With ``code_group``: columns ``date reported``, ``deaths``
        and ``reporting diff``. ``reporting diff`` is ``<NA>`` for a trust with
        no report on the previous calendar day.
    """
    reports = our_trusts_historical

    last_day = reports['date reported'].max()
    penultimate_day = last_day - timedelta(days=1)

    # Keep just the two most recent calendar days of reporting.
    is_recent = (reports['date reported'] == last_day) | (reports['date reported'] == penultimate_day)
    totals = reports[is_recent].groupby(['code','name','date reported'], as_index=False)['deaths'].sum()

    # groupby sorts by its keys, so within each code the earlier report comes
    # first and diff() yields (latest total - previous total).
    totals['reporting diff'] = totals.groupby('code')['deaths'].diff().convert_dtypes(convert_integer=True)

    latest = totals[totals['date reported'] == last_day]
    latest = latest.sort_values(by=['reporting diff'], ascending=False)

    if code_group:
        latest = latest[latest.code.isin(code_group)]
        # Sum only the numeric columns; summing the whole frame would also
        # "add up" (concatenate) the code and name strings.
        latest = latest.groupby('date reported', as_index=False)[['deaths', 'reporting diff']].sum()

    return latest

In [8]:
# Per-trust totals from the newest report, with the change since the previous
# day's report, for every tracked trust (no group filter).
get_latest_deaths()

Unnamed: 0,code,name,date reported,deaths,reporting diff
21,RRK,UNIVERSITY HOSPITALS BIRMINGHAM NHS FOUNDATION...,2020-05-20,871,6
7,RJE,UNIVERSITY HOSPITALS OF NORTH MIDLANDS NHS TRUST,2020-05-20,278,4
27,RXW,SHREWSBURY AND TELFORD HOSPITAL NHS TRUST,2020-05-20,129,4
23,RWP,WORCESTERSHIRE ACUTE HOSPITALS NHS TRUST,2020-05-20,242,3
25,RXK,SANDWELL AND WEST BIRMINGHAM HOSPITALS NHS TRUST,2020-05-20,332,2
11,RL4,THE ROYAL WOLVERHAMPTON NHS TRUST,2020-05-20,260,1
29,RYW,BIRMINGHAM COMMUNITY HEALTHCARE NHS FOUNDATION...,2020-05-20,33,1
1,R1A,WORCESTERSHIRE HEALTH AND CARE NHS TRUST,2020-05-20,23,0
3,R1D,SHROPSHIRE COMMUNITY HEALTH NHS TRUST,2020-05-20,11,0
5,RBK,WALSALL HEALTHCARE NHS TRUST,2020-05-20,192,0


In [9]:
# Newest totals and day-on-day change, summed over the Staffordshire trusts.
get_latest_deaths(trustcodes_staffordshire)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-05-20,289,4


In [10]:
# Newest totals and day-on-day change, summed over the Worcestershire trusts.
get_latest_deaths(trustcodes_worcestershire)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-05-20,265,3


In [11]:
# Newest totals and day-on-day change, summed over the Sandwell and Birmingham trusts.
get_latest_deaths(trustcodes_sandwellandbirmingham)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-05-20,1248,9


In [12]:
# Newest totals and day-on-day change, summed over the Black Country and Birmingham trusts.
get_latest_deaths(trustcodes_blackcountryandbirmingham)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-05-20,1941,10


In [13]:
# Newest totals and day-on-day change, summed over the Express & Star trusts.
get_latest_deaths(trustcodes_expressandstar)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-05-20,2230,14


# Shropshire Star

In [14]:
# Newest totals and day-on-day change, summed over the Shropshire Star trusts.
get_latest_deaths(trustcodes_shropshirestar)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-05-20,145,4


# Charts

## Deaths on date died by trust

In [15]:
# Base table for all charts: deaths per trust and date of death, taken from
# the most recent report only.
chart_doddbt_latest = our_trusts_historical['date reported'].max()
chart_doddbt = our_trusts_historical[our_trusts_historical['date reported'] == chart_doddbt_latest]
chart_doddbt = chart_doddbt.groupby(['code','name','date of death'], as_index=False)['deaths'].sum()

# Running total per trust. The 'deaths' column is selected explicitly because
# cumsum over the whole frame would also be attempted on the string and
# datetime columns. The groupby above leaves rows sorted by date within each
# trust, so the running total accumulates in date order.
chart_doddbt['cumulative deaths'] = chart_doddbt.groupby('name')['deaths'].cumsum()

# Daily deaths by date of death, one line per trust.
alt.Chart(chart_doddbt).mark_line().encode(
    x='date of death',
    y='deaths',
    color='name'
)

In [16]:
# Horizontal bars of deaths per trust, faceted into one column per date of death.
alt.Chart(chart_doddbt).mark_bar().encode(
    column=alt.Column('date of death'),
    x=alt.X('deaths'),
    y=alt.Y('name'),
    color=alt.Color('name'),
).properties(width=220)

### Cumulative

In [17]:
# Running total of deaths by date of death, one line per trust.
alt.Chart(chart_doddbt).mark_line().encode(
    x=alt.X('date of death'),
    y=alt.Y('cumulative deaths'),
    color=alt.Color('name'),
)

## Deaths on date died Shropshire

In [18]:
# Daily deaths summed across the Shropshire Star trusts.
chart_dodds = (
    chart_doddbt[chart_doddbt.code.isin(trustcodes_shropshirestar)]
    .groupby(['date of death'], as_index=False)['deaths']
    .sum()
)
alt.Chart(chart_dodds).mark_bar().encode(
    x=alt.X('date of death'),
    y=alt.Y('deaths'),
)

### Cumulative

In [19]:
# Per-trust running totals added together across the Shropshire Star trusts.
chart_dodds = (
    chart_doddbt[chart_doddbt.code.isin(trustcodes_shropshirestar)]
    .groupby(['date of death'], as_index=False)['cumulative deaths']
    .sum()
)
alt.Chart(chart_dodds).mark_area().encode(
    x=alt.X('date of death'),
    y=alt.Y('cumulative deaths'),
)

### Cumulative by trust

In [20]:
# Running totals for each Shropshire Star trust, one line per trust,
# with the legend placed underneath the chart.
chart_dodds = chart_doddbt[chart_doddbt.code.isin(trustcodes_shropshirestar)]
alt.Chart(chart_dodds).mark_line().encode(
    x=alt.X('date of death'),
    y=alt.Y('cumulative deaths'),
    color=alt.Color('name'),
).configure_legend(labelLimit=0,orient='bottom')

## Deaths on date died Express & Star

In [21]:
# Daily deaths summed across the Express & Star trusts.
chart_dodde = (
    chart_doddbt[chart_doddbt.code.isin(trustcodes_expressandstar)]
    .groupby(['date of death'], as_index=False)['deaths']
    .sum()
)
alt.Chart(chart_dodde).mark_bar().encode(
    x=alt.X('date of death'),
    y=alt.Y('deaths'),
)

### Cumulative

In [22]:
# Per-trust running totals added together across the Express & Star trusts.
chart_dodde = (
    chart_doddbt[chart_doddbt.code.isin(trustcodes_expressandstar)]
    .groupby(['date of death'], as_index=False)['cumulative deaths']
    .sum()
)
alt.Chart(chart_dodde).mark_area().encode(
    x=alt.X('date of death'),
    y=alt.Y('cumulative deaths'),
)

### Cumulative by trust

In [23]:
# Running totals for each Express & Star trust, one line per trust.
chart_dodde = chart_doddbt[chart_doddbt.code.isin(trustcodes_expressandstar)]

# Explicit ordering for the colour scale; only these six names are listed.
chart_dodde_name_order = [
    'UNIVERSITY HOSPITALS BIRMINGHAM NHS FOUNDATION TRUST',
    'SANDWELL AND WEST BIRMINGHAM HOSPITALS NHS TRUST',
    'THE ROYAL WOLVERHAMPTON NHS TRUST',
    'THE DUDLEY GROUP NHS FOUNDATION TRUST',
    'UNIVERSITY HOSPITALS OF NORTH MIDLANDS NHS TRUST',
    'BIRMINGHAM COMMUNITY HEALTHCARE NHS FOUNDATION TRUST'
]

alt.Chart(chart_dodde).mark_line().encode(
    x=alt.X('date of death'),
    y=alt.Y('cumulative deaths'),
    color=alt.Color('name', sort=chart_dodde_name_order),
).configure_legend(labelLimit=0, orient='bottom',direction='vertical')

## Deaths on date died MNA

In [24]:
# Daily deaths summed across all MNA trusts (Express & Star plus Shropshire Star).
chart_doddmna = (
    chart_doddbt[chart_doddbt.code.isin(trustcodes_mna)]
    .groupby(['date of death'], as_index=False)['deaths']
    .sum()
)
alt.Chart(chart_doddmna).mark_bar().encode(
    x=alt.X('date of death'),
    y=alt.Y('deaths'),
)

In [25]:
# Per-trust running totals added together across all MNA trusts.
chart_doddmna = (
    chart_doddbt[chart_doddbt.code.isin(trustcodes_mna)]
    .groupby(['date of death'], as_index=False)['cumulative deaths']
    .sum()
)
alt.Chart(chart_doddmna).mark_area().encode(
    x=alt.X('date of death'),
    y=alt.Y('cumulative deaths'),
)