In [33]:
import pandas as pd
from datetime import date, timedelta
import urllib
import numpy as np
import altair as alt
from functools import reduce

In [34]:
# Configuration

## Jupyter options
### Switch off pandas' chained-assignment warning for the whole notebook
pd.options.mode.chained_assignment = None

## Global variables
today = date.today()
yesterday = date.today() + timedelta(days=-1)
import_directory = 'Imports/'

## NHS File Format
### DIRECTORY and DATE are placeholders that get substituted per file
nhs_file_format = (
    'https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/'
    'DIRECTORY/COVID-19-total-announced-deaths-DATE.xlsx'
)
### NOTE: '%-d' (day without zero padding) is a platform-specific strftime extension
nhs_file_date_format = '%-d-%B-%Y'
nhs_file_directory_format = '%Y/%m'

In [35]:
## Defining the trust codes relevant to MNA
### Official name of every trust we track, keyed by its code.
### The insertion order of this lookup is the order of our_trust_codes.
trust_names = {
    'RYW': 'Birmingham Community Healthcare NHS Foundation Trust',
    'RNA': 'The Dudley Group NHS Foundation Trust',
    'RL4': 'The Royal Wolverhampton NHS Trust',
    'RXK': 'Sandwell And West Birmingham Hospitals NHS Trust',
    'RXW': 'Shrewsbury And Telford Hospital NHS Trust',
    'RRJ': 'The Royal Orthopaedic Hospital NHS Foundation Trust',
    'RRK': 'University Hospitals Birmingham NHS Foundation Trust',
    'RJE': 'University Hospitals Of North Midlands NHS Trust',
    'RBK': 'Walsall Healthcare NHS Trust',
    'RWP': 'Worcestershire Acute Hospitals NHS Trust',
    'R1A': 'Worcestershire Health And Care NHS Trust',
    'RL1': 'The Robert Jones And Agnes Hunt Orthopaedic Hospital NHS Foundation Trust',
    'RLY': 'North Staffordshire Combined Healthcare NHS Trust',
}

### All
our_trust_codes = list(trust_names)

### Groups (see trust_names for the full name behind each code)
group_expressandstar = ['RYW', 'RNA', 'RL4', 'RXK', 'RRJ', 'RRK', 'RJE', 'RBK', 'RLY']

group_shropshirestar = ['RL1', 'RXW']

group_blackcountryandbirmingham = ['RYW', 'RNA', 'RL4', 'RXK', 'RRJ', 'RRK', 'RBK']

group_sandwellandbirmingham = ['RYW', 'RXK', 'RRJ', 'RRK']

group_worcestershire = ['RWP', 'R1A']

group_staffordshire = ['RJE', 'RLY']

In [36]:
# Date range of daily files to look for: from the first file known to exist
# (checked on 2020-04-15) up to and including the current day.
start_date = date(year=2020, month=4, day=4)
end_date = date.today()

def get_real_url(date):
    """Return the URL of the NHS deaths spreadsheet published on ``date``.

    Two file-name variants are tried in order: the configured
    'total-announced-deaths' name and the alternative 'all-announced-deaths'
    name. The first one the server answers successfully is returned.

    Parameters
    ----------
    date : datetime.date
        Publication day to look up. (The parameter name shadows the imported
        ``date`` class inside this function; it is kept for compatibility.)

    Returns
    -------
    str or bool
        The URL that responded, or ``False`` when neither variant could be
        opened.
    """
    # A bare `import urllib` does not guarantee the `request` submodule is
    # loaded, so import exactly what is used here.
    import http.client
    import urllib.request

    v1Url = nhs_file_format.replace(
        'DATE',
        date.strftime(nhs_file_date_format)
    ).replace(
        'DIRECTORY',
        date.strftime(nhs_file_directory_format)
    )
    v2Url = v1Url.replace('total-announced-deaths','all-announced-deaths')

    for candidate in (v1Url, v2Url):
        try:
            # The context manager closes the connection once the probe succeeds.
            with urllib.request.urlopen(candidate):
                return candidate
        except (OSError, http.client.HTTPException):
            # URLError/HTTPError (e.g. 404) and socket errors are OSError
            # subclasses: treat them as "this variant is not available".
            continue
    return False
        
# One record per day between start_date and end_date (inclusive), keeping only
# the days for which a published file was found.
# Built as a list rather than a lazy filter/map chain: a lazy iterator can be
# consumed only once, so re-running a later cell would silently see no data.
data = [
    record
    for record in (
        {'date': day, 'url': get_real_url(day)}
        for day in (
            start_date + timedelta(days=interval)
            for interval in range((end_date - start_date).days + 1)
        )
    )
    if record['url'] is not False  # get_real_url returns False when no file exists
]

In [37]:
# for each record in `data` (one per daily file that was found)
    # read the Excel sheet and save a CSV copy
    # filter to our trusts and clean the columns
    # create a dataframe
    # add a 'date reported' column based on the file's publication date
    # append this to the combined historical dataframe

# download, archive as csv, filter and clean; returns the input record with a 'dataframe' entry added
def map_nhsdeaths_to_dataframe(x):
    """Load one day's NHS spreadsheet and attach a tidy per-trust dataframe to ``x``.

    The sheet is read from ``x['url']``, saved as a CSV under
    ``import_directory`` and read back, so the date column headers become
    plain strings. It is then narrowed to ``our_trust_codes`` and un-pivoted
    to one row per trust and date of death.

    Parameters
    ----------
    x : dict
        Record with a ``'date'`` entry (publication day) and a ``'url'`` entry
        (location of the spreadsheet).

    Returns
    -------
    dict
        The same ``x`` object, mutated to carry a ``'dataframe'`` entry with
        the columns ``code``, ``name``, ``date of death``, ``deaths`` and
        ``cumulative deaths``.
    """
    import os  # local import: only needed to create the archive directory

    # DataFrame.to_csv does not create missing directories.
    if import_directory:
        os.makedirs(import_directory, exist_ok=True)

    csv_filename = import_directory + 'nhsdeathsbytrust-' + x['date'].strftime('%Y%m%d') + '.csv'
    excel = pd.read_excel(
        x['url'],
        header=15,  # NOTE(review): assumes the column headers sit on the 16th row of every file
        sheet_name='COVID19 total deaths by trust'
    )
    excel.to_csv(csv_filename)
    csv = pd.read_csv(csv_filename)

    ## Filtering down to our_trusts
    our_trusts = csv[csv.Code.isin(our_trust_codes)] # filter to our trusts
    our_trusts = our_trusts.filter(regex='(^Code$|^Name$|[0-9]{4}-.+|^Up to.01-Mar-20$)', axis=1) # filter to only the required columns
    our_trusts = our_trusts.rename(columns={'Code': 'code', 'Name' : 'name'}) # clean up column names
    our_trusts = pd.melt(our_trusts, id_vars=['code','name'], var_name='date of death', value_name='deaths') # un-pivot the date columns

    # Use the same pattern as the column filter above, so a label whose
    # separator is not a plain space is still rewritten before date parsing.
    our_trusts['date of death'] = our_trusts['date of death'].str.replace(
        r'^Up to.01-Mar-20$', '2020-02-29 00:00:00', regex=True
    )
    our_trusts['date of death'] = pd.to_datetime(our_trusts['date of death']) # clean all dates to YYYY-MM-DD

    ## Adding cumulative deaths
    # Accumulate only the 'deaths' column, per trust code, in date order.
    # Assignment aligns on the index, so the row order of our_trusts is kept.
    our_trusts['cumulative deaths'] = (
        our_trusts.sort_values('date of death').groupby('code')['deaths'].cumsum()
    )

    ## Cleaning float
    our_trusts['deaths'] = our_trusts['deaths'].convert_dtypes(convert_integer=True)
    our_trusts['cumulative deaths'] = our_trusts['cumulative deaths'].convert_dtypes(convert_integer=True)

    ## Add dataframe as a new entry of the record
    x['dataframe'] = our_trusts

    return x

def map_add_reported_day(x):
    """Stamp the record's dataframe with the day the figures were reported.

    Writes ``x['date']`` into a 'date reported' column of ``x['dataframe']``
    (modifying that dataframe in place) and returns the same record ``x``.
    """
    reported_on = x['date']
    frame = x['dataframe']
    frame['date reported'] = reported_on
    return x

# Combine every day's cleaned dataframe into one long historical dataframe.
# The frames are collected first and concatenated once: growing a dataframe
# with repeated DataFrame.append is quadratic, and append was removed in pandas 2.0.
historical_columns = ['code','name','date of death','date reported','deaths','cumulative deaths']

daily_frames = [
    map_add_reported_day(map_nhsdeaths_to_dataframe(day))['dataframe']
    for day in data
]

if daily_frames:
    our_trusts_historical = (
        pd.concat(daily_frames, ignore_index=True)
        .reindex(columns=historical_columns)  # fixed column order
    )
else:
    # No files were found: keep an empty frame with the expected columns.
    our_trusts_historical = pd.DataFrame(columns=historical_columns)

In [38]:
# Most recent report present in the data, and the calendar day before it.
last_day = our_trusts_historical['date reported'].max()
penultimate_day = last_day - timedelta(days=1)  # NOTE(review): assumes a file was also published the previous day

# Total deaths per trust for each of the two most recent reports.
latest_deaths = our_trusts_historical[(our_trusts_historical['date reported'] == last_day) | (our_trusts_historical['date reported'] == penultimate_day)]
latest_deaths = latest_deaths.groupby(['code','name','date reported'], as_index=False)['deaths'].sum()

# Day-on-day change, computed within each trust code. A frame-wide diff would
# subtract a different trust's total whenever a trust has only one report.
latest_deaths = latest_deaths.sort_values(by=['code', 'date reported'])
latest_deaths['reporting diff'] = latest_deaths.groupby('code')['deaths'].diff().convert_dtypes(convert_integer=True)

# Keep only the latest report, largest increase first.
latest_deaths = latest_deaths[latest_deaths['date reported'] == last_day]
latest_deaths = latest_deaths.sort_values(by=['reporting diff'], ascending=False)

def get_latest_deaths_by_group(code_group, deaths=None):
    """Sum the latest deaths and reporting differences over a group of trusts.

    Parameters
    ----------
    code_group : list of str
        Trust codes to include.
    deaths : pandas.DataFrame, optional
        Frame with 'code', 'date reported', 'deaths' and 'reporting diff'
        columns. Defaults to the notebook-level ``latest_deaths``.

    Returns
    -------
    pandas.DataFrame
        One row per 'date reported' with the summed 'deaths' and
        'reporting diff' of the selected trusts.
    """
    if deaths is None:
        deaths = latest_deaths
    output = deaths[deaths.code.isin(code_group)]
    # Sum only the numeric measures: summing every column would concatenate
    # the 'code' and 'name' strings on pandas versions that do not drop them.
    output = output.groupby('date reported', as_index=False)[['deaths', 'reporting diff']].sum()
    return output

In [39]:
# Per-trust totals from the latest report, ordered by day-on-day change (largest first)
latest_deaths

Unnamed: 0,code,name,date reported,deaths,reporting diff
17,RRK,UNIVERSITY HOSPITALS BIRMINGHAM NHS FOUNDATION...,2020-04-17,546,41
21,RXK,SANDWELL AND WEST BIRMINGHAM HOSPITALS NHS TRUST,2020-04-17,231,12
9,RL4,THE ROYAL WOLVERHAMPTON NHS TRUST,2020-04-17,189,7
19,RWP,WORCESTERSHIRE ACUTE HOSPITALS NHS TRUST,2020-04-17,119,7
5,RJE,UNIVERSITY HOSPITALS OF NORTH MIDLANDS NHS TRUST,2020-04-17,115,5
13,RNA,THE DUDLEY GROUP NHS FOUNDATION TRUST,2020-04-17,165,3
23,RXW,SHREWSBURY AND TELFORD HOSPITAL NHS TRUST,2020-04-17,55,3
3,RBK,WALSALL HEALTHCARE NHS TRUST,2020-04-17,99,1
1,R1A,WORCESTERSHIRE HEALTH AND CARE NHS TRUST,2020-04-17,9,0
7,RL1,THE ROBERT JONES AND AGNES HUNT ORTHOPAEDIC HO...,2020-04-17,2,0


In [40]:
# Combined latest figures for the trusts in group_staffordshire (RJE, RLY)
get_latest_deaths_by_group(group_staffordshire)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-04-17,116,5


In [41]:
# Combined latest figures for the trusts in group_worcestershire (RWP, R1A)
get_latest_deaths_by_group(group_worcestershire)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-04-17,128,7


In [42]:
# Combined latest figures for the trusts in group_sandwellandbirmingham (RYW, RXK, RRJ, RRK)
get_latest_deaths_by_group(group_sandwellandbirmingham)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-04-17,793,53


In [43]:
# Combined latest figures for the seven trusts in group_blackcountryandbirmingham
get_latest_deaths_by_group(group_blackcountryandbirmingham)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-04-17,1246,64


In [44]:
# Combined latest figures for the nine trusts in group_expressandstar
get_latest_deaths_by_group(group_expressandstar)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-04-17,1362,69


# Shropshire Star

In [45]:
# Combined latest figures for the trusts in group_shropshirestar (RL1, RXW)
get_latest_deaths_by_group(group_shropshirestar)

Unnamed: 0,date reported,deaths,reporting diff
0,2020-04-17,57,3
