# Processing data from ONS

In [1]:
import datetime

import pandas as pd

In [2]:
def convert_year_week_to_date(d):
    """Convert a ``'<year>-week-<n>'`` label to the date of that week's Friday.

    The label is interpreted as an ISO 8601 year/week pair and the Friday of
    that week is returned, e.g. ``'2021-week-1'`` -> ``'2021-01-08'`` and
    ``'2020-week-53'`` -> ``'2021-01-01'``.

    Parsing with the ISO directives (``%G``/``%V``/``%u``) replaces the
    earlier ``%W``-based parsing, which needed a hard-coded one-week shift
    for 2020 and returned dates one week late for every other year that
    starts on a Tuesday, Wednesday or Thursday (e.g. 2019).

    Args:
        d: Week label such as ``'2021-week-7'``.

    Returns:
        The Friday of that week formatted as ``'YYYY-MM-DD'``.

    Raises:
        ValueError: If ``d`` does not match ``'<year>-week-<n>'``.
    """
    # '-5' selects ISO weekday 5 (Friday), treated as the last day of the week.
    friday = datetime.datetime.strptime(d + '-5', '%G-week-%V-%u')
    return friday.strftime('%Y-%m-%d')

assert convert_year_week_to_date('2020-week-1') == '2020-01-03'
assert convert_year_week_to_date('2020-week-53') == '2021-01-01'
assert convert_year_week_to_date('2021-week-1') == '2021-01-08'
assert convert_year_week_to_date('2021-week-7') == '2021-02-19'

In [3]:
def extract_data(df, cause_of_death):
    """Build a date-indexed table of weekly registered deaths for one cause.

    Rows of ``df`` are kept when ``CauseOfDeath`` equals ``cause_of_death``
    and ``RegistrationOrOccurrence`` is ``"Registrations"``. Each kept row
    gets a ``date`` derived from its ``calendar-years`` and ``week-number``
    values via ``convert_year_week_to_date``.

    Args:
        df: Frame with the columns ``CauseOfDeath``,
            ``RegistrationOrOccurrence``, ``calendar-years``,
            ``week-number``, ``Geography``, ``PlaceOfDeath`` and ``v4_0``.
        cause_of_death: Value to match in the ``CauseOfDeath`` column.

    Returns:
        A DataFrame indexed by ``date`` with one column per
        (Geography, PlaceOfDeath) pair, named
        ``<Geography>___<PlaceOfDeath>``, holding the ``v4_0`` values
        (combined with ``pivot_table``'s default aggregation when a
        combination occurs more than once).
    """
    def week_label(row):
        # e.g. calendar-years=2021, week-number='week-7' -> '2021-week-7'
        return f"{row['calendar-years']}-{row['week-number']}"

    selected = df.query('CauseOfDeath == @cause_of_death & RegistrationOrOccurrence == "Registrations"').copy()
    selected['week'] = selected.apply(week_label, axis=1)
    selected['date'] = selected['week'].apply(convert_year_week_to_date)

    wide = selected[['Geography', 'date', 'PlaceOfDeath', 'v4_0']].pivot_table(
        index='date',
        columns=['Geography', 'PlaceOfDeath'],
        values='v4_0',
    )
    # Flatten the two-level column index into single string labels.
    wide.columns = wide.columns.map(lambda pair: pair[0] + '___' + pair[1])
    return wide

def build_data(df20, df21, cause_of_death):
    """Extract one cause of death from both yearly frames and stack the rows.

    Args:
        df20: Raw frame for 2020, in the layout ``extract_data`` expects.
        df21: Raw frame for 2021, in the same layout.
        cause_of_death: Value to match in the ``CauseOfDeath`` column.

    Returns:
        The ``extract_data`` result for ``df20`` followed row-wise by the
        result for ``df21``.
    """
    yearly_tables = [extract_data(frame, cause_of_death) for frame in (df20, df21)]
    return pd.concat(yearly_tables)

In [4]:
# Load the 2020 and 2021 weekly-deaths extracts (local-authority level).
df20, df21 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/Users/sbkr014/Downloads/covid-ons/weekly-deaths-local-authority-2020.csv',
        '/Users/sbkr014/Downloads/covid-ons/weekly-deaths-local-authority-2021-v7.csv',
    )
)
# One wide table per cause of death, each covering both years.
cvd_df, all_df = (
    build_data(df20, df21, cause) for cause in ('COVID 19', 'All causes')
)
# Sanity check: both tables expose the same number of columns
# (presumably 336 geographies x 6 places of death).
assert len(all_df.columns) == len(cvd_df.columns) == 336 * 6

In [42]:
# Write both wide tables to the live data directory as CSV; the paths are
# relative to the directory the notebook is run from.
cvd_df.to_csv('../../data/live/ons/england/mortality/weekly_covid_deaths_local_authority.csv')
all_df.to_csv('../../data/live/ons/england/mortality/weekly_all_deaths_local_authority.csv')

## Generate list of options for ontology

In [5]:
import os
import sys

In [6]:
# Add the parent of the current working directory to sys.path (once),
# presumably so that the project's `app` package can be imported below.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [7]:
from app.utils.naming import format_component_name

In [8]:
# Show the distinct places of death, formatted as component names and sorted.
print(sorted(map(format_component_name, df20['PlaceOfDeath'].unique())))

['care_home', 'elsewhere', 'home', 'hospice', 'hospital', 'other_communal_establishment']


In [62]:
# Map each formatted geography name back to its original spelling in the
# 2020 data. Every row is visited, so a repeated geography just rewrites its
# own entry; if two spellings format to the same key, the later row wins.
name_mapping = {format_component_name(g): g for g in df20['Geography']}

In [63]:
# Register the three nation names by hand (overwriting any existing entries
# stored under these keys).
name_mapping.update({
    'scotland': 'Scotland',
    'england': 'England',
    'wales': 'Wales',
})

In [64]:
# Add the Scottish council areas: every column of the council-area file other
# than 'week commencing' is treated as a council name.
df = pd.read_csv('../../data/live/records/SARS-CoV-2/scotland/human-mortality/council_area_week_all_deaths.csv')
councils = df.columns.tolist()
councils.remove('week commencing')  # raises ValueError if that column is absent
for c in councils:
    # formatted name -> original column header
    name_mapping[format_component_name(c)] = c

In [65]:
# Add the Scottish NHS health boards: every column of the health-board file
# other than 'week commencing' is treated as a board name.
df = pd.read_csv('../../data/live/records/SARS-CoV-2/scotland/human-mortality/nhs_health_board_week_all_deaths.csv')
boards = df.columns.tolist()
boards.remove('week commencing')  # raises ValueError if that column is absent
for c in boards:
    # formatted name -> original column header
    name_mapping[format_component_name(c)] = c

In [66]:
import json

In [68]:
# Serialise the formatted-name -> original-name mapping as JSON.
with open('temp-data/name_mapping.json', 'w') as f:
    f.write(json.dumps(name_mapping))