In [1]:
from uk_covid19 import Cov19API
import json
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as wdg
%matplotlib inline
plt.rcParams['figure.dpi'] = 100

In [2]:
# Filter for the deaths query: a single 'areaType' constraint.
death_filters = ['areaType=Overview']

# Filters for the cases-by-sex query: nation level, restricted to England.
sex_filter = ['areaType=nation', 'areaName=England']  # remember to highlight this

In [3]:
# Each structure maps the key we want in the response (left) to the
# API metric name requested from the service (right).
death_structure = dict(
    date="date",
    deaths="newDailyNsoDeathsByDeathDate",
)

sex_structure = dict(
    date="date",
    femaleCases="femaleCases",
    maleCases="maleCases",
)

In [4]:
# Build one API client per query from the filters/structures defined above.
death_api = Cov19API(
    filters=death_filters,
    structure=death_structure,
)
sex_api = Cov19API(
    filters=sex_filter,
    structure=sex_structure,
)

# Fetch the responses as JSON; the records are read from their "data" key further down.
death_data = death_api.get_json()
sex_data = sex_api.get_json()

#print(death_data)
#print(sex_data)

#death_data_lst = death_data["data"]
#sex_data_lst = sex_data["data"]

#print(death_data_lst)


In [5]:
# dump into json


# Cache both API responses on disk so later cells can work from the files.
for cache_path, payload in (('death_data.json', death_data), ("sex_data.json", sex_data)):
    with open(cache_path, "wt") as OUTF:
        json.dump(payload, OUTF)


In [6]:
def _read_json_file(path):
    """Open `path` in text mode and return its parsed JSON content."""
    with open(path, "rt") as INFILE:
        return json.load(INFILE)


# Reload the cached responses written by the dump cell.
death_data_raw = _read_json_file("death_data.json")
sex_data_raw = _read_json_file("sex_data.json")

In [7]:
# Extract the record lists from the reloaded responses.
# NOTE: this rebinds `death_data` / `sex_data`, which previously held the full
# API responses. Re-running the JSON dump cell after this point would write the
# bare lists to disk, and the `["data"]` lookups here would then fail.
death_data = death_data_raw["data"]
sex_data = sex_data_raw["data"]

# Build the time series: collect every record's date, drop duplicates with a
# set (despite the `_dic` suffix this is a set, not a dictionary), then sort.
# The dates are strings here; they are parsed with "%Y-%m-%d" further down.
death_dates_raw = [record['date'] for record in death_data]
death_dates_dic = set(death_dates_raw)
death_dates = sorted(death_dates_dic)


#print(death_dates)

# getting start and end dates


def parse_date(datestring):
    """Parse a 'YYYY-MM-DD' date string with pandas and return the result."""
    iso_day_format = "%Y-%m-%d"
    parsed = pd.to_datetime(datestring, format=iso_day_format)
    return parsed

# `death_dates` is sorted, so its first and last entries bound the series.
first_day_string = death_dates[0]
last_day_string = death_dates[-1]
death_startdate = parse_date(first_day_string)
death_enddate = parse_date(last_day_string)

# One index entry per calendar day between the two bounds (inclusive).
death_index_d = pd.date_range(start=death_startdate, end=death_enddate, freq='D')
print(death_index_d)


DatetimeIndex(['2020-01-30', '2020-01-31', '2020-02-01', '2020-02-02',
               '2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06',
               '2020-02-07', '2020-02-08',
               ...
               '2022-11-02', '2022-11-03', '2022-11-04', '2022-11-05',
               '2022-11-06', '2022-11-07', '2022-11-08', '2022-11-09',
               '2022-11-10', '2022-11-11'],
              dtype='datetime64[ns]', length=1017, freq='D')


In [8]:
# getting female / male cases and the age ranges

# Per-record lists of the female / male case breakdowns.
female_cases = [record['femaleCases'] for record in sex_data]
male_cases = [record['maleCases'] for record in sex_data]

# Gather every age label that appears in the male breakdowns. The set
# comprehension removes duplicates; `sorted` then orders the labels as plain
# strings (alphabetically, not by numeric age).
age_bands = sorted({band['age'] for breakdown in male_cases for band in breakdown})
#print(age_bands)
#print(age_bands)


def min_age(agerange):
    """Return the lower bound of an age-band label as an int.

    Works for labels such as '85_to_89' (-> 85) and '90+' (-> 90).
    """
    without_plus = agerange.replace('+', '')  # '90+' -> '90'
    lower_bound, _, _ = without_plus.partition('_')  # text before the first '_'
    return int(lower_bound)

# Quick sanity check of min_age on a regular band and on the open-ended band.
for sample_band in ('85_to_89', '90+'):
    print(min_age(sample_band))

# Re-order the bands in place by their numeric lower bound.
age_bands.sort(key=lambda band: min_age(band))
print(age_bands)


#print(female_cases)

85
90
['0_to_4', '5_to_9', '10_to_14', '15_to_19', '20_to_24', '25_to_29', '30_to_34', '35_to_39', '40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90+']


In [9]:
# Building the dataframe
print(age_bands)

# Age-band suffixes for the per-sex columns, in column order.
# NOTE(review): '90_to_94' is not among the age bands printed above; it is kept
# because the selection widgets further down list the resulting columns.
_band_suffixes = (
    "0_to_4", "5_to_9", "10_to_14", "15_to_19", "20_to_24", "25_to_29",
    "30_to_34", "35_to_39", "40_to_44", "45_to_49", "50_to_54", "55_to_59",
    "60_to_64", "65_to_69", "70_to_74", "75_to_79", "80_to_84", "85_to_89",
    "90_to_94", "90+",
)

# Column layout: the deaths column first, then all female bands, then all male bands.
_frame_columns = (
    ("DailyDeaths",)
    + tuple(f"F_{suffix}" for suffix in _band_suffixes)
    + tuple(f"M_{suffix}" for suffix in _band_suffixes)
)

# Empty frame indexed by calendar day; the cells are filled in below.
death_df = pd.DataFrame(index=death_index_d, columns=_frame_columns)
#print(death_df)


['0_to_4', '5_to_9', '10_to_14', '15_to_19', '20_to_24', '25_to_29', '30_to_34', '35_to_39', '40_to_44', '45_to_49', '50_to_54', '55_to_59', '60_to_64', '65_to_69', '70_to_74', '75_to_79', '80_to_84', '85_to_89', '90+']


In [10]:
# filling the data frame

## filling in the gender / case / age_range data


# Copy every female / male age-band value into the matching "F_<band>" /
# "M_<band>" column of death_df, on the row for the record's date.
#
# The column name is derived from the band label instead of being spelled out
# in one `if` per band. The hand-written chain had no branch for '75_to_79',
# so F_75_to_79 / M_75_to_79 were never filled; deriving the name covers
# every band that has a column.
for dictionary in sex_data:
    date = parse_date(dictionary['date'])
    for column_prefix, cases_key in (("F", "femaleCases"), ("M", "maleCases")):
        for item in dictionary[cases_key]:
            column = f"{column_prefix}_{item['age']}"
            # Skip bands without a column: assigning through .loc to an
            # unknown label would silently add a new column to the frame.
            if column not in death_df.columns:
                continue
            # A missing value (None) is stored as 0.0.
            value = float(item['value']) if item['value'] is not None else 0.0
            death_df.loc[date, column] = value






        


In [11]:
## filling in the daily deaths data

for record in death_data:
    day = parse_date(record["date"])

    # Only write cells that are still empty, so the first record seen for a
    # given day is the one that is kept.
    if not pd.isna(death_df.loc[day, "DailyDeaths"]):
        continue

    reported = record["deaths"]
    # A missing count (None) is stored as 0.0.
    death_df.loc[day, "DailyDeaths"] = 0.0 if reported is None else float(reported)

# Every cell that was never written becomes 0.0.
death_df.fillna(0.0, inplace=True)

In [12]:
# Sum the daily rows into one row per month.
# On the author's question about the shorter last bar: the final bin holds the
# sum of only part of a month. In the recorded output the daily index ends on
# 2022-11-11, so that bin covers 11 days rather than a full month.
monthly_groups = death_df.resample('1m')
death_df_m = monthly_groups.sum()



In [13]:
# making this interactive



# adding a selector for the gender data



# Age-band suffixes shared by the female ("F_") and male ("M_") columns.
_selector_band_suffixes = [
    "0_to_4", "5_to_9", "10_to_14", "15_to_19", "20_to_24", "25_to_29",
    "30_to_34", "35_to_39", "40_to_44", "45_to_49", "50_to_54", "55_to_59",
    "60_to_64", "65_to_69", "70_to_74", "75_to_79", "80_to_84", "85_to_89",
    "90_to_94", "90+",
]

female_age_bands = ["F_" + suffix for suffix in _selector_band_suffixes]
male_age_bands = ["M_" + suffix for suffix in _selector_band_suffixes]

# Female selector: every band is pre-selected.
female_age_cols = wdg.SelectMultiple(
    options=list(female_age_bands),
    value=list(female_age_bands),
    rows=20,  # rows of the selection box
    description='Female Age Bands',
    disabled=False,
)

# Male selector: starts with nothing selected.
male_age_cols = wdg.SelectMultiple(
    options=list(male_age_bands),
    value=[],
    rows=20,  # rows of the selection box
    description='Male Age Bands',
    disabled=False,
)

# Slider for the number of months per resampling bin (1 to 12, initially 9).
_slider_settings = dict(
    value=9,
    min=1,
    max=12,
    step=1,
    description='Monthly Intervall',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
timeslider = wdg.IntSlider(**_slider_settings)

# Lay the three controls out side by side.
controls = wdg.HBox([female_age_cols, male_age_cols, timeslider])

def age_graph(femagecols, menagecols, timevalue):
    """Callback for the interactive output: plot the selected age-band columns.

    Parameters
    ----------
    femagecols : sequence of str
        Selected female column names (presumably a tuple coming from the
        SelectMultiple widget; any sequence works).
    menagecols : sequence of str
        Selected male column names.
    timevalue : int
        Number of months per resampling bin.

    Reads the module-level monthly frame ``death_df_m``. With at least one
    column selected (female or male), the frame is re-binned to `timevalue`
    months and the selected columns are drawn as an area chart, with the
    "DailyDeaths" column of ``death_df_m`` overlaid on a secondary y-axis.
    With nothing selected, a short usage hint is printed instead.
    """
    # Count both selections: checking only the female one made a male-only
    # selection fall through to the "nothing selected" hint.
    selected_columns = list(femagecols) + list(menagecols)

    if not selected_columns:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return

    # Use the value passed to the callback rather than reading the global
    # slider widget, so the function honours its own argument.
    m_value = str(timevalue) + "m"

    death_df_mm = death_df_m.resample(m_value).sum()
    death_df_mm.plot(y=selected_columns, kind="area", logy=False, use_index=True, figsize=(20,10))
    # The deaths line is taken from the monthly frame, not the re-binned one.
    ax = death_df_m["DailyDeaths"].plot(secondary_y=True, color='k')
    ax.set_ylabel('Daily Deaths')
    
  
# Map each age_graph argument name to the widget that supplies its value.
# TODO: clean up slider names + order
_callback_inputs = {
    'femagecols': female_age_cols,
    "menagecols": male_age_cols,
    "timevalue": timeslider,
}
output1 = wdg.interactive_output(age_graph, _callback_inputs)

# Show the control row followed by the graph output.
display(controls, output1)


HBox(children=(SelectMultiple(description='Female Age Bands', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,…

Output()

+++ check the final data here +++