# Some interesting charts for Covid-19 in England

In [None]:
import os
import datetime
# Run metadata comes from the environment; the fallbacks are used when the
# variables are not set.
fetch_date = os.getenv('TODAY', 'test')
fetch_time = os.getenv('FETCH_TIME', 'a test run')

In [None]:
# The raw CSV for this run is named after the fetch date.
csv_name_template = 'covid_{}.csv'
input_filename = csv_name_template.format(fetch_date)

In [None]:
# IPython.display is the public location of display() and HTML; importing
# display from IPython.core.display is deprecated.
from IPython.display import display, HTML

In [None]:
# Tell the reader when the underlying data was downloaded.
fetch_notice = '<p>Data was fetched from coronavirus.data.gov.uk on {}.</p>'.format(fetch_time)
display(HTML(fetch_notice))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

In [None]:
# Load the raw data indexed by date. The index is parsed to datetimes *before*
# sorting so the rows end up in chronological order rather than in the
# lexical order of the date strings.
df = pd.read_csv(input_filename).set_index('date')
df.index = pd.to_datetime(df.index)
df = df.sort_index()

In [None]:
def plot_chart(df, title):
    """Draw every column of ``df`` as a line chart and show it.

    Args:
        df: frame to plot; its index supplies the x axis and its first
            index value becomes the left-hand x limit.
        title: text shown as the chart title.
    """
    axes = df.plot(kind='line', title=title, figsize=(14, 8))
    # Start the x axis at the first row and anchor the y axis at zero.
    axes.set_xlim(left=df.index[0])
    axes.set_ylim(bottom=0)
    axes.grid(True, which='both')
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Hospital cases, occupied ventilator beds and deaths over the whole data set;
# only rows where all three values are missing are discarded.
hospital_columns = ['hospitalCases', 'covidOccupiedMVBeds', 'newDeaths28DaysByDeathDate']
df_hospital = df[hospital_columns].dropna(how='all')
plot_chart(
    df_hospital,
    title='Cases in hospital; occupied ventilators; deaths: all available data',
)

In [None]:
# Deaths for every date that has a value.
deaths_by_date = df[['newDeaths28DaysByDeathDate']]
df_deaths = deaths_by_date.dropna()
plot_chart(df_deaths, title='Deaths all available data')

In [None]:
# Same three series as the all-data hospital chart, limited to the final 60
# rows that have at least one value.
recent_hospital_columns = ['hospitalCases', 'covidOccupiedMVBeds', 'newDeaths28DaysByDeathDate']
hospital_rows_with_data = df[recent_hospital_columns].dropna(how='all')
df_hospital_last_60 = hospital_rows_with_data.tail(60)
plot_chart(
    df_hospital_last_60,
    title='Cases in hospital; occupied ventilators; deaths: over last 60 days',
)


In [None]:
# Smooth daily deaths with a 7-row rolling mean, then chart the last 60 rows
# for which both the raw and the smoothed value exist.
daily_deaths = df['newDeaths28DaysByDeathDate']
df['deaths 7 day mean'] = daily_deaths.rolling(window=7).mean()
deaths_chart_columns = ['newDeaths28DaysByDeathDate', 'deaths 7 day mean']
df_deaths_last_60 = df[deaths_chart_columns].dropna().tail(60)
plot_chart(df_deaths_last_60, title='Deaths over last 60 days')

In [None]:
# Smooth occupied ventilator beds with a 7-row rolling mean, then chart the
# last 60 rows for which both the raw and the smoothed value exist.
occupied_ventilators = df['covidOccupiedMVBeds']
df['occupied ventilators 7 day mean'] = occupied_ventilators.rolling(window=7).mean()
ventilator_chart_columns = ['covidOccupiedMVBeds', 'occupied ventilators 7 day mean']
df_ventilators_last_60 = df[ventilator_chart_columns].dropna().tail(60)
plot_chart(df_ventilators_last_60, title='Occupied ventilators over last 60 days')

In [None]:
# Smooth published cases with a 7-row rolling mean, then chart the last 60
# rows for which both the raw and the smoothed value exist.
published_cases = df['newCasesByPublishDate']
df['cases 7 day mean'] = published_cases.rolling(window=7).mean()
cases_chart_columns = ['newCasesByPublishDate', 'cases 7 day mean']
df_cases_last_60 = df[cases_chart_columns].dropna().tail(60)
plot_chart(df_cases_last_60, title='Cases over last 60 days')

In [None]:
# Total tests is the sum of the pillar one and pillar two columns. Adding the
# columns directly is vectorized and gives NaN wherever either input is
# missing, exactly as the per-row sum would.
df['total tests'] = df['newPillarOneTestsByPublishDate'] + df['newPillarTwoTestsByPublishDate']
# Take the rolling mean of the Series (not a one-column DataFrame) so a plain
# Series is assigned to the new column.
df['total tests 7 day mean'] = df['total tests'].rolling(7).mean()
# Chart the last 60 rows that have at least one of the four values.
tests_chart_columns = [
    'newPillarOneTestsByPublishDate',
    'newPillarTwoTestsByPublishDate',
    'total tests',
    'total tests 7 day mean',
]
df_test_last_60 = df[tests_chart_columns].dropna(how='all').tail(60)
plot_chart(df_test_last_60, 'Tests over last 60 days')

In [None]:
# Percentage of tests that were published as cases, computed column-wise.
# Rows with zero total tests have no meaningful rate, so the denominator is
# masked to NaN there; those rows are then removed by dropna() below instead
# of producing inf or a division error.
total_tests = df['total tests']
nonzero_tests = total_tests.where(total_tests != 0)
df['percentage cases per test'] = df['newCasesByPublishDate'] * 100.0 / nonzero_tests
df['7 day mean percentage cases per test'] = df['percentage cases per test'].rolling(7).mean()
# Chart the last 60 rows where both the daily and the smoothed rate exist.
rate_chart_columns = ['percentage cases per test', '7 day mean percentage cases per test']
df_cases_per_test_last_60 = df[rate_chart_columns].dropna().tail(60)
plot_chart(df_cases_per_test_last_60, '% Cases per test over last 60 days')

In [None]:
# The same two rate columns as the 60-day chart, across every row where both
# values are present.
rate_columns_all = ['percentage cases per test', '7 day mean percentage cases per test']
df_cases_per_test_all = df[rate_columns_all].dropna()
plot_chart(df_cases_per_test_all, title='Cases per test all data')

In [None]:
# Save the frame, including the derived columns, next to the notebook output.
processed_csv_path = 'covid-processed-{}.csv'.format(fetch_date)
df.to_csv(processed_csv_path)

## Second wave compared with first wave (experimental)


In [None]:
def wave_comparison(df, input_axis_name, output_axis_suffix, wave_2_start_date, wave_1_start_date=None, wave_index_offset=1):
    """Line up two waves of one metric by days elapsed since each wave began.

    Also displays an HTML paragraph stating the value of the metric at the
    start of each wave.

    Args:
        df: source frame; ``df[input_axis_name]`` is sliced by date label.
        input_axis_name: name of the column in ``df`` to compare.
        output_axis_suffix: text appended to the output column names and used
            in the displayed summary.
        wave_2_start_date: date label at which the later wave is taken to start.
        wave_1_start_date: date label at which the earlier wave is taken to
            start. When ``None``, the earlier wave starts at the first
            non-missing value of the column.
        wave_index_offset: number added to 0 and 1 to label the two waves.

    Returns:
        A DataFrame with a fresh 0-based index (one row per day since the
        start of the earlier wave) and two columns, ``'wave <n> <suffix>'``
        and ``'wave <n+1> <suffix>'``. The later wave is aligned to the rows
        of the earlier one, so it is padded with NaN where it is shorter.

    Raises:
        ValueError: if either wave has no rows, or if a wave's first value is
            missing and so cannot be converted to an integer.
    """
    wave_1_axis_name = 'wave {} {}'.format(0 + wave_index_offset, output_axis_suffix)
    wave_2_axis_name = 'wave {} {}'.format(1 + wave_index_offset, output_axis_suffix)

    series = df[input_axis_name]
    if wave_1_start_date is None:
        wave_1 = series.dropna()
    else:
        wave_1 = series.loc[wave_1_start_date:]
    wave_2 = series.loc[wave_2_start_date:]

    if wave_1.empty or wave_2.empty:
        raise ValueError(
            'No data for {!r} from the requested wave start dates'.format(input_axis_name)
        )
    if wave_1_start_date is None:
        # Report the date of the first available value as the wave 1 start.
        wave_1_start_date = wave_1.index[0].strftime('%Y-%m-%d')

    # Dropping the date index turns each row into "days since wave start".
    df_wave_comparison = pd.DataFrame({wave_1_axis_name: wave_1.reset_index(drop=True)})
    df_wave_comparison[wave_2_axis_name] = wave_2.reset_index(drop=True)

    # Read the starting values by position rather than by the label 0.
    wave_1_start_cases = int(wave_1.iloc[0])
    wave_2_start_cases = int(wave_2.iloc[0])
    display(HTML("""
        <p>
        On {wave_1_start_date} there were {wave_1_start_cases} {output_axis_suffix}.
        On {wave_2_start_date} there were {wave_2_start_cases} {output_axis_suffix}.
        This chart compares what happened over the following days starting from those two dates.
        </p>
        """.format(
            wave_1_start_date=wave_1_start_date,
            wave_1_start_cases=wave_1_start_cases,
            wave_2_start_date=wave_2_start_date,
            wave_2_start_cases=wave_2_start_cases,
            output_axis_suffix=output_axis_suffix,
        )))
    return df_wave_comparison


In [None]:
def plot_wave_comparison(df, units):
    """Chart the columns of a wave-comparison frame against each other.

    Args:
        df: frame with one column per wave and one row per day since the
            start of the wave (presumably the result of ``wave_comparison``).
        units: description of what is being counted; used in the chart title.
    """
    # count() ignores NaN, so the smallest per-column count is the number of
    # rows for which every wave has data.
    days_of_overlap = df.count().min()
    # Show twice the overlap so the longer wave's continuation is visible.
    df_to_plot = df.head(days_of_overlap * 2)
    # Smallest starting value across the waves; taken from the first row by
    # position so it does not depend on the index containing the label 0.
    minimum_value = int(df.min(axis=1).iloc[0])
    title = 'Waves in {units} by days since at least {minimum_value} {units}'.format(
        units=units,
        minimum_value=minimum_value,
    )
    plot_chart(df_to_plot, title)

In [None]:
# Compare hospital cases from the two chosen start dates; with an offset of 2
# the columns are labelled "wave 2" and "wave 3".
df_waves_by_hospital_cases = wave_comparison(
    df,
    input_axis_name='hospitalCases',
    output_axis_suffix='hospital cases',
    wave_2_start_date='2021-06-10',
    wave_1_start_date='2020-09-16',
    wave_index_offset=2,
)

In [None]:
# Chart the hospital-case waves side by side.
plot_wave_comparison(df=df_waves_by_hospital_cases, units='hospital cases')

In [None]:
# Compare occupied ventilator beds from the two chosen start dates; with an
# offset of 2 the columns are labelled "wave 2" and "wave 3".
df_waves_by_ventilator_cases = wave_comparison(
    df,
    input_axis_name='covidOccupiedMVBeds',
    output_axis_suffix='occupied ventilator beds',
    wave_2_start_date='2021-06-10',
    wave_1_start_date='2020-09-20',
    wave_index_offset=2,
)

Note that the start date for wave 2 used here is arbitrary since it has not yet reached the minimum number of occupied ventilator beds at the start of the available data.

In [None]:
# Chart the occupied-ventilator-bed waves side by side.
plot_wave_comparison(df=df_waves_by_ventilator_cases, units='occupied ventilator beds')

In [None]:
# Compare deaths from the two chosen start dates; with an offset of 2 the
# columns are labelled "wave 2" and "wave 3".
df_waves_by_deaths = wave_comparison(
    df,
    input_axis_name='newDeaths28DaysByDeathDate',
    output_axis_suffix='deaths',
    wave_2_start_date='2021-06-10',
    wave_1_start_date='2020-09-11',
    wave_index_offset=2,
)

Note that the choice of 50 for the start of a wave is arbitrary.

In [None]:
# Chart the death waves side by side.
plot_wave_comparison(df=df_waves_by_deaths, units='deaths')

In [None]:
# Compare published cases from the two chosen start dates; with an offset of 2
# the columns are labelled "wave 2" and "wave 3".
df_waves_by_cases = wave_comparison(
    df,
    input_axis_name='newCasesByPublishDate',
    output_axis_suffix='cases',
    wave_2_start_date='2021-06-02',
    wave_1_start_date='2020-09-16',
    wave_index_offset=2,
)

The choice of 1000 cases for the start of a wave is arbitrary.

In [None]:
# Chart the case waves side by side.
plot_wave_comparison(df=df_waves_by_cases, units='cases')

In [None]:
# Compare the 7 day mean of cases from the two chosen start dates; with an
# offset of 2 the columns are labelled "wave 2" and "wave 3".
df_waves_by_cases_smoothed = wave_comparison(
    df,
    input_axis_name='cases 7 day mean',
    output_axis_suffix='7 day mean cases',
    wave_2_start_date='2021-06-02',
    wave_1_start_date='2020-09-16',
    wave_index_offset=2,
)

The choice of start dates is arbitrary.

In [None]:
# Chart the smoothed case waves side by side.
plot_wave_comparison(df=df_waves_by_cases_smoothed, units='7 day mean cases')

In [None]:
# Footer: a link to the processed CSV for this run, then a link to the source
# repository.
data_link = HTML(
    '<p>Data in these charts: <a href="covid-processed-{}.csv">covid-processed-{}.csv</a></p>'.format(
        fetch_date, fetch_date)
)
source_link = HTML('<p><a href="https://github.com/RobFisher/rfcovid">Source code</a>.</p>')
for footer_item in (data_link, source_link):
    display(footer_item)
