In [1]:
import json, sys, os
import altair as alt
from altair import expr, datum
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Add the directory two levels above the current working directory to the
# import path so that the `guidelines` package imported below can be found.
# This depends on the notebook being run from its own folder.
sys.path.append(os.getcwd() + "/../..")
from guidelines.utils import defaults

# Fig 8

In [3]:
# Unique ID of the article; it also names the article's folder in the data URL below.
uid = "2021-04-25-covid-status"
eco_git_path, vega_embed, colors, color_scales, config = defaults.load_defaults(uid)

# Replace the path returned by load_defaults() with the raw-GitHub URL of this
# article's data folder.
eco_git_path = f"https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/prints/{uid}/data/"

# Default view size, passed to configure_view() by the charts below.
height, width = config["height"], config["width"]

In [4]:
# Load the Fig 8 input from a local CSV snapshot. The chart below reads the
# `date` and `newVirusTests` columns from it.
df = pd.read_csv("raw/data_2021-Apr-25.csv")

In [5]:
# `f` is the base file name of this figure; the chart spec is saved under the
# same name further down.
# save_csv_html() is a project helper: going by its printed output it saves a
# CSV and an HTML file and reports the GitHub path of the CSV.
f = "fig8_tests"
f8 = defaults.save_csv_html(df, f, eco_git_path, vega_embed)

Saving CSV...
Saving HTML...
GitHub path:  https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/prints/2021-04-25-covid-status/data/fig8_tests.csv


In [6]:
# Y-axis styling for Fig 8: no axis line or ticks, left-aligned labels pulled
# in by negative padding/offset, and a horizontal title positioned via
# titleX/titleY.
yconfig = alt.Axis(
    # Title text and placement.
    title="Virus tests conducted",
    titleAngle=0,
    titleAlign="left",
    titleFontSize=11,
    titleX=40,
    titleY=-6,
    # Tick labels.
    format="s",
    tickCount=4,
    labelAlign="left",
    labelBaseline="middle",
    labelOffset=-10,
    labelPadding=-5,
    # Axis line and tick marks are switched off.
    domain=False,
    ticks=False,
)

In [7]:
# Fig 8 -- daily virus tests as thin bars, overlaid with a rolling-mean line
# and two in-plot text annotations that name the two series.
bars = (
    alt.Chart(df)
    .mark_bar(size=2, color=colors["eco-turquiose"])
    .encode(
        y=alt.Y("newVirusTests:Q", axis=yconfig),
        x=alt.X("date:T", axis=alt.Axis(title="", grid=False)),
    )
)

# The window covers the current row plus three rows on either side.
line = (
    alt.Chart(df)
    .transform_window(frame=[-3, 3], mean_tests="mean(newVirusTests)")
    .mark_line(color=colors["eco-blue"])
    .encode(x="date:T", y="mean_tests:Q")
)

# Each annotation is a one-row frame holding the label position and its text.
label1 = (
    alt.Chart(
        pd.DataFrame({"x": ["2021-03-15"], "y": [1700000], "text": ["tests / day"]})
    )
    .mark_text(
        fontSize=10,
        baseline="bottom",
        align="right",
        color=colors["eco-turquiose"],
    )
    .encode(x="x:T", y="y:Q", text="text:N")
)
label2 = (
    alt.Chart(
        pd.DataFrame({"x": ["2021-03-02"], "y": [1100000], "text": ["7-day average"]})
    )
    .mark_text(
        fontSize=10,
        baseline="bottom",
        align="right",
        color=colors["eco-blue"],
    )
    .encode(x="x:T", y="y:Q", text="text:N")
)

# Layer everything, set the size, then write the spec next to the data files.
layer1 = (bars + line + label1 + label2).properties(width=500, height=300)
layer1 = layer1.configure_view(height=height, width=width)
layer1.save(f"visualisation/{f}.json")
layer1

# Fig 9

In [8]:
# Load the Our World in Data vaccinations table straight from the `master`
# branch on GitHub. The content of this URL can change between runs, so the
# figure is only reproducible from the CSV saved in the next steps.
df = pd.read_csv(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv"
)

In [9]:
# The vaccinations table exceeds Altair's default limit of 5000 rows.
# Background:
# https://github.com/altair-viz/altair/issues/611
# https://altair-viz.github.io/user_guide/faq.html#maxrowserror-how-can-i-plot-large-datasets
# The options are to pre-filter the frame in pandas, or to lift the limit as
# done here. Lifting it is the blunt option: every row is then embedded in the
# chart specification, so use it with care.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [10]:
# Base file name for Fig 9; reused when the chart spec is saved further down.
# save_csv_html() is a project helper: going by its printed output it saves a
# CSV and an HTML file and reports the GitHub path of the CSV.
f = "fig9_immunisation"
f9 = defaults.save_csv_html(df, f, eco_git_path, vega_embed)

Saving CSV...
Saving HTML...
GitHub path:  https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/prints/2021-04-25-covid-status/data/fig9_immunisation.csv


In [11]:
# Y-axis styling for Fig 9: same look as the earlier axis config, but with a
# percentage label format ("p") and more ticks.
yconfig = alt.Axis(
    # Title text and placement.
    title="Share of the total population that received at least one vaccine dose",
    titleAngle=0,
    titleAlign="left",
    titleFontSize=11,
    titleX=35,
    titleY=-6,
    # Tick labels.
    format="p",
    tickCount=7,
    labelAlign="left",
    labelBaseline="middle",
    labelOffset=-10,
    labelPadding=-5,
    # Axis line and tick marks are switched off.
    domain=False,
    ticks=False,
)

In [12]:
# ISO codes of the countries shown in Fig 9; used to filter on `iso_code`.
# Each code appears exactly once ("FRA" used to be listed twice), which gives
# nine countries -- the same number as the colours in the Fig 9 palette.
countries = [
    "HUN",
    "GBR",
    "USA",
    "FRA",
    "ISR",
    "DEU",
    "CHL",
    "BRA",
    "IND",
]

In [13]:
# Colour range for the Fig 9 country lines, looked up by name in the project
# palette. The order of the names is the order of the range.
color_scale = [
    colors[name]
    for name in (
        "eco-turquiose",
        "eco-purple",
        "eco-mid-blue",
        "eco-yellow",
        "eco-green",
        "eco-orange",
        "eco-light-blue",
        "eco-red",
        "eco-blue",
    )
]

In [14]:
# Fig 9 -- share of the population with at least one vaccine dose, one line
# per selected country, with the country name written next to each line
# instead of a legend.
line = (
    alt.Chart(df)
    .mark_line(interpolate="linear")
    .encode(
        x=alt.X("date:T", axis=alt.Axis(grid=False, title="")),
        order=alt.Order("location:N"),
        y=alt.Y(
            "s_people_vaccinated_per_hundred:Q",
            axis=yconfig,
            scale=alt.Scale(domain=[0, 0.7]),
        ),
        color=alt.Color(
            "location:N",
            legend=None,
            scale=alt.Scale(range=color_scale),
        ),
    )
    # Rescale from "per hundred" to a 0-1 share so the "p" axis format applies.
    .transform_calculate(
        s_people_vaccinated_per_hundred="datum.people_vaccinated_per_hundred/100"
    )
    .transform_filter(alt.FieldOneOfPredicate(field="iso_code", oneOf=countries))
    # Keep only rows with a positive value.
    .transform_filter("datum.people_vaccinated_per_hundred>0")
)

# One text label per country, taken from the rows dated 2021-04-23.
# The text mark no longer carries `interpolate` (it has no effect on text) and
# the calculated field `label`, which nothing referenced, has been removed.
# NOTE(review): the date filter compares `datum.date` with `toDate(...)` using
# `==`; confirm that the labels actually render with the Vega version in use.
ylabels = (
    alt.Chart(df)
    .mark_text(
        align="left",
        fontSize=10,
        xOffset=8,
    )
    .encode(
        x=alt.X("date:T", axis=alt.Axis(grid=False, title="")),
        order=alt.Order("people_vaccinated_per_hundred:Q"),
        y=alt.Y("s_people_vaccinated_per_hundred:Q"),
        color=alt.Color("location:N"),
        text=alt.Text("location:N"),
    )
    # Same rescaling as for the lines; Chile's label is raised by 0.02,
    # presumably to keep it clear of a neighbouring label.
    .transform_calculate(
        s_people_vaccinated_per_hundred="datum.people_vaccinated_per_hundred/100+((datum.iso_code)=='CHL'?0.02:0)"
    )
    .transform_filter(
        alt.FieldOneOfPredicate(
            field="iso_code",
            oneOf=countries,
        )
    )
    .transform_filter("datum.date==toDate('2021-04-23')")
)

layer1 = (
    (line + ylabels)
    .properties(width=400, height=300)
    .configure_view(height=height, width=width)
)
layer1.save("visualisation/" + f + ".json")
layer1

# Fig 7

## a

In [15]:
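# Data source (ONS): coronavirus (COVID-19) related deaths by occupation, England and Wales, deaths registered between 9 March and 28 December 2020
# https://www.ons.gov.uk/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/bulletins/coronaviruscovid19relateddeathsbyoccupationenglandandwales/deathsregisteredbetween9marchand28december2020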

In [16]:
# Read the "Table 3" sheet of the workbook, then discard columns and rows
# that are entirely empty.
df = pd.read_excel("raw/reftablesfinal.xlsx", sheet_name="Table 3", skiprows=7, nrows=9)
df = df.dropna(how="all", axis=1).dropna(how="all", axis=0)

# Two identifying columns followed by four blocks (m, ma, f, fa) of four
# measures each. In the chart below the m_/f_ rates are drawn for men/women and
# the ma_/fa_ rates as the fainter bars behind them.
# NOTE(review): lci/hci presumably stand for the lower/upper confidence
# interval bounds -- confirm against the workbook.
df.columns = ["group", "desc"] + [
    f"{prefix}_{measure}"
    for prefix in ("m", "ma", "f", "fa")
    for measure in ("deaths", "rate", "lci", "hci")
]

In [17]:
# Base file name for Fig 7a; reused when the chart spec is saved further down.
# save_csv_html() is a project helper: going by its printed output it saves a
# CSV and an HTML file and reports the GitHub path of the CSV.
f = "fig7a_deaths_by_occupation"
f7a = defaults.save_csv_html(df, f, eco_git_path, vega_embed)

Saving CSV...
Saving HTML...
GitHub path:  https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/prints/2021-04-25-covid-status/data/fig7a_deaths_by_occupation.csv


In [18]:
# Fig 7a -- back-to-back bar chart of death rates by occupation group: women
# on the left (x-axis reversed), men on the right, and the occupation names in
# a narrow column between the two panels. The solid bars use the m_rate/f_rate
# columns; the fainter bars behind them use ma_rate/fa_rate.
# NOTE(review): each mark_bar()/mark_text() call below sets a new mark
# definition, so size=12 on `base` presumably never reaches the rendered
# bars -- confirm.
base = alt.Chart(df).mark_bar(size=12).encode(y=alt.Y("desc:N", axis=None))

# --- Right-hand panel: men -------------------------------------------------
bars_right = base.mark_bar(color=colors["eco-blue"]).encode(
    x=alt.X(
        "m_rate:Q",
        scale=alt.Scale(domain=[0, 500]),
        axis=alt.Axis(title="Men", titleAnchor="start", grid=True),
    )
)
bars_right2 = bars_right.mark_bar(opacity=0.4, color=colors["eco-blue"]).encode(
    x="ma_rate:Q"
)
# Value labels sit just past the end of each solid bar.
text_right = bars_right.mark_text(
    color=colors["eco-blue"],
    dx=5,
    dy=2,
    align="left",
    baseline="middle",
).encode(text=alt.Text("m_rate:N", format=".0f"))

# --- Left-hand panel: women (axis sorted descending so bars grow leftwards) --
bars_left = base.mark_bar(color=colors["eco-light-blue"]).encode(
    x=alt.X(
        "f_rate:Q",
        sort="descending",
        scale=alt.Scale(domain=[0, 500]),
        axis=alt.Axis(
            title="Women", titleAnchor="start", titleAlign="right", grid=True
        ),
    )
)
bars_left2 = bars_left.mark_bar(opacity=0.4, color=colors["eco-light-blue"]).encode(
    x="fa_rate:Q"
)
text_left = bars_left.mark_text(
    color=colors["eco-light-blue"],
    dx=-5,
    dy=2,
    align="right",
    baseline="middle",
).encode(text=alt.Text("f_rate:N", format=".0f"))

# --- Centre column: occupation names -----------------------------------------
# dx nudges the text to the right so it does not sit on top of the bars.
text = base.mark_text(dx=5, align="center", baseline="middle").encode(text="desc:N")

# --- In-plot annotations naming the two bar series (left panel only) ---------
label1 = (
    alt.Chart(
        pd.DataFrame(
            {
                "desc": ["Associate professional and technical occupations"],
                "f_rate": [130],
                "text": ["Deaths involving COVID-19"],
            }
        )
    )
    .mark_text(
        fontSize=10,
        dy=5,
        align="right",
        baseline="bottom",
        color=colors["eco-light-blue"],
    )
    .encode(x="f_rate:Q", y="desc:N", text="text:N")
)
label2 = (
    alt.Chart(
        pd.DataFrame(
            {
                "desc": ["Caring, leisure and other service occupations"],
                "f_rate": [250],
                "text": ["All causes of death"],
            }
        )
    )
    .mark_text(
        fontSize=10,
        dy=5,
        opacity=0.6,
        align="right",
        baseline="bottom",
        color=colors["eco-light-blue"],
    )
    .encode(x="f_rate:Q", y="desc:N", text="text:N")
)

# --- Assemble: left panel | names | right panel ------------------------------
layer1 = (bars_left2 + bars_left + text_left + label1 + label2).properties(
    width=200, height=alt.Step(20)
)
layer2 = (bars_right2 + bars_right + text_right).properties(
    width=200, height=alt.Step(20)
)
layer = alt.concat(
    layer1,
    text.properties(width=10, height=alt.Step(20)),
    layer2,
    spacing=5,
)
layer = layer.configure_view(stroke=None)
layer = layer.properties(
    title={
        "text": "Deaths among major occupation groups",
        "dy": -10,
        "subtitle": "rates per 100 000 population, residents of England and Wales aged 20 to 64 years",
    }
)
layer = layer.configure_title(anchor="middle")
layer.save(f"visualisation/{f}.json")
layer

## b

In [19]:
# Read the "Table 4" sheet, turn the " " and ":" placeholder cells into NaN,
# then discard columns and rows that are entirely empty.
df = pd.read_excel("raw/reftablesfinal.xlsx", sheet_name="Table 4", skiprows=7, nrows=25)
df = df.replace(" ", np.nan)
df = df.replace(":", np.nan)
df = df.dropna(how="all", axis=1).dropna(how="all", axis=0)

# Same layout as the Table 3 columns, except that one extra column (named
# "drop1" here) survives between the f_ block and the fa_ block.
df.columns = (
    ["group", "desc"]
    + [
        f"{prefix}_{measure}"
        for prefix in ("m", "ma", "f")
        for measure in ("deaths", "rate", "lci", "hci")
    ]
    + ["drop1"]
    + [f"fa_{measure}" for measure in ("deaths", "rate", "lci", "hci")]
)

In [20]:
# Base file name for Fig 7b; reused when the chart spec is saved further down.
# save_csv_html() is a project helper: going by its printed output it saves a
# CSV and an HTML file and reports the GitHub path of the CSV.
f = "fig7b_deaths_by_occupation2"
f7b = defaults.save_csv_html(df, f, eco_git_path, vega_embed)

Saving CSV...
Saving HTML...
GitHub path:  https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/prints/2021-04-25-covid-status/data/fig7b_deaths_by_occupation2.csv


In [21]:
# Fig 7b -- same back-to-back layout as Fig 7a, drawn from the Table 4 frame:
# women on the left (x-axis reversed), men on the right, occupation names in a
# narrow column between the panels. Solid bars use m_rate/f_rate; the fainter
# bars behind them use ma_rate/fa_rate.
# NOTE(review): each mark_bar()/mark_text() call below sets a new mark
# definition, so size=12 on `base` presumably never reaches the rendered
# bars -- confirm.
# NOTE(review): the title still reads "major occupation groups" although this
# frame comes from a different table with more rows -- confirm the wording.
base = alt.Chart(df).mark_bar(size=12).encode(y=alt.Y("desc:N", axis=None))

# --- Right-hand panel: men -------------------------------------------------
bars_right = base.mark_bar(color=colors["eco-blue"]).encode(
    x=alt.X(
        "m_rate:Q",
        scale=alt.Scale(domain=[0, 500]),
        axis=alt.Axis(title="Men", titleAnchor="start", grid=True),
    )
)
bars_right2 = bars_right.mark_bar(opacity=0.4, color=colors["eco-blue"]).encode(
    x="ma_rate:Q"
)
# Value labels sit just past the end of each solid bar.
text_right = bars_right.mark_text(
    color=colors["eco-blue"],
    dx=5,
    dy=2,
    align="left",
    baseline="middle",
).encode(text=alt.Text("m_rate:N", format=".0f"))

# --- Left-hand panel: women (axis sorted descending so bars grow leftwards) --
bars_left = base.mark_bar(color=colors["eco-light-blue"]).encode(
    x=alt.X(
        "f_rate:Q",
        sort="descending",
        scale=alt.Scale(domain=[0, 500]),
        axis=alt.Axis(
            title="Women", titleAnchor="start", titleAlign="right", grid=True
        ),
    )
)
bars_left2 = bars_left.mark_bar(opacity=0.4, color=colors["eco-light-blue"]).encode(
    x="fa_rate:Q"
)
text_left = bars_left.mark_text(
    color=colors["eco-light-blue"],
    dx=-5,
    dy=2,
    align="right",
    baseline="middle",
).encode(text=alt.Text("f_rate:N", format=".0f"))

# --- Centre column: occupation names -----------------------------------------
# dx nudges the text to the right so it does not sit on top of the bars.
text = base.mark_text(dx=5, align="center", baseline="middle").encode(text="desc:N")

# --- In-plot annotations naming the two bar series (left panel only) ---------
label1 = (
    alt.Chart(
        pd.DataFrame(
            {
                "desc": ["Business and public service associate professionals"],
                "f_rate": [130],
                "text": ["Deaths involving COVID-19"],
            }
        )
    )
    .mark_text(
        fontSize=10,
        dy=5,
        align="right",
        baseline="bottom",
        color=colors["eco-light-blue"],
    )
    .encode(x="f_rate:Q", y="desc:N", text="text:N")
)
label2 = (
    alt.Chart(
        pd.DataFrame(
            {
                "desc": ["Caring personal service occupations"],
                "f_rate": [250],
                "text": ["All causes of death"],
            }
        )
    )
    .mark_text(
        fontSize=10,
        dy=5,
        opacity=0.6,
        align="right",
        baseline="bottom",
        color=colors["eco-light-blue"],
    )
    .encode(x="f_rate:Q", y="desc:N", text="text:N")
)

# --- Assemble: left panel | names | right panel ------------------------------
layer1 = (bars_left2 + bars_left + text_left + label1 + label2).properties(
    width=200, height=alt.Step(20)
)
layer2 = (bars_right2 + bars_right + text_right).properties(
    width=200, height=alt.Step(20)
)
layer = alt.concat(
    layer1,
    text.properties(width=10, height=alt.Step(20)),
    layer2,
    spacing=5,
)
layer = layer.configure_view(stroke=None)
layer = layer.properties(
    title={
        "text": "Deaths among major occupation groups",
        "dy": -10,
        "subtitle": "rates per 100 000 population, residents of England and Wales aged 20 to 64 years",
    }
)
layer = layer.configure_title(anchor="middle")
layer.save(f"visualisation/{f}.json")
layer

## c

In [22]:
# Read the weekly-registration sheet of the 2021 workbook and keep the rows
# from label 21 onwards (the block that is given the m_/f_ age-band names
# below).
df = pd.read_excel(
    "raw/publishedweek142021.xlsx",
    sheet_name="UK - Covid-19 - Weekly reg",
    skiprows=5,
    nrows=38,
)
df = (
    df.loc[21:]
    .drop(columns=["Unnamed: 1"])
    .replace(" ", np.nan)
    .replace(":", np.nan)
    .dropna(how="all", axis=1)
    .dropna(how="all", axis=0)
    .T  # after transposing, each remaining sheet row becomes a column
)

# Seven age bands for men followed by the same seven for women.
df.columns = [
    f"{sex}_{band}"
    for sex in ("m", "f")
    for band in ("Under 1 year", "01-14", "15-44", "45-64", "65-74", "75-84", "85+")
]

# Long format: one row per (date, sex_age-band) pair.
df = df.stack().reset_index()
df.columns = ["date", "age", "value"]

# Split the combined "<sex>_<band>" key into a readable sex column and the
# bare age band.
df = df.assign(
    sex=df["age"].str.split("_").str[0].replace({"m": "Men", "f": "Women"}),
    age=df["age"].str.split("_").str[1],
)

In [23]:
# Read the weekly-registration sheet of the 2020 workbook and keep the rows
# from label 19 onwards (the block that is given the m_/f_ age-band names
# below). The row offsets differ from those used for the 2021 workbook.
df2 = pd.read_excel(
    "raw/publishedweek532020.xlsx",
    sheet_name="UK - Covid-19 - Weekly reg",
    skiprows=4,
    nrows=36,
)
df2 = (
    df2.loc[19:]
    .drop(columns=["Unnamed: 1"])
    .replace(" ", np.nan)
    .replace(":", np.nan)
    .dropna(how="all", axis=1)
    .dropna(how="all", axis=0)
    .T  # after transposing, each remaining sheet row becomes a column
)

# Seven age bands for men followed by the same seven for women.
df2.columns = [
    f"{sex}_{band}"
    for sex in ("m", "f")
    for band in ("Under 1 year", "01-14", "15-44", "45-64", "65-74", "75-84", "85+")
]

# Long format: one row per (date, sex_age-band) pair.
df2 = df2.stack().reset_index()
df2.columns = ["date", "age", "value"]

# Split the combined "<sex>_<band>" key into a readable sex column and the
# bare age band.
df2 = df2.assign(
    sex=df2["age"].str.split("_").str[0].replace({"m": "Men", "f": "Women"}),
    age=df2["age"].str.split("_").str[1],
)

In [24]:
# Put the 2020 rows (df2) in front of the 2021 rows (df) and order by date.
# Careful: `df` is both an input and the result, so running this cell a second
# time without re-running the cell that loads `df` appends the 2020 rows again.
df = pd.concat([df2, df]).sort_values("date")

In [25]:
# Base file name for Fig 7c; both chart specs saved below derive their file
# names from it.
# save_csv_html() is a project helper: going by its printed output it saves a
# CSV and an HTML file and reports the GitHub path of the CSV.
f = "fig7c_deaths_by_age"
f7c = defaults.save_csv_html(df, f, eco_git_path, vega_embed)

Saving CSV...
Saving HTML...
GitHub path:  https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/prints/2021-04-25-covid-status/data/fig7c_deaths_by_age.csv


In [26]:
# Fig 7c (stacked) -- weekly deaths by age band as stacked areas: women in the
# upper panel, men mirrored in the lower panel (y-axis sorted descending).
# The title used to be a leftover from the occupation figures; it now
# describes this chart. The unused `step`/`overlap` assignments and a
# commented-out encoding line were dropped.
base = alt.Chart(df).mark_area(
    interpolate="monotone",
    fillOpacity=0.8,
    stroke=colors["eco-gray"],
    strokeWidth=0.5,
    opacity=0.8,
)

# Upper panel: women. The x-axis is hidden because the lower panel carries it.
women = base.encode(
    x=alt.X("date:T", axis=None),
    y=alt.Y(
        "sum(value):Q",
        axis=alt.Axis(
            grid=True,
            title="Women",
            format="s",
        ),
        scale=alt.Scale(domain=[0, 5500]),
    ),
    color="age",
).transform_filter("datum.sex=='Women'")

# Lower panel: men, with the y-axis reversed so the areas grow downwards.
men = base.encode(
    x=alt.X(
        "date:T",
        axis=alt.Axis(
            grid=False,
            title="",
        ),
    ),
    y=alt.Y(
        "value:Q",
        sort=alt.SortOrder("descending"),
        axis=alt.Axis(
            grid=True,
            title="Men",
            format="s",
        ),
        scale=alt.Scale(domain=[0, 5500]),
    ),
    color="age",
).transform_filter("datum.sex=='Men'")

# Age-band labels, taken from the rows of a single date and shifted right.
# NOTE(review): the filter compares `datum.date` with `toDate(...)` using
# `==`, and unlike the women panel it does not filter on sex -- confirm the
# labels land where intended.
ylabels = (
    alt.Chart(df)
    .mark_text(
        interpolate="linear",
        align="left",
        fontSize=10,
        xOffset=80,
    )
    .encode(
        x=alt.X("date:T", axis=alt.Axis(grid=False, title="")),
        y=alt.Y(
            "sum(value):Q",
            scale=alt.Scale(domain=[0, 5500]),
        ),
        color=alt.Color("age"),
        text=alt.Text("age:N"),
    )
).transform_filter("datum.date==toDate('2020-04-24T00:00:00.000000000')")

layer = (
    alt.vconcat(women + ylabels, men, spacing=0)
    .configure_view(stroke=None, width=400, height=200)
    .properties(
        title={
            "text": "Weekly deaths among age groups",
            "dy": -10,
        }
    )
    .configure_title(anchor="middle")
)
layer.save("visualisation/" + f + ".json")
layer

In [27]:
# Fig 7c (no stacking) -- the same two-panel layout, but every age band is
# drawn from the baseline (stack=None) instead of being stacked, on a smaller
# y-domain.

# Assigned but not referenced anywhere in this cell.
step = 200
overlap = 1

base = alt.Chart(df).mark_area(
    opacity=0.8,
    fillOpacity=0.8,
    interpolate="monotone",
    stroke=colors["eco-gray"],
    strokeWidth=0.5,
)

# Upper panel: women. The x-axis is hidden because the lower panel carries it.
women = base.transform_filter("datum.sex=='Women'").encode(
    x=alt.X("date:T", axis=None),
    y=alt.Y(
        "value:Q",
        stack=None,
        scale=alt.Scale(domain=[0, 2500]),
        axis=alt.Axis(title="Women", format="s", grid=True),
    ),
    color="age",
)

# Lower panel: men, with the y-axis reversed so the areas grow downwards.
men = base.transform_filter("datum.sex=='Men'").encode(
    x=alt.X("date:T", axis=alt.Axis(title="", grid=False)),
    y=alt.Y(
        "value:Q",
        stack=None,
        sort="descending",
        scale=alt.Scale(domain=[0, 2500]),
        axis=alt.Axis(title="Men", format="s", grid=True),
    ),
    color="age",
)

# Age-band labels for the women panel, taken from the rows of a single date
# and shifted to the left of that date.
ylabels = (
    alt.Chart(df)
    .mark_text(
        fontSize=10,
        xOffset=-30,
        align="right",
        interpolate="linear",
    )
    .encode(
        x=alt.X("date:T", axis=alt.Axis(title="", grid=False)),
        y=alt.Y("value:Q", stack=None, scale=alt.Scale(domain=[0, 2500])),
        color=alt.Color("age", legend=None),
        text="age:N",
    )
    .transform_filter("datum.date==toDate('2020-04-24T00:00:00.000000000')")
    .transform_filter("datum.sex=='Women'")
)

layer = alt.vconcat(women + ylabels, men, spacing=0)
layer = layer.configure_view(stroke=None, width=400, height=200)
layer = layer.properties(title={"text": "Weekly deaths among age groups", "dy": -10})
layer = layer.configure_title(anchor="middle")
layer.save(f"visualisation/{f}_nostack.json")
layer