In [2]:
import json
import altair as alt
from altair import expr, datum
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
uid = "2021-04-19-how-should-governments-source-public-services-during-a-crisis"  # article unique ID
# Base URL of this article's data folder in the ECOvisualisations GitHub repository.
eco_git_path = (
    "https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/articles/"
    + uid
    + "/data/"
)


def _read_text(path):
    """Return the whole content of the text file at `path`, closing the handle afterwards."""
    with open(path, "r") as handle:
        return handle.read()


def _read_json(path):
    """Return the object decoded from the JSON file at `path`."""
    return json.loads(_read_text(path))


# HTML template; the export cells substitute its "JSON_PATH" placeholder.
vega_embed = _read_text("../../guidelines/html/vega-embed.html")

# Shared colour definitions loaded from the guidelines folder.
colors = _read_json("../../guidelines/colors/eco-colors.json")
category_color = _read_json("../../guidelines/colors/eco-category-color.json")
hue_color = _read_json("../../guidelines/colors/eco-single-hue-color.json")
mhue_color = _read_json("../../guidelines/colors/eco-multi-hue-color.json")
div_color = _read_json("../../guidelines/colors/eco-diverging-color.json")

# Global chart configuration; only its height and width are read here.
config = _read_json("../../guidelines/charts/eco-global-config.json")
height = config["height"]
width = config["width"]
height, width

(300, 500)

# Fig 1

In [4]:
# Read the "Figure 1" sheet starting after the first 63 rows, then discard
# every column and every row that contains no values at all.
df = pd.read_excel("raw/Figures data.xlsx", sheet_name="Figure 1", skiprows=63)
df = df.dropna(axis="columns", how="all").dropna(axis="index", how="all")

# Positional names for the remaining columns; the "dropN" placeholders mark
# columns that are removed immediately below.
df.columns = [
    "Country",
    "perc_total_2017",
    "perc_total_2009",
    "drop1",
    "perc_total_2007",
    "drop2",
    "GDP_share_2017",
    "GDP_share_2009",
    "drop3",
    "GDP_share_2007",
    "drop4",
]
df = df.drop(columns=[name for name in df.columns if "drop" in name])
df.head()

Unnamed: 0,Country,perc_total_2017,perc_total_2009,perc_total_2007,GDP_share_2017,GDP_share_2009,GDP_share_2007
0,CRI,15.793627,18.222236,15.293162,6.935823,6.395918,4.721976
1,IDN,33.979085,,,7.533923,,
2,COL,25.297053,,,11.049953,,
3,ZAF,,,,11.740174,11.893996,11.357135
4,BRA,24.701716,26.119665,24.340755,16.44872,16.00749,15.071296


In [5]:
# Missing values become 0 (kept for axis consistency), then the wide table is
# stacked into one row per (Country, column name) pair.
df = df.fillna(0).set_index("Country").stack().reset_index()
df.columns = ["Country", "MEasure", "Amount"]

# Column names look like "perc_total_2017": the last four characters are the
# year, and everything before the final "_YYYY" is the measure name.
df = df.assign(
    Measure=df["MEasure"].str[:-5],
    Year=df["MEasure"].str[-4:],
).drop(columns="MEasure")

In [6]:
# File stem shared by the CSV, HTML and JSON outputs of Figure 1.
f = "fig1_government_procurement_spending"
# Public URL at which the CSV written below is expected to be served.
f1 = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")

# Write the embed page: the template's JSON_PATH placeholder is replaced with
# the URL of the chart spec (same URL as the CSV, but under /visualisation/
# and with a .json extension). The `with` block closes the file once written.
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(
        vega_embed.replace(
            "JSON_PATH", f1.replace("/data/", "/visualisation/").replace(".csv", ".json")
        )
    )

567

In [7]:
# Pivot the innermost index level (Measure) into columns: one row per
# Country/Year, one Amount column per measure.
df1 = df.set_index(["Country", "Year", "Measure"]).unstack(level=-1).reset_index()
# Flatten the two-level column labels: use the lower label when it is set,
# otherwise fall back to the upper one (the former index columns).
df1.columns = [upper if lower == "" else lower for upper, lower in df1.columns]
df1.to_csv("data/" + f + "_unstacked_measure.csv")

In [8]:
# Pivot the innermost index level (Year) into columns: one row per
# Country/Measure, one Amount column per year.
df2 = df.set_index(["Country", "Measure", "Year"]).unstack(level=-1).reset_index()
# Flatten the two-level column labels: use the lower label when it is set,
# otherwise fall back to the upper one (the former index columns).
df2.columns = [upper if lower == "" else lower for upper, lower in df2.columns]
df2.to_csv("data/" + f + "_unstacked_year.csv")

In [9]:
# Preview the long-format Figure 1 frame (Country, Amount, Measure, Year).
df.head()

Unnamed: 0,Country,Amount,Measure,Year
0,CRI,15.793627,perc_total,2017
1,CRI,18.222236,perc_total,2009
2,CRI,15.293162,perc_total,2007
3,CRI,6.935823,GDP_share,2017
4,CRI,6.395918,GDP_share,2009


In [10]:
# Draft of Figure 1 built from the in-memory frame. Both measures share one
# quantitative axis: `signed_Amount` (computed on the layered chart near the
# end of this cell) negates GDP_share values so they extend to the left while
# perc_total values extend to the right.

# Thick bars; the OECD row is coloured differently from the other countries.
bars = (
    alt.Chart(df)
    .mark_bar(size=12)
    .encode(
        color=alt.condition(
            datum.Country == "OECD",
            alt.ColorValue(hue_color[1]),
            alt.ColorValue(hue_color[0]),
        ),
        x=alt.X("signed_Amount:Q", axis=alt.Axis(grid=True, title="")),
        y=alt.Y("Country:N", axis=None),
    )
)

# Three text layers: country names at a fixed x position, plus the OECD value
# printed once on the positive side and once on the negative side.
text = (
    (
        bars.mark_text(
            align="left",
            baseline="middle",
            dx=5,  # Nudges text to right so it doesn't appear on top of the bar
        )
        .encode(
            text=alt.Text("Country:N"),
            # All country labels share one colour; the previous condition had
            # two identical branches, so a constant value is equivalent.
            color=alt.value(hue_color[3]),
            x=alt.value(190),
        )
        .transform_filter("datum.signed_Amount>=0")
    )
    + (
        bars.mark_text(align="left", baseline="middle", color=hue_color[3], dx=15, dy=2)
        .encode(text=alt.Text("Amount:N", format=".1f"))
        .transform_filter("datum.Country=='OECD'")
        .transform_filter("datum.signed_Amount>=0")
    )
    + (
        bars.mark_text(
            align="left", baseline="middle", color=hue_color[3], dx=-38, dy=2
        )
        .encode(text=alt.Text("Amount:N", format=".1f"))
        .transform_filter("datum.Country=='OECD'")
        .transform_filter("datum.signed_Amount<=0")
    )
)

# Tick marks for the earlier years; rows whose Amount is 0 are skipped.
tick07 = (
    alt.Chart(df)
    .mark_tick(
        color=hue_color[3],
        thickness=4,
        size=15,  # controls width of tick.
    )
    .encode(x="signed_Amount:Q", y="Country:N")
    .transform_filter("datum.Amount!=0")
)

tick09 = (
    alt.Chart(df)
    .mark_tick(
        color=hue_color[1],
        thickness=4,
        size=15,  # controls width of tick.
    )
    .encode(x="signed_Amount:Q", y="Country:N")
    .transform_filter("datum.Amount!=0")
)

# Thin bars leading to the ticks above.
bars07 = (
    alt.Chart(df)
    .mark_bar(size=2, color=hue_color[3])
    .encode(
        # Fixed: the shorthand was "calc(signed_Amount:Q", which names a field
        # that does not exist; it now matches bars09.
        x=alt.X("signed_Amount:Q"),
        y=alt.Y("Country:N"),
    )
)

bars09 = (
    alt.Chart(df)
    .mark_bar(size=2, color=hue_color[1])
    .encode(
        x=alt.X("signed_Amount:Q"),
        y=alt.Y("Country:N"),
    )
)

# Only referenced by the commented-out `layer2` experiment below.
bars2 = (
    alt.Chart(df)
    .mark_bar(size=12, color=hue_color[0])
    .encode(
        x=alt.X("signed_Amount:Q"),
        y=alt.Y("Country:N"),
    )
)

# Each component is restricted to its own year; the calculate transform on the
# layered chart supplies `signed_Amount` to every layer.
layer1 = (
    (
        bars07.transform_filter("datum.Year==2007")
        + bars09.transform_filter("datum.Year==2009")
        + bars.transform_filter("datum.Year==2017")
        + text.transform_filter("datum.Year==2017")
        + tick07.transform_filter("datum.Year==2007")
        + tick09.transform_filter("datum.Year==2009")
    )
    .properties(height=alt.Step(20), width=500)
    .transform_calculate(
        signed_Amount="datum.Measure == 'GDP_share' ? -datum.Amount : datum.Amount"
    )
)

# layer2 = (
#     (
#         bars07.transform_filter("datum.Year==2007")
#         + bars09.transform_filter("datum.Year==2009")
#         + bars2.transform_filter("datum.Year==2017")
#         + text.transform_filter("datum.Year==2017")
#         + tick07.transform_filter("datum.Year==2007")
#         + tick09.transform_filter("datum.Year==2009")
#     )
#     .properties(height=alt.Step(20))
#     .transform_filter("datum.Measure=='perc_total'")
# )

# layer = layer1 | layer2

# layer1.save("visualisation/" + f + ".json")
layer1

In [11]:
# Published version of Figure 1: two mirrored panels (GDP share on the left,
# share of total government spending on the right) with a narrow column of
# country labels between them. Data is referenced by URL (`f1`) instead of
# being embedded from the in-memory frame.
base = (
    alt.Chart(f1)
    .mark_bar(size=12)
    .encode(
        y=alt.Y("Country:N", axis=None),
    )
)
# Right-hand panel bars; the OECD and GBR rows are highlighted in eco-pink.
bars_right = base.encode(
    color=alt.condition(
        alt.FieldOneOfPredicate(field="Country", oneOf=["OECD", "GBR"]),
        alt.ColorValue(colors["eco-pink"]),
        alt.ColorValue(hue_color[0]),
    ),
    x=alt.X(
        "Amount:Q",
        axis=alt.Axis(
            grid=True,
            title="% of total government spending",
            titleAnchor="start",
        ),
    ),
)
# Left-hand panel bars: same encoding as bars_right, but with its own axis
# title and a descending x sort.
bars_left = bars_right.encode(
    x=alt.X(
        "Amount:Q",
        axis=alt.Axis(
            grid=True, title="% of GDP", titleAnchor="start", titleAlign="right"
        ),
        sort=alt.SortOrder("descending"),
    ),
)

# Country names for the middle column, using the same OECD/GBR highlight.
text = base.mark_text(
    align="center",
    baseline="middle",
    dx=5,  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text=alt.Text("Country:N"),
    color=alt.condition(
        alt.FieldOneOfPredicate(field="Country", oneOf=["OECD", "GBR"]),
        alt.ColorValue(colors["eco-pink"]),
        alt.ColorValue(hue_color[3]),
    ),
)
# Numeric value labels, shown only for the OECD and GBR rows of each panel.
text_left = (
    bars_left.mark_text(align="right", baseline="middle", dx=-30, dy=2)
    .encode(text=alt.Text("Amount:N", format=".1f"))
    .transform_filter(alt.FieldOneOfPredicate(field="Country", oneOf=["OECD", "GBR"]))
)
text_right = (
    bars_right.mark_text(align="left", baseline="middle", dx=15, dy=2)
    .encode(text=alt.Text("Amount:N", format=".1f"))
    .transform_filter(alt.FieldOneOfPredicate(field="Country", oneOf=["OECD", "GBR"]))
)

# NOTE: `base` is rebound here. Everything defined above keeps the earlier
# base; the ticks and thin bars below also get an x encoding and skip rows
# whose Amount is 0.
base = base.encode(
    x=alt.X("Amount:Q"),
).transform_filter("datum.Amount!=0")

tick07 = base.mark_tick(
    color=hue_color[3],
    thickness=4,
    size=15,  # controls width of tick.
)
tick09 = base.mark_tick(
    color=hue_color[1],
    thickness=4,
    size=15,  # controls width of tick.
)
bars07 = base.mark_bar(size=2, color=hue_color[3])
bars09 = base.mark_bar(size=2, color=hue_color[1])

# Hand-placed year labels on the "CZE" row. The "c" column is not referenced
# by any encoding below; label colours come from the explicit scale on Year.
labels = (
    alt.Chart(
        pd.DataFrame(
            [
                {"Year": 2007, "x": 15.5, "y": "CZE", "c": hue_color[3]},
                {"Year": 2009, "x": 19, "y": "CZE", "c": hue_color[1]},
                {"Year": 2017, "x": 12, "y": "CZE", "c": hue_color[0]},
            ],
            index=range(3),
        )
    )
    .mark_text(align="left", baseline="middle", dy=-18)
    .encode(
        text="Year",
        x="x",
        y="y",
        color=alt.Color(
            "Year",
            scale=alt.Scale(
                domain=[2007, 2009, 2017],
                range=[hue_color[3], hue_color[1], hue_color[0]],
            ),
            legend=None,
        ),
    )
)

# Left panel: GDP_share rows only, with the year labels layered on top.
layer1 = (
    bars07.transform_filter("datum.Year==2007")
    + bars09.transform_filter("datum.Year==2009")
    + bars_left.transform_filter("datum.Year==2017")
    + tick07.transform_filter("datum.Year==2007")
    + tick09.transform_filter("datum.Year==2009")
    + text_left.transform_filter("datum.Year==2017")
).properties(height=alt.Step(20), width=200).transform_filter(
    "datum.Measure == 'GDP_share'"
) + labels
# Right panel: perc_total rows only.
layer2 = (
    (
        bars07.transform_filter("datum.Year==2007")
        + bars09.transform_filter("datum.Year==2009")
        + bars_right.transform_filter("datum.Year==2017")
        + tick07.transform_filter("datum.Year==2007")
        + tick09.transform_filter("datum.Year==2009")
        + text_right.transform_filter("datum.Year==2017")
    )
    .properties(height=alt.Step(20), width=200)
    .transform_filter("datum.Measure == 'perc_total'")
)
# Final figure: left panel | country labels (2017 rows) | right panel.
layer = (
    alt.concat(
        layer1,
        text.properties(height=alt.Step(20), width=10).transform_filter(
            "datum.Year==2017"
        ),
        layer2,
        spacing=5,
    )
    .configure_view(stroke=None)
    .properties(title="General government procurement spending")
    .configure_title(anchor="middle")
)
# Write the chart spec next to the HTML embed page produced earlier.
layer.save("visualisation/" + f + ".json")
layer

# Fig 2

In [12]:
# Load the "Figure 2" sheet as-is: one row per country, one column per
# spending function.
df = pd.read_excel(io="raw/Figures data.xlsx", sheet_name="Figure 2")
df.head(5)

Unnamed: 0,Country,Defence,Public order and safety,Economic affairs,Health,Education,Social protection,other
0,Finland,4.568019,2.151131,13.267813,22.827559,11.55794,18.332247,27.295292
1,France,6.323427,2.488769,12.375364,38.546854,6.735229,14.814522,18.715835
2,Germany,4.047647,3.182319,9.280173,40.606025,6.45442,20.580823,15.848592
3,Japan,3.183465,1.852087,14.259301,44.61663,6.525101,13.769729,15.7938
4,Netherlands,2.79251,3.631587,11.782621,35.6998,8.746193,21.108641,16.238649


In [13]:
# Normalise header casing: first letter upper-case, the rest lower-case.
df.columns = list(map(str.capitalize, df.columns))

In [14]:
# Wide -> long: stack the function columns so that each row holds one
# (Country, Function) value.
stacked = df.set_index("Country").stack()
df = stacked.reset_index()
# "Ammount" (sic) is the column name used by the exported CSV and the charts.
df.columns = ["Country", "Function", "Ammount"]

In [15]:
# File stem shared by the CSV, HTML and JSON outputs of Figure 2.
f = "fig2_ammount_by_country_and_function"
# Public URL at which the CSV written below is expected to be served.
f2 = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")

# Write the embed page: the template's JSON_PATH placeholder is replaced with
# the URL of the chart spec (same URL as the CSV, but under /visualisation/
# and with a .json extension). The `with` block closes the file once written.
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(
        vega_embed.replace(
            "JSON_PATH", f2.replace("/data/", "/visualisation/").replace(".csv", ".json")
        )
    )

567

In [16]:
# Preview the long-format Figure 2 frame (Country, Function, Ammount).
df.head()

Unnamed: 0,Country,Function,Ammount
0,Finland,Defence,4.568019
1,Finland,Public order and safety,2.151131
2,Finland,Economic affairs,13.267813
3,Finland,Health,22.827559
4,Finland,Education,11.55794


In [17]:
# First draft of Figure 2: horizontal stacked bars per country, with the sum
# of Ammount computed inline through the "sum(Ammount)" shorthand.
bars = (
    alt.Chart(df)
    .mark_bar(size=23)
    .encode(
        x=alt.X(
            "sum(Ammount):Q",
            stack="zero",
            axis=alt.Axis(grid=False, title=""),
            scale=alt.Scale(domain=[0, 100]),
        ),
        y=alt.Y("Country:N", sort=[], axis=alt.Axis(grid=False, title="")),
        color=alt.Color(
            "Function",
            scale=alt.Scale(domain=df["Function"].unique(), range=category_color),
        ),
        order="Country",
    )
)
# Value labels stacked the same way as the bars (one per Function via `detail`).
# NOTE(review): `datum.x` does not match any column of `df` (Country, Function,
# Ammount), so this predicate presumably never selects the None branch; the
# next cell tests an explicitly aggregated `SAmmount` field instead. Confirm
# which field and threshold were intended.
text = (
    alt.Chart(df)
    .mark_text(dx=-15, dy=2, color="white")
    .encode(
        x=alt.X("sum(Ammount):Q", stack="zero"),
        y=alt.Y("Country:N", sort=[]),
        detail="Function:N",
        text=alt.Text("sum(Ammount):Q", format=".0f"),
        order="Country",
        color=alt.condition(
            datum.x < 17,
            alt.ColorValue(None),
            alt.ColorValue("white"),
        ),
    )
)

# Layer the labels over the bars; each country row is 30px tall.
layer1 = (bars + text).properties(height=alt.Step(30))
layer1

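With explicit groupby: `Ammount` is summed per Country and Function in a `transform_aggregate`, so the label-colour condition can test the aggregated `SAmmount` field directly.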

In [18]:
# Stacked bars drawn from `SAmmount`, the per-(Country, Function) sum that the
# aggregate transform on the layered chart (end of this cell) provides.
bars = alt.Chart(df).mark_bar(size=23)
bars = bars.encode(
    x=alt.X(
        "SAmmount:Q",
        stack="zero",
        axis=alt.Axis(grid=False, title=""),
        scale=alt.Scale(domain=[0, 100]),
    ),
    y=alt.Y("Country:N", sort=[], axis=alt.Axis(grid=False, title="")),
    color=alt.Color(
        "Function",
        scale=alt.Scale(domain=df["Function"].unique(), range=category_color),
    ),
    order="Country",
)

# Value labels; segments with SAmmount below 6 get a null colour instead of white.
text = alt.Chart(df).mark_text(dx=-15, dy=2, color="white")
text = text.encode(
    x=alt.X("SAmmount:Q", stack="zero"),
    y=alt.Y("Country:N", sort=[]),
    detail="Function:N",
    text=alt.Text("SAmmount:Q", format=".0f"),
    order="Country",
    color=alt.condition(
        datum.SAmmount < 6,
        alt.ColorValue(None),
        alt.ColorValue("white"),
    ),
)

# Combine both layers, then aggregate once for the whole layered chart.
layer1 = alt.layer(bars, text)
layer1 = layer1.properties(height=alt.Step(30))
layer1 = layer1.transform_aggregate(
    SAmmount="sum(Ammount)", groupby=["Country", "Function"]
)
# layer1.save("visualisation/" + f + ".json")
layer1

In [19]:
# Same chart as the explicit-groupby draft, with the view size taken from the
# global config (height, and width reduced by 150).
bars = alt.Chart(df).mark_bar(size=23)
bars = bars.encode(
    x=alt.X(
        "SAmmount:Q",
        stack="zero",
        axis=alt.Axis(grid=False, title=""),
        scale=alt.Scale(domain=[0, 100]),
    ),
    y=alt.Y("Country:N", sort=[], axis=alt.Axis(grid=False, title="")),
    color=alt.Color(
        "Function",
        scale=alt.Scale(domain=df["Function"].unique(), range=category_color),
    ),
    order="Country",
)

# Value labels; segments with SAmmount below 6 get a null colour instead of white.
text = alt.Chart(df).mark_text(dx=-15, dy=2, color="white")
text = text.encode(
    x=alt.X("SAmmount:Q", stack="zero"),
    y=alt.Y("Country:N", sort=[]),
    detail="Function:N",
    text=alt.Text("SAmmount:Q", format=".0f"),
    order="Country",
    color=alt.condition(
        datum.SAmmount < 6,
        alt.ColorValue(None),
        alt.ColorValue("white"),
    ),
)

# Layer, aggregate once per (Country, Function), then apply the view size.
layer1 = alt.layer(bars, text)
layer1 = layer1.properties(height=alt.Step(30))
layer1 = layer1.transform_aggregate(
    SAmmount="sum(Ammount)", groupby=["Country", "Function"]
)
layer1 = layer1.configure_view(
    height=height,
    width=width - 150,
)
# layer1.save("visualisation/" + f + ".json")
layer1

In [20]:
# Published version of Figure 2: data is referenced by URL (`f2`), and the
# colour range is built from the reversed multi-hue palette followed by the
# reversed diverging palette minus its first two entries.
bars = alt.Chart(f2).mark_bar(size=23)
bars = bars.encode(
    x=alt.X(
        "SAmmount:Q",
        stack="zero",
        axis=alt.Axis(grid=False, title=""),
        scale=alt.Scale(domain=[0, 100]),
    ),
    y=alt.Y("Country:N", sort=[], axis=alt.Axis(grid=False, title="")),
    color=alt.Color(
        "Function:N",
        scale=alt.Scale(
            domain=df["Function"].unique(),
            range=mhue_color[::-1] + div_color[::-1][2:],
        ),
    ),
    order="Country:N",
)

# Labels reuse the bar encodings, overriding only the text and colour
# channels; segments with SAmmount below 6 get a null colour instead of white.
text = bars.mark_text(dx=-15, dy=2, color="white")
text = text.encode(
    text=alt.Text("SAmmount:Q", format=".0f"),
    color=alt.condition(
        datum.SAmmount < 6,
        alt.ColorValue(None),
        alt.ColorValue("white"),
    ),
)

# Layer, aggregate once per (Country, Function), size the view, then save.
layer1 = alt.layer(bars, text)
layer1 = layer1.properties(height=alt.Step(30))
layer1 = layer1.transform_aggregate(
    SAmmount="sum(Ammount)", groupby=["Country", "Function"]
)
layer1 = layer1.configure_view(
    height=height,
    width=width - 150,
)
layer1.save("visualisation/" + f + ".json")
layer1