In [125]:
import json
import altair as alt
from altair import expr, datum
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [126]:
# Toggle: True builds charts from in-memory data and tags output files "_local".
LOCAL = True

# Filename suffix appended to generated artefacts; empty when not running locally.
local_suffix = "_local" if LOCAL else ""

In [127]:
%%capture pwd
# Run the shell `pwd` command; the cell magic above stores its output in the
# variable `pwd`, which the notebook reads back through `pwd.stdout`.
!pwd

In [128]:
# uid = "2021-05-05-which-firms-and-industries-have-been-most-affected-by-covid-update"  # article unique ID
# The article ID is the last path component of the working directory, taken
# from the captured `!pwd` output. strip() removes the trailing line
# terminator whether the shell emitted "\r\n" or a bare "\n"; splitting on
# "\r" alone would leave a newline inside every URL built from `uid`.
uid = pwd.stdout.strip().split("/")[-1]
eco_git_home = (
    "https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/"
)
# Remote folder that holds this article's published CSV files.
eco_git_path = eco_git_home + "articles/" + uid + "/data/"


def _fetch_eco(relative_path):
    """Download a file located under ``eco_git_home``.

    Raises ``requests.HTTPError`` on a non-2xx response so that an error page
    is never mistaken for the HTML template or parsed as JSON, and applies a
    timeout so a stalled connection cannot hang the notebook.
    """
    response = requests.get(eco_git_home + relative_path, timeout=30)
    response.raise_for_status()
    return response


# HTML template containing a "JSON_PATH" placeholder, filled in per chart.
vega_embed = _fetch_eco("guidelines/html/vega-embed.html").text

# Colour palettes and global chart configuration shared across articles.
colors = json.loads(_fetch_eco("guidelines/colors/eco-colors.json").content)
category_color = json.loads(
    _fetch_eco("guidelines/colors/eco-category-color.json").content
)
hue_color = json.loads(
    _fetch_eco("guidelines/colors/eco-single-hue-color.json").content
)
mhue_color = json.loads(
    _fetch_eco("guidelines/colors/eco-multi-hue-color.json").content
)
div_color = json.loads(
    _fetch_eco("guidelines/colors/eco-diverging-color.json").content
)
config = json.loads(_fetch_eco("guidelines/charts/eco-global-config.json").content)

# Default chart dimensions from the global config; displayed as the cell output.
height = config["height"]
width = config["width"]
height, width

(300, 500)

# Fig 1

## a

In [129]:
# Load Sheet4 of the raw workbook (the data plotted in Fig 1a) and drop rows
# in which every cell is empty.
df = pd.read_excel(
    "raw/excel data econ obs.xlsx",
    sheet_name="Sheet4",
).dropna(how="all")

In [130]:
# Base name shared by this chart's CSV, JSON and HTML files.
f = "fig1a_bins_primary"
# Remote URL of the CSV, used as the chart's data source when not LOCAL.
f1a = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
# Write the HTML embed page, pointing the template's JSON_PATH placeholder at
# the chart spec under the repo's visualisation/ folder. The context manager
# guarantees the file is flushed and closed, which a bare open().write() does not.
with open("visualisation/" + f + ".html", "w") as embed_file:
    embed_file.write(
        vega_embed.replace(
            "JSON_PATH",
            f1a.replace("/data/", "/visualisation/").replace(".csv", ".json"),
        )
    )
if LOCAL:
    # Locally the chart is built straight from the in-memory DataFrame.
    f1a = df
df.head()

Unnamed: 0,fsm_ks2,missed_days
0,0.159997,5.1
1,0.126644,3.7
2,0.206657,3.1
3,0.130983,2.2
4,0.138964,1.7


In [131]:
# Grey applied to labels, titles, ticks and domain lines on both axes.
_gray = colors["eco-gray"]
_gray_axis = dict(
    labelColor=_gray,
    titleColor=_gray,
    tickColor=_gray,
    domainColor=_gray,
)

_x_axis = alt.Axis(
    title="FSM eligible Primary pupils",
    titleAnchor="end",
    titleX=420,
    titleY=-15,
    titleFontSize=10,
    labelAlign="left",
    labelOffset=2,
    grid=False,
    domain=False,
    ticks=False,
    **_gray_axis,
)
_y_axis = alt.Axis(
    title="Average number of days missed",
    titleAlign="left",
    titleAngle=0,
    titleX=25,
    titleY=-5,
    titleFontSize=10,
    labelAlign="left",
    labelBaseline="middle",
    labelPadding=-5,
    labelOffset=-10,
    tickCount=5,
    format=".1f",
    domain=False,
    ticks=False,
    **_gray_axis,
)

# Binned heatmap: both fields are binned and each cell is coloured by row count.
bins = (
    alt.Chart(f1a)
    .mark_rect()
    .encode(
        x=alt.X("fsm_ks2:Q", bin=alt.Bin(maxbins=20), axis=_x_axis),
        y=alt.Y("missed_days:Q", bin=alt.Bin(maxbins=14), axis=_y_axis),
        color=alt.Color(
            "count(missed_days):Q",
            scale=alt.Scale(scheme="greenblue"),
            legend=None,
        ),
    )
)

# No view border, empty title, fixed canvas size; save the spec and display it.
layer = bins.configure_view(stroke=None).properties(
    title="", height=300, width=400
)
layer.save("visualisation/" + f + ".json")
layer

In [132]:
# Base name shared by this chart's CSV, JSON and HTML files.
f = "fig1a_scatter_primary"
# Remote URL of the CSV, used as the chart's data source when not LOCAL.
f1a = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
# Write the HTML embed page, pointing the template's JSON_PATH placeholder at
# the chart spec under the repo's visualisation/ folder. The context manager
# guarantees the file is flushed and closed, which a bare open().write() does not.
with open("visualisation/" + f + ".html", "w") as embed_file:
    embed_file.write(
        vega_embed.replace(
            "JSON_PATH",
            f1a.replace("/data/", "/visualisation/").replace(".csv", ".json"),
        )
    )
if LOCAL:
    # Locally the chart is built straight from the in-memory DataFrame.
    f1a = df
df.head()

Unnamed: 0,fsm_ks2,missed_days
0,0.159997,5.1
1,0.126644,3.7
2,0.206657,3.1
3,0.130983,2.2
4,0.138964,1.7


In [133]:
# Grey applied to labels, titles, ticks and domain lines on both axes.
_gray = colors["eco-gray"]
_gray_axis = {
    "labelColor": _gray,
    "titleColor": _gray,
    "tickColor": _gray,
    "domainColor": _gray,
}

_x_axis = alt.Axis(
    title="FSM eligible Primary pupils",
    titleAnchor="end",
    titleX=400,
    titleY=-15,
    titleFontSize=10,
    grid=False,
    **_gray_axis,
)
_y_axis = alt.Axis(
    title="Average number of days missed",
    titleAlign="left",
    titleAngle=0,
    titleX=5,
    titleY=5,
    titleFontSize=10,
    tickCount=5,
    format=".0f",
    grid=False,
    **_gray_axis,
)

# Scatter layer: one circle per row of the data.
base = (
    alt.Chart(f1a)
    .mark_circle(color=colors["eco-dot"], size=50)
    .encode(
        x=alt.X("fsm_ks2:Q", axis=_x_axis),
        y=alt.Y("missed_days:Q", axis=_y_axis),
    )
)

# Linear fit of missed_days on fsm_ks2, drawn as a line over the dots.
line = base.transform_regression(
    "fsm_ks2", "missed_days", method="linear"
).mark_line(color=colors["eco-mid-blue"])

# No view border, empty title, fixed canvas size; save the spec and display it.
layer = (
    (base + line)
    .configure_view(stroke=None)
    .properties(title="", height=300, width=400)
)
layer.save("visualisation/" + f + ".json")
layer

## b

In [134]:
# Load Sheet3 of the raw workbook (the data plotted in Fig 1b) and drop rows
# in which every cell is empty.
df = pd.read_excel(
    "raw/excel data econ obs.xlsx",
    sheet_name="Sheet3",
).dropna(how="all")

In [135]:
# Base name shared by this chart's CSV, JSON and HTML files.
f = "fig1b_bins_secondary"
# Remote URL of the CSV, used as the chart's data source when not LOCAL.
f1b = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
# Write the HTML embed page, pointing the template's JSON_PATH placeholder at
# the chart spec under the repo's visualisation/ folder. The context manager
# guarantees the file is flushed and closed, which a bare open().write() does not.
with open("visualisation/" + f + ".html", "w") as embed_file:
    embed_file.write(
        vega_embed.replace(
            "JSON_PATH",
            f1b.replace("/data/", "/visualisation/").replace(".csv", ".json"),
        )
    )
if LOCAL:
    # Locally the chart is built straight from the in-memory DataFrame.
    f1b = df
df.head()

Unnamed: 0,fsm_ks4,missed_days
0,0.16427,5.9
1,0.123025,6.7
2,0.190756,4.1
3,0.092965,6.0
4,0.112387,4.4


In [140]:
# Grey applied to labels, titles, ticks and domain lines on both axes.
_gray = colors["eco-gray"]
_gray_axis = dict(
    labelColor=_gray,
    titleColor=_gray,
    tickColor=_gray,
    domainColor=_gray,
)

_x_axis = alt.Axis(
    title="FSM eligible Secondary pupils",
    titleAnchor="end",
    titleX=420,
    titleY=-15,
    titleFontSize=10,
    labelAlign="left",
    labelOffset=2,
    grid=False,
    domain=False,
    ticks=False,
    **_gray_axis,
)
_y_axis = alt.Axis(
    title="Average number of days missed",
    titleAlign="left",
    titleAngle=0,
    titleX=25,
    titleY=-5,
    titleFontSize=10,
    labelAlign="left",
    labelBaseline="middle",
    labelPadding=-5,
    labelOffset=-10,
    tickCount=5,
    format=".0f",
    domain=False,
    ticks=False,
    **_gray_axis,
)

# Binned heatmap: both fields are binned and each cell is coloured by row count.
bins = (
    alt.Chart(f1b)
    .mark_rect()
    .encode(
        x=alt.X("fsm_ks4:Q", bin=alt.Bin(maxbins=20), axis=_x_axis),
        y=alt.Y("missed_days:Q", bin=alt.Bin(maxbins=14), axis=_y_axis),
        color=alt.Color(
            "count(missed_days):Q",
            scale=alt.Scale(scheme="greenblue"),
            legend=None,
        ),
    )
)

# No view border, empty title, fixed canvas size; save the spec and display it.
layer = bins.configure_view(stroke=None).properties(
    title="", height=300, width=400
)
layer.save("visualisation/" + f + ".json")
layer

# Fig 3

In [137]:
# Base name shared by this chart's CSV, JSON and HTML files.
f = "fig1b_scatter_secondary"
# Remote URL of the CSV, used as the chart's data source when not LOCAL.
f1b = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
# Write the HTML embed page, pointing the template's JSON_PATH placeholder at
# the chart spec under the repo's visualisation/ folder. The context manager
# guarantees the file is flushed and closed, which a bare open().write() does not.
with open("visualisation/" + f + ".html", "w") as embed_file:
    embed_file.write(
        vega_embed.replace(
            "JSON_PATH",
            f1b.replace("/data/", "/visualisation/").replace(".csv", ".json"),
        )
    )
if LOCAL:
    # Locally the chart is built straight from the in-memory DataFrame.
    f1b = df
df.head()

Unnamed: 0,fsm_ks4,missed_days
0,0.16427,5.9
1,0.123025,6.7
2,0.190756,4.1
3,0.092965,6.0
4,0.112387,4.4


In [138]:
# Scatter layer: one circle per row of the secondary-school data.
base = (
    alt.Chart(f1b)
    .mark_circle(color=colors["eco-dot"], size=50)
    .encode(
        alt.X(
            "fsm_ks4:Q",
            axis=alt.Axis(
                grid=False,
                # This chart plots fsm_ks4, so the axis is labelled "Secondary"
                # to match the fig1b bins chart; it previously said "Primary",
                # carried over from the fig1a scatter cell.
                title="FSM eligible Secondary pupils",
                titleAnchor="end",
                titleY=-15,
                titleX=400,
                titleFontSize=10,
                labelColor=colors["eco-gray"],
                titleColor=colors["eco-gray"],
                tickColor=colors["eco-gray"],
                domainColor=colors["eco-gray"],
            ),
        ),
        alt.Y(
            "missed_days:Q",
            axis=alt.Axis(
                labelColor=colors["eco-gray"],
                titleColor=colors["eco-gray"],
                tickColor=colors["eco-gray"],
                domainColor=colors["eco-gray"],
                title="Average number of days missed",
                grid=False,
                titleX=5,
                titleY=5,
                titleAngle=0,
                titleFontSize=10,
                titleAlign="left",
                tickCount=5,
                format=".0f",
            ),
        ),
    )
)

# Linear fit of missed_days on fsm_ks4, drawn as a line over the dots.
line = base.transform_regression("fsm_ks4", "missed_days", method="linear").mark_line(
    color=colors["eco-mid-blue"]
)
# No view border, empty title, fixed canvas size; save the spec and display it.
layer = (
    (base + line)
    .configure_view(stroke=None)
    .properties(title="")
    .properties(height=300, width=400)
)
layer.save("visualisation/" + f + ".json")
layer