In [362]:
import json
import altair as alt
from altair import expr, datum
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [363]:
import colorsys
from matplotlib.colors import to_hex, to_rgb


def scale_lightness(rgb, scale_l):
    rgbhex = False
    if "#" in rgb:
        rgb = to_rgb(rgb)
        rgbhex = True
    # convert rgb to hls
    h, l, s = colorsys.rgb_to_hls(*rgb)
    # manipulate h, l, s values and return as rgb
    c = colorsys.hls_to_rgb(h, min(1, l * scale_l), s=s)
    if rgbhex:
        c = to_hex(c)
    return c

In [364]:
LOCAL = True

if LOCAL:
    local_suffix = "_local"
else:
    local_suffix = ""

In [365]:
%%capture pwd
!pwd

In [366]:
uid = pwd.stdout.split("/")[-1].split("\r")[0]
eco_git_home = (
    "https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/"
)
eco_git_path = eco_git_home + "articles/" + uid + "/data/"
vega_embed = requests.get(eco_git_home + "guidelines/html/vega-embed.html").text
colors = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-colors.json").content
)
category_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-category-color.json").content
)
hue_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-single-hue-color.json").content
)
mhue_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-multi-hue-color.json").content
)
div_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-diverging-color.json").content
)
config = json.loads(
    requests.get(eco_git_home + "guidelines/charts/eco-global-config.json").content
)
height = config["height"]
width = config["width"]
uid, height, width

('how-can-education-and-skills-contribute-to-levelling-up', 300, 500)

# Fig 1

In [367]:
df = pd.read_csv("raw/Default Dataset.csv", header=None)
df.columns=['inequality','elasticity','country']

In [368]:
f = "fig1_inequality"
f1 = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
open("visualisation/" + f + ".html", "w").write(
    vega_embed.replace(
        "JSON_PATH", f1.replace("/data/", "/visualisation/").replace(".csv", ".json")
    )
)
if LOCAL:
    f1 = df
readme = "### " + f + '\n!["' + f + '"](visualisation/' + f + '.png "' + f + '")\n\n'
df.head()

Unnamed: 0,inequality,elasticity,country
0,20.0,0.27074,Sweden
1,20.903442,0.180551,Finland
2,22.021989,0.169535,Norway
3,22.021989,0.14957,Denmark
4,25.00478,0.319621,Germany


In [369]:
base = alt.Chart(f1).encode(
    x=alt.X(
        "inequality:Q",
        sort=[],
        axis=alt.Axis(
            grid=False,
            titleAlign="right",
            titleAnchor="end",
            title="Income inequality (higher values ➡ more inequality)",
            titleY=-15,
            # titleX=207,
            labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
            tickColor=colors["eco-gray"],
            domainColor=colors["eco-gray"],
            tickCount=10,
            orient="bottom",
            labelAngle=0,
        ),
        scale=alt.Scale(domain=[18, 35]),
    )
)
points = base.mark_circle(color=colors["eco-turquiose"],size=50).encode(
    y=alt.Y(
        "elasticity:Q",
        sort=[],
        axis=alt.Axis(
            grid=False,
            title="Generational earnings elasticity (higher values ➡ less mobility)",
            titleX=-5,
            titleY=-5,
            titleBaseline="bottom",
            titleAngle=0,
            titleAlign="left",
            labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
            tickColor=colors["eco-gray"],
            domainColor=colors["eco-gray"],
        ),
        scale=alt.Scale(domain=[0.05, 0.6]),
    ),
    color=alt.Color('country:N',legend=None)
)
reg=points.transform_regression('inequality', 'elasticity').mark_line(strokeWidth=1,stroke=colors["eco-gray"],opacity=0.4)
top=['United Kingdom','Australia','Norway', ]
bottom=['Sweden', 'Finland',  'Denmark', 'Germany', 'New Zealand',
       'Canada', 'Japan', 'France', 'Italy',
        'United States']
labels1 = points.mark_text(dy=10).encode(text='country:N').transform_filter(
    alt.FieldOneOfPredicate(field='country',oneOf=bottom)
)
labels2 = points.mark_text(dy=-10).encode(text='country:N').transform_filter(
    alt.FieldOneOfPredicate(field='country',oneOf=top)
)
layer1 = (
    ((reg+points+labels1+labels2).properties(height=300, width=400))
    .configure_view(stroke=None)
    .properties(title="")
)
layer1.save("visualisation/" + f + ".json")
layer1.save("visualisation/" + f + ".png")
open("README.md", "w").write(readme)
layer1

# Fig 2

https://geoportal.statistics.gov.uk/

Regions

In [370]:
topo = json.loads(open("visualisation/fig2/Regions_(December_2021)_EN_BFC.json", "r").read())

In [371]:
topo['objects']['Regions_(December_2021)_EN_BFC']['geometries'][0]

{'arcs': [[[0]], [[1, 2, 3]], [[4]]],
 'type': 'MultiPolygon',
 'properties': {'OBJECTID': 1,
  'RGN21CD': 'E12000001',
  'RGN21NM': 'North East',
  'BNG_E': 417314,
  'BNG_N': 600356,
  'LONG': -1.72888,
  'LAT': 55.297009,
  'GlobalID': '{200E0AB8-C057-4318-8FA7-83AFAAE4532F}',
  'SHAPE_Length': 13.036922133189785,
  'SHAPE_Area': 1.2084824201331612}}

In [372]:
df=pd.read_csv('raw/ks2_regional_and_local_authority_2016_to_2022_provisional.csv')

In [373]:
df=df[df['geographic_level']=='Regional'][['time_period','region_code','region_name','pt_rwm_met_expected_standard','gender']]

In [374]:
df['pt_rwm_met_expected_standard']=df['pt_rwm_met_expected_standard'].replace('x',np.nan).replace('c',np.nan).astype(float)
df=df.dropna(subset=['pt_rwm_met_expected_standard'])
df['year']=df['time_period'].astype(str).str[:4].astype(int)

In [375]:
open('visualisation/fig2/data_regions.json','w').write(json.dumps(list(df.T.to_dict().values())))

25565

LADs

In [376]:
topo = json.loads(open("visualisation/fig2/Counties_and_Unitary_Authorities_(December_2021)_EN_BFC.json", "r").read())

In [377]:
topo['objects'][list(topo['objects'].keys())[0]]['geometries'][0]

{'arcs': [[2, 3, 4, 5, 6]],
 'type': 'Polygon',
 'properties': {'OBJECTID': 1,
  'CTYCDE': 'E06000001',
  'CTYNME': 'Hartlepool',
  'BNG_E': 447160,
  'BNG_N': 531474,
  'LONG': -1.27018,
  'LAT': 54.67614,
  'GlobalID': '{36EA86F8-3163-4145-99A4-8E9586432638}',
  'SHAPE_Length': 0.8998598929545726,
  'SHAPE_Area': 0.013057380459647069}}

In [378]:
df=pd.read_csv('raw/ks2_regional_and_local_authority_2016_to_2022_provisional.csv')

In [379]:
df['la_name']=df['la_name'].str.replace("City of London,", "").str.replace(" City", "").str.replace("City of ", "").str.replace(" Islands", "").\
        str.replace(",", ", ").str.replace("  ", " ").str.strip()
df=df[df['geographic_level']=='Local authority'][['time_period','new_la_code','la_name','pt_rwm_met_expected_standard','gender']]

In [380]:
df['pt_rwm_met_expected_standard']=df['pt_rwm_met_expected_standard'].replace('x',np.nan).replace('c',np.nan).astype(float)
df=df.dropna(subset=['pt_rwm_met_expected_standard'])
df['year']=df['time_period'].astype(str).str[:4].astype(int)

In [381]:
open('visualisation/fig2/data_lads.json','w').write(json.dumps(list(df.T.to_dict().values())))

336094

Save

In [382]:
len(df['la_name'].unique())

153

In [383]:
readme = (
    "### fig2_school"
    + "\n[Interactive map](https://economicsobservatory.github.io/ECOvisualisations/articles/"+uid+"/visualisation/fig2/)\n\n"
)
open("README.md", "a").write(readme)

178

# Fig 3

In [384]:
df = pd.read_excel("raw/Book1.xlsx", header=None)
df['year']=df[0].str.split(',').str[0]
df['state']=df[0].str.split(',').str[1]
df=df.drop(0,axis=1).astype(float)
df['year']+=2004
df['year']=df['year'].apply(lambda x: int(x))
df1=df.groupby('year').mean()

df = pd.read_excel("raw/Book1.xlsx", header=None,sheet_name='Sheet2')
df['year']=df[0].str.split(',').str[0]
df['private']=df[0].str.split(',').str[1]
df=df.drop(0,axis=1).astype(float)
df['year']=df['year']*1.02+2003.9
df['year']=df['year'].apply(lambda x: int(x))
df2=df.groupby('year').mean()

df = pd.read_excel("raw/Book1.xlsx", header=None,sheet_name='Sheet3')
df['year']=df[0].str.split(',').str[0]
df['bursary']=df[0].str.split(',').str[1]
df=df.drop(0,axis=1).astype(float)
df['year']=df['year']*1.02+2003.9
df['year']=df['year'].apply(lambda x: int(x))
df3=df.groupby('year').mean()

In [385]:
df=df1.join(df2).join(df3).reset_index()

In [386]:
f = "fig3_private_schools"
f3 = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
open("visualisation/" + f + ".html", "w").write(
    vega_embed.replace(
        "JSON_PATH", f3.replace("/data/", "/visualisation/").replace(".csv", ".json")
    )
)
if LOCAL:
    f3 = df
readme = "### " + f + '\n!["' + f + '"](visualisation/' + f + '.png "' + f + '")\n\n'
df.head()

Unnamed: 0,year,state,private,bursary
0,2003,6011.71229,8945.05023,9706.679464
1,2004,6261.519369,9094.905539,9839.803857
2,2005,6631.669335,9413.020491,10112.290344
3,2006,6879.883758,9830.679104,10549.402021
4,2007,7169.085422,10470.006905,11322.903324


In [387]:
base = alt.Chart(f3).encode(
    x=alt.X(
        "year:Q",
        sort=[],
        axis=alt.Axis(
            grid=False,
            titleAlign="right",
            titleAnchor="end",
            title="school-year starting in ⬇",
            titleY=-15,
            labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
            tickColor=colors["eco-gray"],
            domainColor=colors["eco-gray"],
            tickCount=10,
            orient="bottom",
            labelAngle=0,
            format='.0f'
        ),
        # scale=alt.Scale(domain=[2002, 2020]),
    )
)
line1 = base.mark_line(color=colors["eco-turquiose"]).encode(
    y=alt.Y(
        "state:Q",
        sort=[],
        axis=alt.Axis(
            # grid=False,
            gridOpacity=0.2,
            gridColor=colors["eco-gray"],
            title="Per pupil average price (£) in school-year starting in 2021",
            titleX=-5,
            titleY=-5,
            titleBaseline="bottom",
            titleAngle=0,
            titleAlign="left",
            labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
            tickColor=colors["eco-gray"],
            domainColor=colors["eco-gray"],
            format='s'
        ),
        # scale=alt.Scale(domain=[-0.16, 0.06]),
    )
)
line2=base.mark_line(color=colors["eco-light-blue"],strokeDash=[10,5]).encode(
    y=alt.Y(
        "private:Q"
    )
)
line3=base.mark_line(color=colors["eco-mid-blue"]).encode(
    y=alt.Y(
        "bursary:Q"
    )
)
label1=alt.Chart(pd.DataFrame([{'x':2020,'y':7500,'t':'State-funded'}])).mark_text(
    color=colors["eco-turquiose"],align='right').encode(
    x='x:Q',y='y:Q',text='t:N'
)
label2=alt.Chart(pd.DataFrame([{'x':2020,'y':13000,'t':'Private minus bursaries'}])).mark_text(
    color=colors["eco-light-blue"],align='right',angle=352).encode(
    x='x:Q',y='y:Q',text='t:N'
)
label3=alt.Chart(pd.DataFrame([{'x':2019,'y':15700,'t':'Private'}])).mark_text(
    color=colors["eco-mid-blue"],align='right',angle=350).encode(
    x='x:Q',y='y:Q',text='t:N'
)
label4=alt.Chart(pd.DataFrame([{'x':2009.5,'y':11150,'t':'⬅ £3,100 ➡'}])).mark_text(
    color=colors["eco-gray"],align='right',angle=270).encode(
    x='x:Q',y='y:Q',text='t:N'
)
label5=alt.Chart(pd.DataFrame([{'x':2019.5,'y':11800,'t':'⬅  £6,500  ➡'}])).mark_text(
    color=colors["eco-gray"],align='right',angle=270).encode(
    x='x:Q',y='y:Q',text='t:N'
)
layer1 = (
    ((line1+line2+line3+label1+label2+label3+label4+label5).properties(height=300, width=400))
    .configure_view(stroke=None)
    .properties(title="")
)
layer1.save("visualisation/" + f + ".json")
layer1.save("visualisation/" + f + ".png")
open("README.md", "a").write(readme)
layer1