In [1]:
import json
import altair as alt
from altair import expr, datum
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [2]:
import colorsys
from matplotlib.colors import to_hex, to_rgb


def scale_lightness(rgb, scale_l):
    rgbhex = False
    if "#" in rgb:
        rgb = to_rgb(rgb)
        rgbhex = True
    # convert rgb to hls
    h, l, s = colorsys.rgb_to_hls(*rgb)
    # manipulate h, l, s values and return as rgb
    c = colorsys.hls_to_rgb(h, min(1, l * scale_l), s=s)
    if rgbhex:
        c = to_hex(c)
    return c

In [13]:
LOCAL = True

if LOCAL:
    local_suffix = "_local"
else:
    local_suffix = ""

In [14]:
%%capture pwd
!pwd

In [15]:
uid = pwd.stdout.split("/")[-1].split("\r")[0]
eco_git_home = (
    "https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/"
)
eco_git_path = eco_git_home + "articles/" + uid + "/data/"
vega_embed = requests.get(eco_git_home + "guidelines/html/vega-embed.html").text
colors = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-colors.json").content
)
category_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-category-color.json").content
)
hue_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-single-hue-color.json").content
)
mhue_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-multi-hue-color.json").content
)
div_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-diverging-color.json").content
)
config = json.loads(
    requests.get(eco_git_home + "guidelines/charts/eco-global-config.json").content
)
height = config["height"]
width = config["width"]
uid, height, width

('what-have-two-years-of-interrupted-schooling-taught-us-about-learning',
 300,
 500)

# Fig 1

In [95]:
df = pd.read_excel("raw/Fig1.xlsx")
df['Datex']=[str(i)[:8]+str(int(np.random.uniform()*20)) for i in df['Date']]

In [83]:
f = "fig1_learning_deficits"
f1 = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
open("visualisation/" + f + ".html", "w").write(
    vega_embed.replace(
        "JSON_PATH", f1.replace("/data/", "/visualisation/").replace(".csv", ".json")
    )
)
if LOCAL:
    f1 = df
readme = "### " + f + '\n!["' + f + '"](visualisation/' + f + '.png "' + f + '")\n\n'
df.head()

Unnamed: 0,Country,Date,Estimate,N,Datex
0,Australia,2020-11-01,0.07,1387,2020-11-0
1,Australia,2020-11-01,0.04,1303,2020-11-5
2,Australia,2020-11-01,0.06,1301,2020-11-19
3,Australia,2020-11-01,-0.01,1368,2020-11-6
4,Belgium,2020-06-01,-0.16,3470,2020-06-11


In [236]:
base = alt.Chart(f1).encode(
    x=alt.X(
        "Datex:T",
        sort=[],
        axis=alt.Axis(
            grid=False,
            titleAlign="center",
            titleAnchor="middle",
            title="",
            titleY=-15,
            titleX=207,
            labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
            tickColor=colors["eco-gray"],
            domainColor=colors["eco-gray"],
            tickCount=10,
            orient="bottom",
            labelAngle=0,
        ),
    ),
    tooltip=[
        "Country",
        {"field": "a", "type": "nominal", "title": "Date"},
        {"field": "b", "type": "nominal", "title": "Learning deficit"},
        {"field": "c", "type": "nominal", "title": "Sample size"},
    ]
).transform_calculate(a='monthFormat(month(datum.Date))+" "+year(datum.Date)')\
.transform_calculate(b='round(datum.Estimate*100)/100')\
.transform_calculate(c='round(datum.N/1000)+"k"')\
.transform_joinaggregate(
    d='mean(Estimate)',
    groupby=["Date"],
)
c2=[colors["eco-blue"],colors["eco-yellow"],colors["eco-green"],
    colors["eco-red"],colors["eco-gray"],colors["eco-light-blue"],
    colors["eco-green"],colors["eco-orange"],colors["eco-orange"],
    "red",colors["eco-dot"],colors["eco-mid-blue"]]
line=base.mark_line(opacity=0.5,color=colors["eco-gray"]).encode(
    y='Estimate:Q',
    order='Datex:T'
).transform_loess('Datex', 'Estimate',bandwidth=0.7)
points=base.mark_point(opacity=0.6).encode(
    y=alt.Y(
        "Estimate:Q",
        sort=[],
        axis=alt.Axis(
            grid=False,
            title="Learning deficit (SD)",
            titleX=-5,
            titleY=-5,
            titleBaseline="bottom",
            titleAngle=0,
            titleAlign="left",
            labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
            tickColor=colors["eco-gray"],
            domainColor=colors["eco-gray"],
        ),
    ),
    fill=alt.Fill('Country:N',scale=alt.Scale(
                        range=c2
                    )),
    color=alt.Color('Country:N',
                    legend=alt.Legend(
                        offset=30,
                      labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
                  ),
                    scale=alt.Scale(
                        range=c2
                    )
                   ),
    size=alt.Size('N:Q',scale=alt.Scale(domain=[100,5000000],type='sqrt'),
                  legend=alt.Legend(
                      title='Sample size',
                      labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
                  )
                 )
)
o2=0.15
area1 = (
    alt.Chart(pd.DataFrame([{"x": "2020-05-01", "y": 0,"y2": 0.4}, {"x": "2021-10-31", "y": 0,"y2": 0.4}]))
    .mark_area(opacity=o2, color=colors["eco-green"])
    .encode(x=alt.X("x:T", sort=[]), y="y:Q", y2="y2:Q")
)
area2 = (
    alt.Chart(pd.DataFrame([{"x": "2020-05-01", "y": -0.8,"y2": 0}, {"x": "2021-10-31", "y": -0.8,"y2": 0}]))
    .mark_area(opacity=o2, color=colors["eco-red"])
    .encode(x=alt.X("x:T", sort=[]), y="y:Q", y2="y2:Q")
)
axis1 = (
    alt.Chart(pd.DataFrame([{"x": "2020-05-01", "y": 0}, {"x": "2021-10-31", "y": 0}]))
    .mark_line(strokeWidth=0.8, strokeDash=[10, 6], color=colors["eco-gray"])
    .encode(x=alt.X("x:T", sort=[]), y="y:Q")
)
layer1 = (
    ((area1+area2+axis1+line+points).properties(height=330, width=420))
    .configure_view(stroke=None)
    .properties(title="")
)
layer1.save("visualisation/" + f + ".json")
layer1.save("visualisation/" + f + ".svg")
layer1.save("visualisation/" + f + ".png")
open("README.md", "w").write(readme)
layer1

# Fig 2

In [327]:
df = pd.read_excel("raw/Fig2.xlsx",skiprows=38).dropna(how='all',axis=1)
df.columns=['a'+str(i) for i in range(len(df.columns))]
df['a1']=df['a1'].ffill()
df=df[df['a2']!='grade']
df=df.drop('a0',axis=1).set_index(['a1','a2'])
df1=df[df.columns[0:9]]
df2=df[df.columns[9:18]]
df3=df[df.columns[18:]]
df1.columns=range(1,10)
df1['inequality']='Decreased'
df2.columns=range(1,10)
df2['inequality']='No change'
df3.columns=range(1,10)
df3['inequality']='Increased'
df=pd.concat([df1,df2,df3]).set_index('inequality',append=True).stack().reset_index()
df.columns=['duration','subject','inequality','grade','value']
data=[]
for i in df.T.iteritems():
    for j in range(int(i[1]['value'])):
        data.append(i[1])
df=pd.DataFrame(data)
df['value']=1
df['subject']=df['subject'].str.title()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['inequality']='Decreased'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['inequality']='No change'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['inequality']='Increased'


In [328]:
f = "fig2_inequality"
f2 = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
open("visualisation/" + f + ".html", "w").write(
    vega_embed.replace(
        "JSON_PATH", f2.replace("/data/", "/visualisation/").replace(".csv", ".json")
    )
)
if LOCAL:
    f2 = df
readme = "### " + f + '\n!["' + f + '"](visualisation/' + f + '.png "' + f + '")\n\n'
df.head()

Unnamed: 0,duration,subject,inequality,grade,value
0,13-19,Math,Decreased,2,1
1,13-19,Math,Decreased,8,1
2,1-12,Math,Decreased,1,1
3,1-12,Math,Decreased,7,1
3,1-12,Math,Decreased,7,1


In [331]:
base = alt.Chart(f2).encode(
    x=alt.X(
        "grade:Q",
        sort=[],
        axis=alt.Axis(
            grid=False,
            titleAlign="center",
            titleAnchor="middle",
            title="",
            titleY=-15,
            titleX=207,
            labelColor=colors["eco-gray"],
            titleColor=colors["eco-gray"],
            tickColor=colors["eco-gray"],
            domainColor=colors["eco-gray"],
            tickCount=10,
            orient="bottom",
            labelAngle=0,
        ),
        scale=alt.Scale(domain=[1,9])
    )
)
c2=[colors['eco-turquiose'],colors['eco-dot'],colors['eco-mid-blue']]
points = base.mark_point(
    size=25,
).encode(
    y=alt.Y('value:Q',stack=True,axis=None),
    shape=alt.Shape('subject:N',legend=alt.Legend(title='Subject')),
    column=alt.Column('inequality:N',sort=[],title='Inequality'),
    fill=alt.Fill('duration:N',scale=alt.Scale(range=c2),legend=alt.Legend(title='Duration')),
    color=alt.Color('duration:N',scale=alt.Scale(range=c2))
)
layer1 = (
    ((points).properties(height=300, width=100))
    .configure_view(stroke=None)
    .properties(title="")
)
layer1.save("visualisation/" + f + ".json")
layer1.save("visualisation/" + f + ".svg")
layer1.save("visualisation/" + f + ".png")
open("README.md", "a").write(readme)
layer1