In [347]:
import json
import altair as alt
from altair import expr, datum
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [348]:
import colorsys
from matplotlib.colors import to_hex, to_rgb


def scale_lightness(rgb, scale_l):
    rgbhex = False
    if "#" in rgb:
        rgb = to_rgb(rgb)
        rgbhex = True
    # convert rgb to hls
    h, l, s = colorsys.rgb_to_hls(*rgb)
    # manipulate h, l, s values and return as rgb
    c = colorsys.hls_to_rgb(h, min(1, l * scale_l), s=s)
    if rgbhex:
        c = to_hex(c)
    return c

In [404]:
LOCAL = False

if LOCAL:
    local_suffix = "_local"
else:
    local_suffix = ""

In [405]:
%%capture pwd
!pwd

In [406]:
# uid = "2021-05-05-which-firms-and-industries-have-been-most-affected-by-covid-update"  # article unique ID
uid = pwd.stdout.split("/")[-1].split("\r")[0]
eco_git_home = (
    "https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/"
)
eco_git_path = eco_git_home + "articles/" + uid + "/data/"
vega_embed = requests.get(eco_git_home + "guidelines/html/vega-embed.html").text
colors = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-colors.json").content
)
category_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-category-color.json").content
)
hue_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-single-hue-color.json").content
)
mhue_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-multi-hue-color.json").content
)
div_color = json.loads(
    requests.get(eco_git_home + "guidelines/colors/eco-diverging-color.json").content
)
config = json.loads(
    requests.get(eco_git_home + "guidelines/charts/eco-global-config.json").content
)
height = config["height"]
width = config["width"]
uid, height, width

('2021-07-05-how-can-we-promote-diversity-in-economics', 300, 500)

# Fig 1

In [407]:
df = pd.read_csv("raw/J28_C1_formatted.csv").dropna(how="all")

In [408]:
f = "fig1_ethnicity"
f1 = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
open("visualisation/" + f + ".html", "w").write(
    vega_embed.replace(
        "JSON_PATH", f1.replace("/data/", "/visualisation/").replace(".csv", ".json")
    )
)
if LOCAL:
    f1 = df
df.head()

Unnamed: 0,Group,Ethnicity,Value,Detail
0,All men,White,2.444594,Line at 2.10
1,All women,White,1.244503,
2,British men,White,1.583633,
3,British women,White,0.675873,
4,All men,Mixed,2.521797,


In [409]:
base0=alt.Chart(f1).transform_calculate(x="datum.Value/100").transform_calculate(
            tooltip="datum.Group+' | '+datum.Ethnicity+' | '+round(datum.Value*10)/10.0+'%'"
        )
base=base0.encode(
    x=alt.X("Ethnicity:N" ,sort=[],axis=alt.Axis(labelAngle=-25,grid=False,
                    title="",
                                                 labelOffset=10,
                    labelColor=colors["eco-gray"],
                    titleColor=colors["eco-gray"],
                    tickColor=colors["eco-gray"],
                    domainColor=colors["eco-gray"],
                    tickCount=10,
                    orient="bottom",)),
    y=alt.Y("x:Q",axis=alt.Axis(
                    grid=True,
                    title="of research academics from each ethnic group working in economics by sex (2018-19)",
                    titleAnchor="start",
                    labelColor=colors["eco-gray"],
                    titleColor=colors["eco-gray"],
                    tickColor=colors["eco-gray"],
                    domainColor=colors["eco-gray"],
                    titleFontSize=10,
                    titleFontWeight="normal",
                    ticks=False,
                    labelAlign="left",
                    labelBaseline="middle",
                    labelPadding=-5,
                    labelOffset=-10,
                    titleX=23,
                    titleY=22,
                    titleBaseline="bottom",
                    titleAngle=0,
                    titleAlign="left",
                    tickCount=7,
                    format=".0%",
                ),),
    color=alt.Color("Group:N",legend=None,scale=alt.Scale(range=[colors['eco-mid-blue'],
        colors['eco-turquiose'],colors['eco-orange'],colors['eco-yellow']])),
).encode(
            tooltip="tooltip:N",
)
bars1=base.mark_bar(xOffset=-12,size=8).transform_filter("datum.Group=='All men'")
bars2=base.mark_bar(xOffset=-4,size=8).transform_filter("datum.Group=='All women'")
bars3=base.mark_bar(xOffset=4,size=8).transform_filter("datum.Group=='British men'")
bars4=base.mark_bar(xOffset=12,size=8).transform_filter("datum.Group=='British women'")
labels1=bars1.mark_text(xOffset=-15,yOffset=-5,size=10,align='left',angle=335).transform_filter("datum.Ethnicity=='Other'")\
    .encode(text='Group:N')
labels2=bars2.mark_text(xOffset=-7,yOffset=-5,size=10,align='left',angle=335).transform_filter("datum.Ethnicity=='Other'")\
    .encode(text='Group:N')
labels3=bars3.mark_text(xOffset=4,yOffset=-5,size=10,align='left',angle=335).transform_filter("datum.Ethnicity=='Other'")\
    .encode(text='Group:N')
labels4=bars4.mark_text(xOffset=12,yOffset=-5,size=10,align='left',angle=335).transform_filter("datum.Ethnicity=='Other'")\
    .encode(text='Group:N')
line=base0.mark_rule(strokeDash=[5,5],color=colors['eco-gray']).encode(y='average(x):Q')
labels5=line.mark_text(xOffset=6,yOffset=5,size=10,align='left',angle=335).transform_filter("datum.Ethnicity=='Other'")\
    .encode(text='l:N',x=alt.X('Ethnicity:N',sort=[])).transform_filter("datum.Group=='All men'").transform_calculate(l="'Overall'")
layer1 = (
    ((bars1+bars2+bars3+bars4+line+labels1+labels2+labels3+labels4+labels5).properties(height=300, width=alt.Step(40)))
    .configure_view(stroke=None)
    .properties(title="")
)
layer1.save("visualisation/" + f + ".json")
layer1

# Fig 2

## a

In [401]:
df = pd.read_csv("raw/levels_time.csv").dropna(how="all")

In [402]:
f = "fig2a_degree"
f2a = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
open("visualisation/" + f + ".html", "w").write(
    vega_embed.replace(
        "JSON_PATH", f2a.replace("/data/", "/visualisation/").replace(".csv", ".json")
    )
)
if LOCAL:
    f2a = df
df.head()

Unnamed: 0,ACYEAR,F_XLEV601,F_SEXID,n,percent_fem,percent_dist
0,2012,First degree,Female,9690.71,33.141861,72.673367
1,2012,First degree,Male,19549.38,66.858139,84.160891
2,2012,Masters,Female,3185.76,52.317687,23.890912
3,2012,Masters,Male,2903.5,47.682313,12.499688
4,2012,Doctorate,Female,458.14,37.131233,3.435721


In [403]:
base = (
    (
        alt.Chart(f2a)
        .encode(
            tooltip="tooltip:N",
            x=alt.X(
                "ACYEAR:O",
                axis=alt.Axis(
                    grid=False,
                    title="",
                    labelColor=colors["eco-gray"],
                    titleColor=colors["eco-gray"],
                    tickColor=colors["eco-gray"],
                    domainColor=colors["eco-gray"],
                    tickCount=10,
                    orient="bottom",
                    labelAngle=0,
                ),
            ),
            y=alt.Y(
                "x:Q",
                sort=[],
                axis=alt.Axis(
                    grid=True,
                    title="of women, by level of study",
                    titleAnchor="start",
                    labelColor=colors["eco-gray"],
                    titleColor=colors["eco-gray"],
                    tickColor=colors["eco-gray"],
                    domainColor=colors["eco-gray"],
                    titleFontSize=10,
                    titleFontWeight="normal",
                    ticks=False,
                    labelAlign="left",
                    labelBaseline="middle",
                    labelPadding=-5,
                    labelOffset=-10,
                    titleX=30,
                    titleY=-5,
                    titleBaseline="bottom",
                    titleAngle=0,
                    titleAlign="left",
                    tickCount=7,
                    format=".0%",
                ),
                scale=alt.Scale(domain=[0.2, 0.6]),
            ),
            color=alt.Color(
                "F_XLEV601:N",
                legend=None,
                scale=alt.Scale(
                    range=[
                        colors["eco-turquiose"],
                        colors["eco-mid-blue"],
                        colors["eco-light-blue"],
                    ]
                ),
            ),
        )
        .transform_calculate(
            tooltip="datum.ACYEAR+' 🎓 '+datum.F_XLEV601+' 📈 '+round(datum.percent_fem*10)/10.0+'%'"
        )
    )
    .transform_calculate(x="datum.percent_fem/100")
    .transform_filter("datum.F_SEXID=='Female'")
)
line = base.mark_line(opacity=1)
text = (
    base.mark_text(dx=4, dy=1, align="left")
    .encode(
        text=alt.Text("F_XLEV601:N"),
    )
    .transform_filter("datum.ACYEAR==2018")
)

layer1 = (
    ((line + text).properties(height=300, width=400))
    .configure_view(stroke=None)
    .properties(title="")
)
layer1.save("visualisation/" + f + ".json")
layer1

## b

In [370]:
df = pd.read_csv("raw/bme_level.csv").dropna(how="all")

In [371]:
f = "fig2b_bme"
f2b = eco_git_path + f + ".csv"
df.to_csv("data/" + f + ".csv")
f += local_suffix
open("visualisation/" + f + ".html", "w").write(
    vega_embed.replace(
        "JSON_PATH", f2b.replace("/data/", "/visualisation/").replace(".csv", ".json")
    )
)
if LOCAL:
    f2b = df
df.head()

Unnamed: 0,ACYEAR,F_SEXID,F_XLEV601,BME_MKR,n,percent_fem
0,2012,Female,First degree,BME,2282.21,32.966431
1,2012,Female,First degree,Non-BME,3221.43,25.246851
2,2012,Female,Masters,BME,137.98,38.171909
3,2012,Female,Masters,Non-BME,162.29,28.196887
4,2012,Female,Doctorate,BME,16.83,22.639225


In [372]:
base = (
    alt.Chart(f2b)
    .encode(
        tooltip="tooltip:N",
        x=alt.X(
            "ACYEAR:O",
            axis=alt.Axis(
                grid=False,
                title="",
                labelColor=colors["eco-gray"],
                titleColor=colors["eco-gray"],
                tickColor=colors["eco-gray"],
                domainColor=colors["eco-gray"],
                tickCount=10,
                orient="bottom",
                labelAngle=0,
            ),
        ),
        y=alt.Y(
            "x:Q",
            sort=[],
            axis=alt.Axis(
                grid=True,
                title="of women, by level of study",
                titleAnchor="start",
                labelColor=colors["eco-gray"],
                titleColor=colors["eco-gray"],
                tickColor=colors["eco-gray"],
                domainColor=colors["eco-gray"],
                titleFontSize=10,
                titleFontWeight="normal",
                ticks=False,
                labelAlign="left",
                labelBaseline="middle",
                labelPadding=-5,
                labelOffset=-10,
                titleX=30,
                titleY=15,
                titleBaseline="bottom",
                titleAngle=0,
                titleAlign="left",
                tickCount=7,
                format=".0%",
            ),
            scale=alt.Scale(domain=[0.2, 0.4]),
        ),
        color=alt.Color(
            "BME_MKR:N",
            legend=alt.Legend(orient="bottom", title=""),
            scale=alt.Scale(
                range=[
                    colors["eco-turquiose"],
                    colors["eco-mid-blue"],
                    colors["eco-light-blue"],
                ]
            ),
        ),
        column=alt.Column("F_XLEV601:N", title="", sort=[]),
    )
    .transform_calculate(
        tooltip="datum.ACYEAR+' 🎓 '+datum.F_XLEV601+' 📈 '+round(datum.percent_fem*10)/10.0+'%'"
    )
    .transform_calculate(x="datum.percent_fem/100")
    .transform_filter("datum.F_SEXID=='Female'")
)
line = base.mark_line(opacity=1)
text = (
    base.mark_text(dx=4, dy=1, align="left")
    .encode(
        text=alt.Text("BME_MKR:N"),
    )
    .transform_filter("datum.ACYEAR==2018")
)

layer1 = (
    ((line).properties(height=200, width=200))
    .configure_view(stroke=None)
    .properties(title="")
)
layer1.save("visualisation/" + f + ".json")
layer1

In [211]:
f2a.set_index(["ACYEAR", "F_SEXID", "F_XLEV601"]).loc[2012, "Female", "Masters"]

Unnamed: 0         2.000000
n               3185.760000
percent_fem       52.317687
percent_dist      23.890912
Name: (2012, Female, Masters), dtype: float64

In [198]:
f2b.set_index(["ACYEAR", "F_SEXID", "F_XLEV601"]).loc[2012, "Female", "Masters"]

  f2b.set_index(['ACYEAR','F_SEXID','F_XLEV601']).loc[2012,'Female','Masters']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BME_MKR,n,percent_fem
ACYEAR,F_SEXID,F_XLEV601,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012,Female,Masters,BME,137.98,38.171909
2012,Female,Masters,Non-BME,162.29,28.196887
