In [None]:
import numpy as np
import pandas as pd

# Notebook-wide pandas configuration.
pd.set_option("mode.copy_on_write", True)  # enable copy-on-write semantics
pd.set_option("future.infer_string", True)  # opt in to the future string dtype inference
pd.set_option("plotting.backend", "plotly")  # make DataFrame.plot return plotly figures

In [None]:
import os

# Folder that holds the PIAAC project files. Set the PIAAC_DIR environment
# variable to run the notebook on another machine; when it is unset, the
# original location is used, so existing setups keep working unchanged.
piaac_dir = os.environ.get(
    "PIAAC_DIR",
    "C:/Users/anton/sciebo/Intro Econometrics U Bonn/2023/PIAAC",
)

# The data file is loaded through a path relative to this folder.
os.chdir(piaac_dir)

In [None]:
# Load the PIAAC extract; the path is relative to the current working directory.
piaac = pd.read_feather(
    "data-prep/piaac_selected.arrow",
)

In [None]:
# Show the column labels of the loaded frame.
piaac.columns

In [None]:
# Distinct values of "highest_educ" as a plain Python list; these are the
# labels the education mapping has to cover.
piaac["highest_educ"].unique().tolist()

In [None]:
# Maps each "highest_educ" label to a numeric ISCED level (0-6).
# "Foreign qualification" cannot be placed on the scale and maps to NaN.
education_mapping = {
    "No formal qualification or below ISCED 1": 0,
    "ISCED 1": 1,
    "ISCED 2": 2,
    "ISCED 3 (without distinction A-B-C, 2y+)": 3,
    "ISCED 3A-B": 3,
    "ISCED 3C 2 years or more": 3,
    "ISCED 3C shorter than 2 years": 3,
    "ISCED 4A-B": 4,
    "ISCED 4 (without distinction A-B-C)": 4,
    "ISCED 4C": 4,
    "ISCED 5B": 5,
    "ISCED 5A, master degree": 5,
    "ISCED 5A, bachelor degree": 5,
    "ISCED 5A bachelor degree, 5A master degree, and 6 (without distinction)": 5,
    "ISCED 6": 6,
    "Foreign qualification": np.nan,
}

# Lowest level in education_mapping that counts as tertiary education.
TERTIARY_MIN_LEVEL = 5

# Labels counted as tertiary education. Derived from education_mapping so the
# two definitions cannot drift apart; NaN >= 5 is False, which keeps
# "Foreign qualification" out of the list.
tertiary = [
    label
    for label, level in education_mapping.items()
    if level >= TERTIARY_MIN_LEVEL
]

# Values for which tertiary status is treated as unknown: missing entries and
# foreign qualifications.
nan = [np.nan, "Foreign qualification"]

In [None]:
def yes_no_to_binary(answers):
    """Recode a "Yes"/"No" column as 1.0/0.0.

    Parameters
    ----------
    answers : pandas.Series
        Column compared element-wise against the strings "Yes" and "No".

    Returns
    -------
    numpy.ndarray
        Float array holding 1 where the answer is "Yes", 0 where it is "No"
        and NaN for every other value (including missing ones).
    """
    return np.where(answers == "Yes", 1, np.where(answers == "No", 0, np.nan))


# 1 for tertiary labels, NaN where the status is unknown (values listed in
# `nan`), 0 for every remaining label.
piaac["tertiary_educ"] = np.where(
    piaac["highest_educ"].isin(tertiary),
    1,
    np.where(piaac["highest_educ"].isin(nan), np.nan, 0),
)

# Both computer questions get the same Yes/No recoding.
piaac["computer_experience_in_general_binary"] = yes_no_to_binary(
    piaac["computer_experience_in_general"]
)
piaac["use_computer_at_work_binary"] = yes_no_to_binary(
    piaac["use_computer_at_work"]
)

In [None]:
# Raw answer next to its 0/1 recoding, for a visual check.
piaac.loc[:, ["use_computer_at_work", "use_computer_at_work_binary"]]

In [None]:
# Raw "use_computer_at_work" answers.
piaac["use_computer_at_work"]

In [None]:
# Translate each education label into its numeric level via education_mapping;
# labels without an entry in the mapping become NaN.
highest_educ_labels = piaac["highest_educ"]
piaac["educ_level"] = highest_educ_labels.map(education_mapping)

In [None]:
# First rows of the education label next to the tertiary indicator.
piaac.loc[:, ["highest_educ", "tertiary_educ"]].head()

In [None]:
# Indicator columns to average within each country.
indicator_columns = [
    "tertiary_educ",
    "computer_experience_in_general_binary",
    "use_computer_at_work_binary",
]

# One row per country holding the mean of each indicator column.
piaac_country_av = piaac.groupby("country")[indicator_columns].mean()

In [None]:
# Display the per-country means of the indicator columns.
piaac_country_av

In [None]:
# Count how often each country label occurs in the aggregated table.
(
    piaac_country_av
    .reset_index()["country"]
    .value_counts()
)

In [None]:
# Country-level scatter of the mean tertiary indicator against the mean
# computer-experience indicator. Title and axis labels are set so the figure
# can be read without knowing the column names.
fig = piaac_country_av.reset_index().plot.scatter(
    x="tertiary_educ",
    y="computer_experience_in_general_binary",
    text="country",
    title="Tertiary education and computer experience by country",
    labels={
        "tertiary_educ": "Share with tertiary education",
        "computer_experience_in_general_binary": "Share with computer experience",
    },
)
# Place each country label above its marker.
fig.update_traces(textposition="top center")
fig.show()

In [None]:
# Country-level scatter of the mean tertiary indicator against the mean
# computer-use-at-work indicator. Title and axis labels are set so the figure
# can be read without knowing the column names.
fig = piaac_country_av.reset_index().plot.scatter(
    x="tertiary_educ",
    y="use_computer_at_work_binary",
    text="country",
    title="Tertiary education and computer use at work by country",
    labels={
        "tertiary_educ": "Share with tertiary education",
        "use_computer_at_work_binary": "Share using a computer at work",
    },
)
# Place each country label above its marker.
fig.update_traces(textposition="top center")
fig.show()