In [None]:
import pandas as pd
import plotly.io as pio
import statsmodels.formula.api as smf

# Notebook-wide pandas switches, set once up front.
pd.set_option("mode.copy_on_write", True)
pd.set_option("future.infer_string", True)
# Route DataFrame.plot through plotly so the plotting cells get plotly figures.
pd.set_option("plotting.backend", "plotly")
# Default plotly template; the dark variant is kept for quick switching.
# pio.templates.default = "plotly_dark+presentation" # noqa: ERA001
pio.templates.default = "plotly_white+presentation"

In [None]:
# Load the per-country PIAAC table and keep only the two fractions used in
# the regression, rounded to three decimal places.
df = (
    pd.read_feather("../piaac_education_and_computer_usage_by_country_5.arrow")
    .loc[
        :,
        [
            "fraction_with_tertiary_education",
            "fraction_using_computer_at_work",
        ],
    ]
    .round(decimals=3)
)

In [None]:
# Print the input table as raw Markdown text (print() emits the markup itself
# rather than a rendered table), so it can be copied out of the notebook.
print(df.to_markdown())

In [None]:
# Labelled scatter of computer use at work against tertiary education.
# reset_index() exposes "country" as a regular column for the point labels
# (assumes the frame is indexed by country — TODO confirm).
fig = (
    df.reset_index()
    .plot.scatter(
        x="fraction_with_tertiary_education",
        y="fraction_using_computer_at_work",
        text="country",
    )
    .update_xaxes(range=(0.15, 0.4))
    .update_traces(textposition="top center")
)
# Write the figure to disk first, then render it in the notebook.
fig.write_image("screencast/public/scatter.svg")
fig.show()

In [None]:
# Specify an OLS regression of computer use at work on tertiary education.
# Fitting happens in a separate step; the trailing bare name displays the
# model object in the notebook.
model = smf.ols(
    "fraction_using_computer_at_work ~ fraction_with_tertiary_education",
    df,
)
model

In [None]:
# Fit the model specified above; cov_type="nonrobust" is passed explicitly.
# The trailing bare name displays the results object in the notebook.
all_results = model.fit(cov_type="nonrobust")
all_results

In [None]:
# Print the regression summary as raw HTML source text (not rendered), so the
# markup can be copied out of the notebook.
print(all_results.summary().as_html())

In [None]:
# Display the regression summary using the notebook's rich output.
all_results.summary()

In [None]:
# Store the model's predictions for the same rows it was fitted on.
# NOTE: this adds a column to df in place, so every later cell sees a
# three-column frame.
df["predicted"] = all_results.predict(df)
df

In [None]:
# Print the table (now including the predicted column) as raw Markdown text,
# rounded to three decimals for display only; df itself is not modified.
print(df.round(3).to_markdown())

In [None]:
# Draw the predicted values as a line against the explanatory variable.
# line_fig is reused later to overlay the regression line on the scatter.
line_fig = df.plot.line(
    x="fraction_with_tertiary_education",
    y="predicted",
)
line_fig.show()

In [None]:
# Write the regression-line figure to screencast/public/line.svg.
line_fig.write_image("screencast/public/line.svg")

In [None]:
# Rebuild the labelled scatter and overlay the regression line from line_fig.
# The text position is set after the overlay, so update_traces applies it to
# every trace in the figure.
fig = (
    df.reset_index()
    .plot.scatter(
        x="fraction_with_tertiary_education",
        y="fraction_using_computer_at_work",
        text="country",
    )
    .add_traces(line_fig.data)
    .update_traces(textposition="bottom center")
    .update_xaxes(range=(0.15, 0.4))
)
fig.show()

In [None]:
# Write the combined scatter and regression-line figure to
# screencast/public/scatter-line.svg.
fig.write_image("screencast/public/scatter-line.svg")