In [None]:
# Use base python environment
!pip install statsmodels stargazer --q

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Read the assignment data set from the Stata file
df = pd.read_stata("assignment7.dta")

# Restrict the sample to Great Britain (rows where nireland == 0);
# .copy() so the new columns below are added to an independent frame
df = df.loc[df["nireland"] == 0].copy()

# Quartic polynomial controls: y1..y4 are powers of yobirth, a1..a4 of age.
# NOTE(review): if read_stata yields small integer dtypes for these columns,
# the higher powers could overflow -- confirm the column dtypes.
for prefix, column in (("y", "yobirth"), ("a", "age")):
    for power in range(1, 5):
        df[f"{prefix}{power}"] = df[column] ** power


def reg(formula, df=df, weights="wght", cluster="yobirth"):
    """
    Fit a weighted least-squares model with cluster-robust standard errors.

    The estimation sample is restricted to the rows the formula keeps
    (after its missing-value handling) that also have a non-missing weight
    and cluster label, so the weight and group vectors line up row-for-row
    with the regression's design matrix.

    Parameters
    ----------
    formula : str
        Model formula passed to statsmodels' formula interface.
    df : pandas.DataFrame
        Data to estimate on. Defaults to the module-level ``df`` as bound
        when this function is defined.
    weights : str
        Name of the column holding the observation weights.
    cluster : str
        Name of the column whose values define the clusters.

    Returns
    -------
    The fitted WLS results object, with ``cov_type="cluster"``.
    """

    # Constructing the model is enough to learn which rows survive the
    # formula's missing-value handling; no need to actually fit it.
    used = smf.ols(formula, data=df).data.row_labels

    # Keep only those rows, and drop any whose weight or cluster label is
    # missing: those columns are not part of the formula, so the formula
    # machinery would not exclude them and NaNs would leak into the fit.
    df_used = df.loc[used].dropna(subset=[weights, cluster])

    # Weights and groups now share df_used's rows, so they stay aligned
    # with the design matrix built from the same frame.
    model = smf.wls(formula, data=df_used, weights=df_used[weights]).fit(
        cov_type="cluster", cov_kwds={"groups": df_used[cluster]}
    )

    return model


# -----------------------------------------------------
# Table 1 specifications: (row label, formula) pairs.
# The first three use agelfted as the outcome, the last
# three use learn; each adds age controls in a different way.
# -----------------------------------------------------
specs = [
    ("FS: YOB poly", "agelfted ~ drop15 + y1 + y2 + y3 + y4"),
    ("FS: +Age poly", "agelfted ~ drop15 + y1 + y2 + y3 + y4 + a1 + a2 + a3 + a4"),
    ("FS: Age FE", "agelfted ~ drop15 + y1 + y2 + y3 + y4 + C(age)"),
    ("RF: YOB poly", "learn ~ drop15 + y1 + y2 + y3 + y4"),
    ("RF: +Age poly", "learn ~ drop15 + y1 + y2 + y3 + y4 + a1 + a2 + a3 + a4"),
    ("RF: Age FE", "learn ~ drop15 + y1 + y2 + y3 + y4 + C(age)"),
]

# Estimate every specification in order; keep the individual fits
# available under their original names as well as in a list.
results = [reg(formula) for _, formula in specs]
m1, m2, m3, m4, m5, m6 = results


# -----------------------------------------------------
# Collect the drop15 coefficient and its standard error
# from each fit into one summary table
# -----------------------------------------------------
table = pd.DataFrame({
    "Model": [label for label, _ in specs],
    "Coef_drop15": [fit.params["drop15"] for fit in results],
    "SE_drop15": [fit.bse["drop15"] for fit in results],
})

# Banner followed by the table (one line per banner element)
print(
    "\n==============================================",
    "           TABLE 1 (Great Britain)",
    "==============================================\n",
    sep="\n",
)
print(table)




           TABLE 1 (Great Britain)

           Model  Coef_drop15  SE_drop15
0   FS: YOB poly     0.469280   0.067122
1  FS: +Age poly     0.461249   0.065468
2     FS: Age FE     0.474934   0.065239
3   RF: YOB poly     0.054815   0.014775
4  RF: +Age poly     0.049858   0.013683
5     RF: Age FE     0.056245   0.017050
