In [45]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [46]:
# Read the union-membership data from the CSV in the working directory
df = pd.read_csv(filepath_or_buffer="union.csv")

In [47]:
# Restrict the sample to observations with year <= 1984
# (intended as the five waves 1980-1984; assumes the data start in 1980).
df = df.loc[df["year"].le(1984)]

In [48]:
# Build the lagged union-membership regressor union_{i,t-1}.
# Sort by individual and year first: shift(1) takes the previous *row* within
# each "nr" group, so it only equals last year's value when the rows are in
# chronological order. (Assumes no missing years within an individual -- TODO confirm.)
# .assign() returns a new frame, so no column is written into the filtered
# slice produced by the year filter (avoids SettingWithCopyWarning).
df = (
    df.sort_values(["nr", "year"])
    .assign(union_lag=lambda panel: panel.groupby("nr")["union"].shift(1))
    # Drop only rows whose lag is undefined (each individual's first observation);
    # a bare dropna() would also discard rows with NaN in unrelated columns.
    .dropna(subset=["union_lag"])
)
# NOTE: this cell overwrites df; re-running it without re-running the cells
# above recomputes the lag on the already-trimmed frame and drops one more
# observation per individual.

# Estimate a logit model for union membership dynamics
model = smf.logit("union ~ union_lag + married + educ", data=df).fit()

# Display estimation results
print(model.summary())

# Compute and display average partial effects (APE): at="overall" averages the
# marginal effects over all observations. at="mean" would instead evaluate them
# once at the regressor means (partial effect at the average), which is not the APE.
margins = model.get_margeff(at="overall")
print(margins.summary())

Optimization terminated successfully.
         Current function value: 0.388559
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                  union   No. Observations:                 2180
Model:                          Logit   Df Residuals:                     2176
Method:                           MLE   Df Model:                            3
Date:                Tue, 11 Mar 2025   Pseudo R-squ.:                  0.3103
Time:                        16:40:52   Log-Likelihood:                -847.06
converged:                       True   LL-Null:                       -1228.1
Covariance Type:            nonrobust   LLR p-value:                7.353e-165
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.1820      0.453     -4.817      0.000      -3.070      -1.294
union_lag      3.1325      0.

In [49]:
# Alternative specification: same regressors as the logit, fitted as a probit
probit_model = smf.probit("union ~ union_lag + married + educ", data=df).fit()
print(probit_model.summary())

# Collect slope coefficients and log-likelihoods side by side,
# one row per fitted model (logit first, then probit)
results_df = pd.DataFrame(
    [
        {
            "Model": label,
            "Union_lag (p)": fit.params["union_lag"],
            "Married (Y1)": fit.params["married"],
            "Educ (Y2)": fit.params["educ"],
            "Log-Likelihood": fit.llf,
        }
        for label, fit in (("Logit", model), ("Probit", probit_model))
    ]
)

# Print the comparison table without the row index
print(results_df.to_string(index=False))

Optimization terminated successfully.
         Current function value: 0.388579
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:                  union   No. Observations:                 2180
Model:                         Probit   Df Residuals:                     2176
Method:                           MLE   Df Model:                            3
Date:                Tue, 11 Mar 2025   Pseudo R-squ.:                  0.3102
Time:                        16:40:52   Log-Likelihood:                -847.10
converged:                       True   LL-Null:                       -1228.1
Covariance Type:            nonrobust   LLR p-value:                7.684e-165
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.2133      0.245     -4.947      0.000      -1.694      -0.733
union_lag      1.8555      0.