In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
df_table2 = pd.read_stata("aer_primarysample.dta")

In [3]:
plist = "focs + f_focs + gncs + f_gncs + focs0 + f_focs0 + gncs0 + f_gncs0"
ulist = "phd_rank + phd_rank_miss + post_doc + ug_students + grad_students + faculty + full_av_salary + assist_av_salary + revenue + female_ratio + full_ratio + faculty_miss + revenue_miss + female_ratio_miss + full_ratio_miss"

In [4]:
model_2 = smf.ols(formula="tenure_policy_school ~ " + plist + " + " + ulist + " + C(pol_job_start):C(female) + C(female):C(pol_u)",
                data=df_table2).fit(cov_type='cluster', cov_kwds={'groups': df_table2['pol_u']})

In [5]:
print(model_2.summary())


                             OLS Regression Results                             
Dep. Variable:     tenure_policy_school   R-squared:                       0.165
Model:                              OLS   Adj. R-squared:                  0.051
Method:                   Least Squares   F-statistic:                     10.46
Date:                  Fri, 15 Dec 2023   Prob (F-statistic):           8.20e-14
Time:                          13:45:46   Log-Likelihood:                -755.13
No. Observations:                  1392   AIC:                             1846.
Df Residuals:                      1224   BIC:                             2726.
Df Model:                           167                                         
Covariance Type:                cluster                                         
                                              coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------



In [6]:
hypotheses = {'FOCS - Men': 'focs + focs0',
              'FOCS - Women': 'focs + f_focs + focs0 + f_focs0',
              'GNCS - Men': 'gncs + gncs0',
              'GNCS - Women': 'gncs + f_gncs + gncs0 + f_gncs0',
              '(Male - Female) FOCS': '-f_focs - f_focs0',
              '(Male - Female) GNCS': '-f_gncs - f_gncs0',
              '(GNCS - FOCS) Male': 'gncs + gncs0 - focs - focs0',
              '(GNCS - FOCS) Female': 'gncs + f_gncs + gncs0 + f_gncs0 - focs - f_focs - focs0 - f_focs0'}

print("Panel A. Policy effects years 0-3")

for hypothesis, terms in hypotheses.items():
    print(f"\nHypothesis Test: {hypothesis}")
    print(model_2.t_test(terms))

Panel A. Policy effects years 0-3

Hypothesis Test: FOCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0            -0.0085      0.067     -0.126      0.900      -0.140       0.123

Hypothesis Test: FOCS - Women
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.1723      0.141      1.225      0.221      -0.103       0.448

Hypothesis Test: GNCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0      

In [7]:
hypotheses = {'FOCS - Men': 'focs',
              'FOCS - Women': 'focs + f_focs',
              'GNCS - Men': 'gncs',
              'GNCS - Women': 'gncs + f_gncs',
              '(Male - Female) FOCS': '-f_focs',
              '(Male - Female) GNCS': '-f_gncs',
              '(GNCS - FOCS) Male': 'gncs - focs',
              '(GNCS - FOCS) Female': 'gncs + f_gncs - focs - f_focs'}

print("Panel B. Policy effects years 4+")

for hypothesis, terms in hypotheses.items():
    print(f"\nHypothesis Test: {hypothesis}")
    print(model_2.t_test(terms))

Panel B. Policy effects years 4+

Hypothesis Test: FOCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0023      0.075      0.030      0.976      -0.144       0.149

Hypothesis Test: FOCS - Women
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0493      0.102      0.485      0.628      -0.150       0.248

Hypothesis Test: GNCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0       

In [8]:
df_table3 = pd.read_stata('aer_eventstudysample.dta')

In [9]:
eventlist = 'pre3 + f_pre3 + pre2 + f_pre2 + pre1 + f_pre1 + focs0 + f_focs0 + focs + f_focs + gncs0 + f_gncs0 + gncs + f_gncs'

In [10]:
model_3 = smf.ols(formula="tenure_policy_school ~ " + eventlist + " + " + ulist + " + C(pol_job_start):C(female) + C(female):C(pol_u)",
                data=df_table3).fit(cov_type='cluster', cov_kwds={'groups': df_table2['pol_u']})

In [11]:
print(model_3.summary())

                             OLS Regression Results                             
Dep. Variable:     tenure_policy_school   R-squared:                       0.168
Model:                              OLS   Adj. R-squared:                  0.050
Method:                   Least Squares   F-statistic:                -5.397e+11
Date:                  Fri, 15 Dec 2023   Prob (F-statistic):               1.00
Time:                          13:45:47   Log-Likelihood:                -752.32
No. Observations:                  1392   AIC:                             1853.
Df Residuals:                      1218   BIC:                             2764.
Df Model:                           173                                         
Covariance Type:                cluster                                         
                                              coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------



In [13]:
hypotheses = {'Men 13-18': 'pre3 - pre1',
              'Women 13-18': 'pre3 + f_pre3 - pre1 - f_pre1',
              'Men 7-12': 'pre2 - pre1',
              'Women 7-12': 'pre2 + f_pre2 - pre1 - f_pre1',
              '(Male - Female) 13 - 18': '-f_pre3 + f_pre1',
              '(Male - Female) 7 - 12': '-f_pre2 + f_pre1'}

print("Panel A. Pre first policy year effects")

for hypothesis, terms in hypotheses.items():
    print(f"\nHypothesis Test: {hypothesis}")
    print(model_3.t_test(terms))

Panel A. Pre first policy year effects

Hypothesis Test: Men 13-18
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0            -0.0814      0.073     -1.119      0.263      -0.224       0.061

Hypothesis Test: Women 13-18
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0448      0.138      0.323      0.746      -0.227       0.316

Hypothesis Test: Men 7-12
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0     

In [14]:
hypotheses = {'FOCS - Men': 'focs + focs0 - pre1',
              'FOCS - Women': 'focs + f_focs + focs0 + f_focs0 - pre1 - f_pre1',
              'GNCS - Men': 'gncs + gncs0 - pre1',
              'GNCS - Women': 'gncs + f_gncs + gncs0 + f_gncs0 - pre1 - f_pre1',
              '(Male - Female) FOCS': '-f_focs - f_focs0 + f_pre1',
              '(Male - Female) GNCS': '-f_gncs - f_gncs0 + f_pre1',
              '(GNCS - FOCS) Male': 'gncs + gncs0 - focs - focs0',
              '(GNCS - FOCS) Female': 'gncs + f_gncs + gncs0 + f_gncs0 - focs - f_focs - focs0 - f_focs0'}

print("Panel B. Policy effects years 0-3")

for hypothesis, terms in hypotheses.items():
    print(f"\nHypothesis Test: {hypothesis}")
    print(model_3.t_test(terms))

Panel B. Policy effects years 0-3

Hypothesis Test: FOCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0147      0.055      0.269      0.788      -0.092       0.122

Hypothesis Test: FOCS - Women
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.1643      0.130      1.262      0.207      -0.091       0.419

Hypothesis Test: GNCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0      

In [17]:
hypotheses = {'FOCS - Men': 'focs - pre1',
              'FOCS - Women': 'focs + f_focs - pre1 - f_pre1',
              'GNCS - Men': 'gncs - pre1',
              'GNCS - Women': 'gncs + f_gncs - pre1 - f_pre1',
              '(Male - Female) FOCS': '-f_focs + f_pre1',
              '(Male - Female) GNCS': '-f_gncs + f_pre1',
              '(GNCS - FOCS) Male': 'gncs - focs',
              '(GNCS - FOCS) Female': 'gncs + f_gncs - focs - f_focs'}

print("Panel C. Policy effects years 4+")

for hypothesis, terms in hypotheses.items():
    print(f"\nHypothesis Test: {hypothesis}")
    print(model_3.t_test(terms))

Panel C. Policy effects years 4+

Hypothesis Test: FOCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0342      0.073      0.468      0.640      -0.109       0.177

Hypothesis Test: FOCS - Women
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.0328      0.123      0.268      0.789      -0.208       0.273

Hypothesis Test: GNCS - Men
                             Test for Constraints                             
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
c0       