In [1]:
%%capture
import stata_setup, os
if os.name == 'nt':
    stata_setup.config('C:/Program Files/Stata17/','mp')
else:
    stata_setup.config('/usr/local/stata17','mp')

## Data Preparation

In [2]:
%%stata -qui

* Load the analysis dataset, replacing whatever is currently in memory.
use "../data/data", clear

* Expand year and cluster into indicator variables named y_1, y_2, ...
* and c_1, c_2, ... (one per distinct value).
quietly tabulate year, generate(y_)
quietly tabulate cluster, generate(c_)

* Flesch-Kincaid Grade Level indicator: 1 when the grade level is at or
* above the sample's 90th percentile, 0 otherwise.
* Stata orders missing values above every number, so an unguarded ">="
* would code observations with a missing grade level as 1. The
* if-qualifier leaves FKG_01 missing for those observations instead, so
* they drop out of any estimation that uses FKG_01.
summarize flesch_kincaid_grade_level, detail
local cutoff = r(p90)
gen int FKG_01 = (flesch_kincaid_grade_level >= `cutoff') ///
    if !missing(flesch_kincaid_grade_level)

* Excess sentence count: number of sentences above the sample's 5th
* percentile, floored at zero. A missing num_sentences stays missing
* because missing is never "< 0" in the replace below.
summarize num_sentences, detail
local cutoff_sentences = r(p5)
gen int excess_sentences = num_sentences - `cutoff_sentences'
replace excess_sentences = 0 if excess_sentences < 0

* Journal indicators entering the model. AER is not listed; the original
* note describes it as the base category.
local journals ecm jpe qje res

* JEL-code indicators entering the model. There is no d_imp entry; the
* original note describes JEL D as the base case.
local jel_imp a_imp b_imp c_imp e_imp f_imp g_imp h_imp i_imp j_imp k_imp ///
    l_imp m_imp n_imp o_imp p_imp q_imp r_imp y_imp z_imp

* Classify every candidate regressor with vl. The "dummy" option asks for
* a vldummy system list of 0/1 variables, and "clear" discards any earlier
* classification. The year and cluster ranges start at y_2 and c_2, so the
* first indicator of each set is left out.
vl set log_num_authors log_num_pages both_genders prop_women ///
    `journals' `jel_imp' y_2-y_20 c_2-c_215 jel_flag, ///
    dummy clear nonotes

* fe: every 0/1 variable except both_genders and jel_flag.
* $mfe is that list with the i. factor-variable prefix applied.
vl create fe = vldummy - (both_genders jel_flag)
vl substitute mfe = i.fe

* controls: the continuous variables other than prop_women.
* controls_dummy: the two 0/1 controls held out of fe above.
* $baseline combines both, with i. applied to the 0/1 controls.
vl create controls = vlcontinuous - (prop_women)
vl create controls_dummy = (both_genders jel_flag)
vl substitute baseline = i.controls_dummy controls

* Rebuild the vl macros so the lists above are available as globals.
vl rebuild




# Lasso - Logistic

In [3]:
%%stata -qui -eret steret
* Lasso-penalised logit of FKG_01 on prop_women, the $baseline controls and
* the $mfe indicator set, with the penalty passed as lambda(0.018) and the
* iteration log suppressed. The e() results are copied to Python as steret.
lasso logit FKG_01 prop_women $baseline $mfe, lambda(0.018) nolog
# $mfe: fixed effects; lambda is held fixed




In [4]:
# Print the coefficient table from the estimation results currently held in e().
%stata ereturn display

------------------------------------------------------------------------------
      FKG_01 | Coefficient
-------------+----------------------------------------------------------------
       0.ecm |  -.0062653
       0.c_4 |  -1.185463
      0.c_13 |  -.0127349
      0.c_42 |  -.0158715
      0.c_98 |  -.1500967
     0.c_165 |  -.0096062
     0.c_186 |  -.0158715
       _cons |   -.803255
------------------------------------------------------------------------------


# Lasso - Poisson

In [5]:
%%stata -qui -eret steret
* Lasso-penalised Poisson regression of excess_sentences on prop_women, the
* $baseline controls and the $mfe indicator set, with the penalty passed as
* lambda(.07) and the iteration log suppressed. The e() results are copied
* to Python as steret.
lasso poisson excess_sentences prop_women $baseline $mfe, lambda(.07) nolog
# $baseline: regressors




In [6]:
# Print the coefficient table from the estimation results currently held in e().
%stata ereturn display

------------------------------------------------------------------------------
excess_sen~s | Coefficient
-------------+----------------------------------------------------------------
log_num_pa~s |   .3637272
       0.ecm |  -.2927387
       0.qje |   -.174802
       0.res |  -.3358273
     0.c_imp |  -.0362706
      0.c_86 |   -.235316
     0.c_114 |  -.4203244
     0.c_136 |  -1.154179
     0.c_142 |   -.100394
     0.c_174 |  -.1172538
       _cons |   2.351952
------------------------------------------------------------------------------
