# Init

In [30]:
# Execute the helper script inside this kernel's namespace. The cells below use
# `pd`, `np`, `write_do_file_for_regression`, `run_regression`,
# `table_for_regression` and `winsorize` without importing them, so they are
# presumably all defined/imported by this script — confirm.
# NOTE(review): absolute, user-specific path; adjust it to your checkout.
%run "/Users/abramo/Repos/Stata from Python/stata_from_python.py"

# Example Usage

The data are Trump approval-rating polls originally published by [FiveThirtyEight](https://projects.fivethirtyeight.com/trump-approval-ratings/).

In [13]:
# Download the poll list and parse the poll start date as a datetime.
DATA = "https://www.dropbox.com/s/xq7ea8h2k66j0eu/approval_polllist.csv?dl=1"
df   = pd.read_csv(DATA)
df['startdate'] = pd.to_datetime(df['startdate'])

# Keep only the columns used below. `.copy()` makes the result an independent
# frame, so the column assignments that follow do not operate on a slice of the
# raw frame (which is what triggers pandas' SettingWithCopyWarning).
df   = df[['approve', 'disapprove', 'subgroup', 'pollster', 'grade', 'startdate']].copy()

# Dummy: 1 when the poll's subgroup is 'Adults', 0 otherwise.
df['adults' ] = np.where(df['subgroup']=='Adults', 1, 0)

# Integer identifier for each pollster (category codes of the pollster name).
df['company'  ] = df['pollster'].astype("category").cat.codes

# Ordinal rank of the pollster grade: the list is reversed so that 'D-' -> 0 and
# 'A+' -> 11; grades that are missing or not in the list get code -1.
# `astype("category", ordered=..., categories=...)` is no longer accepted by
# pandas, so the ordered categorical is built explicitly instead.
ordered_grades  = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-'][::-1]
df['rank'] = pd.Categorical(df['grade'], categories=ordered_grades, ordered=True).codes

df.head()

  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,approve,disapprove,subgroup,pollster,grade,startdate,adults,company,rank
0,45.0,45.0,All polls,Gallup,B,2017-01-20,0,10,7
1,46.0,37.0,All polls,Morning Consult,B-,2017-01-20,0,25,6
2,42.1,45.2,All polls,Ipsos,B+,2017-01-20,0,18,8
3,45.0,46.0,All polls,Gallup,B,2017-01-21,0,10,7
4,46.0,45.0,All polls,Gallup,B,2017-01-22,0,10,7


In [7]:
# Save dataset in Stata format. The path is relative to the kernel's working
# directory; the file stem matches the "dataset" entry of the regression
# template defined below.
# NOTE(review): the Regressions/ folder presumably has to exist already — confirm.
df.to_stata("Regressions/approval_data.dta")

## First Example

In [14]:
# Display labels for variables (passed to the helpers through the "rename" key)
rename_dict = {'approve': 'Approval Rate', 'startdate': 'Date'}

# Base regression description shared by every example in this notebook
reg_template = dict(
    name     = "baseline",
    dataset  = "approval_data",
    dep_var  = "approve",
    exp_vars = ["adults"],
    FEs      = [],
    cluster  = ['startdate', 'company'],
    rename   = rename_dict,
)

# Work on a shallow copy so the template itself is left untouched
reg = dict(reg_template)

# Four specifications that differ only in their fixed effects
fixed_effect_sets = (
    [],
    ['company'],
    ['startdate'],
    ['company', 'startdate'],
)
specs = [{"FEs": fe_set} for fe_set in fixed_effect_sets]

# Generate the Stata do-file for this regression
write_do_file_for_regression(reg, specs=specs, test_only=False)

In [16]:
# Run the regression (can be done on a cluster as well).
# The argument is the "name" given to the regression when its do-file was written.
run_regression("baseline")

In [19]:
# Read results and create a latex table.
# `reg` is the regression dict passed to write_do_file_for_regression above;
# the returned table is kept in `tab` and displayed as the cell output.
tab = table_for_regression(reg , save_latex=True)
tab

baseline


Unnamed: 0,(1),(2),(3),(4)
Dependent Variable,Approval Rate,Approval Rate,Approval Rate,Approval Rate
,,,,
adults,-2.601**,-0.784***,-2.390**,-0.777***
,(-2.496),(-3.516),(-2.626),(-3.409)
Constant,42.194***,,,
,(41.621),,,
,,,,
Observations,5331,5331,5331,5331
R-squared,0.085,0.443,0.491,0.783
Company Fixed Effects,-,Yes,-,Yes


## Change the dependent variable

In [20]:
# Add a display label for the new dependent variable. `rename_dict` is the same
# object stored under reg_template["rename"], so shallow copies of the template
# see the new label too.
rename_dict['disapprove'] = 'Disapproval Rate'

# Same template as before, with a new name and dependent variable
reg = dict(reg_template, name='disapproval', dep_var='disapprove')

# Four specifications that differ only in their fixed effects
fixed_effect_sets = (
    [],
    ['company'],
    ['startdate'],
    ['company', 'startdate'],
)
specs = [{"FEs": fe_set} for fe_set in fixed_effect_sets]

# Generate the do-file, execute it, then collect the results
write_do_file_for_regression(reg, specs=specs)
run_regression("disapproval")

# Build the table (also saved as latex) and display it
tab2 = table_for_regression(reg , save_latex=True)
tab2

disapproval


Unnamed: 0,(1),(2),(3),(4)
Dependent Variable,Disapproval Rate,Disapproval Rate,Disapproval Rate,Disapproval Rate
,,,,
adults,0.857*,0.004,0.714*,0.001
,(1.767),(0.025),(1.925),(0.010)
Constant,53.856***,,,
,(108.944),,,
,,,,
Observations,5331,5331,5331,5331
R-squared,0.011,0.301,0.560,0.768
Company Fixed Effects,-,Yes,-,Yes


## Mixed Specifications and Subsamples

In [23]:
# Template copy with regression-level defaults: both fixed effects and a
# title for the per-column description row
reg = dict(
    reg_template,
    name     = 'mixed',
    FEs      = ['company', 'startdate'],
    desc_tit = "Pollster",
)

# Each spec lists only the keys it sets itself
specs = [
    # (1) approval
    {"dep_var": 'approve', 'exp_vars': ['adults']},
    # (2) disapproval
    {"dep_var": 'disapprove', 'exp_vars': ['adults']},
    # (3) Gallup polls only, no fixed effects, clustered by date
    {
        "dep_var"  : 'approve',
        'exp_vars' : ['adults'],
        'condition': 'pollster=="Gallup"',
        'desc_txt' : "Only Gallup",
        'FEs'      : [],
        'cluster'  : ['startdate'],
    },
    # (4) every pollster except Gallup, both fixed effects, two-way clustering
    {
        "dep_var"  : 'approve',
        'exp_vars' : ['adults'],
        'condition': 'pollster!="Gallup"',
        'desc_txt' : 'All But Gallup',
        'FEs'      : ['startdate', 'company'],
        'cluster'  : ['startdate', 'company'],
    },
]

# Generate the do-file, execute it, then collect the results
write_do_file_for_regression(reg, specs=specs)
run_regression("mixed")

# Build the table (also saved as latex) and display it
tab3 = table_for_regression(reg , save_latex=True)
tab3

mixed


Unnamed: 0,(1),(2),(3),(4)
Dependent Variable,Approval Rate,Disapproval Rate,Approval Rate,Approval Rate
,,,,
adults,-0.777***,0.001,0.000,-0.972***
,(-3.409),(0.010),(0.000),(-8.307)
Constant,,,38.668***,
,,,(293.564),
,,,,
Observations,5331,5331,748,4583
R-squared,0.783,0.768,0.000,0.783
Company Fixed Effects,Yes,Yes,-,Yes


## Example 4: Renaming Explanatory Variables and Adding Description Rows

In [26]:
# Add a winsorized version of the `adults` dummy and export the extended frame
# as a second Stata dataset ("ex4"), used by the regression in the next cell.
# NOTE(review): `winsorize` is defined outside this notebook; 0.05 is presumably
# the tail cut-off — confirm. This mutates `df` after approval_data.dta was written.
df['adults_win'] = winsorize(df, 'adults', 0.05)
df.to_stata("Regressions/ex4.dta")

In [31]:
# Start from the shared template, pointing it at the dataset that contains the
# winsorized variable.
reg    = reg_template.copy()
reg['name' ] = 'ex4'
reg['dataset'] = 'ex4'
reg['FEs'  ] = []
# Titles of the two per-column description rows; each spec supplies the matching
# text through 'desc_txt' / 'desc2_txt'.
reg['desc_tit' ] = "Adult Proxy"
reg['desc2_tit'] = "Note"

specs  = []

# (1) raw dummy, relabelled so that both columns share a single output row
specs.append( {
    "dep_var"  : 'approve',
    'exp_vars' :['adults'],
    'rename_exp_vars':{'adults':'adults_proxy'},
    'desc_txt'  :"Standard",
    'desc2_txt' :"Boh",
} )

# (2) winsorized dummy, mapped to the same 'adults_proxy' label
specs.append( {
    "dep_var"  : 'approve',
    'exp_vars' :['adults_win'],
    'rename_exp_vars':{'adults_win':'adults_proxy'},
    'desc_txt' :'Winsorized',
    'desc2_txt':"Jesse",
} )

# Write the do file
write_do_file_for_regression(reg, specs=specs)

# Run the regression
# NOTE(review): the other examples pass the regression name as a string here;
# this call passes the whole dict — confirm run_regression accepts both.
run_regression(reg)

# Read results and create a latex table. The result gets its own name (`tab4`)
# so it no longer overwrites `tab3`, the table of the "mixed" example.
tab4 = table_for_regression(reg , save_latex=True)
tab4

ex4


Unnamed: 0,(1),(2)
Dependent Variable,Approval Rate,Approval Rate
,,
adults_proxy,-2.601**,-2.601**
,(-2.496),(-2.496)
Constant,42.194***,42.194***
,(41.621),(41.621)
,,
Observations,5331,5331
R-squared,0.085,0.085
Ses Clustered By,Date-Company,Date-Company


<br/><br/><br/>