Skip to content

Commit

Permalink
Fixed some estimation process issues
Browse files Browse the repository at this point in the history
  • Loading branch information
SeBecker committed Aug 20, 2018
1 parent 7bacde0 commit f124a2b
Show file tree
Hide file tree
Showing 24 changed files with 275 additions and 266 deletions.
Binary file modified development/tests/reliability/aer-simulation-mock.pkl
Binary file not shown.
145 changes: 62 additions & 83 deletions development/tests/reliability/reliability.grmpy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,99 +10,78 @@
start auto
agents 1000
optimizer SCIPY-BFGS
maxiter 100000
maxiter 20000
dependent wage
indicator state

TREATED

coeff const 168.0750 nonbinary
coeff exp 0.0794 nonbinary
coeff expsq -0.0035 nonbinary
coeff lwage5 0.8319 nonbinary
coeff lurate 0.0032 nonbinary
coeff cafqt 0.1222 nonbinary
coeff cafqtsq 0.0546 nonbinary
coeff mhgc -0.0097 nonbinary
coeff mhgcsq 0.0014 nonbinary
coeff numsibs -0.0102 nonbinary
coeff numsibssq 0.0002 nonbinary
coeff urban14 0.0457 nonbinary
coeff lavlocwage17 0.8999 nonbinary
coeff lavlocwage17sq -0.0431 nonbinary
coeff avurate 0.1459 nonbinary
coeff avuratesq -0.0135 nonbinary
coeff d57 0.0080 nonbinary
coeff d58 0.1340 nonbinary
coeff d59 -0.0012 nonbinary
coeff d60 0.0567 nonbinary
coeff d61 0.0882 nonbinary
coeff d62 -0.0004 nonbinary
coeff d63 0.0489 nonbinary
coeff const 168.0750
coeff exp 0.0794
coeff expsq -0.0035
coeff lwage5 0.8319
coeff lurate 0.0032
coeff cafqt 0.1222
coeff cafqtsq 0.0546
coeff mhgc -0.0097
coeff mhgcsq 0.0014
coeff numsibs -0.0102
coeff numsibssq 0.0002
coeff urban14 0.0457
coeff lavlocwage17 0.8999
coeff lavlocwage17sq -0.0431
coeff avurate 0.1459
coeff avuratesq -0.0135

UNTREATED

coeff const 110.0000 nonbinary
coeff exp 0.0540 nonbinary
coeff expsq 0.0004 nonbinary
coeff lwage5 0.5766 nonbinary
coeff lurate -0.0037 nonbinary
coeff cafqt 0.0506 nonbinary
coeff cafqtsq -0.0494 nonbinary
coeff mhgc -0.0186 nonbinary
coeff mhgcsq 0.0009 nonbinary
coeff numsibs 0.0043 nonbinary
coeff numsibssq -0.0005 nonbinary
coeff urban14 0.0077 nonbinary
coeff lavlocwage17 12.5816 nonbinary
coeff lavlocwage17sq -0.6056 nonbinary
coeff avurate 0.0717 nonbinary
coeff avuratesq -0.0059 nonbinary
coeff d57 -0.0007 nonbinary
coeff d58 0.0900 nonbinary
coeff d59 0.1234 nonbinary
coeff d60 -0.0234 nonbinary
coeff d61 0.0225 nonbinary
coeff d62 0.1004 nonbinary
coeff d63 0.0389 nonbinary
coeff const 110.0000
coeff exp 0.0540
coeff expsq 0.0004
coeff lwage5 0.5766
coeff lurate -0.0037
coeff cafqt 0.0506
coeff cafqtsq -0.0494
coeff mhgc -0.0186
coeff mhgcsq 0.0009
coeff numsibs 0.0043
coeff numsibssq -0.0005
coeff urban14 0.0077
coeff lavlocwage17 12.5816
coeff lavlocwage17sq -0.6056
coeff avurate 0.0717
coeff avuratesq -0.0059

CHOICE

coeff const 307.0000 nonbinary
coeff cafqt 3.6671 nonbinary
coeff cafqtsq 0.2008 nonbinary
coeff mhgc -1.8348 nonbinary
coeff mhgcsq 0.0096 nonbinary
coeff numsibs -4.2234 nonbinary
coeff numsibssq 0.0016 nonbinary
coeff urban14 0.1058 nonbinary
coeff lavlocwage17 -52.9084 nonbinary
coeff lavlocwage17sq 2.5985 nonbinary
coeff avurate 0.2693 nonbinary
coeff avuratesq -0.0205 nonbinary
coeff lwage5_17numsibs 0.4107 nonbinary
coeff lwage5_17mhgc 0.1846 nonbinary
coeff lwage5_17cafqt -0.3072 nonbinary
coeff lwage5_17 -4.0536 nonbinary
coeff lurate_17 0.4251 nonbinary
coeff lurate_17numsibs -0.0026 nonbinary
coeff lurate_17mhgc -0.0309 nonbinary
coeff lurate_17cafqt -0.0075 nonbinary
coeff tuit4c -0.0520 nonbinary
coeff tuit4cnumsibs -0.0033 nonbinary
coeff tuit4cmhgc 0.0044 nonbinary
coeff tuit4ccafqt 0.0065 nonbinary
coeff pub4 0.7519 nonbinary
coeff pub4numsibs 0.0104 nonbinary
coeff pub4mhgc -0.0559 nonbinary
coeff pub4cafqt 0.1585 nonbinary
coeff d57 0.0067 nonbinary
coeff d58 -0.0990 nonbinary
coeff d59 0.1234 nonbinary
coeff d60 -0.0234 nonbinary
coeff d61 0.0225 nonbinary
coeff d62 0.2000 nonbinary
coeff d63 0.0789 nonbinary
coeff const 307.0000
coeff cafqt 3.6671
coeff cafqtsq 0.2008
coeff mhgc -1.8348
coeff mhgcsq 0.0096
coeff numsibs -4.2234
coeff numsibssq 0.0016
coeff urban14 0.1058
coeff lavlocwage17 -52.9084
coeff lavlocwage17sq 2.5985
coeff avurate 0.2693
coeff avuratesq -0.0205
coeff lwage5_17numsibs 0.4107
coeff lwage5_17mhgc 0.1846
coeff lwage5_17cafqt -0.3072
coeff lwage5_17 -4.0536
coeff lurate_17 0.4251
coeff lurate_17numsibs -0.0026
coeff lurate_17mhgc -0.0309
coeff lurate_17cafqt -0.0075
coeff tuit4c -0.0520
coeff tuit4cnumsibs -0.0033
coeff tuit4cmhgc 0.0044
coeff tuit4ccafqt 0.0065
coeff pub4 0.7519
coeff pub4numsibs 0.0104
coeff pub4mhgc -0.0559
coeff pub4cafqt 0.1585

DIST

Expand All @@ -115,7 +94,7 @@

SCIPY-BFGS

gtol 0.0001
gtol 1e-05
eps 1.4901161193847656e-08

SCIPY-POWELL
Expand Down
29 changes: 16 additions & 13 deletions development/tests/reliability/reliability.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
used. Additionally the module creates two different figures for the reliability section of the
documentation.
"""
import os

import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
Expand All @@ -18,7 +16,6 @@

def create_data():
"""This function creates the a data set based on the results from Caineiro 2011."""

# Read in initialization file and the data set
init_dict = read('reliability.grmpy.ini')
df = pd.read_pickle('aer-simulation-mock.pkl')
Expand Down Expand Up @@ -71,11 +68,11 @@ def update_correlation_structure(model_dict, rho):

def get_effect_grmpy(file):
"""This function simply returns the ATE of the data set."""
df = pd.read_pickle(file)
dict_ = read('reliability.grmpy.ini')
df = pd.read_pickle('aer-simulation-mock.pkl')
beta_diff = dict_['TREATED']['all'] - dict_['UNTREATED']['all']
covars = [dict_['varnames'][j - 1] for j in dict_['TREATED']['order']]
ATE = np.mean(np.dot(df[covars], beta_diff))
ATE = np.dot(np.mean(df[covars]), beta_diff)

return ATE

Expand All @@ -87,7 +84,7 @@ def monte_carlo(file, grid_points):

# Define a dictionary with a key for each estimation strategy
effects = {}
for key_ in ['grmpy', 'ols']:
for key_ in ['grmpy', 'ols', 'true']:
effects[key_] = []

# Loop over different correlations between V and U_1
Expand All @@ -103,10 +100,15 @@ def monte_carlo(file, grid_points):
df_mc = create_data()
endog, exog, exog_ols = df_mc['wage'], df_mc[X], df_mc[['state'] + X]

# Calculate true average treatment effect
ATE = np.mean(df_mc['wage1'] - df_mc['wage0'])
effects['true'] += [ATE]

# Estimate via grmpy
rslt = estimate('reliability.grmpy.ini')
stat = np.dot(np.mean(exog), rslt['TREATED']['all']) - np.dot(np.mean(exog),
rslt['UNTREATED']['all'])
beta_diff = rslt['TREATED']['all'] - rslt['UNTREATED']['all']
stat = np.dot(np.mean(exog), beta_diff)

effects['grmpy'] += [stat]

# Estimate via OLS
Expand All @@ -133,12 +135,12 @@ def create_plots(effects, true):
ax = plt.figure().add_subplot(111)

grid = np.linspace(0.00, 0.99, len(effects[strategy]))
true_ = np.tile(true, len(effects[strategy]))

ax.set_xlim(0, 1)
ax.set_ylim(0.3, 0.55)
ax.set_ylabel(r"Effect")
ax.set_xlabel(r"$\rho_{U_1, V}$")
true_ = np.tile(true, len(effects[strategy]))
ax.plot(grid, effects[strategy], label="Estimate")

ax.plot(grid, true_, label="True")
Expand All @@ -151,8 +153,9 @@ def create_plots(effects, true):


if __name__ == '__main__':
directory = os.path.dirname(os.path.realpath(__file__))
target = os.path.split(os.path.split(os.path.split(directory)[0])[0])[0] + '/docs/figures'
x = monte_carlo('reliability.grmpy.ini', 10)
ATE = get_effect_grmpy('aer-simulation-mock.pkl')

ATE = get_effect_grmpy('reliability.grmpy.ini')

x = monte_carlo('reliability.grmpy.ini', 15)

create_plots(x, ATE)
6 changes: 3 additions & 3 deletions development/tests/robustness/replication.grmpy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

ESTIMATION

file aer-replication-true.pkl
file aer-replication-mock.pkl
start auto
agents 1000
optimizer SCIPY-BFGS
Expand Down Expand Up @@ -108,10 +108,10 @@

DIST

coeff 1.0000
coeff 0.1000
coeff 0.0000
coeff 0.0000
coeff 1.0000
coeff 0.1000
coeff 0.0000
coeff 1.0000

Expand Down
9 changes: 3 additions & 6 deletions development/tests/robustness/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def plot_est_mte(rslt, init_dict, data_frame):
mte = calculate_mte(rslt, init_dict, data_frame, quantiles)
mte = [i / 4 for i in mte]
mte_up, mte_d = calculate_cof_int(rslt, init_dict, data_frame, mte, quantiles)
# Plot both curves
# Plot both curveslscpu | grep MHz

ax = plt.figure().add_subplot(111)

ax.set_ylabel(r"$B^{MTE}$")
Expand Down Expand Up @@ -70,8 +71,6 @@ def calculate_cof_int(rslt, init_dict, data_frame, mte, quantiles):
# Create auxiliary parameters
part1 = np.dot(x, np.dot(param_cov, x))
part2 = np.dot(dist_gradients, np.dot(dist_cov, dist_gradients))
print(part1)
print(part2)
# Prepare two lists for storing the values
mte_up = []
mte_d = []
Expand All @@ -88,12 +87,10 @@ def calculate_cof_int(rslt, init_dict, data_frame, mte, quantiles):

if __name__ == '__main__':

filename = 'fig-marginal-benefit-parametric-replication.png'
init_dict = read('replication.grmpy.ini')
# Estimate the coefficients
rslt = estimate('replication.grmpy.ini')
print(rslt['AUX']['x_internal'])

# Calculate and plot the marginal treatment effect
data = pd.read_pickle('aer-replication-true.pkl')
data = pd.read_pickle('aer-replication-mock.pkl')
mte = plot_est_mte(rslt, init_dict, data)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/figures/fig-marginal-benefit-parametric-replication.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed docs/figures/fig_2sls_average_effect_estimation.png
Binary file not shown.
Binary file not shown.
Binary file removed docs/figures/fig_ols_average_effect_estimation.png
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions docs/reliability.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ For illustrating the reliability we estimate the ATE during each step with two d
The first estimation uses a simple OLS approach.


.. figure:: ../docs/figures/fig_ols_average_effect_estimation.png
.. figure:: ../docs/figures/fig-ols-average-effect-estimation.png
:align: center


As can be seen from the figure, the OLS estimator underestimates the effect significantly. The stronger the correlation between the unobservable variables the more or less stronger the downwards bias.

.. figure:: ../docs/figures/fig_grmpy_average_effect_estimation.png
.. figure:: ../docs/figures/fig-grmpy-average-effect-estimation.png
:align: center


Expand Down
8 changes: 8 additions & 0 deletions grmpy/check/check.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""This module provides some capabilities to check the integrity of the package."""
import os

import numpy as np

from grmpy.check.auxiliary import check_special_conf
from grmpy.check.custom_exceptions import UserError
from grmpy.check.auxiliary import is_pos_def
Expand Down Expand Up @@ -69,3 +71,9 @@ def check_init_file(dict_):
if len(set(dict_[key_]['order'])) != len(dict_[key_]['order']):
msg = 'There are two start coefficients {} Section'.format(key_)
raise UserError(msg)


def check_start_values(x0):
if False in np.isfinite(x0):
msg = '---'
raise UserError(msg)
15 changes: 9 additions & 6 deletions grmpy/estimate/estimate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
from scipy.optimize import minimize
import numpy as np

from grmpy.estimate.estimate_auxiliary import backward_transformation
from grmpy.estimate.estimate_auxiliary import minimizing_interface
from grmpy.estimate.estimate_auxiliary import calculate_criteria
from grmpy.estimate.estimate_auxiliary import optimizer_options
from grmpy.check.check import check_presence_estimation_dataset
from grmpy.estimate.estimate_output import write_comparison
from grmpy.estimate.estimate_auxiliary import adjust_output
from grmpy.estimate.estimate_auxiliary import backward_transformation
from grmpy.estimate.estimate_auxiliary import start_values
from grmpy.estimate.estimate_output import print_logfile
from grmpy.estimate.estimate_auxiliary import bfgs_dict
Expand Down Expand Up @@ -44,16 +45,18 @@ def estimate(init_file):

# define starting values
x0 = start_values(dict_, data, option)
print(x0)
opts, method = optimizer_options(dict_)
dict_['AUX']['criteria'] = calculate_criteria(dict_, data, x0)
dict_['AUX']['starting_values'] = backward_transformation(x0)
print(x0)
rslt_dict = bfgs_dict()
opt_rslt = minimize(
minimizing_interface, x0, args=(dict_, data, rslt_dict), method=method, options=opts)
rslt = adjust_output(opt_rslt, dict_, opt_rslt['x'],data, rslt_dict)
if opts['maxiter'] == 0:
rslt = adjust_output(None, dict_, x0, data, rslt_dict)
else:
opt_rslt = minimize(
minimizing_interface, x0, args=(dict_, data, rslt_dict), method=method, options=opts)
rslt = adjust_output(opt_rslt, dict_, opt_rslt['x'], data, rslt_dict)
# Print Output files
print_logfile(dict_, rslt)
write_comparison(dict_, data, rslt)

return rslt

0 comments on commit f124a2b

Please sign in to comment.