Fixed some estimation process issues

OpenSourceEconomics · Aug 20, 2018 · f124a2b · f124a2b
1 parent 7bacde0
commit f124a2b
Show file tree

Hide file tree

Showing 24 changed files with 275 additions and 266 deletions.
diff --git a/development/tests/reliability/aer-simulation-mock.pkl b/development/tests/reliability/aer-simulation-mock.pkl
diff --git a/development/tests/reliability/reliability.grmpy.ini b/development/tests/reliability/reliability.grmpy.ini
@@ -10,99 +10,78 @@
         start                                     auto
         agents                                    1000
         optimizer                           SCIPY-BFGS
-        maxiter                                 100000
+        maxiter                                  20000
         dependent                                 wage
         indicator                                state
 
    TREATED
 
-        coeff               const             168.0750         nonbinary
-        coeff                 exp               0.0794         nonbinary
-        coeff               expsq              -0.0035         nonbinary
-        coeff              lwage5               0.8319         nonbinary
-        coeff              lurate               0.0032         nonbinary
-        coeff               cafqt               0.1222         nonbinary
-        coeff             cafqtsq               0.0546         nonbinary
-        coeff                mhgc              -0.0097         nonbinary
-        coeff              mhgcsq               0.0014         nonbinary
-        coeff             numsibs              -0.0102         nonbinary
-        coeff           numsibssq               0.0002         nonbinary
-        coeff             urban14               0.0457         nonbinary
-        coeff        lavlocwage17               0.8999         nonbinary
-        coeff      lavlocwage17sq              -0.0431         nonbinary
-        coeff             avurate               0.1459         nonbinary
-        coeff           avuratesq              -0.0135         nonbinary
-        coeff                 d57               0.0080         nonbinary
-        coeff                 d58               0.1340         nonbinary
-        coeff                 d59              -0.0012         nonbinary
-        coeff                 d60               0.0567         nonbinary
-        coeff                 d61               0.0882         nonbinary
-        coeff                 d62              -0.0004         nonbinary
-        coeff                 d63               0.0489         nonbinary
+        coeff               const             168.0750
+        coeff                 exp               0.0794
+        coeff               expsq              -0.0035
+        coeff              lwage5               0.8319
+        coeff              lurate               0.0032
+        coeff               cafqt               0.1222
+        coeff             cafqtsq               0.0546
+        coeff                mhgc              -0.0097
+        coeff              mhgcsq               0.0014
+        coeff             numsibs              -0.0102
+        coeff           numsibssq               0.0002
+        coeff             urban14               0.0457
+        coeff        lavlocwage17               0.8999
+        coeff      lavlocwage17sq              -0.0431
+        coeff             avurate               0.1459
+        coeff           avuratesq              -0.0135
 
    UNTREATED
 
-        coeff               const             110.0000         nonbinary
-        coeff                 exp               0.0540         nonbinary
-        coeff               expsq               0.0004         nonbinary
-        coeff              lwage5               0.5766         nonbinary
-        coeff              lurate              -0.0037         nonbinary
-        coeff               cafqt               0.0506         nonbinary
-        coeff             cafqtsq              -0.0494         nonbinary
-        coeff                mhgc              -0.0186         nonbinary
-        coeff              mhgcsq               0.0009         nonbinary
-        coeff             numsibs               0.0043         nonbinary
-        coeff           numsibssq              -0.0005         nonbinary
-        coeff             urban14               0.0077         nonbinary
-        coeff        lavlocwage17              12.5816         nonbinary
-        coeff      lavlocwage17sq              -0.6056         nonbinary
-        coeff             avurate               0.0717         nonbinary
-        coeff           avuratesq              -0.0059         nonbinary
-        coeff                 d57              -0.0007         nonbinary
-        coeff                 d58               0.0900         nonbinary
-        coeff                 d59               0.1234         nonbinary
-        coeff                 d60              -0.0234         nonbinary
-        coeff                 d61               0.0225         nonbinary
-        coeff                 d62               0.1004         nonbinary
-        coeff                 d63               0.0389         nonbinary
+        coeff               const             110.0000
+        coeff                 exp               0.0540
+        coeff               expsq               0.0004
+        coeff              lwage5               0.5766
+        coeff              lurate              -0.0037
+        coeff               cafqt               0.0506
+        coeff             cafqtsq              -0.0494
+        coeff                mhgc              -0.0186
+        coeff              mhgcsq               0.0009
+        coeff             numsibs               0.0043
+        coeff           numsibssq              -0.0005
+        coeff             urban14               0.0077
+        coeff        lavlocwage17              12.5816
+        coeff      lavlocwage17sq              -0.6056
+        coeff             avurate               0.0717
+        coeff           avuratesq              -0.0059
 
    CHOICE
 
-        coeff               const             307.0000         nonbinary
-        coeff               cafqt               3.6671         nonbinary
-        coeff             cafqtsq               0.2008         nonbinary
-        coeff                mhgc              -1.8348         nonbinary
-        coeff              mhgcsq               0.0096         nonbinary
-        coeff             numsibs              -4.2234         nonbinary
-        coeff           numsibssq               0.0016         nonbinary
-        coeff             urban14               0.1058         nonbinary
-        coeff        lavlocwage17             -52.9084         nonbinary
-        coeff      lavlocwage17sq               2.5985         nonbinary
-        coeff             avurate               0.2693         nonbinary
-        coeff           avuratesq              -0.0205         nonbinary
-        coeff      lwage5_17numsibs               0.4107         nonbinary
-        coeff       lwage5_17mhgc               0.1846         nonbinary
-        coeff      lwage5_17cafqt              -0.3072         nonbinary
-        coeff           lwage5_17              -4.0536         nonbinary
-        coeff           lurate_17               0.4251         nonbinary
-        coeff      lurate_17numsibs              -0.0026         nonbinary
-        coeff       lurate_17mhgc              -0.0309         nonbinary
-        coeff      lurate_17cafqt              -0.0075         nonbinary
-        coeff              tuit4c              -0.0520         nonbinary
-        coeff       tuit4cnumsibs              -0.0033         nonbinary
-        coeff          tuit4cmhgc               0.0044         nonbinary
-        coeff         tuit4ccafqt               0.0065         nonbinary
-        coeff                pub4               0.7519         nonbinary
-        coeff         pub4numsibs               0.0104         nonbinary
-        coeff            pub4mhgc              -0.0559         nonbinary
-        coeff           pub4cafqt               0.1585         nonbinary
-        coeff                 d57               0.0067         nonbinary
-        coeff                 d58              -0.0990         nonbinary
-        coeff                 d59               0.1234         nonbinary
-        coeff                 d60              -0.0234         nonbinary
-        coeff                 d61               0.0225         nonbinary
-        coeff                 d62               0.2000         nonbinary
-        coeff                 d63               0.0789         nonbinary
+        coeff               const             307.0000
+        coeff               cafqt               3.6671
+        coeff             cafqtsq               0.2008
+        coeff                mhgc              -1.8348
+        coeff              mhgcsq               0.0096
+        coeff             numsibs              -4.2234
+        coeff           numsibssq               0.0016
+        coeff             urban14               0.1058
+        coeff        lavlocwage17             -52.9084
+        coeff      lavlocwage17sq               2.5985
+        coeff             avurate               0.2693
+        coeff           avuratesq              -0.0205
+        coeff      lwage5_17numsibs               0.4107
+        coeff       lwage5_17mhgc               0.1846
+        coeff      lwage5_17cafqt              -0.3072
+        coeff           lwage5_17              -4.0536
+        coeff           lurate_17               0.4251
+        coeff      lurate_17numsibs              -0.0026
+        coeff       lurate_17mhgc              -0.0309
+        coeff      lurate_17cafqt              -0.0075
+        coeff              tuit4c              -0.0520
+        coeff       tuit4cnumsibs              -0.0033
+        coeff          tuit4cmhgc               0.0044
+        coeff         tuit4ccafqt               0.0065
+        coeff                pub4               0.7519
+        coeff         pub4numsibs               0.0104
+        coeff            pub4mhgc              -0.0559
+        coeff           pub4cafqt               0.1585
 
    DIST
 
@@ -115,7 +94,7 @@
 
    SCIPY-BFGS
 
-        gtol                                    0.0001
+        gtol                                     1e-05
         eps                     1.4901161193847656e-08
 
    SCIPY-POWELL

diff --git a/development/tests/reliability/reliability.py b/development/tests/reliability/reliability.py
@@ -3,8 +3,6 @@
 used. Additionally the module creates two different figures for the reliability section of the
 documentation.
 """
-import os
-
 import matplotlib.pyplot as plt
 import statsmodels.api as sm
 import pandas as pd
@@ -18,7 +16,6 @@
 
 def create_data():
     """This function creates the a data set based on the results from Caineiro 2011."""
-
     # Read in initialization file and the data set
     init_dict = read('reliability.grmpy.ini')
     df = pd.read_pickle('aer-simulation-mock.pkl')
@@ -71,11 +68,11 @@ def update_correlation_structure(model_dict, rho):
 
 def get_effect_grmpy(file):
     """This function simply returns the ATE of the data set."""
-    df = pd.read_pickle(file)
     dict_ = read('reliability.grmpy.ini')
+    df = pd.read_pickle('aer-simulation-mock.pkl')
     beta_diff = dict_['TREATED']['all'] - dict_['UNTREATED']['all']
     covars = [dict_['varnames'][j - 1] for j in dict_['TREATED']['order']]
-    ATE = np.mean(np.dot(df[covars], beta_diff))
+    ATE = np.dot(np.mean(df[covars]), beta_diff)
 
     return ATE
 
@@ -87,7 +84,7 @@ def monte_carlo(file, grid_points):
 
     # Define a dictionary with a key for each estimation strategy
     effects = {}
-    for key_ in ['grmpy', 'ols']:
+    for key_ in ['grmpy', 'ols', 'true']:
         effects[key_] = []
 
     # Loop over different correlations between V and U_1
@@ -103,10 +100,15 @@ def monte_carlo(file, grid_points):
         df_mc = create_data()
         endog, exog, exog_ols = df_mc['wage'], df_mc[X], df_mc[['state'] + X]
 
+        # Calculate true average treatment effect
+        ATE = np.mean(df_mc['wage1'] - df_mc['wage0'])
+        effects['true'] += [ATE]
+
         # Estimate  via grmpy
         rslt = estimate('reliability.grmpy.ini')
-        stat = np.dot(np.mean(exog), rslt['TREATED']['all']) - np.dot(np.mean(exog),
-                                                                      rslt['UNTREATED']['all'])
+        beta_diff = rslt['TREATED']['all'] - rslt['UNTREATED']['all']
+        stat = np.dot(np.mean(exog), beta_diff)
+
         effects['grmpy'] += [stat]
 
         # Estimate via OLS
@@ -133,12 +135,12 @@ def create_plots(effects, true):
         ax = plt.figure().add_subplot(111)
 
         grid = np.linspace(0.00, 0.99, len(effects[strategy]))
-        true_ = np.tile(true, len(effects[strategy]))
 
         ax.set_xlim(0, 1)
         ax.set_ylim(0.3, 0.55)
         ax.set_ylabel(r"Effect")
         ax.set_xlabel(r"$\rho_{U_1, V}$")
+        true_ = np.tile(true, len(effects[strategy]))
         ax.plot(grid, effects[strategy], label="Estimate")
 
         ax.plot(grid, true_, label="True")
@@ -151,8 +153,9 @@ def create_plots(effects, true):
 
 
 if __name__ == '__main__':
-    directory = os.path.dirname(os.path.realpath(__file__))
-    target = os.path.split(os.path.split(os.path.split(directory)[0])[0])[0] + '/docs/figures'
-    x = monte_carlo('reliability.grmpy.ini', 10)
-    ATE = get_effect_grmpy('aer-simulation-mock.pkl')
+
+    ATE = get_effect_grmpy('reliability.grmpy.ini')
+
+    x = monte_carlo('reliability.grmpy.ini', 15)
+
     create_plots(x, ATE)
diff --git a/development/tests/robustness/replication.grmpy.ini b/development/tests/robustness/replication.grmpy.ini
@@ -6,7 +6,7 @@
 
    ESTIMATION
 
-        file                     aer-replication-true.pkl
+        file                     aer-replication-mock.pkl
         start                                        auto
         agents                                       1000
         optimizer                              SCIPY-BFGS
@@ -108,10 +108,10 @@
 
    DIST
 
-        coeff                                   1.0000
+        coeff                                   0.1000
         coeff                                   0.0000
         coeff                                   0.0000
-        coeff                                   1.0000
+        coeff                                   0.1000
         coeff                                   0.0000
         coeff                                   1.0000
 

diff --git a/development/tests/robustness/run.py b/development/tests/robustness/run.py
@@ -27,7 +27,8 @@ def plot_est_mte(rslt, init_dict, data_frame):
     mte = calculate_mte(rslt, init_dict, data_frame, quantiles)
     mte = [i / 4 for i in mte]
     mte_up, mte_d = calculate_cof_int(rslt, init_dict, data_frame, mte, quantiles)
-    # Plot both curves
+    # Plot both curveslscpu | grep MHz
+
     ax = plt.figure().add_subplot(111)
 
     ax.set_ylabel(r"$B^{MTE}$")
@@ -70,8 +71,6 @@ def calculate_cof_int(rslt, init_dict, data_frame, mte, quantiles):
     # Create auxiliary parameters
     part1 = np.dot(x, np.dot(param_cov, x))
     part2 = np.dot(dist_gradients, np.dot(dist_cov, dist_gradients))
-    print(part1)
-    print(part2)
     # Prepare two lists for storing the values
     mte_up = []
     mte_d = []
@@ -88,12 +87,10 @@ def calculate_cof_int(rslt, init_dict, data_frame, mte, quantiles):
 
 if __name__ == '__main__':
 
-    filename = 'fig-marginal-benefit-parametric-replication.png'
     init_dict = read('replication.grmpy.ini')
     # Estimate the coefficients
     rslt = estimate('replication.grmpy.ini')
-    print(rslt['AUX']['x_internal'])
 
     # Calculate and plot the marginal treatment effect
-    data = pd.read_pickle('aer-replication-true.pkl')
+    data = pd.read_pickle('aer-replication-mock.pkl')
     mte = plot_est_mte(rslt, init_dict, data)
diff --git a/docs/figures/fig-grmpy-average-effect-estimation.png b/docs/figures/fig-grmpy-average-effect-estimation.png
diff --git a/docs/figures/fig-marginal-benefit-parametric-replication.png b/docs/figures/fig-marginal-benefit-parametric-replication.png
diff --git a/docs/figures/fig-ols-average-effect-estimation.png b/docs/figures/fig-ols-average-effect-estimation.png
diff --git a/docs/figures/fig_2sls_average_effect_estimation.png b/docs/figures/fig_2sls_average_effect_estimation.png
diff --git a/docs/figures/fig_grmpy_average_effect_estimation.png b/docs/figures/fig_grmpy_average_effect_estimation.png
diff --git a/docs/figures/fig_ols_average_effect_estimation.png b/docs/figures/fig_ols_average_effect_estimation.png
diff --git a/docs/figures/fig_random_average_effect_estimation.png b/docs/figures/fig_random_average_effect_estimation.png
diff --git a/docs/reliability.rst b/docs/reliability.rst
@@ -17,13 +17,13 @@ For illustrating the reliability we estimate the ATE during each step with two d
 The first estimation uses a simple OLS approach.
 
 
-.. figure:: ../docs/figures/fig_ols_average_effect_estimation.png
+.. figure:: ../docs/figures/fig-ols-average-effect-estimation.png
     :align: center
 
 
 As can be seen from the figure, the OLS estimator underestimates the effect significantly. The stronger the correlation between the unobservable variables the more or less stronger the downwards bias.
 
-.. figure:: ../docs/figures/fig_grmpy_average_effect_estimation.png
+.. figure:: ../docs/figures/fig-grmpy-average-effect-estimation.png
     :align: center
 
 

diff --git a/grmpy/check/check.py b/grmpy/check/check.py
@@ -1,6 +1,8 @@
 """This module provides some capabilities to check the integrity of the package."""
 import os
 
+import numpy as np
+
 from grmpy.check.auxiliary import check_special_conf
 from grmpy.check.custom_exceptions import UserError
 from grmpy.check.auxiliary import is_pos_def
@@ -69,3 +71,9 @@ def check_init_file(dict_):
         if len(set(dict_[key_]['order'])) != len(dict_[key_]['order']):
             msg = 'There are two start coefficients {} Section'.format(key_)
             raise UserError(msg)
+
+
+def check_start_values(x0):
+    if False in np.isfinite(x0):
+        msg = '---'
+        raise UserError(msg)
diff --git a/grmpy/estimate/estimate.py b/grmpy/estimate/estimate.py
@@ -3,12 +3,13 @@
 from scipy.optimize import minimize
 import numpy as np
 
+from grmpy.estimate.estimate_auxiliary import backward_transformation
 from grmpy.estimate.estimate_auxiliary import minimizing_interface
 from grmpy.estimate.estimate_auxiliary import calculate_criteria
 from grmpy.estimate.estimate_auxiliary import optimizer_options
 from grmpy.check.check import check_presence_estimation_dataset
+from grmpy.estimate.estimate_output import write_comparison
 from grmpy.estimate.estimate_auxiliary import adjust_output
-from grmpy.estimate.estimate_auxiliary import backward_transformation
 from grmpy.estimate.estimate_auxiliary import start_values
 from grmpy.estimate.estimate_output import print_logfile
 from grmpy.estimate.estimate_auxiliary import bfgs_dict
@@ -44,16 +45,18 @@ def estimate(init_file):
 
     # define starting values
     x0 = start_values(dict_, data, option)
-    print(x0)
     opts, method = optimizer_options(dict_)
     dict_['AUX']['criteria'] = calculate_criteria(dict_, data, x0)
     dict_['AUX']['starting_values'] = backward_transformation(x0)
-    print(x0)
     rslt_dict = bfgs_dict()
-    opt_rslt = minimize(
-        minimizing_interface, x0, args=(dict_, data, rslt_dict), method=method, options=opts)
-    rslt = adjust_output(opt_rslt, dict_, opt_rslt['x'],data, rslt_dict)
+    if opts['maxiter'] == 0:
+        rslt = adjust_output(None, dict_, x0, data, rslt_dict)
+    else:
+        opt_rslt = minimize(
+            minimizing_interface, x0, args=(dict_, data, rslt_dict), method=method, options=opts)
+        rslt = adjust_output(opt_rslt, dict_, opt_rslt['x'], data, rslt_dict)
     # Print Output files
     print_logfile(dict_, rslt)
+    write_comparison(dict_, data, rslt)
 
     return rslt