Skip to content

Commit

Permalink
Added hessian matrix approximation tothe estimation process / Added s…
Browse files Browse the repository at this point in the history
…es to estimation output file / changed replication framework so that it provides the confidence intervals of the mte
  • Loading branch information
SeBecker committed Aug 10, 2018
1 parent 28b0664 commit 259d54b
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 21 deletions.
2 changes: 1 addition & 1 deletion development/tests/robustness/mte_original.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[0.2880329, 0.2048867, 0.1886118, 0.1782859, 0.1705181, 0.1641996, 0.1588216, 0.1541061, 0.1498839, 0.146044, 0.1425094, 0.1392244, 0.1361472, 0.1332459, 0.1304953, 0.1278752, 0.1253691, 0.1229635, 0.1206471, 0.1184101, 0.1162444, 0.1141428, 0.1120993, 0.1101084, 0.1081655, 0.1062661, 0.1044067, 0.1025839, 0.1007945, 0.0990358, 0.0973054, 0.0956009, 0.0939201, 0.0922613, 0.0906224, 0.0890019, 0.0873982, 0.0858098, 0.0842353, 0.0826733, 0.0811227, 0.0795823, 0.0780508, 0.0765272, 0.0750105, 0.0734995, 0.0719934, 0.070491, 0.0689915, 0.0674939, 0.0659972, 0.0645005, 0.0630029, 0.0615034, 0.060001, 0.0584949, 0.0569839, 0.0554672, 0.0539436, 0.0524121, 0.0508717, 0.0493211, 0.0477591, 0.0461846, 0.0445962, 0.0429925, 0.041372, 0.0397332, 0.0380743, 0.0363936, 0.034689, 0.0329586, 0.0311999, 0.0294106, 0.0275877, 0.0257283, 0.023829, 0.021886, 0.0198951, 0.0178516, 0.0157501, 0.0135843, 0.0113473, 0.0090309, 0.0066253, 0.0041192, 0.0014991, -0.0012515, -0.0041528, -0.00723, -0.010515, -0.0140496, -0.0178895, -0.0221117, -0.0268271, -0.0322052, -0.0385236, -0.0462915, -0.0566174, -0.0728923, -0.156036]
[[0.1333301, 0.1038857, 0.0977615, 0.0937644, 0.0906855, 0.0881264, 0.0859035, 0.0839161, 0.0821023, 0.0804216, 0.0788455, 0.0773535, 0.0759299, 0.0745627, 0.0732424, 0.0719611, 0.0707125, 0.0694913, 0.0682927, 0.067113, 0.0659487, 0.0647967, 0.0636544, 0.0625193, 0.0613892, 0.0602621, 0.0591361, 0.0580095, 0.0568806, 0.055748, 0.0546101, 0.0534656, 0.0523132, 0.0511516, 0.0499796, 0.0487959, 0.0475995, 0.0463892, 0.0451639, 0.0439227, 0.0426643, 0.0413879, 0.0400923, 0.0387767, 0.03744, 0.0360814, 0.0346997, 0.0332942, 0.0318638, 0.0304076, 0.0289248, 0.0274143, 0.0258752, 0.0243065, 0.0227074, 0.0210767, 0.0194135, 0.0177166, 0.0159851, 0.0142177, 0.0124132, 0.0105704, 0.0086878, 0.006764, 0.0047975, 0.0027865, 0.0007291, -0.0013765, -0.0035326, -0.0057417, -0.0080062, -0.0103292, -0.0127139, -0.0151638, -0.0176829, -0.0202757, -0.0229473, -0.0257031, -0.0285498, -0.0314945, -0.0345456, -0.0377127, -0.041007, -0.0444414, -0.0480313, -0.0517949, -0.0557538, -0.0599347, -0.0643701, -0.0691008, -0.0741789, -0.0796721, -0.0856711, -0.0923016, -0.0997451, -0.1082783, -0.1183562, -0.1308129, -0.1474677, -0.1738934, -0.3107372], [0.2880329, 0.2048867, 0.1886118, 0.1782859, 0.1705181, 0.1641996, 0.1588216, 0.1541061, 0.1498839, 0.146044, 0.1425094, 0.1392244, 0.1361472, 0.1332459, 0.1304953, 0.1278752, 0.1253691, 0.1229635, 0.1206471, 0.1184101, 0.1162444, 0.1141428, 0.1120993, 0.1101084, 0.1081655, 0.1062661, 0.1044067, 0.1025839, 0.1007945, 0.0990358, 0.0973054, 0.0956009, 0.0939201, 0.0922613, 0.0906224, 0.0890019, 0.0873982, 0.0858098, 0.0842353, 0.0826733, 0.0811227, 0.0795823, 0.0780508, 0.0765272, 0.0750105, 0.0734995, 0.0719934, 0.070491, 0.0689915, 0.0674939, 0.0659972, 0.0645005, 0.0630029, 0.0615034, 0.060001, 0.0584949, 0.0569839, 0.0554672, 0.0539436, 0.0524121, 0.0508717, 0.0493211, 0.0477591, 0.0461846, 0.0445962, 0.0429925, 0.041372, 0.0397332, 0.0380743, 0.0363936, 0.034689, 0.0329586, 0.0311999, 0.0294106, 0.0275877, 0.0257283, 0.023829, 0.021886, 0.0198951, 0.0178516, 0.0157501, 0.0135843, 0.0113473, 0.0090309, 0.0066253, 0.0041192, 0.0014991, -0.0012515, -0.0041528, -0.00723, -0.010515, -0.0140496, -0.0178895, -0.0221117, -0.0268271, -0.0322052, -0.0385236, -0.0462915, -0.0566174, -0.0728923, -0.156036], [0.4427358, 0.3058877, 0.279462, 0.2628073, 0.2503507, 0.2402727, 0.2317396, 0.224296, 0.2176655, 0.2116665, 0.2061733, 0.2010953, 0.1963645, 0.1919291, 0.1877482, 0.1837893, 0.1800257, 0.1764358, 0.1730014, 0.1697071, 0.16654, 0.1634889, 0.1605442, 0.1576976, 0.1549417, 0.1522702, 0.1496774, 0.1471582, 0.1447083, 0.1423237, 0.1400007, 0.1377361, 0.1355271, 0.1333709, 0.1312653, 0.129208, 0.1271969, 0.1252304, 0.1233066, 0.121424, 0.1195812, 0.1177767, 0.1160093, 0.1142778, 0.112581, 0.1109177, 0.109287, 0.1076879, 0.1061193, 0.1045802, 0.1030697, 0.1015868, 0.1001306, 0.0987002, 0.0972947, 0.0959131, 0.0945544, 0.0932177, 0.0919021, 0.0906066, 0.0893301, 0.0880718, 0.0868305, 0.0856052, 0.0843949, 0.0831985, 0.0820149, 0.0808428, 0.0796812, 0.0785288, 0.0773843, 0.0762464, 0.0751138, 0.0739849, 0.0728583, 0.0717323, 0.0706052, 0.0694751, 0.06834, 0.0671977, 0.0660457, 0.0648814, 0.0637017, 0.0625032, 0.0612819, 0.0600333, 0.058752, 0.0574317, 0.0560645, 0.0546409, 0.0531489, 0.0515728, 0.0498921, 0.0480783, 0.0460909, 0.043868, 0.041309, 0.0382299, 0.0342329, 0.0281087, -0.0013348]]
4 changes: 2 additions & 2 deletions development/tests/robustness/replication.grmpy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
start auto
agents 1000
optimizer SCIPY-BFGS
maxiter 40000
maxiter 80000
dependent wage
indicator state

Expand Down Expand Up @@ -118,7 +118,7 @@
SCIPY-BFGS

gtol 0.00001
eps 1.4901161193847656e-07
eps 1.4901161193847656e-09

SCIPY-POWELL

Expand Down
68 changes: 54 additions & 14 deletions development/tests/robustness/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
Additionally it returns a figure of the Marginal treatment effect based on the estimation results.
"""
import matplotlib.pyplot as plt
from os.path import join
from shutil import move
from scipy.stats import norm
import pandas as pd
import numpy as np
import json
import os

from grmpy.estimate.estimate_auxiliary import calculate_mte
from grmpy.estimate.estimate import estimate
Expand All @@ -19,39 +17,81 @@ def plot_est_mte(rslt, data_frame):

# Define the Quantiles and read in the original results
quantiles = [0.0001] + np.arange(0.01, 1., 0.01).tolist() + [0.9999]
mte_original = json.load(open('mte_original.json', 'r'))
mte_ = json.load(open('mte_original.json', 'r'))
mte_original = mte_[1]
mte_original_d = mte_[0]
mte_original_u = mte_[2]

# Calculate the MTE
# Calculate the MTE and confidence intervals
mte = calculate_mte(rslt, data_frame, quantiles)
mte = [i / 4 for i in mte]

mte_up, mte_d = calculate_cof_int(rslt, data_frame, mte, quantiles)
# Plot both curves
ax = plt.figure().add_subplot(111)

ax.set_ylabel(r"$B^{MTE}$")
ax.set_xlabel("$u_S$")
ax.plot(quantiles, mte, label='grmpy MTE')
ax.plot(quantiles, mte_original, label='original MTE')
ax.plot(quantiles, mte, label='grmpy MTE', color='blue')
ax.plot(quantiles, mte_up, color='blue', linestyle=':')
ax.plot(quantiles, mte_d, color='blue', linestyle=':')
ax.plot(quantiles, mte_original, label='original MTE', color='red')
ax.plot(quantiles, mte_original_d, color='red', linestyle=':')
ax.plot(quantiles, mte_original_u, color='red', linestyle=':')

ax.set_ylim([-0.4, 0.5])

plt.legend()

plt.tight_layout()
plt.savefig('fig-marginal-benefit-parametric-replication.png')
return mte


def calculate_cof_int(rslt, data_frame, mte, quantiles):
"""This function calculates the confidence interval of the marginal treatment effect."""

# Import parameters and inverse hessian matrix
hess_inv = rslt['AUX']['hess_inv'] / data_frame.shape[0]
params = rslt['AUX']['x_internal']

# Distribute parameters
dist_cov = hess_inv[-4:, -4:]
param_cov = hess_inv[:46, :46]
dist_gradients = np.array([params[-4], params[-6], params[-2], params[-3]])

# Process data
covariates = [rslt['varnames'][j - 1] for j in rslt['TREATED']['order']]
x = np.mean(data_frame[covariates]).tolist()
x_neg = [-i for i in x]
x += x_neg
x = np.array(x)

# Create auxiliary parameters
part1 = np.dot(x, np.dot(param_cov, x))
part2 = np.dot(dist_gradients, np.dot(dist_cov, dist_gradients))

# Prepare two lists for storing the values
mte_up = []
mte_d = []

# Combine all auxiliary parameters and calculate the confidence intervals
for counter, i in enumerate(quantiles):
value = part2 * (norm.ppf(i)) ** 2
aux = np.sqrt(part1 + value) / 4
mte_up += [mte[counter] + norm.ppf(0.95) * aux]
mte_d += [mte[counter] - norm.ppf(0.95) * aux]
print(mte_up, mte_d)

return mte_up, mte_d


if __name__ == '__main__':

directory = os.path.dirname(os.path.realpath(__file__))
target = os.path.split(os.path.split(os.path.split(directory)[0])[0])[0] + '/docs/figures'
filename = 'fig-marginal-benefit-parametric-replication.png'

# Estimate the coefficients
rslt = estimate('replication.grmpy.ini')

# Calculate and plot the marginal treatment effect
data = pd.read_pickle('aer-replication-mock.pkl')
plot_est_mte(rslt, data)

# Move the plot to the documentation directory
move(join(directory, filename), join(target, filename))
mte = plot_est_mte(rslt, data)
Binary file modified docs/figures/fig-marginal-benefit-parametric-replication.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion grmpy/estimate/estimate.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,9 @@ def estimate(init_file):
opt_rslt = minimize(
minimizing_interface, x0, args=(dict_, data, rslt_dict), method=method, options=opts)
rslt = adjust_output(opt_rslt, dict_, opt_rslt['x'], rslt_dict)

# Print Output files
print_logfile(dict_, rslt)
print_logfile(dict_, rslt, data)
write_comparison(dict_, data, rslt)

return rslt
86 changes: 83 additions & 3 deletions grmpy/estimate/estimate_auxiliary.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""The module provides auxiliary functions for the estimation process"""
from statsmodels.tools.sm_exceptions import PerfectSeparationError
from statsmodels.tools.numdiff import approx_hess3
from scipy.stats import norm
import statsmodels.api as sm
import pandas as pd
import numpy as np
import math

from grmpy.simulate.simulate_auxiliary import construct_covariance_matrix
from grmpy.simulate.simulate_auxiliary import simulate_unobservables
Expand Down Expand Up @@ -184,10 +186,13 @@ def calculate_criteria(init_dict, data_frame, start_values):
return criteria


def print_logfile(init_dict, rslt):
def print_logfile(init_dict, rslt, data):
"""The function writes the log file for the estimation process."""
# Adjust output
init_dict, rslt = adjust_print_output(init_dict, rslt)
rslt = calculate_se(rslt, init_dict, data)
auxiliary_list = process_se_log(rslt, init_dict)
print(auxiliary_list)
with open('est.grmpy.info', 'w') as file_:

for label in ['Optimization Information', 'Criterion Function', 'Economic Parameters']:
Expand Down Expand Up @@ -234,11 +239,11 @@ def print_logfile(init_dict, rslt):

else:
file_.write(fmt.format(*['', 'Identifier', 'Start', 'Finish']) + '\n\n')
fmt = ' {:>10}' * 2 + ' {:>20.4f}' * 2
fmt = ' {:>10}' * 2 + ' {:>20.4f}' * 2 + '{:>10}'
for i in range(len(rslt['AUX']['x_internal'])):
file_.write('{0}\n'.format(
fmt.format('', str(i), init_dict['AUX']['starting_values'][i],
rslt['AUX']['x_internal'][i])))
rslt['AUX']['x_internal'][i], auxiliary_list[i])))


def optimizer_options(init_dict_):
Expand Down Expand Up @@ -604,3 +609,78 @@ def calculate_mte(rslt, data_frame, quant=None):
return value, args
else:
return value


def calculate_se(rslt, init_dict, data_frame):
"""This function calculates the standard errors for given parameterization via an approximation
of the hessian matrix."""

x0 = process_estimation_results(rslt['AUX']['x_internal'].tolist())
hess = approx_hess3(x0, loglikelihood_without_cholsky, args=(init_dict, data_frame),
epsilon=0.000001)
hess_inv = np.linalg.inv(hess)
se = np.sqrt(np.diag(hess_inv) / data_frame.shape[0])
rslt['AUX']['standard_errors'] = se
rslt['AUX']['hess_inv'] = hess_inv
return rslt


def process_estimation_results(x0):
"""This """
pseudo_x0 = x0[:-6]
pseudo_dist = [x0[-1]] + [x0[-6]] + x0[-4:-1]
x0 = pseudo_x0 + pseudo_dist
return x0


def loglikelihood_without_cholsky(x, init_dict, data_frame):
"""The function provides the log-likelihood function for the calculation of the standard errors.
"""
n_treated = init_dict['AUX']['num_covars_treated']
n_untreated = n_treated + int(init_dict['AUX']['num_covars_untreated'])

beta1 = x[:n_treated]
beta0 = x[n_treated:n_untreated]
gamma = x[n_untreated:-5]
sd1, sd0, sdv, rho1v, rho0v = x[-4], x[-2], x[-5], x[-3], x[-1]
likl = []
indicator = init_dict['ESTIMATION']['indicator']
dep = init_dict['ESTIMATION']['dependent']
for i in [0.0, 1.0]:
if i == 1.0:
beta, gamma, rho, sd, sdv = beta1, gamma, rho1v, sd1, sdv
key_ = 'TREATED'
else:
beta, gamma, rho, sd, sdv = beta0, gamma, rho0v, sd0, sdv
key_ = 'UNTREATED'
data = data_frame[data_frame[indicator] == i]
Z = data[[init_dict['varnames'][j - 1] for j in init_dict['CHOICE']['order']]]
X = data[[init_dict['varnames'][j - 1] for j in init_dict[key_]['order']]]

choice_ = pd.DataFrame.sum(gamma * Z, axis=1)
part1 = (data[dep] - pd.DataFrame.sum(beta * X, axis=1)) / sd
part2 = (choice_ - rho * sdv * part1) / (np.sqrt((1 - rho ** 2) * sdv ** 2))
dist_1, dist_2 = norm.pdf(part1), norm.cdf(part2)

if i == 1.0:
contrib = (1.0 / sd) * dist_1 * dist_2

else:
contrib = (1.0 / sd) * dist_1 * (1.0 - dist_2)

likl.append(contrib)
likl = np.append(likl[0], likl[1])
likl = - np.mean(np.log(np.clip(likl, 1e-20, np.inf)))

return likl


def process_se_log(rslt, init_dict):
"""This function processes the standard error values for the log file."""
se = ['------' if math.isnan(i) else str(i) for i in np.round(rslt['AUX']['standard_errors'], 4)
]
aux = [se[-4], '------', se[-3], se[-2], se[-1], se[-5]]
list_ = se[:-5]
list_ += aux
list_ = ['({})'.format(i) if len(i) == 6 else '({}0)'.format(i) for i in list_]
return list_

0 comments on commit 259d54b

Please sign in to comment.