You can find this code on my GitHub account: https://github.com/MaximilianJHuber/NYU
1.
==
a.
--

In [1]:
using Optim
using DataFrames
using ForwardDiff
using Roots
using Distributions

In [2]:
data = convert(Matrix,readtable("hsdata.csv"));
dataexp = exp(data);

In [3]:
#operates on single observations!
function g(w,z,theta) 
    return (theta[1] * w[1]^(-theta[2]) * w[2] - 1) .* collect(z[:])
end

#operates on data set!
function gn(w,z,theta)
    i = 1
    result = g(w[i,:],z[i,:],theta)
    
    for i in 2:size(w)[1]
        result .+= g(w[i,:],z[i,:],theta)
    end
    return result/size(w)[1]
end

#Wrapper creates a closure around the data set provided
function Qn_wrapper(w,z)
    return theta -> 1/2*(gn(w,z,theta)'*gn(w,z,theta))[1,1]
end

Qn_wrapper (generic function with 1 method)

In [4]:
w = dataexp[2:end,[2,4]]
z = hcat(ones(size(dataexp)[1]-1), dataexp[1:end-1,4], 1 .+ data[1:end-1,2])

Qn = Qn_wrapper(w,z)
optres = optimize(Qn, [1., 4.], BFGS(), Optim.OptimizationOptions(g_tol=1e-12, autodiff=true))

Results of Optimization Algorithm
 * Algorithm: BFGS
 * Starting Point: [1.0,4.0]
 * Minimizer: [0.9949535485769525,4.439870328581574]
 * Minimum: 1.811863e-08
 * Iterations: 10
 * Convergence: true
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
   * |g(x)| < 1.0e-12: true
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 53
 * Gradient Calls: 53

So $\theta_A = (0.995,4.4399)$.

b.
--
I define:
$$\hat{W}_A=\left(\frac{1}{n} \sum_{t=1}^n{g(w_t,\theta_A)g(w_t,\theta_A)'}\right)^{-1}$$
as in equation (3.5).

In [5]:
thetaA = optres.minimum

function What(w,z,theta) 
    result = zeros(Float64, size(z)[2], size(z)[2])
    for i in 1:size(w)[1]
        result .+= g(w[i,:],z[i,:],theta)*g(w[i,:],z[i,:],theta)'
    end
    return inv(result/size(z)[1])
end

WhatA = What(w,z,thetaA) 

3×3 Array{Float64,2}:
  54729.1    3945.03  -57729.5
   3945.03   2427.59   -6456.5
 -57729.5   -6456.5    63401.5

In [6]:
function Qn_wrapper(w,z,What)
    return theta -> 1/2*(gn(w,z,theta)'*What*gn(w,z,theta))[1,1]
end

QnWhatA = Qn_wrapper(w,z,WhatA)
optresB = optimize(QnWhatA, thetaA, BFGS(), Optim.OptimizationOptions(g_tol=1e-12))

Results of Optimization Algorithm
 * Algorithm: BFGS
 * Starting Point: [0.9949535485769525,4.439870328581574]
 * Minimizer: [0.97852918332263,3.386000838445892]
 * Minimum: 3.513570e-04
 * Iterations: 8
 * Convergence: true
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: true
   * |g(x)| < 1.0e-12: true
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 37
 * Gradient Calls: 37

So $\theta_B = (0.9785,3.3860)$.

c.
--

I need an estimate for the asymptotic variance $\left(G'S^{-1}G\right)^{-1}$:

$$\hat{G} = \frac{1}{n} \sum_{t=1}^n{\frac{\partial g(w_t,\theta_B)}{\partial \theta}} = \frac{\partial \frac{1}{n} \sum_{t=1}^n{g(w_t,\theta_B)}}{\partial \theta} = \frac{\partial gn(\theta_B)}{\partial \theta}$$ and 
$$\hat{S} = \hat{W}_B^{-1}$$

I use automatic differentiation (rather than numeric differentiation) for all calculations (see, http://www.juliadiff.org/).

In [7]:
thetaB = optresB.minimum

#creats a closure around the data set
function gn_wrapper(w,z)
    return theta -> gn(w,z,theta)
end

dgn(theta) = ForwardDiff.jacobian(gn_wrapper(w,z),theta)

GhatB = convert(Array{Float64,2},dgn(thetaB))

3×2 Array{Float64,2}:
 1.02203  -0.0165615
 1.10959  -0.0174242
 1.03983  -0.0166105

In [8]:
aVar = inv(GhatB'*What(w,z,thetaB)*GhatB)

2×2 Array{Float64,2}:
 0.10586    5.40968
 5.40968  321.828  

The confidence interval for $\delta$ is:

In [9]:
thetaB[1] .+ sqrt(aVar[1,1]/size(z)[1]) * [-1.96, 1.96]

2-element Array{Float64,1}:
 0.938116
 1.01894 

The confidence interval for $\gamma$ is:

In [10]:
thetaB[2] .+ sqrt(aVar[2,2]/size(z)[1]) * [-1.96, 1.96]

2-element Array{Float64,1}:
 1.15773
 5.61427

d.
--
Since I do minimization of a positive function, the Hansen-Sargan statistic is:
$$J=2Qn(\theta_B)$$

In [11]:
2*size(z)[1]*QnWhatA(thetaB)

0.17497579531348584

In [12]:
1-cdf(Chisq(1),2*size(z)[1]*QnWhatA(thetaB))

0.6757269985575208

The p-value is not significant. I accept the $H_0$, that the $\mathbb{E}\left[ g(w_t;\theta_0 \right]=0$.
e.
--

In [13]:
w = dataexp[3:end,[2,4]]
z = hcat(ones(size(dataexp)[1]-2), dataexp[2:end-1,4], 1 .+ data[2:end-1,3], 1 .+ data[2:end-1,2],
        dataexp[1:end-2,4], 1 .+ data[1:end-2,3], 1 .+ data[1:end-2,2])

Qn = Qn_wrapper(w,z)
optres = optimize(Qn, [1., 4.], BFGS(), Optim.OptimizationOptions(g_tol=1e-12, autodiff=true))

Results of Optimization Algorithm
 * Algorithm: BFGS
 * Starting Point: [1.0,4.0]
 * Minimizer: [0.9970035183861444,4.576082645831093]
 * Minimum: 1.614946e-07
 * Iterations: 10
 * Convergence: true
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: true
   * |g(x)| < 1.0e-12: true
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 54
 * Gradient Calls: 54

In [14]:
thetaA = optres.minimum
WhatA = What(w,z,thetaA) 

7×7 Array{Float64,2}:
      2.10229e5  -49990.2        …   35729.9  -45719.0      -1.82658e5
 -49990.2             1.93473e5     -11887.1   22359.0       2.34519e5
  66704.9            -2.27365e5      17095.0  -29530.4      -2.82283e5
 -29033.0            -1.75958e5       8384.1  -16963.8      -1.91669e5
  35729.9        -11887.1            27165.7  -32152.7  -45008.2      
 -45719.0         22359.0        …  -32152.7   41287.6   60775.8      
     -1.82658e5       2.34519e5     -45008.2   60775.8       3.88417e5

In [15]:
QnWhatA = Qn_wrapper(w,z,WhatA)
optresB = optimize(QnWhatA, thetaA, BFGS(), Optim.OptimizationOptions(g_tol=1e-12))

Results of Optimization Algorithm
 * Algorithm: BFGS
 * Starting Point: [0.9970035183861444,4.576082645831093]
 * Minimizer: [0.97962853228036,3.467092989529142]
 * Minimum: 1.378672e-03
 * Iterations: 6
 * Convergence: true
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: true
   * |g(x)| < 1.0e-12: false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 28
 * Gradient Calls: 28

The confidence interval for $\delta$ is:

In [16]:
thetaB = optresB.minimum
dgn(theta) = ForwardDiff.jacobian(gn_wrapper(w,z),theta)
GhatB = convert(Array{Float64,2},dgn(thetaB))
Shat = inv(What(w,z,thetaB))
aVar = inv(GhatB'*inv(Shat)*GhatB)
thetaB[1] .+ sqrt(aVar[1,1]/size(z)[1]) * [-1.96, 1.96]



2-element Array{Float64,1}:
 0.943643
 1.01561 

The confidence interval for $\gamma$ is:

In [17]:
thetaB[2] .+ sqrt(aVar[2,2]/size(z)[1]) * [-1.96, 1.96]

2-element Array{Float64,1}:
 1.47358
 5.46061

The Hansen-Sargan test again accepts the $H_0$.

In [18]:
2*size(z)[1]*QnWhatA(thetaB)

0.683821428196953

In [19]:
1-cdf(Chisq(1),2*size(z)[1]*QnWhatA(thetaB))

0.4082739268399961

2.
==
a.
--
With $\Gamma=\left(\begin{array}{cc}
1 & 0\\
-\gamma & 1
\end{array}\right)^{-1}
$,$\Sigma=\left(\begin{array}{cc}
\sigma_{c}^{2} & \rho\sigma_{c}\sigma_{r}\\
\rho\sigma_{c}\sigma_{r} & \sigma_{r}^{2}
\end{array}\right)$ and $\sigma^{2}(\gamma)=\left(\begin{array}{cc}
-\gamma & 1\end{array}\right)\Sigma\left(\begin{array}{c}
-\gamma\\
1
\end{array}\right)$ I find:

$$x_{t+1}=\left(\begin{array}{c}
c_{t+1}\\
r_{t+1}
\end{array}\right)-\Gamma\left(\begin{array}{cc}
\alpha & \beta\\
0 & 0
\end{array}\right)\left(\begin{array}{c}
c_{t}\\
r_{t}
\end{array}\right)-\Gamma\left(\begin{array}{c}
\mu_{c}\\
\lg(\delta)+\frac{1}{2}\sigma^{2}(\gamma)
\end{array}\right)\sim\mathbb{\mathcal{N}}\left(\left(\begin{array}{c}
0\\
0
\end{array}\right),\Gamma\Sigma\Gamma'\right)$$


I maximize: $$Q_{n}\left(\theta\right)=\frac{1}{n}\sum_{t=1}^{n-1}\lg\: f\left(x_{t+1}\mid x_{t};\theta\right)$$

In [20]:
w = data[1:end,[2,4]]

#theta = (δ,γ,α,β,μc,σc,σr,ρ)

#for a single observation:
@inline function m(w,wold,theta)
    (δ,γ,α,β,μc,σc,σr,ρ) = theta
    Σ = [σc^2 ρ*σc*σr; ρ*σc*σr σr^2]
    Γ = inv([1 0; -γ 1])
    
    x = w[:] - (Γ*[α β; 0 0]*wold[:]) - Γ*[μc; log(δ) + 1/2*[-γ 1]*Σ*[-γ; 1]]
    return log(pdf(MvNormal([0, 0], Γ*Σ*Γ'),x))
end

function Qn_wrapper(w)
    return function(theta)
        i = 2
        result = m(w[i,:],w[i-1,:],theta)
        
        for i in 3:size(w)[1]
            result += m(w[i,:],w[i-1,:],theta)
        end
        
        return -result/size(w)[1] #I am going to minimize this function!
    
        end
end

Qn_wrapper (generic function with 3 methods)

In [21]:
Qn = Qn_wrapper(w)

optres = optimize(Qn, [1,4,-0.2,0.01,0.02,0.03,0.1,-0.3], 
    NelderMead(), Optim.OptimizationOptions(iterations=5000, autodiff=true))

Results of Optimization Algorithm
 * Algorithm: Nelder-Mead
 * Starting Point: [1.0,4.0,-0.2,0.01,0.02,0.03,0.1,-0.3]
 * Minimizer: [0.9937595447539255,3.51804969551279, ...]
 * Minimum: -2.700392e+00
 * Iterations: 436
 * Convergence: true
   *  √(Σ(yᵢ-ȳ)²)/n < 1.0e-08: true
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 534


In [22]:
thetahat = optres.minimum

8-element Array{Float64,1}:
  0.99376    
  3.51805    
 -0.2064     
  0.000748018
  0.0211237  
  0.0329381  
  0.122884   
 -0.275374   

Since I am worried that the model is misspecified I calculate the sandwich estimator for the asymptotic variance $\hat{H}^{-1} \hat{\Sigma} \hat{H}^{-1}$ where:

$$\hat{H}=\frac{1}{n} \sum_{t=1}^{n-1} {H(w_t; \hat{\theta}}) = \frac{\partial^2 Q_n(\hat{\theta})}{\partial \theta \partial \theta'}$$

$$\hat{\Sigma} = \frac{1}{n} \sum_{t=1}^{n-1} {\frac{\partial \lg\: f\left(x_{t+1}\mid x_{t};\hat{\theta}\right) }{\partial \theta} \, \frac{\partial \lg\: f\left(x_{t+1}\mid x_{t};\hat{\theta}\right) }{\partial \theta}' }$$

In [23]:
Hhat = ForwardDiff.hessian(Qn,thetahat)

8×8 Array{Float64,2}:
 72.2691    1.61677     1.28554    …    37.8252     11.1138    -1.02308  
  1.61677   0.116937    0.0119883        3.26121    -0.398778   0.265963 
  1.28554   0.0119883   1.41312          0.675522    0.196542  -0.0187497
  5.32971   0.0810943   4.0242           3.99825     0.490494  -0.222594 
 73.7831    1.65063    17.3078          38.5517     11.3485    -1.0454   
 37.8252    3.26121     0.675522   …  1931.01      -14.3743     8.47311  
 11.1138   -0.398778    0.196542       -14.3743    139.036      2.25767  
 -1.02308   0.265963   -0.0187497        8.47311     2.25767    1.26894  

In [24]:
function Sigmahat(w,theta) 
    i = 2
    result = ForwardDiff.gradient(theta -> m(w[i,:],w[i-1,:],theta), thetahat)*
            ForwardDiff.gradient(theta -> m(w[i,:],w[i-1,:],theta), thetahat)'
    
    for i in 3:size(w)[1]
        result .+= ForwardDiff.gradient(theta -> m(w[i,:],w[i-1,:],theta), thetahat)*
            ForwardDiff.gradient(theta -> m(w[i,:],w[i-1,:],theta), thetahat)'
    end
    return result/size(w)[1]
end

Sigmahat (generic function with 1 method)

In [25]:
Shat = Sigmahat(w,thetahat);

In [26]:
aVar = inv(Hhat)*Shat*inv(Hhat)

8×8 Array{Float64,2}:
   0.314259     -10.7652      -0.286694    …  -0.0931831      2.63825   
 -10.7652       385.981        9.6238          3.39306      -94.4096    
  -0.286694       9.6238       1.62018         0.079392      -2.409     
   0.0681545     -2.3405      -0.259152       -0.0197188      0.560679  
  -0.0024981      0.0589833   -0.00900207      0.000157437   -0.0129214 
  -0.000447576    0.00640219  -0.00288125  …   0.000377299   -0.00610752
  -0.0931831      3.39306      0.079392        0.038193      -0.855773  
   2.63825      -94.4096      -2.409          -0.855773      24.0423    

Now the estimated confidence intervals for $\delta$ and $\gamma$:

In [29]:
thetahat[1] .+ sqrt(aVar[1,1]/size(z)[1]) * [-1.96, 1.96]

2-element Array{Float64,1}:
 0.923989
 1.06353 

In [30]:
thetahat[2] .+ sqrt(aVar[2,2]/size(z)[1]) * [-1.96, 1.96]

2-element Array{Float64,1}:
 1.07286
 5.96324

c.
--
The estimates are close to the ones in part 1. But I believe the normality assumption is particularily unwarranted and the linerization of the model leads to certainty equivalence. Both increase confidence intervals, especially those for the subjective discount rate.