# Empirical Asset Pricing - Midterm Exam

# Task 1

### Loading the Data

In [11]:
using CSV, DataFrames, Plots, Optim, Distributions, LaTeXStrings; plotlyjs();

In [12]:
main_data = CSV.read(pwd() * "\\midterm_EZ_data\\" * "main_data.csv", 
    delim=',', 
    null="NA", 
    rows_for_type_detect=281)

main_data[:CONSEXCL] = main_data[:CONSEXCL] ./ main_data[:POP]
head(main_data)

Unnamed: 0,QTR,CONSEXCL,POP,VWRETD,VWRETX,dp,TB3MS,PCE,CAY,CRSPEX,DEF,TRM,RREL
1,1947:01,7617.51,0.143155,-0.014973,-0.025734,-3.06759,0.0009482,13.03,missing,-0.0160344,0.576667,missing,missing
2,1947:02,7736.24,0.143804,-0.006664,-0.019613,-2.98656,0.0009482,13.135,missing,-0.0076345,0.643333,missing,missing
3,1947:03,7696.37,0.144469,0.018497,0.006159,-2.94234,0.00164458,13.38,missing,0.0166834,0.62,missing,missing
4,1947:04,7577.41,0.145142,0.036496,0.016783,-2.89371,0.00211602,13.713,missing,0.0337298,0.66,missing,0.002825
5,1948:01,7613.39,0.145755,-0.003641,-0.017062,-2.83392,0.00241331,13.865,missing,-0.00606096,0.68,missing,0.00255
6,1948:02,7683.79,0.146339,0.114969,0.100817,-2.91281,0.00248758,14.008,missing,0.106339,0.63,missing,0.0013


In [13]:
industry_returns = CSV.read(pwd() * "\\midterm_EZ_data\\" * "industry_returns.csv", 
    delim=',', 
    null="NA", 
    rows_for_type_detect=281)

head(industry_returns)

Unnamed: 0,RIndGrp1,RIndGrp2,RIndGrp3,RIndGrp4
1,1.0163,0.966228,0.969735,0.964625
2,0.991721,0.94003,0.99539,0.95785
3,1.07433,1.04614,1.00748,1.00475
4,1.06,1.00638,1.00264,1.01957
5,1.03595,1.03309,0.986418,0.98855
6,1.16129,1.08559,1.13698,1.0767


In [14]:
size_bm_returns = CSV.read(pwd() * "\\midterm_EZ_data\\" * "size_bm_returns.csv", 
    delim=',', 
    null="NA", 
    rows_for_type_detect=281)

head(size_bm_returns)

Unnamed: 0,Small_Low,Small_Mid,Small_High,Big_Low,Big_Mid,Big_High
1,0.997179,0.98319,1.01653,1.00416,0.971839,0.98621
2,0.89609,0.931382,0.930233,0.99849,1.00918,0.975781
3,1.03702,1.05072,1.08553,1.00903,1.0131,1.04783
4,0.991005,1.0111,1.03236,1.02122,1.04986,1.07789
5,0.98289,0.988766,1.05495,0.981367,0.995592,1.02259
6,1.07591,1.09933,1.12938,1.10281,1.11475,1.17449


The core function of the GMM estimator is:
$$g(x, z, \theta)$$
with $x[1] = C_t$, $x[2] = C_{t+1}$, $x[3] = R_{w,t+1}$, $y = R_{i, t+1}$ and $z$ are instruments. The parameters $\theta$ are: $\theta[1] = \beta$, $\theta[2] = \psi$, $\theta[3] = \theta$.

In [15]:
#operates on a single observation
function g(x, y, z, θ)
    vec((((θ[1] * (x[2]/x[1])^(-1/θ[2])) ^ θ[3] * (1 / x[3])^(1-θ[3])) .* y .- 1) * z')
end

g (generic function with 1 method)

This function stacks estimated moments and groups them by instrument.

The following function conduct averaging and define the objective function:

In [6]:
#operates on a data set
function gn(X, Y, Z, θ)
    if θ[1] < 0 
        warn("β negative!!!")
    end
    avg_g = zeros(size(Y, 2) * size(Z, 2))
    
    for t in 1:size(X, 1)
        avg_g += g(X[t, :], Y[t, :], Z[t, :], θ)
    end
    
    return avg_g/size(X, 1)
end

#creates a closure around the data set Y, X, Z
function gn_wrapper(X, Y, Z)
    return θ -> gn(X, Y, Z, θ)
end

function Qn(θ, gn_wrapped)
    1/2 * (gn_wrapped(θ)' * gn_wrapped(θ))[1]
end

function Qn(θ, gn_wrapped, W)
    1/2 * (gn_wrapped(θ)' * W * gn_wrapped(θ))[1]
end

Qn (generic function with 2 methods)

The HAC estimator of the variance is (equations 3.45 and 3.46 from Tim Christensen's notes):

In [7]:
#lag is by default +/- 4 quarters
function Shat_HAC(X, Y, Z, θ; Jn = 4)
    N = size(Y, 2) * size(Z, 2) #number of moments
    
    Γ = zeros(N, N)
    Γ!(Γ, X, Y, Z, θ, j = 0)
    
    #0th lag
    Shat = copy(Γ)
    
    #all other lags
    for j in 1:Jn
        fill!(Γ, zero(Float64))
        Γ!(Γ, X, Y, Z, θ; j = j)
        Shat .+= (1 - j / (Jn + 1)) .* (Γ .+ Γ')
    end
    
    return Shat
end

function Γ!(Γ, X, Y, Z, θ; j = 0)
    for t in j+1:size(X, 1)
        Γ .+= g(X[t, :], Y[t, :], Z[t, :], θ) * g(X[t-j, :], Y[t-j, :], Z[t-j, :], θ)'
    end
    
    Γ .= Γ ./ size(X, 1)
end

Γ! (generic function with 1 method)

The efficient GMM procedure:

In [8]:
function effientGMM(X, Y, Z, θmapping, initial, randomizer; 
        verbose = false, tol = 1e-8, max_iter = 100, min_iter = 100, Jn = 4, 
        print_iter = 1, optim_verbose = false, second_only=false, first_S=nothing, algorithm=BFGS())

    gn_wrapped = gn_wrapper(X, Y, Z)
    
    if verbose
        println("First stage")
    end

    #first stage, from initial value provided
    if first_S == nothing
        obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped), initial; autodiff = :forward)
    else
        obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, first_S^-1), initial; autodiff = :forward)
    end
    
    θhat_first = Optim.minimizer(optimize(obj, initial, algorithm, Optim.Options(show_trace = optim_verbose)))

    #search for alternative maximum in first stage
    iteration = 0
    θhat_first_old = copy(θhat_first)
    while iteration <= min_iter #stop if no new solution found for 100 iterations!
        iteration += 1
        initial_draw = randomizer()
        
        try
            θhat_first = Optim.minimizer(optimize(obj, initial_draw, 
                algorithm, Optim.Options(show_trace = optim_verbose, iterations = 100)))
            
            if obj.f(θhat_first) < obj.f(θhat_first_old)
                #new optimum found
                if maximum(abs.(θhat_first - θhat_first_old)) > 0.0001
                    iteration = 0
                end
                
                if verbose
                    println("Better solution: $θhat_first compared to old $θhat_first_old")
                end
                θhat_first_old = copy(θhat_first)

                elseif verbose & (maximum(abs.(θhat_first - θhat_first_old)) > 0.01)
                #not a new optimum, but different solution!
                println("Other candidate solution: $θhat_first compared to optimum $θhat_first_old")
            end
            
            catch y #there might be an error with weird starting values!
            warn("$y while trying other starting value $initial_draw")
            iteration -= 1
        end
    end
    
    θhat_first = θhat_first_old
    
    #second stage   
    if verbose
        println("Second stage")
    end
    
    S = Shat_HAC(X, Y, Z, θmapping(θhat_first); Jn = 8)
    if verbose 
        println("S1 = $S")
    end
    
    #repeat GMM until convergence
    distance = 1.
    iteration = 0
    θhat_old = copy(θhat_first)
    θhat = copy(θhat_first)
    while (distance > tol) & (iteration <= max_iter)
        iteration += 1
        if cond(S) > 20000
            warn("S has condition number $(cond(S))")
        end
        
        W = inv(S)
        obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, W), θhat_old; autodiff = :forward)
        θhat = Optim.minimizer(optimize(obj, θhat_old, algorithm, Optim.Options(show_trace = optim_verbose)))
        if !second_only
            S = Shat_HAC(X, Y, Z, θmapping(θhat); Jn = Jn)
        end
        
        distance = maximum(abs.(θhat - θhat_old))
        θhat_old = copy(θhat)
        if verbose & (iteration % print_iter == 0)
            println("Iteration: $iteration | θ = $(θhat_old) | distance = $distance")
        end
    end
    
    if verbose & (iteration % print_iter == 0)
        println("S$iteration = $S")
        println("Second stage yielded: $(θhat_old)")
    end
    
    
    W = inv(S)
    obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, W), initial; autodiff = :forward)
    
    distance = 1.
    iteration = 0
    while iteration <= min_iter #stop if no new solution found for 100 iterations!
        iteration += 1
        initial_draw = randomizer()
        
        try
            θhat = Optim.minimizer(optimize(obj, initial_draw, algorithm, 
                Optim.Options(show_trace = optim_verbose)))
            
            if obj.f(θhat) < obj.f(θhat_old)
                #new optimum found
                iteration = 0
                if verbose
                    println("Better solution: $θhat compared to old $θhat_old")
                end
                θhat_old = copy(θhat)

                elseif verbose & (maximum(abs.(θhat - θhat_old)) > 0.01)
                #not a new optimum, but different solution!
                println("Other candidate solution: $θhat compared to optimum $θhat_old")
            end
            
            catch y #there might be an error with weird starting values!
            warn("$y while trying other starting value $initial_draw")
            iteration -= 1
        end        
    end
    S = Shat_HAC(X, Y, Z, θmapping(θhat_old); Jn = Jn)
    G = ForwardDiff.jacobian(θ -> gn_wrapped(θmapping(θ)), θhat_old)
    Vasympt = (G' * S^-1 * G)^-1
    J = 2 * size(X, 1) * Qn(θmapping(θhat_old), gn_wrapped, S^-1)
    
    return (θhat_first, 
        θhat_old, 
        sqrt.(diag(Vasympt/size(X, 1))), 
        J, 
        1-cdf(Chisq(size(Y, 2) * size(Z, 2) - length(initial)), J),
        G, 
        S,
        Vasympt,
        obj
    )
end

effientGMM (generic function with 1 method)

I do all data manipulations in a DataFrame and then select columns from it. The first consumption and return observation is useless:

In [9]:
data = DataFrame()
data[:QTR] = main_data[2:end, :QTR]
data[:C_t] = main_data[1:end-1, :CONSEXCL]
data[:C_t1] = main_data[2:end, :CONSEXCL]
data[:R_vwCRISP] = 1 + main_data[2:end, :VWRETD];

### Specification 1
In order to adjust the industry returns for inflation one needs to derive an inflation measure from the [PCE deflator](https://fred.stlouisfed.org/series/DPCERD3Q086SBEA). I assume that the deflator is presented in end-of-quarter values. Therefore I take the backward difference in order to calculate inflation within a given quarter.

In [10]:
data = hcat(data, industry_returns[1:end-1, :])
data[:inflation] = vcat(exp.(diff(log.(main_data[1:end-1, :PCE]))), missing)

for test_asset in [:RIndGrp1, :RIndGrp2, :RIndGrp3, :RIndGrp4]
    data[test_asset] = data[test_asset] ./ data[:inflation]
end

head(data)

Unnamed: 0,QTR,C_t,C_t1,R_vwCRISP,RIndGrp1,RIndGrp2,RIndGrp3,RIndGrp4,inflation
1,1947:02,7617.51,7736.24,0.993336,1.00818,0.958504,0.961983,0.956914,1.00806
2,1947:03,7736.24,7696.37,1.0185,0.973562,0.922818,0.977164,0.940311,1.01865
3,1947:04,7696.37,7577.41,1.0365,1.04824,1.02073,0.983017,0.980349,1.02489
4,1948:01,7577.41,7613.39,0.996359,1.04838,0.995349,0.991645,1.00839,1.01108
5,1948:02,7613.39,7683.79,1.11497,1.02538,1.02254,0.976348,0.978459,1.01031
6,1948:03,7683.79,7631.41,0.924865,1.14373,1.06918,1.11979,1.06042,1.01535


In [None]:
data1 = dropmissing(data)

@time (θfirst, θ, σ, J, p, G, S, Vasympt) = effientGMM(Matrix(data1[[:C_t, :C_t1, :R_vwCRISP]]), 
    Matrix(data1[[:RIndGrp1, :RIndGrp2, :RIndGrp3, :RIndGrp4]]), 
    ones(size(data1, 1)), 
    ψ -> [1., ψ[1], 1.], 
    [1.],
    () -> [max(rand(Normal(4, 4)), 0.01)], 
    Jn = 4,
    verbose = true)

### Specification 2

In [11]:
data[:TB3MS] = exp.(main_data[2:end, :TB3MS]) ./ data[:inflation];

In [None]:
data2 = dropmissing(data)

@time (θfirst, θ, σ, J, p, G, S, Vasympt) = effientGMM(Matrix(data2[[:C_t, :C_t1, :R_vwCRISP]]), 
    Matrix(data2[[:RIndGrp1, :RIndGrp2, :RIndGrp3, :RIndGrp4, :TB3MS]]), 
    ones(size(data2, 1)), 
    ψ -> [1., ψ[1], 1.], 
    [0.8],
    () -> [max(rand(Normal(4, 4)), 0.01)],
    Jn = 4,
    verbose = true)

The second-stage estimate is much higher than the first stage. 

### Specification 3

In [12]:
data = hcat(data, size_bm_returns[2:end, :])
for test_asset in [:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High]
    data[test_asset] = data[test_asset] ./ data[:inflation]
end

In [None]:
data3 = dropmissing(data)

@time (θfirst, θ, σ, J, p, G, S, Vasympt) = effientGMM(Matrix(data3[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data3[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High]]),
    ones(size(data3, 1)),
    ψ -> [1., ψ[1], 1.],
    [0.8],
    () -> [max(rand(Normal(4, 4)), 0.01)],
    Jn = 4,
    verbose = true)

In [None]:
pyplot()

plot(linspace(0.98, 1, 10), [1 / effientGMM(Matrix(data3[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data3[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High]]),
    ones(size(data3, 1)),
    ψ -> [β, ψ[1], 1.],
    [0.172142],
    () -> [max(rand(Normal(4, 4)), 0.01)], min_iter = 0,
    Jn = 4)[2][1] for β in linspace(0.98, 1, 10)], xlab="β", ylab="γ", label="estimate")

In [None]:
savefig("./midterm_EZ_data/figure1.png")

### Specification 4

TODO: clarification of which test assets to use!

$\theta[3] = \gamma$

In [None]:
@time (θfirst, θ, σ, J, p, G, S, Vasympt, obj) = effientGMM(Matrix(data3[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data3[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High]]),
    ones(size(data3, 1)),
    θ -> [1., θ[1], (1 - θ[2])/(1 - 1/θ[1])],
    [0.8, 2],
    () -> [max(rand(Normal(4, 4)), 0.01), max(rand(Normal(4, 4)), 0.01)],
    Jn = 4,
    verbose = true)

In [None]:
round.(G, 4)

### Specification 5

In [13]:
data[:ΔClagged] = vcat(missing, (data[:C_t1] ./ data[:C_t])[1:end-1])
data[:R_wlagged] = vcat(missing, data[1:end-1, :R_vwCRISP]);

In [None]:
data5 = dropmissing(data)

@time (θfirst, θ, σ, J, p, G, S, Vasympt, obj) = effientGMM(Matrix(data5[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data5[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High]]),
    hcat(ones(size(data5, 1)), Matrix(data5[[:ΔClagged, :R_wlagged]])),
    θ -> [1., θ[1], (1 - θ[2])/(1 - 1/θ[1])],
    [0.2, 2],
    () -> [max(rand(Normal(0.8, 1)), 0.01), max(rand(Normal(30, 30)), 0.01)],
    Jn = 4,
    verbose = true,
    second_only = true,
    min_iter = 10)

In [None]:
data5 = dropmissing(data)
X = Matrix(data5[[:C_t, :C_t1, :R_vwCRISP]])
Y =  Matrix(data5[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High]])
Z = hcat(ones(size(data5, 1)), Matrix(data5[[:ΔClagged, :R_wlagged]]));
gn_wrapped = gn_wrapper(X, Y, Z)
θmapping = θ -> [1., θ[1], (1 - θ[2])/(1 - 1/θ[1])]
θhat_old = [0.16181655136564985,6.901787374061048] #[0.160306, 7.50507]
S = Shat_HAC(X, Y, Z, θmapping(θhat_old); Jn = 24)
G = ForwardDiff.jacobian(θ -> gn_wrapped(θmapping(θ)), θhat_old)
Vasympt = (G' * S^-1 * G)^-1
J = 2 * size(X, 1) * Qn(θmapping(θhat_old), gn_wrapped, S^-1)
obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, S^-1), θhat_old; autodiff = :forward);

In [None]:
gr()
z = [obj.f([x, y]) for x in 0.15:0.005:0.2, y in 3:0.05:8]

In [None]:
contour(0.15:0.005:0.2, 3:0.05:8, z')
scatter!([0.160306], [7.50507], label="starting point")
scatter!([0.16387928518383837], [6.594640448824585], label="new point")

In [None]:
optimize(obj, [0.160306, 7.50507], BFGS(), Optim.Options(iterations = 1000))

In [None]:
G

### Specification 6

In [None]:
@time (θfirst, θ, σ, J, p, G, S, Vasympt, obj) = effientGMM(Matrix(data5[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data5[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]),
    hcat(ones(size(data5, 1)), Matrix(data5[[:ΔClagged, :R_wlagged]])),
    θ -> [1., θ[1], (1 - θ[2])/(1 - 1/θ[1])],
    [0.2, 8],
    () -> [max(rand(Normal(4, 4)), 0.01), max(rand(Normal(4, 4)), 0.01)],
    Jn = 4,
    verbose = true,
    min_iter = 10, 
    first_S = Shat_HAC(Matrix(data5[[:C_t, :C_t1, :R_vwCRISP]]), Matrix(data5[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]), hcat(ones(size(data5, 1)), Matrix(data5[[:ΔClagged, :R_wlagged]])), (θ -> [1., θ[1], (1 - θ[2])/(1 - 1/θ[1])])([0.168973, 6.02173]); Jn = 0),
    second_only = false)

In [None]:
data5 = dropmissing(data)
X = Matrix(data5[[:C_t, :C_t1, :R_vwCRISP]])
Y =  Matrix(data5[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]])
Z = hcat(ones(size(data5, 1)), Matrix(data5[[:ΔClagged, :R_wlagged]]));
gn_wrapped = gn_wrapper(X, Y, Z)
θmapping = θ -> [1., θ[1], (1 - θ[2])/(1 - 1/θ[1])]

θhat_old = [0.16181655136564985,6.901787374061048] #[0.160306, 7.50507]
S = Shat_HAC(X, Y, Z, θmapping(θhat_old); Jn = 24)
G = ForwardDiff.jacobian(θ -> gn_wrapped(θmapping(θ)), θhat_old)
Vasympt = (G' * S^-1 * G)^-1
J = 2 * size(X, 1) * Qn(θmapping(θhat_old), gn_wrapped, S^-1)
obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, (S/100000)^-1), θhat_old; autodiff = :forward);

In [None]:
(S/100000)^-1

In [None]:
obj.f([0.2, 8])

In [None]:
optimize(obj, [0.4, 30], BFGS(), Optim.Options(iterations = 1000))

In [None]:
DF = zeros(21, 2)
obj.df(DF, [0.16181655136564985,6.901787374061048])

In [None]:
1 - θ[2] * (1 - 1/θ[1])

### Specification 7


In [None]:
@time (θfirst, θ, σ, J, p, G, S, Vasympt, obj) = effientGMM(Matrix(data5[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data5[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]),
    hcat(ones(size(data5, 1)), Matrix(data5[[:ΔClagged, :R_wlagged]])),
    θ -> [θ[1], θ[2], (1 - 1.5) / (1 - 1/θ[2])],
    [0.95, 0.5],
    () -> [rand()/20 + 0.8, max(rand(Normal(4, 4)), 0.01)],
    Jn = 4,
    verbose = true,
    algorithm=Newton())

First stage
Other candidate solution: [2932.8, 0.00197446] compared to optimum [4054.2, 0.0018997]




Better solution: [0.980605, 1.38766] compared to old [4054.2, 0.0018997]
Other candidate solution: [3796.93, 0.00191229] compared to optimum [0.980605, 1.38766]
Better solution: [0.980605, 1.38766] compared to old [0.980605, 1.38766]
Better solution: [0.980605, 1.38766] compared to old [0.980605, 1.38766]
Other candidate solution: [3565.65, 0.00192938] compared to optimum [0.980605, 1.38766]
Other candidate solution: [3237.66, 0.00194978] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965169, -3120.68] compared to optimum [0.980605, 1.38766]
Other candidate solution: [3166.48, 0.00195451] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965169, -3089.61] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965167, -2140.05] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965169, -2982.47] compared to optimum [0.980605, 1.38766]
Other candidate solution: [3738.71, 0.00191832] compared to optimum [0.980605, 1.



Other candidate solution: [2299.02, 0.00203484] compared to optimum [0.980605, 1.38766]
Other candidate solution: [2691.34, 0.00199546] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965169, -2876.71] compared to optimum [0.980605, 1.38766]
Other candidate solution: [3789.13, 0.00191521] compared to optimum [0.980605, 1.38766]
Other candidate solution: [3978.71, 0.00190398] compared to optimum [0.980605, 1.38766]
Other candidate solution: [3731.18, 0.00191878] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965167, -2121.68] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965167, -2130.6] compared to optimum [0.980605, 1.38766]




Other candidate solution: [0.965168, -2590.57] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965167, -2297.66] compared to optimum [0.980605, 1.38766]
Other candidate solution: [3872.65, 0.00191018] compared to optimum [0.980605, 1.38766]
Other candidate solution: [0.965169, -3133.18] compared to optimum [0.980605, 1.38766]
Other candidate solution: [2133.09, 0.00205531] compared to optimum [0.980605, 1.38766]
Other candidate solution: [2240.61, 0.00204306] compared to optimum [0.980605, 1.38766]




In [None]:
G

### Specification 8

In [14]:
data[:ΔClagged2] = vcat(missing, data[1:end-1, :ΔClagged]);

In [None]:
data8 = dropmissing(data)

@time (θfirst, θ, σ, J, p, G, S, Vasympt, obj) = effientGMM(Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]),
    hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2]])),
    θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])],
    [0.85, 0.5, 10],
    () -> [rand()/20 + 0.8, max(rand(Normal(2, 2)), 0.01), max(rand(Normal(30, 20)), 0.01)],
    Jn = 4,
    verbose = true,
    first_S = eye(21, 21) + 0.001 * Shat_HAC(Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]]), Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]), hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2]])), (θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])])([0.850423, 0.719195, 30.3]); Jn = 4),
    min_iter = 100,
    #algorithm = NelderMead(),
    max_iter = 300)

In [None]:
X = Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]])
Y = Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]])
Z = hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2]]))
gn_wrapped = gn_wrapper(X, Y, Z)
θmapping = θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])]

θhat_old = [0.850496, 0.719037, 30.3]
S = Shat_HAC(X, Y, Z, θmapping(θhat_old); Jn = 4)
G = ForwardDiff.jacobian(θ -> gn_wrapped(θmapping(θ)), θhat_old)
Vasympt = (G' * S^-1 * G)^-1
J = 2 * size(X, 1) * Qn(θmapping(θhat_old), gn_wrapped, S^-1)
#obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, inv(Shat_HAC(Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]]), Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]), hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2]])), (θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])])([0.850423, 0.719195, 30.3]); Jn = 4))), θhat_old; autodiff = :forward);
obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped), θhat_old; autodiff = :forward);

In [None]:
x = zeros(3, 1)
obj.df(x, [0.8503, 0.719037, 30.3])

In [None]:
res = optimize(obj, [0.85, 0.5, 50], NelderMead(), Optim.Options(iterations = 1000))

In [None]:
Optim.minimizer(res)

### Specification 9

In [15]:
data[:R_wlagged2] = vcat(missing, data[1:end-1, :R_wlagged]);

In [None]:
data8 = dropmissing(data)

@time (θfirst, θ, σ, J, p, G, S, Vasympt, obj) = effientGMM(Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]]),
    Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]),
    hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2, :R_wlagged, :R_wlagged2]])),
    θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])],
    [0.87109, 0.41, 100.628],
    () -> [rand()/20 + 0.8, (max(rand(Normal(2, 2)), 0.01)), (max(rand(Normal(30, 20)), 1.1))],
    Jn = 4,
    verbose = true,
    first_S = eye(35, 35) + 0.001 * Shat_HAC(Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]]), Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]), hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2, :R_wlagged, :R_wlagged2]])), (θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])])([0.8455, 0.3841, 96.628]); Jn = 4),
    min_iter = 200,
    algorithm = NelderMead(),
    max_iter = 200)

First stage
Other candidate solution: [1.04486, 0.0678578, 10.5398] compared to optimum [3.02153, 0.00371448, 40.7882]
Better solution: [0.954158, 3.93955, 1.20309] compared to old [3.02153, 0.00371448, 40.7882]
Other candidate solution: [1.49753, 3.25247, 0.990855] compared to optimum [0.954158, 3.93955, 1.20309]
Better solution: [1.00042, 0.1989, -6.41788] compared to old [0.954158, 3.93955, 1.20309]
Better solution: [0.979391, 2.26685, 1.98758] compared to old [1.00042, 0.1989, -6.41788]
Other candidate solution: [1.44999, 1.7082, 0.994065] compared to optimum [0.979391, 2.26685, 1.98758]
Other candidate solution: [0.761413, 0.82213, 74.5987] compared to optimum [0.979391, 2.26685, 1.98758]
Other candidate solution: [1.22952, 0.0661874, 1.47348] compared to optimum [0.979391, 2.26685, 1.98758]
Better solution: [0.978276, 5.66838, 2.46123] compared to old [0.979391, 2.26685, 1.98758]
Other candidate solution: [0.981324, 1.13704, 1.21377] compared to optimum [0.978276, 5.66838, 2.4612

In [31]:
data8 = dropmissing(data)
X = Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]])
Y = Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]])
Z = hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2, :R_wlagged, :R_wlagged2]]))
gn_wrapped = gn_wrapper(X, Y, Z)
θmapping = θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])]

#θhat_old = [0.87109, 0.41, 100.628]
θhat_old = [0.984357, 0.130579, 52.7365]
S = Shat_HAC(X, Y, Z, θmapping(θhat_old); Jn = 4)
G = ForwardDiff.jacobian(θ -> gn_wrapped(θmapping(θ)), θhat_old)
Vasympt = (G' * S^-1 * G)^-1
J = 2 * size(X, 1) * Qn(θmapping(θhat_old), gn_wrapped, S^-1)
#obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, inv(Shat_HAC(Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]]), Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]), hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2, :R_wlagged, :R_wlagged2]])), (θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])])([0.87109, 0.41, 100.628]); Jn = 4))), θhat_old; autodiff = :forward);
obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped, inv(Shat_HAC(Matrix(data8[[:C_t, :C_t1, :R_vwCRISP]]), Matrix(data8[[:Small_Low, :Small_Mid, :Small_High, :Big_Low, :Big_Mid, :Big_High, :TB3MS]]), hcat(ones(size(data8, 1)), Matrix(data8[[:ΔClagged, :ΔClagged2, :R_wlagged, :R_wlagged2]])), (θ -> [θ[1], θ[2], (1 - θ[3])/(1 - 1/θ[2])])([0.984357, 0.130579, 52.7365]); Jn = 4))), θhat_old; autodiff = :forward);
#obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped), θhat_old; autodiff = :forward);

In [32]:
obj = OnceDifferentiable(θ -> Qn(θmapping(θ), gn_wrapped), [0., 0., 0.]; autodiff = :forward)
x = zeros(3,1)
y = [0.984357, 0.130579, 52.7365]
obj.f(y), obj.df(x, y)

(0.009418703904580493, [0.638964; 0.371043; 0.000566644])

In [54]:
sqrt.(diag(Vasympt/size(X, 1))), 
        J, 
        1-cdf(Chisq(size(Y, 2) * size(Z, 2) - length([0.984357, 0.130579, 52.7365])), J)

([0.00621304, 0.0228173, 9.13714], 52.538792606635816, 0.012501583758183443)

In [35]:
res = optimize(obj, [0.87109, 0.41, 100.628], Newton(), Optim.Options(iterations = 5))

Results of Optimization Algorithm
 * Algorithm: Newton's Method
 * Starting Point: [0.87109,0.41,100.628]
 * Minimizer: [0.9843565743537283,0.1305794202884893, ...]
 * Minimum: 9.418567e-03
 * Iterations: 5
 * Convergence: false
   * |x - x'| ≤ 1.0e-32: false 
     |x - x'| = 1.93e+01 
   * |f(x) - f(x')| ≤ 1.0e-32 |f(x)|: false
     |f(x) - f(x')| = 3.85e+00 |f(x)|
   * |g(x)| ≤ 1.0e-08: false 
     |g(x)| = 6.38e-01 
   * Stopped by an increasing objective: false
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 19
 * Gradient Calls: 19
 * Hessian Calls: 6

In [30]:
Optim.minimizer(res)

3-element Array{Float64,1}:
  0.984357
  0.130579
 52.7365  

In [138]:
gr()
xrange = 0.4:0.02:0.9
yrange = 1:0.2:150
z = [obj.f([0.95, x, y]) for x in xrange, y in yrange];

In [139]:
plotlyjs();
contour(xrange, yrange, log10.(z'), levels=100)

In [136]:
res = optimize(obj, [0.85, 0.6, 71], Newton(), Optim.Options(iterations = 100))

Results of Optimization Algorithm
 * Algorithm: Newton's Method
 * Starting Point: [0.85,0.6,71.0]
 * Minimizer: [0.9819883707423285,0.9830762865176769, ...]
 * Minimum: 5.734890e-04
 * Iterations: 100
 * Convergence: false
   * |x - x'| ≤ 1.0e-32: false 
     |x - x'| = 5.85e-03 
   * |f(x) - f(x')| ≤ 1.0e-32 |f(x)|: false
     |f(x) - f(x')| = 1.44e-05 |f(x)|
   * |g(x)| ≤ 1.0e-08: false 
     |g(x)| = 5.09e-04 
   * Stopped by an increasing objective: false
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 259
 * Gradient Calls: 259
 * Hessian Calls: 101

In [137]:
Optim.minimizer(res)

3-element Array{Float64,1}:
 0.981988
 0.983076
 0.969312

In [118]:
obj.f([0.85, 0.45, 130])

0.481333661358475

In [117]:
obj.f([1.05246, 0.0547091, 24.7884])

0.0018721872521638484

In [122]:
obj.f([1.10998, 0.0840336, 1.80375 ])   

0.0010771263756032682

In [125]:
obj.f([0.986677, 0.485315, -0.903559 ]) 

0.0005757170289910831

# Task 2

I think regressors are measured at the end of the period.

## 1

In [16]:
data = DataFrame()
data[:QTR] = main_data[:QTR]
data[:R_vwCRISP] = log.(1 + main_data[:VWRETD]) .- log.(1 + main_data[:TB3MS])

for i in 0:7
    data[Symbol("R_$(i+1)")] = 
        vcat([sum(data[t:t+i, :R_vwCRISP]) for t in 1:size(data, 1)-i][2:end], ones(i+1) .* missing)
end

data[:CAY] = main_data[:CAY]
data[:dp] = main_data[:dp]
data[:RREL] = main_data[:RREL]
data[:dp] = main_data[:dp]
data[:DEF] = main_data[:DEF]
data[:TRM] = main_data[:TRM]

head(data)

Unnamed: 0,QTR,R_vwCRISP,R_1,R_2,R_3,R_4,R_5,R_6,R_7,R_8,CAY,dp,RREL,DEF,TRM
1,1947:01,-0.016034,-0.00763405,0.00905073,0.0427827,0.0367247,0.143067,0.0624748,0.0537124,0.0653001,missing,-3.06759,missing,0.576667,missing
2,1947:02,-0.00763405,0.0166848,0.0504168,0.0443587,0.150701,0.0701089,0.0613464,0.0729341,0.025171,missing,-2.98656,missing,0.643333,missing
3,1947:03,0.0166848,0.033732,0.027674,0.134016,0.0534241,0.0446617,0.0562493,0.00848624,0.118936,missing,-2.94234,missing,0.62,missing
4,1947:04,0.033732,-0.00605805,0.100284,0.0196921,0.0109296,0.0225173,-0.0252458,0.0852043,0.183214,missing,-2.89371,0.002825,0.66,missing
5,1948:01,-0.00605805,0.106342,0.0257501,0.0169877,0.0285754,-0.0191877,0.0912624,0.189272,0.232003,missing,-2.83392,0.00255,0.68,missing
6,1948:02,0.106342,-0.080592,-0.0893544,-0.0777667,-0.12553,-0.0150797,0.0829303,0.125661,0.146134,missing,-2.91281,0.0013,0.63,missing


In [17]:
#lag is by default +/- 4 quarters
function Shat_HAC(Y, X, β; Jn = 4)
    N = length(β)
    
    Γ = zeros(N, N)
    Γ!(Γ, Y, X, β, j = 0)
    
    #0th lag
    Shat = copy(Γ)
    
    #all other lags
    for j in 1:Jn
        Γ .= zeros(N, N)
        Γ!(Γ, Y, X, β, j = j)
        Shat .+= (1 - j / (Jn + 1)) .* (Γ .+ Γ')
    end
    
    return Shat
end

function Γ!(Γ, Y, X, β; j = 0)
    for t in j+1:size(X, 1)
        Γ .+= (Y[t] - X[t, :]' * β)^2 * X[t, :] * ((Y[t-j] - X[t-j, :]' * β)^2 * X[t-j, :])'
    end
    
    Γ .= Γ ./ size(X, 1)
end

Γ! (generic function with 1 method)

### (a)

In [18]:
[begin
    subdata = dropmissing(data[[Symbol("R_$(i)"), :CAY]])
    Y = subdata[Symbol("R_$(i)")]
    X = hcat(ones(length(Y)), Matrix(subdata[:, 2:end]))

    β = (X'X) \ (X'Y)
    S = Shat_HAC(Y, X, β; Jn = i-1)
    if cond(S) > 20000
        warn("S has condition number $(cond(S))")
    end
    Vasympt = inv((X'X)/size(X, 1) * inv(S) * (X'X)/size(X, 1))
    σ = sqrt.(diag(Vasympt / size(X, 1)))
    t = β ./ σ
    Rsq = 1 - sum((Y - X * β).^2) / sum((Y - mean(Y)).^2)
    adjRsq = 1-((1-Rsq) * (size(X, 1)-1) / (size(X, 1) - length(β) - 1))

    [β, σ, t, adjRsq]
        end for i in 1:8]

8-element Array{Array{Any,1},1}:
 Any[[0.0143667, 0.639674], [0.000908693, 0.0445], [15.8103, 14.3747], 0.0143631]
 Any[[0.0284629, 1.33092], [0.0022525, 0.103202], [12.6361, 12.8963], 0.0361588] 
 Any[[0.0424093, 2.05174], [0.00366085, 0.165809], [11.5846, 12.3741], 0.06222]  
 Any[[0.0558595, 2.77774], [0.00516474, 0.242176], [10.8156, 11.4699], 0.0903436]
 Any[[0.0692904, 3.42644], [0.00674822, 0.312876], [10.268, 10.9514], 0.113141]  
 Any[[0.0826209, 4.20603], [0.00819169, 0.391663], [10.0859, 10.7389], 0.14547]  
 Any[[0.096102, 5.03451], [0.00968124, 0.475507], [9.92663, 10.5877], 0.182323]  
 Any[[0.109296, 5.77018], [0.0108737, 0.502661], [10.0513, 11.4793], 0.219384]   

### (b)

In [19]:
[begin
    subdata = dropmissing(data[[Symbol("R_$(i)"), :dp]])
    Y = subdata[Symbol("R_$(i)")]
    X = hcat(ones(length(Y)), Matrix(subdata[:, 2:end]))

    β = (X'X) \ (X'Y)
    S = Shat_HAC(Y, X, β; Jn = i-1)
    if cond(S) > 20000
        warn("S has condition number $(cond(S))")
    end
    Vasympt = inv((X'X)/size(X, 1) * inv(S) * (X'X)/size(X, 1))
    σ = sqrt.(diag(Vasympt / size(X, 1)))
    t = β ./ σ
    Rsq = 1 - sum((Y - X * β).^2) / sum((Y - mean(Y)).^2)
    adjRsq = 1-((1-Rsq) * (size(X, 1)-1) / (size(X, 1) - length(β) - 1))

    [β, σ, t, adjRsq]
        end for i in 1:8]

8-element Array{Array{Any,1},1}:
 Any[[0.10861, 0.0264709], [0.00581482, 0.00167335], [18.6781, 15.8191], 0.0114681]
 Any[[0.22635, 0.0555687], [0.0142652, 0.00413412], [15.8672, 13.4415], 0.0308646] 
 Any[[0.335841, 0.0823542], [0.0234209, 0.00687461], [14.3394, 11.9795], 0.0489781]
 Any[[0.436671, 0.106709], [0.0349177, 0.0104354], [12.5057, 10.2257], 0.0649819]  
 Any[[0.535, 0.130336], [0.047815, 0.0143993], [11.189, 9.05153], 0.0805897]       
 Any[[0.630736, 0.153363], [0.0592956, 0.0179634], [10.6371, 8.53751], 0.0950658]  
 Any[[0.721958, 0.174923], [0.0726856, 0.0221594], [9.93261, 7.89383], 0.108636]   
 Any[[0.795552, 0.191407], [0.0841573, 0.0257445], [9.45315, 7.43486], 0.119647]   

### (c)

In [20]:
[begin
    subdata = dropmissing(data[[Symbol("R_$(i)"), :CAY, :dp, :RREL, :DEF, :TRM]])
    Y = subdata[Symbol("R_$(i)")]
    X = hcat(ones(length(Y)), Matrix(subdata[:, 2:end]))

    β = (X'X) \ (X'Y)
    S = Shat_HAC(Y, X, β; Jn = i-1)
    if cond(S) > 20000
        warn("S has condition number $(cond(S))")
    end
    Vasympt = inv((X'X)/size(X, 1) * (S \ (X'X)/size(X, 1)))
    σ = sqrt.(diag(Vasympt / size(X, 1)))
    t = β ./ σ
    Rsq = 1 - sum((Y - X * β).^2) / sum((Y - mean(Y)).^2)
    adjRsq = 1-((1-Rsq) * (size(X, 1)-1) / (size(X, 1) - length(β) - 1))

        round.(vcat(β[2:end], t[2:end], adjRsq), 4)
        end for i in 1:8]



8-element Array{Array{Float64,1},1}:
 [0.6394, 0.0273, -1.1082, 0.005, -0.0982, 12.9723, 10.9374, -10.2436, 2.2126, -7.577, 0.0393]
 [1.3288, 0.0538, -1.6959, 0.0133, -0.0505, 12.04, 8.6763, -4.1952, 2.3243, -1.7192, 0.0718]  
 [2.0331, 0.0777, -2.3036, 0.0173, -0.0138, 12.1994, 8.1331, -3.0202, 2.1464, -0.26, 0.1132]  
 [2.7343, 0.0995, -2.6611, 0.0165, -0.0174, 12.7323, 7.5634, -2.7353, 1.6384, -0.3076, 0.1505]
 [3.3182, 0.1222, -3.6028, 0.0077, -0.0033, 12.567, 6.9383, -3.9473, 0.6843, -0.0653, 0.1842] 
 [4.0528, 0.1371, -2.8338, 0.0131, 0.0196, 11.8998, 6.4304, -4.1198, 1.0469, 0.3763, 0.2114]  
 [4.7988, 0.147, -2.4969, 0.0153, 0.1413, 11.3502, 5.4155, -4.2001, 1.0662, 2.8511, 0.2457]   
 [5.4648, 0.1559, -2.6224, 0.0101, 0.1593, 12.2668, 4.9077, -5.3412, 0.634, 2.8086, 0.28]     

## 2

In [21]:
subdata = Matrix(dropmissing(data[[:R_vwCRISP, :CAY]]))
X = hcat(ones(size(subdata, 1)), subdata);

In [22]:
A = (((X[1:end-1, :])' * X[1:end-1, :]) \  ((X[1:end-1, :])' * X[2:end, :]))'

3×3 Array{Float64,2}:
  1.0          0.0         0.0     
  0.0130898    0.0871068   0.654659
 -0.000198989  0.00618562  0.912555

The VAR:
$$x_{t+1} = A x_t + \epsilon_{t+1}$$
$$r_{t+1} = e^2 x_{t+1} = e^2 A x_t + e^2 \epsilon_{t+1}$$
$$r_{t+2} = e^2 x_{t+2} = e^2 A x_{t+1} + e^2 \epsilon_{t+2} = e^2 A^2 x_{t} + e^2 A \epsilon_{t+1} +  e^2 \epsilon_{t+2}$$
...
$$r_{t+h} = e^2 A^h x_{t} + e^2 A^{h-1} \epsilon_{t+1} + ... +   e^2 \epsilon_{t+h}$$


In [23]:
ϵ = (X[2:end, :]' .- A * X[1:end-1, :]')';

In [88]:
function R_squared(A_vec; h=1)
    A = reshape(A_vec, 3, 3)
    T = size(ϵ, 1)
    μX = mean(X, 1)
    
    SS_tot = 0.
    for t in 1:T-h+1
        SS_tot += ((sum([A^j * sum([ϵ[t+k, :] for k in 0:h-1]) for j in 0:h-1]))[2]
                    + (sum([A^j for j in 1:h]) * μX')[2])^2
    end
    
    SS_res= 0.
    for t in 1:T-h+1
        SS_res += ((sum([A^j * sum([ϵ[t+k, :] for k in 0:h-1]) for j in 0:h-1]))[2])^2
    end
    
    return 1-SS_res/SS_tot
end

R_squared (generic function with 1 method)

#### Consistency Check
For h = 1, the original return series should be returned:

In [89]:
h = 1
T = size(ϵ, 1)
maximum(abs.([(sum([A^j for j in 1:h]) * X[t, :])[2] + 
    (sum([A^j * sum([ϵ[t+k, :] for k in 0:h-1]) for j in 0:h-1]))[2] for t in 1:T-h+1] .- X[2:end, 2]))

2.7755575615628914e-17

For h=2 the fit should be good, but not perfect, because the "realized" multi-period returns are imputed:

In [90]:
h = 2
T = size(ϵ, 1)
mean(([(sum([A^j for j in 1:h]) * X[t, :])[2] + 
            (sum([A^j * sum([ϵ[t+k, :] for k in 0:h-1]) for j in 0:h-1]))[2] for t in 1:T-h+1] .- X[2:end-h+1, 2]).^2)

0.007938395196279173

The mean square error is low.

In [94]:
[R_squared(A[:], h=h) for h in [2, 4, 8, 12, 16]]

5-element Array{Float64,1}:
 0.0523713
 0.109043 
 0.236432 
 0.349974 
 0.432999 

Now I [need](https://en.wikipedia.org/wiki/Vector_autoregression#Estimation_of_the_estimator's_covariance_matrix) the variance matrix of the VAR estimates. I use a naive error variance matrix estimation, correcting for only 6 estimated parameters, because the law of motion of the vector of ones is clear without estimation.

In [111]:
Σhat = ϵ'ϵ/(size(ϵ, 1) - 6 - 1)

3×3 Array{Float64,2}:
 0.0   0.0           0.0        
 0.0   0.00684642   -0.000293302
 0.0  -0.000293302   6.45886e-5 

In [112]:
kron(inv(X'X), Σhat)

9×9 Array{Float64,2}:
  0.0   0.0          0.0         -0.0  …  -0.0  -0.0          -0.0        
  0.0   2.71765e-5  -1.16424e-6  -0.0     -0.0  -3.1802e-5     1.3624e-6  
  0.0  -1.16424e-6   2.56381e-7  -0.0     -0.0   1.3624e-6    -3.00017e-7 
 -0.0  -0.0         -0.0          0.0      0.0   0.0           0.0        
 -0.0  -5.68299e-5   2.4346e-6    0.0      0.0   0.000679704  -2.91186e-5 
 -0.0   2.4346e-6   -5.36129e-7   0.0  …   0.0  -2.91186e-5    6.41228e-6 
 -0.0  -0.0         -0.0          0.0      0.0   0.0           0.0        
 -0.0  -3.1802e-5    1.3624e-6    0.0      0.0   0.0714094    -0.00305919 
 -0.0   1.3624e-6   -3.00017e-7   0.0      0.0  -0.00305919    0.000673671

Lets get the derivative of the R-squared function via automatic differentiation and apply the [Delta Method](https://en.wikipedia.org/wiki/Delta_method#Multivariate_delta_method):

In [113]:
using ForwardDiff

In [135]:
[
    begin ∇ = ForwardDiff.gradient(θ -> R_squared(θ, h=h), A[:])
    (∇' * kron(inv(X'X), Σhat) * size(X, 1) * ∇)/size(X, 1) 
    end
for h in [2, 4, 8, 12, 16]]

  4.280659 seconds (7.78 M allocations: 382.301 MiB, 1.94% gc time)


5-element Array{Float64,1}:
 0.00135536
 0.00547835
 0.0189219 
 0.0320286 
 0.0425976 