#### Econ 627: Assignment 5

##### <u> Question 2 </u>: Monte Carlo experiments with efficient instruments

In this exercise, using Monte Carlo simulations you will compare the small-sample performances of IV estimators based on efficient instruments with that of the 2SLS estimator.

######  Let us load the Libraries we need.

In [131]:
using Distributions, PrettyTables, Random, Parameters,DataFrames

a),  b) and c)

In [132]:
"""
    generate_data(ρ, β, n; rng=Random.default_rng())

Simulate `n` observations from the heteroskedastic IV design of this exercise.

# Arguments
- `ρ`: correlation between the structural shock `ϵ` and the first-stage error `V`.
- `β`: coefficient on the endogenous regressor `X`.
- `n`: number of observations.

# Keywords
- `rng`: random number generator used for every draw; pass a seeded generator to
  make a sample reproducible. Defaults to the task-local default generator.

# Returns
A named tuple `(Y, X, Z, U)` of length-`n` vectors: the outcome, the endogenous
regressor, the discrete instrument, and the structural error `U = (1 + Z) ϵ`.
"""
function generate_data(ρ, β, n; rng=Random.default_rng())
    # Discrete instrument: Z is a step function of a Uniform(0, 1) draw and takes
    # the values -0.5, -0.1, 0.1 (probability 0.2 each) and 1 (probability 0.4).
    W = rand(rng, Uniform(0, 1), n)
    Z = map(W) do w
        w < 0.2 ? -0.5 : w < 0.4 ? -0.1 : w < 0.6 ? 0.1 : 1.0
    end

    # Jointly normal (ϵ, V) with unit variances and correlation ρ; each column of
    # `draws` is one observation.
    Σ = [1.0 ρ; ρ 1.0]
    draws = rand(rng, MvNormal([0.0; 0.0], Σ), n)
    ϵ = draws[1, :]
    V = draws[2, :]

    # Heteroskedastic structural error: its scale depends on the instrument.
    U = (1 .+ Z) .* ϵ

    # Endogenous regressor (correlated with U through V whenever ρ ≠ 0).
    X = 4 .* Z .^ 2 .+ V

    # Outcome equation.
    Y = β .* X .+ U

    return (; Y, X, Z, U)
end


generate_data (generic function with 1 method)

In [134]:
# Draw one sample of n = 100 observations with ρ = 0.9 and β = 0.15, and bind the
# fields of the returned named tuple to the top-level variables X, Y, Z and U.
@unpack X, Y, Z, U = generate_data(0.9, 0.15, 100);

d) Computations of the three IV estimators:

###### The 2SLS estimator using Zi as instruments.

In [135]:
"""
    Ω(U, Z)

Estimate the asymptotic variance of the moment `Z * U`: the average of the
squared products `Z[i] * U[i]`, divided by `length(U)`.
"""
function Ω(U, Z)
    score = Z .* U
    return (score' * score) / length(U)
end

Ω (generic function with 1 method)

In [136]:
"""
    β₂ₛₗₛ(Y, X, Z, U)

2SLS estimate of the coefficient on `X` using `Z` as the instrument, together
with a heteroskedasticity-robust standard error.

# Arguments
- `Y`: outcome vector.
- `X`: endogenous regressor (assumed to be a single vector, as at every call site
  in this notebook).
- `Z`: instrument (assumed to be a single vector of the same length).
- `U`: errors passed to `Ω` to estimate the variance of the moment `Z * U`.
  NOTE(review): callers pass the true simulated errors rather than residuals —
  confirm this is intended.

# Returns
The tuple `(β, se)`: point estimate and standard error.
"""
function β₂ₛₗₛ(Y, X, Z, U)
    n = length(Y)

    # Cross moments; scalars when X, Y and Z are vectors.
    zz = Z' * Z
    zx = Z' * X
    zy = Z' * Y

    # β = (X'P_Z X)⁻¹ X'P_Z Y with P_Z = Z (Z'Z)⁻¹ Z', expanded in terms of the
    # cross moments so that the n×n projection matrix is never materialised.
    β = (zx * (zz \ zx)) \ (zx * (zz \ zy))

    # Sandwich variance (Q'WQ)⁻¹ Q'W Ω W Q (Q'WQ)⁻¹ / n.
    Ωᵤ = Ω(U, Z)
    Q = zx / n
    W = inv(zz / n)
    bread = Q' * W * Q
    asy_var = (bread \ (Q' * W * Ωᵤ * W * Q) / bread) / n
    se = sqrt(asy_var)

    return β, se
end

β₂ₛₗₛ (generic function with 1 method)

In [138]:
# 2SLS using Z itself as the instrument; the call returns the point estimate and
# its standard error, in that order.
βᵢᵥ, stderrᵢᵥ  = β₂ₛₗₛ(Y,X,Z,U)

(0.17605493858937465, 0.07949359208230178)

###### The infeasible efficient IV estimator that uses g∗(Zi)

In [139]:
"""
    g(z, X, Z, U)
    g(z)

Instrument value at the support point `z`: the sample mean of `X .* (Z .== z)`
divided by the sample mean of `U.^2 .* (Z .== z)`.

The four-argument method uses the data passed in. The one-argument method reads
the top-level variables `X`, `Z` and `U` as they are at call time, so its result
changes whenever those globals are reassigned.
"""
g(z, X, Z, U) = mean(X .* (Z .== z)) / mean(U .^ 2 .* (Z .== z))
g(z) = g(z, X, Z, U)
   

g (generic function with 1 method)

In [140]:
# Same estimator, but the instrument is g evaluated at each observation's Z
# (g reads the top-level X, Z and U when called with one argument).
β_infeasible, stderr_inf = β₂ₛₗₛ(Y,X,g.(Z),U)

(0.23913125152876374, 0.05526963004578373)

###### The feasible version of the efficient IV estimator 

In [141]:
"""
    βₒₗₛ(Y, X)

Least-squares coefficients from regressing `Y` on the columns of `X`.
No intercept is added; include a constant column in `X` if one is needed.
"""
function βₒₗₛ(Y, X)
    # Solve the normal equations (X'X) b = X'Y directly instead of forming the
    # explicit inverse of X'X, which is slower and less accurate.
    return (X' * X) \ (X' * Y)
end

βₒₗₛ (generic function with 1 method)

In [142]:
# One indicator column per support point of Z.
D = hcat((Z .== z for z in (-0.5, -0.1, 0.1, 1))...)

# Regress X, and the squared first-step 2SLS residuals, on the indicators.
π = βₒₗₛ(X, D)
πᵤ = βₒₗₛ(abs2.(Y .- X .* βᵢᵥ), D)

# Estimated instrument: ratio of the two fitted values, observation by observation.
G = (D * π) ./ (D * πᵤ)
β_feasible, stderr_fe = β₂ₛₗₛ(Y, X, G, U)

(0.24194172289261826, 0.05536457351970067)

e) Confidence intervals and the associated asymptotic coverage probability 1 −α for different values of α

In [143]:
# True coefficient used to check whether each interval covers it.
β = 0.15
for α in [0.1, 0.05, 0.01]
    # Two-sided standard-normal critical value, shared by the three intervals.
    z = quantile(Normal(0, 1), 1 - α / 2)

    fits = (
        ("βᵢᵥ", βᵢᵥ, stderrᵢᵥ),
        ("β_infeasible", β_infeasible, stderr_inf),
        ("β_feasible", β_feasible, stderr_fe),
    )

    # One row per estimator: label, bounds, level, coverage flag, interval length.
    rows = map(fits) do (label, estimate, se)
        lower = estimate - z * se
        upper = estimate + z * se
        return permutedims(Any[label, lower, upper, α, lower <= β <= upper, upper - lower])
    end
    table_data = vcat(rows...)

    header = [" ", "lower", "upper", "α", "β in CI", "lenght of CI"]
    pretty_table(table_data; header)
end

┌──────────────┬───────────┬──────────┬─────┬─────────┬──────────────┐
│              │     lower │    upper │   α │ β in CI │ lenght of CI │
├──────────────┼───────────┼──────────┼─────┼─────────┼──────────────┤
│          βᵢᵥ │ 0.0452996 │  0.30681 │ 0.1 │    true │     0.261511 │
│ β_infeasible │  0.148221 │ 0.330042 │ 0.1 │    true │     0.181821 │
│   β_feasible │  0.150875 │ 0.333008 │ 0.1 │   false │     0.182133 │
└──────────────┴───────────┴──────────┴─────┴─────────┴──────────────┘
┌──────────────┬───────────┬──────────┬──────┬─────────┬──────────────┐
│              │     lower │    upper │    α │ β in CI │ lenght of CI │
├──────────────┼───────────┼──────────┼──────┼─────────┼──────────────┤
│          βᵢᵥ │ 0.0202504 │  0.33186 │ 0.05 │    true │     0.311609 │
│ β_infeasible │  0.130805 │ 0.347458 │ 0.05 │    true │     0.216653 │
│   β_feasible │  0.133429 │ 0.350454 │ 0.05 │ 


g) Repeat (a)-(f) 10,000 times. 


In [148]:
# Monte Carlo setup. In each 3×3 accumulator, rows index the estimator
# (1 = 2SLS, 2 = infeasible efficient IV, 3 = feasible efficient IV) and
# columns index the significance levels in `levels`.
R = 10^4
CI_length = zeros(3, 3)
cov_prob = zeros(3, 3)
prob_zero_in_CI = zeros(3, 3)   # counts intervals that EXCLUDE 0 (rejections of β = 0)
levels = [0.1, 0.05, 0.01]

# The `let` block keeps every per-replication variable local, so the loop behaves
# the same in a notebook and in a script and never overwrites the single-sample
# results computed earlier. The critical values do not depend on the draw, so
# they are computed once.
let β₀ = 0.15, crit = [quantile(Normal(0, 1), 1 - α / 2) for α in levels]
    for r in 1:R
        @unpack X, Y, Z, U = generate_data(0.9, β₀, 100)

        # 2SLS with Z as the instrument (also the first step of the feasible estimator).
        b_iv, se_iv = β₂ₛₗₛ(Y, X, Z, U)

        # Infeasible efficient instrument, built from THIS replication's draw:
        # mean(X·1{Z = z}) / mean(U²·1{Z = z}), evaluated once per support point.
        ratio = Dict(
            z => mean(X .* (Z .== z)) / mean(U .^ 2 .* (Z .== z)) for z in unique(Z)
        )
        g_star = [ratio[z] for z in Z]

        # Feasible efficient instrument: the same ratio with U² replaced by squared
        # first-step residuals, both fitted by regression on the Z-cell indicators.
        D = hcat((Z .== z for z in (-0.5, -0.1, 0.1, 1))...)
        fitted_x = D * βₒₗₛ(X, D)
        fitted_u2 = D * βₒₗₛ((Y - X * b_iv) .^ 2, D)
        G = fitted_x ./ fitted_u2

        fits = (
            (b_iv, se_iv),
            β₂ₛₗₛ(Y, X, g_star, U),
            β₂ₛₗₛ(Y, X, G, U),
        )

        for (k, (b, se)) in enumerate(fits), (i, c) in enumerate(crit)
            lower = b - c * se
            upper = b + c * se

            CI_length[k, i] += upper - lower
            cov_prob[k, i] += (lower <= β₀ <= upper)
            prob_zero_in_CI[k, i] += (0 < lower || 0 > upper)
        end
    end
end


        


In [149]:
# One table per nominal level; every column is a Monte Carlo average over R draws.
for (i, α) in enumerate(levels)
    estimator_names = ["βᵢᵥ", "β_infeasible", "β_feasible"]
    table_data = hcat(
        estimator_names,
        fill(1 - α, 3),
        CI_length[:, i] ./ R,
        cov_prob[:, i] ./ R,
        prob_zero_in_CI[:, i] ./ R,
    )
    header = ["estimator", "1-α", "lenght of CI", "coverage prob", "prob of signicance"]
    pretty_table(table_data; header)
end

┌──────────────┬─────┬──────────────┬───────────────┬────────────────────┐
│    estimator │ 1-α │ lenght of CI │ coverage prob │ prob of signicance │
├──────────────┼─────┼──────────────┼───────────────┼────────────────────┤
│          βᵢᵥ │ 0.9 │     0.280456 │        0.8988 │             0.5501 │
│ β_infeasible │ 0.9 │      0.20745 │        0.8956 │             0.7943 │
│   β_feasible │ 0.9 │     0.208766 │        0.8918 │             0.7728 │
└──────────────┴─────┴──────────────┴───────────────┴────────────────────┘
┌──────────────┬──────┬──────────────┬───────────────┬────────────────────┐
│    estimator │  1-α │ lenght of CI │ coverage prob │ prob of signicance │
├──────────────┼──────┼──────────────┼───────────────┼────────────────────┤
│          βᵢᵥ │ 0.95 │     0.334184 │        0.9507 │             0.4413 │
│ β_infeasible │ 0.95 │     0.247192 │        0.9494 │               0.71 │
│   β_feasible 

h) Comment on whether the simulated coverage probabilities for each of the three
methods are close to the nominal levels of 1 −α. Does the result of Question 1
seem to hold in finite samples?

##### We observe that, for each of the three methods, the simulated coverage probabilities are quite close to the corresponding nominal levels 1−α.

i) 
##### The results show that the infeasible estimator is the most powerful, since its confidence interval is the shortest and its probability of significance is the highest. We also observe slight discrepancies between the infeasible and feasible efficient IV methods in terms of CI length and coverage probability. This is because the feasible estimator relies on estimates of the conditional expectations rather than their exact values.

j) Repeat the Monte Carlo experiment (as described in parts (a)-(g)) using the
sample size n = 400. Organize your results again in a table as in part (g).
Compare with the results for n = 100 and comment on the differences.

In [151]:
# Reset the Monte Carlo settings and the three 3×3 accumulators
# (rows: estimators, columns: significance levels) before the n = 400 run.
R = 10_000
CI_length, cov_prob, prob_zero_in_CI = (zeros(3, 3) for _ in 1:3)
levels = [0.1, 0.05, 0.01]

3-element Vector{Float64}:
 0.1
 0.05
 0.01

In [152]:
# Monte Carlo loop for n = 400. It adds to the accumulators CI_length, cov_prob
# and prob_zero_in_CI (rows: 2SLS, infeasible, feasible; columns: `levels`).
# The `let` block keeps every per-replication variable local, so the loop behaves
# the same in a notebook and in a script and never overwrites the single-sample
# results computed earlier. The critical values do not depend on the draw, so
# they are computed once.
let β₀ = 0.15, crit = [quantile(Normal(0, 1), 1 - α / 2) for α in levels]
    for r in 1:R
        @unpack X, Y, Z, U = generate_data(0.9, β₀, 400)

        # 2SLS with Z as the instrument (also the first step of the feasible estimator).
        b_iv, se_iv = β₂ₛₗₛ(Y, X, Z, U)

        # Infeasible efficient instrument, built from THIS replication's draw:
        # mean(X·1{Z = z}) / mean(U²·1{Z = z}), evaluated once per support point.
        ratio = Dict(
            z => mean(X .* (Z .== z)) / mean(U .^ 2 .* (Z .== z)) for z in unique(Z)
        )
        g_star = [ratio[z] for z in Z]

        # Feasible efficient instrument: the same ratio with U² replaced by squared
        # first-step residuals, both fitted by regression on the Z-cell indicators.
        D = hcat((Z .== z for z in (-0.5, -0.1, 0.1, 1))...)
        fitted_x = D * βₒₗₛ(X, D)
        fitted_u2 = D * βₒₗₛ((Y - X * b_iv) .^ 2, D)
        G = fitted_x ./ fitted_u2

        fits = (
            (b_iv, se_iv),
            β₂ₛₗₛ(Y, X, g_star, U),
            β₂ₛₗₛ(Y, X, G, U),
        )

        for (k, (b, se)) in enumerate(fits), (i, c) in enumerate(crit)
            lower = b - c * se
            upper = b + c * se

            CI_length[k, i] += upper - lower
            cov_prob[k, i] += (lower <= β₀ <= upper)
            # Counts intervals that EXCLUDE 0, i.e. rejections of β = 0.
            prob_zero_in_CI[k, i] += (0 < lower || 0 > upper)
        end
    end
end

# One table per nominal level; every column is a Monte Carlo average over R draws.
for (i, α) in enumerate(levels)
    estimator_names = ["βᵢᵥ", "β_infeasible", "β_feasible"]
    table_data = hcat(
        estimator_names,
        fill(1 - α, 3),
        CI_length[:, i] ./ R,
        cov_prob[:, i] ./ R,
        prob_zero_in_CI[:, i] ./ R,
    )
    header = ["estimator", "1-α", "lenght of CI", "coverage prob", "prob of signicance"]
    pretty_table(table_data; header)
end


┌──────────────┬─────┬──────────────┬───────────────┬────────────────────┐
│    estimator │ 1-α │ lenght of CI │ coverage prob │ prob of signicance │
├──────────────┼─────┼──────────────┼───────────────┼────────────────────┤
│          βᵢᵥ │ 0.9 │      0.13953 │        0.8939 │             0.9578 │
│ β_infeasible │ 0.9 │     0.105496 │        0.8992 │             0.9975 │
│   β_feasible │ 0.9 │     0.105653 │        0.8988 │             0.9975 │
└──────────────┴─────┴──────────────┴───────────────┴────────────────────┘
┌──────────────┬──────┬──────────────┬───────────────┬────────────────────┐
│    estimator │  1-α │ lenght of CI │ coverage prob │ prob of signicance │
├──────────────┼──────┼──────────────┼───────────────┼────────────────────┤
│          βᵢᵥ │ 0.95 │      0.16626 │        0.9508 │             0.9241 │
│ β_infeasible │ 0.95 │     0.125706 │        0.9506 │             0.9939 │
│   β_feasible 

##### We see that when the sample size becomes large, the confidence intervals shrink, the probability of significance increases, and the infeasible estimator remains the most powerful, with the shortest CI.