# copy of GLS from case 1 to easier combine files later and have consistent notation

In [1]:
type ols_results
  coefs 
  yhat
  res
  vcv
  tstat
  pval
end

In [2]:
Int(floor(3.1))

3

In [3]:
# keyword arguments are placed after semicolon
function gls(y, X; corr=:none, lags=nothing)
    
    # more stable: β̂ = X \ y, see notes at bottom
    β̂ = inv(X' * X) * (X' * y)
    ŷ = X * β̂
    μ̂ = y - ŷ
    
    T, K = size(X)
    σ̂² = dot(μ̂, μ̂) / (T - K)

    #use correction for variance covariance
    if corr == :none
        vcv = σ̂² * inv(X' * X)
        
    elseif corr == :white
        # or do newey_west with lags=0
        vcv = inv(X' * X) * X' * diagm(μ̂.^2) * X * inv(X' * X)
        
    elseif corr == :newey_west
        if lags == nothing 
            lags = Int(floor(T^(1/4)))
        end
        vcv = newey_west(X, μ̂, lags)
    else
        error("")
    end

    # T statistics for H₀: β₀ = 0
    tstat = β̂ ./ sqrt(diag(vcv))
    
    # absolute value and times two for double sided test
    pval  = 2 * ccdf(TDist(T-K), abs(tstat)) 

    return ols_results(β̂, ŷ, μ̂, vcv, tstat, pval)
end

gls (generic function with 1 method)

Strict set of GM assumptions:
* X is deterministic, x is thus fixed over repeated samples
* errors $\mu$ are normally distributed with assumed homoscedastic errors

Give the small sample and asymptotic properties of the OLS estimator for $\beta$ and for the estimator of the standard errors.

Small sample properties:
* best unbiased estimator
* estimator is normally distributed (stems from the fact that $\hat{\beta}$ is linear function of disturbance vector $\mu$)
* covariance matrix $\sigma^2(X'X)^{-1}$ with an unbiased estimator of $\sigma^2$ given by

$$\hat{\sigma}^2 = \frac{\hat{\mu}'\hat{\mu}}{N-K} = \frac{y'My}{N-K}$$


Asymptotic properties:
* same under the GM conditions
* $\bar{x}_N$ assymptotically approaches $N(\mu,\frac{\sigma^2}{N})$

In [4]:
using Distributions

# Setting up a monte carlo simulation

In [None]:
β₀ = 10
β₁ = 1
β = [β₀, β₁]
σ² = 1
sample_size = 100
runs = 100000

In [None]:
#Model to generate Y
y(X,β,μ) = X * β + μ

Step 1: specify a population = N(5,2) draw sample once to have a deterministic sample, sample errors R times

In [None]:
gen_X(sample_size) = hcat(ones(sample_size), rand(Normal(5, 2), sample_size))


In [None]:
gen_μ(sample_size) = randn(sample_size,1)*sqrt(σ²)

In [None]:
a = [1,2,3]

In [None]:
zeros(a)

In [None]:
function MC_simple_OLS_numeric!(β, sample_size, runs, σ²)
    β̂̄_MC = zeros(β) #it is beta-hat-bar
    se_β̂̄_MC = zeros(β)
    X = gen_X(sample_size) #deterministic
    TrueSE = diag(σ²*inv(X'*X))
    for MC_run = 1:runs
        μ = gen_μ(sample_size)
        y_model = y(X,β,μ)
        gls_results = gls(y_model,X)
        β̂̄_MC += gls_results.coefs
        se_β̂̄_MC += diag(gls_results.vcv) #I THINK THIS GIVES THE MEAN OF THE SE OF $\hat{\beta}$
    end
    β̂̄_MC /= runs
    se_β̂̄_MC /= runs
    return β, TrueSE, β̂̄_MC, se_β̂̄_MC
end

In [None]:
MC_simple_OLS_numeric!(β, sample_size, runs, σ²)

step 2: calculate statistics and save them (ols estimator, estimated ols standard error, t-statistic)

Note that std does not use the standard formulation of standard deviation

this gives the true standard error of the total simulation

In [None]:
std_of_x(x) = norm(x - mean(x))/sqrt(length(x))

In [None]:
t_test(vec,H₀) = (mean(vec) - H₀)/std(vec)

part 2: lagged dependent variable

Introducing lagged dependent variables makes it so that the assumption "X and $\mu$ are independent" has to be relaxed to $E[\mu_t|x_t] = 0$ or thus that the errors are contemporaneously independent with any explanatory variables.

The OLS estimator becomes:
* Biased: $E[\hat{\beta}|X] = \beta + (X'X)^{-1}X'E[\mu|X]$ => $E[\hat{\beta}] = E_X(E[\hat{\beta}|X]) \neq \beta$
* Consistent and asymptotically normally distributed: $plim\hat{\beta} = \beta + plim \frac{X'X}{T}^{-1} plim\frac{X'\mu}{T}$ = 0 because $plim\frac{X'\mu}{T} = E(x_t\mu_t) = 0$
* $\hat{\sigma}^2 = \frac{\hat{\mu}'\hat{\mu}}{T-k}$ is still a consistent estimator for $\sigma^2$

In [5]:
β₀ = 10
β₁ = 0
β = [β₀, β₁]
σ² = 1
#sample_size = 100
runs = 100000

100000

In [6]:
function gen_lagged_y!(β, sample_size)
    y_t = zeros(sample_size)
    y_t_min_1 = zeros(sample_size)
    β₀, β₁ = β
    y₀ = rand(Normal(β₀/(1-β₁), sqrt(σ²/(1-β₁^2)))) #keep fixed throughout sample sizes and drop from results
    μ = randn()*sqrt(σ²)
    y_t_min_1[1] = y₀
    y_t[1] = β₀ + β₁*y₀ + μ
    for t = 2:length(y_t)
        y_t_min_1[t] = y_t[t-1]
        μ = randn()*sqrt(σ²)
        y_t[t] = β₀ + β₁*y_t[t-1] + μ
    end
    return y_t, y_t_min_1
end

gen_lagged_y! (generic function with 1 method)

In [None]:
gen_lagged_y!(β, sample_size)

In [7]:
function MC_LDV_OLS_numeric!(β, sample_size, runs, σ²)
    β̂̄_MC = zeros(β) #it is beta-hat-bar
    se_β̂̄_MC = zeros(β)
    TrueSE = zeros(β)
    for MC_run = 1:runs
        y_t, y_t_min_1 = gen_lagged_y!(β, sample_size)
        X = hcat(ones(sample_size), y_t_min_1)
        TrueSE += diag(σ²*inv(X'*X))
        gls_results = gls(y_t,X)
        β̂̄_MC += gls_results.coefs
        se_β̂̄_MC += diag(gls_results.vcv) #I THINK THIS GIVES THE MEAN OF THE SE OF $\hat{\beta}$
    end
    β̂̄_MC /= runs
    se_β̂̄_MC /= runs
    TrueSE /= runs
    return β, TrueSE, β̂̄_MC, se_β̂̄_MC
end

MC_LDV_OLS_numeric! (generic function with 1 method)

In [9]:
results = MC_LDV_OLS_numeric!(β, 1000, runs, σ²)

([10,0],[0.101315,0.00100316],[10.0094,-0.000951111],[0.101099,0.001001])

In [None]:
bias = zeros(length(sample_sizes),length(β))
for i = 1:length(sample_sizes)
    results = MC_LDV_OLS_numeric!(β, sample_sizes[i], runs, σ²)
    bias[i,:] = (results[3] - results[1])'
end

In [10]:
function find_bias!(β, runs, σ²)
    sample_sizes = vcat(collect(10:10:100), collect(200:100:1000), collect(2000:1000:10000))
    bias = zeros(length(sample_sizes),length(β))
    for i = 1:length(sample_sizes)
        results = MC_LDV_OLS_numeric!(β, sample_sizes[i], runs, σ²)
        bias[i,:] = (results[3] - results[1])'
    end
    return sample_sizes, bias
end

find_bias! (generic function with 1 method)

In [11]:
sample_sizes, bias = find_bias!(β, runs, σ²)

LoadError: InterruptException:

how can we use the MC simulation results to correct the bias of the OLS estimator for $\beta_1$?

* repeatedly save abs(true parameter - simulated parameter) to find the mean and distribution of the bias

In [None]:
using Plots
gr()

In [None]:
plot(randn(10))