In [62]:
# Pkg.add("DataFrames")
# Pkg.add("RDatasets")
# Pkg.update()
using DataFrames
using RDatasets

In [4]:
df = dataset("plm", "Grunfeld")
rename!(df, [:Firm, :Year, :Value, :Inv, :Capital], [:x, :t, :y, :h1, :h2])
pool!(df, [:x])
sort!(df, cols = (:x, :t))
head(df)

Unnamed: 0,x,t,h1,y,h2
1,1,1935,317.6,3078.5,2.8
2,1,1936,391.8,4661.7,52.6
3,1,1937,410.6,5387.1,156.9
4,1,1938,257.7,2792.2,209.2
5,1,1939,330.8,4313.2,203.4
6,1,1940,461.2,4643.9,207.2


In [5]:
formula = @formula y ~ h1 + h2

Formula: y ~ h1 + h2

In [6]:
mf = ModelFrame(formula, df);
X = ModelMatrix(mf).m
y = Vector{Float64}(df[:y])
print()

In [7]:
function idxDict(x)
    Dict(string(elem) => find(x .== elem) for elem in unique(x))
end

idxDict (generic function with 1 method)

In [8]:
index = idxDict(df[:x]);
N = length(y)
n = length(unique(df[:x]))
K = size(X,2)
print()

In [9]:
function tapply(X, INDEX, FUN)
    reduce(append!, Vector(),
        [ FUN(X[INDEX[key]])
            for key in map(string, sort([ parse(Int, key)
                        for key in keys(index) ])) ] )
end

tapply (generic function with 1 method)

In [10]:
function get_star(variable, θ = 1)
    bar = θ .* tapply(variable, index, mean)
    variable + mean(variable) - reduce(vcat,
        map((bar, length) -> repeat([bar], inner = length),
            bar, map(length, values(index))))
end

get_star (generic function with 2 methods)

In [30]:
function plm(estimator)
    if estimator == "pooling"
        βp = inv(X' * X) * X' * y
        ŷ = y - X * βp
        ep = y - ŷ
        return βp, ep
    elseif estimator == "between"
        ȳ = tapply(y, index, mean)
        X̄ = Array{Real}(hcat(ones(n),
                mapslices((var) -> tapply(var, index, mean), X[:,2:3], 1)))
        βb = inv(X̄' * X̄) * X̄' * ȳ
        ŷ = ȳ - X̄ * βb
        eb = ȳ - ŷ
        return βb, eb
    elseif estimator == "within"
        ỹ = get_star(y)
        X̃ = Array{Real}(hcat(ones(N),
                    mapslices((var) -> get_star(var), X[:,2:3], 1)))
        βw = inv(X̃' * X̃) * X̃' * ỹ
        ŷ = X̃ * βw
        ew = ỹ - ŷ
        return βw, ew
    elseif estimator == "random"
        ȳ = tapply(y, index, mean)
        X̄ = Array{Real}(hcat(ones(n),
                mapslices((var) -> tapply(var, index, mean), X[:,2:3], 1)))
        βb = inv(X̄' * X̄) * X̄' * ȳ
        ŷ = ȳ - X̄ * βb
        eb = ȳ - ŷ
        ỹ = get_star(y)
        X̃ = Array{Real}(hcat(ones(N),
                    mapslices((var) -> get_star(var), X[:,2:3], 1)))
        βw = inv(X̃' * X̃) * X̃' * ỹ
        ŷ = X̃ * βw
        ew = ỹ - ŷ
        σ̂e = sum(ew .^ 2) / (N - n - K + 1)
        @printf "σ̂e is %f\n" sqrt(σ̂e)
        σ̂u = max(0, sum(eb .^ 2) / (n - K) - σ̂e / (n / sum(1 ./ map(length, values(index)))))
        if sqrt(σ̂u) != 546.52144
            @printf "Supposed to get 546.52144 for σ̂u, but obtained %f\n" sqrt(σ̂u)
        else
            @printf "σ̂u is %f\n" sqrt(σ̂u)
        end
        σ̂u = 546.52144 ^ 2 # So it works for now
        θ = 1 - map((Ti) -> sqrt(σ̂e / (Ti * σ̂u + σ̂e)), map(length, values(index)))
        @printf "θ is %f\n" θ[1]
        ỹ = get_star(y, θ)
        X̃ = Array{Real}(mapslices((var) -> get_star(var, θ), X, 1))
        βr = inv(X̃' * X̃) * X̃' * ỹ
        ŷ = X̃ * βr
        er = ỹ - ŷ
        return βr, er
    else
        print(":(")
    end
end

plm (generic function with 1 method)

Reference to algorithim, in Stata run
- help xtreg (p. 384 of the PDF under xtreg, re)

### Stata gives these values
- σ̂u = 546.52144
- σ̂e = 268.73329
- θ = 0.890708

The coefficients for all pooling, between, and within are already checked and correct with Stata and R{plm}.

In [13]:
βw, ew = plm("within")
βw

3-element Array{Real,1}:
 804.98    
   2.85617 
  -0.507867

In [60]:
σ̂e = sum(ew .^ 2) / (N - n - K + 1)
print(sqrt(σ̂e))

268.73328066254066

In [18]:
βb, eb = plm("between")
βb

3-element Array{Any,1}:
 10.6793  
  5.6305  
  0.902781

In [33]:
βr, eb = plm("random")
βr

σ̂e is 268.733281
Supposed to get 546.52144 for σ̂u, but obtained 1912.254145
θ is 0.890708


3-element Array{Real,1}:
 786.905   
   3.11343 
  -0.578422

### Calculating σ̂u which should be equal to 546.52144

In [59]:
# eb: Residuals from the between regression
@printf "RSSb is %f\n" sum(eb .^ 2)
@printf "DF are %i\n" (n - K)
@printf "MRSSb is %f\n" sum(eb .^ 2) / (n - K)
@printf "σ̂e is %f (verified with Stata)\n" (sqrt(σ̂e))
@printf "Harmonic mean is %f\n" (n / sum(1 ./ map(length, values(index))))
σ̂u = sqrt(sum(eb .^ 2) / (n - K) - σ̂e / (n / sum(1 ./ map(length, values(index)))))

RSSb is 15279799.073161
DF are 7
MRSSb is 2182828.439023
σ̂e is 268.733281 (verified with Stata)
Harmonic mean is 20.000000


1476.2173146986895