# Empirical Asset Pricing - Final Exam
This notebook contains data preparation procedures, coded up functions, function calls and results.

# Task 1

In [1]:
# load data
using CSV, DataFrames
main = CSV.read("./Data/MainVar.csv", header = true, allowmissing = :none)
main[:Year] = round.(Int64, main[:Date])
main[:Quarter] = round.(Int64, (main[:Date] .% 1) * 10)
delete!(main, :Date)

equity = CSV.read("./Data/Equity.csv", header = true, allowmissing = :none)

sizebm = CSV.read("./Data/SizeBM.csv", header = true, allowmissing = :none)
sizebm[:Month] = round.(Int64, (sizebm[:Year] .% 100))
sizebm[:Quarter] = Dates.quarterofyear.(Date.(sizebm[:Year], sizebm[:Month], 1))
sizebm[:Year] =  floor.(Int64, sizebm[:Year] ./ 100)
delete!(sizebm, :Month)

sizeinv = CSV.read("./Data/SizeInv.csv", header = true, allowmissing = :none)
sizeinv[:Month] = round.(Int64, (sizeinv[:Year] .% 100))
sizeinv[:Quarter] = Dates.quarterofyear.(Date.(sizeinv[:Year], sizeinv[:Month], 1))
sizeinv[:Year] =  floor.(Int64, sizeinv[:Year] ./ 100)
delete!(sizeinv, :Month)

sizeop = CSV.read("./Data/SizeOp.csv", header = true, allowmissing = :none)
sizeop[:Month] = round.(Int64, (sizeop[:Year] .% 100))
sizeop[:Quarter] = Dates.quarterofyear.(Date.(sizeop[:Year], sizeop[:Month], 1))
sizeop[:Year] =  floor.(Int64, sizeop[:Year] ./ 100)
delete!(sizeop, :Month)

rev = CSV.read("./Data/REV.csv", header = true, allowmissing = :none)
rev[:Month] = round.(Int64, (rev[:Year] .% 100))
rev[:Quarter] = Dates.quarterofyear.(Date.(rev[:Year], rev[:Month], 1))
rev[:Year] =  floor.(Int64, rev[:Year] ./ 100)
delete!(rev, :Month);

## 1

In [2]:
function aggregate_to_quarter(df)
    out = DataFrame()
    for col in eachcol(df[:, 3:size(df, 2)-1])
        out[col[1]] = prod(1 .+ col[2] ./ 100)
    end
    out[:RF] = prod(1 .+ df[:RF])
    out
end

sizebm = by(aggregate_to_quarter, sizebm, [:Year, :Quarter])
sizeinv = by(aggregate_to_quarter, sizeinv, [:Year, :Quarter])
sizeop = by(aggregate_to_quarter, sizeop, [:Year, :Quarter])
rev = by(aggregate_to_quarter, rev, [:Year, :Quarter]);

In [3]:
head(sizebm)

Unnamed: 0,Year,Quarter,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,RF
1,1963,3,1.01856,1.02661,1.00777,1.00643,1.03059,0.993746,1.03306,1.02821,1.01191,1.0383,0.986123,1.01103,1.04051,1.02303,1.01378,1.01815,1.01091,1.01845,1.01977,1.01333,1.04467,1.03893,1.05019,1.07544,1.01057,1.00804
2,1963,4,0.953204,0.945543,0.987236,0.973161,1.00252,0.963009,1.01532,1.02782,1.03159,1.0394,0.998697,1.00069,1.03312,1.02779,1.00316,0.986841,1.01333,1.03437,1.09897,1.10578,1.06728,1.0509,1.00433,1.02648,1.04271,1.00866
3,1964,1,1.07889,1.0709,1.08997,1.08338,1.12548,1.02106,1.04354,1.10034,1.09421,1.17389,1.00176,1.05888,1.09083,1.13735,1.14606,1.03987,1.06053,1.09959,1.19078,1.08882,1.06184,1.04453,1.06757,1.10315,1.05025,1.00883
4,1964,2,0.987847,1.02019,1.01339,1.00211,1.01653,1.03247,0.978928,1.05245,1.01456,1.05333,0.983484,1.02671,1.0229,1.0358,1.02713,0.995204,1.04593,1.03992,1.03391,1.0489,1.03297,1.04778,1.02254,1.04591,1.08973,1.00863
5,1964,3,1.06921,1.06002,1.07426,1.04472,1.08344,1.02765,1.01376,1.05674,1.05509,1.05331,1.01711,1.03785,1.06964,1.06403,1.02935,1.03213,1.07067,1.07018,1.06685,1.07769,1.04416,1.00309,1.08489,1.05547,1.04201,1.00829
6,1964,4,1.00004,0.985591,1.01021,1.00279,1.01986,0.992293,1.01727,1.02918,0.984766,0.986301,1.04998,1.04963,1.01333,0.999632,1.01948,1.02816,1.0279,0.990217,1.01351,0.978491,1.02215,1.02087,1.00898,0.978564,0.977016,1.00898


## 2

Let's lay down a timing convention: An excess return between $t+1$ and $t+2$, called $R_{t,t+1}^e$, simply is the $R_{t+1}^e$ that has been calculated above.

<b>Mind: This means that $R_{t,t+1}^e$ is not the excess return between $t$ and $t+1$!</b>

In [4]:
function H_horizon(data; H=1)
    vcat([prod(1 .+ data[i+1:i+H]) .- 1 for i in 1:length(data)-H]...)
end

H_horizon (generic function with 1 method)

## KS

In [5]:
H = 8
df = sizebm

# first step
R_excess = (vcat([prod(Matrix(df[i:i+H-1, 3:end-1]), 1) for i in 1:size(df, 1)-H+1]...) .- 
    [prod(df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)])
KS = (1 .- main[Symbol("Labor Share")]/100)

X = hcat(ones(length(KS) - H), KS[1+H:end] ./ KS[1:end-H])
Y = R_excess[2:end, :]
β = ((X'X) \ X'Y)
ϵ = Y - X * β
β = (β[2:end, :])'
Σf = cov(X[:, 2:end])
Σ = cov(ϵ)

# second step
X = hcat(ones(size(β)), β)
Y = mean((Matrix(df[:, 3:end-1]) .- df[:RF]), 1)[:]

λ = ((X'X) \ X'Y)
Rsq = 1 - sum((Y - X * λ).^2) / sum((Y .- mean(Y)).^2)
rmse = sqrt(mean((Y - X * λ).^2))
rmsr = sqrt(mean(Y.^2))
rmse_rmsr = rmse / rmsr
BIC = length(Y)*log(mean((Y - X * λ).^2)) + 2*log(length(Y))

# see Cochrane - Asset Pricing p.245
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])'
λ_t = ((X'X) \ X'Y)

σ_FM = 1/size(λ_t, 2) * sqrt.(diag((λ_t .- mean(λ_t, 2)) * (λ_t .- mean(λ_t, 2))'))
t_FM = λ ./ σ_FM

X = hcat(ones(size(β)), β)

σ_Sh = sqrt.(diag(1/size(X, 1)^2 * ((X'X)^-1 * (X' * Σ * X) * (X'X)^-1 .* (1 + (λ[2:end])' * Σf^-1 * λ[2:end]) + vcat(zeros(size(Σf, 1) + 1)', hcat(zeros(size(Σf, 1)), Σf)))))
t_Sh = λ ./ σ_Sh #vcat(t_FM[1], λ[2:end] ./ σ_Sh)

(λ .* 100, t_FM, t_Sh, Rsq, rmse_rmsr, BIC)

([1.55118, 0.668371], [2.19806, 4.45263], [1.35629, 1.7812], 0.8044096951215918, 0.12310904755574151, -283.2961662422493)

In [6]:
H = 8
df = equity

# first step
R_excess = (vcat([prod( 1.+ Matrix(df[i:i+H-1, 3:end-1]), 1) for i in 1:size(df, 1)-H+1]...) .- 
    [prod(1 .+ df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)])
KS = (1 .- main[Symbol("Labor Share")]/100)

X = hcat(ones(length(KS) - H), KS[1+H:end] ./ KS[1:end-H])
Y = R_excess[2:end, :]
β = (X'X) \ (X'Y)
ϵ = Y - X * β
β = (β[2:end, :])'
Σf = cov(X[:, 2:end])
Σ = cov(ϵ)

# second step
X = hcat(ones(size(β)), β)
Y = mean((Matrix(df[:, 3:end-1]) .- df[:RF]), 1)[:]

λ = (X'X) \ (X'Y)
Rsq = 1 - sum((Y - X * λ).^2) / sum((Y .- mean(Y)).^2)
rmse = sqrt(mean((Y - X * λ).^2))
rmsr = sqrt(mean(Y.^2))
rmse_rmsr = rmse / rmsr
BIC = length(Y)*log(mean((Y - X * λ).^2)) + 2*log(length(Y))

# see Cochrane - Asset Pricing p.245
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])'
λ_t = (X'X) \ (X'Y)

σ_FM = 1/size(λ_t, 2) * sqrt.(diag((λ_t .- mean(λ_t, 2)) * (λ_t .- mean(λ_t, 2))'))
t_FM = λ ./ σ_FM

X = hcat(ones(size(β)), β)

σ_Sh = sqrt.(diag(1/size(X, 1)^2 * ((X'X)^-1 * (X' * Σ * X) * (X'X)^-1 .* (1 + (λ[2:end])' * Σf^-1 * λ[2:end]) + vcat(zeros(size(Σf, 1) + 1)', hcat(zeros(size(Σf, 1)), Σf)))))
t_Sh = λ ./ σ_Sh #vcat(t_FM[1], λ[2:end] ./ σ_Sh)

(λ .* 100, t_FM, t_Sh, Rsq, rmse_rmsr, BIC)

([1.64497, 0.567932], [2.51399, 3.85548], [5.11579, 5.06383], 0.7354921071086783, 0.13525295312014526, -966.5266630056278)

## FF Factors

In [53]:
H = 8
df = sizebm

# first step
R_excess = (vcat([prod(Matrix(df[i:i+H-1, 3:end-1]), 1) for i in 1:size(df, 1)-H+1]...) .- 
    [prod(df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)])

factors = vcat([prod(1 .+ Matrix(main[i:i+H-1, 3:5]) ./ 100, 1) 
            for i in 1:size(main, 1)-H+1]...)

#factors = vcat([prod(Matrix(main[i:i+H-1, 3:5]) ./ 100 .+ df[i:i+H-1, :RF], 1) 
#            for i in 1:size(main, 1)-H+1]...) .- 
#    [prod(df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)]

#factors = hcat(vcat([prod(Matrix(main[i:i+H-1, 3:3]) ./ 100 .+ df[i:i+H-1, :RF], 1) 
#            for i in 1:size(main, 1)-H+1]...) .- 
#    [prod(df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)],
#        vcat([prod(1 .+ Matrix(main[i:i+H-1, 4:5]) ./ 100, 1) 
#            for i in 1:size(main, 1)-H+1]...) .- 1)

X = hcat(ones(size(factors, 1)), factors[1:end, :])
Y = R_excess[1:end, :]
β = (X'X) \ (X'Y)
ϵ = Y - X * β
β = (β[2:end, :])'
Σf = cov(X[:, 2:end])
Σ = cov(ϵ)

# second step
X = hcat(ones(size(β, 1)), β)
Y = mean((Matrix(df[:, 3:end-1]) .- df[:RF]), 1)[:]
λ = (X'X) \ (X'Y)

Rsq = 1 - sum((Y - X * λ).^2) / sum((Y .- mean(Y)).^2)
rmse = sqrt(mean((Y - X * λ).^2))
rmsr = sqrt(mean(Y.^2))
rmse_rmsr = rmse / rmsr
BIC = length(Y)*log(mean((Y - X * λ).^2)) + 2*log(length(Y))

# see Cochrane - Asset Pricing p.245
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])'
λ_t = (X'X) \ (X'Y)

σ_FM = 1/size(λ_t, 2) * sqrt.(diag((λ_t .- mean(λ_t, 2)) * (λ_t .- mean(λ_t, 2))'))
t_FM = λ ./ σ_FM

σ_Sh = sqrt.(diag(1/size(X, 1)^2 * ((X'X)^-1 * (X' * Σ * X) * (X'X)^-1 .* (1 + (λ[2:end])' * Σf^-1 * λ[2:end]) + vcat(zeros(size(Σf, 1) + 1)', hcat(zeros(size(Σf, 1)), Σf)))))
t_Sh = λ ./ σ_Sh #vcat(t_FM[1], λ[2:end] ./ σ_Sh)

(λ .* 100, t_FM, t_Sh, Rsq, rmse_rmsr, BIC)

([1.51262, 0.110869, 0.672911, 1.30751], [1.97411, 0.133149, 1.9806, 3.0825], [0.89843, 0.0608945, 0.79601, 1.4847], 0.7517431593302969, 0.13869695344352376, -277.33512454718806)

In [51]:
H = 8
df = equity

# first step
R_excess = (vcat([prod( 1.+ Matrix(df[i:i+H-1, 3:end-1]), 1) for i in 1:size(df, 1)-H+1]...) .- 
    [prod(1 .+ df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)])

factors = vcat([prod(1 .+ Matrix(main[i:i+H-1, 3:5]) ./ 100 .+ df[i:i+H-1, :RF], 1) 
            for i in 1:size(main, 1)-H+1]...) .- 
    [prod(1 .+ df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)]

X = hcat(ones(size(factors, 1)), factors[1:end, :])
Y = R_excess[1:end, :]
β = (X'X) \ (X'Y)
ϵ = Y - X * β
β = (β[2:end, :])'
Σf = cov(X[:, 2:end])
Σ = cov(ϵ)

# second step
X = hcat(ones(size(β, 1)), β)
Y = mean((Matrix(df[:, 3:end-1]) .- df[:RF]), 1)[:]
λ = (X'X) \ (X'Y)

Rsq = 1 - sum((Y - X * λ).^2) / sum((Y .- mean(Y)).^2)
rmse = sqrt(mean((Y - X * λ).^2))
rmsr = sqrt(mean(Y.^2))
rmse_rmsr = rmse / rmsr
BIC = length(Y)*log(mean((Y - X * λ).^2)) + 2*log(length(Y))

# see Cochrane - Asset Pricing p.245
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])'
λ_t = (X'X) \ (X'Y)

σ_FM = 1/size(λ_t, 2) * sqrt.(diag((λ_t .- mean(λ_t, 2)) * (λ_t .- mean(λ_t, 2))'))
t_FM = λ ./ σ_FM

σ_Sh = sqrt.(diag(1/size(X, 1)^2 * ((X'X)^-1 * (X' * Σ * X) * (X'X)^-1 .* (1 + (λ[2:end])' * Σf^-1 * λ[2:end]) + vcat(zeros(size(Σf, 1) + 1)', hcat(zeros(size(Σf, 1)), Σf)))))
t_Sh = λ ./ σ_Sh #vcat(t_FM[1], λ[2:end] ./ σ_Sh)

(λ .* 100, t_FM, t_Sh, Rsq, rmse_rmsr, BIC)

([1.5624, 0.140652, 0.669882, 1.43504], [2.67289, 0.200131, 1.77963, 3.23835], [4.60893, 0.313006, 2.47639, 5.08237], 0.7183324488401472, 0.13957121121041782, -961.1838700898406)

## 3 Step Procedure

In [79]:
H = 8
df = sizebm

# first step
R_excess = (vcat([prod(Matrix(df[i:i+H-1, 3:end-1]), 1) for i in 1:size(df, 1)-H+1]...) .- 
    [prod(df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)])
KS = (1 .- main[Symbol("Labor Share")]/100)
factors = vcat([prod(1 .+ Matrix(main[i:i+H-1, 3:5]) ./ 100, 1) 
            for i in 1:size(main, 1)-H+1]...)

X = hcat(ones(length(KS) - H), KS[1+H:end] ./ KS[1:end-H])
Y = R_excess[2:end, :]
β = ((X'X) \ X'Y)
ϵ = Y - X * β
βKS = (β[2:end, :])'
Σf = cov(hcat(KS[1+H:end] ./ KS[1:end-H], factors[2:end, :]))
Σ = cov(ϵ)

# second step
factors = 1 .+ Matrix(main[:, 3:5]) ./ 100
X = hcat(ones(size(factors, 1)), factors[1:end, 1:1])
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])
βRm = (((X'X) \ (X'Y))')[:, 2]

X = hcat(ones(size(factors, 1)), factors[1:end, 2:2])
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])
βSMB = (((X'X) \ (X'Y))')[:, 2]

X = hcat(ones(size(factors, 1)), factors[1:end, 3:3])
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])
βHML = (((X'X) \ (X'Y))')[:, 2]

# thrid step
X = hcat(ones(length(βKS)), βKS, βRm, βSMB, βHML)
Y = mean((Matrix(df[:, 3:end-1]) .- df[:RF]), 1)[:]
λ = (X'X) \ (X'Y)

Rsq = 1 - sum((Y - X * λ).^2) / sum((Y .- mean(Y)).^2)
rmse = sqrt(mean((Y - X * λ).^2))
rmsr = sqrt(mean(Y.^2))
rmse_rmsr = rmse / rmsr
BIC = length(Y)*log(mean((Y - X * λ).^2)) + 2*log(length(Y))

# see Cochrane - Asset Pricing p.245
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])'
λ_t = (X'X) \ (X'Y)

σ_FM = 1/size(λ_t, 2) * sqrt.(diag((λ_t .- mean(λ_t, 2)) * (λ_t .- mean(λ_t, 2))'))
t_FM = λ ./ σ_FM

σ_Sh = sqrt.(diag(1/size(X, 1)^2 * ((X'X)^-1 * (X' * Σ * X) * (X'X)^-1 .* (1 + (λ[2:end])' * Σf^-1 * λ[2:end]) + vcat(zeros(size(Σf, 1) + 1)', hcat(zeros(size(Σf, 1)), Σf)))))
t_Sh = λ ./ σ_Sh #vcat(t_FM[1], λ[2:end] ./ σ_Sh)

(λ .* 100, t_FM, t_Sh, Rsq, rmse_rmsr, BIC)

([3.64925, 0.493244, -3.04884, 1.21527, -0.191451], [3.82949, 3.91353, -2.02811, 1.86134, -0.309719], [1.41848, 1.69351, -0.794167, 0.625215, -0.13324], 0.8641767852653425, 0.10258959426634229, -292.41286728784615)

In [82]:
H = 8
df = equity

# first step
R_excess = (vcat([prod(1 .+ Matrix(df[i:i+H-1, 3:end-1]), 1) for i in 1:size(df, 1)-H+1]...) .- 
    [prod(1 .+ df[i:i+H-1, :RF]) for i in 1:(size(df, 1)-H+1)])
KS = (1 .- main[Symbol("Labor Share")]/100)
factors = vcat([prod(1 .+ Matrix(main[i:i+H-1, 3:5]) ./ 100, 1) 
            for i in 1:size(main, 1)-H+1]...)

X = hcat(ones(length(KS) - H), KS[1+H:end] ./ KS[1:end-H])
Y = R_excess[2:end, :]
β = ((X'X) \ X'Y)
ϵ = Y - X * β
βKS = (β[2:end, :])'
Σf = cov(hcat(KS[1+H:end] ./ KS[1:end-H], factors[2:end, :]))
Σ = cov(ϵ)

# second step
factors = 1 .+ Matrix(main[:, 3:5]) ./ 100
X = hcat(ones(size(factors, 1)), factors[1:end, 1:1])
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])
βRm = (((X'X) \ (X'Y))')[:, 2]

X = hcat(ones(size(factors, 1)), factors[1:end, 2:2])
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])
βSMB = (((X'X) \ (X'Y))')[:, 2]

X = hcat(ones(size(factors, 1)), factors[1:end, 3:3])
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])
βHML = (((X'X) \ (X'Y))')[:, 2]

# thrid step
X = hcat(ones(length(βKS)), βKS, βRm, βSMB, βHML)
Y = mean((Matrix(df[:, 3:end-1]) .- df[:RF]), 1)[:]
λ = (X'X) \ (X'Y)

Rsq = 1 - sum((Y - X * λ).^2) / sum((Y .- mean(Y)).^2)
rmse = sqrt(mean((Y - X * λ).^2))
rmsr = sqrt(mean(Y.^2))
rmse_rmsr = rmse / rmsr
BIC = length(Y)*log(mean((Y - X * λ).^2)) + 2*log(length(Y))

# see Cochrane - Asset Pricing p.245
Y = (Matrix(df[:, 3:end-1]) .- df[:RF])'
λ_t = (X'X) \ (X'Y)

σ_FM = 1/size(λ_t, 2) * sqrt.(diag((λ_t .- mean(λ_t, 2)) * (λ_t .- mean(λ_t, 2))'))
t_FM = λ ./ σ_FM

σ_Sh = sqrt.(diag(1/size(X, 1)^2 * ((X'X)^-1 * (X' * Σ * X) * (X'X)^-1 .* (1 + (λ[2:end])' * Σf^-1 * λ[2:end]) + vcat(zeros(size(Σf, 1) + 1)', hcat(zeros(size(Σf, 1)), Σf)))))
t_Sh = λ ./ σ_Sh #vcat(t_FM[1], λ[2:end] ./ σ_Sh)

(λ .* 100, t_FM, t_Sh, Rsq, rmse_rmsr, BIC)

([2.92132, 0.38244, -1.84979, 0.849647, 0.143248], [3.2466, 3.40756, -1.30199, 1.36758, 0.223209], [4.53889, 4.52553, -1.8328, 1.54096, 0.34198], 0.7864557391575807, 0.12152661194507974, -984.7189554328453)