# Empirical Asset Pricing - PS3

Maximilian Huber

## Task 1
Let me load the data into an array of DataFrames:

In [112]:
using CSV, DataFrames, Distributions, Plots, GLM; gr();

In [113]:
function print_table(table; skipcol=0)
    if skipcol == 0
        table_rounded = DataFrame(round.(Matrix(table), 3))
    elseif skipcol == 1
        table_rounded = DataFrame(hcat(Vector(table[:, skipcol]), round.(Matrix(table[:, skipcol+1:end]), 3)))
    else
        table_rounded = DataFrame(hcat(Matrix(table[:, 1:skipcol]), round.(Matrix(table[:, skipcol+1:end]), 3)))
    end
    names!(table_rounded, names(table))
end

print_table (generic function with 1 method)

In [114]:
files = ["Global", "Europe", "Japan", "Asia_Pacific_ex_Japan", "North_America"]
abbrev = ['G', 'E', 'J', 'A', 'N']

data = [CSV.read("./Data/" * file * "_5_Factors.csv", delim=',', 
        types=[Date, Float64, Float64, Float64, Float64, Float64, Float64], 
        dateformat = DateFormat("yyyymm"), nullable=false) for file in files];

In [115]:
factor_names = [:MKT, :SMB, :HML, :RMW, :CMA, :RF]
data = DataFrame()
input = DataFrame()

for (i, file) in enumerate(files)
    
    input = CSV.read("./Data/" * file * "_5_Factors.csv", delim=',', 
        types=[Date, Float64, Float64, Float64, Float64, Float64, Float64], 
        dateformat = DateFormat("yyyymm"), nullable=false)
    
    names!(input, vcat(:Date, Symbol.(abbrev[i] .* '_' .* string.(factor_names))))
    
    data = hcat(data, input[:, 2:end])
end

data = hcat(input[[:Date]], data)
head(data)

Unnamed: 0,Date,G_MKT,G_SMB,G_HML,G_RMW,G_CMA,G_RF,E_MKT,E_SMB,E_HML,E_RMW,E_CMA,E_RF,J_MKT,J_SMB,J_HML,J_RMW,J_CMA,J_RF,A_MKT,A_SMB,A_HML,A_RMW,A_CMA,A_RF,N_MKT,N_SMB,N_HML,N_RMW,N_CMA,N_RF
1,1990-07-01,0.86,0.82,-0.25,0.17,1.56,0.68,4.52,0.41,-1.43,0.22,1.21,0.68,0.1,6.32,3.69,1.06,0.24,0.68,4.2,-2.93,-1.36,1.42,0.9,0.68,-1.51,-2.5,-0.9,0.53,2.59,0.68
2,1990-08-01,-10.82,-1.57,0.6,-0.22,0.99,0.66,-11.03,0.02,0.25,-1.06,1.46,0.66,-11.88,-5.0,0.26,1.28,-0.96,0.66,-8.68,3.76,1.71,1.12,0.67,0.66,-9.63,-2.56,0.49,-2.01,3.28,0.66
3,1990-09-01,-11.97,1.16,0.8,0.03,2.12,0.6,-12.28,1.71,0.84,-0.28,1.72,0.6,-17.38,0.67,-0.11,-1.29,-0.11,0.6,-8.8,3.7,-0.14,0.87,4.17,0.6,-6.02,-2.73,-0.13,1.28,4.23,0.6
4,1990-10-01,9.56,-7.58,-4.24,2.6,1.22,0.68,6.49,-2.61,-0.67,1.06,-0.79,0.68,24.9,0.8,-3.87,0.39,4.75,0.68,-1.95,-4.76,-1.52,0.17,-2.78,0.68,-2.0,-4.62,-1.67,4.18,0.87,0.68
5,1990-11-01,-3.86,1.37,1.14,1.47,-2.35,0.57,-0.43,-2.74,0.87,0.13,-0.47,0.57,-14.12,-5.34,-0.18,3.05,-2.18,0.57,-2.98,-1.59,-0.82,3.04,0.56,0.57,5.9,0.01,-1.42,0.26,-4.57,0.57
6,1990-12-01,1.1,-0.95,-1.6,1.17,-0.33,0.6,-1.55,0.93,0.0,0.92,0.27,0.6,1.93,-6.16,-3.65,0.85,1.87,0.6,-1.1,-2.77,-1.25,-0.19,-1.49,0.6,2.54,1.58,-0.92,1.24,-2.77,0.6


### (a)
The factors from Kenneth French's [website](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html) are excess returns, as described [here](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/f-f_factors.html).

In [116]:
data_excess = copy(data);

#### (i)
Now I regress the returns of the 20 test assets on the five factor model:
$$R_{i,t}^{e}=\alpha_i + \beta'_i F_t + \epsilon_{i,t}$$

In [117]:
test_assets = Symbol.([region * '_' * string(factor) for factor in factor_names[1:end-1], region in abbrev[2:end]][:])

function run_regression(field)
    formula = @formula($(field) ~ G_MKT + G_SMB + G_HML + G_RMW + G_CMA)
    model = lm(formula, data_excess)
    return DataFrame(hcat(field, coef(model)[1], stderr(model)[1], coef(model)[1]/stderr(model)[1], model))
end

table = vcat(run_regression.(test_assets)...)
names!(table, [:test_asset, :α, :stderr_α, :t_α, :model])
print_table(table[[:test_asset, :α, :stderr_α, :t_α]], skipcol=1)

Unnamed: 0,test_asset,α,stderr_α,t_α
1,E_MKT,-0.162,0.11,-1.469
2,E_SMB,-0.027,0.091,-0.298
3,E_HML,0.055,0.078,0.706
4,E_RMW,0.229,0.066,3.47
5,E_CMA,0.004,0.067,0.067
6,J_MKT,-0.479,0.242,-1.978
7,J_SMB,-0.128,0.166,-0.772
8,J_HML,0.202,0.136,1.487
9,J_RMW,0.017,0.111,0.152
10,J_CMA,0.05,0.121,0.413


The question is whether a model of global risk factors is sufficient, or there are regional differences. If the former were true, there should be no significant alphas. But there are quite a few. For example, the European profitability factor has a highly positive alpha, indicating that there is an unexplained component of risk.
#### (ii)

In [118]:
gl_factor_names = [:G_MKT, :G_SMB, :G_HML, :G_RMW, :G_CMA]

T = size(data_excess, 1)
N = 20
K = 5
Ωhat = cov(Matrix(data_excess[gl_factor_names]))
Σhat = cov(hcat([residuals(table[:model][i]) for (i, asset) in enumerate(test_assets)]...))

F = ((T - N - K)/N * 
    (1 + mean(Matrix(data_excess[gl_factor_names]), 1) * Ωhat^-1 * mean(Matrix(data_excess[gl_factor_names]), 1)')^(-1) *
    table[:α]' * Σhat^-1 * table[:α])[1]

4.13459213512726

Its p-value is:

In [119]:
1-cdf(FDist(N, T-N-K), F)

2.4535573905914987e-8

This is a very strong rejection of the proposed global risk factor model.
### (b)
I run the cross-sectional regression by GLS:
$$E_T(R^{ei}) = \alpha_i + \lambda' \beta_i + \epsilon_i$$
Let me retrieve the betas and calculate the left hand side:

In [120]:
data_cross = DataFrame(vcat(mean(Matrix(data_excess[test_assets]), 1), hcat([coef(table[:model][i])[2:end] 
    for (i, asset) in enumerate(test_assets)]...))')
names!(data_cross, vcat(:avg_ret, Symbol.("β_" .* string.(gl_factor_names))))

head(print_table(data_cross))

Unnamed: 0,avg_ret,β_G_MKT,β_G_SMB,β_G_HML,β_G_RMW,β_G_CMA
1,0.513,1.093,0.115,0.248,0.277,-0.169
2,0.076,-0.045,0.788,-0.023,0.055,0.077
3,0.338,0.109,-0.034,0.927,-0.109,-0.073
4,0.394,-0.003,0.147,-0.321,0.663,0.09
5,0.211,-0.009,0.022,0.088,0.058,0.674
6,0.098,1.043,0.347,-0.549,-0.044,0.905


In [121]:
β = Matrix(data_cross[:, 2:end])

λhat = (β'*Σhat^-1*β)^-1 * β'*Σhat^-1*data_cross[:avg_ret]
αhat = data_cross[:avg_ret] .- β * λhat
σsq_λ = 1/T * (β'*Σhat^-1*β)^-1
cov_α = 1/T * (Σhat - β*(β'*Σhat^-1*β)^(-1)*β')

table = DataFrame(hcat(test_assets, αhat, diag(cov_α), αhat ./ diag(cov_α)))
names!(table, [:test_asset, :α, :stderror_α, :t_α])
print_table(table, skipcol=1)

Unnamed: 0,test_asset,α,stderror_α,t_α
1,E_MKT,-0.181,0.01,-19.051
2,E_SMB,-0.039,0.006,-6.294
3,E_HML,0.072,0.005,14.828
4,E_RMW,0.232,0.003,67.2
5,E_CMA,0.023,0.004,6.613
6,J_MKT,-0.5,0.048,-10.489
7,J_SMB,-0.134,0.022,-5.979
8,J_HML,0.221,0.015,14.625
9,J_RMW,0.015,0.01,1.503
10,J_CMA,0.062,0.012,5.269


The GLS regression residuals show some massive alphas. And the joint test without Shanken correction rejects clearly:

In [122]:
J = T * αhat'*Σhat^-1*αhat
1-cdf(Chisq(N - K), J)

3.964606420936434e-13

The Shanken corrected version rejects too:

In [123]:
Σfhat = cov(Matrix(data_excess[gl_factor_names]))
J = T * (1 + mean(Matrix(data_excess[gl_factor_names]), 1)*Σfhat^-1*mean(Matrix(data_excess[gl_factor_names]), 1)')[1]^-1 * αhat'*Σhat^-1*αhat
1-cdf(Chisq(N - K), J)

1.5809997755411587e-10

The Shanken correction alleviates the earlier raised issue of generated regressors. The rejection is less stark, because the uncertainty about the regressors widens the confidence intervals.

## Task 2
### (a)
AEM work with subsidiary level broker-dealer book leverage, whereas HKM use market leverage (the inverse of which is the capital ratio) of holding companies.

Intermediary asset pricing theory points towards a countercyclical leverage (i.e. the intermediary looses a lot of equity, but the debt is still there) which is found by HKM only, AEM document a procyclicality. 
But the more precise data used by AEM is about security broker-dealers, not the kind of intermediary theory had in mind. And as KHM argue, if financial health is what we want to proxy for, then we should consider the holding company, not the subsidiary.

However, the capital ratio in HKM reads a bit like the market-to-book ratio, if book equity is marked-to-market, and we enter the realm of Q theory. 

### (b)
Let me load the data:

In [124]:
AEM_data = CSV.read("./Data/AEM_data.csv", delim=',', 
        types=[Int64, Float64], nullable=false)
names!(AEM_data, [:Quarter, :AEM])

HKM_data = CSV.read("./Data/HKM_data.csv", delim=',', 
        types=[Int64, Float64], nullable=false)
names!(HKM_data, [:Quarter, :HKM])

data_quarterly = DataFrame()
data_quarterly[:year] = round.(Int64, HKM_data[1:end-31, :Quarter] ./ 10)
data_quarterly[:quarter] = round.(Int64, HKM_data[1:end-31, :Quarter] .% 10)
data_quarterly[:HKM] = HKM_data[1:end-31, :HKM]
data_quarterly[:AEM] = AEM_data[9:end, :AEM]
data_quarterly = data_quarterly[1+4*20:end, :]

data_excess[:year] = Dates.value.(Dates.Year.(data_excess[:, 1]))
data_excess[:quarter] = Dates.quarterofyear.(data_excess[:, 1]);

The factors and test assets in data_excess are all excess returns. I add the region interest rate to the excess return, aggregate 3 months, and deduct the quarterly interest rate.

In [125]:
for (i, region) in enumerate(abbrev)
    for factor in factor_names[1:end-1]
        data_quarterly[Symbol.(region * '_' * string(factor))] = zeros(size(data_quarterly, 1))
        
        for year in minimum(data_quarterly[:year]):maximum(data_quarterly[:year])
            for quarter in 1:4
                data_quarterly[(data_quarterly[:year] .== year) .* (data_quarterly[:quarter] .== quarter), Symbol(region * '_' * string(factor))] = 
                1 + 100 * (prod(1 .+ 1/100 .* (data_excess[(data_excess[:year] .== year) .* (data_excess[:quarter] .== quarter), Symbol(region * '_' * string(factor))] + 
                    data_excess[(data_excess[:year] .== year) .* (data_excess[:quarter] .== quarter), Symbol.(region * "_RF")])) -
                prod(1 .+ 1/100 .* data_excess[(data_excess[:year] .== year) .* (data_excess[:quarter] .== quarter), Symbol(region * "_RF")]))
            end
        end
    end
end

I cut the first two quarters of 1990 and we are good to go:

In [126]:
data_quarterly = data_quarterly[3:end, :]
head(print_table(data_quarterly, skipcol=2))

Unnamed: 0,year,quarter,HKM,AEM,G_MKT,G_SMB,G_HML,G_RMW,G_CMA,E_MKT,E_SMB,E_HML,E_RMW,E_CMA,J_MKT,J_SMB,J_HML,J_RMW,J_CMA,A_MKT,A_SMB,A_HML,A_RMW,A_CMA,N_MKT,N_SMB,N_HML,N_RMW,N_CMA
1,1990.0,3.0,-0.288,6.549,-20.101,1.394,2.167,0.979,5.801,-17.675,3.176,0.643,-0.135,5.511,-26.494,2.704,4.894,2.046,0.158,-12.394,5.507,1.189,4.492,6.888,-15.571,-6.689,0.449,0.768,11.571
2,1990.0,4.0,0.085,12.832,7.564,-6.287,-3.753,6.391,-0.505,5.439,-3.452,1.197,3.148,-0.002,10.449,-9.595,-6.638,5.386,5.433,-4.993,-7.98,-2.592,4.058,-2.736,7.502,-2.138,-3.007,6.814,-5.489
3,1991.0,1.0,0.17,4.242,10.199,4.601,0.552,1.536,-2.652,2.778,0.275,0.949,4.546,-2.886,9.939,7.587,2.165,-0.914,1.148,18.586,-5.255,2.807,1.016,1.739,15.527,11.119,-1.638,1.09,-5.551
4,1991.0,2.0,-0.046,7.616,-3.953,2.185,2.95,2.571,1.636,-6.814,-1.506,0.171,3.852,5.13,-6.062,3.086,6.753,3.306,0.686,1.865,1.812,2.269,1.235,5.059,-0.395,2.204,3.995,1.8,0.477
5,1991.0,3.0,0.098,14.995,6.039,0.191,-0.147,3.801,-0.05,11.113,-1.366,0.415,8.739,0.023,3.219,-5.689,1.194,-0.697,2.998,5.987,2.088,9.082,-1.507,5.34,5.295,2.649,-4.289,3.151,-1.751
6,1991.0,4.0,-0.038,-1.955,3.221,-0.383,-4.41,2.506,-1.115,2.214,-3.108,-4.221,4.06,-0.421,-1.445,1.493,-2.899,-1.454,2.912,5.851,2.352,1.732,-0.049,-3.801,7.959,-0.788,-7.127,5.142,-4.473


I employ a simplified Fama-MacBeth scheme, in which I estimate the first stage as above. That is I do not run rolling time-series regressions as the first stage, because I think the role of leverage of broker-dealers did not change much over time. But I understand that this is just a first pass analysis.

In [127]:
function run_regression(field, lev)
    formula = @formula($(field) ~ $(lev) + G_MKT + G_SMB + G_HML + G_RMW + G_CMA)
    model = lm(formula, data_quarterly)
    return model
end

models1 = run_regression.(test_assets, :HKM)
models2 = run_regression.(test_assets, :AEM);

In [128]:
function run_2stage(lev, models)
    gl_factor_names = vcat(lev, [:G_MKT, :G_SMB, :G_HML, :G_RMW, :G_CMA])

    T = size(data_quarterly, 1)
    N = 20
    K = 6
    Ωhat = cov(Matrix(data_quarterly[gl_factor_names]))
    Σhat = cov(hcat([residuals(models[i]) for (i, asset) in enumerate(test_assets)]...))

    #extract betas from time-series regression
    data_cross = DataFrame(vcat(mean(Matrix(data_quarterly[test_assets]), 1), 
            hcat([coef(models[i])[2:end] for (i, asset) in enumerate(test_assets)]...))')
    names!(data_cross, vcat(:avg_ret, Symbol.("β_" .* string.(gl_factor_names))))
    β = Matrix(data_cross[:, 2:end])
    
    #cross-sectional regression for every quarter:
    λhat_t = (β'*β)^-1 * β'*Matrix(data_quarterly[test_assets])'
    αhat_t = Matrix(data_quarterly[test_assets])' .- β * λhat_t

    λhat = mean(λhat_t, 2)[:]
    αhat = mean(αhat_t, 2)[:]

    cov_α = 1/T^2 * (αhat_t .- αhat) * (αhat_t .- αhat)'

    table_α = DataFrame(hcat(test_assets, αhat, αhat ./ diag(cov_α)))
    names!(table_α, [:test_asset, :α, :t_α])

    table_λ = DataFrame(hcat(gl_factor_names, λhat))
    names!(table_λ, [:risk_factor, :λ])

    #cov_alpha is singular, but αhat lies in its spanned subspace
    J = αhat' * (cov_α \ αhat)

    return [table_α, table_λ, J, 1-cdf(Chisq(N - K), J)]
end

run_2stage (generic function with 1 method)

In [129]:
results1 = run_2stage(:HKM, models1)
results2 = run_2stage(:AEM, models2);

The results on the risk prices:

In [130]:
table = hcat(results1[2], results2[2][:, 2:end])
names!(table, [:risk_factor, :HKM_λ, :AEM_λ])
print_table(table, skipcol = 1)

Unnamed: 0,risk_factor,HKM_λ,AEM_λ
1,HKM,0.092,-4.094
2,G_MKT,1.798,1.745
3,G_SMB,1.267,0.893
4,G_HML,2.818,2.846
5,G_RMW,1.371,1.449
6,G_CMA,1.515,1.467


The procyclical leverage factor of HKM demands a compensation in form of a positive price. The countercyclical leverage factor of AEM is insurance and implies paying a premium, hence the negative price.

The results about the alphas:

In [131]:
table = hcat(results1[1], results2[1][:, 2:end])
names!(table, [:test_asset, :HKM_α, :HKM_t_α, :AEM_α, :AEM_t_α])
print_table(table, skipcol = 1)

Unnamed: 0,test_asset,HKM_α,HKM_t_α,AEM_α,AEM_t_α
1,E_MKT,0.294,2.094,-0.458,-3.314
2,E_SMB,0.856,6.722,0.753,7.847
3,E_HML,0.174,1.846,-0.079,-0.724
4,E_RMW,1.367,16.762,1.757,19.447
5,E_CMA,0.491,5.427,0.403,4.44
6,J_MKT,-0.694,-7.962,-0.907,-11.123
7,J_SMB,0.249,1.01,-0.027,-0.105
8,J_HML,1.964,14.584,1.41,7.944
9,J_RMW,0.672,5.585,0.686,5.248
10,J_CMA,1.765,10.69,1.534,8.726


The joint tests both reject:

In [132]:
[results1[4], results2[4]]

2-element Array{Float64,1}:
 0.0
 0.0

This is surprising, but then again I assumed constant betas, that might have been a bad idea.

### (c)

In [133]:
function run_regression_big(field)
    formula = @formula($(field) ~ HKM + AEM + G_MKT + G_SMB + G_HML + G_RMW + G_CMA)
    model = lm(formula, data_quarterly)
    return model
end

models_big = run_regression_big.(test_assets)

results_big = run_2stage([:HKM, :AEM], models_big)

table = results_big[2]
print_table(table, skipcol = 1)

Unnamed: 0,risk_factor,λ
1,HKM,0.095
2,AEM,-4.907
3,G_MKT,1.827
4,G_SMB,1.211
5,G_HML,2.798
6,G_RMW,1.384
7,G_CMA,1.506


Risk prices are similar to the ones above. 

The following 

In [134]:
table = results_big[1]
print_table(table, skipcol = 1)

Unnamed: 0,test_asset,α,t_α
1,E_MKT,0.099,0.741
2,E_SMB,1.117,10.995
3,E_HML,0.081,0.786
4,E_RMW,1.46,15.981
5,E_CMA,0.424,4.683
6,J_MKT,-0.544,-7.631
7,J_SMB,0.108,0.432
8,J_HML,1.965,14.583
9,J_RMW,0.567,4.479
10,J_CMA,1.758,10.694


The joint test rejects again clearly:

In [135]:
results_big[4]

0.0

The standard error is not calulated with any correction for generated regressors, maybe that is the issue.