# Empirical Asset Pricing - PS3

Maximilian Huber

## Task 1
Let me load the data into an array of DataFrames:

In [36]:
using CSV, DataFrames, Distributions, Plots, GLM; gr();

In [37]:
files = ["Global", "Europe", "Japan", "Asia_Pacific_ex_Japan", "North_America"]
abbrev = ['G', 'E', 'J', 'A', 'N']

data = [CSV.read("./Data/" * file * "_5_Factors.csv", delim=',', 
        types=[Date, Float64, Float64, Float64, Float64, Float64, Float64], 
        dateformat = DateFormat("yyyymm"), nullable=false) for file in files];

In [38]:
factor_names = [:MKT, :SMB, :HML, :RMW, :CMA, :RF]
data = DataFrame()
input = DataFrame()

for (i, file) in enumerate(files)
    
    input = CSV.read("./Data/" * file * "_5_Factors.csv", delim=',', 
        types=[Date, Float64, Float64, Float64, Float64, Float64, Float64], 
        dateformat = DateFormat("yyyymm"), nullable=false)
    
    names!(input, vcat(:Date, Symbol.(abbrev[i] .* '_' .* string.(factor_names))))
    
    data = hcat(data, input[:, 2:end])
end

data = hcat(input[[:Date]], data)
head(data)

Unnamed: 0,Date,G_MKT,G_SMB,G_HML,G_RMW,G_CMA,G_RF,E_MKT,E_SMB,E_HML,E_RMW,E_CMA,E_RF,J_MKT,J_SMB,J_HML,J_RMW,J_CMA,J_RF,A_MKT,A_SMB,A_HML,A_RMW,A_CMA,A_RF,N_MKT,N_SMB,N_HML,N_RMW,N_CMA,N_RF
1,1990-07-01,0.86,0.82,-0.25,0.17,1.56,0.68,4.52,0.41,-1.43,0.22,1.21,0.68,0.1,6.32,3.69,1.06,0.24,0.68,4.2,-2.93,-1.36,1.42,0.9,0.68,-1.51,-2.5,-0.9,0.53,2.59,0.68
2,1990-08-01,-10.82,-1.57,0.6,-0.22,0.99,0.66,-11.03,0.02,0.25,-1.06,1.46,0.66,-11.88,-5.0,0.26,1.28,-0.96,0.66,-8.68,3.76,1.71,1.12,0.67,0.66,-9.63,-2.56,0.49,-2.01,3.28,0.66
3,1990-09-01,-11.97,1.16,0.8,0.03,2.12,0.6,-12.28,1.71,0.84,-0.28,1.72,0.6,-17.38,0.67,-0.11,-1.29,-0.11,0.6,-8.8,3.7,-0.14,0.87,4.17,0.6,-6.02,-2.73,-0.13,1.28,4.23,0.6
4,1990-10-01,9.56,-7.58,-4.24,2.6,1.22,0.68,6.49,-2.61,-0.67,1.06,-0.79,0.68,24.9,0.8,-3.87,0.39,4.75,0.68,-1.95,-4.76,-1.52,0.17,-2.78,0.68,-2.0,-4.62,-1.67,4.18,0.87,0.68
5,1990-11-01,-3.86,1.37,1.14,1.47,-2.35,0.57,-0.43,-2.74,0.87,0.13,-0.47,0.57,-14.12,-5.34,-0.18,3.05,-2.18,0.57,-2.98,-1.59,-0.82,3.04,0.56,0.57,5.9,0.01,-1.42,0.26,-4.57,0.57
6,1990-12-01,1.1,-0.95,-1.6,1.17,-0.33,0.6,-1.55,0.93,0.0,0.92,0.27,0.6,1.93,-6.16,-3.65,0.85,1.87,0.6,-1.1,-2.77,-1.25,-0.19,-1.49,0.6,2.54,1.58,-0.92,1.24,-2.77,0.6


### (a)
The factors from Kenneth French's [website](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html) are not excess returns, as described [here](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/f-f_factors.html). So I deduct the region interest rate from the four factors in each region. This mimics a strategy where exchange rate risk is hedged.

In [39]:
data_excess = copy(data)

for (i, region) in enumerate(abbrev)
    for factor in factor_names[2:end-1]
        data_excess[Symbol(region * '_' * string(factor))] -= data_excess[Symbol(region * '_' * string(:RF))]
    end
end

#### (i)
Now I regress the returns of the 20 test assets on the five factor model:
$$R_t^{ei}=\alpha_i + \beta'_i f_t + \epsilon_t^i$$

In [40]:
test_assets = Symbol.([region * '_' * string(factor) for factor in factor_names[1:end-1], region in abbrev[2:end]][:])

function run_regression(field)
    formula = @formula($(field) ~ G_MKT + G_SMB + G_HML + G_RMW + G_CMA)
    model = lm(formula, data_excess)
    return DataFrame(hcat(field, coef(model)[1], stderr(model)[1], coef(model)[1]/stderr(model)[1], model))
end

table = vcat(run_regression.(test_assets)...)
names!(table, [:test_asset, :α, :stderr_α, :t_α, :model])
table[[:test_asset, :α, :stderr_α, :t_α]]

Unnamed: 0,test_asset,α,stderr_α,t_α
1,E_MKT,-0.0527406044716506,0.1033988554603387,-0.5100695190178453
2,E_SMB,-0.0542851969767472,0.0856372587259417,-0.6338969484120452
3,E_HML,-0.0074165033617942,0.073293209704331,-0.1011895016156722
4,E_RMW,0.1352469127670236,0.0619363851305006,2.183642336924523
5,E_CMA,-0.0326146199665229,0.0624435500249124,-0.5223056657334664
6,J_MKT,-0.3384677432730287,0.2264480250863972,-1.4946818067584928
7,J_SMB,-0.0342310837358698,0.1554557709240205,-0.2201982180037585
8,J_HML,0.1153942569712205,0.1270513127101894,0.9082492302495196
9,J_RMW,-0.1724864101212019,0.1040721220505061,-1.6573738165682297
10,J_CMA,-0.1078447569419012,0.1133801629850698,-0.9511783552128292


The question is whether a model of global risk factors is sufficient, or there are regional differences. If the former were true, there should be no significant alphas. But there are quite a few. For example, the European profitability factor has a highly positive alpha, indicating that there is an unexplained component of risk.
#### (ii)

In [41]:
gl_factor_names = [:G_MKT, :G_SMB, :G_HML, :G_RMW, :G_CMA]

T = size(data_excess, 1)
N = 20
K = 5
Ωhat = cov(Matrix(data_excess[gl_factor_names])) * (T-1) / T
Σhat = cov(hcat([residuals(table[:model][i]) for (i, asset) in enumerate(test_assets)]...))

F = ((T - N - K)/N * 
    (1 + mean(Matrix(data_excess[gl_factor_names]), 1) * Ωhat^-1 * mean(Matrix(data_excess[gl_factor_names]), 1)')^(-1) *
    table[:α]' * Σhat^-1 * table[:α])[1]

2.6894189576509064

In [42]:
Σhat

20×20 Array{Float64,2}:
  3.35236    -0.275107     0.266992   …  -0.0741044   -0.466731    0.526357 
 -0.275107    2.29956     -0.0568875      0.351297    -0.0354403   0.0906464
  0.266992   -0.0568875    1.6844        -0.836264     0.391716   -0.299323 
 -0.279696   -0.267391    -0.744031       0.268297    -0.45946     0.0817957
 -0.221894   -0.220436     0.524989      -0.515375     0.165598   -0.702777 
 -3.07408     0.759712    -0.202249   …  -0.358532     1.18144    -0.723059 
 -0.827339   -0.15525      0.342935      -0.510536     0.556562   -0.645266 
 -0.197058    0.0870831   -0.534456      -1.22348      0.157795   -0.798429 
  0.732467   -0.0495245    0.183722       0.314644    -0.976024    0.724148 
 -0.428955   -0.00696507  -0.244895      -0.614252     0.617787   -1.31399  
  0.142322   -0.758483    -0.295289   …  -0.302268     0.388499    0.28283  
 -0.267575    0.941168    -0.244362       0.32537      0.0404561   0.0748535
 -0.871485   -0.408756    -0.34289       -0.623701  

Its p-value is:

In [43]:
1-cdf(FDist(N, T-N-1), F)

0.00015880338622564771

This is a very strong rejection of the proposed global risk factor model.
### (b)
I run the cross-sectional regression by GLS:
$$E_T(R^{ei}) = \alpha_i + \lambda' \beta_i + \epsilon_i$$
Let me retrieve the betas and calculate the left hand side:

In [44]:
data_cross = DataFrame(vcat(mean(Matrix(data_excess[test_assets]), 1), hcat([coef(table[:model][i])[2:end] for (i, asset) in enumerate(test_assets)]...))')
names!(data_cross, vcat(:avg_ret, Symbol.("β_" .* string.(gl_factor_names))))

head(data_cross)

Unnamed: 0,avg_ret,β_G_MKT,β_G_SMB,β_G_HML,β_G_RMW,β_G_CMA
1,0.5134545454545455,1.0842578172931454,0.0924975066776787,0.2566013153412476,0.2502328596295561,-0.1888748914568086
2,-0.145,-0.0384141463240621,0.8033251196765424,-0.0279854074716552,0.0789455981249521,0.0918722429712508
3,0.1176363636363636,0.1091392044165728,-0.0336364364642644,0.9253070074358312,-0.114758748578649,-0.0739903854445199
4,0.1736363636363636,0.0004858807033843,0.1571538250170379,-0.3257759848270682,0.6698006363771443,0.0974018971380829
5,-0.0096666666666666,-0.0057211266442854,0.0301251595376786,0.0845756878970942,0.0686101151577334,0.6812791046769924
6,0.0978787878787878,1.0462171512873686,0.3523976180751952,-0.5461372449064847,-0.0176344134795162,0.9141647651584092


In [45]:
β = Matrix(data_cross[:, 2:end])

λhat = (β'*Σhat^-1*β)^-1 * β'*Σhat^-1*data_cross[:avg_ret]
αhat = data_cross[:avg_ret] .- β * λhat
σsq_λ = 1/T * (β'*Σhat^-1*β)^-1
cov_α = 1/T * (Σhat - β*(β'*Σhat^-1*β)^(-1)*β')

table = DataFrame(hcat(test_assets, αhat, diag(cov_α), αhat ./ diag(cov_α)))
names!(table, [:test_asset, :α, :stderror_α, :t_α])

Unnamed: 0,test_asset,α,stderror_α,t_α
1,E_MKT,-0.084157958933536,0.0095729541489882,-8.791221353800278
2,E_SMB,-0.0412103940450479,0.0061532971162287,-6.697286554936444
3,E_HML,0.0086578524715614,0.0048540252930762,1.783643872624867
4,E_RMW,0.1532956988833567,0.0034461307069628,44.48342559196159
5,E_CMA,-0.0152562002949488,0.0035570969436746,-4.288947008340005
6,J_MKT,-0.3789797652870288,0.04748774533503,-7.980580307894069
7,J_SMB,-0.0156651844433414,0.0222042064648891,-0.7055052594701962
8,J_HML,0.1374426820720548,0.0151016008186934,9.10119951667126
9,J_RMW,-0.1578192187746155,0.0100868658050929,-15.64601153858235
10,J_CMA,-0.1084230334052829,0.0119171869727167,-9.098039130669566


The GLS regression residuals show some massive alphas. And the joint test without Shanken correction rejects clearly:

In [46]:
J = T * α'*Σhat^-1*α
1-cdf(Chisq(N - 1), J)

LoadError: [91mUndefVarError: α not defined[39m

The Shanken corrected version rejects too:

In [47]:
Σfhat = cov(Matrix(data_excess[[:G_MKT, :G_SMB, :G_HML, :G_RMW, :G_CMA]]))
J = T * (1 + λhat'*Σfhat^-1*λhat) * αhat'*Σhat^-1*αhat
1-cdf(Chisq(N - K), J)

8.110014933093712e-5

The Shanken correction alleviates the earlier raised issue of generated regressors. The rejection is less stark, because the uncertainty about the regressors widens the confidence intervals.

## Task 2
### (a)

In [63]:
AEM_data = CSV.read("./Data/AEM_data.csv", delim=',', 
        types=[Int64, Float64], nullable=false)
names!(AEM_data, [:Quarter, :AEM])

HKM_data = CSV.read("./Data/HKM_data.csv", delim=',', 
        types=[Int64, Float64], nullable=false)
names!(HKM_data, [:Quarter, :HKM])

data_quarterly = copy(HKM_data[1:end-31, :])
data_quarterly[:AEM] = AEM_data[9:end, :AEM]
data_quarterly = data_quarterly[1+4*20:end, :]

Unnamed: 0,Quarter,HKM,AEM
1,19901,-0.1159,-0.2433
2,19902,0.0165,-0.5813
3,19903,-0.2885,6.5486
4,19904,0.0853,12.8321
5,19911,0.1695,4.2417
6,19912,-0.0457,7.6165
7,19913,0.0982,14.9952
8,19914,-0.0385,-1.9547
9,19921,0.1302,2.7868
10,19922,0.0335,-1.5409


In [64]:
head(data_excess)

Unnamed: 0,Date,G_MKT,G_SMB,G_HML,G_RMW,G_CMA,G_RF,E_MKT,E_SMB,E_HML,E_RMW,E_CMA,E_RF,J_MKT,J_SMB,J_HML,J_RMW,J_CMA,J_RF,A_MKT,A_SMB,A_HML,A_RMW,A_CMA,A_RF,N_MKT,N_SMB,N_HML,N_RMW,N_CMA,N_RF,year,month,quarter
1,1990-07-01,0.86,0.1399999999999999,-0.93,-0.51,0.88,0.68,4.52,-0.2700000000000001,-2.11,-0.4600000000000001,0.5299999999999999,0.68,0.1,5.640000000000001,3.01,0.38,-0.44,0.68,4.2,-3.61,-2.04,0.7399999999999999,0.2199999999999999,0.68,-1.51,-3.18,-1.58,-0.15,1.91,0.68,1990,7,3
2,1990-08-01,-10.82,-2.23,-0.06,-0.88,0.3299999999999999,0.66,-11.03,-0.64,-0.41,-1.7200000000000002,0.7999999999999999,0.66,-11.88,-5.66,-0.4,0.62,-1.62,0.66,-8.68,3.1,1.0499999999999998,0.4600000000000001,0.01,0.66,-9.63,-3.22,-0.17,-2.67,2.62,0.66,1990,8,3
3,1990-09-01,-11.97,0.5599999999999999,0.2,-0.57,1.52,0.6,-12.28,1.11,0.24,-0.88,1.12,0.6,-17.38,0.07,-0.71,-1.89,-0.71,0.6,-8.8,3.1,-0.74,0.27,3.57,0.6,-6.02,-3.33,-0.73,0.68,3.63,0.6,1990,9,3
4,1990-10-01,9.56,-8.26,-4.92,1.92,0.5399999999999999,0.68,6.49,-3.29,-1.35,0.38,-1.4700000000000002,0.68,24.9,0.12,-4.55,-0.29,4.07,0.68,-1.95,-5.44,-2.2,-0.51,-3.46,0.68,-2.0,-5.3,-2.35,3.5,0.1899999999999999,0.68,1990,10,4
5,1990-11-01,-3.86,0.8000000000000002,0.57,0.9,-2.92,0.57,-0.43,-3.31,0.3,-0.4399999999999999,-1.04,0.57,-14.12,-5.91,-0.75,2.48,-2.75,0.57,-2.98,-2.16,-1.39,2.47,-0.0099999999999998,0.57,5.9,-0.5599999999999999,-1.99,-0.3099999999999999,-5.140000000000001,0.57,1990,11,4
6,1990-12-01,1.1,-1.5499999999999998,-2.2,0.57,-0.93,0.6,-1.55,0.33,-0.6,0.32,-0.3299999999999999,0.6,1.93,-6.76,-4.25,0.25,1.27,0.6,-1.1,-3.37,-1.85,-0.79,-2.09,0.6,2.54,0.98,-1.52,0.64,-3.37,0.6,1990,12,4


In [54]:
data_excess[:year] = Dates.value.(Dates.Year.(data_excess[:, 1]))
data_excess[:quarter] = Dates.quarterofyear.(data_excess[:, 1])

by(data_excess, [:year, :quarter], df -> sum(Matrix(df[[:ret, :SMB, :HML, :RMW, :CMA, :Mom]]) .^ 2, 1))

330-element Array{Int64,1}:
 3
 3
 3
 4
 4
 4
 1
 1
 1
 2
 2
 2
 3
 ⋮
 1
 1
 1
 2
 2
 2
 3
 3
 3
 4
 4
 4