In [91]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats
from numpy.linalg import inv

In [92]:
data = pd.read_csv("data/25_Portfolios_5x5.csv",skiprows=15,index_col="DATE",parse_dates=True,nrows=1134)
data = data.dropna()
data.index = pd.to_datetime(data.index,format="%Y%m")

factors = pd.read_csv("data/F-F_Research_Data_Factors.csv",skiprows=3,index_col=0,parse_dates=True,nrows=1134)
factors = factors.dropna()
factors.index = pd.to_datetime(factors.index,format="%Y%m")
rm = factors["Mkt-RF"]
rf = factors["RF"]

mom = pd.read_csv("data/F-F_Momentum_Factor.csv",skiprows=13,index_col=0,parse_dates=True,nrows=1129)
mom = mom.dropna()
mom.index = pd.to_datetime(mom.index,format="%Y%m")

reversal = pd.read_csv("data/F-F_ST_Reversal_Factor.csv",skiprows=13,index_col=0,parse_dates=True,nrows=1139)
reversal = reversal.dropna()
reversal.index = pd.to_datetime(reversal.index,format="%Y%m")

factors_5 = pd.read_csv("data/F-F_Research_Data_5_Factors_2x3.csv",skiprows=3,index_col=0,parse_dates=True,nrows=690)
factors_5 = factors_5.dropna()
factors_5.index = pd.to_datetime(factors_5.index,format="%Y%m")

data = data["1963-07-01":"2020-06-01"]
rm = rm["1963-07-01":"2020-06-01"]
rf = rf["1963-07-01":"2020-06-01"]
factors_5 = factors_5["1963-07-01":"2020-06-01"]
factors = factors["1963-07-01":"2020-06-01"]
mom = mom["1963-07-01":"2020-06-01"]
reversal = reversal["1963-07-01":"2020-06-01"]

ex_data = data.subtract(rf,axis=0)
factors_5 = factors_5.drop("RF",axis=1)
factors_3 = factors.drop("RF",axis=1)
factors_3_mom = factors_3.copy()
factors_3_mom["Mom"] = mom
factors_3_rev = factors_3.copy()
factors_3_rev["ST Rev"] = reversal


In [107]:
alphas_5 = []
alphas_mom = []
alphas_rev = []

for columns in ex_data:
    x = sm.add_constant(factors_5)
    res = sm.OLS(ex_data[columns],x).fit()
    alphas_5.append(res.params[0])

    x = sm.add_constant(factors_3_mom)
    res = sm.OLS(ex_data[columns],x).fit()
    alphas_mom.append(res.params[0])

    x = sm.add_constant(factors_3_rev)
    res = sm.OLS(ex_data[columns],x).fit()
    alphas_rev.append(res.params[0])


In [94]:
def calc_GRS(alpha_vector, factor_dataframe, asset_returns):
    gen_cov = factor_dataframe.cov()
    generalized_sharpe = factor_dataframe.mean().T @ inv(gen_cov) @ factor_dataframe.mean()     #Cochrane 2005, p.217
    cov = asset_returns.cov()
    if isinstance(alpha_vector, list):
        alpha_vector = np.array(alpha_vector)
    #Calc GRS
    df = len(asset_returns) - len(asset_returns.columns) - len(factor_dataframe.columns) # T - N - K     (N = number of assets, K = number of factors)
    N = len(asset_returns.columns)
    GRS = (df/ N) * ((alpha_vector.T @ inv(cov) @ alpha_vector) / (1+ generalized_sharpe))
    p = 1-scipy.stats.f.cdf(GRS, len(asset_returns.columns), df) #find p-value of F test statistic
    return np.round(GRS,2), p

In [95]:
calc_GRS(alphas_5,factors_5,ex_data)

(2.84, 6.418074695369036e-06)

In [96]:
calc_GRS(alphas_mom,factors_3_mom,ex_data)

(2.77, 1.07656329082495e-05)

In [97]:
calc_GRS(alphas_rev,factors_3_mom,ex_data)

(3.48, 3.5862004810205406e-08)

In [206]:
## Farma MacBeth methodology for 5 factors

beta_mkt = []
beta_smb = []
beta_hml = []
beta_rmw = []
beta_cma = []

for columns in ex_data:
    x = sm.add_constant(factors_5)
    res = sm.OLS(ex_data[columns],x).fit()
    beta_mkt.append(res.params[1])
    beta_smb.append(res.params[2])
    beta_hml.append(res.params[3])
    beta_rmw.append(res.params[4])
    beta_cma.append(res.params[5])

betas = pd.DataFrame([beta_mkt,beta_smb,beta_hml,beta_rmw,beta_cma]).T
betas.columns = ["Mkt-RF","SMB","HML","RMW","CMA"]
betas = sm.add_constant(betas)
betas.index = ex_data.T.index

alpha = []
lambda_mkt = []
lambda_SMB = []
lambda_HML = []
lambda_RMW = []
lambda_CMA = []

for column in ex_data.T:
    x = betas
    y = ex_data.T[column]
    res = sm.OLS(y,x).fit()
    alpha.append(res.params[0])
    lambda_mkt.append(res.params[1])
    lambda_SMB.append(res.params[2])
    lambda_HML.append(res.params[3])
    lambda_RMW.append(res.params[4])
    lambda_CMA.append(res.params[5])

alpha = pd.DataFrame(np.array(alpha))
lambda_mkt = pd.DataFrame(np.array(lambda_mkt))
lambda_SMB = pd.DataFrame(np.array(lambda_SMB))
lambda_HML = pd.DataFrame(np.array(lambda_HML))
lambda_RMW = pd.DataFrame(np.array(lambda_RMW))
lambda_CMA = pd.DataFrame(np.array(lambda_CMA))

lambdas = pd.concat([lambda_mkt, lambda_SMB, lambda_HML, lambda_RMW, lambda_CMA], axis=1)
lambdas.columns = ['lambda_mkt', "lambda_SMB", 'lambda_HML', "lambda_RMW", 'lambda_CMA']

params = [alpha, lambda_mkt, lambda_SMB, lambda_HML, lambda_RMW, lambda_CMA]

variances = []
for i in params:
    deviations = i - i.mean()
    sum_of_squared_dev = (deviations ** 2).sum()
    var = sum_of_squared_dev / (len(i) ** 2) 
    variances.append(var)


variances = pd.DataFrame(np.array(variances)).T
variances.columns = ['alpha', 'lambda_mkt', 'lambda_SMB', 'lambda_HML', 'lambda_RMW', 'lambda_CMA']

#Shanken Adjustment
adjustment = (1 + (lambdas.mean().T @ inv(factors_5.cov()) @ lambdas.mean()))
#Adjusted variances
variances = variances * adjustment

#t-tests
t_alpha = alpha.mean() / np.sqrt(variances['alpha'])
t_mkt = lambda_mkt.mean() / np.sqrt(variances['lambda_mkt'])
t_SMB = lambda_SMB.mean() / np.sqrt(variances['lambda_SMB'])
t_HML = lambda_HML.mean() / np.sqrt(variances['lambda_HML'])
t_RMW = lambda_RMW.mean() / np.sqrt(variances['lambda_RMW'])
t_CMA = lambda_CMA.mean() / np.sqrt(variances['lambda_CMA'])
t_stats = pd.concat([t_alpha, t_mkt, t_SMB, t_HML, t_RMW, t_CMA], axis=1)
t_stats.columns = ['t_alpha', 't_mkt', 't_SMB', 't_HML', 't_RMW', 't_CMA']
t_stats = np.round(t_stats,2)

print("Factor premia and pricing error for the 5 factor model are:")
print()
print("Pricing error:", alpha.mean())
print()
print("Factor Premia:", np.round(lambdas.mean(),4))
print()
print(t_stats)


Factor premia and pricing error for the 5 factor model are:

Pricing error: 0    0.993823
dtype: float64

Factor Premia: lambda_mkt   -0.4743
lambda_SMB    0.2603
lambda_HML    0.2252
lambda_RMW    0.4411
lambda_CMA   -0.0088
dtype: float64

   t_alpha  t_mkt  t_SMB  t_HML  t_RMW  t_CMA
0     3.52  -1.42   2.12   1.92   2.59  -0.05


In [210]:
## Farma MacBeth methodology for 3 factors and momentum

beta_mkt = []
beta_smb = []
beta_hml = []
beta_mom = []

for columns in ex_data:
    x = sm.add_constant(factors_3_mom)
    res = sm.OLS(ex_data[columns],x).fit()
    beta_mkt.append(res.params[1])
    beta_smb.append(res.params[2])
    beta_hml.append(res.params[3])
    beta_mom.append(res.params[4])

betas = pd.DataFrame([beta_mkt,beta_smb,beta_hml,beta_mom]).T
betas.columns = ["Mkt-RF","SMB","HML","MoM"]
betas = sm.add_constant(betas)
betas.index = ex_data.T.index

alpha = []
lambda_mkt = []
lambda_SMB = []
lambda_HML = []
lambda_Mom = []

for column in ex_data.T:
    x = betas
    y = ex_data.T[column]
    res = sm.OLS(y,x).fit()
    alpha.append(res.params[0])
    lambda_mkt.append(res.params[1])
    lambda_SMB.append(res.params[2])
    lambda_HML.append(res.params[3])
    lambda_Mom.append(res.params[4])

alpha = pd.DataFrame(np.array(alpha))
lambda_mkt = pd.DataFrame(np.array(lambda_mkt))
lambda_SMB = pd.DataFrame(np.array(lambda_SMB))
lambda_HML = pd.DataFrame(np.array(lambda_HML))
lambda_Mom = pd.DataFrame(np.array(lambda_Mom))

lambdas = pd.concat([lambda_mkt, lambda_SMB, lambda_HML, lambda_Mom], axis=1)
lambdas.columns = ['lambda_mkt', "lambda_SMB", 'lambda_HML', "lambda_Mom"]

params = [alpha, lambda_mkt, lambda_SMB, lambda_HML, lambda_Mom]

variances = []
for i in params:
    deviations = i - i.mean()
    sum_of_squared_dev = (deviations ** 2).sum()
    var = sum_of_squared_dev / (len(i) ** 2) 
    variances.append(var)


variances = pd.DataFrame(np.array(variances)).T
variances.columns = ['alpha', 'lambda_mkt', 'lambda_SMB', 'lambda_HML', 'lambda_Mom']

#Shanken Adjustment
adjustment = (1 + (lambdas.mean().T @ inv(factors_3_mom.cov()) @ lambdas.mean()))
#Adjusted variances
variances = variances * adjustment

#t-tests
t_alpha = alpha.mean() / np.sqrt(variances['alpha'])
t_mkt = lambda_mkt.mean() / np.sqrt(variances['lambda_mkt'])
t_SMB = lambda_SMB.mean() / np.sqrt(variances['lambda_SMB'])
t_HML = lambda_HML.mean() / np.sqrt(variances['lambda_HML'])
t_Mom = lambda_Mom.mean() / np.sqrt(variances['lambda_Mom'])
t_stats = pd.concat([t_alpha, t_mkt, t_SMB, t_HML, t_Mom], axis=1)
t_stats.columns = ['t_alpha', 't_mkt', 't_SMB', 't_HML', 't_Mom']
t_stats = np.round(t_stats,2)

print("Factor premia and pricing error for the 3 factor and momentum model are:")
print()
print("Pricing error:", alpha.mean())
print()
print("Factor Premia:", np.round(lambdas.mean(),4))
print()
print(t_stats)


Factor premia and pricing error for the 3 factor and momentum model are:

Pricing error: 0    0.561218
dtype: float64

Factor Premia: lambda_mkt    0.0400
lambda_SMB    0.1414
lambda_HML    0.3091
lambda_Mom    2.1682
dtype: float64

   t_alpha  t_mkt  t_SMB  t_HML  t_Mom
0     1.62    0.1   1.03   2.37   2.82


In [213]:
## Farma MacBeth methodology for 3 factors and ST reversal factor

beta_mkt = []
beta_smb = []
beta_hml = []
beta_rev = []

for columns in ex_data:
    x = sm.add_constant(factors_3_rev)
    res = sm.OLS(ex_data[columns],x).fit()
    beta_mkt.append(res.params[1])
    beta_smb.append(res.params[2])
    beta_hml.append(res.params[3])
    beta_rev.append(res.params[4])

betas = pd.DataFrame([beta_mkt,beta_smb,beta_hml,beta_rev]).T
betas.columns = ["Mkt-RF","SMB","HML","REV"]
betas = sm.add_constant(betas)
betas.index = ex_data.T.index

alpha = []
lambda_mkt = []
lambda_SMB = []
lambda_HML = []
lambda_REV = []

for column in ex_data.T:
    x = betas
    y = ex_data.T[column]
    res = sm.OLS(y,x).fit()
    alpha.append(res.params[0])
    lambda_mkt.append(res.params[1])
    lambda_SMB.append(res.params[2])
    lambda_HML.append(res.params[3])
    lambda_REV.append(res.params[4])

alpha = pd.DataFrame(np.array(alpha))
lambda_mkt = pd.DataFrame(np.array(lambda_mkt))
lambda_SMB = pd.DataFrame(np.array(lambda_SMB))
lambda_HML = pd.DataFrame(np.array(lambda_HML))
lambda_REV = pd.DataFrame(np.array(lambda_REV))

lambdas = pd.concat([lambda_mkt, lambda_SMB, lambda_HML, lambda_REV], axis=1)
lambdas.columns = ['lambda_mkt', "lambda_SMB", 'lambda_HML', "lambda_REV"]

params = [alpha, lambda_mkt, lambda_SMB, lambda_HML, lambda_REV]

variances = []
for i in params:
    deviations = i - i.mean()
    sum_of_squared_dev = (deviations ** 2).sum()
    var = sum_of_squared_dev / (len(i) ** 2) 
    variances.append(var)


variances = pd.DataFrame(np.array(variances)).T
variances.columns = ['alpha', 'lambda_mkt', 'lambda_SMB', 'lambda_HML', 'lambda_REV']

#Shanken Adjustment
adjustment = (1 + (lambdas.mean().T @ inv(factors_3_rev.cov()) @ lambdas.mean()))
#Adjusted variances
variances = variances * adjustment

#t-tests
t_alpha = alpha.mean() / np.sqrt(variances['alpha'])
t_mkt = lambda_mkt.mean() / np.sqrt(variances['lambda_mkt'])
t_SMB = lambda_SMB.mean() / np.sqrt(variances['lambda_SMB'])
t_HML = lambda_HML.mean() / np.sqrt(variances['lambda_HML'])
t_REV = lambda_REV.mean() / np.sqrt(variances['lambda_REV'])
t_stats = pd.concat([t_alpha, t_mkt, t_SMB, t_HML, t_REV], axis=1)
t_stats.columns = ['t_alpha', 't_mkt', 't_SMB', 't_HML', 't_ST_REV']
t_stats = np.round(t_stats,2)

print("Factor premia and pricing error for the 3 factor and Short Term Reversal model are:")
print()
print("Pricing error:", alpha.mean())
print()
print("Factor Premia:", np.round(lambdas.mean(),4))
print()
print(t_stats)


Factor premia and pricing error for the 3 factor and Short Term Reversal model are:

Pricing error: 0    1.167159
dtype: float64

Factor Premia: lambda_mkt   -0.5933
lambda_SMB    0.1410
lambda_HML    0.2832
lambda_REV   -0.5522
dtype: float64

   t_alpha  t_mkt  t_SMB  t_HML  t_ST_REV
0     4.34  -1.86   1.15   2.45     -1.23
