In [20]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats
from numpy.linalg import inv

In [85]:
data = pd.read_csv("data/25_Portfolios_5x5.csv",skiprows=15,index_col="DATE",parse_dates=True,nrows=1134)
data = data.dropna()
data.index = pd.to_datetime(data.index,format="%Y%m")

factors = pd.read_csv("data/F-F_Research_Data_Factors.csv",skiprows=3,index_col=0,parse_dates=True,nrows=1134)
factors = factors.dropna()
factors.index = pd.to_datetime(factors.index,format="%Y%m")
rm = factors["Mkt-RF"]
rf = factors["RF"]

mom = pd.read_csv("data/F-F_Momentum_Factor.csv",skiprows=13,index_col=0,parse_dates=True,nrows=1129)
mom = mom.dropna()
mom.index = pd.to_datetime(mom.index,format="%Y%m")

reversal = pd.read_csv("data/F-F_ST_Reversal_Factor.csv",skiprows=13,index_col=0,parse_dates=True,nrows=1139)
reversal = reversal.dropna()
reversal.index = pd.to_datetime(reversal.index,format="%Y%m")

factors_5 = pd.read_csv("data/F-F_Research_Data_5_Factors_2x3.csv",skiprows=3,index_col=0,parse_dates=True,nrows=690)
factors_5 = factors_5.dropna()
factors_5.index = pd.to_datetime(factors_5.index,format="%Y%m")

data = data["1963-07-01":"2020-06-01"]
rm = rm["1963-07-01":"2020-06-01"]
rf = rf["1963-07-01":"2020-06-01"]
factors_5 = factors_5["1963-07-01":"2020-06-01"]
factors = factors["1963-07-01":"2020-06-01"]
mom = mom["1963-07-01":"2020-06-01"]
reversal = reversal["1963-07-01":"2020-06-01"]

ex_data = data.subtract(rf,axis=0)
factors_5 = factors_5.drop("RF",axis=1)
factors_3 = factors.drop("RF",axis=1)
factors_3_mom = factors_3.copy()
factors_3_mom["Mom"] = mom
factors_3_rev = factors_3.copy()
factors_3_rev["ST Rev"] = reversal


In [86]:
factors_3_rev

Unnamed: 0,Mkt-RF,SMB,HML,ST Rev
1963-07-01,-0.39,-0.56,-0.83,-0.03
1963-08-01,5.07,-0.94,1.67,1.40
1963-09-01,-1.57,-0.30,0.18,1.19
1963-10-01,2.53,-0.54,-0.10,-0.66
1963-11-01,-0.85,-1.13,1.71,1.16
...,...,...,...,...
2020-02-01,-8.13,1.00,-3.96,-2.35
2020-03-01,-13.38,-5.10,-14.11,-11.87
2020-04-01,13.65,2.75,-1.35,15.58
2020-05-01,5.58,2.47,-4.95,-3.09


In [87]:
alphas_5 = []
alphas_mom = []
alphas_rev = []

for columns in data:
    x = sm.add_constant(factors_5)
    res = sm.OLS(ex_data[columns],x).fit()
    alphas_5.append(res.params[0])

    x = sm.add_constant(factors_3_mom)
    res = sm.OLS(ex_data[columns],x).fit()
    alphas_mom.append(res.params[0])

    x = sm.add_constant(factors_3_rev)
    res = sm.OLS(ex_data[columns],x).fit()
    alphas_rev.append(res.params[0])


In [88]:
def calc_GRS(alpha_vector, factor_dataframe, asset_returns):
    gen_cov = factor_dataframe.cov()
    generalized_sharpe = factor_dataframe.mean().T @ inv(gen_cov) @ factor_dataframe.mean()     #Cochrane 2005, p.217
    cov = asset_returns.cov()
    if isinstance(alpha_vector, list):
        alpha_vector = np.array(alpha_vector)
    #Calc GRS
    df = len(asset_returns) - len(asset_returns.columns) - len(factor_dataframe.columns) # T - N - K     (N = number of assets, K = number of factors)
    N = len(asset_returns.columns)
    GRS = (df/ N) * ((alpha_vector.T @ inv(cov) @ alpha_vector) / (1+ generalized_sharpe))
    p = 1-scipy.stats.f.cdf(GRS, len(asset_returns.columns), df) #find p-value of F test statistic
    return np.round(GRS,2), p

In [43]:
calc_GRS(alphas_5,factors_5,ex_data)

(2.84, 6.418074695369036e-06)

In [89]:
calc_GRS(alphas_mom,factors_3_mom,ex_data)

(2.77, 1.07656329082495e-05)

In [90]:
calc_GRS(alphas_rev,factors_3_mom,ex_data)

(3.48, 3.5862004810205406e-08)

In [39]:
 np.array(alphas_5).T @ inv(ex_data.cov())   @ np.array(alphas_5)

0.11809704799782655