In [22]:
# Imports
import numpy as np
import pandas as pd
import statsmodels.api as sm
import scipy.stats as stats

In [39]:
# 1. Import FF five factor
FF5 = pd.read_csv('./Data/FF5.csv',index_col="Date")
FF5.index = pd.to_datetime(FF5.index, format="%Y%m")
FF5 = FF5.resample("ME").last().loc["1963-07-31":"2024-06-30"]
FF5.name = "FF5"

# 2. Import FF 3 factor
FF3 = pd.read_csv('./Data/FF3.csv',index_col="Date")
FF3.index = pd.to_datetime(FF3.index, format="%Y%m")
FF3 = FF3.resample("ME").last().loc["1963-07-31":"2024-06-30"]

# 3. Import MOM
MOM = pd.read_csv('./Data/MOM.csv',index_col="Date")
MOM.index = pd.to_datetime(MOM.index, format="%Y%m")
MOM = MOM.resample("ME").last().loc["1963-07-31":"2024-06-30"]

# 4. Import REV
REV = pd.read_csv('./Data/REV.csv',index_col="Date")
REV.index = pd.to_datetime(REV.index, format="%Y%m")
REV = REV.resample("ME").last().loc["1963-07-31":"2024-06-30"]

#5. INV
INV = pd.read_csv('./Data/INV.csv',index_col="Date")
INV.index = pd.to_datetime(INV.index, format="%Y%m")
INV = INV.resample("ME").last().loc["1963-07-31":"2024-06-30"]

# 5. Import Portfolios
Portfolios = pd.read_csv('./Data/Portfolios.csv',index_col="Date")
Portfolios.index = pd.to_datetime(Portfolios.index, format="%Y%m")
Portfolios = Portfolios.resample("ME").last().loc["1963-07-31":"2024-06-30"]

# 5. Import Rf
RF = pd.read_csv('./Data/Rf.csv',index_col="Date")
RF.index = pd.to_datetime(RF.index, format="%Y%m")
RF = RF.resample("ME").last().loc["1963-07-31":"2024-06-30"]

# Take out RF from portfolio returns
Portfolios["RF"] = RF
Portfolios = Portfolios.iloc[:, :-1].subtract(Portfolios["RF"], axis=0)

# 5. Merge
FFMOM = pd.concat([MOM,FF3], axis=1)
FFMOM.name = "FFMOM"
FFREV = pd.concat([REV,FF3], axis=1)
FFREV.name = "FFREV"

FF3INV = pd.concat([INV["INV"],FF3], axis=1)
FF3INV.name = "FF3INV"

FF3NW = pd.concat([INV["Non weighted"],FF3], axis=1)
FF3NW.name = "FF3INW"

In [40]:
#5. SARAH
SARAH = pd.read_csv('./Data/SARAH.csv',index_col="Date")
SARAH.index = pd.to_datetime(SARAH.index, format="%Y%m")
SARAH = SARAH.resample("ME").last().loc["1963-07-31":"2024-06-30"]

In [42]:
bozo = SARAH - Portfolios
bozo.sum()

BIG HiBM      0.000000e+00
BIG LoBM      0.000000e+00
ME1 BM1       0.000000e+00
ME1 BM2      -9.828076e-15
ME1 BM3      -1.996407e-14
ME1 BM4      -1.089406e-15
ME1 BM5       0.000000e+00
ME2 BM1       1.191582e-14
ME2 BM2      -3.975119e-15
ME2 BM3      -2.987194e-15
ME2 BM4      -6.883383e-15
ME2 BM5       4.716713e-15
ME3 BM1       1.991463e-15
ME3 BM2       1.488414e-14
ME3 BM3       6.303985e-15
ME3 BM4      -5.568462e-16
ME3 BM5      -3.573530e-16
ME4 BM1      -1.138846e-14
ME4 BM2      -3.091711e-15
ME4 BM3      -5.223252e-15
ME4 BM4       4.614364e-16
ME4 BM5      -1.552578e-15
ME5 BM1       0.000000e+00
ME5 BM2      -1.436351e-15
ME5 BM3       4.128642e-16
ME5 BM4       7.210812e-15
ME5 BM5       0.000000e+00
SMALL HiBM    0.000000e+00
SMALL LoBM    0.000000e+00
dtype: float64

In [51]:
def GRS(factor, y):
    x = sm.add_constant(factor)
    model = sm.OLS(y, x).fit()
    betas = model.params
    betas.columns = y.columns
    resid = model.resid

    # Calculate GRS
    # term 1
    alphas = betas.iloc[0]
    T, N = resid.shape
    K = factor.shape[1]
    factor_mean = factor.mean()
    alphas_mean = alphas.mean()

    # Make sure it actually is ddof=0. BASED ON PAGE 233 OF THE THING. LOOK AT SUPPORT CLASS
    omega = np.linalg.inv(factor.cov(ddof=1))
    sigma = np.linalg.inv(resid.cov(ddof=1))

    Term1 = (T - N - K) / N
    Term2 = 1 + factor_mean @ omega @ factor_mean
    Term3 = alphas @ sigma @ alphas

    GRS = Term1 * Term3 / Term2

    # Calculate if GRS is significant

    # it is distributed on f with dof1 = N and dof2 = T - N - K
    # Can either calculate p-value or critical value. Doing both

    # p-value
    df1, df2 = N, T - N - K
    p_value = 1 - stats.f.cdf(GRS, df1, df2)

    # Critical value
    alpha = 0.05
    critical_value = stats.f.ppf(1 - alpha, df1, df2)
    
    print(f"For factors {factor.columns.tolist()}, the GRS stat is {GRS}. \nThe p value is {p_value} and the critical value for a {alpha} level of significance is {critical_value}. \nAverage intercept:{alphas_mean}\n")

In [52]:
GRS(FF3INV, Portfolios)

For factors ['INV', 'Mkt-RF', 'SMB', 'HML'], the GRS stat is 4.00172178342955. 
The p value is 4.2301540048583774e-10 and the critical value for a 0.05 level of significance is 1.5217882947118426. 
Average intercept:-0.029276378893421003



In [53]:
alphas = {}
alphas_t = {}
results = {}

for column in Portfolios:
    x = sm.add_constant(FF3NW)
    y = Portfolios[column]
    model = sm.OLS(y, x).fit()

    alphas[column] = model.params[0]
    alphas_t[column] = model.tvalues[0]
    results[column] = model.summary()

  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[column] = model.tvalues[0]
  alphas[column] = model.params[0]
  alphas_t[colum

In [55]:
results["SMALL LoBM"]

0,1,2,3
Dep. Variable:,SMALL LoBM,R-squared:,0.907
Model:,OLS,Adj. R-squared:,0.906
Method:,Least Squares,F-statistic:,1765.0
Date:,"Thu, 13 Feb 2025",Prob (F-statistic):,0.0
Time:,16:59:30,Log-Likelihood:,-1695.0
No. Observations:,732,AIC:,3400.0
Df Residuals:,727,BIC:,3423.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.5172,0.093,-5.590,0.000,-0.699,-0.336
Non weighted,-0.0587,0.048,-1.221,0.223,-0.153,0.036
Mkt-RF,1.0885,0.022,48.784,0.000,1.045,1.132
SMB,1.4072,0.031,44.775,0.000,1.345,1.469
HML,-0.2365,0.040,-5.915,0.000,-0.315,-0.158

0,1,2,3
Omnibus:,55.385,Durbin-Watson:,1.959
Prob(Omnibus):,0.0,Jarque-Bera (JB):,185.381
Skew:,0.29,Prob(JB):,5.56e-41
Kurtosis:,5.396,Cond. No.,5.0


In [30]:
GRS(FF5, Portfolios)
GRS(FFMOM, Portfolios)
GRS(FFREV, Portfolios)

TypeError: 'numpy.float64' object is not callable

In [None]:
Portfolios

Unnamed: 0_level_0,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,ME2 BM5,...,ME4 BM1,ME4 BM2,ME4 BM3,ME4 BM4,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-07-31,0.8607,-0.5791,0.4379,-0.1638,-1.5911,-2.0771,-0.0801,-1.2805,-2.2344,-1.4589,...,-1.2506,-1.9481,-2.1884,-1.8441,-2.1267,-0.1153,0.2133,0.9586,-0.8562,-1.3726
1963-08-31,3.9870,1.1334,1.2477,2.1255,4.5067,5.3165,4.2691,4.1924,4.1688,7.9829,...,5.1759,4.4806,5.9832,7.4282,5.0969,5.5191,4.0050,4.3436,8.0331,6.1324
1963-09-30,-3.1578,0.3563,-1.2904,-1.8700,-0.7020,-4.3202,-1.7734,-1.1498,-1.4512,-3.1849,...,-3.0674,-2.3501,-2.0500,-4.2339,-2.2643,-1.6295,-1.0754,-1.0835,-0.4845,-3.7663
1963-10-31,0.9985,-0.9971,1.0232,-0.2045,2.1088,0.9016,3.9442,2.0624,1.9112,3.6435,...,-0.6802,0.3929,2.3329,4.5592,0.3213,5.0439,1.4527,-0.5369,2.1015,0.1957
1963-11-30,-3.6451,-4.0234,-2.0755,-1.3187,-1.3238,-4.5261,-2.0234,-1.0509,-0.3702,-0.3832,...,-1.1498,-0.9134,-1.0633,1.0910,3.2688,-1.5256,0.7372,-2.0125,-2.3538,1.0755
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-29,9.3215,6.3990,11.3072,2.2084,3.8553,9.2445,6.2036,6.7660,3.9033,0.5555,...,6.6618,7.6348,3.2897,4.1891,2.1229,5.6499,1.0896,7.6569,4.5735,2.4889
2024-03-31,-3.7123,1.1675,0.3170,1.2694,4.2800,-0.3284,1.9809,2.1231,2.3958,3.8506,...,3.4048,4.2532,4.7087,7.7568,7.2353,1.2413,5.8597,3.2374,5.7107,8.5563
2024-04-30,-13.0128,-7.6561,-7.9804,-7.9124,-8.0168,-7.2979,-8.7422,-6.8110,-6.0808,-7.3370,...,-6.1889,-6.8962,-6.4723,-7.3261,-6.2037,-4.4696,0.7208,-4.9422,-5.4638,-5.9445
2024-05-31,6.8393,2.8121,4.4908,3.1165,4.3490,4.1544,2.7902,5.0656,4.4027,4.6318,...,2.1254,1.7390,1.9487,4.8526,3.1667,5.5412,3.7588,2.6122,3.6084,3.8259


In [None]:
def Cross_Sectional(factor, Portfolios):
    # Fit everthing a first time
    y = Portfolios
    x = sm.add_constant(factor)
    model = sm.OLS(y, x).fit()
    betas = model.params.iloc[1:]
    alphas = model.params.iloc[0]
    alphas.index = Portfolios.mean().index
    betas.columns = y.columns # Ask if we just throw the betas away
    resid = model.resid
    T, N = resid.shape
    K = factor.shape[1]   
    
    # Initialize lambda and alpha dictionaries
    lambda_estimates = []
    alpha_estimates = []

    # Do regression at time t where 
    for t in range(T):
        y_t = Portfolios.iloc[t]  # Excess returns at time t. iloc of a single row returns a series so we ok doing this
        X_t = sm.add_constant(betas.T)  # Factor betas. They should be aligne already with y_t
        model_t = sm.OLS(y_t, X_t).fit() # OLS regression easy peasy
        lambda_estimates.append(model_t.params.iloc[1:])  # Store factor risk premia
        alpha_estimates.append(model_t.params.iloc[0])  # Store pricing error (alpha)

    # Convert lists to DataFrame and one as an array for some reason but I forgot and don't wanna mess it up maybe
    lambda_estimates = pd.DataFrame(lambda_estimates)
    alpha_estimates = np.array(alpha_estimates)

    # Compute mean risk premia
    lambda_mean = lambda_estimates.mean()
    lambda_var = lambda_estimates.var(ddof=0)/T # TO TRIPLE CHECK. BASED ON NOTES PAGE 40. THE DIVIDED BY T IS ABOUT NOT DOING IT LATER IN THE T-TEST BUT I STILL DOUBT IT SOMEWHAT. I THINK USING VAR INSTEAD OF COV IS GOOD BECAUSE WE ASSUME UNCORELATED BUT I DOUBT STILL.
    # THE PROPER COV AND VAR ARE ON PAGE 256 OF THE TEXTBOOK. GO BACK AND CHECK RECORDING BECAUSE HE TALKED ABOUT IT I THINK

    alpha_mean = np.mean(alpha_estimates)

    # Calculate the correction for generated regressors
    shanken_correction = (1 + (factor.mean()-alpha_mean) @ np.linalg.inv(factor.cov()) @ (factor.mean()-alpha_mean))

    lambda_var_corrected = lambda_var

    # Compute t-statistics
    lambda_t = lambda_mean / np.sqrt(lambda_var_corrected)

    # Compute alhpa
    alpha_var = alpha_estimates.var(ddof=0)/T # TO TRIPLE CHECK. BASED ON NOTES PAGE 40. THE DIVIDED BY T IS ABOUT NOT DOING IT LATER IN THE T-TEST BUT I STILL DOUBT IT SOMEWHAT

    alpha_var_corrected = alpha_var 

    # Compute mean pricing error and t-stat

    alpha_t = alpha_mean / np.sqrt(alpha_var_corrected)

    return lambda_mean, lambda_t, alpha_t, alpha_mean, alphas

In [None]:
lambda_mean = {}
lambda_t = {}
alpha_mean = {}
alpha_t = {}

factors = [FF5,FFMOM,FFREV]

for factor in factors:
    lambda_mean_temp, lambda_t_temp, alpha_t_temp, alpha_mean_temp, alphas = Cross_Sectional(factor, Portfolios)
    lambda_mean[factor.name] = lambda_mean_temp
    lambda_t[factor.name] = lambda_t_temp
    alpha_mean[factor.name] = alpha_mean_temp
    alpha_t[factor.name] = alpha_t_temp




In [None]:
alpha_t

{'FF5': 3.2937180132347708,
 'FFMOM': 2.270479684289368,
 'FFREV': 4.3185880830502885}