In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import math


In [2]:
def fm_summary(p): #Summary Statistics Function
    s = p.describe().T
    s['std_error'] = s['std']/np.sqrt(s['count'])
    s['tstat'] = s['mean']/s['std_error']
    return s[['mean','std','tstat']].T

In [68]:
data=pd.read_csv("F-F_Research_Data_5_Factors_2x3.csv", skiprows=3)
datamom=pd.read_csv("F-F_Momentum_Factor.csv",skiprows=13)
dataportA=pd.read_csv("6_Portfolios_2x3.csv",skiprows=1)
dataportB=pd.read_csv("6_Portfolios_MOM_2x3.csv",skiprows=1)

Fama French

$ r = R_f + b_m \cdot \text{MER} + b_s \cdot \text{SMB} + b_v \cdot \text{HML} + b_p \cdot \text{RMW} + b_i \cdot \text{CMA}+ b_k \cdot \text{UMD} $

$r$ is expected portfolio return \ $b_m$, $b_s$, $b_v$, $b_p$, $b_i$, $b_K$ are factor sensitivities

## General Rename index function

In [59]:
def update(data):
    data.set_index('Unnamed: 0',drop=True,inplace=True) #data 688-5=683
    data.index.names = ['Date']
#https://github.com/anshulb1331/Fama-MacBeth/blob/master/Fin%20Tech%20Final%20Project%20Fama%20Macbeth%20Fama%20-%203.ipynb
def update_withrf(dataport):
    for x in range(len(dataport.columns)):
        dataport.iloc[:,x]=dataport.iloc[:,x]-dataport['RF']
    dataport.drop('RF',axis=1,inplace=True)
        

## Pre-Process Data

In [69]:
update(data) #Format data from csv to pd
update(datamom)
update(dataportA)
update(dataportB)

In [70]:
#Get data from 1963 July to 2018 Aug
data=data[:662] 
#data.drop('RF', axis=1, inplace=True)
datamom.columns=['UMD']
datamom=datamom[438:1100]
dataportA=dataportA[:662]  #Return of 18 portfolio
dataportB=dataportB[:662]  #Return of 24 portfolio
dataportA_rf=dataportA.copy()
dataportB_rf=dataportB.copy()
dataportB_rf['RF']=data['RF'].astype('float64')
dataportA_rf['RF']=data['RF'].astype('float64')
finaldata=pd.concat([data,datamom],axis=1,sort=False)

In [71]:
finaldata=finaldata.astype('float64')
dataportA=dataportA.astype('float64')#Convert String to Float
dataportB=dataportB.astype('float64')
update_withrf(dataportA_rf) #Excess Return of 18 portfolio
update_withrf(dataportB_rf) #Excess Return of 24 portfolio

## Table Output

In [80]:
Table1_PanelA=fm_summary(finaldata).copy()

In [81]:
#Summary Statistics for time-series factor returns
#Table1_PanelA.drop('RF', axis=1, inplace=True)
Table1_PanelA.drop('RF',axis=1)

Unnamed: 0,Mkt-RF,SMB,HML,RMW,CMA,UMD
mean,0.539094,0.256027,0.324154,0.257598,0.276858,0.669879
std,4.37375,3.020256,2.804961,2.178139,1.999095,4.180634
tstat,3.171314,2.181079,2.973402,3.04289,3.563303,4.122715


## Summary Statistics for cross-section regression

In [90]:
# Calculate betas by regressing sets of 662 Monthly Returns
vec4_data=finaldata[['SMB','HML','RMW','CMA']]
vec5_data=finaldata[['Mkt-RF','SMB','HML','RMW','CMA','UMD']]

## Factor Exposures ($\beta$)

In [91]:
#dataport=np.mat(dataport)
def cross_section(fact,dataport,dataport2):
    #dataport excess return, dataport2 normal return
    # Step 1 Running Times Series Regression
    F=sm.add_constant(fact)
    
    ts_res=sm.OLS(dataport,F).fit()
    betas=ts_res.params[1:] 
    #Factor value normalization
    betas=(betas-betas.mean())/betas.std()
    b=sm.add_constant(betas.T)
    a=dataport2.mean(axis=0)
    a.index=b.index
    #Step 2 Running Cross Section Regression
    cs_res=sm.OLS(a,b,missing='drop').fit()
    print(cs_res.summary())
    return(betas.T)
    

## 4 Factor Cross Sectional Regression 

In [92]:
cross_section(vec4_data,dataportA_rf,dataportA)


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.259
Model:                            OLS   Adj. R-squared:                  0.101
Method:                 Least Squares   F-statistic:                     1.635
Date:                Wed, 09 Sep 2020   Prob (F-statistic):              0.226
Time:                        15:03:33   Log-Likelihood:                 7.3603
No. Observations:                  18   AIC:                            -6.721
Df Residuals:                      14   BIC:                            -3.159
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0640      1.035     -0.062      0.9

  "anyway, n=%i" % int(n))


Unnamed: 0,SMB,HML,RMW,CMA
0,1.42146,-0.113736,-0.417804,-0.889919
1,1.254126,0.264933,-0.440778,-1.078282
2,1.107803,0.527607,-0.537418,-1.097992
3,0.983333,0.241086,0.17052,-1.394939
4,0.558932,1.001094,-0.295985,-1.264041
5,0.410305,1.154828,-0.41297,-1.152163
6,1.353577,0.158197,-0.761829,-0.749945
7,1.21554,0.259014,-0.308197,-1.166357
8,1.167781,0.135643,-0.031861,-1.271563
9,0.791677,0.937301,-0.868397,-0.860581


## 5 Factor Cross sectional regression


In [125]:
cross_section(vec5_data,dataportB)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.339
Model:                            OLS   Adj. R-squared:                  0.156
Method:                 Least Squares   F-statistic:                     1.848
Date:                Wed, 09 Sep 2020   Prob (F-statistic):              0.154
Time:                        01:10:48   Log-Likelihood:                 6.5476
No. Observations:                  24   AIC:                            -1.095
Df Residuals:                      18   BIC:                             5.973
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5701      0.767      2.048      0.0

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
Mkt-RF,1.317152,1.418073,1.299636,1.937809,1.918921,1.480384,1.267039,1.435115,1.298213,1.801567,...,1.344023,1.59603,1.977244,1.873512,1.295817,1.43471,1.38911,1.755356,1.930515,1.888454
SMB,1.219714,1.118198,1.021872,-0.519567,-0.877986,-0.490853,1.18352,1.057968,1.025716,-0.305246,...,1.178706,-0.749029,-0.841785,-0.241694,1.116187,1.002779,1.071812,-0.161756,-0.741948,-0.530107
HML,-0.965915,-0.3765,0.24194,-0.884126,0.013296,1.059248,-0.48532,-0.394288,-0.576889,0.156562,...,-0.446067,-0.729742,-0.311521,-0.181636,-0.155164,-0.264005,-0.603036,-0.1178,-0.303528,-0.470329
RMW,-0.631348,-0.618348,-0.817621,0.077156,-0.271666,-0.623869,-1.215022,-0.352314,0.197548,-1.242508,...,-0.552829,-0.317939,-0.214913,-0.164364,-0.496407,-0.340366,-0.907853,0.028717,0.093403,-0.449429
CMA,-0.516644,-0.655379,-0.78423,-0.321253,-0.10258,-0.855846,-0.356868,-0.782514,-0.969842,-0.270918,...,-1.025188,0.887073,-0.141395,-1.16486,-0.523195,-0.72903,-0.850824,-0.141493,-0.221734,-0.782035
UMD,-0.42296,-0.886044,-0.961597,-0.290019,-0.679985,-0.569064,-0.393349,-0.963967,-0.974747,-0.139457,...,-0.498645,-0.686393,-0.46763,-0.120959,-1.237238,-1.104088,-0.099209,-1.363025,-0.756708,0.343446


https://quant.stackexchange.com/questions/24343/fama-macbeth-second-step-confusion
https://www.quantopian.com/posts/implementing-fama-macbeth-1973-and-fama-french-1992-portfolios-construction-and-regressions
https://www.quantopian.com/lectures/fundamental-factor-models

In [None]:
https://github.com/jerryxyx/AlphaTrading/blob/master/APT_FammaMacbeth.ipynb
    cross_sectional_z = X.apply(lambda row: (row-row.mean())/row.std(), axis=1)
