In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import datetime

from pandas_datareader import data, wb
from __future__ import division

# Apply matplotlib's 'ggplot' style sheet to figures created after this point.
plt.style.use('ggplot')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Estimate the risk premia of the 4-factor model using a cross-sectional regression. 
- Calculate the risk premia for the four factors (i.e. their λ’s) and the pricing errors (the α’s). 
- Compute the standard errors of the factor premia assuming that the betas from the first stage are known. (It is sufficient to run an OLS cross-sectional regression; you do not need to run a GLS regression for this question.)
- Are the factors significant?

## Data Acquisition and Cleaning

In [2]:
# Sample window shared by every download: 1 July 1963 through 31 March 2017.
s_date = datetime.datetime(year=1963, month=7, day=1)
e_date = datetime.datetime(year=2017, month=3, day=31)

# Each download is a bundle that is indexed by integer for its tables and by
# 'DESCR' for the dataset description (both access patterns are used below).
ff_5f_data = data.DataReader(
    'F-F_Research_Data_5_Factors_2x3',
    data_source='famafrench', start=s_date, end=e_date)
ff_mom_data = data.DataReader(
    'F-F_Momentum_Factor',
    data_source='famafrench', start=s_date, end=e_date)
port_data = data.DataReader(
    '25_Portfolios_ME_Prior_12_2',
    data_source='famafrench', start=s_date, end=e_date)

# Table 0 of each bundle is the one used by the rest of the notebook.
ff_5f_df, ff_mom_df, port_df = ff_5f_data[0], ff_mom_data[0], port_data[0]

In [3]:
# Five Fama-French factors (Mkt-RF, SMB, HML, RMW, CMA) plus the risk-free rate (RF).
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print expr` statement is a SyntaxError on Python 3.

print(ff_5f_data['DESCR'])
ff_5f_df.head()

F-F Research Data 5 Factors 2x3
-------------------------------

This file was created by CMPT_ME_BEME_OP_INV_RETS using the 201703 CRSP database. The 1-month TBill return is from Ibbotson and Associates Inc.

  0 : (645 rows x 6 cols)
  1 : Annual Factors: January-December (53 rows x 6 cols)


Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1963-07,-0.39,-0.46,-0.82,0.72,-1.16,0.27
1963-08,5.07,-0.81,1.63,0.42,-0.4,0.25
1963-09,-1.57,-0.48,0.19,-0.8,0.23,0.27
1963-10,2.53,-1.29,-0.11,2.75,-2.26,0.29
1963-11,-0.85,-0.84,1.66,-0.34,2.22,0.27


In [4]:
# Fama-French momentum factor (column 'Mom').
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print expr` statement is a SyntaxError on Python 3.

print(ff_mom_data['DESCR'])
ff_mom_df.head()

F-F Momentum Factor
-------------------

This file was created by CMPT_ME_PRIOR_RETS using the 201703 CRSP database. It contains a momentum factor, constructed from six value-weight portfolios formed using independent sorts on size and prior return of NYSE, AMEX, and NASDAQ stocks. Mom  is the average of the returns on two (big and small) high prior return portfolios minus the average of the returns on two low prior return portfolios. The portfolios are constructed monthly. Big means a firm is above the median market cap on the NYSE at the end of the previous month; small firms are below the median NYSE market cap. Prior return is measured from month -12 to - 2. Firms in the low prior return portfolio are below the 30th NYSE percentile. Those in the high portfolio are above the 70th NYSE percentile. Missing data are indicated by -99.99 or -999. Copyright 2017 Kenneth R. French

  0 : (645 rows x 1 cols)
  1 : Annual Factors: January-December (54 rows x 1 cols)


Unnamed: 0_level_0,Mom
Date,Unnamed: 1_level_1
1963-07,0.99
1963-08,1.08
1963-09,0.13
1963-10,3.14
1963-11,-0.75


In [5]:
# Test assets: the 25 portfolios from the intersections of 5 size (ME) and
# 5 prior-return sorts.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print expr` statement is a SyntaxError on Python 3.

print(port_data['DESCR'])
port_df.head()

25 Portfolios ME Prior 12 2
---------------------------

This file was created by CMPT_ME_PRIOR_RETS using the 201703 CRSP database. It contains value- weighted returns for the intersections of 5 ME portfolios and 5 prior return portfolios. The portfolios are constructed monthly. ME is market cap at the end of the previous month. PRIOR_RET is from -12 to - 2. Missing data are indicated by -99.99 or -999. Copyright 2017 Kenneth R. French

  0 : Average Value Weighted Returns -- Monthly (645 rows x 25 cols)
  1 : Average Equal Weighted Returns -- Monthly (645 rows x 25 cols)
  2 : Average Value Weighted Returns -- Annual (54 rows x 25 cols)
  3 : Average Equal Weighted Returns -- Annual (54 rows x 25 cols)
  4 : Number of Firms in Portfolios (645 rows x 25 cols)
  5 : Average Firm Size (645 rows x 25 cols)
  6 : Equally-Weighted Average of Prior Returns (645 rows x 25 cols)
  7 : Value-Weighted Average of Prior Returns (645 rows x 25 cols)


Unnamed: 0_level_0,SMALL LoPRIOR,ME1 PRIOR2,ME1 PRIOR3,ME1 PRIOR4,SMALL HiPRIOR,ME2 PRIOR1,ME2 PRIOR2,ME2 PRIOR3,ME2 PRIOR4,ME2 PRIOR5,...,ME4 PRIOR1,ME4 PRIOR2,ME4 PRIOR3,ME4 PRIOR4,ME4 PRIOR5,BIG LoPRIOR,ME5 PRIOR2,ME5 PRIOR3,ME5 PRIOR4,BIG HiPRIOR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-07,-0.19,-1.16,1.11,-0.79,-0.51,-2.07,-1.43,-2.24,-1.43,-0.06,...,-2.72,-1.14,-1.23,-1.44,-1.4,-1.39,-0.4,1.03,0.36,-0.07
1963-08,1.93,1.8,2.93,2.81,5.83,4.26,4.93,4.19,4.32,7.17,...,5.97,4.6,4.97,4.83,7.94,5.92,4.85,5.78,4.31,7.12
1963-09,-0.99,-0.97,-0.58,-0.84,0.33,-3.74,-0.93,-0.63,-1.67,-0.82,...,-4.72,-1.61,-2.24,-1.82,-2.2,-2.28,1.51,-1.49,-1.6,-2.6
1963-10,-0.66,-0.32,1.53,3.11,2.81,3.76,0.3,1.75,1.97,4.36,...,0.0,0.1,0.84,0.71,3.81,2.56,1.6,3.1,0.94,8.51
1963-11,-2.08,-1.51,-1.46,-1.62,-2.38,-2.76,-0.48,-0.83,-0.99,-1.03,...,1.12,-0.41,-1.28,0.23,-0.2,-1.04,1.77,-0.97,-2.16,-0.7


## STEP 1: Time Series

$R_{i,t} - R_{f,t} = \alpha_i + b_i (R_{m,t} - R_{f,t}) + s_i SMB_t + h_i HML_t + w_i MOM_t + \epsilon_{i,t}$

In [6]:
# Four-factor regressor set: Mkt-RF, SMB and HML taken from the 5-factor table,
# with the momentum table appended column-wise.

ff_4f_df = pd.concat(
    objs=[ff_5f_df.loc[:, ['Mkt-RF', 'SMB', 'HML']], ff_mom_df],
    axis=1,
)
# Design matrix for the time-series regressions: the factors plus an intercept column.
X = sm.add_constant(ff_4f_df)

In [7]:
def get_coefs(port_name, x=X):
    """Run the first-pass (time-series) regression for one portfolio.

    The portfolio's return in excess of RF is regressed by OLS on ``x``.

    Parameters
    ----------
    port_name : column label in ``port_df`` identifying the portfolio.
    x : regressor matrix; defaults to the module-level ``X`` as it was bound
        when this function was defined.

    Returns
    -------
    The fitted coefficient vector (``params``) of the regression, labelled
    by the columns of ``x``.
    """
    # Excess return: portfolio return minus the risk-free rate, aligned on the index.
    excess_ret = port_df[port_name].sub(ff_5f_df['RF'])
    # Only the point estimates are returned; the HC0 covariance option is kept as in
    # the original call.
    return sm.OLS(excess_ret, x).fit(cov_type='HC0').params

## STEP 2: Cross-Sectional Regression

$E(R_i - R_f) = \alpha + b_i \lambda_b + s_i \lambda_s + h_i \lambda_h + w_i \lambda_w$

In [8]:
# First-pass coefficients, one Series per portfolio. list(...) materialises the
# result so `betas` remains a reusable list on Python 3, where map() is a lazy,
# one-shot iterator (on Python 2 map() already returns a list).
betas = list(map(get_coefs, port_df.columns))

# One row per portfolio, one column per first-pass coefficient.
f_model = pd.concat(betas, axis=1).T
f_model.index = port_df.columns
# Dependent variable of the second pass: mean portfolio return minus mean RF.
f_model['ERe'] = port_df.mean() - ff_5f_df['RF'].mean()
f_model

Unnamed: 0,const,Mkt-RF,SMB,HML,Mom,ERe
SMALL LoPRIOR,-0.374881,1.050699,1.234908,0.009731,-0.701756,0.032698
ME1 PRIOR2,0.013437,0.887058,0.979273,0.296299,-0.25496,0.66569
ME1 PRIOR3,0.178759,0.865077,0.894879,0.324013,-0.077206,0.924171
ME1 PRIOR4,0.230807,0.88134,0.918611,0.261583,0.078935,1.070744
SMALL HiPRIOR,0.301665,1.04077,1.146688,0.042583,0.287063,1.341147
ME2 PRIOR1,-0.199643,1.177728,0.959606,-0.050107,-0.728481,0.162977
ME2 PRIOR2,0.108984,0.973437,0.779531,0.205272,-0.315007,0.681891
ME2 PRIOR3,0.103537,0.939425,0.689156,0.268444,-0.05778,0.826651
ME2 PRIOR4,0.158453,0.963387,0.759886,0.251326,0.086115,1.000527
ME2 PRIOR5,0.153886,1.139345,0.949923,-0.052744,0.352645,1.201907


In [9]:
# Second pass: regress mean excess returns on the first-pass factor loadings.
# Dropping the first-pass intercept ('const') and the dependent variable ('ERe')
# leaves exactly the factor-loading columns; a fresh intercept is then added.
# NOTE(review): HC0 is a heteroskedasticity-robust estimator computed from the
# cross-section of portfolios alone -- confirm this is the intended
# standard error for the factor premia.
xs_regressors = sm.add_constant(f_model.drop(['const', 'ERe'], axis=1))
model = sm.OLS(f_model['ERe'], xs_regressors)
res = model.fit(cov_type='HC0', use_t=True)
res.summary()

0,1,2,3
Dep. Variable:,ERe,R-squared:,0.85
Model:,OLS,Adj. R-squared:,0.82
Method:,Least Squares,F-statistic:,34.25
Date:,"Wed, 10 May 2017",Prob (F-statistic):,1.09e-08
Time:,21:37:35,Log-Likelihood:,15.172
No. Observations:,25,AIC:,-20.34
Df Residuals:,20,BIC:,-14.25
Df Model:,4,,
Covariance Type:,HC0,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
const,0.0265,0.863,0.031,0.976,-1.775 1.827
Mkt-RF,0.5586,0.777,0.719,0.481,-1.063 2.180
SMB,0.2273,0.087,2.619,0.016,0.046 0.408
HML,0.6265,0.531,1.180,0.252,-0.481 1.734
Mom,0.7562,0.090,8.425,0.000,0.569 0.943

0,1,2,3
Omnibus:,1.762,Durbin-Watson:,1.314
Prob(Omnibus):,0.414,Jarque-Bera (JB):,1.04
Skew:,-0.499,Prob(JB):,0.594
Kurtosis:,3.035,Cond. No.,55.6


In [10]:
# Point estimates from the cross-sectional fit: the intercept ('const') and one
# coefficient per factor loading.
res.params

const     0.026469
Mkt-RF    0.558553
SMB       0.227279
HML       0.626517
Mom       0.756182
dtype: float64

In [11]:
# Standard errors of the cross-sectional coefficients, based on the covariance
# type passed to fit() (HC0).
res.bse

const     0.863405
Mkt-RF    0.777159
SMB       0.086790
HML       0.530858
Mom       0.089757
dtype: float64