In [22]:
from pprint import pprint
from pandas_datareader.famafrench import get_available_datasets
import pandas as pd
import numpy as np
from statsmodels.api import OLS, add_constant
import pandas_datareader.data as web
from pathlib import Path
from linearmodels.asset_pricing import TradedFactorModel, LinearFactorModel, LinearFactorModelGMM

In [52]:
ff_factor = 'F-F_Research_Data_5_Factors_2x3'
ff_data = web.DataReader(ff_factor, 'famafrench')[0].loc['2010': '2017']
ff_data.info()

<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 96 entries, 2010-01 to 2017-12
Freq: M
Data columns (total 6 columns):
Mkt-RF    96 non-null float64
SMB       96 non-null float64
HML       96 non-null float64
RMW       96 non-null float64
CMA       96 non-null float64
RF        96 non-null float64
dtypes: float64(6)
memory usage: 5.2 KB


In [53]:
with pd.HDFStore('../00_data/assets.h5') as store:
    prices = store['/quandl/wiki/prices'].adj_close.unstack().loc['2010':'2017']
    equities = store['/us_equities/stocks'].drop_duplicates()

In [54]:
sectors = equities.filter(prices.columns, axis=0).sector.to_dict()
prices = prices.filter(sectors.keys()).dropna(how='all', axis=1)

In [56]:
returns = prices.resample('M').last().pct_change().to_period('M').dropna(how='all')
returns.info()

<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 95 entries, 2010-02 to 2017-12
Freq: M
Columns: 2409 entries, A to ZUMZ
dtypes: float64(2409)
memory usage: 1.7 MB


In [100]:
ff_data = ff_data.loc[returns.index]
rf = ff_data.RF
factors = ff_data.drop('RF', axis=1)
betas = pd.DataFrame()
for asset in returns.dropna(axis=1):
    r = returns[asset].sub(rf)
    step1 = OLS(endog=r, exog=add_constant(factors)).fit()
    betas[asset] = step1.params.drop('const')

In [101]:
betas = betas.T
# betas['sector'] = betas.index.map(sectors.get)
# betas = betas.dropna()

In [102]:
betas.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1936 entries, A to ZUMZ
Data columns (total 5 columns):
Mkt-RF    1936 non-null float64
SMB       1936 non-null float64
HML       1936 non-null float64
RMW       1936 non-null float64
CMA       1936 non-null float64
dtypes: float64(5)
memory usage: 90.8+ KB


In [103]:
lambdas = pd.DataFrame()
for period in returns.index:
    r = returns.loc[period, betas.index].sub(rf[period])
    step2 = OLS(endog=r, exog=add_constant(betas)).fit()
    lambdas[period] = step2.params.drop('const')

In [104]:
lambdas.T.mean()

Mkt-RF    1.058334
SMB      -0.077842
HML       0.350370
RMW      -0.750405
CMA      -0.025789
dtype: float64

In [98]:
lambdas.T.mean()

Mkt-RF                          1.064148
SMB                            -0.018849
HML                             0.495143
RMW                            -0.755549
CMA                            -0.018360
sector_Basic Industries        -0.001968
sector_Capital Goods           -0.000517
sector_Consumer Durables        0.001175
sector_Consumer Non-Durables    0.003677
sector_Consumer Services        0.002540
sector_Energy                  -0.013439
sector_Finance                 -0.003216
sector_Health Care              0.000502
sector_Miscellaneous           -0.000675
sector_Public Utilities         0.004551
sector_Technology              -0.001246
sector_Transportation          -0.000573
dtype: float64

In [105]:
from linearmodels import TradedFactorModel, FamaMacBeth

In [None]:
FamaMacBeth()