In [2]:
import numpy as np
import pandas as pd
import os
import pandas_datareader as web
import urllib.request
import zipfile

In [3]:
def get_fama_french_factors(ff_tag = "F-F_Research_Data_Factors", header_rows=3):
    ff_url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/' + ff_tag + '_CSV.zip'
    ff_file = ff_tag + '.csv'
    
    urllib.request.urlretrieve(ff_url,'fama_french.zip')
    zip_file = zipfile.ZipFile('fama_french.zip', 'r')

    zip_file.extractall()

    zip_file.close()

    ff_factors = pd.read_csv(ff_file, skiprows = header_rows, index_col = 0)

    ff_row = ff_factors.isnull().any(1).to_numpy().nonzero()[0][0]

    ff_factors = pd.read_csv(ff_file, skiprows = header_rows, nrows = ff_row, index_col = 0)

    ff_factors.index = pd.to_datetime(ff_factors.index, format= '%Y%m')
    
    ff_factors.index = ff_factors.index + pd.offsets.MonthEnd()

    ff_factors = ff_factors.apply(lambda x: x/ 100)

    # Delete the source files
    if os.path.exists("fama_french.zip"):
        os.remove("fama_french.zip")

    if os.path.exists(ff_file):
        os.remove(ff_file)
        
    return ff_factors


In [10]:
def process_fama_french_factors(number_factors=5, use_momentum=True, excess_returns=True):

    if number_factors == 5:
        ff_tag = "F-F_Research_Data_5_Factors_2x3"
    else:
        ff_tag = "F-F_Research_Data_Factors"
   
    ff = get_fama_french_factors(ff_tag=ff_tag, header_rows=3)
    print(ff)

    if use_momentum:
        ff_tag = "F-F_Momentum_Factor"
        mom = get_fama_french_factors(ff_tag=ff_tag, header_rows=13)
        mom.columns = ['UMD']
        ff = ff.join(mom, how='inner')
        

    if excess_returns:
        ff = ff.drop(columns=['RF'])
    else:
        rf = ff['RF']
        ff = ff.drop(columns=['RF']).add(rf,axis=0)
        ff = ff.join(rf,how='inner')

    ff.columns = ['MKT'] + list(ff.columns[1:])
    
    return ff

In [11]:
ff_ex = process_fama_french_factors()
ff_ex = ff_ex['2000':]
ff_tot = process_fama_french_factors(excess_returns=False)
ff_tot = ff_tot['2000':]
ff_tot = ff_tot['2009-05-31':]
ff_ex = ff_ex['2009-05-31':]
ff_ex

            Mkt-RF     SMB     HML     RMW     CMA      RF
1963-07-31 -0.0039 -0.0041 -0.0097  0.0068 -0.0118  0.0027
1963-08-31  0.0507 -0.0080  0.0180  0.0036 -0.0035  0.0025
1963-09-30 -0.0157 -0.0052  0.0013 -0.0071  0.0029  0.0027
1963-10-31  0.0253 -0.0139 -0.0010  0.0280 -0.0201  0.0029
1963-11-30 -0.0085 -0.0088  0.0175 -0.0051  0.0224  0.0027
...            ...     ...     ...     ...     ...     ...
2022-03-31  0.0305 -0.0215 -0.0180 -0.0156  0.0317  0.0001
2022-04-30 -0.0946 -0.0040  0.0619  0.0363  0.0592  0.0001
2022-05-31 -0.0034 -0.0006  0.0841  0.0144  0.0398  0.0003
2022-06-30 -0.0843  0.0130 -0.0597  0.0185 -0.0470  0.0006
2022-07-31  0.0956  0.0185 -0.0408  0.0070 -0.0689  0.0008

[709 rows x 6 columns]
            Mkt-RF     SMB     HML     RMW     CMA      RF
1963-07-31 -0.0039 -0.0041 -0.0097  0.0068 -0.0118  0.0027
1963-08-31  0.0507 -0.0080  0.0180  0.0036 -0.0035  0.0025
1963-09-30 -0.0157 -0.0052  0.0013 -0.0071  0.0029  0.0027
1963-10-31  0.0253 -0.0139 -0.00

Unnamed: 0,MKT,SMB,HML,RMW,CMA,UMD
2009-05-31,0.0521,-0.0232,0.0027,-0.0078,-0.0216,-0.1249
2009-06-30,0.0043,0.0229,-0.0273,-0.0141,-0.0033,0.0548
2009-07-31,0.0772,0.0239,0.0484,-0.0046,0.0314,-0.0555
2009-08-31,0.0333,-0.0009,0.0763,-0.0302,0.0334,-0.0907
2009-09-30,0.0408,0.0273,0.0104,0.0131,0.0035,-0.0479
...,...,...,...,...,...,...
2022-03-31,0.0305,-0.0215,-0.0180,-0.0156,0.0317,0.0300
2022-04-30,-0.0946,-0.0040,0.0619,0.0363,0.0592,0.0489
2022-05-31,-0.0034,-0.0006,0.0841,0.0144,0.0398,0.0248
2022-06-30,-0.0843,0.0130,-0.0597,0.0185,-0.0470,0.0079


In [6]:
info = pd.DataFrame(columns=['Name','Construction', 'Description'])
info.loc['MKT'] = ['Market', 'Market-cap-weighted', 'US Equities']
info.loc['SML'] = ['Size', 'Small Minus Low', 'Long small stocks and short large stocks']
info.loc['HML'] = ['Value', 'High Minus Low', 'Long value (high book-to-market) stocks and short growth (low book-to-market) stocks']
info.loc['UMD'] = ['Momentum', 'Up Minus Down', 'Long stocks that are recently up and short stocks that are recently down']
info.loc['RF'] = ['Risk-free rate', 'Tbills', '']
info

Unnamed: 0,Name,Construction,Description
MKT,Market,Market-cap-weighted,US Equities
SML,Size,Small Minus Low,Long small stocks and short large stocks
HML,Value,High Minus Low,Long value (high book-to-market) stocks and sh...
UMD,Momentum,Up Minus Down,Long stocks that are recently up and short sto...
RF,Risk-free rate,Tbills,


In [7]:
with pd.ExcelWriter('fama_french_data.xlsx') as writer:  
    info.to_excel(writer, sheet_name= 'descriptions')
    ff_tot.to_excel(writer, sheet_name='total returns')
    ff_ex.to_excel(writer, sheet_name='excess returns')