In [1]:
import numpy as np
import pandas as pd
import os
import pandas_datareader as web
import urllib.request
import zipfile

In [2]:
def get_fama_french_factors(ff_tag = "F-F_Research_Data_Factors", header_rows=3):

    # Download the file and save it
    # We will name it fama_french.zip file
    
    ff_url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/' + ff_tag + '_CSV.zip'
    ff_file = ff_tag + '.csv'
    
    urllib.request.urlretrieve(ff_url,'fama_french.zip')
    zip_file = zipfile.ZipFile('fama_french.zip', 'r')

    # Next we extact the file data

    zip_file.extractall()

    # Make sure you close the file after extraction

    zip_file.close()

    # Now open the CSV file

    ff_factors = pd.read_csv(ff_file, skiprows = header_rows, index_col = 0)
    # We want to find out the row with NULL value
    # We will skip these rows

    ff_row = ff_factors.isnull().any(1).to_numpy().nonzero()[0][0]

    # Read the csv file again with skipped rows
    ff_factors = pd.read_csv(ff_file, skiprows = header_rows, nrows = ff_row, index_col = 0)

    # Format the date index
    ff_factors.index = pd.to_datetime(ff_factors.index, format= '%Y%m')

    # Format dates to end of month
    ff_factors.index = ff_factors.index + pd.offsets.MonthEnd()

    # Convert from percent to decimal
    ff_factors = ff_factors.apply(lambda x: x/ 100)

    # Delete the source files
    if os.path.exists("fama_french.zip"):
        os.remove("fama_french.zip")

    if os.path.exists(ff_file):
        os.remove(ff_file)
        
    return ff_factors

In [3]:
def process_fama_french_factors(number_factors=3, use_momentum=True, excess_returns=True):

    if number_factors == 5:
        ff_tag = "F-F_Research_Data_5_Factors_2x3"
    else:
        ff_tag = "F-F_Research_Data_Factors"
   
    ff = get_fama_french_factors(ff_tag=ff_tag, header_rows=3)

    if use_momentum:
        ff_tag = "F-F_Momentum_Factor"
        mom = get_fama_french_factors(ff_tag=ff_tag, header_rows=13)
        mom.columns = ['UMD']
        ff = ff.join(mom, how='inner')

    if excess_returns:
        ff = ff.drop(columns=['RF'])
    else:
        rf = ff['RF']
        ff = ff.drop(columns=['RF']).add(rf,axis=0)
        ff = ff.join(rf,how='inner')

    ff.columns = ['MKT'] + list(ff.columns[1:])
    
    return ff

In [4]:
ff_ex = process_fama_french_factors()
ff_ex = ff_ex['2000':]
ff_tot = process_fama_french_factors(excess_returns=False)
ff_tot = ff_tot['2000':]

In [5]:
info = pd.DataFrame(columns=['Name','Construction', 'Description'])
info.loc['MKT'] = ['Market', 'Market-cap-weighted', 'US Equities']
info.loc['SML'] = ['Size', 'Small Minus Low', 'Long small stocks and short large stocks']
info.loc['HML'] = ['Value', 'High Minus Low', 'Long value (high book-to-market) stocks and short growth (low book-to-market) stocks']
info.loc['UMD'] = ['Momentum', 'Up Minus Down', 'Long stocks that are recently up and short stocks that are recently down']
info.loc['RF'] = ['Risk-free rate', 'Tbills', '']
info

Unnamed: 0,Name,Construction,Description
MKT,Market,Market-cap-weighted,US Equities
SML,Size,Small Minus Low,Long small stocks and short large stocks
HML,Value,High Minus Low,Long value (high book-to-market) stocks and sh...
UMD,Momentum,Up Minus Down,Long stocks that are recently up and short sto...
RF,Risk-free rate,Tbills,


In [6]:
with pd.ExcelWriter('fama_french_data.xlsx') as writer:  
    info.to_excel(writer, sheet_name= 'descriptions')
    ff_tot.to_excel(writer, sheet_name='total returns')
#    ff_ex.to_excel(writer, sheet_name='excess returns')