In [2]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.1.66-py2.py3-none-any.whl (25 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.6.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 5.8 MB/s 
Installing collected packages: lxml, yfinance
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfully installed lxml-4.6.4 yfinance-0.1.66


In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import pandas_datareader.data as web

In [4]:
assets=['MSFT','TSLA','NVDA','GOOG']
weight=[0.25,0.25,0.25,0.25]
start_date='2013-12-31'
end_date='2018-12-31'

In [7]:
# three factors 
df_three_factor = web.DataReader('F-F_Research_Data_Factors', 'famafrench', 
                                 start=start_date)[0]
df_three_factor.index = df_three_factor.index.format()

# momentum factor
df_mom = web.DataReader('F-F_Momentum_Factor', 'famafrench', 
                        start=start_date)[0]
df_mom.index = df_mom.index.format()

# five factors
df_five_factor = web.DataReader('F-F_Research_Data_5_Factors_2x3', 
                                'famafrench', 
                                start=start_date)[0]
df_five_factor.index = df_five_factor.index.format()

In [8]:
port=yf.download(assets,start=start_date,end=end_date,adjusted=True,progress=False)

In [9]:
y=port['Adj Close'].resample('W')\
                    .last()\
                    .pct_change()\
                    .dropna()
y.index=y.index.strftime('%Y-%m')

In [11]:
y['portfolio_rtn']=np.matmul(y[assets].values,weight)
y.head()

Unnamed: 0_level_0,GOOG,MSFT,NVDA,TSLA,portfolio_rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01,0.022787,-0.023571,0.003829,-0.025675,-0.005657
2014-01,0.018006,0.009434,0.016529,0.16669,0.052664
2014-01,-0.023207,0.01182,-0.026892,0.026998,-0.00282
2014-02,0.050844,0.027981,0.008997,0.039004,0.031707
2014-02,-0.002989,-0.033827,0.010828,0.028223,0.000559


In [12]:
# join all datasets on the index
four_factor_data = df_three_factor.join(df_mom).join(y).drop(assets,axis=1)

# rename columns
four_factor_data.columns = ['mkt', 'smb', 'hml', 'rf', 'mom', 'rtn']

# divide everything (except returns) by 100
four_factor_data.loc[:, four_factor_data.columns != 'rtn'] /= 100

# convert index to datetime
four_factor_data.index = [pd.to_datetime(x, format='%Y-%m') for x in four_factor_data.index]

# select period of interest
four_factor_data = four_factor_data.loc[start_date:end_date]

# calculate excess returns
four_factor_data['excess_rtn'] = four_factor_data.rtn - four_factor_data.rf

four_factor_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,mkt,smb,hml,rf,mom,rtn,excess_rtn
2014-01-01,-0.0332,0.0092,-0.0202,0.0,0.0171,-0.005657,-0.005657
2014-01-01,-0.0332,0.0092,-0.0202,0.0,0.0171,0.052664,0.052664
2014-01-01,-0.0332,0.0092,-0.0202,0.0,0.0171,-0.00282,-0.00282
2014-02-01,0.0465,0.0037,-0.0031,0.0,0.0207,0.031707,0.031707
2014-02-01,0.0465,0.0037,-0.0031,0.0,0.0207,0.000559,0.000559


In [13]:
# join all datasets on the index
five_factor_data = df_five_factor.join(y).drop(assets,axis=1)

# rename columns
five_factor_data.columns = ['mkt', 'smb', 'hml', 'rmw', 'cma', 'rf', 'rtn']

# divide everything (except returns) by 100
five_factor_data.loc[:, five_factor_data.columns != 'rtn'] /= 100

# convert index to datetime
five_factor_data.index = [pd.to_datetime(x, format='%Y-%m') for x in five_factor_data.index]

# select period of interest
five_factor_data = five_factor_data.loc[start_date:end_date]

# calculate excess returns
five_factor_data['excess_rtn'] = five_factor_data.rtn - five_factor_data.rf

five_factor_data.head()

Unnamed: 0,mkt,smb,hml,rmw,cma,rf,rtn,excess_rtn
2014-01-01,-0.0332,0.0059,-0.0202,-0.0388,-0.0143,0.0,-0.005657,-0.005657
2014-01-01,-0.0332,0.0059,-0.0202,-0.0388,-0.0143,0.0,0.052664,0.052664
2014-01-01,-0.0332,0.0059,-0.0202,-0.0388,-0.0143,0.0,-0.00282,-0.00282
2014-02-01,0.0465,0.0016,-0.0031,-0.0023,-0.0048,0.0,0.031707,0.031707
2014-02-01,0.0465,0.0016,-0.0031,-0.0023,-0.0048,0.0,0.000559,0.000559


In [14]:
four_factor_model=smf.ols(formula='excess_rtn ~ mkt + smb + hml + mom',data=four_factor_data).fit()
print(four_factor_model.summary())

                            OLS Regression Results                            
Dep. Variable:             excess_rtn   R-squared:                       0.044
Model:                            OLS   Adj. R-squared:                  0.029
Method:                 Least Squares   F-statistic:                     2.932
Date:                Wed, 17 Nov 2021   Prob (F-statistic):             0.0214
Time:                        10:10:45   Log-Likelihood:                 527.35
No. Observations:                 260   AIC:                            -1045.
Df Residuals:                     255   BIC:                            -1027.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0038      0.002      1.853      0.0

In [15]:
five_factor_model=smf.ols(formula='excess_rtn ~ mkt + smb + hml + rmw + cma',data=five_factor_data).fit()
print(five_factor_model.summary())

                            OLS Regression Results                            
Dep. Variable:             excess_rtn   R-squared:                       0.055
Model:                            OLS   Adj. R-squared:                  0.037
Method:                 Least Squares   F-statistic:                     2.974
Date:                Wed, 17 Nov 2021   Prob (F-statistic):             0.0125
Time:                        10:12:13   Log-Likelihood:                 528.90
No. Observations:                 260   AIC:                            -1046.
Df Residuals:                     254   BIC:                            -1024.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0037      0.002      1.799      0.0