# Import libraries

In [138]:
import pandas as pd
import pandas_datareader as dr
import datetime
import statsmodels.api as sm
import seaborn as sns

import statsmodels

from matplotlib import rcParams
import matplotlib.pyplot as plt

# figure size in inches
rcParams['figure.figsize'] = 11.7,8.27

# Functions

In [139]:
def change_date(input):
    return datetime.date(year = int(input[0:4]), month = int(input[4:6]), day = 1)

# Data prerpocessing

In [140]:
five_factor_data = pd.read_csv('Emerging_5_Factors.csv', skiprows=3)
five_factor_data.rename(columns={'Unnamed: 0':'date'}, inplace=True)

# it is necessary to set an upper limit because there is an end of relevant data
data = five_factor_data.iloc[:375,:]

In [141]:
data['date'] = data['date'].apply(change_date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['date'] = data['date'].apply(change_date)


In [142]:
data

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF
0,1989-07-01,0.85,1.53,11.15,-99.99,-99.99,0.70
1,1989-08-01,1.71,7.27,-0.90,-99.99,-99.99,0.74
2,1989-09-01,14.29,1.57,-4.48,-99.99,-99.99,0.65
3,1989-10-01,-2.04,-0.09,-6.34,-99.99,-99.99,0.68
4,1989-11-01,0.64,0.76,1.17,-99.99,-99.99,0.69
...,...,...,...,...,...,...,...
370,2020-05-01,1.67,1.28,-3.23,-3.04,-1.39,0.01
371,2020-06-01,7.29,1.17,-1.30,-1.35,-3.96,0.01
372,2020-07-01,8.25,-0.73,-1.70,1.31,-2.44,0.01
373,2020-08-01,2.61,1.91,-0.51,-1.87,-2.73,0.01


In [143]:
moex = dr.get_data_yahoo('IMOEX.ME', start='1989-07-01', end='2020-09-01')
moex.reset_index(inplace = True)
moex = moex[['Date', 'Adj Close']]
moex['moex_return'] = 100*(moex['Adj Close'] - 
                      moex.shift(1)['Adj Close'])/moex.shift(1)['Adj Close']

data = data.merge(moex['moex_return'],left_index=True, right_index= True)
data.dropna(inplace = True)
data.set_index('date', inplace = True)

data = data.replace(" ", "")
for i in data.columns:
    data[i] = data[i].apply(lambda x: float(x))

In [163]:
data.tail(20)

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,moex_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-02-01,0.14,0.93,-1.28,0.81,-0.77,0.18,1.797939
2019-03-01,0.8,0.24,-0.05,0.87,-2.39,0.19,0.12678
2019-04-01,1.51,-0.96,-1.9,0.51,-0.83,0.21,1.794514
2019-05-01,-6.34,1.89,3.52,-1.48,2.43,0.21,0.620514
2019-06-01,5.39,-1.76,0.13,1.23,-0.41,0.18,0.731062
2019-07-01,-1.83,-0.81,-1.63,1.02,-1.0,0.19,0.519114
2019-08-01,-4.6,0.07,-2.17,-0.75,-0.2,0.16,0.871463
2019-09-01,1.97,-0.3,1.37,2.13,-0.28,0.18,0.688649
2019-10-01,3.8,-1.34,-0.76,-0.18,-0.57,0.15,0.984456
2019-11-01,-0.4,-1.6,0.87,-0.56,-1.64,0.12,-1.036431


# Models

## Linear regression

In [159]:
X = data[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']]
Y = data['moex_return']
model = sm.OLS(Y,X)
results = model.fit(cov_type = 'HC1')
print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:            moex_return   R-squared (uncentered):                   0.008
Model:                            OLS   Adj. R-squared (uncentered):             -0.005
Method:                 Least Squares   F-statistic:                             0.8974
Date:                Mon, 02 Nov 2020   Prob (F-statistic):                       0.483
Time:                        22:50:51   Log-Likelihood:                         -619.54
No. Observations:                 374   AIC:                                      1249.
Df Residuals:                     369   BIC:                                      1269.
Df Model:                           5                                                  
Covariance Type:                  HC1                                                  
                 coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------