# Import libraries

In [17]:
import pandas as pd
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

from linearmodels import PanelOLS, RandomEffects

from stargazer.stargazer import Stargazer
from statsmodels.formula.api import ols

#!pip install patsy
#!pip install transliterate
from transliterate import translit

from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel

import pandas_datareader as dr

import lightgbm as lgbm
from pandas.plotting import scatter_matrix

import random
random.seed(13)

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
from scipy.stats import loguniform

import warnings
warnings.filterwarnings('ignore')

# Load data

In [18]:
# Main panel of returns and Hamada-adjusted factor columns, indexed by 'Date'.
data = pd.read_csv(
    'final_data_without_prefs_and_with_HAMADA_factors.csv'
).set_index('Date')

# Return series; the stray 'Unnamed: 0' column is dropped.
# 'Date' intentionally stays an ordinary column here (it is not promoted to
# the index).
data_ret = pd.read_csv('data_ret.csv').drop(columns=['Unnamed: 0'])

# Macro series, indexed by 'Date' so they can be joined to `data` on the index.
macro_data = pd.read_csv('macro_data.csv').set_index('Date')

# Adjusted R² and MSE quality metrics for the classical multifactor models and their extensions

## CAPM

In [19]:
# CAPM: for every stock, regress its excess return (return minus the risk-free
# rate) on the Hamada-adjusted market risk premium only. No constant is added
# to the regressors, and standard errors are HC1 heteroskedasticity-robust.
#
# The estimation sample is restricted to dates on which the 30/60/90-day
# lagged factors, the macro series and the 30/60/90-day lagged excess returns
# are all available, although the CAPM regression itself does not use them.
# NOTE(review): presumably this keeps the sample identical across the
# classical models and their lag-extended versions -- confirm.
lag_autolag_return_30 = 30
lag_autolag_return_60 = 60
lag_autolag_return_90 = 90
lag_30 = 30
lag_60 = 60
lag_90 = 90

r_2_results_capm = []  # adjusted R-squared, one entry per stock
mse_results_capm = []  # residual mean squared error, one entry per stock

rf_col = 'risk_free_rate_based_on_g_curve'

for name in data_ret.columns[2:]:

    # Assumes return columns are named like 'return_<TICKER>_RM_Equity' with a
    # four-character ticker, so name[7:11] is the ticker and name[7:] is
    # '<TICKER>_RM_Equity'.
    ticker = name[7:11]
    mrp_col = 'Hamada_beta_MRP_for_{}'.format(ticker)
    smb_col = 'Hamada_d_mcap_ratio_SMB_for_{}'.format(ticker)
    hml_col = 'Hamada_d_mcap_ratio_HML_for_{}'.format(ticker)
    factor_cols = [smb_col, hml_col, 'MOM']

    # Contemporaneous return, risk-free rate and factors.
    result = data[[name, mrp_col, rf_col] + factor_cols].dropna()

    # Append SMB / HML / MOM shifted by 30, 60 and 90 rows; the inner join
    # keeps only dates present in every piece.
    for lag in (lag_30, lag_60, lag_90):
        lagged_factors = data[factor_cols].shift(lag).dropna().rename(
            columns={col: '{}_lag_{}_days'.format(col, lag) for col in factor_cols})
        result = pd.concat([result, lagged_factors], axis=1, join="inner")

    result = pd.merge(result, macro_data, left_index=True, right_index=True)

    # Append the stock's own excess return shifted by 30, 60 and 90 rows.
    excess_return = data[name] - data[rf_col]
    for lag in (lag_autolag_return_30, lag_autolag_return_60, lag_autolag_return_90):
        lagged_premium = excess_return.shift(lag).dropna().to_frame(
            '{}_premium_lag_{}_days'.format(name[7:], lag))
        result = pd.merge(result, lagged_premium, left_index=True, right_index=True)

    Base = result.dropna()

    # Dependent variable: excess return; the column name becomes the
    # "Dep. Variable" label in the printed summary.
    Y = (Base[name] - Base[rf_col]).to_frame('{} - risk free rate'.format(name))
    X = Base[[mrp_col]]
    model = sm.OLS(Y, X)
    results = model.fit(cov_type='HC1')
    print('###################################################################################')
    print(ticker)
    print(results.summary())

    r_2_results_capm.append(results.rsquared_adj)
    mse_results_capm.append(results.mse_resid)
    
    

###################################################################################
SBER
                                           OLS Regression Results                                          
Dep. Variable:     return_SBER_RM_Equity - risk free rate   R-squared (uncentered):                   0.403
Model:                                                OLS   Adj. R-squared (uncentered):              0.403
Method:                                     Least Squares   F-statistic:                              232.5
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.00e-49
Time:                                            19:27:31   Log-Likelihood:                          2245.3
No. Observations:                                    1979   AIC:                                     -4489.
Df Residuals:                                        1978   BIC:                                     -4483.
Df Model:                                      

###################################################################################
TATN
                                           OLS Regression Results                                          
Dep. Variable:     return_TATN_RM_Equity - risk free rate   R-squared (uncentered):                   0.658
Model:                                                OLS   Adj. R-squared (uncentered):              0.658
Method:                                     Least Squares   F-statistic:                              2622.
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                        0.00
Time:                                            19:27:31   Log-Likelihood:                          2794.7
No. Observations:                                    1979   AIC:                                     -5587.
Df Residuals:                                        1978   BIC:                                     -5582.
Df Model:                                      

###################################################################################
ALRS
                                           OLS Regression Results                                          
Dep. Variable:     return_ALRS_RM_Equity - risk free rate   R-squared (uncentered):                   0.202
Model:                                                OLS   Adj. R-squared (uncentered):              0.201
Method:                                     Least Squares   F-statistic:                              303.8
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    7.31e-63
Time:                                            19:27:31   Log-Likelihood:                          1674.9
No. Observations:                                    1752   AIC:                                     -3348.
Df Residuals:                                        1751   BIC:                                     -3342.
Df Model:                                      

###################################################################################
PIKK
                                           OLS Regression Results                                          
Dep. Variable:     return_PIKK_RM_Equity - risk free rate   R-squared (uncentered):                   0.059
Model:                                                OLS   Adj. R-squared (uncentered):              0.059
Method:                                     Least Squares   F-statistic:                              122.8
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.00e-27
Time:                                            19:27:31   Log-Likelihood:                          2026.4
No. Observations:                                    1979   AIC:                                     -4051.
Df Residuals:                                        1978   BIC:                                     -4045.
Df Model:                                      

###################################################################################
LNTA
                                           OLS Regression Results                                          
Dep. Variable:     return_LNTA_RM_Equity - risk free rate   R-squared (uncentered):                   0.129
Model:                                                OLS   Adj. R-squared (uncentered):              0.128
Method:                                     Least Squares   F-statistic:                              138.0
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    2.04e-30
Time:                                            19:27:32   Log-Likelihood:                          1418.3
No. Observations:                                    1367   AIC:                                     -2835.
Df Residuals:                                        1366   BIC:                                     -2829.
Df Model:                                      

###################################################################################
URKA
                                           OLS Regression Results                                          
Dep. Variable:     return_URKA_RM_Equity - risk free rate   R-squared (uncentered):                   0.040
Model:                                                OLS   Adj. R-squared (uncentered):              0.039
Method:                                     Least Squares   F-statistic:                              63.20
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    3.11e-15
Time:                                            19:27:32   Log-Likelihood:                          1716.0
No. Observations:                                    1979   AIC:                                     -3430.
Df Residuals:                                        1978   BIC:                                     -3424.
Df Model:                                      

###################################################################################
MSTT
                                           OLS Regression Results                                          
Dep. Variable:     return_MSTT_RM_Equity - risk free rate   R-squared (uncentered):                   0.005
Model:                                                OLS   Adj. R-squared (uncentered):              0.004
Method:                                     Least Squares   F-statistic:                              2.306
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                       0.129
Time:                                            19:27:32   Log-Likelihood:                          1485.4
No. Observations:                                    1922   AIC:                                     -2969.
Df Residuals:                                        1921   BIC:                                     -2963.
Df Model:                                      

## Fama-French three-factor model

In [20]:
# Fama-French three-factor model: for every stock, regress its excess return
# (return minus the risk-free rate) on the Hamada-adjusted market risk
# premium, SMB and HML. No constant is added to the regressors, and standard
# errors are HC1 heteroskedasticity-robust.
#
# The estimation sample is restricted to dates on which the 30/60/90-day
# lagged factors, the macro series and the 30/60/90-day lagged excess returns
# are all available, although this regression does not use them.
# NOTE(review): presumably this keeps the sample identical across the
# classical models and their lag-extended versions -- confirm.
lag_autolag_return_30 = 30
lag_autolag_return_60 = 60
lag_autolag_return_90 = 90
lag_30 = 30
lag_60 = 60
lag_90 = 90

r_2_results_ff3 = []  # adjusted R-squared, one entry per stock
mse_results_ff3 = []  # residual mean squared error, one entry per stock

rf_col = 'risk_free_rate_based_on_g_curve'

for name in data_ret.columns[2:]:

    # Assumes return columns are named like 'return_<TICKER>_RM_Equity' with a
    # four-character ticker, so name[7:11] is the ticker and name[7:] is
    # '<TICKER>_RM_Equity'.
    ticker = name[7:11]
    mrp_col = 'Hamada_beta_MRP_for_{}'.format(ticker)
    smb_col = 'Hamada_d_mcap_ratio_SMB_for_{}'.format(ticker)
    hml_col = 'Hamada_d_mcap_ratio_HML_for_{}'.format(ticker)
    factor_cols = [smb_col, hml_col, 'MOM']

    # Contemporaneous return, risk-free rate and factors.
    result = data[[name, mrp_col, rf_col] + factor_cols].dropna()

    # Append SMB / HML / MOM shifted by 30, 60 and 90 rows; the inner join
    # keeps only dates present in every piece.
    for lag in (lag_30, lag_60, lag_90):
        lagged_factors = data[factor_cols].shift(lag).dropna().rename(
            columns={col: '{}_lag_{}_days'.format(col, lag) for col in factor_cols})
        result = pd.concat([result, lagged_factors], axis=1, join="inner")

    result = pd.merge(result, macro_data, left_index=True, right_index=True)

    # Append the stock's own excess return shifted by 30, 60 and 90 rows.
    excess_return = data[name] - data[rf_col]
    for lag in (lag_autolag_return_30, lag_autolag_return_60, lag_autolag_return_90):
        lagged_premium = excess_return.shift(lag).dropna().to_frame(
            '{}_premium_lag_{}_days'.format(name[7:], lag))
        result = pd.merge(result, lagged_premium, left_index=True, right_index=True)

    Base = result.dropna()

    # Dependent variable: excess return; the column name becomes the
    # "Dep. Variable" label in the printed summary.
    Y = (Base[name] - Base[rf_col]).to_frame('{} - risk free rate'.format(name))
    X = Base[[mrp_col, smb_col, hml_col]]
    model = sm.OLS(Y, X)
    results = model.fit(cov_type='HC1')
    print('###################################################################################')
    print(ticker)
    print(results.summary())

    r_2_results_ff3.append(results.rsquared_adj)
    mse_results_ff3.append(results.mse_resid)
    

###################################################################################
SBER
                                           OLS Regression Results                                          
Dep. Variable:     return_SBER_RM_Equity - risk free rate   R-squared (uncentered):                   0.410
Model:                                                OLS   Adj. R-squared (uncentered):              0.409
Method:                                     Least Squares   F-statistic:                              84.56
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.80e-51
Time:                                            19:27:32   Log-Likelihood:                          2256.5
No. Observations:                                    1979   AIC:                                     -4507.
Df Residuals:                                        1976   BIC:                                     -4490.
Df Model:                                      

###################################################################################
TATN
                                           OLS Regression Results                                          
Dep. Variable:     return_TATN_RM_Equity - risk free rate   R-squared (uncentered):                   0.668
Model:                                                OLS   Adj. R-squared (uncentered):              0.668
Method:                                     Least Squares   F-statistic:                              904.8
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                        0.00
Time:                                            19:27:33   Log-Likelihood:                          2824.8
No. Observations:                                    1979   AIC:                                     -5644.
Df Residuals:                                        1976   BIC:                                     -5627.
Df Model:                                      

###################################################################################
ALRS
                                           OLS Regression Results                                          
Dep. Variable:     return_ALRS_RM_Equity - risk free rate   R-squared (uncentered):                   0.203
Model:                                                OLS   Adj. R-squared (uncentered):              0.201
Method:                                     Least Squares   F-statistic:                              103.5
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.13e-61
Time:                                            19:27:33   Log-Likelihood:                          1676.0
No. Observations:                                    1752   AIC:                                     -3346.
Df Residuals:                                        1749   BIC:                                     -3330.
Df Model:                                      

                                           OLS Regression Results                                          
Dep. Variable:     return_MAGN_RM_Equity - risk free rate   R-squared (uncentered):                   0.394
Model:                                                OLS   Adj. R-squared (uncentered):              0.394
Method:                                     Least Squares   F-statistic:                              256.2
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   1.89e-140
Time:                                            19:27:33   Log-Likelihood:                          1743.6
No. Observations:                                    1979   AIC:                                     -3481.
Df Residuals:                                        1976   BIC:                                     -3464.
Df Model:                                               3                                                  
Covariance Type:            

###################################################################################
UPRO
                                           OLS Regression Results                                          
Dep. Variable:     return_UPRO_RM_Equity - risk free rate   R-squared (uncentered):                   0.202
Model:                                                OLS   Adj. R-squared (uncentered):              0.200
Method:                                     Least Squares   F-statistic:                              104.0
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.35e-60
Time:                                            19:27:33   Log-Likelihood:                          1789.1
No. Observations:                                    1353   AIC:                                     -3572.
Df Residuals:                                        1350   BIC:                                     -3557.
Df Model:                                      

###################################################################################
MSNG
                                           OLS Regression Results                                          
Dep. Variable:     return_MSNG_RM_Equity - risk free rate   R-squared (uncentered):                   0.218
Model:                                                OLS   Adj. R-squared (uncentered):              0.217
Method:                                     Least Squares   F-statistic:                              78.83
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    3.63e-48
Time:                                            19:27:33   Log-Likelihood:                          1658.8
No. Observations:                                    1979   AIC:                                     -3312.
Df Residuals:                                        1976   BIC:                                     -3295.
Df Model:                                      

###################################################################################
MSTT
                                           OLS Regression Results                                          
Dep. Variable:     return_MSTT_RM_Equity - risk free rate   R-squared (uncentered):                   0.038
Model:                                                OLS   Adj. R-squared (uncentered):              0.037
Method:                                     Least Squares   F-statistic:                              32.89
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.00e-20
Time:                                            19:27:34   Log-Likelihood:                          1518.6
No. Observations:                                    1922   AIC:                                     -3031.
Df Residuals:                                        1919   BIC:                                     -3015.
Df Model:                                      

                                           OLS Regression Results                                          
Dep. Variable:     return_VZRZ_RM_Equity - risk free rate   R-squared (uncentered):                   0.036
Model:                                                OLS   Adj. R-squared (uncentered):              0.034
Method:                                     Least Squares   F-statistic:                              21.39
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.23e-13
Time:                                            19:27:34   Log-Likelihood:                          1660.8
No. Observations:                                    1979   AIC:                                     -3316.
Df Residuals:                                        1976   BIC:                                     -3299.
Df Model:                                               3                                                  
Covariance Type:            

## Carhart four-factor model

In [21]:
# Carhart four-factor model: for every stock, regress its excess return
# (return minus the risk-free rate) on the Hamada-adjusted market risk
# premium, SMB, HML and the momentum factor MOM. No constant is added to the
# regressors, and standard errors are HC1 heteroskedasticity-robust.
#
# The estimation sample is restricted to dates on which the 30/60/90-day
# lagged factors, the macro series and the 30/60/90-day lagged excess returns
# are all available, although this regression does not use them.
# NOTE(review): presumably this keeps the sample identical across the
# classical models and their lag-extended versions -- confirm.
lag_autolag_return_30 = 30
lag_autolag_return_60 = 60
lag_autolag_return_90 = 90
lag_30 = 30
lag_60 = 60
lag_90 = 90

r_2_results_carh = []  # adjusted R-squared, one entry per stock
mse_results_carh = []  # residual mean squared error, one entry per stock

rf_col = 'risk_free_rate_based_on_g_curve'

for name in data_ret.columns[2:]:

    # Assumes return columns are named like 'return_<TICKER>_RM_Equity' with a
    # four-character ticker, so name[7:11] is the ticker and name[7:] is
    # '<TICKER>_RM_Equity'.
    ticker = name[7:11]
    mrp_col = 'Hamada_beta_MRP_for_{}'.format(ticker)
    smb_col = 'Hamada_d_mcap_ratio_SMB_for_{}'.format(ticker)
    hml_col = 'Hamada_d_mcap_ratio_HML_for_{}'.format(ticker)
    factor_cols = [smb_col, hml_col, 'MOM']

    # Contemporaneous return, risk-free rate and factors.
    result = data[[name, mrp_col, rf_col] + factor_cols].dropna()

    # Append SMB / HML / MOM shifted by 30, 60 and 90 rows; the inner join
    # keeps only dates present in every piece.
    for lag in (lag_30, lag_60, lag_90):
        lagged_factors = data[factor_cols].shift(lag).dropna().rename(
            columns={col: '{}_lag_{}_days'.format(col, lag) for col in factor_cols})
        result = pd.concat([result, lagged_factors], axis=1, join="inner")

    result = pd.merge(result, macro_data, left_index=True, right_index=True)

    # Append the stock's own excess return shifted by 30, 60 and 90 rows.
    excess_return = data[name] - data[rf_col]
    for lag in (lag_autolag_return_30, lag_autolag_return_60, lag_autolag_return_90):
        lagged_premium = excess_return.shift(lag).dropna().to_frame(
            '{}_premium_lag_{}_days'.format(name[7:], lag))
        result = pd.merge(result, lagged_premium, left_index=True, right_index=True)

    Base = result.dropna()

    # Dependent variable: excess return; the column name becomes the
    # "Dep. Variable" label in the printed summary.
    Y = (Base[name] - Base[rf_col]).to_frame('{} - risk free rate'.format(name))
    X = Base[[mrp_col, smb_col, hml_col, 'MOM']]
    model = sm.OLS(Y, X)
    results = model.fit(cov_type='HC1')
    print('###################################################################################')
    print(ticker)
    print(results.summary())

    r_2_results_carh.append(results.rsquared_adj)
    mse_results_carh.append(results.mse_resid)
    

###################################################################################
SBER
                                           OLS Regression Results                                          
Dep. Variable:     return_SBER_RM_Equity - risk free rate   R-squared (uncentered):                   0.410
Model:                                                OLS   Adj. R-squared (uncentered):              0.409
Method:                                     Least Squares   F-statistic:                              65.35
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    5.71e-52
Time:                                            19:27:34   Log-Likelihood:                          2256.8
No. Observations:                                    1979   AIC:                                     -4506.
Df Residuals:                                        1975   BIC:                                     -4483.
Df Model:                                      

###################################################################################
SNGS
                                           OLS Regression Results                                          
Dep. Variable:     return_SNGS_RM_Equity - risk free rate   R-squared (uncentered):                   0.547
Model:                                                OLS   Adj. R-squared (uncentered):              0.546
Method:                                     Least Squares   F-statistic:                              329.9
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   1.53e-200
Time:                                            19:27:34   Log-Likelihood:                          1627.8
No. Observations:                                    1417   AIC:                                     -3248.
Df Residuals:                                        1413   BIC:                                     -3227.
Df Model:                                      

###################################################################################
MOEX
                                           OLS Regression Results                                          
Dep. Variable:     return_MOEX_RM_Equity - risk free rate   R-squared (uncentered):                   0.105
Model:                                                OLS   Adj. R-squared (uncentered):              0.103
Method:                                     Least Squares   F-statistic:                              36.32
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    5.17e-29
Time:                                            19:27:34   Log-Likelihood:                          1665.9
No. Observations:                                    1507   AIC:                                     -3324.
Df Residuals:                                        1503   BIC:                                     -3303.
Df Model:                                      

###################################################################################
PHOR
                                           OLS Regression Results                                          
Dep. Variable:     return_PHOR_RM_Equity - risk free rate   R-squared (uncentered):                   0.150
Model:                                                OLS   Adj. R-squared (uncentered):              0.148
Method:                                     Least Squares   F-statistic:                              46.51
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    3.73e-37
Time:                                            19:27:34   Log-Likelihood:                          1988.4
No. Observations:                                    1720   AIC:                                     -3969.
Df Residuals:                                        1716   BIC:                                     -3947.
Df Model:                                      

###################################################################################
AFLT
                                           OLS Regression Results                                          
Dep. Variable:     return_AFLT_RM_Equity - risk free rate   R-squared (uncentered):                   0.278
Model:                                                OLS   Adj. R-squared (uncentered):              0.276
Method:                                     Least Squares   F-statistic:                              117.4
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    6.57e-90
Time:                                            19:27:35   Log-Likelihood:                          1525.5
No. Observations:                                    1979   AIC:                                     -3043.
Df Residuals:                                        1975   BIC:                                     -3021.
Df Model:                                      

                                           OLS Regression Results                                          
Dep. Variable:     return_TRMK_RM_Equity - risk free rate   R-squared (uncentered):                   0.221
Model:                                                OLS   Adj. R-squared (uncentered):              0.219
Method:                                     Least Squares   F-statistic:                              118.5
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    5.81e-90
Time:                                            19:27:35   Log-Likelihood:                          1344.5
No. Observations:                                    1831   AIC:                                     -2681.
Df Residuals:                                        1827   BIC:                                     -2659.
Df Model:                                               4                                                  
Covariance Type:            

                                           OLS Regression Results                                          
Dep. Variable:     return_URKA_RM_Equity - risk free rate   R-squared (uncentered):                   0.071
Model:                                                OLS   Adj. R-squared (uncentered):              0.069
Method:                                     Least Squares   F-statistic:                              30.28
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.74e-24
Time:                                            19:27:35   Log-Likelihood:                          1748.8
No. Observations:                                    1979   AIC:                                     -3490.
Df Residuals:                                        1975   BIC:                                     -3467.
Df Model:                                               4                                                  
Covariance Type:            

###################################################################################
BSPB
                                           OLS Regression Results                                          
Dep. Variable:     return_BSPB_RM_Equity - risk free rate   R-squared (uncentered):                   0.218
Model:                                                OLS   Adj. R-squared (uncentered):              0.216
Method:                                     Least Squares   F-statistic:                              68.90
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    1.17e-54
Time:                                            19:27:35   Log-Likelihood:                          1709.8
No. Observations:                                    1979   AIC:                                     -3412.
Df Residuals:                                        1975   BIC:                                     -3389.
Df Model:                                      

## Carhart model with lags

In [22]:
# Carhart four-factor regression extended with lagged SMB, HML and MOM
# factors, estimated separately for every stock column of `data_ret`.
# Collects the adjusted R^2 and the residual MSE of each fit.

# Lag horizons. `shift` moves values by this many rows of `data`; the derived
# column names call them "days" (presumably one row per trading day - confirm).
lag_autolag_return_30 = 30
lag_autolag_return_60 = 60
lag_autolag_return_90 = 90
lag_30 = 30
lag_60 = 60
lag_90 = 90

# Per-stock metrics, appended in the order of data_ret.columns[2:].
r_2_results_carh_l = []
mse_results_carh_l = []

for name in data_ret.columns[2:]:

    # Column names follow the pattern 'return_<TICKER>_RM_Equity', so
    # name[7:11] is the four-letter ticker and name[7:] drops 'return_'.
    ticker = name[7:11]
    rf_col = 'risk_free_rate_based_on_g_curve'
    mrp_col = 'Hamada_beta_MRP_for_{}'.format(ticker)
    smb_col = 'Hamada_d_mcap_ratio_SMB_for_{}'.format(ticker)
    hml_col = 'Hamada_d_mcap_ratio_HML_for_{}'.format(ticker)
    factor_cols = [smb_col, hml_col, 'MOM']
    factor_lags = (lag_30, lag_60, lag_90)
    autolag_lags = (lag_autolag_return_30, lag_autolag_return_60, lag_autolag_return_90)

    # Contemporaneous return, risk-free rate and the four factors.
    result = data[[name, mrp_col, rf_col, smb_col, hml_col, 'MOM']].dropna()

    # Append the lagged SMB/HML/MOM factors, one horizon at a time. The inner
    # join keeps only the dates present in every piece.
    for lag in factor_lags:
        lagged_factors = data[factor_cols].shift(lag).dropna().rename(
            columns={col: '{}_lag_{}_days'.format(col, lag) for col in factor_cols})
        result = pd.concat([result, lagged_factors], axis=1, join="inner")

    # NOTE(review): neither the macro columns nor the lagged own premium are
    # regressors in this model. The inner merges below only restrict the
    # sample - presumably to keep it identical to the models that do use
    # them; confirm before removing.
    result = pd.merge(result, macro_data, left_index=True, right_index=True)

    premium = data[name] - data[rf_col]
    for lag in autolag_lags:
        autolag = premium.shift(lag).dropna().to_frame(
            '{}_premium_lag_{}_days'.format(name[7:], lag))
        result = pd.merge(result, autolag, left_index=True, right_index=True)

    Base = result.dropna()

    # Dependent variable: excess return over the risk-free rate.
    Y = (Base[name] - Base[rf_col]).to_frame('{} - risk free rate'.format(name))

    # Regressor order: MRP, SMB, HML, then (SMB, HML) for each lag, then MOM
    # and its lags.
    regressors = [mrp_col, smb_col, hml_col]
    for lag in factor_lags:
        regressors.append('{}_lag_{}_days'.format(smb_col, lag))
        regressors.append('{}_lag_{}_days'.format(hml_col, lag))
    regressors.append('MOM')
    regressors.extend('MOM_lag_{}_days'.format(lag) for lag in factor_lags)
    X = Base[regressors]

    # No constant is added to X, so the summary reports uncentered R^2.
    # HC1 gives heteroscedasticity-robust standard errors.
    model = sm.OLS(Y, X)
    results = model.fit(cov_type='HC1')
    print('###################################################################################')
    print(ticker)
    print(results.summary())

    r_2_results_carh_l.append(results.rsquared_adj)
    mse_results_carh_l.append(results.mse_resid)
    

###################################################################################
SBER
                                           OLS Regression Results                                          
Dep. Variable:     return_SBER_RM_Equity - risk free rate   R-squared (uncentered):                   0.545
Model:                                                OLS   Adj. R-squared (uncentered):              0.542
Method:                                     Least Squares   F-statistic:                              106.8
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   7.35e-217
Time:                                            19:27:36   Log-Likelihood:                          2514.2
No. Observations:                                    1979   AIC:                                     -5002.
Df Residuals:                                        1966   BIC:                                     -4930.
Df Model:                                      

###################################################################################
PLZL
                                           OLS Regression Results                                          
Dep. Variable:     return_PLZL_RM_Equity - risk free rate   R-squared (uncentered):                   0.305
Model:                                                OLS   Adj. R-squared (uncentered):              0.293
Method:                                     Least Squares   F-statistic:                              53.25
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                    7.12e-97
Time:                                            19:27:36   Log-Likelihood:                          745.23
No. Observations:                                     753   AIC:                                     -1464.
Df Residuals:                                         740   BIC:                                     -1404.
Df Model:                                      

###################################################################################
IRAO
                                           OLS Regression Results                                          
Dep. Variable:     return_IRAO_RM_Equity - risk free rate   R-squared (uncentered):                   0.546
Model:                                                OLS   Adj. R-squared (uncentered):              0.541
Method:                                     Least Squares   F-statistic:                              106.4
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   4.50e-184
Time:                                            19:27:36   Log-Likelihood:                          1363.9
No. Observations:                                    1114   AIC:                                     -2702.
Df Residuals:                                        1101   BIC:                                     -2637.
Df Model:                                      

###################################################################################
PHOR
                                           OLS Regression Results                                          
Dep. Variable:     return_PHOR_RM_Equity - risk free rate   R-squared (uncentered):                   0.361
Model:                                                OLS   Adj. R-squared (uncentered):              0.356
Method:                                     Least Squares   F-statistic:                              57.71
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   6.44e-125
Time:                                            19:27:36   Log-Likelihood:                          2233.8
No. Observations:                                    1720   AIC:                                     -4442.
Df Residuals:                                        1707   BIC:                                     -4371.
Df Model:                                      

###################################################################################
AFLT
                                           OLS Regression Results                                          
Dep. Variable:     return_AFLT_RM_Equity - risk free rate   R-squared (uncentered):                   0.405
Model:                                                OLS   Adj. R-squared (uncentered):              0.401
Method:                                     Least Squares   F-statistic:                              104.6
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   2.50e-213
Time:                                            19:27:36   Log-Likelihood:                          1717.5
No. Observations:                                    1979   AIC:                                     -3409.
Df Residuals:                                        1966   BIC:                                     -3336.
Df Model:                                      

                                           OLS Regression Results                                          
Dep. Variable:     return_TRMK_RM_Equity - risk free rate   R-squared (uncentered):                   0.347
Model:                                                OLS   Adj. R-squared (uncentered):              0.343
Method:                                     Least Squares   F-statistic:                              73.76
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   1.15e-156
Time:                                            19:27:37   Log-Likelihood:                          1507.0
No. Observations:                                    1831   AIC:                                     -2988.
Df Residuals:                                        1818   BIC:                                     -2916.
Df Model:                                              13                                                  
Covariance Type:            

###################################################################################
BANE
                                           OLS Regression Results                                          
Dep. Variable:     return_BANE_RM_Equity - risk free rate   R-squared (uncentered):                   0.362
Model:                                                OLS   Adj. R-squared (uncentered):              0.358
Method:                                     Least Squares   F-statistic:                              103.4
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   8.39e-207
Time:                                            19:27:37   Log-Likelihood:                          1669.9
No. Observations:                                    1805   AIC:                                     -3314.
Df Residuals:                                        1792   BIC:                                     -3242.
Df Model:                                      

###################################################################################
BSPB
                                           OLS Regression Results                                          
Dep. Variable:     return_BSPB_RM_Equity - risk free rate   R-squared (uncentered):                   0.468
Model:                                                OLS   Adj. R-squared (uncentered):              0.465
Method:                                     Least Squares   F-statistic:                              103.5
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   1.75e-211
Time:                                            19:27:37   Log-Likelihood:                          2091.6
No. Observations:                                    1979   AIC:                                     -4157.
Df Residuals:                                        1966   BIC:                                     -4085.
Df Model:                                      

## Carhart model with lags and autolags

In [23]:
# Carhart four-factor regression extended with lagged SMB, HML and MOM
# factors and with lags of the stock's own excess return ("autolags"),
# estimated separately for every stock column of `data_ret`.
# Collects the adjusted R^2 and the residual MSE of each fit.

# Lag horizons. `shift` moves values by this many rows of `data`; the derived
# column names call them "days" (presumably one row per trading day - confirm).
lag_autolag_return_30 = 30
lag_autolag_return_60 = 60
lag_autolag_return_90 = 90
lag_30 = 30
lag_60 = 60
lag_90 = 90

# Per-stock metrics, appended in the order of data_ret.columns[2:].
r_2_results_carh_l_al = []
mse_results_carh_l_al = []

for name in data_ret.columns[2:]:

    # Column names follow the pattern 'return_<TICKER>_RM_Equity', so
    # name[7:11] is the four-letter ticker and name[7:] drops 'return_'.
    ticker = name[7:11]
    rf_col = 'risk_free_rate_based_on_g_curve'
    mrp_col = 'Hamada_beta_MRP_for_{}'.format(ticker)
    smb_col = 'Hamada_d_mcap_ratio_SMB_for_{}'.format(ticker)
    hml_col = 'Hamada_d_mcap_ratio_HML_for_{}'.format(ticker)
    factor_cols = [smb_col, hml_col, 'MOM']
    factor_lags = (lag_30, lag_60, lag_90)
    autolag_lags = (lag_autolag_return_30, lag_autolag_return_60, lag_autolag_return_90)

    # Contemporaneous return, risk-free rate and the four factors.
    result = data[[name, mrp_col, rf_col, smb_col, hml_col, 'MOM']].dropna()

    # Append the lagged SMB/HML/MOM factors, one horizon at a time. The inner
    # join keeps only the dates present in every piece.
    for lag in factor_lags:
        lagged_factors = data[factor_cols].shift(lag).dropna().rename(
            columns={col: '{}_lag_{}_days'.format(col, lag) for col in factor_cols})
        result = pd.concat([result, lagged_factors], axis=1, join="inner")

    # NOTE(review): the macro columns are not regressors in this model; the
    # inner merge only restricts the sample to dates present in macro_data -
    # presumably to keep samples comparable across models; confirm.
    result = pd.merge(result, macro_data, left_index=True, right_index=True)

    # Lags of the stock's own excess return over the risk-free rate.
    premium = data[name] - data[rf_col]
    autolag_cols = []
    for lag in autolag_lags:
        autolag_col = '{}_premium_lag_{}_days'.format(name[7:], lag)
        autolag = premium.shift(lag).dropna().to_frame(autolag_col)
        result = pd.merge(result, autolag, left_index=True, right_index=True)
        autolag_cols.append(autolag_col)

    Base = result.dropna()

    # Dependent variable: excess return over the risk-free rate.
    Y = (Base[name] - Base[rf_col]).to_frame('{} - risk free rate'.format(name))

    # Regressor order: MRP, own-premium lags, SMB, HML, then (SMB, HML) for
    # each lag, then MOM and its lags.
    regressors = [mrp_col] + autolag_cols + [smb_col, hml_col]
    for lag in factor_lags:
        regressors.append('{}_lag_{}_days'.format(smb_col, lag))
        regressors.append('{}_lag_{}_days'.format(hml_col, lag))
    regressors.append('MOM')
    regressors.extend('MOM_lag_{}_days'.format(lag) for lag in factor_lags)
    X = Base[regressors]

    # No constant is added to X, so the summary reports uncentered R^2.
    # HC1 gives heteroscedasticity-robust standard errors.
    model = sm.OLS(Y, X)
    results = model.fit(cov_type='HC1')
    print('###################################################################################')
    print(ticker)
    print(results.summary())

    # Joint test that the contemporaneous SMB, HML and MOM coefficients are
    # all zero.
    hypotheses = '({} = 0), ({} = 0), (MOM = 0)'.format(smb_col, hml_col)
    f_test = results.f_test(hypotheses)
    print(f_test)

    r_2_results_carh_l_al.append(results.rsquared_adj)
    mse_results_carh_l_al.append(results.mse_resid)
    

###################################################################################
SBER
                                           OLS Regression Results                                          
Dep. Variable:     return_SBER_RM_Equity - risk free rate   R-squared (uncentered):                   0.555
Model:                                                OLS   Adj. R-squared (uncentered):              0.551
Method:                                     Least Squares   F-statistic:                              93.66
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   8.39e-228
Time:                                            19:27:37   Log-Likelihood:                          2535.4
No. Observations:                                    1979   AIC:                                     -5039.
Df Residuals:                                        1963   BIC:                                     -4949.
Df Model:                                      

                                           OLS Regression Results                                          
Dep. Variable:     return_SNGS_RM_Equity - risk free rate   R-squared (uncentered):                   0.612
Model:                                                OLS   Adj. R-squared (uncentered):              0.608
Method:                                     Least Squares   F-statistic:                              138.0
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   7.96e-274
Time:                                            19:27:37   Log-Likelihood:                          1737.8
No. Observations:                                    1417   AIC:                                     -3444.
Df Residuals:                                        1401   BIC:                                     -3360.
Df Model:                                              16                                                  
Covariance Type:            

                                           OLS Regression Results                                          
Dep. Variable:     return_MTSS_RM_Equity - risk free rate   R-squared (uncentered):                   0.491
Model:                                                OLS   Adj. R-squared (uncentered):              0.486
Method:                                     Least Squares   F-statistic:                              78.62
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   2.06e-197
Time:                                            19:27:38   Log-Likelihood:                          2650.6
No. Observations:                                    1979   AIC:                                     -5269.
Df Residuals:                                        1963   BIC:                                     -5180.
Df Model:                                              16                                                  
Covariance Type:            

                                           OLS Regression Results                                          
Dep. Variable:     return_CHMF_RM_Equity - risk free rate   R-squared (uncentered):                   0.406
Model:                                                OLS   Adj. R-squared (uncentered):              0.402
Method:                                     Least Squares   F-statistic:                              90.66
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   6.41e-222
Time:                                            19:27:38   Log-Likelihood:                          2137.9
No. Observations:                                    1979   AIC:                                     -4244.
Df Residuals:                                        1963   BIC:                                     -4154.
Df Model:                                              16                                                  
Covariance Type:            

                                           OLS Regression Results                                          
Dep. Variable:     return_MAGN_RM_Equity - risk free rate   R-squared (uncentered):                   0.532
Model:                                                OLS   Adj. R-squared (uncentered):              0.528
Method:                                     Least Squares   F-statistic:                              120.2
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   1.05e-276
Time:                                            19:27:38   Log-Likelihood:                          1997.6
No. Observations:                                    1979   AIC:                                     -3963.
Df Residuals:                                        1963   BIC:                                     -3874.
Df Model:                                              16                                                  
Covariance Type:            

                                           OLS Regression Results                                          
Dep. Variable:     return_LSRG_RM_Equity - risk free rate   R-squared (uncentered):                   0.603
Model:                                                OLS   Adj. R-squared (uncentered):              0.599
Method:                                     Least Squares   F-statistic:                              163.0
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                        0.00
Time:                                            19:27:38   Log-Likelihood:                          2374.8
No. Observations:                                    1979   AIC:                                     -4718.
Df Residuals:                                        1963   BIC:                                     -4628.
Df Model:                                              16                                                  
Covariance Type:            

###################################################################################
MTLR
                                           OLS Regression Results                                          
Dep. Variable:     return_MTLR_RM_Equity - risk free rate   R-squared (uncentered):                   0.431
Model:                                                OLS   Adj. R-squared (uncentered):              0.426
Method:                                     Least Squares   F-statistic:                              63.42
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   2.32e-164
Time:                                            19:27:39   Log-Likelihood:                          708.36
No. Observations:                                    1979   AIC:                                     -1385.
Df Residuals:                                        1963   BIC:                                     -1295.
Df Model:                                      

[1] Standard Errors are heteroscedasticity robust (HC1)
<F test: F=array([[9.75906748]]), p=2.187673277834923e-06, df_denom=1.79e+03, df_num=3>
<statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7fe719f21d30>
###################################################################################
VSMO
                                           OLS Regression Results                                          
Dep. Variable:     return_VSMO_RM_Equity - risk free rate   R-squared (uncentered):                   0.581
Model:                                                OLS   Adj. R-squared (uncentered):              0.576
Method:                                     Least Squares   F-statistic:                              76.42
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   4.10e-173
Time:                                            19:27:39   Log-Likelihood:                          1842.5
No. Observations:              

###################################################################################
BSPB
                                           OLS Regression Results                                          
Dep. Variable:     return_BSPB_RM_Equity - risk free rate   R-squared (uncentered):                   0.504
Model:                                                OLS   Adj. R-squared (uncentered):              0.500
Method:                                     Least Squares   F-statistic:                              104.2
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   7.43e-248
Time:                                            19:27:39   Log-Likelihood:                          2160.1
No. Observations:                                    1979   AIC:                                     -4288.
Df Residuals:                                        1963   BIC:                                     -4199.
Df Model:                                      

## Carhart model with lags, autolags, and Russia-specific factors

In [24]:
# Carhart four-factor regression per stock, extended with lagged factors,
# lagged own excess returns ("autolags") and two macro regressors
# (`dollar_30d_return`, `wti_30d_return`, which enter via the `macro_data` merge).
# For every return column the loop fits an OLS without an intercept and with
# HC1-robust standard errors, prints the summary plus a joint F-test that the
# contemporaneous SMB, HML and MOM coefficients are all zero, and stores the
# adjusted R2 and the residual MSE for the cross-model comparison.
lag_autolag_return_30 = 30
lag_autolag_return_60 = 60
lag_autolag_return_90 = 90
lag_30 = 30
lag_60 = 60
lag_90 = 90

# Lag horizons (in rows of `data`) for the own-premium autolags and for the factors.
autolag_periods = [lag_autolag_return_30, lag_autolag_return_60, lag_autolag_return_90]
factor_lags = [lag_30, lag_60, lag_90]
rf_col = 'risk_free_rate_based_on_g_curve'

r_2_results_carh_l_al_spec = []
mse_results_carh_l_al_spec = []

for name in data_ret.columns[2:]:

    # Characters 7..10 of the return column name are used as the ticker key
    # of the per-stock factor columns.
    ticker = name[7:11]
    beta_col = 'Hamada_beta_MRP_for_{}'.format(ticker)
    smb_col = 'Hamada_d_mcap_ratio_SMB_for_{}'.format(ticker)
    hml_col = 'Hamada_d_mcap_ratio_HML_for_{}'.format(ticker)
    autolag_cols = ['{}_premium_lag_{}_days'.format(name[7:], lag) for lag in autolag_periods]

    # Contemporaneous return, risk-free rate and factors.
    result = data[[name, beta_col, rf_col, smb_col, hml_col, 'MOM']].dropna()

    # Append SMB / HML / MOM shifted by each lag; the suffix yields
    # '<factor>_lag_<n>_days' column names.
    for lag in factor_lags:
        lagged_factors = data[[smb_col, hml_col, 'MOM']].shift(lag).dropna()
        lagged_factors = lagged_factors.add_suffix('_lag_{}_days'.format(lag))
        result = pd.concat([result, lagged_factors], axis=1, join="inner")

    # Macro regressors, matched on the shared index.
    result = pd.merge(result, macro_data, left_index=True, right_index=True)

    # Lagged own excess return (return minus risk-free rate), one column per horizon.
    premium = data[name] - data[rf_col]
    for lag, autolag_col in zip(autolag_periods, autolag_cols):
        autolag = premium.shift(lag).dropna().to_frame(name=autolag_col)
        result = pd.merge(result, autolag, left_index=True, right_index=True)

    Base = result.dropna()

    # Dependent variable: excess return of the stock over the risk-free rate.
    Y = (Base[name] - Base[rf_col]).to_frame(name='{} - risk free rate'.format(name))

    # Regressor order is kept fixed so that summaries are comparable across stocks.
    regressors = (
        [beta_col]
        + autolag_cols
        + [smb_col, hml_col]
        + ['{}_lag_{}_days'.format(col, lag) for lag in factor_lags for col in (smb_col, hml_col)]
        + ['MOM']
        + ['MOM_lag_{}_days'.format(lag) for lag in factor_lags]
        + ['dollar_30d_return', 'wti_30d_return']
    )
    X = Base[regressors]

    model = sm.OLS(Y, X)
    results = model.fit(cov_type = 'HC1')
    print('###################################################################################')
    print(ticker)
    print(results.summary())

    # Joint significance of the contemporaneous SMB, HML and MOM factors.
    hypotheses = '({} = 0), ({} = 0), (MOM = 0)'.format(smb_col, hml_col)
    f_test = results.f_test(hypotheses)
    print(f_test)
    # The results wrapper itself is not printed: its repr is only an object
    # address and carries no information beyond the summary above.

    r_2_results_carh_l_al_spec.append(results.rsquared_adj)
    mse_results_carh_l_al_spec.append(results.mse_resid)
    

###################################################################################
SBER
                                           OLS Regression Results                                          
Dep. Variable:     return_SBER_RM_Equity - risk free rate   R-squared (uncentered):                   0.680
Model:                                                OLS   Adj. R-squared (uncentered):              0.677
Method:                                     Least Squares   F-statistic:                              190.8
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                        0.00
Time:                                            19:27:39   Log-Likelihood:                          2860.9
No. Observations:                                    1979   AIC:                                     -5686.
Df Residuals:                                        1961   BIC:                                     -5585.
Df Model:                                      

###################################################################################
SNGS
                                           OLS Regression Results                                          
Dep. Variable:     return_SNGS_RM_Equity - risk free rate   R-squared (uncentered):                   0.616
Model:                                                OLS   Adj. R-squared (uncentered):              0.611
Method:                                     Least Squares   F-statistic:                              121.5
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   2.75e-270
Time:                                            19:27:39   Log-Likelihood:                          1744.0
No. Observations:                                    1417   AIC:                                     -3452.
Df Residuals:                                        1399   BIC:                                     -3357.
Df Model:                                      

###################################################################################
MTSS
                                           OLS Regression Results                                          
Dep. Variable:     return_MTSS_RM_Equity - risk free rate   R-squared (uncentered):                   0.515
Model:                                                OLS   Adj. R-squared (uncentered):              0.511
Method:                                     Least Squares   F-statistic:                              76.02
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   7.07e-210
Time:                                            19:27:40   Log-Likelihood:                          2699.4
No. Observations:                                    1979   AIC:                                     -5363.
Df Residuals:                                        1961   BIC:                                     -5262.
Df Model:                                      

###################################################################################
CHMF
                                           OLS Regression Results                                          
Dep. Variable:     return_CHMF_RM_Equity - risk free rate   R-squared (uncentered):                   0.474
Model:                                                OLS   Adj. R-squared (uncentered):              0.469
Method:                                     Least Squares   F-statistic:                              109.7
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   2.36e-280
Time:                                            19:27:40   Log-Likelihood:                          2257.4
No. Observations:                                    1979   AIC:                                     -4479.
Df Residuals:                                        1961   BIC:                                     -4378.
Df Model:                                      

                                           OLS Regression Results                                          
Dep. Variable:     return_MAGN_RM_Equity - risk free rate   R-squared (uncentered):                   0.537
Model:                                                OLS   Adj. R-squared (uncentered):              0.533
Method:                                     Least Squares   F-statistic:                              111.6
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   4.71e-284
Time:                                            19:27:40   Log-Likelihood:                          2009.0
No. Observations:                                    1979   AIC:                                     -3982.
Df Residuals:                                        1961   BIC:                                     -3881.
Df Model:                                              18                                                  
Covariance Type:            

###################################################################################
RSTI
                                           OLS Regression Results                                          
Dep. Variable:     return_RSTI_RM_Equity - risk free rate   R-squared (uncentered):                   0.751
Model:                                                OLS   Adj. R-squared (uncentered):              0.744
Method:                                     Least Squares   F-statistic:                              107.4
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   7.93e-177
Time:                                            19:27:40   Log-Likelihood:                          793.63
No. Observations:                                     638   AIC:                                     -1551.
Df Residuals:                                         620   BIC:                                     -1471.
Df Model:                                      

###################################################################################
NMTP
                                           OLS Regression Results                                          
Dep. Variable:     return_NMTP_RM_Equity - risk free rate   R-squared (uncentered):                   0.335
Model:                                                OLS   Adj. R-squared (uncentered):              0.329
Method:                                     Least Squares   F-statistic:                              45.00
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   7.06e-133
Time:                                            19:27:41   Log-Likelihood:                          1903.3
No. Observations:                                    1979   AIC:                                     -3771.
Df Residuals:                                        1961   BIC:                                     -3670.
Df Model:                                      

                                           OLS Regression Results                                          
Dep. Variable:     return_VSMO_RM_Equity - risk free rate   R-squared (uncentered):                   0.583
Model:                                                OLS   Adj. R-squared (uncentered):              0.577
Method:                                     Least Squares   F-statistic:                              70.20
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   7.18e-176
Time:                                            19:27:41   Log-Likelihood:                          1845.8
No. Observations:                                    1291   AIC:                                     -3656.
Df Residuals:                                        1273   BIC:                                     -3563.
Df Model:                                              18                                                  
Covariance Type:            

###################################################################################
MSRS
                                           OLS Regression Results                                          
Dep. Variable:     return_MSRS_RM_Equity - risk free rate   R-squared (uncentered):                   0.602
Model:                                                OLS   Adj. R-squared (uncentered):              0.591
Method:                                     Least Squares   F-statistic:                              59.45
Date:                                    Thu, 29 Apr 2021   Prob (F-statistic):                   6.02e-122
Time:                                            19:27:41   Log-Likelihood:                          941.66
No. Observations:                                     638   AIC:                                     -1847.
Df Residuals:                                         620   BIC:                                     -1767.
Df Model:                                      

# Build summary tables of adjusted R2 and MSE, averaged across all companies

In [25]:
# Ticker -> Russian display name. Only the values are used below, and they are
# applied to the table columns positionally.
# NOTE(review): this assumes every per-model result list is ordered like this
# dict (i.e. like data_ret.columns[2:]) -- confirm.
names = {'SBER':'Сбербанк', 'GAZP':'Газпром', 'LKOH':'Лукойл','GMKN':'Норникель' ,'NVTK':'Новатэк', 'SNGS':'Сургутнефтегаз',
         'PLZL':'Полюс','TATN':'Татнефть','ROSN':'Роснефть','MGNT':'Магнит','MTSS':'МТС', 'MOEX':'МосБиржа', 
         'IRAO':'Интер РАО', 'NLMK':'НЛМК', 'ALRS':'Алроса','CHMF':'Северсталь','VTBR':'ВТБ','RTKM':'Ростелеком',
         'PHOR':'Фосагро','AFKS':'АФК «Система»','MAGN':'ММК','PIKK':'ПИК',
         'HYDR':'Русгидро','FEES':'ФСК ЕЭС','AFLT':'Аэрофлот','LSRG':'ЛСР','RSTI':'Россети','UPRO':'Юнипро',
         'LNTA':'Лента','MVID':'М.Видео','TRMK':'ТМК','MTLR':'Мечел','NMTP':'НМТП','AKRN':'Акрон',
         'MSNG':'Мосэнерго','URKA':'Уралкалий','BANE':'Башнефть','VSMO':'ВСМПО-Ависма','NKNC':'Нижнекамскнефтехим',
         'GCHE':'Черкизово','SVAV':'Соллерс','MSTT':'Мостотрест','BSPB':'Банк Санкт-Петербург', 'MSRS':'МОЭСК',
         'KMAZ':'Камаз','RASP':'Распадская','OGKB':'ОГК №2','VZRZ':'Банк "Возрождение"'}

# One row per model specification, in the order the models were estimated.
model_labels = ['CAPM','Фама-Френч','Кархарт','Кархарт + лаги','Кархарт + лаги и автолаги', 'Кархарт + лаги и автолаги + специфические']
company_labels = list(names.values())

# Adjusted R2: models in rows, companies in columns, plus a row-wise average.
R2_total = pd.DataFrame(
    [
        r_2_results_capm,
        r_2_results_ff3,
        r_2_results_carh,
        r_2_results_carh_l,
        r_2_results_carh_l_al,
        r_2_results_carh_l_al_spec,
    ],
    index=model_labels,
    columns=company_labels,
)
R2_total['Средний исправленный R2'] = R2_total.mean(axis = 1)

# Residual MSE: same layout as the R2 table.
MSE_total = pd.DataFrame(
    [
        mse_results_capm,
        mse_results_ff3,
        mse_results_carh,
        mse_results_carh_l,
        mse_results_carh_l_al,
        mse_results_carh_l_al_spec,
    ],
    index=model_labels,
    columns=company_labels,
)
MSE_total['Средний MSE'] = MSE_total.mean(axis = 1)

# Plot summary

In [26]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Dual-axis line chart of the per-model averages: mean adjusted R2 on the
# primary y-axis and mean MSE on the secondary y-axis, with the model
# specifications along the x-axis.
fig = make_subplots(specs=[[dict(secondary_y=True)]])

# (table, averaged column, plot on secondary axis?) -- the MSE trace is added
# first so the trace order matches the original figure.
averaged_series = (
    (MSE_total, 'Средний MSE', True),
    (R2_total, 'Средний исправленный R2', False),
)
for table, column, on_secondary in averaged_series:
    fig.add_trace(
        go.Scatter(x=table.index, y=table[column], mode='lines+markers', name=column),
        secondary_y=on_secondary,
    )

# Y-axis titles, keyed by whether the axis is the secondary one.
axis_titles = (
    (True, "Средний по всем компаниям MSE"),
    (False, "Средний по всем компаниям исправленный R2"),
)
for on_secondary, title in axis_titles:
    fig.update_yaxes(title_text=title, secondary_y=on_secondary)

# Legend is placed inside the plotting area, vertically centred.
fig.update_layout(
    xaxis_title='Модель',
    autosize=True,
    legend={'yanchor': "middle", 'y': 0.5, 'xanchor': "left", 'x': 0.6},
)

fig.show()