In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" # Display the result of every expression in a cell, not only the last one.
from sklearn.linear_model import LinearRegression

In [48]:
# Load the monthly factor sheet and label its first column 'date'.
# The date is deliberately left numeric (YYYYMM, no datetime conversion):
# extract_RV2 compares it against floor(daily date / 100).
ffdata_month = pd.read_excel('./ff5FactorData.xlsx', sheet_name='monthlyData')
cols = ['date'] + ffdata_month.columns.to_list()[1:]
ffdata_month.columns = cols
# Drop 'RF' so that it is not picked up as a factor column downstream.
ffdata_month = ffdata_month.drop(columns='RF')
ffdata_month.head()
ffdata_month.tail()

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA
0,196307,-0.39,-0.47,-0.83,0.66,-1.15
1,196308,5.07,-0.79,1.67,0.4,-0.4
2,196309,-1.57,-0.48,0.18,-0.76,0.24
3,196310,2.53,-1.29,-0.1,2.75,-2.24
4,196311,-0.85,-0.84,1.71,-0.45,2.22


Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA
682,202005,5.58,1.92,-4.95,0.71,-3.28
683,202006,2.46,1.94,-2.22,0.03,0.34
684,202007,5.77,-3.03,-1.31,0.55,1.06
685,202008,7.63,-0.94,-2.95,4.27,-1.44
686,202009,-3.63,0.1,-2.59,-1.18,-1.81


In [49]:
# Load the daily factor sheet and label its first column 'date'.
# The date is deliberately left numeric (YYYYMMDD, no datetime conversion):
# extract_RV2 derives each day's month with floor(date / 100).
ffdata_day = pd.read_excel('./ff5FactorData.xlsx', sheet_name='dailyData')
cols = ['date'] + ffdata_day.columns.to_list()[1:]
ffdata_day.columns = cols
# Drop 'RF', mirroring the monthly frame.
ffdata_day = ffdata_day.drop(columns='RF')
ffdata_day.head()

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA
0,19630701,-0.67,0.0,-0.32,-0.01,0.15
1,19630702,0.79,-0.27,0.27,-0.07,-0.19
2,19630703,0.63,-0.17,-0.09,0.17,-0.33
3,19630705,0.4,0.08,-0.28,0.08,-0.33
4,19630708,-0.63,0.04,-0.18,-0.29,0.13


In [50]:
def extract_RV2(data_day, data_month):
    """Compute the monthly realized variance (sum of squared daily returns) per factor.

    Parameters
    ----------
    data_day : pandas.DataFrame
        High-frequency (daily) data. Column 0 holds numeric dates for which
        ``floor(date / 100)`` is the month key (e.g. 19630701 -> 196307).
        It must contain every factor column named in ``data_month``.
    data_month : pandas.DataFrame
        Low-frequency (monthly) data. Column 0 holds the month keys
        (e.g. 196307); every other column name is treated as a factor.

    Returns
    -------
    numpy.ndarray
        Float array with one row per row of ``data_month`` and one column per
        factor. Entry ``[t, j]`` is the sum of squared daily returns of factor
        ``j`` over the days belonging to month ``t``; months without any daily
        observation get 0.
    """
    # Work on positional arrays so the result does not depend on the frames'
    # index labels (a filtered or re-indexed frame is handled correctly).
    month_keys = data_month.iloc[:, 0].to_numpy()
    day_month_keys = np.floor(data_day.iloc[:, 0].to_numpy() / 100)
    factor_names = data_month.columns.to_list()[1:]

    # One pass over the daily data: square, then sum within each month.
    squared_returns = data_day[factor_names] ** 2
    monthly_sums = squared_returns.groupby(day_month_keys).sum()

    # Align to the requested months (keeps their order and any repeats).
    aligned = monthly_sums.reindex(month_keys, fill_value=0.0)
    return aligned.to_numpy(dtype=float)

# Convert the original factors to the volatility-managed factors

In [51]:
def convert_into_volatility_managed_factors(data_day, data_month, c=1):
    """Scale each monthly factor by the inverse of the previous month's realized variance.

    For every factor column the result in row ``t`` is
    ``c * f[t] / RV2[t - 1]``, where ``RV2`` comes from ``extract_RV2``.
    The first column (the date) is left untouched.

    Parameters
    ----------
    data_day : pandas.DataFrame
        Daily data, passed to ``extract_RV2``.
    data_month : pandas.DataFrame
        Monthly data; column 0 is the date, the remaining columns are factors.
        The input frame is not modified.
    c : float, optional
        Constant scaling applied to every managed factor. Defaults to 1,
        which reproduces the previous hard-coded behavior.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data_month`` whose factor columns hold the
        volatility-managed factors. Row 0 is NaN because no earlier month's
        realized variance exists.
    """
    # Construction of RV2
    RV2 = extract_RV2(data_day, data_month)

    # Realized variance of the preceding month, aligned row by row.
    lagged_RV2 = np.full_like(RV2, np.nan, dtype=float)
    lagged_RV2[1:] = RV2[:-1]

    # construct volatility-managed factors (Fvm = c*f/RV^2)
    res = data_month.copy()
    res.iloc[:, 1:] = c * res.iloc[:, 1:] / lagged_RV2
    return res

# Jensen's alpha test for finding the best factor

In [98]:
# Jensen's alpha test
# Jensen's alpha = actual fund return - (beta * mean market return)
def jensen_test_all_factors(data_day, data_month): # return : risk_adjusted_return, sharpe_ratio, tstat
    """Regress each volatility-managed factor on its original factor and test the alpha.

    For every factor column of ``data_month`` the managed factor (from
    ``convert_into_volatility_managed_factors``) is regressed on the original
    monthly factor with an intercept. The risk-adjusted return is the managed
    factor minus ``beta`` times the original factor.

    Parameters
    ----------
    data_day : pandas.DataFrame
        Daily data, forwarded to ``convert_into_volatility_managed_factors``.
    data_month : pandas.DataFrame
        Monthly data; column 0 is the date, the remaining columns are factors.

    Returns
    -------
    tuple of three lists, each with one entry per factor column (in column order):
        risk-adjusted return series, their mean/std ratio, and the
        t-statistic ``sqrt(n) * mean / std``.
    """
    # convert into volatility managed factors
    cdata = convert_into_volatility_managed_factors(data_day, data_month)
    factor_names = data_month.columns.to_list()[1:]

    risk_adjusted_returns_list = []
    sharpe_ratio_of_risk_adjusted_returns_list = []
    tstat_JensenAlpha_list = []

    for i, factor_name in enumerate(factor_names):
        # The first row is skipped: the managed factor is NaN there.
        y = cdata.iloc[1:, 1 + i]
        x = data_month[factor_name].iloc[1:].to_numpy().reshape(-1, 1)

        # Jensen_test: only the slope is needed; the mean of the
        # risk-adjusted returns below plays the role of the alpha.
        lr = LinearRegression()
        lr.fit(x, y)
        coef = lr.coef_[0]

        risk_adjusted_returns = y - x.reshape(-1) * coef
        sharpe_ratio_of_risk_adjusted_returns = risk_adjusted_returns.mean()/risk_adjusted_returns.std()
        tstat_JensenAlpha = np.sqrt(len(y)) * sharpe_ratio_of_risk_adjusted_returns

        # save results into lists
        risk_adjusted_returns_list.append(risk_adjusted_returns)
        sharpe_ratio_of_risk_adjusted_returns_list.append(sharpe_ratio_of_risk_adjusted_returns)
        tstat_JensenAlpha_list.append(tstat_JensenAlpha)

    return risk_adjusted_returns_list, sharpe_ratio_of_risk_adjusted_returns_list, tstat_JensenAlpha_list

# volatility-managed factors

Volatility-managed factor는 아래와 같다.

In [86]:
# Display the volatility-managed monthly factors. Row 0 is NaN because the
# first month has no preceding month's realized variance to scale by.
convert_into_volatility_managed_factors(ffdata_day, ffdata_month)

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA
0,196307,,,,,
1,196308,1.077623,-1.714038,2.077890,0.695048,-0.416710
2,196309,-0.532438,-0.671517,0.197976,-2.063535,0.382897
3,196310,0.828503,-1.210018,-0.150534,4.195911,-3.362858
4,196311,-0.203505,-0.451832,0.679515,-0.265111,1.086957
...,...,...,...,...,...,...
682,202005,0.038163,0.042726,-0.058185,0.060931,-0.803646
683,202006,0.055307,0.066791,-0.019603,0.003407,0.085998
684,202007,0.078579,-0.178834,-0.017009,0.066731,0.389234
685,202008,0.417378,-0.039226,-0.046889,0.586942,-0.209251


# Which volatility managed factor is the best?

In [99]:
# Keep only the t-statistics (third element of the returned tuple), one per
# factor column. Indexing is used instead of unpacking into `_` so that
# IPython's `_` (last output) variable is not overwritten.
jensen_tstat_result = jensen_test_all_factors(ffdata_day, ffdata_month)[2]
jensen_tstat_result

[1.6249891151359122,
 -0.1505203319813786,
 1.574860757701013,
 2.7478259598042,
 0.314184691650182]

위 t-통계량은 column 순서대로 Mkt-RF, SMB, HML, RMW, CMA에 해당한다. 따라서 t-통계량이 가장 큰(약 2.75) RMW가 제일 좋게 나온다.

## answer : RMW(Robust Minus Weak)