In [1]:
import numpy as np 
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt

import ray

from tools_ryu import mytool
from multi_run_v3.data_pp import DataPreprocess
from multi_run_v3.momentum import make_group_mask
from multi_run_v3.momentum_strategy import *
from multi_run_v2.initialize_v2 import make_market_index
#from multi_run_v3.backtest import simulate_longonly, simulate_longshort

2023-08-26 10:50:31,526	INFO worker.py:1625 -- Started a local Ray instance.


In [2]:
# Weekday labels used as rebalancing days; further down they are also
# interpolated into pandas weekly resample rules (f'W-{day}').
days_lst = ["MON","TUE","WED","THU","FRI","SAT","SUN"]

# Raw input handed to DataPreprocess.initialize in the next cell.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# from a trusted source.
data = pd.read_pickle("Data/cmktcap_data_final.pickle")

In [3]:
# Create the preprocessing helper, feed it the raw data, then build
# `data_pp.mask`, which the strategy cells below pass as `mask_df`.
# NOTE(review): the two thresholds are presumably minimum market cap / volume
# and `ma` presumably switches on a moving-average filter — confirm in
# DataPreprocess.make_mask.
data_pp = DataPreprocess()
data_pp.initialize(data=data)
data_pp.make_mask(mktcap_thresh=1_000_000, vol_thresh=1_000_000, ma=True)

In [4]:
# Group masks per rebalancing weekday: result[day] holds whatever
# make_group_mask returns for that weekday with n_group=5.
# The loop iterates the shared `days_lst` rather than a second hard-coded copy
# of the weekday list, so the two can never drift apart.
result = {}
for day in days_lst:
    mask_dict = make_group_mask(
        price_df=data_pp.price_df,
        weekly_rtn_df=data_pp.weekly_rtn_df,
        mask_df=data_pp.mask,
        n_group=5,
        day_of_week=day,
    )
    result[day] = mask_dict

In [5]:
# Market index series; the regression cells below compound it per week via
# (mkt + 1).resample(...).prod() - 1, i.e. it is treated as a return series.
# NOTE(review): the three trailing positional values equal the arguments given
# to data_pp.make_mask (mktcap_thresh, vol_thresh, ma) — presumably the same
# meaning here; confirm against make_market_index's signature.
mkt = make_market_index(
    data_pp.price_df,
    data_pp.mktcap_df,
    data_pp.vol_df,
    1_000_000,
    1_000_000,
    True,
)

In [6]:
# Create the Ray object references: each preprocessed frame is put into the
# object store once, and the remote strategy calls below receive the
# references instead of the frames themselves.
(
    ray_price,
    ray_mktcap,
    ray_vol,
    ray_daily_rtn,
    ray_weekly_rtn,
    ray_mask,
) = (
    ray.put(frame)
    for frame in (
        data_pp.price_df,
        data_pp.mktcap_df,
        data_pp.vol_df,
        data_pp.daily_rtn_df,
        data_pp.weekly_rtn_df,
        data_pp.mask,
    )
)

In [7]:
# Remove Terra Luna (column "4172") from price / market cap / volume and
# re-publish the trimmed frames to Ray.  Currently disabled: every line below
# is commented out, so this cell has no effect.
# NOTE(review): `cmkt_data` is not defined in any visible cell; if this is
# re-enabled it presumably needs to use data_pp.price_df / mktcap_df / vol_df
# instead — confirm before uncommenting.

#price_pp = cmkt_data["price"].drop("4172", axis=1)
#mktcap_pp = cmkt_data["mktcap"].drop("4172", axis=1)
#vol_pp = cmkt_data["vol"].drop("4172", axis=1)
#ray_price = ray.put(price_pp)
#ray_mktcap = ray.put(mktcap_pp)
#ray_vol = ray.put(vol_pp)

### Value Weighted, Marketcap Capped 

In [9]:
# Alpha regressions for the value-weighted weekly momentum strategy, repeated
# for four look-back windows.  For each window: run the strategy once per
# rebalancing weekday, turn every portfolio series into weekly changes, pair
# them with the compounded weekly market return, pool the weekdays, and print
# the OLS intercept ("coef") with its Newey-West (HAC) t-statistic.
import statsmodels.api as sm

# n is the HAC lag count used for the matching look-back window
# (7 days -> 0 lags, 14 -> 1, 21 -> 2, 28 -> 3).
n_list = [0,1,2,3]
look_back_list = [7,14,21,28]

for n, look in zip(n_list, look_back_list):
    print('r', n+1, ',0')

    # One remote back-test per rebalancing weekday.  vol_df and num_cap are
    # not passed in this variant (they were commented out in the call).
    momentum_capped_future2 = []
    for day in days_lst:
        momentum_capped_future2.append(
            weekly_momentum_value_weighted.remote(
                price_df=ray_price,
                mktcap_df=ray_mktcap,
                daily_rtn_df=ray_daily_rtn,
                weekly_rtn_df=ray_weekly_rtn,
                mask_df=ray_mask,
                fee_rate=0.0015,
                n_group=5,
                day_of_week=day,
                margin='cross',
                look_back=look,
                leverage_ratio=2,
            )
        )

    # Blocks until every task has finished; results come back in submit order.
    momentum_capped_result2 = ray.get(momentum_capped_future2)

    momentum_result_dict_c2 = {
        weekday: momentum_capped_result2[slot]
        for slot, weekday in enumerate(["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"])
    }

    # result_dict[day][portfolio] -> two-column frame: 'y' (portfolio) and
    # 'X' (market), both sampled on weeks anchored at `day`.
    result_dict = {}
    for day in days_lst:
        per_portfolio = momentum_result_dict_c2[day]
        # Daily market returns compounded into weeks ending on `day`.
        weekly_mkt = (mkt + 1).resample(f'W-{day}').prod() - 1

        small_dict = {}
        for long_ in momentum_result_dict_c2['MON'].keys():
            # 7-row change of the portfolio series, last value of each week;
            # the first weekly row is dropped.
            # NOTE(review): presumably a daily portfolio-value series, making
            # this a weekly return — confirm against the strategy's output.
            pf_weekly = (
                per_portfolio[long_]
                .pct_change(7)
                .fillna(0)
                .resample(f'W-{day}')
                .last()
                .iloc[1:]
            )
            small_dict[long_] = pd.concat(
                [pf_weekly, weekly_mkt.loc[pf_weekly.index]],
                axis=1,
                keys=['y', 'X'],
            )

        result_dict[day] = small_dict

    for what in ['Q1','Q2', 'Q3', 'Q4', 'Q5', 'LS']:
        # Pool, across all weekdays, every portfolio whose key contains `what`.
        q1 = [
            frame
            for per_day in result_dict.values()
            for key, frame in per_day.items()
            if what in key
        ]

        # Rows in which either column is exactly 0 (or missing) are discarded.
        df = pd.concat(q1).replace(0, np.nan).dropna()

        # Regress the portfolio return on a constant plus the market return.
        X = sm.add_constant(df['X'])
        y = df['y']

        # Same point estimates as plain OLS; standard errors are replaced by
        # Newey-West (HAC) ones with `n` lags.
        results_nw = sm.OLS(y, X).fit().get_robustcov_results(
            cov_type='HAC', use_t=True, maxlags=n
        )

        # Index 0 is the constant added by add_constant.
        const_coef = results_nw.params[0]
        const_tstats = results_nw.tvalues[0]
        coefficients = results_nw.params

        print(f"{what}", "coef:", round(const_coef, 4), " / t-stats:", round(const_tstats, 4))
        print(coefficients)

r 1 ,0
Q1 coef: -0.0171  / t-stats: -9.3054
[-0.01712649  1.06561635]
Q2 coef: -0.0079  / t-stats: -5.5274
[-0.00792441  1.0729289 ]
Q3 coef: -0.0007  / t-stats: -0.474
[-7.16227023e-04  1.06672112e+00]
Q4 coef: 0.0026  / t-stats: 1.9663
[0.00263612 1.10422144]
Q5 coef: 0.0052  / t-stats: 2.7905
[0.00518164 1.0335183 ]
LS coef: 0.0589  / t-stats: 3.1466
[0.05885775 0.53258847]
r 2 ,0
Q1 coef: -0.02  / t-stats: -10.587
[-0.01997026  1.05212606]
Q2 coef: -0.0073  / t-stats: -4.3312
[-0.00732794  1.11255845]
Q3 coef: -0.001  / t-stats: -0.6537
[-9.96457928e-04  1.06835296e+00]
Q4 coef: 0.0001  / t-stats: 0.0656
[8.54907875e-05 1.06277149e+00]
Q5 coef: 0.0098  / t-stats: 5.397
[0.00981224 1.0288889 ]
LS coef: 0.0482  / t-stats: 5.5322
[0.04817814 0.172314  ]
r 3 ,0
Q1 coef: -0.014  / t-stats: -5.923
[-0.01398572  1.10306872]
Q2 coef: -0.0084  / t-stats: -5.5431
[-0.0084133   1.08868715]
Q3 coef: -0.0017  / t-stats: -1.2635
[-0.00173983  1.05993325]
Q4 coef: 0.0024  / t-stats: 1.7821
[0.002

In [None]:
# Alpha regressions for the capped (num_cap=0.95) value-weighted weekly
# momentum strategy at leverage 1, repeated for four look-back windows.
# For each window: run the strategy once per rebalancing weekday, turn every
# portfolio series into weekly changes, pair them with the compounded weekly
# market return, pool the weekdays, and print the OLS intercept ("coef") with
# its Newey-West (HAC) t-statistic.
#
# Fix: the previous version submitted a single remote task
# (day_of_week='SUN') yet read seven results by position, which raises
# IndexError on the second lookup.  One task per weekday is now submitted,
# matching the other strategy cells, and results are keyed through `days_lst`.
import statsmodels.api as sm

# n is the HAC lag count used for the matching look-back window
# (7 days -> 0 lags, 14 -> 1, 21 -> 2, 28 -> 3).
n_list = [0,1,2,3]
look_back_list = [7,14,21,28]

for n, look in zip(n_list, look_back_list):
    print('r', n+1, ',0')

    # One remote back-test per rebalancing weekday.
    momentum_capped_future2 = [
        weekly_momentum_value_weighted.remote(
            price_df=ray_price,
            mktcap_df=ray_mktcap,
            #vol_df=ray_vol,
            daily_rtn_df=ray_daily_rtn,
            weekly_rtn_df=ray_weekly_rtn,
            mask_df=ray_mask,
            fee_rate=0.0015,
            n_group=5,
            day_of_week=day,
            num_cap=0.95, # 5% capped
            margin='cross',
            look_back=look,
            leverage_ratio=1,
        )
        for day in days_lst
    ]

    # Blocks until every task has finished; results come back in submit order,
    # so zipping with days_lst pairs each result with its weekday.
    momentum_capped_result2 = ray.get(momentum_capped_future2)
    momentum_result_dict_c2 = dict(zip(days_lst, momentum_capped_result2))

    # result_dict[day][portfolio] -> two-column frame: 'y' (portfolio) and
    # 'X' (market), both sampled on weeks anchored at `day`.
    result_dict = {}

    for day in days_lst:
        small_dict = {}
        for long_ in momentum_result_dict_c2['MON'].keys():
            # 7-row change of the portfolio series, last value of each week;
            # the first weekly row is dropped.
            # NOTE(review): presumably a daily portfolio-value series, making
            # this a weekly return — confirm against the strategy's output.
            feature = momentum_result_dict_c2[day][long_].pct_change(7).fillna(0).resample(f'W-{day}').last().iloc[1:]
            # Daily market returns compounded into the same weeks.
            label = ((mkt + 1).resample(f'W-{day}').prod() - 1).loc[feature.index]

            df = pd.concat([feature, label], axis=1, keys=['y', 'X'])
            small_dict[long_] = df

        result_dict[day] = small_dict

    for what in ['Q1','Q2', 'Q3', 'Q4', 'Q5', 'LS']:
        # Pool, across all weekdays, every portfolio whose key contains `what`.
        q1 = [
            frame
            for per_day in result_dict.values()
            for key, frame in per_day.items()
            if what in key
        ]

        df = pd.concat(q1)
        # Rows in which either column is exactly 0 (or missing) are discarded.
        df = df.replace(0, np.nan).dropna()

        # Regress the portfolio return on a constant plus the market return.
        X = sm.add_constant(df['X'])
        y = df['y']

        model = sm.OLS(y, X)
        results = model.fit()

        # Same point estimates; standard errors replaced by Newey-West (HAC)
        # ones with `n` lags.
        results_nw = results.get_robustcov_results(cov_type='HAC', use_t=True, maxlags=n)

        # Index 0 is the constant added by add_constant.
        const_coef = results_nw.params[0]
        const_tstats = results_nw.tvalues[0]
        coefficients = results_nw.params

        print(f"{what}", "coef:", round(const_coef, 4), " / t-stats:", round(const_tstats, 4))
        print(coefficients)

In [18]:
# Alpha regressions for jk_volume_weighted_capped (num_cap=0.95, leverage 2):
# run the strategy once per rebalancing weekday, turn every portfolio series
# into weekly changes, pair them with the compounded weekly market return,
# pool the weekdays, and print the OLS intercept ("coef") with its
# Newey-West (HAC, 2 lags) t-statistic.
import statsmodels.api as sm

# One remote back-test per rebalancing weekday.
momentum_capped_future2 = []
for day in days_lst:
    momentum_capped_future2.append(
        jk_volume_weighted_capped.remote(
            mktcap_df=ray_mktcap,
            vol_df=ray_vol,
            daily_rtn_df=ray_daily_rtn,
            mask_df=ray_mask,
            fee_rate=0.0015,
            day_of_week=day,
            num_cap=0.95, # 5% capped
            n_group=5,
            leverage_ratio=2,
        )
    )

# Blocks until every task has finished; results come back in submit order.
momentum_capped_result2 = ray.get(momentum_capped_future2)

momentum_result_dict_c2 = {
    weekday: momentum_capped_result2[slot]
    for slot, weekday in enumerate(["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"])
}

# result_dict[day][portfolio] -> two-column frame: 'y' (portfolio) and
# 'X' (market), both sampled on weeks anchored at `day`.
result_dict = {}
for day in days_lst:
    per_portfolio = momentum_result_dict_c2[day]
    # Daily market returns compounded into weeks ending on `day`.
    weekly_mkt = (mkt + 1).resample(f'W-{day}').prod() - 1

    small_dict = {}
    for long_ in momentum_result_dict_c2['MON'].keys():
        # 7-row change of the portfolio series, last value of each week; the
        # first weekly row is dropped.
        pf_weekly = (
            per_portfolio[long_]
            .pct_change(7)
            .fillna(0)
            .resample(f'W-{day}')
            .last()
            .iloc[1:]
        )
        small_dict[long_] = pd.concat(
            [pf_weekly, weekly_mkt.loc[pf_weekly.index]],
            axis=1,
            keys=['y', 'X'],
        )
    result_dict[day] = small_dict

for what in ['Q1','Q2', 'Q3', 'Q4', 'Q5', 'LS']:
    # Pool, across all weekdays, every portfolio whose key contains `what`.
    q1 = [
        frame
        for per_day in result_dict.values()
        for key, frame in per_day.items()
        if what in key
    ]

    # Rows in which either column is exactly 0 (or missing) are discarded.
    df = pd.concat(q1).replace(0, np.nan).dropna()

    # Regress the portfolio return on a constant plus the market return.
    X = sm.add_constant(df['X'])
    y = df['y']

    # Same point estimates as plain OLS; standard errors are replaced by
    # Newey-West (HAC) ones with a fixed 2 lags.
    results_nw = sm.OLS(y, X).fit().get_robustcov_results(
        cov_type='HAC', use_t=True, maxlags=2
    )

    # Index 0 is the constant added by add_constant.
    const_coef = results_nw.params[0]
    const_tstats = results_nw.tvalues[0]

    print(f"{what}", "coef:", round(const_coef, 4), " / t-stats:", round(const_tstats, 4))

Q1 coef: -0.0105  / t-stats: -4.7287
Q2 coef: -0.0048  / t-stats: -2.5413
Q3 coef: 0.0001  / t-stats: 0.037
Q4 coef: -0.0022  / t-stats: -1.3279
Q5 coef: -0.0085  / t-stats: -4.7709
LS coef: -0.0022  / t-stats: -0.3554


### Volume Weighted, Capped

In [None]:
# NOTE(review): this dict is not read by any visible cell.
momentum_capped_cmkt = {}

# Submit one capped, volume-weighted weekly momentum back-test per
# rebalancing weekday (leverage 1) ...
momentum_capped_future = []
for day in days_lst:
    momentum_capped_future.append(
        weekly_momentum_volume_weighted_capped.remote(
            price_df=ray_price,
            mktcap_df=ray_mktcap,
            vol_df=ray_vol,
            daily_rtn_df=ray_daily_rtn,
            weekly_rtn_df=ray_weekly_rtn,
            mask_df=ray_mask,
            fee_rate=0.0015,
            n_group=5,
            day_of_week=day,
            num_cap=0.95, # 5%
            leverage_ratio=1,
        )
    )

# ... then block until all of them have finished (results in submit order).
momentum_capped_result = ray.get(momentum_capped_future)

In [None]:
# Key the seven back-test results by weekday: position i of
# momentum_capped_result belongs to the i-th weekday label.
momentum_result_dict_c = {
    weekday: momentum_capped_result[slot]
    for slot, weekday in enumerate(["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"])
}

In [None]:
# Per weekday: place that day's portfolio series side by side, one column per
# portfolio key.
final_c2 = {}
for day in days_lst:
    per_portfolio = momentum_result_dict_c[day]
    final_c2[day] = pd.concat(per_portfolio.values(), axis=1, keys=per_portfolio.keys())

# Per portfolio: combine the seven weekday variants into one series and
# convert it to period-over-period returns.
fin_c2 = {}
for pf in ["Long_Q1","Long_Q2","Long_Q3","Long_Q4","Long_Q5","LS"]:
    # Row-wise sum over the weekday variants; missing entries count as 1.
    weekday_columns = [frame[pf] for frame in final_c2.values()]
    concat_series_c = pd.concat(weekday_columns, axis=1).fillna(1).sum(1)

    # Set the portfolio value on the day before the first observation to 7.
    # NOTE(review): presumably one unit of starting value per weekday variant.
    seed_day = concat_series_c.index[0] - pd.Timedelta(days=1)
    concat_series_c[seed_day] = 7

    # sort_index moves the appended seed row to the front; the first change is
    # undefined and is filled with 0.
    fin_c2[pf] = concat_series_c.sort_index().pct_change().fillna(0)

In [None]:
from statsmodels.api import OLS, add_constant

def run_alpha_regression(return_dict:dict, 
                         mkt_rtn:pd.Series,
                         constant:bool=True,
                         weekly:bool=False):
    '''Regress each portfolio's returns on the market return and print the fit.

    Parameters
    ----------
    return_dict : dict
        Maps a portfolio name to its return series.
    mkt_rtn : pd.Series
        Return series of the market index (the regressor).
    constant : bool, default True
        When True an intercept column is added to the regressor.
    weekly : bool, default False
        When True both series are resampled to 'W' and averaged within each
        week; otherwise the first row of each series (the investment start
        day) is dropped before regressing.

    Returns
    -------
    None
        One summary per portfolio is printed; nothing is returned.
    '''
    def _prepare(series):
        # Same preprocessing for the regressor and every portfolio series.
        return series.resample('W').mean() if weekly else series.iloc[1:]

    mkt_rtn = _prepare(mkt_rtn)

    for key, strategy_df in return_dict.items():
        endog = _prepare(strategy_df)
        exog = add_constant(mkt_rtn) if constant else mkt_rtn
        result = OLS(endog, exog).fit()

        print(f"{key} Regression Result")
        print(result.summary2())

In [None]:
# Market series restricted to 2017-11-30 onward (label-based slice on mkt's index).
market = mkt.loc['2017-11-30':]

# Regression against the value-weighted portfolios.
# NOTE(review): `fin_c` is not defined in any visible cell (only `fin_c2` is
# built above), so this raises NameError on a fresh kernel — presumably it was
# built from a value-weighted result in a cell that has since been removed;
# confirm and restore it before running.
run_alpha_regression(return_dict=fin_c,
                     mkt_rtn=market,
                     weekly=False
                     )

In [None]:
# Regression against the volume-weighted portfolios, using the function's
# defaults (intercept included, daily data with the first row dropped).
run_alpha_regression(return_dict=fin_c2, mkt_rtn=market)