In [1]:
import numpy as np 
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt

import statsmodels.api as sm
from tqdm import tqdm

from multi_run_v3.data_pp import DataPreprocess
from multi_run_v3.momentum_strategy_group import *

2023-09-03 10:06:50,227	INFO worker.py:1625 -- Started a local Ray instance.


In [2]:
# Load the pickled dataset from a path relative to the current working directory.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
data = pd.read_pickle("Data/cmc_final_data.pickle")
# Three-letter weekday codes, in the form used as the suffix of "W-<DAY>" resample rules.
# NOTE(review): in the visible cells this list is only referenced from commented-out code.
days_lst = ["MON","TUE","WED","THU","FRI","SAT","SUN"]

# Build the project preprocessing helper and hand it the raw data.
# Later cells read data_pp.price_df, data_pp.weekly_rtn_df, data_pp.mktcap_df and data_pp.mask.
data_pp = DataPreprocess()
data_pp.initialize(data=data)
# NOTE(review): presumably builds the tradable-universe mask from a market-cap and a volume
# threshold; the meaning of `ma=True` is defined inside DataPreprocess — confirm there.
data_pp.make_mask(mktcap_thresh=1_000_000,
                  vol_thresh=1_000_000,
                  ma=True)

In [3]:
def make_group_percentile(
    price_df:pd.DataFrame, 
    weekly_rtn_df:pd.DataFrame,
    mask_df:pd.DataFrame,
    n_group:int,
    day_of_week:str,
    reb:"str | int" = '1',  # rebalancing interval in weeks: 1 = weekly, 2 = every two weeks
    look_back:int = 7,
    coin_group:int = 20
    ): 
    '''
    Return cross-sectional percentile ranks of look-back returns on each rebalancing date.

    Note that the result is a frame of percentile ranks (values in (0, 1]), not a
    group mask; `n_group` is only used to decide when the sample starts.

    Parameters
    ----------
    price_df : daily prices, indexed by date (one column per coin).
    weekly_rtn_df : pre-computed 7-day returns; used as-is only when `look_back == 7`.
    mask_df : universe mask multiplied onto the returns (NaN excludes a coin).
    n_group : number of groups the universe will be split into.
    day_of_week : rebalancing weekday, one of [MON,TUE,WED,THU,FRI,SAT,SUN].
    reb : rebalancing interval in weeks, as a string or an int ('1' / 1 = weekly).
    look_back : return window in days; when it is not 7 the returns are recomputed
        from `price_df`.
    coin_group : minimum number of coins required per group.

    Returns
    -------
    pd.DataFrame
        Row-wise percentile ranks (ties broken by column order), starting at the
        first rebalancing date with at least `n_group * coin_group` ranked coins.

    Raises
    ------
    IndexError
        If no rebalancing date has at least `n_group * coin_group` coins.
    '''
    last_day = price_df.index[-1]
    # Building the rule with an f-string lets `reb` be either '2' or 2.
    freq = f"{reb}W-{day_of_week}"

    # Keep the last available value of each rebalancing period; slicing up to
    # `last_day` drops a trailing bin whose label lies beyond the price history.
    weekly_mask = mask_df.resample(freq).last().loc[:last_day]

    if look_back != 7:
        weekly_rtn_df = price_df.pct_change(look_back, fill_method=None)

    weekly_rtn = weekly_rtn_df.resample(freq).last().loc[:last_day]
    weekly_rtn_masked = weekly_rtn * weekly_mask

    # The strategy starts on the first date with enough coins to fill every group.
    min_coins = n_group * coin_group
    cnt = weekly_rtn_masked.count(axis=1)
    eligible_dates = cnt[cnt >= min_coins].index
    if len(eligible_dates) == 0:
        raise IndexError(
            f"no rebalancing date has at least {min_coins} coins "
            f"(n_group={n_group}, coin_group={coin_group})"
        )
    strategy_start = eligible_dates[0]

    # Cross-sectional percentile rank per rebalancing date.
    pct_df = weekly_rtn_masked.loc[strategy_start:].rank(axis=1, method="first", pct=True)
    return pct_df

In [23]:
# Results with a constant (intercept): regress next-period rank on current rank.
holding = [1,2,3,4,5,6,7,8]
look_back_list = [i * 7 for i in holding]

# NOTE(review): y is the rank of the same look_back-day return taken one rebalancing
# period later. Whenever look_back exceeds the rebalancing interval (7 * reb days) the
# x and y return windows overlap, which mechanically pushes the slope toward 1.
# Confirm this is intended; otherwise rank y on a non-overlapping forward return.
for reb in map(str, holding):  # make_group_percentile expects the interval as a string
    for look_back in look_back_list:
        pct_df = make_group_percentile(
            price_df = data_pp.price_df,
            weekly_rtn_df = data_pp.weekly_rtn_df, 
            mask_df = data_pp.mask,
            n_group = 5,
            day_of_week = 'FRI',
            coin_group=20,
            look_back=look_back,
            reb=reb
        )

        # Long format: one row per (coin, date) with the current rank (x) and the
        # rank on the next rebalancing date (y); rows missing either are dropped.
        current = pct_df.melt()
        following = pct_df.shift(-1).melt()
        d = pd.concat([current, following.iloc[:, [1]]], axis=1).dropna()
        d.columns = ['coin_id', 'x', 'y']

        ols =  sm.OLS(d.y, sm.add_constant(d.x))
        result = ols.fit()
        # params / tvalues are label-indexed Series; use .iloc for positional access
        # (integer keys on a labelled Series are deprecated in recent pandas).
        print("future-week:", reb, "look-back windows (days):",  look_back)
        print(f'Constant: {result.params.iloc[0].round(4)},  Coef: {result.params.iloc[1].round(4)}')
        print(f'T-stat for Constant: {result.tvalues.iloc[0].round(4)}, T-stat for Coef: {result.tvalues.iloc[1].round(4)}')

future-week: 1 look-back windows (days): 7
Constant: 0.5074,  Coef: -0.018
T-stat for Constant: 305.496, T-stat for Coef: -6.2738
future-week: 1 look-back windows (days): 14
Constant: 0.2749,  Coef: 0.4409
T-stat for Constant: 184.4338, T-stat for Coef: 171.7403
future-week: 1 look-back windows (days): 21
Constant: 0.1943,  Coef: 0.5997
T-stat for Constant: 146.0144, T-stat for Coef: 262.0581
future-week: 1 look-back windows (days): 28
Constant: 0.1476,  Coef: 0.6904
T-stat for Constant: 122.437, T-stat for Coef: 333.4546
future-week: 1 look-back windows (days): 35
Constant: 0.1214,  Coef: 0.7436
T-stat for Constant: 108.5337, T-stat for Coef: 386.838
future-week: 1 look-back windows (days): 42
Constant: 0.1023,  Coef: 0.7839
T-stat for Constant: 98.5868, T-stat for Coef: 438.6651
future-week: 1 look-back windows (days): 49
Constant: 0.0892,  Coef: 0.8116
T-stat for Constant: 91.079, T-stat for Coef: 481.2265
future-week: 1 look-back windows (days): 56
Constant: 0.0799,  Coef: 0.831
T-

In [24]:
# Results without a constant: regress next-period rank on current rank through the origin.
holding = [1,2,3,4,5,6,7,8]
look_back_list = [i * 7 for i in holding]

# NOTE(review): y is the rank of the same look_back-day return taken one rebalancing
# period later. Whenever look_back exceeds the rebalancing interval (7 * reb days) the
# x and y return windows overlap, which mechanically pushes the slope toward 1.
# Confirm this is intended; otherwise rank y on a non-overlapping forward return.
for reb in map(str, holding):  # make_group_percentile expects the interval as a string
    for look_back in look_back_list:
        pct_df = make_group_percentile(
            price_df = data_pp.price_df,
            weekly_rtn_df = data_pp.weekly_rtn_df, 
            mask_df = data_pp.mask,
            n_group = 5,
            day_of_week = 'FRI',
            coin_group=20,
            look_back=look_back,
            reb=reb
        )

        # Long format: one row per (coin, date) with the current rank (x) and the
        # rank on the next rebalancing date (y); rows missing either are dropped.
        current = pct_df.melt()
        following = pct_df.shift(-1).melt()
        d = pd.concat([current, following.iloc[:, [1]]], axis=1).dropna()
        d.columns = ['coin_id', 'x', 'y']

        ols =  sm.OLS(d.y, (d.x))
        result = ols.fit()
        # params / tvalues are label-indexed Series; use .iloc for positional access
        # (integer keys on a labelled Series are deprecated in recent pandas).
        print("future-week:", reb, "look-back windows (days):",  look_back)
        print(f'Coef: {result.params.iloc[0].round(4)}')
        print(f'T-stat for Coef: {result.tvalues.iloc[0].round(4)}')


future-week: 1 look-back windows (days): 7
Coef: 0.7412
T-stat for Coef: 390.2491
future-week: 1 look-back windows (days): 14
Coef: 0.8519
T-stat for Coef: 590.0606
future-week: 1 look-back windows (days): 21
Coef: 0.8899
T-stat for Coef: 723.4736
future-week: 1 look-back windows (days): 28
Coef: 0.9108
T-stat for Coef: 839.3947
future-week: 1 look-back windows (days): 35
Coef: 0.925
T-stat for Coef: 929.2402
future-week: 1 look-back windows (days): 42
Coef: 0.937
T-stat for Coef: 1018.182
future-week: 1 look-back windows (days): 49
Coef: 0.945
T-stat for Coef: 1093.3646
future-week: 1 look-back windows (days): 56
Coef: 0.9506
T-stat for Coef: 1156.8723
future-week: 2 look-back windows (days): 7
Coef: 0.7511
T-stat for Coef: 285.1795
future-week: 2 look-back windows (days): 14
Coef: 0.7385
T-stat for Coef: 279.6986
future-week: 2 look-back windows (days): 21
Coef: 0.8084
T-stat for Coef: 360.5953
future-week: 2 look-back windows (days): 28
Coef: 0.8429
T-stat for Coef: 421.3752
future-

Only major coins (top 5% by market-cap rank on each rebalancing date)

In [37]:
# Results with a constant (intercept), restricted to the largest coins by market cap.
holding = [1,2,3,4,5,6,7,8]
look_back_list = [i * 7 for i in holding]

# NOTE(review): y is the rank of the same look_back-day return taken one rebalancing
# period later. Whenever look_back exceeds the rebalancing interval (7 * reb days) the
# x and y return windows overlap, which mechanically pushes the slope toward 1.
# Confirm this is intended; otherwise rank y on a non-overlapping forward return.
for reb in map(str, holding):  # make_group_percentile expects the interval as a string
    for look_back in look_back_list:
        pct_df = make_group_percentile(
            price_df = data_pp.price_df,
            weekly_rtn_df = data_pp.weekly_rtn_df, 
            mask_df = data_pp.mask,
            n_group = 5,
            day_of_week = 'FRI',
            coin_group=20,
            look_back=look_back,
            reb=reb
        )
        
        mktcap_pp = data_pp.mktcap_df.loc[pct_df.index]
        mask_pp = data_pp.mask.loc[pct_df.index]

        # Market-cap cut: ranks come from the whole masked universe (percentile ranks
        # in pct_df were also computed on the whole universe before this filter).
        mktcap_df_used = mktcap_pp * mask_pp
        mktcap_rank = mktcap_df_used.rank(axis=1)
        # Per-date rank cut-off at 95% of the largest rank; dates with no ranks are dropped.
        rank_thresh = (mktcap_rank.max(axis=1) * 0.95).dropna().map(int) 

        # index alignment
        mktcap_rank = mktcap_rank.loc[rank_thresh.index]
        # Series indexed by date whose values are the column name of the coin sitting
        # exactly at the cut-off rank. Not used further in this cell.
        coin_thresh_series = mktcap_rank.eq(rank_thresh, axis=0).idxmax(axis=1)
        # 1.0 where the coin ranks strictly above the cut-off, NaN elsewhere.
        is_major = mktcap_rank.gt(rank_thresh, axis=0)
        filtered = is_major.astype(float).where(is_major)
        
        pct_df = (pct_df * filtered)
        # Long format: one row per (coin, date) with the current rank (x) and the
        # rank on the next rebalancing date (y); rows missing either are dropped.
        current = pct_df.melt()
        following = pct_df.shift(-1).melt()
        d = pd.concat([current, following.iloc[:, [1]]], axis=1).dropna()
        d.columns = ['coin_id', 'x', 'y']

        ols =  sm.OLS(d.y, sm.add_constant(d.x))
        result = ols.fit()
        # params / tvalues are label-indexed Series; use .iloc for positional access
        # (integer keys on a labelled Series are deprecated in recent pandas).
        print("future-week:", reb, "look-back windows (days):",  look_back)
        print(f'Constant: {result.params.iloc[0].round(4)},  Coef: {result.params.iloc[1].round(4)}')
        print(f'T-stat for Constant: {result.tvalues.iloc[0].round(4)}, T-stat for Coef: {result.tvalues.iloc[1].round(4)}')

future-week: 1 look-back windows (days): 7
Constant: 0.532,  Coef: 0.0392
T-stat for Constant: 67.071, T-stat for Coef: 3.0326
future-week: 1 look-back windows (days): 14
Constant: 0.2674,  Coef: 0.525
T-stat for Constant: 38.5715, T-stat for Coef: 47.4433
future-week: 1 look-back windows (days): 21
Constant: 0.1897,  Coef: 0.6663
T-stat for Constant: 30.9713, T-stat for Coef: 68.7972
future-week: 1 look-back windows (days): 28
Constant: 0.1443,  Coef: 0.7466
T-stat for Constant: 26.3206, T-stat for Coef: 86.6547
future-week: 1 look-back windows (days): 35
Constant: 0.1197,  Coef: 0.7892
T-stat for Constant: 23.5241, T-stat for Coef: 99.1205
future-week: 1 look-back windows (days): 42
Constant: 0.0952,  Coef: 0.8317
T-stat for Constant: 20.5138, T-stat for Coef: 114.8637
future-week: 1 look-back windows (days): 49
Constant: 0.0843,  Coef: 0.8514
T-stat for Constant: 19.1468, T-stat for Coef: 124.5374
future-week: 1 look-back windows (days): 56
Constant: 0.0759,  Coef: 0.8658
T-stat for

Excluding the top 5% by market-cap rank (coins at or below the 95% rank cut-off)

In [38]:
# Results with a constant (intercept), excluding the largest coins by market cap.
holding = [1,2,3,4,5,6,7,8]
look_back_list = [i * 7 for i in holding]

# NOTE(review): y is the rank of the same look_back-day return taken one rebalancing
# period later. Whenever look_back exceeds the rebalancing interval (7 * reb days) the
# x and y return windows overlap, which mechanically pushes the slope toward 1.
# Confirm this is intended; otherwise rank y on a non-overlapping forward return.
for reb in map(str, holding):  # make_group_percentile expects the interval as a string
    for look_back in look_back_list:
        pct_df = make_group_percentile(
            price_df = data_pp.price_df,
            weekly_rtn_df = data_pp.weekly_rtn_df, 
            mask_df = data_pp.mask,
            n_group = 5,
            day_of_week = 'FRI',
            coin_group=20,
            look_back=look_back,
            reb=reb
        )
        
        mktcap_pp = data_pp.mktcap_df.loc[pct_df.index]
        mask_pp = data_pp.mask.loc[pct_df.index]

        # Market-cap cut: ranks come from the whole masked universe (percentile ranks
        # in pct_df were also computed on the whole universe before this filter).
        mktcap_df_used = mktcap_pp * mask_pp
        mktcap_rank = mktcap_df_used.rank(axis=1)
        # Per-date rank cut-off at 95% of the largest rank; dates with no ranks are dropped.
        rank_thresh = (mktcap_rank.max(axis=1) * 0.95).dropna().map(int) 

        # index alignment
        mktcap_rank = mktcap_rank.loc[rank_thresh.index]
        # Series indexed by date whose values are the column name of the coin sitting
        # exactly at the cut-off rank. Not used further in this cell.
        coin_thresh_series = mktcap_rank.eq(rank_thresh, axis=0).idxmax(axis=1)
        # 1.0 where the coin ranks at or below the cut-off, NaN elsewhere.
        is_not_major = mktcap_rank.le(rank_thresh, axis=0)
        filtered = is_not_major.astype(float).where(is_not_major)
        
        pct_df = (pct_df * filtered)
        # Long format: one row per (coin, date) with the current rank (x) and the
        # rank on the next rebalancing date (y); rows missing either are dropped.
        current = pct_df.melt()
        following = pct_df.shift(-1).melt()
        d = pd.concat([current, following.iloc[:, [1]]], axis=1).dropna()
        d.columns = ['coin_id', 'x', 'y']

        ols =  sm.OLS(d.y, sm.add_constant(d.x))
        result = ols.fit()
        # params / tvalues are label-indexed Series; use .iloc for positional access
        # (integer keys on a labelled Series are deprecated in recent pandas).
        print("future-week:", reb, "look-back windows (days):",  look_back)
        print(f'Constant: {result.params.iloc[0].round(4)},  Coef: {result.params.iloc[1].round(4)}')
        print(f'T-stat for Constant: {result.tvalues.iloc[0].round(4)}, T-stat for Coef: {result.tvalues.iloc[1].round(4)}')

future-week: 1 look-back windows (days): 7
Constant: 0.5077,  Coef: -0.0234
T-stat for Constant: 293.0515, T-stat for Coef: -7.8163
future-week: 1 look-back windows (days): 14
Constant: 0.2762,  Coef: 0.4347
T-stat for Constant: 176.9241, T-stat for Coef: 161.6179
future-week: 1 look-back windows (days): 21
Constant: 0.1953,  Coef: 0.5944
T-stat for Constant: 139.6793, T-stat for Coef: 247.3697
future-week: 1 look-back windows (days): 28
Constant: 0.1481,  Coef: 0.6878
T-stat for Constant: 117.0694, T-stat for Coef: 316.5861
future-week: 1 look-back windows (days): 35
Constant: 0.1215,  Coef: 0.7441
T-stat for Constant: 104.4101, T-stat for Coef: 371.1785
future-week: 1 look-back windows (days): 42
Constant: 0.1034,  Coef: 0.7813
T-stat for Constant: 95.1701, T-stat for Coef: 416.4355
future-week: 1 look-back windows (days): 49
Constant: 0.0903,  Coef: 0.8083
T-stat for Constant: 88.0782, T-stat for Coef: 455.7856
future-week: 1 look-back windows (days): 56
Constant: 0.0808,  Coef: 0.8