In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from statsmodels.api import OLS, add_constant

In [None]:
def calculate_cagr(return_df, periods_per_year=365):
    '''Compute the compound annual growth rate (CAGR) of a return series.

    return_df        : pandas Series/DataFrame of simple per-period returns
                       (0.01 means +1%), one row per period
    periods_per_year : number of rows that make up one year; the default of
                       365 matches daily data that includes weekends

    Returns a scalar for a Series input, or one value per column for a
    DataFrame input.

    Note: growth is measured from the first cumulative value to the last one,
    so the first row only sets the base level, while the holding period is
    derived from the total number of rows.
    An empty input fails on the positional lookup of the last row.
    '''
    holding_year = len(return_df) / periods_per_year
    cum = (return_df + 1).cumprod()
    cagr = (cum.iloc[-1] / cum.iloc[0]) ** (1 / holding_year) - 1
    return cagr

def run_alpha_regression(return_dict:dict, 
                         mkt_rtn:pd.DataFrame,
                         constant=True):
    '''Fit an OLS regression of every strategy's returns on the market
       returns and print each fitted summary.

       return_dict : dict mapping a label to that portfolio's returns;
                     an entry whose key stringifies to "count" is skipped
       mkt_rtn     : market index returns, used as the regressor
       constant    : True (default) passes mkt_rtn through add_constant
                     first; False regresses on mkt_rtn as given
       '''
    for label, strategy_returns in return_dict.items():
        if str(label) != "count":
            regressors = add_constant(mkt_rtn) if constant else mkt_rtn
            fitted = OLS(strategy_returns, regressors).fit()

            print(f"{label} Regression Result")
            print(fitted.summary2())
        
def print_statistics(return_dict:dict,
                     mkt_rtn:pd.DataFrame):
    '''Build a summary table of return statistics for each strategy and the market.

    return_dict : dict mapping a label to that strategy's return series
    mkt_rtn     : market index returns (summarised in the "MKT" column)

    Returns a DataFrame (nothing is printed despite the name) with
      rows    : "CAGR"  - calculate_cagr(...) * 100
                "Mean"  - per-period mean (rounded to 6 decimals for strategies)
                "STD"   - per-period standard deviation
                "Shape" - (Mean * 365) / (STD * sqrt(365)), i.e. the ratio of
                          annualised mean to annualised std; the label spelling
                          is kept so lookups by "Shape" keep working
      columns : one per key of return_dict (in dict order), then "MKT"
    '''
    # Keep the dict keys so the strategy columns are labelled by name
    # rather than by their position.
    labels = list(return_dict.keys())
    strategies = list(return_dict.values())

    cagr = [calculate_cagr(df) * 100 for df in strategies]
    mean = [df.mean().round(6) for df in strategies]
    std = [df.std() for df in strategies]
    return_df = pd.DataFrame([cagr, mean, std],
                             index=["CAGR", "Mean", "STD"],
                             columns=labels)

    mkt = pd.DataFrame([calculate_cagr(mkt_rtn) * 100, mkt_rtn.mean(), mkt_rtn.std()],
                       index=["CAGR", "Mean", "STD"],
                       columns=["MKT"])

    return_df = pd.concat([return_df, mkt], axis=1)
    return_df.loc["Shape", :] = (return_df.loc["Mean", :] * 365) / (return_df.loc["STD", :] * np.sqrt(365))
    return return_df
        
def draw_return_result(return_dict:dict, 
                       with_mkt=False,
                       mkt_rtn=None):
    '''Plot cumulative return, drawdown and per-period return for each strategy.

       One figure with three stacked panels (shared x axis) is created per
       entry of return_dict:
         panel 0 : cumulative return, (1 + r).cumprod()
         panel 1 : drawdown relative to the running peak of panel 0
         panel 2 : the raw returns

       return_dict : dict mapping a label to that strategy's returns
       with_mkt    : bool, overlay the market index on every panel
       mkt_rtn     : market returns; required when with_mkt is True

       Raises ValueError when with_mkt is True but mkt_rtn is not given.
       '''
    if with_mkt:
        if mkt_rtn is None:
            raise ValueError("mkt_rtn must be provided when with_mkt=True")
        # The market curves are the same for every strategy: compute them once.
        mktcum = (mkt_rtn + 1).cumprod()
        mkt_peak = mktcum.cummax()
        mkt_drawdown = (mktcum - mkt_peak) / mkt_peak

    for key, df in return_dict.items():
        fig, axes = plt.subplots(3, 1, sharex=True, figsize=(24, 24),
                                 gridspec_kw={'height_ratios': [4, 1, 1]})
        cum_ax, drawdown_ax, rtn_ax = axes

        cum_df = (df + 1).cumprod()
        cum_df.plot(ax=cum_ax)

        peak = cum_df.cummax()
        drawdown = (cum_df - peak) / peak
        drawdown.plot(ax=drawdown_ax)

        df.plot(ax=rtn_ax)

        # Only label the curves that are actually drawn.
        legend_labels = ["Strategy"]
        if with_mkt:
            mktcum.plot(ax=cum_ax)
            mkt_drawdown.plot(ax=drawdown_ax, alpha=0.3)
            mkt_rtn.plot(ax=rtn_ax, alpha=0.3)
            legend_labels.append("MKT")

        # Include the key so the per-strategy figures can be told apart.
        cum_ax.set_title(f"{key}: Cross-Sectional Momentum Cumulative returns weighted by marketcap")
        cum_ax.legend(legend_labels)
        drawdown_ax.set_title("Draw Down")

        # grid() without arguments toggles visibility, so repeated calls can
        # switch the grid back off; turn it on explicitly, after all plotting.
        for ax in axes:
            ax.grid(True)

# 마켓인덱스

Vol 0으로 스크리닝한 Return 계산

In [None]:
# Load the market-cap, price and volume panels.
# NOTE(review): unpickling can execute arbitrary code; only load these files
# from a trusted source.
mktcap = pd.read_pickle("ryu_new_mktcap9.pickle")
price = pd.read_pickle("ryu_new_price9.pickle")
vol = pd.read_pickle("ryu_new_volume9.pickle")

# Simple per-period returns, multiplied by the sign of the price: the return
# becomes 0 where the price is 0 and flips sign where the price is negative.
rtn = price.pct_change(fill_method=None) * np.sign(price)

# Volume screen as a float mask: 1.0 where volume is positive, NaN elsewhere
# (including missing volume). A float mask keeps the product below numeric
# instead of falling back to object dtype.
vol_mask = (vol > 0).astype(float).replace(0.0, np.nan)
new_mktcap = vol_mask * mktcap

In [None]:
# Count the entries with a non-missing screened market cap in each row,
# then show only the rows that have at least one.
sum_num = new_mktcap.notna().sum(axis=1)
sum_num[sum_num > 0]

In [None]:
# Market-cap weights: keep rows from 2013-12-27 onward and divide every row by
# its NaN-ignoring total so the available weights in a row sum to 1.
# NOTE(review): the start date is hard-coded - presumably the first row with
# any screened market cap; confirm against the previous cell's output.
mktcap_window = new_mktcap.loc["2013-12-27":]
new_weight = mktcap_window.apply(lambda row: row / np.nansum(row), axis=1)

In [None]:
# Market return: cap-weighted sum of the individual returns in each row.
# The weights are lagged by one row so a row's return is weighted by the
# market caps known before it (same convention as the group returns, which
# use weight.shift(1)); same-row weights would already contain that row's move.
# Rows without lagged weights contribute a return of 0.
mkt_rtn = (rtn * new_weight.shift(1)).sum(axis=1)

# Show the cumulative market index on the figure.
fig, axes = plt.subplots(1, 1)
(1 + mkt_rtn).cumprod().plot(ax=axes, title="Market index cumulative return");

Cross-Sectional : Weekly Rebalancing

In [None]:
# Reload the three source panels so this section can run on its own.
mktcap, price, vol = (
    pd.read_pickle("ryu_new_mktcap9.pickle"),
    pd.read_pickle("ryu_new_price9.pickle"),
    pd.read_pickle("ryu_new_volume9.pickle"),
)

# Per-period simple returns, multiplied by the sign of the price.
daily_rtn = price.pct_change(fill_method=None).mul(np.sign(price))

# Volume screen: 1 where volume is positive, NaN everywhere else.
vol_screener = vol.gt(0).replace({True: 1, False: np.nan})

# The volume > 0 screen is always applied, so the variable names only carry
# the frequency (daily, weekly).
daily_mktcap = vol_screener.mul(mktcap)

In [None]:
# Find where the sample can start: at least 100 coins are needed per row.
cnt = daily_mktcap.count(axis=1)
cnt[cnt >= 100]

In [None]:
# Returns and market caps must be cut to the same period.
start_date = "2017-05-02"
daily_rtn_sample = daily_rtn.loc[start_date:]
daily_mktcap_sample = daily_mktcap.loc[start_date:]

# The market return is needed later for the plots as well; slice it with the
# same start_date so the three samples cannot drift apart.
mktrtn_sample = mkt_rtn.loc[start_date:]

In [None]:
# Each group's filter is multiplied with the market cap before the group
# weights are computed, and too many coins dropped out in that product.
## Fix: mask the returns with the market cap up front.

## First report how many market-cap entries are zero or negative.
nonpositive_caps = daily_mktcap_sample.le(0).sum().sum()
print(nonpositive_caps)

# Multiplying by the sign leaves a return unchanged where the cap is positive
# and makes it NaN where the cap is missing.
cap_sign = np.sign(daily_mktcap_sample)
daily_rtn_sample_pp = daily_rtn_sample.mul(cap_sign)

In [None]:
# Rank the returns within each row (rank 1 = lowest return); ties are broken
# by column order because of method="first".
rank = daily_rtn_sample_pp.rank(axis=1, method="first")

# Number of coins that received a rank in each row.
coin_count = rank.count(axis=1)

# Five groups of (coin_count // 5) coins each; whatever is left over after the
# integer division ends up in the top group.
bucket_size = coin_count // 5
enough_coins = coin_count >= 5

# Per-row list of group boundaries, kept for inspection. Rows with fewer than
# five ranked coins get an empty list: a step of 0 would make range() raise.
rank_thresh = coin_count.apply(
    lambda n: list(range(0, n, n // 5)) if n >= 5 else []
)

# Upper rank bound of groups 1-4, i.e. k * bucket_size. Rows that cannot be
# split into five groups get NaN, so no coin is assigned to any group there.
t1 = (bucket_size * 1).where(enough_coins)
t2 = (bucket_size * 2).where(enough_coins)
t3 = (bucket_size * 3).where(enough_coins)
t4 = (bucket_size * 4).where(enough_coins)

rank_thresh

In [None]:
# Membership masks for the five rank groups: 1 where a coin's rank falls in
# the group's (lower, upper] band for that row, NaN otherwise. The thresholds
# are aligned on the row index (axis=0).
_mask_values = {True: 1, False: np.nan}

g1_mask = rank.le(t1, axis=0).replace(_mask_values)
g2_mask = (rank.gt(t1, axis=0) & rank.le(t2, axis=0)).replace(_mask_values)
g3_mask = (rank.gt(t2, axis=0) & rank.le(t3, axis=0)).replace(_mask_values)
g4_mask = (rank.gt(t3, axis=0) & rank.le(t4, axis=0)).replace(_mask_values)
g5_mask = rank.gt(t4, axis=0).replace(_mask_values)

In [None]:
# Market-cap weights inside each group: mask the caps with the group
# membership, then divide every row by its NaN-ignoring total.
def _normalize_row(row):
    return row / np.nansum(row)


weight_g1 = g1_mask.mul(daily_mktcap_sample).apply(_normalize_row, axis=1)
weight_g2 = g2_mask.mul(daily_mktcap_sample).apply(_normalize_row, axis=1)
weight_g3 = g3_mask.mul(daily_mktcap_sample).apply(_normalize_row, axis=1)
weight_g4 = g4_mask.mul(daily_mktcap_sample).apply(_normalize_row, axis=1)
weight_g5 = g5_mask.mul(daily_mktcap_sample).apply(_normalize_row, axis=1)

In [None]:
# Number of coins carrying a weight in each group, per row.
group_weights = [weight_g1, weight_g2, weight_g3, weight_g4, weight_g5]
concat = pd.concat([w.count(axis=1) for w in group_weights], axis=1)

# Same table with the row total appended as a "Sum" column.
coin = concat.assign(Sum=concat.sum(axis=1))

coin

In [None]:
# Line chart of the per-group coin counts.
concat.plot.line()

In [None]:
# Return of each group: the row's returns weighted by the previous row's
# group weights, summed across coins (missing products are skipped).
def _lagged_weighted_return(weight):
    return daily_rtn_sample.mul(weight.shift(1)).sum(axis=1)


rtn_g1 = _lagged_weighted_return(weight_g1)
rtn_g2 = _lagged_weighted_return(weight_g2)
rtn_g3 = _lagged_weighted_return(weight_g3)
rtn_g4 = _lagged_weighted_return(weight_g4)
rtn_g5 = _lagged_weighted_return(weight_g5)

In [None]:
# Group returns keyed by label, Q1 (lowest rank band) through Q5 (highest).
rtn_dict = dict(zip(
    ("Q1", "Q2", "Q3", "Q4", "Q5"),
    (rtn_g1, rtn_g2, rtn_g3, rtn_g4, rtn_g5),
))

In [None]:
# One figure per group, each with the market index overlaid.
draw_return_result(return_dict=rtn_dict,
                   mkt_rtn=mktrtn_sample,
                   with_mkt=True)

In [None]:
# Summary statistics table for the five groups and the market.
print_statistics(return_dict=rtn_dict, mkt_rtn=mktrtn_sample)

In [None]:
# Regress every group's returns on the market return and print the summaries.
run_alpha_regression(return_dict=rtn_dict, mkt_rtn=mktrtn_sample)