In [1]:
import numpy as np
import pandas as pd
import import_ipynb
import pair_selection_DBSCAN_funclist_for_training as myFunc
import matplotlib.pyplot as plt

import math
import sklearn
from datetime import datetime

from sklearn import datasets, linear_model
from tqdm import tqdm
from sklearn.preprocessing import PolynomialFeatures

import warnings
warnings.filterwarnings('ignore')

importing Jupyter notebook from pair_selection_DBSCAN_funclist_for_training.ipynb


In [2]:
price_df = pd.read_csv('./Data/us_etf_price.csv')
etf_info = pd.read_csv('./Data/etfs_details_equity.csv')
er_df = etf_info[['Symbol','ER']]
er_df = er_df.set_index('Symbol')
er_df = er_df['ER'].apply(lambda x: x.split('%')[0]).astype(float)

In [3]:

pc_selecting_threshold = 0.9
eps = 1.8
min_samples = 4
cluster_size_limit = 100
cluster_member_counts = 100
inverse_threshold=-0.99
coint_pvalue_threshold=0.01
hurst_threshold=0.5
half_life_threshold=10
mean_reverting_freq=12

In [4]:
class Backtesting():

    def __init__(self, z_score_list, stop_loss, buy_z, sell_z, total_money=1000000):
        
        self.pairs_num = z_score_list.shape[0]
        self.total_time = z_score_list.shape[1]
        self.stop_loss = stop_loss
        self.buy_z = buy_z
        self.sell_z = sell_z
        
        self.inverse_price = np.zeros((2*self.pairs_num))
        self.total_stock = np.zeros((2*self.pairs_num))
        self.total_inverse = np.zeros((2*self.pairs_num))
        self.money_for_pair = int(total_money/self.pairs_num) * np.ones((self.pairs_num))
    
    def trade_decision(self, z_score, pairs_num):
        
        stop_loss = self.stop_loss
        buy_z = self.buy_z
        sell_z = self.sell_z
        total_stock = self.total_stock[2*(pairs_num)]
        total_inverse = self.total_inverse[2*(pairs_num)]
        stock, inverse = 0, 0 

        # 스탑로스 컷에 걸릴 때는 다 팔기
        if z_score >= stop_loss or z_score <= -1 * stop_loss:
            stock, inverse = -2, -2
        
        # buy threshold 이상이면서 해당 etf 보유하고 있지 않으면 매수
        elif z_score >= buy_z and total_stock==0:
            stock, inverse = 1, 0

        # sell threshold 이하이면서, 해당 etf 보유하고 있으면 etf 팔기
        elif z_score <= sell_z and total_stock > 0:
            stock, inverse = -1, 0

        # -buy threshold 이하로 떨어지는데, 인버스 etf 보유하고 있지 않을 땐 inverse etf 사기
        elif z_score <= -1 * buy_z and total_inverse==0:
            stock, inverse = 0, 1
        
        # -sell threshold 이상으로 올라가는데, 인버스 etf를 보유하고 있으면 inverse etf 팔기
        elif z_score >= -1 * sell_z and total_inverse > 0:
            stock, inverse = 0, -1 

        return stock, inverse
    
    def cal_trade_vol(self, stock_num, stock_signal, inv_signal, current_stock_price, current_inverse_price):

        trade_stock_vol = 0
        trade_inverse_vol = 0

        money_for_stock = self.money_for_pair[int(stock_num/2)] / 2

        if stock_signal < 0:
            trade_stock_vol = -1 * self.total_stock[stock_num]

        elif stock_signal > 0:
            trade_stock_vol = int(money_for_stock/current_stock_price)

        if inv_signal < 0:
            trade_inverse_vol = -1 * self.total_inverse[stock_num]
        
        elif inv_signal > 0:
            trade_inverse_vol = int(money_for_stock/current_inverse_price)

        return trade_stock_vol, trade_inverse_vol


    def cal_result(self, stock_num, stock_vol, inverse_vol, current_stock_price, current_inverse_price):

        # if self.total_inverse[stock_num] > 0:
        #     change_ratio = (current_price - prev_price) / prev_price
        #     self.inverse_price[stock_num] = (1 - change_ratio) * self.inverse_price[stock_num]


        if stock_vol > 0:
            self.total_stock[stock_num] += stock_vol
            self.money_for_pair[int(stock_num/2)] -= stock_vol * current_stock_price

        elif stock_vol < 0:
            self.total_stock[stock_num] += stock_vol
            self.money_for_pair[int(stock_num/2)] -= stock_vol * current_stock_price
        
            # Buy the inverse
        if inverse_vol > 0:
            self.total_inverse[stock_num] += inverse_vol
            self.money_for_pair[int(stock_num/2)] -= inverse_vol * current_inverse_price
            # self.inverse_price[stock_num] = current_price
        
        # Sell the inverse
        if inverse_vol < 0:
            self.total_inverse[stock_num] += inverse_vol
            self.money_for_pair[int(stock_num/2)] -= inverse_vol * current_inverse_price
            # self.money_for_pair[int(stock_num/2)] -= inverse_vol * self.inverse_price[stock_num]
            # self.inverse_price[stock_num] = 0

        # total_asset = (self.total_inverse[stock_num] * self.inverse_price[stock_num] + 
        #                 self.total_stock[stock_num] * current_price) 

        total_asset = (self.total_inverse[stock_num] * current_inverse_price +
                        self.total_stock[stock_num] * current_stock_price)

        return total_asset

    def backtesting(self, z_score_list, stock_price_list, inverse_price_list):

        total_asset = np.zeros((self.pairs_num, self.total_time))
        stock_a_vol_list = []
        inverse_a_vol_list = []
        stock_b_vol_list = []
        inverse_b_vol_list = []

        for t in range(self.total_time):
            for pair_num in range(self.pairs_num):
                stock_a_num = 2* pair_num
                stock_b_num = 2* pair_num + 1
                z_score = z_score_list[pair_num]

                # 매수매입 시그널
                stock_a, inverse_a = self.trade_decision(z_score[t], pair_num)
                stock_b = inverse_a
                inverse_b = stock_a

                # 매수매입 수량 결정
                stock_a_vol, inverse_a_vol = self.cal_trade_vol(stock_a_num, stock_a, inverse_a, stock_price_list[stock_a_num][t], inverse_price_list[stock_a_num][t])
                stock_b_vol, inverse_b_vol = self.cal_trade_vol(stock_b_num, stock_b, inverse_b, stock_price_list[stock_b_num][t], inverse_price_list[stock_b_num][t])
                stock_a_vol_list.append(stock_a_vol)
                inverse_a_vol_list.append(inverse_a_vol)
                stock_b_vol_list.append(stock_b_vol)
                inverse_b_vol_list.append(inverse_b_vol)

                # 투자 결과
                total_asset[pair_num][t] += self.cal_result(stock_a_num, stock_a_vol, inverse_a_vol, stock_price_list[stock_a_num][t], inverse_price_list[stock_a_num][t])
                total_asset[pair_num][t] += self.cal_result(stock_b_num, stock_b_vol, inverse_b_vol, stock_price_list[stock_b_num][t], inverse_price_list[stock_b_num][t])
                total_asset[pair_num][t] += self.money_for_pair[int(pair_num)]

        return total_asset, stock_a_vol_list, inverse_a_vol_list, stock_b_vol_list, inverse_b_vol_list

In [5]:
def get_pair(start_date, end_date):

    _, close_df, rtn_df, low_volume_etf = myFunc.preprocessing(price_df,etf_info, start_date, end_date)
    pc_rtn = myFunc.get_pca_return(rtn_df, pc_selecting_threshold)
    clusters_viz_list, clustered_series = myFunc.dbscan_clustering(close_df, pc_rtn, eps, min_samples, cluster_size_limit, cluster_member_counts)
    selected_pair, short_pair = myFunc.Pair_selection(close_df=close_df,
                                                rtn_df=rtn_df,
                                                low_volume_etf=low_volume_etf,
                                                clusters_viz_list=clusters_viz_list, 
                                                clustered_series=clustered_series, 
                                                inverse_threshold=inverse_threshold, 
                                                coint_pvalue_threshold=coint_pvalue_threshold, 
                                                hurst_threshold=hurst_threshold, 
                                                half_life_threshold=half_life_threshold, 
                                                mean_reverting_freq=mean_reverting_freq)

    return selected_pair, short_pair, close_df, rtn_df

def create_spread_function(pair_a, pair_b, start_date, end_date, alg='log'):

    def log_spread_func(pair_a, pair_b):
        
        spread = math.log(pair_b) - w_avg * math.log(pair_a)
        z_score = spread/w_std

        return (spread, z_score)

    def lr_spread_func(pair_a, pair_b):
        pair_a, pair_b = np.log(pair_a), np.log(pair_b)
        pair_a = pair_a * np.ones((1,1))
        poly = PolynomialFeatures(degree=best_degree)
        pair_a = poly.fit_transform(pair_a)

        spread = pair_b - model.predict(pair_a)
        z_score = spread / spread_std

        return (spread, z_score)

    target_a = np.log(pair_a[start_date:end_date])
    target_b = np.log(pair_b[start_date:end_date])

    # holding_period = end_date - start_date

    if alg == 'log':
        
        w_list = target_b / target_a
        w_avg = np.average(w_list)
        w_std = np.std(w_list)

        return log_spread_func
    
    elif alg == 'lr':

        min_cv_n = float('inf')
        best_degree = 0
        total_len = target_a.size

        permute_order = np.random.permutation(total_len)
        target_a = target_a[permute_order]
        target_b = target_b[permute_order]

        train_num = int(target_a.size/3*2)

        train_a = target_a[:train_num]
        train_b = target_b[:train_num]
        val_a = target_a[train_num:]
        val_b = target_b[train_num:]

        train_a = train_a.reshape(-1,1)
        val_a = val_a.reshape(-1,1)

        for degree in range(1,10,1):

            poly = PolynomialFeatures(degree=degree)
            poly_train_a = poly.fit_transform(train_a)
            poly_val_a = poly.fit_transform(val_a)

            model = linear_model.LassoCV(cv=5)
            model.fit(poly_train_a, train_b)
            
            mse = np.average((val_b - model.predict(poly_val_a))**2)

            if mse < min_cv_n:
                best_degree = degree
                min_cv_n = mse

        if best_degree == 0:
            print("error!")

        poly = PolynomialFeatures(degree= best_degree)
        poly_train_a = poly.fit_transform(train_a)
        model = linear_model.LassoCV(cv=5)
        model.fit(poly_train_a, train_b)

        b_pred = model.predict(poly_train_a)
        spread = train_b - b_pred
        spread_std = np.std(spread)

        return lr_spread_func

def gen_z_score_history(a, b, windows_width, spread_func_update_period):

    T = a.shape[0]
    z_score_list = np.zeros((T-windows_width))

    for t in range(T-windows_width):

        if t % spread_func_update_period==0:
            spread_func = create_spread_function(a,b,t,t+windows_width, 'lr')
        
        _, z_score = spread_func(a[t],b[t])
        z_score_list[t] = z_score

    return z_score_list

def get_total_transaction_cost(pair, trading_vol, short_list):
    
    a_roundtrip = len([i for i, x  in enumerate(trading_vol[0]) if x !=0]) / 2
    a_inv_roundtrip = len([i for i, x  in enumerate(trading_vol[1]) if x !=0]) / 2
    b_roundtrip = len([i for i, x  in enumerate(trading_vol[2]) if x !=0]) / 2
    b_inv_roundtrip = len([i for i, x  in enumerate(trading_vol[3]) if x !=0]) / 2

    a_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[0]) if x !=0]).diff()[1:].sum()
    a_inv_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[1]) if x !=0]).diff()[1:].sum()
    b_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[2]) if x !=0]).diff()[1:].sum()
    b_inv_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[3]) if x !=0]).diff()[1:].sum()

    a = pair[0]
    a_inv = short_list[a]
    b = pair[0]
    b_inv = short_list[b]

    def get_transaction_cost(holding_period, num_of_roundtrip, tic):
    
        bid_ask_spread = 0.1/100 * num_of_roundtrip
        operating_expenses = er_df[tic] * holding_period/252

        return bid_ask_spread + operating_expenses

    total_transaction_cost = get_transaction_cost(a_holding_period, a_roundtrip, a) 
    + get_transaction_cost(a_inv_holding_period, a_inv_roundtrip, a_inv)
    + get_transaction_cost(b_holding_period, b_roundtrip, b)
    + get_transaction_cost(b_inv_holding_period, b_inv_roundtrip, b_inv)

    return total_transaction_cost

def get_best_threshold(z_score_list, stock_price_list, inverse_price_list, pair, short_list, initial_money=1000000):
    stop_loss_candi = [1, 1.5, 2, 3]
    buy_z_candi = np.round(np.linspace(0, 2, 21), 1)
    sell_z_candi = np.round(np.linspace(0, 2, 21), 1)
    grid_search_result = {}
    for x in stop_loss_candi:
        stop_loss = x

        for y in buy_z_candi:
            buy_z = y

            for z in sell_z_candi:
                sell_z = z

                BT = Backtesting(z_score_list, total_money=initial_money, stop_loss=stop_loss, buy_z=buy_z, sell_z=sell_z)
                asset_per_pair, stock_a_vol_list, inverse_a_vol_list, stock_b_vol_list, inverse_b_vol_list = BT.backtesting(z_score_list, stock_price_list, inverse_price_list)
                total_asset = np.sum(asset_per_pair, axis=0)
                total_earning_ratio = total_asset[-1] / initial_money
                trading_vol = (stock_a_vol_list, inverse_a_vol_list, stock_b_vol_list, inverse_b_vol_list)
                total_earning_ratio_tr = total_earning_ratio - get_total_transaction_cost(pair, trading_vol, short_list)
                grid_search_result[(x,y,z)] = total_earning_ratio_tr

    best_param = max(grid_search_result, key=grid_search_result.get)
    best_earning_ratio = grid_search_result[best_param]

    print('best_earning_ratio is: ', best_earning_ratio)

    return best_param

# -----------------------------------------------------------------------------------------------

In [6]:
def get_sharpe_ratio(start_date, end_date, test_start_date, test_end_date):
    
    selected_pair, _, close_df, rtn_df = get_pair(start_date=start_date, end_date=end_date)

    selected_pair_final = []
    for i in range(len(selected_pair)):
        if len(selected_pair[i]) == 0:
            continue
        for j in range(len(selected_pair[i])):
            selected_pair_final.append(selected_pair[i][j])

    if len(selected_pair_final) == 0:
        
        return np.nan, np.nan, np.nan, np.nan, np.nan

    else: 
        # inverse etf list 뽑기
        short_list = {}
        for i in range(len(selected_pair_final)):
            short_list[selected_pair_final[i][0]] = rtn_df.corr()[selected_pair_final[i][0]].idxmin()
            short_list[selected_pair_final[i][1]] = rtn_df.corr()[selected_pair_final[i][1]].idxmin()

        # 모델 트레이닝을 위한 포메이션 기간동안의 페어 가격 추출
        training_set_price = pd.DataFrame(index=close_df.index)
        for i in range(len(selected_pair_final)):
            training_set_price = pd.concat([training_set_price, close_df[list(selected_pair_final[i])]], axis=1)

        training_set_inverse_price = close_df[[short_list[x] for x in training_set_price.columns]]

        # 페어별 z-score 뽑기
        for i in range(len(selected_pair_final)):

            print("pairs = (" + str(selected_pair_final[i]) + ")\n")

            a = training_set_price.iloc[:,2*i].to_numpy()
            b = training_set_price.iloc[:,2*i+1].to_numpy()

            spread_func = create_spread_function(a, b, 0, -1, alg='lr')
            x = np.arange(len(a))
            z_score_history = np.zeros((len(a)))

            for j in range(len(a)):
                (spread, z_score_history[j]) = spread_func(a[j], b[j])

        # 패어별로 z-score time series 뽑기
        z_score_history_list = []

        for i in range(len(selected_pair_final)):
            
            price_history = training_set_price.to_numpy()[:,2*i:2*i+2].T
            z_score_history = gen_z_score_history(price_history[0], price_history[1], 20, 20).reshape(1, -1)
            z_score_history_list.append(z_score_history)

        
        z_score_history_list = np.array(z_score_history_list).reshape(len(selected_pair_final), -1)
        z_score_list = z_score_history_list[:len(selected_pair_final),:]

        stock_price_list = training_set_price.T.to_numpy()[:,20:]
        inverse_price_list = training_set_inverse_price.T.to_numpy()[:,20:]

        # best threshold 찾기
        best_threshold = {}
        for i in range(len(selected_pair_final)):
            z_score_list_arb = z_score_list[i].reshape(1,-1)
            stock_price_list_arb = stock_price_list[2*i:2*(i+1)]
            inverse_price_list_arb = inverse_price_list[2*i:2*(i+1)]
            best_threshold[selected_pair_final[i]] = get_best_threshold(z_score_list_arb, stock_price_list_arb, inverse_price_list_arb, selected_pair_final[i], short_list)

        # 테스트 기간 데이터 로딩
        _, close_df_test, rtn_df_test, low_volume_etf_test = myFunc.preprocessing(price_df, etf_info, test_start_date, test_end_date)

        test_set_price = pd.DataFrame(index=close_df_test.index)
        for i in range(len(selected_pair_final)):
            test_set_price = pd.concat([test_set_price, close_df_test[list(selected_pair_final[i])]], axis=1)

        test_set_inverse_price = close_df_test[[short_list[x] for x in test_set_price.columns]]

        z_score_history_test_list = []
        for i in range(len(selected_pair_final)):

            print("pairs = (" + str(selected_pair_final[i]) + ")\n")

            a = test_set_price.iloc[:,2*i].to_numpy()
            b = test_set_price.iloc[:,2*i+1].to_numpy()

            spread_func = create_spread_function(a, b, 0, -1, alg='lr')
            z_score_history_test = np.zeros((a.shape[0]))

            for j in range(a.shape[0]):
                (spread, z_score_history_test[j]) = spread_func(a[j], b[j])

            z_score_history_test_list.append(z_score_history_test)

        z_score_list_test = np.array(z_score_history_test_list).reshape(len(selected_pair_final), -1)

        stock_price_list_test = test_set_price.T.to_numpy()
        inverse_price_list_test = test_set_inverse_price.T.to_numpy()

            # 트레이딩 결과 
        asset_per_pair_dic = {}        
        total_asset_dic = {}
        threshold_dic = {}
        trading_vol_dic = {}
        transaction_cost_dic = {}        

        for i in range(len(selected_pair_final)):

            initial_money = 1000000
            stop_loss = best_threshold[selected_pair_final[i]][0]
            buy_z = best_threshold[selected_pair_final[i]][1]
            sell_z = best_threshold[selected_pair_final[i]][2]

            z_score_test_arb = z_score_list_test[i].reshape(1,-1)
            stock_price_test_arb = stock_price_list_test[2*i:2*(i+1)]
            inverse_price_test_arb = inverse_price_list_test[2*i:2*(i+1)]

            BT = Backtesting(z_score_test_arb, total_money=initial_money, stop_loss=stop_loss, buy_z=buy_z, sell_z=sell_z)
            asset_per_pair_test, stock_a_vol_list_test, inverse_a_vol_list_test, stock_b_vol_list_test, inverse_b_vol_list_test = BT.backtesting(z_score_test_arb, stock_price_test_arb, inverse_price_test_arb)
            trading_vol = (stock_a_vol_list_test, inverse_a_vol_list_test, stock_b_vol_list_test, inverse_b_vol_list_test)
            total_asset_test = np.sum(asset_per_pair_test, axis=0)
            transaction_cost = get_total_transaction_cost(selected_pair_final[i], trading_vol, short_list)

            asset_per_pair_dic[selected_pair_final[i]] = asset_per_pair_test
            total_asset_dic[selected_pair_final[i]] = total_asset_test
            trading_vol_dic[selected_pair_final[i]] = trading_vol
            threshold_dic[selected_pair_final[i]] = [stop_loss, buy_z, sell_z]
            transaction_cost_dic[selected_pair_final[i]] = transaction_cost

        return asset_per_pair_dic, total_asset_dic, threshold_dic, trading_vol_dic, transaction_cost_dic

In [7]:
monthly = pd.date_range('2009-12-31', '2022-1-1', freq='MS')

In [8]:
period_candi = [(3,1),(6,1),(12,1),(24,12)]

In [9]:
formation_train = {}
for cnd in period_candi:

    train_window_width = cnd[0]
    test_window_width = cnd[1]

    train_period = []
    test_period = []
    for i in range(len(monthly)- train_window_width-test_window_width):
        train_start_date = monthly[i]
        train_end_date = monthly[i+train_window_width]
        test_end_date = monthly[i+train_window_width+test_window_width]

        if test_end_date.year < 2017:

            train_period.append((train_start_date.strftime("%Y-%m-%d"), train_end_date.strftime("%Y-%m-%d"), test_end_date.strftime("%Y-%m-%d")))
        
        elif test_end_date.year >=2017:

            test_period.append((train_start_date.strftime("%Y-%m-%d"), train_end_date.strftime("%Y-%m-%d"), test_end_date.strftime("%Y-%m-%d")))
            
    formation_train[cnd] = (train_period, test_period)

In [1]:
# trading 기간 2019-01-01 ~ 2021-12-31
performance_test = formation_train[(24,12)][1][60:72]
performance_test

NameError: name 'formation_train' is not defined

In [60]:
# pair_reg = {}

# for idx, x in enumerate(performance_test):
#     pair_reg[idx] = get_pair(x[0], x[1])

In [58]:
# import pickle
# with open('2020~2021_pair_reg.pkl','wb') as f:
#     pickle.dump(pair_reg, f)

In [61]:
result = {}
for i in range(len(performance_test)):

    try:
        print('=================== {} 번째 시작 ======================'.format(i))
        start_date = performance_test[i][0]
        end_date = performance_test[i][1]
        test_start_date = performance_test[i][1]
        test_end_date = performance_test[i][2]
        asset_per_pair_dic, total_asset_dic, threshold_dic, trading_vol_dic, transaction_cost_dic = get_sharpe_ratio(start_date, end_date, test_start_date, test_end_date)

        result['trial_{}'.format(i+1)] = (asset_per_pair_dic, total_asset_dic, threshold_dic, trading_vol_dic, transaction_cost_dic)

    except KeyboardInterrupt as e:

        print(e)
        pass



100%|██████████| 1086/1086 [00:27<00:00, 39.48it/s]


Clusters discovered: 15
Clusters formed: 15
Pairs to evaluate: 3304
final_clusters index :  [14, 13, 7, 6, 11, 8, 5, 1, 10, 9, 12, 4, 0, 3, 2]


0it [00:00, ?it/s]/15 [00:00<?, ?it/s]
0it [00:00, ?it/s]/15 [00:04<00:59,  4.21s/it]
0it [00:00, ?it/s]/15 [00:08<00:53,  4.14s/it]
100%|██████████| 6/6 [00:00<00:00, 116.87it/s]
0it [00:00, ?it/s]/15 [00:21<01:05,  5.93s/it]
0it [00:00, ?it/s]/15 [00:25<00:53,  5.34s/it]
100%|██████████| 3/3 [00:00<00:00, 103.41it/s]
100%|██████████| 10/10 [00:00<00:00, 106.66it/s]
100%|██████████| 1/1 [00:00<00:00, 123.14it/s]
100%|██████████| 6/6 [00:00<00:00, 29.53it/s]]
0it [00:00, ?it/s]0/15 [01:07<00:41,  8.38s/it]
100%|██████████| 6/6 [00:00<00:00, 135.83it/s]]
0it [00:00, ?it/s]2/15 [01:25<00:27,  9.04s/it]
100%|██████████| 21/21 [00:00<00:00, 129.63it/s]
100%|██████████| 276/276 [00:01<00:00, 151.26it/s]
100%|██████████| 15/15 [03:23<00:00, 13.59s/it]


pairs = (('SPDN', 'SH'))

pairs = (('SPY', 'IVV'))

pairs = (('VONE', 'VV'))

pairs = (('SPLG', 'SCHX'))

best_earning_ratio is:  1.0
best_earning_ratio is:  1.0
best_earning_ratio is:  1.0
best_earning_ratio is:  1.0202650254796346


100%|██████████| 1338/1338 [00:23<00:00, 56.46it/s]


pairs = (('SPDN', 'SH'))

pairs = (('SPY', 'IVV'))

pairs = (('VONE', 'VV'))

pairs = (('SPLG', 'SCHX'))



100%|██████████| 1099/1099 [00:26<00:00, 41.40it/s]


Clusters discovered: 16
Clusters formed: 16
Pairs to evaluate: 3962
final_clusters index :  [11, 15, 13, 7, 6, 14, 2, 8, 5, 1, 10, 9, 12, 0, 4, 3]


0it [00:00, ?it/s]/16 [00:00<?, ?it/s]
0it [00:00, ?it/s]/16 [00:03<00:48,  3.24s/it]
0it [00:00, ?it/s]/16 [00:07<00:53,  3.84s/it]
0it [00:00, ?it/s]/16 [00:11<00:52,  4.01s/it]
100%|██████████| 6/6 [00:00<00:00, 92.71it/s]]
0it [00:00, ?it/s]/16 [00:25<01:04,  5.89s/it]
0it [00:00, ?it/s]/16 [00:29<00:52,  5.29s/it]
0it [00:00, ?it/s]/16 [00:33<00:44,  4.99s/it]
100%|██████████| 3/3 [00:00<00:00, 103.33it/s]
100%|██████████| 10/10 [00:00<00:00, 132.79it/s]
100%|██████████| 3/3 [00:00<00:00, 133.98it/s]]
100%|██████████| 6/6 [00:00<00:00, 143.91it/s]]
0it [00:00, ?it/s]2/16 [01:18<00:35,  8.88s/it]
0it [00:00, ?it/s]3/16 [01:25<00:24,  8.19s/it]
100%|██████████| 21/21 [00:00<00:00, 138.66it/s]
100%|██████████| 378/378 [00:02<00:00, 137.86it/s]
100%|██████████| 16/16 [03:40<00:00, 13.76s/it]


pairs = (('SPDN', 'SH'))

pairs = (('SPY', 'IVV'))

pairs = (('MGC', 'FTEC'))

pairs = (('SPLG', 'SCHX'))

pairs = (('QQQ', 'VOO'))

pairs = (('VONE', 'VV'))

best_earning_ratio is:  1.0
best_earning_ratio is:  1.0
best_earning_ratio is:  1.1100443571311107
best_earning_ratio is:  1.040575108295531
best_earning_ratio is:  1.0
best_earning_ratio is:  1.0


100%|██████████| 1347/1347 [00:24<00:00, 54.48it/s]


pairs = (('SPDN', 'SH'))

pairs = (('SPY', 'IVV'))

pairs = (('MGC', 'FTEC'))

pairs = (('SPLG', 'SCHX'))

pairs = (('QQQ', 'VOO'))

pairs = (('VONE', 'VV'))



100%|██████████| 1110/1110 [00:25<00:00, 43.38it/s]


Clusters discovered: 16
Clusters formed: 16
Pairs to evaluate: 3412
final_clusters index :  [12, 11, 14, 7, 6, 15, 5, 1, 10, 9, 8, 13, 4, 0, 3, 2]


0it [00:00, ?it/s]/16 [00:00<?, ?it/s]
0it [00:00, ?it/s]/16 [00:04<01:03,  4.24s/it]
0it [00:00, ?it/s]/16 [00:08<01:00,  4.31s/it]
0it [00:00, ?it/s]/16 [00:12<00:55,  4.28s/it]
100%|██████████| 6/6 [00:00<00:00, 59.97it/s]]
0it [00:00, ?it/s]/16 [00:25<01:04,  5.86s/it]
100%|██████████| 3/3 [00:00<00:00, 87.73it/s]]
100%|██████████| 10/10 [00:00<00:00, 62.03it/s]
100%|██████████| 1/1 [00:00<00:00, 98.74it/s]]
100%|██████████| 6/6 [00:00<00:00, 137.83it/s]
0it [00:00, ?it/s]0/16 [01:12<00:57,  9.51s/it]
0it [00:00, ?it/s]1/16 [01:17<00:41,  8.25s/it]
100%|██████████| 21/21 [00:00<00:00, 118.20it/s]
0it [00:00, ?it/s]3/16 [01:45<00:35, 11.91s/it]
100%|██████████| 210/210 [00:01<00:00, 132.08it/s]
100%|██████████| 21/21 [00:00<00:00, 122.72it/s]
100%|██████████| 16/16 [03:54<00:00, 14.66s/it]


pairs = (('SPY', 'IVV'))

pairs = (('SPLG', 'SCHX'))

pairs = (('VONE', 'VV'))

best_earning_ratio is:  1.0
best_earning_ratio is:  1.0499496599766873
best_earning_ratio is:  1.0731500229247564


100%|██████████| 1363/1363 [00:24<00:00, 54.89it/s]


pairs = (('SPY', 'IVV'))

pairs = (('SPLG', 'SCHX'))

pairs = (('VONE', 'VV'))



100%|██████████| 1115/1115 [00:26<00:00, 41.78it/s]


Clusters discovered: 12
Clusters formed: 11
Pairs to evaluate: 1702
final_clusters index :  [4, 11, 5, 10, 1, 9, 6, 8, 2, 3, 7]


100%|██████████| 6/6 [00:00<00:00, 124.13it/s]
0it [00:00, ?it/s]/11 [00:08<01:25,  8.54s/it]
0it [00:00, ?it/s]/11 [00:14<01:01,  6.83s/it]
100%|██████████| 10/10 [00:00<00:00, 143.45it/s]
0it [00:00, ?it/s]/11 [00:32<01:01,  8.79s/it]
0it [00:00, ?it/s]/11 [00:40<00:49,  8.29s/it]
0it [00:00, ?it/s]/11 [00:49<00:43,  8.68s/it]
0it [00:00, ?it/s]/11 [00:53<00:28,  7.23s/it]
100%|██████████| 55/55 [00:00<00:00, 100.58it/s]
100%|██████████| 10/10 [00:00<00:00, 102.84it/s]
100%|██████████| 6/6 [00:00<00:00, 95.96it/s]t]
100%|██████████| 11/11 [02:18<00:00, 12.62s/it]




100%|██████████| 1119/1119 [00:30<00:00, 36.85it/s]


Clusters discovered: 11
Clusters formed: 10
Pairs to evaluate: 1892
final_clusters index :  [4, 10, 5, 6, 1, 8, 9, 2, 3, 7]


100%|██████████| 6/6 [00:00<00:00, 116.42it/s]
0it [00:00, ?it/s]/10 [00:08<01:18,  8.71s/it]
0it [00:00, ?it/s]/10 [00:11<00:43,  5.44s/it]
0it [00:00, ?it/s]/10 [00:18<00:40,  5.82s/it]
0it [00:00, ?it/s]/10 [00:22<00:31,  5.22s/it]
0it [00:00, ?it/s]/10 [00:30<00:30,  6.09s/it]
0it [00:00, ?it/s]/10 [00:39<00:29,  7.35s/it]
100%|██████████| 120/120 [00:00<00:00, 121.06it/s]
100%|██████████| 15/15 [00:00<00:00, 127.36it/s]
100%|██████████| 6/6 [00:00<00:00, 81.27it/s]]
100%|██████████| 10/10 [02:18<00:00, 13.90s/it]




100%|██████████| 1122/1122 [00:33<00:00, 33.38it/s]


Clusters discovered: 12
Clusters formed: 11
Pairs to evaluate: 1578
final_clusters index :  [4, 11, 5, 6, 1, 10, 8, 9, 7, 2, 3]


100%|██████████| 6/6 [00:00<00:00, 116.38it/s]
0it [00:00, ?it/s]/11 [00:10<01:41, 10.16s/it]
0it [00:00, ?it/s]/11 [00:14<00:59,  6.59s/it]
0it [00:00, ?it/s]/11 [00:21<00:56,  7.12s/it]
0it [00:00, ?it/s]/11 [00:26<00:43,  6.16s/it]
0it [00:00, ?it/s]/11 [00:34<00:39,  6.64s/it]
100%|██████████| 6/6 [00:00<00:00, 119.94it/s]
0it [00:00, ?it/s]/11 [00:53<00:33,  8.49s/it]
0it [00:00, ?it/s]/11 [01:03<00:26,  8.79s/it]
100%|██████████| 120/120 [00:00<00:00, 144.71it/s]
100%|██████████| 15/15 [00:00<00:00, 104.86it/s]
100%|██████████| 11/11 [02:23<00:00, 13.09s/it]




100%|██████████| 1130/1130 [00:31<00:00, 35.33it/s]


Clusters discovered: 11
Clusters formed: 10
Pairs to evaluate: 1682
final_clusters index :  [10, 9, 4, 5, 1, 7, 8, 6, 2, 3]


0it [00:00, ?it/s]/10 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 69.76it/s]]
100%|██████████| 6/6 [00:00<00:00, 136.85it/s]
0it [00:00, ?it/s]/10 [00:19<00:53,  7.70s/it]
0it [00:00, ?it/s]/10 [00:24<00:38,  6.48s/it]
100%|██████████| 6/6 [00:00<00:00, 104.81it/s]
0it [00:00, ?it/s]/10 [00:48<00:39,  9.89s/it]
0it [00:00, ?it/s]/10 [00:59<00:30, 10.22s/it]
100%|██████████| 136/136 [00:01<00:00, 111.06it/s]
100%|██████████| 15/15 [00:00<00:00, 93.53it/s]
100%|██████████| 10/10 [02:35<00:00, 15.52s/it]




100%|██████████| 1136/1136 [00:33<00:00, 33.85it/s]


Clusters discovered: 12
Clusters formed: 11
Pairs to evaluate: 1640
final_clusters index :  [11, 9, 10, 4, 5, 1, 7, 8, 6, 2, 3]


0it [00:00, ?it/s]/11 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 114.26it/s]
0it [00:00, ?it/s]/11 [00:07<00:39,  4.44s/it]
100%|██████████| 6/6 [00:00<00:00, 131.24it/s]
0it [00:00, ?it/s]/11 [00:21<00:45,  6.45s/it]
0it [00:00, ?it/s]/11 [00:26<00:36,  6.01s/it]
100%|██████████| 6/6 [00:00<00:00, 135.38it/s]
0it [00:00, ?it/s]/11 [00:49<00:35,  8.94s/it]
0it [00:00, ?it/s]/11 [01:00<00:28,  9.60s/it]
100%|██████████| 136/136 [00:01<00:00, 93.59it/s] 
100%|██████████| 15/15 [00:00<00:00, 86.32it/s]
100%|██████████| 11/11 [02:33<00:00, 13.91s/it]




100%|██████████| 1151/1151 [00:37<00:00, 30.55it/s]


Clusters discovered: 12
Clusters formed: 11
Pairs to evaluate: 1638
final_clusters index :  [11, 9, 10, 4, 5, 1, 7, 8, 6, 2, 3]


0it [00:00, ?it/s]/11 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 77.43it/s]]
0it [00:00, ?it/s]/11 [00:08<00:41,  4.56s/it]
100%|██████████| 6/6 [00:00<00:00, 123.08it/s]
0it [00:00, ?it/s]/11 [00:20<00:43,  6.16s/it]
0it [00:00, ?it/s]/11 [00:25<00:34,  5.75s/it]
100%|██████████| 6/6 [00:00<00:00, 118.67it/s]
0it [00:00, ?it/s]/11 [00:54<00:42, 10.62s/it]
0it [00:00, ?it/s]/11 [01:04<00:31, 10.64s/it]
100%|██████████| 136/136 [00:02<00:00, 51.92it/s]
100%|██████████| 15/15 [00:00<00:00, 125.13it/s]
100%|██████████| 11/11 [02:48<00:00, 15.28s/it]




100%|██████████| 1165/1165 [00:41<00:00, 27.74it/s]


Clusters discovered: 12
Clusters formed: 11
Pairs to evaluate: 1766
final_clusters index :  [11, 10, 5, 2, 6, 1, 8, 9, 7, 3, 4]


0it [00:00, ?it/s]/11 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 116.35it/s]
100%|██████████| 6/6 [00:00<00:00, 138.35it/s]
0it [00:00, ?it/s]/11 [00:24<01:16,  9.62s/it]
0it [00:00, ?it/s]/11 [00:32<01:02,  8.98s/it]
0it [00:00, ?it/s]/11 [00:36<00:44,  7.42s/it]
100%|██████████| 6/6 [00:00<00:00, 30.17it/s]]
0it [00:00, ?it/s]/11 [01:06<00:47, 11.96s/it]
0it [00:00, ?it/s]/11 [01:18<00:35, 11.82s/it]
100%|██████████| 136/136 [00:02<00:00, 62.25it/s]
100%|██████████| 15/15 [00:00<00:00, 113.99it/s]
100%|██████████| 11/11 [03:21<00:00, 18.28s/it]




100%|██████████| 1184/1184 [00:46<00:00, 25.57it/s]


Clusters discovered: 11
Clusters formed: 10
Pairs to evaluate: 1772
final_clusters index :  [10, 5, 6, 2, 8, 1, 9, 7, 3, 4]


0it [00:00, ?it/s]/10 [00:00<?, ?it/s]
100%|██████████| 6/6 [00:00<00:00, 89.94it/s]]
0it [00:00, ?it/s]/10 [00:12<00:57,  7.19s/it]
0it [00:00, ?it/s]/10 [00:20<00:52,  7.46s/it]
100%|██████████| 6/6 [00:00<00:00, 134.23it/s]
0it [00:00, ?it/s]/10 [00:41<00:47,  9.52s/it]
0it [00:00, ?it/s]/10 [00:52<00:39,  9.92s/it]
0it [00:00, ?it/s]/10 [01:02<00:30, 10.18s/it]
100%|██████████| 136/136 [00:01<00:00, 68.02it/s]
100%|██████████| 15/15 [00:00<00:00, 94.29it/s]
100%|██████████| 10/10 [02:49<00:00, 16.90s/it]




100%|██████████| 1193/1193 [00:42<00:00, 27.90it/s]


Clusters discovered: 8
Clusters formed: 7
Pairs to evaluate: 1356
final_clusters index :  [7, 6, 4, 1, 5, 2, 3]


0it [00:00, ?it/s]/7 [00:00<?, ?it/s]
0it [00:00, ?it/s]/7 [00:01<00:08,  1.41s/it]
100%|██████████| 6/6 [00:00<00:00, 125.57it/s]
0it [00:00, ?it/s]/7 [00:14<00:25,  6.30s/it]
0it [00:00, ?it/s]/7 [00:30<00:29,  9.83s/it]
100%|██████████| 136/136 [00:01<00:00, 127.61it/s]
100%|██████████| 15/15 [00:00<00:00, 83.02it/s]
100%|██████████| 7/7 [02:09<00:00, 18.56s/it]


In [62]:
# 돌린거 저장
import pickle
with open('1901~1912_pair_team_reg.pkl','wb') as f:
    pickle.dump(result, f)

In [16]:
with open('1901~1912_pair_select.pkl','rb') as f:
    result = pickle.load(f)

In [17]:
result[]

{'trial_1': ({('IJR',
    'SLY'): array([[1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
           1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
           1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
           1000000., 1000000., 1000000.]]),
   ('VONE',
    'VV'): array([[1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
           1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
           1000000., 1000000., 1000000., 1000000., 1000000., 1000000.,
           1000000., 1000000., 1000000.]]),
   ('SPTM',
    'SCHX'): array([[1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        ,  995172.12072754,
            995172.12072754,  995172.12072754,  995172.12072754,
            995172.12072754, 1011092.53103638,  999507.12965393,
            999847.3151