In [1]:
import numpy as np
import pandas as pd
import import_ipynb
import pair_selection_DBSCAN_funclist_for_training as myFunc
import matplotlib.pyplot as plt

import math
import sklearn
from datetime import datetime

from sklearn import datasets, linear_model
from tqdm import tqdm
from sklearn.preprocessing import PolynomialFeatures

import warnings
warnings.filterwarnings('ignore')

importing Jupyter notebook from pair_selection_DBSCAN_funclist_for_training.ipynb


In [2]:
price_df = pd.read_csv('./Data/us_etf_price.csv')
etf_info = pd.read_csv('./Data/etfs_details_equity.csv')
er_df = etf_info[['Symbol','ER']]
er_df = er_df.set_index('Symbol')
er_df = er_df['ER'].apply(lambda x: x.split('%')[0]).astype(float)

In [3]:

pc_selecting_threshold = 0.9
eps = 1.8
min_samples = 4
cluster_size_limit = 100
cluster_member_counts = 100
inverse_threshold=-0.99
coint_pvalue_threshold=0.01
hurst_threshold=0.5
half_life_threshold=10
mean_reverting_freq=12

In [4]:
class Backtesting():

    def __init__(self, z_score_list, stop_loss, buy_z, sell_z, total_money=1000000):
        
        self.pairs_num = z_score_list.shape[0]
        self.total_time = z_score_list.shape[1]
        self.stop_loss = stop_loss
        self.buy_z = buy_z
        self.sell_z = sell_z
        
        self.inverse_price = np.zeros((2*self.pairs_num))
        self.total_stock = np.zeros((2*self.pairs_num))
        self.total_inverse = np.zeros((2*self.pairs_num))
        self.money_for_pair = int(total_money/self.pairs_num) * np.ones((self.pairs_num))
    
    def trade_decision(self, z_score, pairs_num):
        
        stop_loss = self.stop_loss
        buy_z = self.buy_z
        sell_z = self.sell_z
        total_stock = self.total_stock[2*(pairs_num)]
        total_inverse = self.total_inverse[2*(pairs_num)]
        stock, inverse = 0, 0 

        # 스탑로스 컷에 걸릴 때는 다 팔기
        if z_score >= stop_loss or z_score <= -1 * stop_loss:
            stock, inverse = -2, -2
        
        # buy threshold 이상이면서 해당 etf 보유하고 있지 않으면 매수
        elif z_score >= buy_z and total_stock==0:
            stock, inverse = 1, 0

        # sell threshold 이하이면서, 해당 etf 보유하고 있으면 etf 팔기
        elif z_score <= sell_z and total_stock > 0:
            stock, inverse = -1, 0

        # -buy threshold 이하로 떨어지는데, 인버스 etf 보유하고 있지 않을 땐 inverse etf 사기
        elif z_score <= -1 * buy_z and total_inverse==0:
            stock, inverse = 0, 1
        
        # -sell threshold 이상으로 올라가는데, 인버스 etf를 보유하고 있으면 inverse etf 팔기
        elif z_score >= -1 * sell_z and total_inverse > 0:
            stock, inverse = 0, -1 

        return stock, inverse
    
    def cal_trade_vol(self, stock_num, stock_signal, inv_signal, current_stock_price, current_inverse_price):

        trade_stock_vol = 0
        trade_inverse_vol = 0

        money_for_stock = self.money_for_pair[int(stock_num/2)] / 2

        if stock_signal < 0:
            trade_stock_vol = -1 * self.total_stock[stock_num]

        elif stock_signal > 0:
            trade_stock_vol = int(money_for_stock/current_stock_price)

        if inv_signal < 0:
            trade_inverse_vol = -1 * self.total_inverse[stock_num]
        
        elif inv_signal > 0:
            trade_inverse_vol = int(money_for_stock/current_inverse_price)

        return trade_stock_vol, trade_inverse_vol


    def cal_result(self, stock_num, stock_vol, inverse_vol, current_stock_price, current_inverse_price):

        # if self.total_inverse[stock_num] > 0:
        #     change_ratio = (current_price - prev_price) / prev_price
        #     self.inverse_price[stock_num] = (1 - change_ratio) * self.inverse_price[stock_num]


        if stock_vol > 0:
            self.total_stock[stock_num] += stock_vol
            self.money_for_pair[int(stock_num/2)] -= stock_vol * current_stock_price

        elif stock_vol < 0:
            self.total_stock[stock_num] += stock_vol
            self.money_for_pair[int(stock_num/2)] -= stock_vol * current_stock_price
        
            # Buy the inverse
        if inverse_vol > 0:
            self.total_inverse[stock_num] += inverse_vol
            self.money_for_pair[int(stock_num/2)] -= inverse_vol * current_inverse_price
            # self.inverse_price[stock_num] = current_price
        
        # Sell the inverse
        if inverse_vol < 0:
            self.total_inverse[stock_num] += inverse_vol
            self.money_for_pair[int(stock_num/2)] -= inverse_vol * current_inverse_price
            # self.money_for_pair[int(stock_num/2)] -= inverse_vol * self.inverse_price[stock_num]
            # self.inverse_price[stock_num] = 0

        # total_asset = (self.total_inverse[stock_num] * self.inverse_price[stock_num] + 
        #                 self.total_stock[stock_num] * current_price) 

        total_asset = (self.total_inverse[stock_num] * current_inverse_price +
                        self.total_stock[stock_num] * current_stock_price)

        return total_asset

    def backtesting(self, z_score_list, stock_price_list, inverse_price_list):

        total_asset = np.zeros((self.pairs_num, self.total_time))
        stock_a_vol_list = []
        inverse_a_vol_list = []
        stock_b_vol_list = []
        inverse_b_vol_list = []

        for t in range(self.total_time):
            for pair_num in range(self.pairs_num):
                stock_a_num = 2* pair_num
                stock_b_num = 2* pair_num + 1
                z_score = z_score_list[pair_num]

                # 매수매입 시그널
                stock_a, inverse_a = self.trade_decision(z_score[t], pair_num)
                stock_b = inverse_a
                inverse_b = stock_a

                # 매수매입 수량 결정
                stock_a_vol, inverse_a_vol = self.cal_trade_vol(stock_a_num, stock_a, inverse_a, stock_price_list[stock_a_num][t], inverse_price_list[stock_a_num][t])
                stock_b_vol, inverse_b_vol = self.cal_trade_vol(stock_b_num, stock_b, inverse_b, stock_price_list[stock_b_num][t], inverse_price_list[stock_b_num][t])
                stock_a_vol_list.append(stock_a_vol)
                inverse_a_vol_list.append(inverse_a_vol)
                stock_b_vol_list.append(stock_b_vol)
                inverse_b_vol_list.append(inverse_b_vol)

                # 투자 결과
                total_asset[pair_num][t] += self.cal_result(stock_a_num, stock_a_vol, inverse_a_vol, stock_price_list[stock_a_num][t], inverse_price_list[stock_a_num][t])
                total_asset[pair_num][t] += self.cal_result(stock_b_num, stock_b_vol, inverse_b_vol, stock_price_list[stock_b_num][t], inverse_price_list[stock_b_num][t])
                total_asset[pair_num][t] += self.money_for_pair[int(pair_num)]

        return total_asset, stock_a_vol_list, inverse_a_vol_list, stock_b_vol_list, inverse_b_vol_list

In [5]:
def get_pair(start_date, end_date):

    _, close_df, rtn_df, low_volume_etf = myFunc.preprocessing(price_df,etf_info, start_date, end_date)
    pc_rtn = myFunc.get_pca_return(rtn_df, pc_selecting_threshold)
    clusters_viz_list, clustered_series = myFunc.dbscan_clustering(close_df, pc_rtn, eps, min_samples, cluster_size_limit, cluster_member_counts)
    selected_pair, short_pair = myFunc.Pair_selection(close_df=close_df,
                                                rtn_df=rtn_df,
                                                low_volume_etf=low_volume_etf,
                                                clusters_viz_list=clusters_viz_list, 
                                                clustered_series=clustered_series, 
                                                inverse_threshold=inverse_threshold, 
                                                coint_pvalue_threshold=coint_pvalue_threshold, 
                                                hurst_threshold=hurst_threshold, 
                                                half_life_threshold=half_life_threshold, 
                                                mean_reverting_freq=mean_reverting_freq)

    return selected_pair, short_pair, close_df, rtn_df

def create_spread_function(pair_a, pair_b, start_date, end_date, alg='log'):

    def log_spread_func(pair_a, pair_b):
        
        spread = math.log(pair_b) - w_avg * math.log(pair_a)
        z_score = spread/w_std

        return (spread, z_score)

    def lr_spread_func(pair_a, pair_b):
        pair_a, pair_b = np.log(pair_a), np.log(pair_b)
        pair_a = pair_a * np.ones((1,1))
        poly = PolynomialFeatures(degree=best_degree)
        pair_a = poly.fit_transform(pair_a)

        spread = pair_b - model.predict(pair_a)
        z_score = spread / spread_std

        return (spread, z_score)

    target_a = np.log(pair_a[start_date:end_date])
    target_b = np.log(pair_b[start_date:end_date])

    # holding_period = end_date - start_date

    if alg == 'log':
        
        w_list = target_b / target_a
        w_avg = np.average(w_list)
        w_std = np.std(w_list)

        return log_spread_func
    
    elif alg == 'lr':

        min_cv_n = float('inf')
        best_degree = 0
        total_len = target_a.size

        permute_order = np.random.permutation(total_len)
        target_a = target_a[permute_order]
        target_b = target_b[permute_order]

        train_num = int(target_a.size/3*2)

        train_a = target_a[:train_num]
        train_b = target_b[:train_num]
        val_a = target_a[train_num:]
        val_b = target_b[train_num:]

        train_a = train_a.reshape(-1,1)
        val_a = val_a.reshape(-1,1)

        for degree in range(1,10,1):

            poly = PolynomialFeatures(degree=degree)
            poly_train_a = poly.fit_transform(train_a)
            poly_val_a = poly.fit_transform(val_a)

            model = linear_model.LassoCV(cv=5)
            model.fit(poly_train_a, train_b)
            
            mse = np.average((val_b - model.predict(poly_val_a))**2)

            if mse < min_cv_n:
                best_degree = degree
                min_cv_n = mse

        if best_degree == 0:
            print("error!")

        poly = PolynomialFeatures(degree= best_degree)
        poly_train_a = poly.fit_transform(train_a)
        model = linear_model.LassoCV(cv=5)
        model.fit(poly_train_a, train_b)

        b_pred = model.predict(poly_train_a)
        spread = train_b - b_pred
        spread_std = np.std(spread)

        return lr_spread_func

def gen_z_score_history(a, b, windows_width, spread_func_update_period):

    T = a.shape[0]
    z_score_list = np.zeros((T-windows_width))

    for t in range(T-windows_width):

        if t % spread_func_update_period==0:
            spread_func = create_spread_function(a,b,t,t+windows_width, 'lr')
        
        _, z_score = spread_func(a[t],b[t])
        z_score_list[t] = z_score

    return z_score_list

def get_total_transaction_cost(pair, trading_vol, short_list):
    
    a_roundtrip = len([i for i, x  in enumerate(trading_vol[0]) if x !=0]) / 2
    a_inv_roundtrip = len([i for i, x  in enumerate(trading_vol[1]) if x !=0]) / 2
    b_roundtrip = len([i for i, x  in enumerate(trading_vol[2]) if x !=0]) / 2
    b_inv_roundtrip = len([i for i, x  in enumerate(trading_vol[3]) if x !=0]) / 2

    a_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[0]) if x !=0]).diff()[1:].sum()
    a_inv_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[1]) if x !=0]).diff()[1:].sum()
    b_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[2]) if x !=0]).diff()[1:].sum()
    b_inv_holding_period = pd.Series([i for i, x  in enumerate(trading_vol[3]) if x !=0]).diff()[1:].sum()

    a = pair[0]
    a_inv = short_list[a]
    b = pair[0]
    b_inv = short_list[b]

    def get_transaction_cost(holding_period, num_of_roundtrip, tic):
    
        bid_ask_spread = 0.1/100 * num_of_roundtrip
        operating_expenses = er_df[tic] * holding_period/252

        return bid_ask_spread + operating_expenses

    total_transaction_cost = get_transaction_cost(a_holding_period, a_roundtrip, a) 
    + get_transaction_cost(a_inv_holding_period, a_inv_roundtrip, a_inv)
    + get_transaction_cost(b_holding_period, b_roundtrip, b)
    + get_transaction_cost(b_inv_holding_period, b_inv_roundtrip, b_inv)

    return total_transaction_cost

def get_best_threshold(z_score_list, stock_price_list, inverse_price_list, pair, short_list, initial_money=1000000):
    stop_loss_candi = [1, 1.5, 2, 3]
    buy_z_candi = np.round(np.linspace(0, 2, 21), 1)
    sell_z_candi = np.round(np.linspace(0, 2, 21), 1)
    grid_search_result = {}
    for x in stop_loss_candi:
        stop_loss = x

        for y in buy_z_candi:
            buy_z = y

            for z in sell_z_candi:
                sell_z = z

                BT = Backtesting(z_score_list, total_money=initial_money, stop_loss=stop_loss, buy_z=buy_z, sell_z=sell_z)
                asset_per_pair, stock_a_vol_list, inverse_a_vol_list, stock_b_vol_list, inverse_b_vol_list = BT.backtesting(z_score_list, stock_price_list, inverse_price_list)
                total_asset = np.sum(asset_per_pair, axis=0)
                total_earning_ratio = total_asset[-1] / initial_money
                trading_vol = (stock_a_vol_list, inverse_a_vol_list, stock_b_vol_list, inverse_b_vol_list)
                total_earning_ratio_tr = total_earning_ratio - get_total_transaction_cost(pair, trading_vol, short_list)
                grid_search_result[(x,y,z)] = total_earning_ratio_tr

    best_param = max(grid_search_result, key=grid_search_result.get)
    best_earning_ratio = grid_search_result[best_param]

    print('best_earning_ratio is: ', best_earning_ratio)

    return best_param

# -----------------------------------------------------------------------------------------------

In [6]:
def get_sharpe_ratio(start_date, end_date, test_start_date, test_end_date):
    
    selected_pair, _, close_df, rtn_df = get_pair(start_date=start_date, end_date=end_date)

    selected_pair_final = []
    for i in range(len(selected_pair)):
        if len(selected_pair[i]) == 0:
            continue
        for j in range(len(selected_pair[i])):
            selected_pair_final.append(selected_pair[i][j])

    if len(selected_pair_final) == 0:
        
        return np.nan, np.nan, np.nan, np.nan, np.nan

    else: 
        # inverse etf list 뽑기
        short_list = {}
        for i in range(len(selected_pair_final)):
            short_list[selected_pair_final[i][0]] = rtn_df.corr()[selected_pair_final[i][0]].idxmin()
            short_list[selected_pair_final[i][1]] = rtn_df.corr()[selected_pair_final[i][1]].idxmin()

        # 모델 트레이닝을 위한 포메이션 기간동안의 페어 가격 추출
        training_set_price = pd.DataFrame(index=close_df.index)
        for i in range(len(selected_pair_final)):
            training_set_price = pd.concat([training_set_price, close_df[list(selected_pair_final[i])]], axis=1)

        training_set_inverse_price = close_df[[short_list[x] for x in training_set_price.columns]]

        # 페어별 z-score 뽑기
        for i in range(len(selected_pair_final)):

            print("pairs = (" + str(selected_pair_final[i]) + ")\n")

            a = training_set_price.iloc[:,2*i].to_numpy()
            b = training_set_price.iloc[:,2*i+1].to_numpy()

            spread_func = create_spread_function(a, b, 0, -1, alg='lr')
            x = np.arange(len(a))
            z_score_history = np.zeros((len(a)))

            for j in range(len(a)):
                (spread, z_score_history[j]) = spread_func(a[j], b[j])

        # 패어별로 z-score time series 뽑기
        z_score_history_list = []

        for i in range(len(selected_pair_final)):
            
            price_history = training_set_price.to_numpy()[:,2*i:2*i+2].T
            z_score_history = gen_z_score_history(price_history[0], price_history[1], 20, 20).reshape(1, -1)
            z_score_history_list.append(z_score_history)

        
        z_score_history_list = np.array(z_score_history_list).reshape(len(selected_pair_final), -1)
        z_score_list = z_score_history_list[:len(selected_pair_final),:]

        stock_price_list = training_set_price.T.to_numpy()[:,20:]
        inverse_price_list = training_set_inverse_price.T.to_numpy()[:,20:]

        # best threshold 찾기
        best_threshold = {}
        for i in range(len(selected_pair_final)):
            z_score_list_arb = z_score_list[i].reshape(1,-1)
            stock_price_list_arb = stock_price_list[2*i:2*(i+1)]
            inverse_price_list_arb = inverse_price_list[2*i:2*(i+1)]
            best_threshold[selected_pair_final[i]] = get_best_threshold(z_score_list_arb, stock_price_list_arb, inverse_price_list_arb, selected_pair_final[i], short_list)

        # 테스트 기간 데이터 로딩
        _, close_df_test, rtn_df_test, low_volume_etf_test = myFunc.preprocessing(price_df, etf_info, test_start_date, test_end_date)

        test_set_price = pd.DataFrame(index=close_df_test.index)
        for i in range(len(selected_pair_final)):
            test_set_price = pd.concat([test_set_price, close_df_test[list(selected_pair_final[i])]], axis=1)

        test_set_inverse_price = close_df_test[[short_list[x] for x in test_set_price.columns]]

        z_score_history_test_list = []
        for i in range(len(selected_pair_final)):

            print("pairs = (" + str(selected_pair_final[i]) + ")\n")

            a = test_set_price.iloc[:,2*i].to_numpy()
            b = test_set_price.iloc[:,2*i+1].to_numpy()

            spread_func = create_spread_function(a, b, 0, -1, alg='lr')
            z_score_history_test = np.zeros((a.shape[0]))

            for j in range(a.shape[0]):
                (spread, z_score_history_test[j]) = spread_func(a[j], b[j])

            z_score_history_test_list.append(z_score_history_test)

        z_score_list_test = np.array(z_score_history_test_list).reshape(len(selected_pair_final), -1)

        stock_price_list_test = test_set_price.T.to_numpy()
        inverse_price_list_test = test_set_inverse_price.T.to_numpy()

            # 트레이딩 결과 
        asset_per_pair_dic = {}        
        total_asset_dic = {}
        threshold_dic = {}
        trading_vol_dic = {}
        transaction_cost_dic = {}        

        for i in range(len(selected_pair_final)):

            initial_money = 1000000
            stop_loss = best_threshold[selected_pair_final[i]][0]
            buy_z = best_threshold[selected_pair_final[i]][1]
            sell_z = best_threshold[selected_pair_final[i]][2]

            z_score_test_arb = z_score_list_test[i].reshape(1,-1)
            stock_price_test_arb = stock_price_list_test[2*i:2*(i+1)]
            inverse_price_test_arb = inverse_price_list_test[2*i:2*(i+1)]

            BT = Backtesting(z_score_test_arb, total_money=initial_money, stop_loss=stop_loss, buy_z=buy_z, sell_z=sell_z)
            asset_per_pair_test, stock_a_vol_list_test, inverse_a_vol_list_test, stock_b_vol_list_test, inverse_b_vol_list_test = BT.backtesting(z_score_test_arb, stock_price_test_arb, inverse_price_test_arb)
            trading_vol = (stock_a_vol_list_test, inverse_a_vol_list_test, stock_b_vol_list_test, inverse_b_vol_list_test)
            total_asset_test = np.sum(asset_per_pair_test, axis=0)
            transaction_cost = get_total_transaction_cost(selected_pair_final[i], trading_vol, short_list)

            asset_per_pair_dic[selected_pair_final[i]] = asset_per_pair_test
            total_asset_dic[selected_pair_final[i]] = total_asset_test
            trading_vol_dic[selected_pair_final[i]] = trading_vol
            threshold_dic[selected_pair_final[i]] = [stop_loss, buy_z, sell_z]
            transaction_cost_dic[selected_pair_final[i]] = transaction_cost

        return asset_per_pair_dic, total_asset_dic, threshold_dic, trading_vol_dic, transaction_cost_dic

In [7]:
monthly = pd.date_range('2009-12-31', '2022-1-1', freq='MS')

In [43]:
period_candi = [(3,1),(6,1),(12,1),(24,12)]

In [44]:
formation_train = {}
for cnd in period_candi:

    train_window_width = cnd[0]
    test_window_width = cnd[1]

    train_period = []
    test_period = []
    for i in range(len(monthly)- train_window_width-test_window_width):
        train_start_date = monthly[i]
        train_end_date = monthly[i+train_window_width]
        test_end_date = monthly[i+train_window_width+test_window_width]

        if test_end_date.year < 2017:

            train_period.append((train_start_date.strftime("%Y-%m-%d"), train_end_date.strftime("%Y-%m-%d"), test_end_date.strftime("%Y-%m-%d")))
        
        elif test_end_date.year >=2017:

            test_period.append((train_start_date.strftime("%Y-%m-%d"), train_end_date.strftime("%Y-%m-%d"), test_end_date.strftime("%Y-%m-%d")))
            
    formation_train[cnd] = (train_period, test_period)

In [45]:
# trading 기간 2019-01-01 ~ 2021-12-31
performance_test = formation_train[(12,1)][1][37:49]
performance_test

[('2019-01-01', '2020-01-01', '2020-02-01'),
 ('2019-02-01', '2020-02-01', '2020-03-01'),
 ('2019-03-01', '2020-03-01', '2020-04-01'),
 ('2019-04-01', '2020-04-01', '2020-05-01'),
 ('2019-05-01', '2020-05-01', '2020-06-01'),
 ('2019-06-01', '2020-06-01', '2020-07-01'),
 ('2019-07-01', '2020-07-01', '2020-08-01'),
 ('2019-08-01', '2020-08-01', '2020-09-01'),
 ('2019-09-01', '2020-09-01', '2020-10-01'),
 ('2019-10-01', '2020-10-01', '2020-11-01'),
 ('2019-11-01', '2020-11-01', '2020-12-01'),
 ('2019-12-01', '2020-12-01', '2021-01-01')]

In [46]:
result = {}
for i in range(len(performance_test)):

    try:
        print('=================== {} 번째 시작 ======================'.format(i))
        start_date = performance_test[i][0]
        end_date = performance_test[i][1]
        test_start_date = performance_test[i][1]
        test_end_date = performance_test[i][2]
        asset_per_pair_dic, total_asset_dic, threshold_dic, trading_vol_dic, transaction_cost_dic = get_sharpe_ratio(start_date, end_date, test_start_date, test_end_date)

        result['trial_{}'.format(i+1)] = (asset_per_pair_dic, total_asset_dic, threshold_dic, trading_vol_dic, transaction_cost_dic)

    except KeyboardInterrupt as e:

        print(e)
        pass



100%|██████████| 1202/1202 [00:16<00:00, 72.34it/s]


Clusters discovered: 20
Clusters formed: 20
Pairs to evaluate: 9936
final_clusters index :  [19, 18, 13, 12, 11, 1, 8, 2, 6, 9, 7, 16, 17, 15, 4, 14, 3, 10, 5, 0]


100%|██████████| 1/1 [00:00<00:00, 180.70it/s]
0it [00:00, ?it/s]/20 [00:03<01:05,  3.45s/it]
0it [00:00, ?it/s]/20 [00:05<00:49,  2.77s/it]
0it [00:00, ?it/s]/20 [00:07<00:38,  2.29s/it]
100%|██████████| 6/6 [00:00<00:00, 256.64it/s]
100%|██████████| 1/1 [00:00<00:00, 177.18it/s]
0it [00:00, ?it/s]/20 [00:17<00:42,  3.05s/it]
100%|██████████| 10/10 [00:00<00:00, 265.74it/s]
0it [00:00, ?it/s]/20 [00:25<00:45,  3.77s/it]
100%|██████████| 3/3 [00:00<00:00, 168.38it/s]
0it [00:00, ?it/s]0/20 [00:31<00:36,  3.64s/it]
0it [00:00, ?it/s]1/20 [00:34<00:29,  3.25s/it]
0it [00:00, ?it/s]2/20 [00:37<00:25,  3.17s/it]
100%|██████████| 6/6 [00:00<00:00, 246.65it/s]]
0it [00:00, ?it/s]4/20 [00:45<00:22,  3.68s/it]
0it [00:00, ?it/s]5/20 [00:48<00:17,  3.46s/it]
100%|██████████| 10/10 [00:00<00:00, 253.48it/s]
0it [00:00, ?it/s]7/20 [01:00<00:15,  5.11s/it]
100%|██████████| 136/136 [00:00<00:00, 282.26it/s]
100%|██████████| 325/325 [00:01<00:00, 275.98it/s]
100%|██████████| 20/20 [02:43<00:00,  8.1

pairs = (('IJR', 'SLY'))

pairs = (('VONE', 'VV'))

pairs = (('SPTM', 'SCHX'))

best_earning_ratio is:  1.0043234468004136
best_earning_ratio is:  1.0171356330945456
best_earning_ratio is:  1.0871866641745793


100%|██████████| 1338/1338 [00:07<00:00, 190.77it/s]


pairs = (('IJR', 'SLY'))

pairs = (('VONE', 'VV'))

pairs = (('SPTM', 'SCHX'))



100%|██████████| 1206/1206 [00:17<00:00, 70.79it/s]


Clusters discovered: 20
Clusters formed: 20
Pairs to evaluate: 10704
final_clusters index :  [9, 17, 16, 12, 11, 4, 1, 2, 3, 7, 10, 19, 15, 13, 18, 8, 14, 0, 6, 5]


0it [00:00, ?it/s]/20 [00:00<?, ?it/s]
0it [00:00, ?it/s]/20 [00:02<00:44,  2.37s/it]
100%|██████████| 3/3 [00:00<00:00, 232.36it/s]
0it [00:00, ?it/s]/20 [00:08<00:53,  3.17s/it]
100%|██████████| 6/6 [00:00<00:00, 239.35it/s]
0it [00:00, ?it/s]/20 [00:16<00:53,  3.57s/it]
100%|██████████| 1/1 [00:00<00:00, 178.03it/s]
100%|██████████| 10/10 [00:00<00:00, 239.29it/s]
100%|██████████| 1/1 [00:00<00:00, 176.05it/s]
0it [00:00, ?it/s]/20 [00:31<00:45,  4.10s/it]
100%|██████████| 3/3 [00:00<00:00, 224.86it/s]]
0it [00:00, ?it/s]1/20 [00:38<00:34,  3.84s/it]
100%|██████████| 6/6 [00:00<00:00, 252.42it/s]]
0it [00:00, ?it/s]3/20 [00:46<00:28,  4.04s/it]
100%|██████████| 6/6 [00:00<00:00, 240.10it/s]]
0it [00:00, ?it/s]5/20 [00:54<00:20,  4.13s/it]
0it [00:00, ?it/s]6/20 [00:56<00:14,  3.61s/it]
0it [00:00, ?it/s]7/20 [00:59<00:10,  3.43s/it]
100%|██████████| 276/276 [00:00<00:00, 279.46it/s]
100%|██████████| 120/120 [00:00<00:00, 246.61it/s]
100%|██████████| 20/20 [02:39<00:00,  7.96s/it]


pairs = (('SPDN', 'SH'))

pairs = (('SPTM', 'SCHB'))

pairs = (('VONE', 'VV'))

pairs = (('SPTM', 'SCHX'))

pairs = (('SPLG', 'SCHB'))

best_earning_ratio is:  1.1625501964857314
best_earning_ratio is:  1.0
best_earning_ratio is:  1.0000936742620317
best_earning_ratio is:  1.0669310423102603
best_earning_ratio is:  1.0075901042827424


100%|██████████| 1347/1347 [00:07<00:00, 178.37it/s]


pairs = (('SPDN', 'SH'))

pairs = (('SPTM', 'SCHB'))

pairs = (('VONE', 'VV'))

pairs = (('SPTM', 'SCHX'))

pairs = (('SPLG', 'SCHB'))



100%|██████████| 1211/1211 [00:17<00:00, 69.21it/s]


Clusters discovered: 26
Clusters formed: 25
Pairs to evaluate: 1152
final_clusters index :  [12, 21, 5, 6, 8, 1, 25, 2, 3, 9, 11, 10, 24, 23, 15, 16, 20, 19, 13, 17, 18, 22, 14, 4, 7]


0it [00:00, ?it/s]/25 [00:00<?, ?it/s]
0it [00:00, ?it/s]/25 [00:02<00:57,  2.38s/it]
100%|██████████| 6/6 [00:00<00:00, 244.31it/s]
0it [00:00, ?it/s]/25 [00:09<01:16,  3.49s/it]
100%|██████████| 3/3 [00:00<00:00, 227.97it/s]
100%|██████████| 1/1 [00:00<00:00, 173.13it/s]
0it [00:00, ?it/s]/25 [00:19<01:03,  3.32s/it]
100%|██████████| 10/10 [00:00<00:00, 261.87it/s]
0it [00:00, ?it/s]/25 [00:26<01:03,  3.74s/it]
0it [00:00, ?it/s]/25 [00:29<00:55,  3.50s/it]
0it [00:00, ?it/s]0/25 [00:31<00:45,  3.02s/it]
100%|██████████| 3/3 [00:00<00:00, 222.13it/s]]
0it [00:00, ?it/s]2/25 [00:39<00:46,  3.61s/it]
0it [00:00, ?it/s]3/25 [00:41<00:39,  3.26s/it]
100%|██████████| 6/6 [00:00<00:00, 255.55it/s]]
0it [00:00, ?it/s]5/25 [00:49<00:36,  3.66s/it]
0it [00:00, ?it/s]6/25 [00:52<00:31,  3.46s/it]
0it [00:00, ?it/s]7/25 [00:54<00:25,  3.16s/it]
100%|██████████| 6/6 [00:00<00:00, 242.35it/s]]
0it [00:00, ?it/s]9/25 [01:04<00:24,  4.13s/it]
0it [00:00, ?it/s]0/25 [01:07<00:19,  3.81s/it]
0it [00:

pairs = (('SPDN', 'SH'))

best_earning_ratio is:  1.012152773604287


100%|██████████| 1363/1363 [00:07<00:00, 178.72it/s]


pairs = (('SPDN', 'SH'))



100%|██████████| 1222/1222 [00:17<00:00, 69.85it/s]


Clusters discovered: 3
Clusters formed: 2
Pairs to evaluate: 2582
final_clusters index :  [1, 2]


0it [00:00, ?it/s]/2 [00:00<?, ?it/s]
100%|██████████| 561/561 [00:02<00:00, 243.11it/s]
100%|██████████| 2/2 [00:55<00:00, 27.87s/it]


pairs = (('EPV', 'TZA'))

pairs = (('CHAD', 'SDOW'))

pairs = (('CHAD', 'FAZ'))

best_earning_ratio is:  1.017578819937744
best_earning_ratio is:  1.0
best_earning_ratio is:  1.1667529797209184


100%|██████████| 1367/1367 [00:07<00:00, 174.02it/s]


pairs = (('EPV', 'TZA'))

pairs = (('CHAD', 'SDOW'))

pairs = (('CHAD', 'FAZ'))



100%|██████████| 1238/1238 [00:18<00:00, 66.08it/s]


Clusters discovered: 4
Clusters formed: 3
Pairs to evaluate: 1980
final_clusters index :  [3, 1, 2]


100%|██████████| 21/21 [00:00<00:00, 268.35it/s]
0it [00:00, ?it/s]/3 [00:08<00:17,  8.96s/it]
100%|██████████| 351/351 [00:01<00:00, 273.00it/s]
100%|██████████| 3/3 [00:56<00:00, 18.82s/it]


pairs = (('BZQ', 'EFZ'))

best_earning_ratio is:  1.0659445686041211


100%|██████████| 1380/1380 [00:07<00:00, 182.10it/s]


pairs = (('BZQ', 'EFZ'))



100%|██████████| 1252/1252 [00:18<00:00, 66.11it/s]


Clusters discovered: 3
Clusters formed: 2
Pairs to evaluate: 2582
final_clusters index :  [1, 2]


0it [00:00, ?it/s]/2 [00:00<?, ?it/s]
100%|██████████| 595/595 [00:02<00:00, 260.86it/s]
100%|██████████| 2/2 [00:58<00:00, 29.02s/it]


pairs = (('SPDN', 'EDZ'))

pairs = (('EDZ', 'SH'))

pairs = (('DOG', 'EEV'))

best_earning_ratio is:  1.2403977072003678
best_earning_ratio is:  1.1979302009991544
best_earning_ratio is:  1.0180186792919825


100%|██████████| 1394/1394 [00:08<00:00, 159.37it/s]


pairs = (('SPDN', 'EDZ'))

pairs = (('EDZ', 'SH'))

pairs = (('DOG', 'EEV'))



100%|██████████| 1264/1264 [00:19<00:00, 66.05it/s]


Clusters discovered: 3
Clusters formed: 2
Pairs to evaluate: 2582
final_clusters index :  [1, 2]


0it [00:00, ?it/s]/2 [00:00<?, ?it/s]
100%|██████████| 630/630 [00:02<00:00, 269.97it/s]
100%|██████████| 2/2 [01:00<00:00, 30.35s/it]


pairs = (('BZQ', 'RWM'))

pairs = (('YXI', 'SKF'))

pairs = (('EUM', 'SKF'))

best_earning_ratio is:  1.1141323606693934
best_earning_ratio is:  1.0
best_earning_ratio is:  1.0470844594073225


100%|██████████| 1416/1416 [00:09<00:00, 156.71it/s]


pairs = (('BZQ', 'RWM'))

pairs = (('YXI', 'SKF'))

pairs = (('EUM', 'SKF'))



100%|██████████| 1271/1271 [00:19<00:00, 65.18it/s]


Clusters discovered: 4
Clusters formed: 3
Pairs to evaluate: 1704
final_clusters index :  [3, 1, 2]


100%|██████████| 6/6 [00:00<00:00, 237.46it/s]
0it [00:00, ?it/s]/3 [00:05<00:10,  5.34s/it]
100%|██████████| 378/378 [00:01<00:00, 277.29it/s]
100%|██████████| 3/3 [00:54<00:00, 18.16s/it]


pairs = (('DOG', 'SKF'))

best_earning_ratio is:  1.0


100%|██████████| 1427/1427 [00:08<00:00, 171.82it/s]


pairs = (('DOG', 'SKF'))



100%|██████████| 1278/1278 [00:19<00:00, 65.17it/s]


Clusters discovered: 5
Clusters formed: 4
Pairs to evaluate: 1488
final_clusters index :  [4, 3, 1, 2]


100%|██████████| 6/6 [00:00<00:00, 243.15it/s]
100%|██████████| 3/3 [00:00<00:00, 238.04it/s]
0it [00:00, ?it/s]/4 [00:09<00:09,  4.91s/it]
100%|██████████| 300/300 [00:01<00:00, 276.09it/s]
100%|██████████| 4/4 [00:55<00:00, 13.79s/it]




100%|██████████| 1293/1293 [00:19<00:00, 65.14it/s]


Clusters discovered: 5
Clusters formed: 4
Pairs to evaluate: 1346
final_clusters index :  [4, 3, 1, 2]


100%|██████████| 6/6 [00:00<00:00, 238.91it/s]
100%|██████████| 3/3 [00:00<00:00, 236.65it/s]
0it [00:00, ?it/s]/4 [00:10<00:10,  5.07s/it]
100%|██████████| 276/276 [00:00<00:00, 278.33it/s]
100%|██████████| 4/4 [00:55<00:00, 13.86s/it]




100%|██████████| 1301/1301 [00:20<00:00, 64.53it/s]


Clusters discovered: 5
Clusters formed: 4
Pairs to evaluate: 1416
final_clusters index :  [4, 3, 1, 2]


100%|██████████| 6/6 [00:00<00:00, 256.65it/s]
100%|██████████| 3/3 [00:00<00:00, 217.23it/s]
0it [00:00, ?it/s]/4 [00:10<00:10,  5.10s/it]
100%|██████████| 276/276 [00:00<00:00, 278.53it/s]
100%|██████████| 4/4 [00:55<00:00, 13.76s/it]




100%|██████████| 1318/1318 [00:20<00:00, 64.96it/s]


Clusters discovered: 5
Clusters formed: 4
Pairs to evaluate: 1532
final_clusters index :  [4, 3, 1, 2]


100%|██████████| 3/3 [00:00<00:00, 238.02it/s]
100%|██████████| 28/28 [00:00<00:00, 280.05it/s]
0it [00:00, ?it/s]/4 [00:16<00:17,  8.70s/it]
100%|██████████| 276/276 [00:01<00:00, 257.27it/s]
100%|██████████| 4/4 [01:03<00:00, 15.76s/it]


In [12]:
# 돌린거 저장
import pickle
with open('result_may_semi_final_all.pkl','wb') as f:
    pickle.dump(result, f)

In [13]:
with open('result_may_semi_final_all.pkl','rb') as f:
    result = pickle.load(f)

In [18]:
result

{'trial_1': (nan, nan, nan, nan, nan),
 'trial_2': ({('SPTM',
    'SCHB'): array([[1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000.        , 1000000.        , 1000000.        ,
           1000000