In [3]:
import numpy as np
import pandas as pd
import import_ipynb
import pair_selection_DBSCAN_funclist_for_training as myFunc
import matplotlib.pyplot as plt

import math
import sklearn
import datetime

from sklearn import datasets, linear_model
from sklearn.preprocessing import PolynomialFeatures

import warnings
warnings.filterwarnings('ignore')

In [4]:
price_df = pd.read_csv('./Data/us_etf_price.csv')
etf_info = pd.read_csv('./Data/etfs_details_equity.csv')
er_df = etf_info[['Symbol','ER']]
er_df = er_df.set_index('Symbol')
er_df = er_df['ER'].apply(lambda x: x.split('%')[0]).astype(float)

In [5]:
pc_selecting_threshold = 0.9
eps = 1.8
min_samples = 2
cluster_size_limit = 100
cluster_member_counts = 100
inverse_threshold=-0.99
coint_pvalue_threshold=0.01
hurst_threshold=0.5
half_life_threshold=10
mean_reverting_freq=12

In [6]:
def get_pair(start_date, end_date):

    _, close_df, rtn_df, low_volume_etf = myFunc.preprocessing(price_df,etf_info, start_date, end_date)
    pc_rtn = myFunc.get_pca_return(rtn_df, pc_selecting_threshold)
    clusters_viz_list, clustered_series = myFunc.dbscan_clustering(close_df, pc_rtn, eps, min_samples, cluster_size_limit, cluster_member_counts)
    selected_pair, short_pair = myFunc.Pair_selection(close_df=close_df,
                                                rtn_df=rtn_df,
                                                low_volume_etf=low_volume_etf,
                                                clusters_viz_list=clusters_viz_list, 
                                                clustered_series=clustered_series, 
                                                inverse_threshold=inverse_threshold, 
                                                coint_pvalue_threshold=coint_pvalue_threshold, 
                                                hurst_threshold=hurst_threshold, 
                                                half_life_threshold=half_life_threshold, 
                                                mean_reverting_freq=mean_reverting_freq)

    return selected_pair, short_pair, close_df, rtn_df

In [7]:
monthly = pd.date_range('2009-12-31', '2022-1-1', freq='MS')
period_candi = [(6,1), (12,1), (3,1)]

In [8]:
# 포메이션 - 트레이딩 기간 조합별 period 뽑기
formation_train = {}
for cnd in period_candi:

    train_window_width = cnd[0]
    test_window_width = cnd[1]

    train_period = []
    test_period = []
    for i in range(len(monthly)- train_window_width-test_window_width):
        train_start_date = monthly[i]
        train_end_date = monthly[i+train_window_width]
        test_end_date = monthly[i+train_window_width+test_window_width]

        if test_end_date.year < 2017:

            train_period.append((train_start_date.strftime("%Y-%m-%d"), train_end_date.strftime("%Y-%m-%d"), test_end_date.strftime("%Y-%m-%d")))
        
        elif test_end_date.year >=2017:

            test_period.append((train_start_date.strftime("%Y-%m-%d"), train_end_date.strftime("%Y-%m-%d"), test_end_date.strftime("%Y-%m-%d")))
            
    formation_train[cnd] = (train_period, test_period)

In [9]:
# 트레이딩 기간이 2021년인 거 뽑음
performance_test = formation_train[(12,1)][1][49:]

In [10]:
result = {}
for i in range(len(performance_test)):

    try:
        print('=================== {} 번째 시작 ======================'.format(i))
        start_date = performance_test[i][0]
        end_date = performance_test[i][1]

        selected_pair, short_pair, close_df, rtn_df = get_pair(start_date, end_date)

    except KeyboardInterrupt as e:
        print(e)
        pass

    result[performance_test[i]] = (selected_pair, short_pair, close_df, rtn_df)



100%|██████████| 1338/1338 [00:20<00:00, 64.94it/s]


Clusters discovered: 10
Clusters formed: 9
Pairs to evaluate: 1374
final_clusters index :  [8, 7, 9, 4, 6, 5, 3, 1, 2]


100%|██████████| 1/1 [00:00<00:00, 179.95it/s]
0it [00:00, ?it/s]/9 [00:02<00:22,  2.87s/it]
0it [00:00, ?it/s]/9 [00:04<00:14,  2.02s/it]
100%|██████████| 3/3 [00:00<00:00, 233.39it/s]
100%|██████████| 6/6 [00:00<00:00, 251.05it/s]
100%|██████████| 3/3 [00:00<00:00, 241.80it/s]
0it [00:00, ?it/s]/9 [00:20<00:12,  4.14s/it]
0it [00:00, ?it/s]/9 [00:22<00:07,  3.72s/it]
100%|██████████| 300/300 [00:01<00:00, 277.37it/s]
100%|██████████| 9/9 [01:09<00:00,  7.69s/it]




100%|██████████| 1347/1347 [00:21<00:00, 62.12it/s]


Clusters discovered: 12
Clusters formed: 11
Pairs to evaluate: 1338
final_clusters index :  [10, 9, 6, 11, 4, 8, 7, 3, 5, 1, 2]


100%|██████████| 1/1 [00:00<00:00, 169.00it/s]
0it [00:00, ?it/s]/11 [00:02<00:29,  2.97s/it]
0it [00:00, ?it/s]/11 [00:04<00:18,  2.08s/it]
0it [00:00, ?it/s]/11 [00:05<00:14,  1.81s/it]
100%|██████████| 3/3 [00:00<00:00, 164.32it/s]
100%|██████████| 6/6 [00:00<00:00, 224.86it/s]
100%|██████████| 3/3 [00:00<00:00, 240.17it/s]
0it [00:00, ?it/s]/11 [00:22<00:16,  4.12s/it]
0it [00:00, ?it/s]/11 [00:24<00:11,  3.73s/it]
0it [00:00, ?it/s]/11 [00:28<00:07,  3.71s/it]
100%|██████████| 276/276 [00:01<00:00, 272.51it/s]
100%|██████████| 11/11 [01:14<00:00,  6.77s/it]




100%|██████████| 1363/1363 [00:21<00:00, 62.65it/s]


Clusters discovered: 14
Clusters formed: 13
Pairs to evaluate: 1602
final_clusters index :  [12, 11, 8, 6, 13, 10, 9, 5, 7, 4, 1, 3, 2]


100%|██████████| 1/1 [00:00<00:00, 167.34it/s]
0it [00:00, ?it/s]/13 [00:03<00:36,  3.07s/it]
0it [00:00, ?it/s]/13 [00:04<00:23,  2.14s/it]
0it [00:00, ?it/s]/13 [00:06<00:18,  1.85s/it]
0it [00:00, ?it/s]/13 [00:07<00:15,  1.71s/it]
100%|██████████| 3/3 [00:00<00:00, 238.19it/s]
0it [00:00, ?it/s]/13 [00:12<00:16,  2.42s/it]
100%|██████████| 3/3 [00:00<00:00, 225.54it/s]
100%|██████████| 3/3 [00:00<00:00, 240.73it/s]
0it [00:00, ?it/s]/13 [00:23<00:13,  3.48s/it]
0it [00:00, ?it/s]0/13 [00:26<00:09,  3.32s/it]
100%|██████████| 15/15 [00:00<00:00, 254.64it/s]
100%|██████████| 253/253 [00:00<00:00, 267.37it/s]
100%|██████████| 13/13 [01:32<00:00,  7.10s/it]




100%|██████████| 1367/1367 [00:22<00:00, 60.61it/s]


Clusters discovered: 84
Clusters formed: 83
Pairs to evaluate: 3388
final_clusters index :  [83, 52, 49, 48, 47, 46, 45, 41, 38, 32, 28, 27, 26, 24, 21, 20, 15, 13, 12, 11, 7, 5, 58, 59, 42, 61, 76, 78, 60, 82, 67, 66, 65, 69, 64, 73, 62, 74, 79, 72, 25, 16, 77, 22, 36, 75, 71, 80, 19, 57, 68, 56, 55, 8, 81, 54, 53, 51, 35, 14, 70, 34, 2, 3, 9, 29, 50, 43, 31, 17, 40, 23, 18, 63, 33, 37, 30, 39, 44, 1, 6, 4, 10]


0it [00:00, ?it/s]/83 [00:00<?, ?it/s]
0it [00:00, ?it/s]/83 [00:01<02:04,  1.52s/it]
0it [00:00, ?it/s]/83 [00:03<02:38,  1.96s/it]
0it [00:00, ?it/s]/83 [00:05<02:20,  1.76s/it]
0it [00:00, ?it/s]/83 [00:06<02:11,  1.66s/it]
0it [00:00, ?it/s]/83 [00:07<01:44,  1.34s/it]
0it [00:00, ?it/s]/83 [00:08<01:28,  1.14s/it]
0it [00:00, ?it/s]/83 [00:09<01:36,  1.27s/it]
0it [00:00, ?it/s]/83 [00:11<01:41,  1.35s/it]
0it [00:00, ?it/s]/83 [00:12<01:44,  1.41s/it]
0it [00:00, ?it/s]0/83 [00:13<01:28,  1.21s/it]
0it [00:00, ?it/s]1/83 [00:15<01:34,  1.31s/it]
0it [00:00, ?it/s]2/83 [00:16<01:21,  1.15s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]4/83 [00:16<00:54,  1.27it/s]
0it [00:00, ?it/s]5/83 [00:17<00:53,  1.28it/s]
0it [00:00, ?it/s]6/83 [00:18<00:52,  1.29it/s]
0it [00:00, ?it/s]7/83 [00:19<00:50,  1.30it/s]
0it [00:00, ?it/s]8/83 [00:20<01:03,  1.02it/s]
0it [00:00, ?it/s]9/83 [00:21<00:58,  1.09it/s]
0it [00:00, ?it/s]0/83 [00:22<00:55,  1.14it/s]
0it [00:00, ?it/s]1/83 [00:23<01:05,  1



100%|██████████| 1380/1380 [00:22<00:00, 60.12it/s]


Clusters discovered: 90
Clusters formed: 89
Pairs to evaluate: 9796
final_clusters index :  [89, 61, 58, 56, 55, 54, 50, 47, 46, 44, 42, 41, 37, 36, 34, 29, 25, 24, 20, 15, 14, 13, 12, 8, 62, 63, 45, 65, 84, 79, 75, 87, 64, 73, 72, 80, 78, 86, 71, 82, 85, 68, 67, 66, 81, 28, 23, 21, 33, 83, 19, 77, 16, 88, 60, 59, 57, 74, 69, 2, 49, 9, 53, 52, 51, 76, 26, 40, 48, 30, 3, 5, 10, 31, 27, 18, 22, 70, 17, 32, 43, 39, 35, 1, 38, 6, 7, 11, 4]


0it [00:00, ?it/s]/89 [00:00<?, ?it/s]
0it [00:00, ?it/s]/89 [00:01<02:17,  1.56s/it]
0it [00:00, ?it/s]/89 [00:03<02:14,  1.55s/it]
0it [00:00, ?it/s]/89 [00:03<01:42,  1.19s/it]
100%|██████████| 1/1 [00:00<00:00, 168.69it/s]
0it [00:00, ?it/s]/89 [00:08<02:45,  1.97s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/89 [00:09<01:35,  1.17s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/89 [00:10<01:20,  1.00s/it]
0it [00:00, ?it/s]0/89 [00:12<01:29,  1.13s/it]
0it [00:00, ?it/s]1/89 [00:13<01:35,  1.23s/it]
0it [00:00, ?it/s]2/89 [00:15<01:41,  1.32s/it]
0it [00:00, ?it/s]3/89 [00:16<01:28,  1.17s/it]
0it [00:00, ?it/s]4/89 [00:17<01:35,  1.27s/it]
0it [00:00, ?it/s]5/89 [00:18<01:23,  1.13s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]7/89 [00:20<01:09,  1.03it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]9/89 [00:20<00:51,  1.35it/s]
0it [00:00, ?it/s]0/89 [00:21<00:51,  1.34it/s]
0it [00:00, ?it/s]1/89 [00:23<01:03,  1.07it/s]
0it [00:00, ?it/s]2/89 [00:24<01:12,  1.09s/it]
0it [00:00, ?it/s]3/89 [00:2



100%|██████████| 1394/1394 [00:24<00:00, 57.14it/s]


Clusters discovered: 94
Clusters formed: 93
Pairs to evaluate: 11062
final_clusters index :  [93, 63, 62, 58, 57, 56, 55, 53, 49, 48, 43, 36, 64, 35, 28, 25, 24, 23, 19, 18, 14, 13, 12, 9, 7, 33, 65, 47, 81, 82, 86, 79, 75, 66, 88, 85, 73, 71, 89, 90, 80, 70, 69, 92, 72, 84, 32, 83, 17, 20, 87, 15, 8, 22, 38, 67, 50, 61, 60, 59, 74, 52, 51, 39, 77, 78, 91, 26, 41, 42, 54, 76, 10, 29, 4, 2, 45, 30, 44, 21, 16, 68, 31, 46, 27, 40, 1, 34, 37, 5, 6, 11, 3]


0it [00:00, ?it/s]/93 [00:00<?, ?it/s]
0it [00:00, ?it/s]/93 [00:01<02:28,  1.61s/it]
0it [00:00, ?it/s]/93 [00:03<02:28,  1.63s/it]
0it [00:00, ?it/s]/93 [00:04<02:24,  1.61s/it]
100%|██████████| 1/1 [00:00<00:00, 155.67it/s]
0it [00:00, ?it/s]/93 [00:08<02:57,  2.02s/it]
0it [00:00, ?it/s]/93 [00:09<02:23,  1.65s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/93 [00:10<01:27,  1.03s/it]
0it [00:00, ?it/s]/93 [00:12<01:39,  1.19s/it]
0it [00:00, ?it/s]0/93 [00:13<01:30,  1.09s/it]
0it [00:00, ?it/s]1/93 [00:14<01:40,  1.23s/it]
0it [00:00, ?it/s]2/93 [00:15<01:29,  1.11s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]4/93 [00:16<01:02,  1.26it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]6/93 [00:17<01:00,  1.26it/s]
0it [00:00, ?it/s]7/93 [00:18<01:00,  1.26it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]9/93 [00:19<00:47,  1.54it/s]
0it [00:00, ?it/s]0/93 [00:20<00:50,  1.45it/s]
0it [00:00, ?it/s]1/93 [00:21<00:52,  1.36it/s]
0it [00:00, ?it/s]2/93 [00:22<01:07,  1.05it/s]
100%|██████████| 1/1 [00:00<0



100%|██████████| 1416/1416 [00:23<00:00, 60.06it/s]


Clusters discovered: 110
Clusters formed: 109
Pairs to evaluate: 4316
final_clusters index :  [109, 69, 68, 67, 66, 65, 64, 63, 62, 60, 58, 56, 51, 46, 44, 43, 42, 36, 6, 7, 8, 9, 13, 14, 71, 15, 23, 24, 30, 31, 32, 35, 18, 73, 55, 92, 108, 94, 93, 95, 106, 105, 96, 97, 98, 89, 99, 100, 75, 101, 88, 82, 76, 77, 103, 78, 79, 80, 81, 86, 107, 102, 84, 85, 27, 26, 25, 17, 104, 22, 28, 19, 74, 10, 12, 91, 72, 70, 83, 59, 87, 52, 39, 1, 61, 21, 57, 41, 48, 2, 53, 4, 54, 50, 20, 49, 29, 16, 90, 37, 34, 47, 33, 40, 38, 5, 45, 11, 3]


0it [00:00, ?it/s]/109 [00:00<?, ?it/s]
0it [00:00, ?it/s]/109 [00:01<02:55,  1.63s/it]
0it [00:00, ?it/s]/109 [00:03<02:53,  1.62s/it]
100%|██████████| 1/1 [00:00<00:00, 179.79it/s]]
0it [00:00, ?it/s]/109 [00:07<03:33,  2.03s/it]
0it [00:00, ?it/s]/109 [00:08<02:45,  1.59s/it]
100%|██████████| 1/1 [00:00<00:00, 174.16it/s]
0it [00:00, ?it/s]/109 [00:11<02:43,  1.61s/it]
0it [00:00, ?it/s]/109 [00:12<02:42,  1.61s/it]
0it [00:00, ?it/s]/109 [00:14<02:41,  1.61s/it]
0it [00:00, ?it/s]0/109 [00:15<02:17,  1.38s/it]
0it [00:00, ?it/s]1/109 [00:16<02:22,  1.45s/it]
0it [00:00, ?it/s]2/109 [00:17<02:02,  1.26s/it]
100%|██████████| 1/1 [00:00<00:00, 178.73it/s]t]
0it [00:00, ?it/s]4/109 [00:22<03:02,  1.92s/it]
0it [00:00, ?it/s]5/109 [00:24<02:51,  1.83s/it]
0it [00:00, ?it/s]6/109 [00:25<02:44,  1.77s/it]
0it [00:00, ?it/s]7/109 [00:26<02:16,  1.48s/it]
0it [00:00, ?it/s]8/109 [00:27<01:56,  1.28s/it]
0it [00:00, ?it/s]9/109 [00:29<02:04,  1.38s/it]
0it [00:00, ?it/s]0/109 [00:30<02:09,  



100%|██████████| 1427/1427 [00:23<00:00, 59.73it/s]


Clusters discovered: 107
Clusters formed: 106
Pairs to evaluate: 4334
final_clusters index :  [106, 68, 66, 65, 64, 63, 62, 61, 60, 58, 56, 52, 48, 44, 43, 70, 41, 34, 32, 31, 30, 25, 22, 19, 15, 14, 13, 9, 8, 7, 6, 35, 73, 53, 90, 102, 74, 89, 91, 92, 103, 93, 94, 95, 86, 96, 104, 97, 85, 98, 75, 100, 76, 99, 77, 78, 80, 79, 82, 83, 105, 20, 27, 18, 26, 24, 12, 28, 72, 42, 88, 71, 69, 49, 81, 84, 1, 57, 38, 40, 101, 67, 59, 55, 50, 2, 23, 21, 4, 10, 51, 29, 17, 87, 54, 16, 47, 36, 46, 33, 39, 37, 45, 5, 11, 3]


0it [00:00, ?it/s]/106 [00:00<?, ?it/s]
0it [00:00, ?it/s]/106 [00:01<02:52,  1.64s/it]
0it [00:00, ?it/s]/106 [00:02<02:00,  1.16s/it]
0it [00:00, ?it/s]/106 [00:04<02:21,  1.38s/it]
100%|██████████| 1/1 [00:00<00:00, 174.18it/s]]
0it [00:00, ?it/s]/106 [00:08<03:13,  1.92s/it]
0it [00:00, ?it/s]/106 [00:09<02:34,  1.54s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/106 [00:10<01:56,  1.19s/it]
0it [00:00, ?it/s]/106 [00:12<02:06,  1.30s/it]
0it [00:00, ?it/s]0/106 [00:13<01:52,  1.17s/it]
0it [00:00, ?it/s]1/106 [00:14<02:03,  1.30s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]3/106 [00:16<01:40,  1.09s/it]
0it [00:00, ?it/s]4/106 [00:17<01:52,  1.22s/it]
0it [00:00, ?it/s]5/106 [00:18<01:41,  1.12s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]7/106 [00:19<01:12,  1.22it/s]
0it [00:00, ?it/s]8/106 [00:21<01:28,  1.01s/it]
100%|██████████| 1/1 [00:00<00:00, 186.40it/s]s]
0it [00:00, ?it/s]0/106 [00:25<02:14,  1.56s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]2/106 [00:26<01:44,  1.24s/it]
0it [00:00



100%|██████████| 1435/1435 [00:25<00:00, 56.68it/s]


Clusters discovered: 113
Clusters formed: 112
Pairs to evaluate: 4502
final_clusters index :  [112, 6, 53, 55, 5, 58, 59, 51, 60, 67, 68, 69, 70, 71, 72, 64, 7, 49, 48, 22, 17, 27, 28, 16, 15, 35, 36, 37, 14, 40, 41, 9, 8, 47, 73, 75, 66, 96, 92, 94, 97, 98, 99, 100, 101, 91, 102, 104, 105, 106, 107, 109, 110, 111, 103, 90, 18, 84, 82, 81, 87, 78, 86, 79, 83, 80, 33, 32, 21, 29, 26, 23, 74, 31, 108, 77, 12, 54, 85, 10, 63, 44, 62, 88, 1, 76, 95, 93, 2, 46, 65, 25, 3, 56, 24, 61, 20, 34, 52, 30, 89, 42, 19, 39, 45, 57, 4, 43, 38, 50, 11, 13]


0it [00:00, ?it/s]/112 [00:00<?, ?it/s]
0it [00:00, ?it/s]/112 [00:01<03:12,  1.73s/it]
0it [00:00, ?it/s]/112 [00:03<03:06,  1.70s/it]
0it [00:00, ?it/s]/112 [00:05<03:04,  1.69s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/112 [00:06<02:09,  1.21s/it]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 160.18it/s]]
0it [00:00, ?it/s]/112 [00:10<02:31,  1.46s/it]
0it [00:00, ?it/s]/112 [00:11<02:14,  1.30s/it]
0it [00:00, ?it/s]0/112 [00:13<02:23,  1.41s/it]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 177.45it/s]s]
0it [00:00, ?it/s]3/112 [00:17<02:34,  1.56s/it]
0it [00:00, ?it/s]4/112 [00:18<02:15,  1.38s/it]
0it [00:00, ?it/s]5/112 [00:20<02:21,  1.46s/it]
0it [00:00, ?it/s]6/112 [00:21<02:03,  1.29s/it]
0it [00:00, ?it/s]7/112 [00:22<02:12,  1.40s/it]
0it [00:00, ?it/s]8/112 [00:24<02:18,  1.48s/it]
0it [00:00, ?it/s]9/112 [00:25<02:00,  1.29s/it]
0it [00:00, ?it/s]0/112 [00:26<01:46,  1.16s/it]
0it [00:00, ?it/s]1/112 [00:27<01:39,  1.09s/it]
0it [00:00, ?it/s]2/112 [00:



100%|██████████| 1458/1458 [00:26<00:00, 54.74it/s]


Clusters discovered: 117
Clusters formed: 115
Pairs to evaluate: 3860
final_clusters index :  [116, 55, 57, 59, 61, 62, 64, 54, 4, 67, 17, 71, 73, 75, 76, 66, 53, 7, 8, 22, 23, 16, 29, 30, 15, 35, 14, 37, 38, 39, 40, 45, 46, 9, 77, 78, 68, 19, 102, 100, 104, 105, 106, 107, 98, 97, 108, 109, 95, 110, 111, 103, 92, 86, 90, 89, 81, 88, 87, 113, 91, 114, 85, 115, 84, 83, 41, 21, 28, 34, 31, 24, 12, 79, 10, 82, 74, 70, 93, 43, 1, 60, 96, 52, 99, 80, 101, 49, 3, 51, 112, 27, 72, 32, 63, 69, 25, 5, 58, 20, 36, 94, 18, 47, 33, 26, 44, 50, 65, 6, 48, 56, 42, 11, 13]


0it [00:00, ?it/s]/115 [00:00<?, ?it/s]
0it [00:00, ?it/s]/115 [00:01<03:21,  1.77s/it]
100%|██████████| 1/1 [00:00<00:00, 167.90it/s]]
0it [00:00, ?it/s]/115 [00:07<04:45,  2.55s/it]
0it [00:00, ?it/s]/115 [00:08<04:08,  2.24s/it]
0it [00:00, ?it/s]/115 [00:09<03:12,  1.75s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/115 [00:10<01:57,  1.09s/it]
0it [00:00, ?it/s]/115 [00:11<01:50,  1.03s/it]
0it [00:00, ?it/s]/115 [00:13<02:10,  1.23s/it]
0it [00:00, ?it/s]0/115 [00:14<01:58,  1.13s/it]
0it [00:00, ?it/s]1/115 [00:15<02:16,  1.31s/it]
0it [00:00, ?it/s]2/115 [00:16<02:02,  1.19s/it]
0it [00:00, ?it/s]3/115 [00:18<02:18,  1.36s/it]
100%|██████████| 1/1 [00:00<00:00, 148.12it/s]
0it [00:00, ?it/s]5/115 [00:22<02:34,  1.55s/it]
0it [00:00, ?it/s]6/115 [00:23<02:38,  1.60s/it]
0it [00:00, ?it/s]7/115 [00:24<02:18,  1.42s/it]
0it [00:00, ?it/s]8/115 [00:26<02:26,  1.51s/it]
0it [00:00, ?it/s]9/115 [00:28<02:31,  1.57s/it]
0it [00:00, ?it/s]0/115 [00:29<02:10,  1.38s/it]
0it [00:00, ?it/s]1/



100%|██████████| 1475/1475 [00:27<00:00, 54.20it/s]


Clusters discovered: 110
Clusters formed: 109
Pairs to evaluate: 10792
final_clusters index :  [109, 42, 16, 73, 17, 71, 70, 69, 68, 67, 66, 65, 18, 20, 62, 61, 60, 23, 57, 24, 25, 52, 51, 50, 30, 33, 35, 36, 37, 43, 76, 77, 75, 79, 108, 107, 7, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 78, 8, 39, 93, 80, 81, 82, 15, 84, 85, 94, 86, 14, 9, 91, 92, 87, 29, 32, 26, 10, 22, 55, 1, 88, 83, 46, 74, 72, 64, 95, 49, 59, 58, 90, 54, 13, 63, 28, 106, 48, 4, 5, 27, 56, 21, 34, 89, 31, 19, 44, 38, 41, 47, 45, 0, 53, 40, 6, 11, 12, 2]


0it [00:00, ?it/s]/109 [00:00<?, ?it/s]
0it [00:00, ?it/s]/109 [00:01<03:15,  1.81s/it]
0it [00:00, ?it/s]/109 [00:03<03:13,  1.81s/it]
0it [00:00, ?it/s]/109 [00:04<02:27,  1.39s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/109 [00:06<01:55,  1.11s/it]
0it [00:00, ?it/s]/109 [00:07<01:48,  1.05s/it]
100%|██████████| 1/1 [00:00<00:00, 154.42it/s]]
0it [00:00, ?it/s]/109 [00:11<02:59,  1.78s/it]
0it [00:00, ?it/s]/109 [00:12<02:31,  1.52s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]1/109 [00:14<02:01,  1.24s/it]
100%|██████████| 1/1 [00:00<00:00, 172.05it/s]t]
0it [00:00, ?it/s]3/109 [00:19<02:53,  1.81s/it]
0it [00:00, ?it/s]4/109 [00:20<02:52,  1.81s/it]
0it [00:00, ?it/s]5/109 [00:21<02:26,  1.56s/it]
0it [00:00, ?it/s]6/109 [00:22<02:07,  1.37s/it]
0it [00:00, ?it/s]7/109 [00:23<01:53,  1.23s/it]
0it [00:00, ?it/s]8/109 [00:25<02:06,  1.39s/it]
0it [00:00, ?it/s]9/109 [00:27<02:15,  1.51s/it]
0it [00:00, ?it/s]0/109 [00:27<01:58,  1.33s/it]
0it [00:00, ?it/s]1/109 [00:28<01:45,  1.20s/it]



100%|██████████| 1489/1489 [00:29<00:00, 51.18it/s]


Clusters discovered: 100
Clusters formed: 100
Pairs to evaluate: 17396
final_clusters index :  [99, 63, 62, 61, 60, 59, 58, 55, 54, 52, 48, 47, 45, 43, 42, 36, 33, 31, 30, 29, 27, 25, 16, 14, 13, 12, 10, 7, 65, 68, 50, 70, 83, 98, 82, 86, 69, 87, 88, 89, 90, 91, 92, 76, 75, 73, 72, 93, 94, 95, 96, 85, 39, 26, 21, 18, 24, 8, 66, 57, 84, 79, 74, 53, 71, 1, 77, 67, 46, 23, 64, 81, 19, 44, 97, 4, 41, 5, 56, 37, 22, 78, 17, 80, 15, 51, 32, 35, 28, 40, 20, 38, 0, 6, 49, 34, 11, 9, 2, 3]


0it [00:00, ?it/s]/100 [00:00<?, ?it/s]
0it [00:00, ?it/s]/100 [00:01<03:01,  1.84s/it]
0it [00:00, ?it/s]/100 [00:02<02:07,  1.30s/it]
0it [00:00, ?it/s]/100 [00:04<02:29,  1.54s/it]
0it [00:00, ?it/s]/100 [00:05<02:04,  1.30s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]/100 [00:07<01:43,  1.10s/it]
0it [00:00, ?it/s]/100 [00:08<01:38,  1.06s/it]
0it [00:00, ?it/s]/100 [00:09<01:34,  1.03s/it]
0it [00:00, ?it/s]/100 [00:11<01:57,  1.29s/it]
0it [00:00, ?it/s]0/100 [00:13<02:11,  1.46s/it]
0it [00:00, ?it/s]1/100 [00:14<01:56,  1.30s/it]
0it [00:00, ?it/s]2/100 [00:14<01:44,  1.19s/it]
0it [00:00, ?it/s]3/100 [00:16<01:59,  1.38s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]5/100 [00:17<01:21,  1.05it/s]
0it [00:00, ?it/s]6/100 [00:18<01:19,  1.06it/s]
0it [00:00, ?it/s]7/100 [00:19<01:17,  1.07it/s]
0it [00:00, ?it/s]8/100 [00:20<01:16,  1.07it/s]
0it [00:00, ?it/s]9/100 [00:23<01:57,  1.45s/it]
0it [00:00, ?it/s]0/100 [00:25<02:04,  1.56s/it]
0it [00:00, ?it/s]1/100 [00:26<02:09,  1.64s/it]

In [19]:
result

{('2020-01-01',
  '2021-01-01',
  '2021-02-01'): ([[],
   [],
   [],
   [],
   [],
   [],
   [],
   [],
   [('SPXS',
     'SDOW')]], {'SPXS': 'SPXL', 'DOG': 'DIA', 'PSQ': 'QLD', 'QID': 'QLD', 'SPDN': 'SPXL', 'SRTY': 'TNA', 'DXD': 'DIA', 'TECS': 'XLK', 'SH': 'VOO', 'SPXU': 'SPXL', 'SMDD': 'IJH', 'TWM': 'UWM', 'SQQQ': 'TQQQ', 'SSG': 'USD', 'SDOW': 'DIA', 'SOXS': 'SOXX', 'TZA': 'URTY', 'SEF': 'UYG', 'SDS': 'UPRO', 'SKF': 'UYG', 'MYY': 'IJH', 'RWM': 'URTY', 'SMN': 'UYM', 'LABD': 'XBI', 'FAZ': 'FAS'},                    IVE        SIL       ROBO       FINX        FKU  \
  Date                                                                 
  2020-01-02  130.589996  32.939999  42.610001  30.840000  41.259998   
  2020-01-03  129.660004  32.689999  42.099998  30.660000  40.610001   
  2020-01-06  129.869995  32.320000  42.169998  30.660000  40.529999   
  2020-01-07  129.380005  32.599998  42.209999  30.629999  40.470001   
  2020-01-08  129.720001  31.389999  42.450001  30.850000  40.419998