기존에 만들어둔 predicted corr matrix 기반으로 Fixed K 구현해보기

In [7]:
import traceback
from joblib import Parallel, delayed
import json
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

from mvgarch.ugarch import UGARCH
from mvgarch.mgarch import DCCGARCH

from tools.portfolio import black_litterman, vectorize_corr

In [8]:
k = 3

In [9]:
df = pd.read_pickle("data/asset_cc2.pkl")

window_size = 60
rtn = np.log(df).diff().dropna() * 100
holding_rtn = (np.log(df) - np.log(df).shift(20)).shift(-20).dropna()["1997-05":] * 100
holding_cov = rtn.rolling(window=window_size).cov().shift(-120).dropna()
rolling_corr_matrix = rtn.rolling(window=window_size).corr().dropna()["1997-05":]
rolling_cov_matrix = rtn.rolling(window=window_size).cov().dropna()["1997-05":]

days_lst = rtn.loc["2007-01-03":"2025-04-16"].index[40:]
range_n_clusters = list(range(2, 10))

rtn = rtn["1997-05":]

In [10]:
listdir = os.listdir("results/res2")
predicted_corr_lst = []
for file in listdir:
    if "predicted_corr_matrix" in file:
        with open(f"results/res2/{file}", 'r', encoding='utf-8') as f:
            predicted_corr_lst.append(json.load(f))
            
predicted_corr_dict = {pd.to_datetime(k): v for d in predicted_corr_lst for k, v in d.items()}

In [11]:
rc_vector_dict = {
    d: vectorize_corr(rolling_corr_matrix.loc[d].values)
    for d in rtn.index
}

In [12]:
for tau in [0.01, 0.15, 0.3, 0.45, 0.60, 0.75, 1]:
    result_weight_dict = {}
    estimated_regime_lst = []
    km = KMeans(n_clusters=k) # Fixed K
    
    i=0
    for today in tqdm(days_lst):
        try:
            hist = rtn.loc[:today]
            rc = rolling_corr_matrix.loc[:today]
            stacked = np.vstack(
                [
                    np.vstack([rc_vector_dict[d] for d in hist.index]),
                    vectorize_corr(np.array(predicted_corr_dict[today]))
                ])

            # 3) Fixed KMeans
            labels = km.fit_predict(stacked)

            # 4) Black–Litterman
            idxs = hist.index
            labels_series = pd.Series(
                labels,
                index=list(idxs) + [rtn.loc[today:].index[1]]
            )
            pred_state = labels_series.iloc[-1]
            regs = labels_series[labels_series == pred_state].index
            mu_bl, w_bl = black_litterman(
                sigma=rolling_cov_matrix.loc[today].values,
                w_mkt=np.array([0.05,0.05,0.05,0.05,0.6,0.2]),
                p=np.eye(6),
                q=holding_rtn.loc[regs].mean().values,
                omega=holding_cov.loc[regs].groupby(level=1).mean().values,
                tau=tau,
            )
            w_bl = np.clip(w_bl, 0, None)
            w_bl /= w_bl.sum()

            # Save
            result_weight_dict[today] = w_bl
            estimated_regime_lst.append(labels_series)

        except:
            print(f"error in {today}")
            continue
        i+=1    
    
    serializable_result_w  = {str(k): v.tolist() for k, v in result_weight_dict.items()}
    serializable_estimated_regime_lst  = [{str(key):value for key, value in inner_dict.items()} for inner_dict in estimated_regime_lst]
    with open(f"results/tau/result_weights_{tau}.json", "w", encoding="utf-8") as f:
        json.dump(serializable_result_w, f, ensure_ascii=False, indent=4)
    with open(f"results/tau/estimated_regime_{tau}.json", "w", encoding="utf-8") as f:
        json.dump(serializable_estimated_regime_lst, f, ensure_ascii=False, indent=4)

 20%|██        | 923/4528 [00:24<01:54, 31.57it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1010/4528 [00:27<01:46, 32.98it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1204/4528 [00:32<01:18, 42.21it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2753/4528 [01:28<01:07, 26.29it/s]

error in 2018-03-02 00:00:00


 68%|██████▊   | 3062/4528 [01:41<01:08, 21.30it/s]

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [02:57<00:00, 25.56it/s]


error in 2025-04-16 00:00:00


 20%|██        | 921/4528 [00:25<01:35, 37.72it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1011/4528 [00:28<01:49, 32.04it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1204/4528 [00:34<01:37, 34.02it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2755/4528 [01:31<01:07, 26.33it/s]

error in 2018-03-02 00:00:00


 68%|██████▊   | 3063/4528 [01:45<00:57, 25.28it/s]

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [02:58<00:00, 25.38it/s]


error in 2025-04-16 00:00:00


 20%|██        | 925/4528 [00:25<01:38, 36.50it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1010/4528 [00:27<01:44, 33.75it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1202/4528 [00:33<01:49, 30.37it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2753/4528 [01:32<01:02, 28.63it/s]

error in 2018-03-02 00:00:00


 68%|██████▊   | 3061/4528 [01:46<01:03, 23.03it/s]

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [03:00<00:00, 25.05it/s]


error in 2025-04-16 00:00:00


 20%|██        | 925/4528 [00:25<01:32, 38.92it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1010/4528 [00:27<01:39, 35.39it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1203/4528 [00:33<01:36, 34.36it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2753/4528 [01:34<01:07, 26.21it/s]

error in 2018-03-02 00:00:00


 68%|██████▊   | 3061/4528 [01:47<00:56, 26.15it/s]

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [03:01<00:00, 24.95it/s]


error in 2025-04-16 00:00:00


 20%|██        | 919/4528 [00:26<02:05, 28.82it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1012/4528 [00:29<01:52, 31.33it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1204/4528 [00:36<01:39, 33.57it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2753/4528 [01:35<01:05, 26.96it/s]

error in 2018-03-02 00:00:00


 68%|██████▊   | 3065/4528 [01:48<01:00, 24.07it/s]

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [03:03<00:00, 24.63it/s]


error in 2025-04-16 00:00:00


 20%|██        | 925/4528 [00:26<01:36, 37.22it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1013/4528 [00:29<01:48, 32.40it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1201/4528 [00:35<01:34, 35.30it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2753/4528 [01:33<01:09, 25.61it/s]

error in 2018-03-02 00:00:00


 68%|██████▊   | 3064/4528 [01:47<01:00, 24.32it/s]

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [03:01<00:00, 24.88it/s]


error in 2025-04-16 00:00:00


 20%|██        | 919/4528 [00:27<01:39, 36.09it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1011/4528 [00:30<01:46, 33.09it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1201/4528 [00:37<01:50, 30.01it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2752/4528 [01:36<01:02, 28.39it/s]

error in 2018-03-02 00:00:00


 68%|██████▊   | 3063/4528 [01:49<00:57, 25.31it/s]

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [03:05<00:00, 24.41it/s]


error in 2025-04-16 00:00:00
