기존에 만들어둔 predicted corr matrix 기반으로 Fixed K 구현해보기

In [1]:
import traceback
from joblib import Parallel, delayed
import json
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

from mvgarch.ugarch import UGARCH
from mvgarch.mgarch import DCCGARCH

from tools.portfolio import black_litterman, vectorize_corr

In [2]:
# Number of clusters; passed as n_clusters to KMeans in the clustering cell.
# NOTE(review): the output folder used by the save code is the literal "results/res(k3)",
# so it presumably has to be renamed by hand when k changes — confirm.
k = 3

In [3]:
# Load the input DataFrame (presumably asset price levels, since log differences
# are taken below — confirm).
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling; only
# load this file from a trusted source.
df = pd.read_pickle("data/asset_cc2.pkl")

# Length, in rows, of the rolling window used for the covariance / correlation estimates.
window_size = 60
# Row-to-row differences of log(df), scaled by 100; rows containing NaN are dropped.
rtn = np.log(df).diff().dropna() * 100
# For each row t: 100 * (log(df[t+20]) - log(df[t])), i.e. the log change over the
# NEXT 20 rows, kept from 1997-05 onward.
holding_rtn = (np.log(df) - np.log(df).shift(20)).shift(-20).dropna()["1997-05":] * 100
# Rolling covariance of rtn, moved 120 rows earlier.
# NOTE(review): presumably 120 = 20 dates x 6 assets, because rolling().cov() stacks
# one row per column for every date — TODO confirm against the number of columns in df.
holding_cov = rtn.rolling(window=window_size).cov().shift(-120).dropna()
# Rolling correlation and covariance of rtn over window_size rows, kept from 1997-05 onward.
rolling_corr_matrix = rtn.rolling(window=window_size).corr().dropna()["1997-05":]
rolling_cov_matrix = rtn.rolling(window=window_size).cov().dropna()["1997-05":]

# Dates iterated by the main loop: rtn's index between 2007-01-03 and 2025-04-16,
# with the first 40 entries of that slice skipped.
days_lst = rtn.loc["2007-01-03":"2025-04-16"].index[40:]
# Candidate cluster counts 2..9; not referenced by the other cells visible here.
range_n_clusters = list(range(2, 10))

# Truncate rtn only now, after the rolling statistics and days_lst above have been
# derived from the longer, untruncated series.
rtn = rtn["1997-05":]

In [4]:
# Load every JSON file in results/res2 whose name contains "predicted_corr_matrix".
# os.listdir returns names in arbitrary order, so the listing is sorted: if two
# files contain the same date, the entry that wins the merge below is then the
# same on every machine and every run.
listdir = sorted(os.listdir("results/res2"))
predicted_corr_lst = []
for file in listdir:
    if "predicted_corr_matrix" in file:
        with open(f"results/res2/{file}", 'r', encoding='utf-8') as f:
            predicted_corr_lst.append(json.load(f))

# Merge the per-file dicts into a single dict keyed by pandas Timestamp.
# Later files overwrite earlier ones on duplicate dates. The comprehension
# variables are deliberately not called `k`, to avoid confusion with the
# cluster count defined earlier in the notebook.
predicted_corr_dict = {
    pd.to_datetime(date_str): corr
    for per_file in predicted_corr_lst
    for date_str, corr in per_file.items()
}

In [5]:
# For every date in rtn's index, apply vectorize_corr to that date's block of
# rolling_corr_matrix and store the result under the date, so the main loop can
# look vectors up by date instead of recomputing them.
rc_vector_dict = dict(
    (day, vectorize_corr(rolling_corr_matrix.loc[day].values))
    for day in rtn.index
)

In [6]:
# Fixed-K regime clustering followed by a Black-Litterman allocation, one pass
# per date in days_lst.
#
# For each date `today`:
#   1. stack the vectorised rolling correlations of every date up to `today`
#      with the vectorised predicted correlation stored for `today`;
#   2. cluster the stacked rows with KMeans(n_clusters=k);
#   3. take the cluster of the last (predicted) row as the predicted state and
#      collect every date that shares it;
#   4. feed the mean holding return / covariance of those dates to
#      black_litterman, clip negative weights to 0 and renormalise;
#   5. store the weights and the label series, and dump both to JSON every
#      CHECKPOINT_EVERY successful days.
#
# A date whose computation raises is reported (with the exception) and skipped.
# A failure while writing a checkpoint is NOT swallowed: it propagates so that it
# is not mistaken for a per-date error.

OUT_DIR = "results/res(k3)"                   # folder receiving the JSON dumps
CHECKPOINT_EVERY = 1500                       # successful days between intermediate dumps
KMEANS_SEED = 0                               # fixed seed so that a re-run gives the same clusters
W_MKT = (0.05, 0.05, 0.05, 0.05, 0.6, 0.2)    # passed to black_litterman as w_mkt
BL_TAU = 0.15                                 # passed to black_litterman as tau


def _save_results(weights_by_day, regime_series_lst):
    """Dump the weights and the estimated regimes collected so far to JSON.

    Parameters
    ----------
    weights_by_day : dict
        Maps a date to an array-like exposing ``.tolist()``.
    regime_series_lst : list
        Objects exposing ``.items()`` (date -> label), one per processed date.

    Raises
    ------
    OSError
        If the output folder or files cannot be written.
    """
    serializable_w = {str(day): w.tolist() for day, w in weights_by_day.items()}
    serializable_regimes = [
        {str(day): label for day, label in regime.items()}
        for regime in regime_series_lst
    ]
    os.makedirs(OUT_DIR, exist_ok=True)
    with open(f"{OUT_DIR}/result_weightsfull.json", "w", encoding="utf-8") as f:
        json.dump(serializable_w, f, ensure_ascii=False, indent=4)
    with open(f"{OUT_DIR}/estimated_regimefull.json", "w", encoding="utf-8") as f:
        json.dump(serializable_regimes, f, ensure_ascii=False, indent=4)


result_weight_dict = {}
estimated_regime_lst = []

km = KMeans(n_clusters=k, random_state=KMEANS_SEED)  # Fixed K

# Stack the per-date vectors once. Row j belongs to rtn.index[j], so the history
# up to `today` is simply the first len(hist) rows; this avoids rebuilding the
# whole matrix from the dict on every iteration.
all_rc_vectors = np.vstack([rc_vector_dict[d] for d in rtn.index])

i = 0  # number of successfully processed days
for today in tqdm(days_lst):
    try:
        hist = rtn.loc[:today]
        stacked = np.vstack(
            [
                all_rc_vectors[:len(hist)],
                vectorize_corr(np.array(predicted_corr_dict[today])),
            ])

        # Fixed KMeans
        labels = km.fit_predict(stacked)

        # The extra (predicted) row is labelled with the date that follows `today`
        # in rtn; this raises IndexError when `today` is the last row of rtn, in
        # which case the date is reported below and skipped.
        labels_series = pd.Series(
            labels,
            index=list(hist.index) + [rtn.loc[today:].index[1]],
        )
        pred_state = labels_series.iloc[-1]
        regs = labels_series[labels_series == pred_state].index

        # Black-Litterman
        # NOTE(review): regs contains the forecast date itself and dates close to
        # `today`; holding_rtn / holding_cov at those dates are built from rows
        # after `today` (forward shifts), which looks like look-ahead — confirm
        # that this is intended.
        mu_bl, w_bl = black_litterman(
            sigma=rolling_cov_matrix.loc[today].values,
            w_mkt=np.array(W_MKT),
            p=np.eye(len(W_MKT)),
            q=holding_rtn.loc[regs].mean().values,
            omega=holding_cov.loc[regs].groupby(level=1).mean().values,
            tau=BL_TAU,
        )

        # Long-only: drop negative weights, then renormalise to sum to 1.
        w_bl = np.clip(w_bl, 0, None)
        total = w_bl.sum()
        if not np.isfinite(total) or total <= 0:
            # Dividing would store NaN weights; report the date instead.
            raise ValueError(f"weights cannot be normalised (sum={total})")
        w_bl = w_bl / total
    except Exception as exc:
        # Exception (not a bare except) so that Ctrl-C still interrupts the run,
        # and the cause of the failure is shown next to the date.
        print(f"error in {today}: {type(exc).__name__}: {exc}")
        continue

    # Save
    result_weight_dict[today] = w_bl
    estimated_regime_lst.append(labels_series)

    # Periodic checkpoint, kept outside the try block on purpose (see header).
    if i % CHECKPOINT_EVERY == 0:
        _save_results(result_weight_dict, estimated_regime_lst)
    i += 1

 20%|██        | 923/4528 [00:33<01:50, 32.60it/s]

error in 2010-10-29 00:00:00


 22%|██▏       | 1012/4528 [00:36<02:05, 28.00it/s]

error in 2011-03-10 00:00:00


 27%|██▋       | 1203/4528 [00:43<01:46, 31.10it/s]

error in 2011-12-13 00:00:00


 61%|██████    | 2751/4528 [02:32<01:26, 20.61it/s]  

error in 2018-03-02 00:00:00


 68%|██████▊   | 3063/4528 [03:43<01:18, 18.67it/s]  

error in 2019-05-30 00:00:00


100%|██████████| 4528/4528 [06:45<00:00, 11.16it/s]

error in 2025-04-16 00:00:00





In [7]:
# Final dump of the complete results once the loop has finished.
# Dates become string keys and the weight arrays become plain lists so that
# both structures can be written with json.dump.
serializable_result_w = dict(
    (str(day), weights.tolist()) for day, weights in result_weight_dict.items()
)
serializable_estimated_regime_lst = [
    dict((str(day), label) for day, label in regime.items())
    for regime in estimated_regime_lst
]

with open("results/res(k3)/result_weightsfull.json", mode="w", encoding="utf-8") as f:
    json.dump(serializable_result_w, f, ensure_ascii=False, indent=4)

with open("results/res(k3)/estimated_regimefull.json", mode="w", encoding="utf-8") as f:
    json.dump(serializable_estimated_regime_lst, f, ensure_ascii=False, indent=4)