GCC-Garch Prediction 없이 Estimation

In [1]:
import traceback
from joblib import Parallel, delayed
import json
import os
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

from mvgarch.ugarch import UGARCH
from mvgarch.mgarch import DCCGARCH

from tools.portfolio import black_litterman, vectorize_corr

In [None]:
# Load the asset panel (one column per asset; presumably price levels, since
# log-differences of it are used as returns below).
df = pd.read_pickle("data/asset_cc2.pkl")

window_size = 60      # look-back length, in rows, for the rolling corr/cov matrices
holding_period = 20   # forward horizon, in rows, for realized holding returns/covariances

# Take the log once and reuse it for every derived series.
log_prices = np.log(df)

# One-period log returns, scaled to percent.
rtn = log_prices.diff().dropna() * 100

# Forward holding-period log return: log(P[t + h]) - log(P[t]), in percent.
holding_rtn = (
    (log_prices - log_prices.shift(holding_period))
    .shift(-holding_period)
    .dropna()["1997-05":]
    * 100
)

# The rolling covariance frame stacks one row per asset for every date, so
# moving it forward by `holding_period` dates means shifting by
# holding_period * n_assets rows (20 * 6 = 120 for the six-asset panel).
holding_cov = (
    rtn.rolling(window=window_size)
    .cov()
    .shift(-holding_period * df.shape[1])
    .dropna()
)
rolling_corr_matrix = rtn.rolling(window=window_size).corr().dropna()["1997-05":]
rolling_cov_matrix = rtn.rolling(window=window_size).cov().dropna()["1997-05":]

# Dates to process: the selected range minus its first 40 observations.
days_lst = rtn.loc["2007-01-03":"2025-04-16"].index[40:]
# Candidate cluster counts tried for KMeans (2 through 9).
range_n_clusters = list(range(2, 10))

# Trim the return history only after the rolling statistics above were built
# from the full sample.
rtn = rtn["1997-05":]

out_dir = "results/res2"
os.makedirs(out_dir, exist_ok=True)

In [4]:
# Display the column labels of the return frame (the asset universe).
rtn.columns

Index(['brent', 'dxy', 'gold', 'silver', 'snp', 't10'], dtype='object')

In [None]:

def _write_json(path, payload, **dump_kwargs):
    """Write *payload* to *path* as 4-space-indented JSON and close the file."""
    with open(path, "w") as fh:
        json.dump(payload, fh, indent=4, **dump_kwargs)


def process_date(today, i):
    """Run the estimation pipeline for one date and write its results as JSON.

    Steps, all using the module-level frames prepared earlier in the notebook:

    1. Fit a UGARCH(1, 1) per column of ``rtn`` up to ``today``, fit a
       DCC-GARCH on top of them, and take the one-step-ahead correlation
       forecast.
    2. Stack the vectorized rolling correlation matrix of every historical
       date with the vectorized forecast as the final row.
    3. Cluster the stacked rows with KMeans for each ``k`` in
       ``range_n_clusters`` and keep the labelling with the highest
       silhouette score.
    4. Feed Black-Litterman with the mean holding return / covariance of the
       dates that share the forecast's cluster, then clip negative weights
       to zero and renormalize.
    5. Write the forecast, Black-Litterman output, weights, chosen ``k`` and
       cluster labels to ``out_dir``.

    Parameters
    ----------
    today
        Index label of ``rtn`` marking the last observation to use.
    i
        Suffix appended to every output file name for this date.

    Returns
    -------
    None
        Results are written to disk. Any exception is caught and its
        traceback written to ``error_{i}.log`` in ``out_dir`` instead of
        being raised.
    """
    try:
        # 1) GARCH -> DCC
        hist = rtn.loc[:today]
        specs = []
        for col in hist.columns:
            ug = UGARCH(order=(1, 1))
            ug.spec(returns=hist[col])
            ug.fit()
            specs.append(ug)
        dcc = DCCGARCH()
        dcc.spec(ugarch_objs=specs, returns=hist)
        dcc.fit()
        dcc.forecast(n_ahead=1)
        # Only one step was forecast, so take the first slice on the last axis.
        pred_corr = dcc.fc_cor[:, :, 0]

        # 2) stacking: one row per historical date, forecast row last
        rc = rolling_corr_matrix.loc[:today]
        stacked = np.vstack([
            np.vstack([vectorize_corr(rc.loc[d].values) for d in hist.index]),
            vectorize_corr(pred_corr),
        ])

        # 3) KMeans + silhouette
        # NOTE(review): KMeans is not seeded here, so the chosen labelling may
        # differ between runs - confirm whether that is intended.
        best_score, best_k, best_labels = -1, None, None
        for k in range_n_clusters:
            km = KMeans(n_clusters=k)
            labels = km.fit_predict(stacked)
            s = silhouette_score(stacked, labels)
            if s > best_score:
                best_score, best_k, best_labels = s, k, labels

        # 4) Black-Litterman
        # The forecast row is indexed by the observation following `today`;
        # if `today` is the last row of `rtn`, index[1] raises IndexError,
        # which ends up in the error log below.
        idxs = hist.index
        labels_series = pd.Series(
            best_labels,
            index=list(idxs) + [rtn.loc[today:].index[1]],
        )
        pred_state = labels_series.iloc[-1]
        # Every date (including the forecast date itself) in the forecast's cluster.
        regs = labels_series[labels_series == pred_state].index
        mu_bl, w_bl = black_litterman(
            sigma=rolling_cov_matrix.loc[today].values,
            w_mkt=np.array([0.05, 0.05, 0.05, 0.05, 0.6, 0.2]),
            p=np.eye(6),
            q=holding_rtn.loc[regs].mean().values,
            omega=holding_cov.loc[regs].groupby(level=1).mean().values,
            tau=0.15,
        )
        # Long-only: drop negative weights, then rescale so they sum to one.
        w_bl = np.clip(w_bl, 0, None)
        w_bl /= w_bl.sum()

        # 5) Saving - each file is opened and closed by _write_json
        _write_json(
            f"{out_dir}/predicted_corr_matrix{i}.json",
            {str(today): pred_corr.tolist()},
        )
        _write_json(
            f"{out_dir}/original_black_litterman{i}.json",
            {str(today): {"mu_bl": mu_bl.tolist(), "w_bl": w_bl.tolist()}},
        )
        _write_json(
            f"{out_dir}/result_weights{i}.json",
            {str(today): w_bl.tolist()},
        )
        _write_json(
            f"{out_dir}/best_k{i}.json",
            {str(today): best_k},
        )
        # Fix applied here: convert the labels_series index to strings,
        # because JSON object keys must be strings.
        tmp = labels_series.copy()
        tmp.index = tmp.index.astype(str)
        _write_json(
            f"{out_dir}/estimated_regime{i}.json",
            [{str(today): tmp.to_dict()}],
            ensure_ascii=False,
        )

    except Exception:
        # Best-effort: record the failure for this date rather than raising.
        err = traceback.format_exc()
        with open(os.path.join(out_dir, f"error_{i}.log"), "w") as lf:
            lf.write(f"Error at {today} (idx={i}):\n{err}\n")

In [None]:
# Make sure the output directory exists before the workers start writing.
os.makedirs(out_dir, exist_ok=True)
# Leave one core free. os.cpu_count() returns None when the count cannot be
# determined, so fall back to 1 instead of failing on `None - 1`.
n_procs = max(1, (os.cpu_count() or 1) - 1)

# One task per date; the enumerate position becomes the output file suffix.
Parallel(n_jobs=n_procs, backend="loky")(
    delayed(process_date)(day, idx)
    for idx, day in enumerate(days_lst)
)