In [None]:
# -*- coding: utf-8 -*-
# 目的：載入 TCN/LGBM 與校準器，評估分類指標與簡易回測，選出冠軍

from pathlib import Path
import numpy as np
import pandas as pd
import joblib
import mlflow
from sklearn.metrics import roc_auc_score, brier_score_loss, accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow import keras

# ========== 0) Parameters ==========
# Input tables: features and labels, joined on the "time" column further below.
FEAT_PATH = Path("./data/feat_6h.parquet")
LABEL_PATH = Path("./data/label_6h.parquet")
# Directories holding each candidate's saved model, calibrator and scaler.
ART_TCN = Path("./artifacts_tcn")
ART_LGBM = Path("./artifacts_lgbm")
TEST_DAYS = 90  # hold-out window: the last 90 days before the newest timestamp
LOOKBACK = 16  # number of rows per TCN input window
ANNUAL_FREQ_6H = 1460  # one period every 6 hours -> about 1460 periods per year
THRESH = 0.55  # probability cutoff passed to the backtest's long/short signal

# Point MLflow at the "mlruns" tracking URI and select the experiment that the
# final logging step writes its run to.
mlflow.set_tracking_uri("mlruns")
mlflow.set_experiment("eval_backtest_6h")

# ========== 1) Load data ==========
feat = pd.read_parquet(FEAT_PATH)
label = pd.read_parquet(LABEL_PATH)

# Keep only timestamps present in both tables, ordered chronologically.
df = pd.merge(feat, label, on="time", how="inner")
df = df.sort_values("time").reset_index(drop=True)

# Hold out the final TEST_DAYS days, measured back from the newest row.
times = pd.to_datetime(df["time"], utc=True)
test_start_ts = times.max() - pd.Timedelta(days=TEST_DAYS)
train_mask = times < test_start_ts
test_mask = times >= test_start_ts

# Price columns needed by the backtest.
close_col = "price_close"
open_col = "price_open"
if not (close_col in df.columns and open_col in df.columns):
    raise ValueError("缺少 price_open/price_close 以回測策略。")

# Full feature matrix (every column except the timestamp and the two label
# columns) and the integer direction label.
X_all = df.drop(columns=["time", "y_dir_6h", "y_tail_6h"]).values
y_all = df["y_dir_6h"].astype(int).values

# Test-window slices of features, labels, timestamps and prices.
X_test_raw, y_test, t_test = X_all[test_mask], y_all[test_mask], times[test_mask]
price_o = df.loc[test_mask, open_col].to_numpy()
price_c = df.loc[test_mask, close_col].to_numpy()

# ========== 2) Load models and calibrators ==========
# TCN: Keras model plus the calibrator and feature scaler stored next to it.
# NOTE(review): `TCN` is passed as a custom object but is not imported anywhere
# in the visible code, so this line raises NameError unless it is defined
# elsewhere — presumably `from tcn import TCN` (keras-tcn); confirm.
tcn_model = keras.models.load_model(ART_TCN / "tcn_model.h5", custom_objects={"TCN": TCN})
tcn_cal = joblib.load(ART_TCN / "tcn_calibrator.pkl")
tcn_scaler = joblib.load(ART_TCN / "scaler.pkl")

# LGBM: model, calibrator and its own scaler.
# NOTE(review): joblib.load unpickles these files, so the artifact directories
# must only contain files from a trusted source.
lgbm_model = joblib.load(ART_LGBM / "lgbm_model.pkl")
lgbm_cal = joblib.load(ART_LGBM / "lgbm_calibrator.pkl")
lgbm_scaler = joblib.load(ART_LGBM / "scaler.pkl")

# ========== 3) TCN test-set sequences ==========
def make_sequences(X2d: np.ndarray, lookback: int) -> np.ndarray:
    """Stack sliding windows of ``lookback`` consecutive rows.

    Window ``k`` holds rows ``k .. k + lookback - 1`` and therefore lines up
    with row ``k + lookback`` of the input; the aligned row itself is never
    part of its own window.

    Args:
        X2d: Array of shape ``(n_rows, n_features)``.
        lookback: Number of rows per window; must be non-negative.

    Returns:
        ``float32`` array of shape
        ``(max(n_rows - lookback, 0), lookback, n_features)``. The result stays
        3-D even when there are too few rows to form a single window.

    Raises:
        ValueError: If ``lookback`` is negative or ``X2d`` is not 2-D.
    """
    if lookback < 0:
        raise ValueError("lookback must be non-negative")
    # Cast before gathering so the windowed copy is built directly in float32.
    data = np.asarray(X2d, dtype=np.float32)
    if data.ndim != 2:
        raise ValueError("X2d must be a 2-D array of shape (n_rows, n_features)")

    n_windows = max(data.shape[0] - lookback, 0)
    # Row indices of every window at once: entry (k, j) is row k + j.
    rows = np.arange(n_windows)[:, None] + np.arange(lookback)[None, :]
    return data[rows]

# Scale the complete train+test matrix so that windows at the start of the
# test period can reach back into the rows just before it.
X_all_scaled = tcn_scaler.transform(
    df.drop(columns=["time", "y_dir_6h", "y_tail_6h"]).values
)

# Boolean test mask as a plain NumPy array, plus the first test row position.
mask_all_test = np.zeros(len(df), dtype=bool)
mask_all_test[np.flatnonzero(test_mask)] = True
start_idx = np.flatnonzero(test_mask)[0]

# Windows over the whole series; the first LOOKBACK rows have no window, so
# timestamps and labels are shifted by the same amount to stay aligned.
X_seq_all = make_sequences(X_all_scaled, LOOKBACK)
times_seq = times.iloc[LOOKBACK:]
y_seq_all = y_all[LOOKBACK:]

# Keep the windows whose aligned timestamp falls inside the test period.
test_seq_mask = times_seq >= t_test.min()
X_test_tcn, y_test_tcn = X_seq_all[test_seq_mask], y_seq_all[test_seq_mask]

# ========== 4) Predict and calibrate ==========
# TCN: flatten the model output to 1-D, then pass it through the TCN calibrator.
# NOTE(review): assumes the network emits a single probability per window and
# that the calibrator exposes `transform` — confirm against the training script.
proba_tcn = tcn_model.predict(X_test_tcn, verbose=0).ravel()
proba_tcn_cal = tcn_cal.transform(proba_tcn)

# LGBM: scale the raw test rows with the LGBM scaler, keep column 1 of
# predict_proba (presumably the positive class), then calibrate.
X_test_lgbm = lgbm_scaler.transform(X_test_raw)
proba_lgbm = lgbm_model.predict_proba(X_test_lgbm)[:, 1]
proba_lgbm_cal = lgbm_cal.transform(proba_lgbm)

# ========== 5) Classification metrics ==========
def clf_metrics(y_true, p_raw, p_cal, thr=0.5):
    """Score raw and calibrated probabilities against binary labels.

    Args:
        y_true: Binary ground-truth labels.
        p_raw: Uncalibrated positive-class probabilities.
        p_cal: Calibrated positive-class probabilities.
        thr: Cutoff applied to ``p_cal`` (``>=`` counts as positive) to obtain
            the hard labels used for accuracy, precision, recall and F1.

    Returns:
        Dict with keys ``auc_raw``, ``auc_cal``, ``brier_raw``, ``brier_cal``,
        ``acc``, ``prec``, ``rec`` and ``f1``, in that order.
    """
    # Accept lists and pandas objects as well as arrays.
    y_true = np.asarray(y_true)
    p_raw = np.asarray(p_raw)
    p_cal = np.asarray(p_cal)

    # All threshold-based metrics share the same hard labels; derive them once.
    y_hat = (p_cal >= thr).astype(int)

    return {
        "auc_raw": roc_auc_score(y_true, p_raw),
        "auc_cal": roc_auc_score(y_true, p_cal),
        "brier_raw": brier_score_loss(y_true, p_raw),
        "brier_cal": brier_score_loss(y_true, p_cal),
        "acc": accuracy_score(y_true, y_hat),
        # zero_division=0 reports 0 instead of warning when a class is absent.
        "prec": precision_score(y_true, y_hat, zero_division=0),
        "rec": recall_score(y_true, y_hat, zero_division=0),
        "f1": f1_score(y_true, y_hat, zero_division=0),
    }

# TCN is scored against the labels aligned with its windows; LGBM against the
# labels of the raw test rows.
m_tcn = clf_metrics(y_test_tcn, proba_tcn, proba_tcn_cal)
m_lgbm = clf_metrics(y_test, proba_lgbm, proba_lgbm_cal)

# ========== 6) Strategy backtest (one-period holding: long / short / flat) ==========
def backtest_long_short(prob_cal: np.ndarray, price_open: np.ndarray, price_close: np.ndarray, thresh=0.55, periods_per_year=None):
    """Backtest a one-period long/short/flat rule driven by calibrated probabilities.

    For each period the position is long when ``prob > thresh``, short when
    ``prob < 1 - thresh`` and flat otherwise; it is entered at that period's
    open and closed at its close.

    Args:
        prob_cal: Calibrated probabilities, one per traded period.
        price_open: Open prices. Only the last ``len(prob_cal)`` entries are
            used, so a longer price array is aligned to the probabilities at
            its end.
        price_close: Close prices, aligned the same way as ``price_open``.
        thresh: Probability cutoff for going long; ``1 - thresh`` is the cutoff
            for going short.
        periods_per_year: Periods per year used to annualise the Sharpe ratio.
            ``None`` falls back to the module-level ``ANNUAL_FREQ_6H``.

    Returns:
        Dict with ``sharpe`` (annualised; NaN when undefined), ``hit_rate``
        (share of open positions on the right side of the move; NaN without
        trades), ``mdd`` (worst drawdown as a non-positive fraction of the
        running equity peak, e.g. ``-0.25``) and ``trades`` (periods with a
        position).

    Raises:
        ValueError: If either price array is shorter than ``prob_cal``.
    """
    prob_cal = np.asarray(prob_cal, dtype=float).ravel()
    price_open = np.asarray(price_open, dtype=float)
    price_close = np.asarray(price_close, dtype=float)

    n = prob_cal.size
    if price_open.shape[0] < n or price_close.shape[0] < n:
        raise ValueError("price arrays must be at least as long as prob_cal")
    # Slice from an explicit start: arr[-n:] would return the WHOLE array for n == 0.
    price_open = price_open[price_open.shape[0] - n:]
    price_close = price_close[price_close.shape[0] - n:]

    if n == 0:
        return dict(sharpe=float("nan"), hit_rate=float("nan"), mdd=0.0, trades=0)

    if periods_per_year is None:
        periods_per_year = ANNUAL_FREQ_6H

    # Signal: +1 long, -1 short, 0 flat.
    sig = np.where(prob_cal > thresh, 1, np.where(prob_cal < 1 - thresh, -1, 0)).astype(int)

    # Per-period return relative to the entry (open) price. A short position
    # earns exactly the negative of the long return.
    r_long = price_close / price_open - 1.0
    ret = np.where(sig > 0, r_long, np.where(sig < 0, -r_long, 0.0))

    # Annualised Sharpe ratio; undefined with fewer than two periods or zero variance.
    mu = ret.mean()
    sd = ret.std(ddof=1) if n > 1 else np.nan
    sharpe = (mu / sd) * np.sqrt(periods_per_year) if sd > 0 else np.nan

    # Hit rate over periods with an open position.
    in_market = sig != 0
    hits = ((sig > 0) & (r_long > 0)) | ((sig < 0) & (r_long < 0))
    hit_rate = hits[in_market].mean() if in_market.any() else np.nan

    # Maximum drawdown. The running peak includes the starting equity of 1.0 so
    # that a loss on the very first period is counted. Drawdowns are <= 0, so
    # the worst one is the minimum.
    equity = np.cumprod(1.0 + ret)
    peak = np.maximum(np.maximum.accumulate(equity), 1.0)
    mdd = np.nanmin(equity / peak - 1.0)

    return dict(sharpe=float(sharpe), hit_rate=float(hit_rate), mdd=float(mdd), trades=int(in_market.sum()))

# Both models are backtested on the same test-window prices; the function
# trims the price arrays to the last len(probabilities) entries.
bt_tcn = backtest_long_short(proba_tcn_cal, price_o, price_c, THRESH)
bt_lgbm = backtest_long_short(proba_lgbm_cal, price_o, price_c, THRESH)

# ========== 7) Champion selection ==========
def choose_champion(m1, bt1, m2, bt2, name1="TCN", name2="LGBM"):
    """Return the name of the model with the larger ``auc_cal + sharpe`` score.

    A NaN Sharpe ratio contributes 0 to the score, and a tie goes to ``name1``.
    """

    def _total(metrics, backtest):
        # Calibrated AUC plus Sharpe, with a missing (NaN) Sharpe counted as 0.
        auc = metrics["auc_cal"]
        sharpe = backtest["sharpe"]
        if np.isnan(sharpe):
            sharpe = 0
        return auc + sharpe

    first = _total(m1, bt1)
    second = _total(m2, bt2)
    if first >= second:
        return name1
    return name2

champion = choose_champion(m_tcn, bt_tcn, m_lgbm, bt_lgbm)

# ========== 8) Logging ==========
with mlflow.start_run(run_name="compare_backtest"):
    # Each result dict is logged under its own metric-name prefix, in the
    # order: TCN metrics, TCN backtest, LGBM metrics, LGBM backtest.
    for prefix, values in (
        ("tcn_", m_tcn),
        ("tcn_bt_", bt_tcn),
        ("lgbm_", m_lgbm),
        ("lgbm_bt_", bt_lgbm),
    ):
        for key, value in values.items():
            mlflow.log_metric(f"{prefix}{key}", float(value))
    mlflow.set_tag("champion", champion)

# Print the full comparison for a quick look in the notebook output.
summary = {
    "TCN": {"metrics": m_tcn, "backtest": bt_tcn},
    "LGBM": {"metrics": m_lgbm, "backtest": bt_lgbm},
    "champion": champion
}
print(summary)
