# In [None]:  (Jupyter notebook cell marker left over from the export)
# -*- coding: utf-8 -*-
# Backtest + ATR-based dynamic position sizing (1% risk) + one-shot visualization of candlesticks / entries and exits / equity (complete)

import numpy as np
import pandas as pd
import joblib
import mlflow
from pathlib import Path
from sklearn.metrics import (
    roc_auc_score, brier_score_loss, accuracy_score,
    precision_recall_fscore_support
)
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import mplfinance as mpf

# ===================== 0) Parameters and paths =====================
FEAT_PATH = Path("./data/feat_6h.parquet")
LABEL_PATH = Path("./data/label_6h.parquet")
ART_LGBM  = Path("./artifacts_lgbm")
# Probability cut-off handed to the backtest: long above it, short below 1 - THRESHOLD.
THRESHOLD = 0.55
# Starting capital, fraction of it risked per trade, and ATR rolling window (in bars);
# all three are passed to backtest_strategy_atr below.
INIT_CAP  = 10_000
RISK_PERC = 0.01
ATR_WIN   = 14

# Selects the active MLflow experiment. NOTE(review): no run or metric logging is
# visible in this part of the file — confirm whether logging happens elsewhere.
mlflow.set_experiment("eval_backtest")

# ===================== 1) Data =====================
feat  = pd.read_parquet(FEAT_PATH)
label = pd.read_parquet(LABEL_PATH)

# Keep only timestamps present in both tables, in chronological order with a fresh 0..n-1 index.
df = feat.merge(label, on="ts_utc", how="inner").sort_values("ts_utc").reset_index(drop=True)
times = pd.to_datetime(df["ts_utc"], utc=True)

# Target and features
y = df["y_dir_6h"].astype(int).values
# Every numeric column except the timestamp and the two label columns is used as a feature.
feature_cols = df.drop(columns=["ts_utc","y_dir_6h","y_tail_6h"]).select_dtypes(include=[np.number]).columns.tolist()
X = df[feature_cols].values

# Test split (last 90 days)
test_start_ts = times.max() - pd.Timedelta(days=90)
train_mask = times < test_start_ts
test_mask  = times >= test_start_ts

X_train, y_train = X[train_mask], y[train_mask]
X_test,  y_test  = X[test_mask],  y[test_mask]
# Full test rows (including the timestamp and price columns) for the backtest and plots.
df_test = df[test_mask].copy()

# ===================== 2) Load / prepare models =====================
# LGBM model with its scaler and calibrator.
# NOTE: joblib.load unpickles the files — only load artifacts from a trusted source.
lgbm_model      = joblib.load(ART_LGBM / "lgbm_model.pkl")
lgbm_calibrator = joblib.load(ART_LGBM / "lgbm_calibrator.pkl")
scaler_lgbm     = joblib.load(ART_LGBM / "scaler.pkl")

# Baseline Logistic Regression (for comparison only), fitted here on the unscaled training features.
logreg_model = LogisticRegression(max_iter=200)
logreg_model.fit(X_train, y_train)

# ===================== 3) Prediction =====================
# Maps model name -> (raw probabilities, calibrated probabilities) for the positive class.
results = {}

# LGBM: wrap the scaled matrix in a DataFrame with the same column names to avoid
# the feature-name warning.
# NOTE(review): assumes feature_cols matches the columns (and order) the scaler and
# model were fitted with — confirm against the training code.
X_test_scaled = scaler_lgbm.transform(X_test)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=feature_cols, index=np.arange(len(X_test)))
proba_lgbm = lgbm_model.predict_proba(X_test_scaled_df)[:, 1]
# NOTE(review): presumably the calibrator exposes transform() on 1-D probabilities
# (e.g. an isotonic regressor) — verify against the artifact.
proba_lgbm_cal = lgbm_calibrator.transform(proba_lgbm)
results["LGBM"] = (proba_lgbm, proba_lgbm_cal)

# LogReg: fed the raw, unscaled features; comparison only.
# It has no calibrator, so the same array fills both slots.
proba_log = logreg_model.predict_proba(X_test)[:, 1]
results["LogReg"] = (proba_log, proba_log)

# ===================== 4) Metrics and backtest =====================
def calc_atr(high, low, close, period=14):
    """Return the Average True Range as a NumPy array.

    The true range of a bar is the largest of: high - low,
    |high - previous close| and |low - previous close|.  The ATR is the
    simple rolling mean of the true range over ``period`` bars, so the first
    ``period - 1`` entries are NaN.

    Args:
        high, low, close: Array-likes of equal length with the bar prices.
        period: Rolling window length in bars.

    Returns:
        ``numpy.ndarray`` with one ATR value per input bar.
    """
    hi, lo, cl = (pd.Series(values) for values in (high, low, close))
    # The first bar has no previous close; max() below skips the resulting NaNs.
    prev_close = cl.shift()

    components = [
        hi - lo,
        (hi - prev_close).abs(),
        (lo - prev_close).abs(),
    ]
    true_range = pd.concat(components, axis=1).max(axis=1)

    return true_range.rolling(period).mean().values

def _resolve_price_column(columns, key):
    """Return the name of the column holding the ``key`` (open/high/low/close) price."""
    names = list(columns)
    # Prefer the exact names used by the rest of this file (``price_open`` ...),
    # so an unrelated feature such as "flow" or "slow_ma" is never taken for "low".
    for exact in (f"price_{key}", key):
        if exact in names:
            return exact
    # Fall back to the first column whose name merely contains ``key``.
    for name in names:
        if isinstance(name, str) and key in name:
            return name
    raise KeyError(f"no column for the '{key}' price found in X_df")


def backtest_strategy_atr(proba, X_df, threshold=0.55, init_capital=10_000, risk_perc=0.01,
                          atr_period=14, periods_per_year=1460):
    """Backtest a same-bar long/short strategy with ATR-based position sizing.

    For every bar ``i`` the probability ``proba[i]`` becomes a signal: long
    (+1) above ``threshold``, short (-1) below ``1 - threshold``, flat (0)
    otherwise.  A position is opened at the bar's open and closed at the same
    bar's close.  Its size is ``init_capital * risk_perc / ATR[i]``; the risk
    budget is fixed on the initial capital and P&L is added to capital without
    compounding.

    No position is opened while the ATR is undefined (the warm-up bars), when
    the ATR is not positive, or when the bar's open/close price is missing.

    NOTE(review): ATR[i] is computed from bar i's own high/low/close while the
    position is sized at bar i's open, and the signal for bar i is applied to
    bar i's own open-to-close move.  Confirm that ``proba[i]`` only uses
    information available before bar i opens; otherwise the results contain
    look-ahead bias.

    Args:
        proba: 1-D array-like of positive-class probabilities, one per bar.
            If shorter than ``X_df``, only the first ``len(proba)`` rows of
            ``X_df`` are used.
        X_df: DataFrame with open/high/low/close price columns
            (``price_open`` etc. preferred; substring match as a fallback).
        threshold: Probability cut-off for the long/short signal.
        init_capital: Starting capital.
        risk_perc: Fraction of ``init_capital`` risked per trade.
        atr_period: Rolling window of the ATR, in bars.
        periods_per_year: Bars per year used to annualise the Sharpe ratio
            (default 1460 = 4 six-hour bars x 365 days).

    Returns:
        dict with per-bar arrays ``equity_curve``, ``rets`` (P&L divided by
        ``init_capital``), ``signals``, ``pos_sizes`` and ``entry_prices``
        (NaN where no position was opened), plus the scalars ``sharpe``,
        ``hit_rate`` (share of opened positions with positive P&L),
        ``max_drawdown`` (<= 0, measured from the running peak including the
        initial capital) and ``final_capital``.

    Raises:
        KeyError: If a price column cannot be found in ``X_df``.
        ValueError: If ``proba`` has more entries than ``X_df`` has rows.
    """
    open_col, high_col, low_col, close_col = (
        _resolve_price_column(X_df.columns, key)
        for key in ("open", "high", "low", "close")
    )

    proba = np.asarray(proba, dtype=float)
    n = len(proba)
    if n > len(X_df):
        raise ValueError(
            f"proba has {n} entries but X_df only has {len(X_df)} rows"
        )

    openp = X_df[open_col].values
    highp = X_df[high_col].values
    lowp = X_df[low_col].values
    closep = X_df[close_col].values

    # Copy before writing: the array handed back by calc_atr comes from
    # ``Series.values`` and may be a read-only view.
    atr = np.array(calc_atr(highp, lowp, closep, period=atr_period), dtype=float)
    # Also blank the first complete window: its true range at bar 0 has no
    # previous close to compare against.
    atr[:atr_period] = np.nan

    openp, closep, atr = openp[:n], closep[:n], atr[:n]

    signals = np.where(proba > threshold, 1,
                       np.where(proba < 1 - threshold, -1, 0)).astype(int)

    risk_capital = init_capital * risk_perc
    with np.errstate(divide="ignore", invalid="ignore"):
        sized = np.where(atr > 0, risk_capital / atr, 0.0)

    # A bar is traded only when there is a signal, a defined ATR and usable
    # prices; a single missing price would otherwise turn all later equity
    # values into NaN.
    traded = (
        (signals != 0)
        & np.isfinite(atr)
        & np.isfinite(openp)
        & np.isfinite(closep)
    )

    pos_sizes = np.where(traded, sized, 0.0)
    entry_prices = np.where(traded, openp, np.nan)
    # Long earns close - open, short earns open - close.
    pnl = np.where(traded, pos_sizes * signals * (closep - openp), 0.0)

    # Sequential running sum starting from the initial capital.
    path = np.add.accumulate(np.concatenate(([float(init_capital)], pnl)))
    equity_curve = path[1:]
    final_capital = float(path[-1])

    # Returns are measured against the initial capital.
    rets = pnl / init_capital

    if n:
        mean_r, std_r = np.nanmean(rets), np.nanstd(rets)
        sharpe = float(mean_r / std_r * np.sqrt(periods_per_year)) if std_r > 0 else 0.0
        roll_max = np.maximum.accumulate(path)[1:]
        max_drawdown = float(np.min((equity_curve - roll_max) / roll_max))
    else:
        sharpe, max_drawdown = 0.0, 0.0

    # Only bars where a position was really opened count towards the hit rate.
    executed = pos_sizes > 0
    hit_rate = float(np.mean(rets[executed] > 0)) if executed.any() else 0.0

    return {
        "equity_curve": equity_curve,
        "rets": rets,
        "signals": signals,
        "pos_sizes": pos_sizes,
        "entry_prices": entry_prices,
        "sharpe": sharpe,
        "hit_rate": hit_rate,
        "max_drawdown": max_drawdown,
        "final_capital": final_capital,
    }

# Per-model classification and trading metrics, plus each model's equity curve.
metrics, equities = {}, {}
for model_name, (proba, proba_cal) in results.items():
    # Align the test rows and labels with the tail of the prediction vectors.
    X_slice = df_test.iloc[-len(proba_cal):]
    y_slice = y_test[-len(proba):]

    # The backtest trades on the calibrated probabilities.
    strat = backtest_strategy_atr(
        proba_cal, X_slice,
        threshold=THRESHOLD, init_capital=INIT_CAP,
        risk_perc=RISK_PERC, atr_period=ATR_WIN,
    )

    # Classification metrics are scored on the uncalibrated probabilities;
    # hard labels use a 0.5 cut-off.
    y_hat = (proba >= 0.5).astype(int)
    auc = roc_auc_score(y_slice, proba)
    brier = brier_score_loss(y_slice, proba)
    acc = accuracy_score(y_slice, y_hat)
    prec, rec, f1, _ = precision_recall_fscore_support(
        y_slice, y_hat, average="binary", zero_division=0
    )

    classification = dict(auc=auc, brier=brier, acc=acc, precision=prec, recall=rec, f1=f1)
    trading = {
        key: strat[key]
        for key in ("sharpe", "hit_rate", "max_drawdown", "final_capital")
    }
    metrics[model_name] = {**classification, **trading}
    equities[model_name] = strat["equity_curve"]

print("=== ATR Backtest Results (1% risk) ===")
for m, v in metrics.items():
    print(m, v)


def _champion_key(name):
    """Rank models by AUC first, Sharpe ratio as the tie-breaker."""
    scores = metrics[name]
    return (scores["auc"], scores["sharpe"])


champion = max(metrics, key=_champion_key)
print(f"\nChampion Model: {champion}")

# ===================== 5) Equity curve comparison (optional) =====================
plt.figure(figsize=(12,6))
for model_name, eq in equities.items():
    # One line per model; the legend shows the final capital.
    plt.plot(eq, label=f"{model_name} (Final=${eq[-1]:.2f})")
# Build the title from the configured values so it cannot drift from
# RISK_PERC / INIT_CAP (renders "ATR 1% Risk, Initial=10k USD" for 0.01 / 10_000).
plt.title(
    f"Equity Curve Comparison (ATR {RISK_PERC * 100:g}% Risk, "
    f"Initial={INIT_CAP / 1000:g}k USD)"
)
plt.xlabel("Steps (6h intervals)")
plt.ylabel("Capital (USD)")
plt.legend()
plt.grid(True)
plt.show()

# ===================== 6) Candlesticks + entries/exits + equity in one figure =====================
def _full_series(index, idx, vals):
    s = pd.Series(np.nan, index=index, dtype=float)
    if len(idx) > 0:
        s.iloc[idx] = vals
    return s

def plot_kline_with_signals_and_equity(X_df, strat, model_name="Model"):
    """Show candlesticks with entry/exit markers, position sizes and equity.

    Args:
        X_df: DataFrame with ``ts_utc``, ``price_open``, ``price_high``,
            ``price_low``, ``price_close`` and ``price_volume_usd`` columns.
        strat: Mapping with the per-bar arrays ``equity_curve``, ``signals``,
            ``entry_prices`` and ``pos_sizes``.  Arrays longer than ``X_df``
            keep their last rows; shorter ones are left-padded.
        model_name: Label used in the figure title.
    """
    # Candlestick frame in the column layout mplfinance expects.
    ohlcv = {
        "price_open": "Open", "price_high": "High", "price_low": "Low",
        "price_close": "Close", "price_volume_usd": "Volume",
    }
    df_plot = X_df[["ts_utc", *ohlcv]].copy()
    df_plot.index = pd.to_datetime(df_plot["ts_utc"], utc=True)
    df_plot = df_plot.rename(columns=ohlcv)[list(ohlcv.values())]
    n = len(df_plot)
    idx = df_plot.index

    def _fit(values, fill):
        # Keep the most recent n entries, or left-pad so position i maps to bar i.
        if len(values) >= n:
            return values[len(values) - n:]
        return np.r_[np.full(n - len(values), fill), values]

    eq          = _fit(np.asarray(strat["equity_curve"], dtype=float), np.nan)
    signals     = _fit(np.asarray(strat["signals"], dtype=int), 0)
    entry_price = _fit(np.asarray(strat["entry_prices"], dtype=float), np.nan)
    pos_sizes   = _fit(np.asarray(strat["pos_sizes"], dtype=float), np.nan)

    # Mark only bars where a position was really opened: a signal without an
    # entry price means no trade took place, so it gets neither entry nor exit.
    opened    = np.isfinite(entry_price)
    long_idx  = np.flatnonzero((signals == 1) & opened)
    short_idx = np.flatnonzero((signals == -1) & opened)
    trade_idx = np.flatnonzero((signals != 0) & opened)
    close_px  = df_plot["Close"].values  # the same bar's close is the exit

    # Equity goes to the lower panel.
    apds = [
        mpf.make_addplot(pd.Series(eq, index=idx, dtype=float),
                         panel=1, color="blue", ylabel="Equity"),
    ]
    marker_specs = [
        (long_idx,  entry_price, "^", 70, "green"),
        (short_idx, entry_price, "v", 70, "red"),
        (trade_idx, close_px,    "x", 40, "gray"),
    ]
    for positions, prices, marker, size, color in marker_specs:
        # An all-NaN marker series can make mplfinance's addplot fail, so
        # marker sets without any point are skipped.
        if len(positions) == 0:
            continue
        apds.append(mpf.make_addplot(
            _full_series(idx, positions, prices[positions]),
            type="scatter", markersize=size, marker=marker, color=color,
        ))

    fig, axes = mpf.plot(
        df_plot, type="candle", style="yahoo", volume=True,
        addplot=apds, title=f"{model_name} - KLine + Entries/Exits + Equity",
        figscale=1.2, figratio=(16,9), panel_ratios=(3,1),
        warn_too_much_data=10**9, returnfig=True
    )

    # Annotate the position size next to each entry on the main panel.  With
    # its default settings mplfinance plots against integer bar positions, so
    # the x coordinate is the bar number rather than the timestamp.
    ax_main = axes[0]
    for positions, color, valign in ((long_idx, "green", "bottom"),
                                     (short_idx, "red", "top")):
        for i in positions:
            ax_main.text(i, entry_price[i], f"{pos_sizes[i]:.0f}",
                         color=color, fontsize=8, ha="center", va=valign)

    plt.show()

# ===== Champion visualization (restricted to the last N bars; rendered from a saved image to force display) =====
from IPython.display import Image, display

def plot_kline_with_signals_and_equity(X_df, strat, model_name="Model", save_path="kline_equity.png"):
    """Render candlesticks, trade markers and equity to an image and display it.

    The figure is written to ``save_path``, closed, and then shown through
    IPython's ``display(Image(...))``.

    Args:
        X_df: DataFrame with ``ts_utc``, ``price_open``, ``price_high``,
            ``price_low``, ``price_close`` and ``price_volume_usd`` columns.
        strat: Mapping with the per-bar arrays ``equity_curve``, ``signals``,
            ``entry_prices`` and ``pos_sizes``.  Arrays longer than ``X_df``
            keep their last rows; shorter ones are left-padded.
        model_name: Label used in the figure title.
        save_path: Image file the figure is saved to.
    """
    ohlcv = {
        "price_open": "Open", "price_high": "High", "price_low": "Low",
        "price_close": "Close", "price_volume_usd": "Volume",
    }
    df_plot = X_df[["ts_utc", *ohlcv]].copy()
    # Parse as UTC, then drop the timezone so the index is tz-naive.
    df_plot.index = pd.to_datetime(df_plot["ts_utc"], utc=True).dt.tz_convert(None)
    df_plot = df_plot.rename(columns=ohlcv)[list(ohlcv.values())]

    n   = len(df_plot)
    idx = df_plot.index

    def _fit(values, fill):
        # Keep the most recent n entries, or left-pad so position i maps to bar i.
        if len(values) >= n:
            return values[len(values) - n:]
        return np.r_[np.full(n - len(values), fill), values]

    eq          = _fit(np.asarray(strat["equity_curve"], dtype=float), np.nan)
    signals     = _fit(np.asarray(strat["signals"], dtype=int), 0)
    entry_price = _fit(np.asarray(strat["entry_prices"], dtype=float), np.nan)

    def _marker_series(positions, prices):
        # NaN everywhere except at the marked bar positions.
        series = pd.Series(np.nan, index=idx, dtype=float)
        series.iloc[positions] = prices[positions]
        return series

    # Mark only bars where a position was really opened: a signal without an
    # entry price means no trade took place, so it gets neither entry nor exit.
    opened    = np.isfinite(entry_price)
    long_idx  = np.flatnonzero((signals == 1) & opened)
    short_idx = np.flatnonzero((signals == -1) & opened)
    trade_idx = np.flatnonzero((signals != 0) & opened)
    close_px  = df_plot["Close"].values  # the same bar's close is the exit

    # Equity goes to the lower panel.
    apds = [
        mpf.make_addplot(pd.Series(eq, index=idx), panel=1, color="blue", ylabel="Equity"),
    ]
    marker_specs = [
        (long_idx,  entry_price, "^", 70, "green"),
        (short_idx, entry_price, "v", 70, "red"),
        (trade_idx, close_px,    "x", 40, "gray"),
    ]
    for positions, prices, marker, size, color in marker_specs:
        # An all-NaN marker series can make mplfinance's addplot fail, so
        # marker sets without any point are skipped.
        if len(positions) == 0:
            continue
        apds.append(mpf.make_addplot(
            _marker_series(positions, prices),
            type="scatter", markersize=size, marker=marker, color=color,
        ))

    fig, _ = mpf.plot(
        df_plot, type="candle", style="yahoo", volume=True,
        addplot=apds, title=f"{model_name} - KLine + Entries/Exits + Equity",
        figscale=1.2, figratio=(16,9), panel_ratios=(3,1),
        warn_too_much_data=10**9, returnfig=True,
        savefig=dict(fname=save_path, dpi=180, bbox_inches="tight")
    )
    plt.close(fig)  # close the figure first, then show the saved file
    display(Image(filename=save_path))

# Visualize only the last N bars (to avoid an overcrowded chart).
# Re-run the backtest for the champion on its calibrated probabilities
# (second element of the results tuple) to obtain the full per-bar arrays.
champ_proba = results[champion][1]
X_slice     = df_test.iloc[-len(champ_proba):]
strat_champ = backtest_strategy_atr(
    champ_proba, X_slice,
    threshold=THRESHOLD, init_capital=INIT_CAP, risk_perc=RISK_PERC, atr_period=ATR_WIN
)

N = 360  # increase to show more bars
X_vis = X_slice.tail(N).copy()
# Trim every list/array result to its last N entries so it lines up with X_vis;
# other values (e.g. final_capital) are passed through unchanged.
strat_vis = {
    k: (np.asarray(v)[-N:] if isinstance(v, (list, np.ndarray)) else v)
    for k, v in strat_champ.items()
}
plot_kline_with_signals_and_equity(X_vis, strat_vis, model_name=champion, save_path="kline_equity.png")
