# (Legacy) LSTM.ipynb

此 notebook 属于阶段性改进版本。通用代码已迁移到 `fyp_trading/`，经典策略对比请看 `strategy_*.ipynb`。



In [None]:
# %% [markdown]
# # 固定窗口 LSTM（多类别）改进版：显著涨跌识别 & 策略回测
#
# 关键改动：
# - **阈值选择改用 Macro-F1 / Balanced Accuracy**，避免准确率误导。
# - **类别加权交叉熵 / 可选 Focal Loss**，缓解极度不平衡。
# - **支持波动自适应阈值**，保证极端事件密度稳定。
# - **交易置信阈 + 最小持有期**，减少噪声交易。
# - 输出更丰富的诊断信息（类分布、置信阈策略表现等）。

# %%
import warnings
warnings.filterwarnings("ignore")

import os, sys, math, random, copy
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

import numpy as np
import pandas as pd

from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('seaborn-v0_8-whitegrid') if 'seaborn-v0_8-whitegrid' in plt.style.available else plt.style.use('ggplot')
sns.set_palette("Set2")

import yfinance as yf

from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    f1_score,
    classification_report,
    confusion_matrix,
)

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Device selection and random seeds (Python, NumPy and PyTorch are all seeded).
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is visible; seed every CUDA device in that case.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and disable its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

print("Device:", device)

In [None]:
# %% [markdown]
# ## 1. 参数总览

# %%
@dataclass
class LabelingConfig:
    """Settings that control how next-period returns are converted into class labels."""
    use_dynamic_threshold: bool = True   # True: the threshold is "k x rolling volatility" instead of a fixed return
    vol_window: int = 20                 # rolling window used for the volatility behind the dynamic threshold
    min_vol: float = 1e-4                # lower bound of the threshold so it never becomes too small
    threshold_grid: Optional[np.ndarray] = None  # candidate thresholds (None: filled in at run time from use_dynamic_threshold)


@dataclass
class TrainConfig:
    """Hyper-parameters for training the LSTM classifier."""
    epochs: int = 120                    # maximum number of training epochs
    batch_size: int = 64
    patience: int = 15                   # early-stopping patience (epochs without validation improvement)
    lr: float = 1e-3                     # initial Adam learning rate
    weight_decay: float = 1e-4
    lr_factor: float = 0.5               # ReduceLROnPlateau multiplicative factor
    lr_patience: int = 8                 # ReduceLROnPlateau patience
    lr_min: float = 1e-5                 # ReduceLROnPlateau learning-rate floor
    grad_clip: float = 1.0               # gradient-norm clipping value (None disables clipping)
    verbose: bool = False

    use_focal_loss: bool = True         # True: use Focal Loss instead of weighted cross-entropy
    focal_gamma: float = 1.0             # Focal Loss gamma setting
    use_weighted_sampler: bool = False   # True: draw training batches with a class-weighted sampler
    min_class_freq: int = 5              # minimum number of samples required for every class


@dataclass
class StrategyConfig:
    """Parameters of the trading rule applied to the predicted class probabilities."""
    trade_proba_threshold: float = 0.55  # minimum class probability required to open a position
    min_holding_period: int = 2          # minimum number of trading days a position is held (>= 1)
    transaction_cost_bp: float = 2.0     # transaction cost in basis points


@dataclass
class PipelineConfig:
    """Data source and fixed-window cross-validation layout."""
    ticker: str = "2800.HK"
    period: str = "3y"                   # history length to download; adjust as needed
    interval: str = "1d"
    lookback: int = 50                   # number of time steps in every input sequence
    train_window: int = 400              # samples in each training window
    val_size: int = 21                   # samples in each validation window
    test_size: int = 63                  # samples in each test window
    step_size: Optional[int] = 21        # shift between consecutive folds (None: set to test_size at run time)
    desired_macro_f1: float = 0.40       # reference level used for a warning only, not a hard gate


# Global configuration objects used by every later cell.
LABEL_CFG = LabelingConfig()
TRAIN_CFG = TrainConfig()
STRATEGY_CFG = StrategyConfig()
PIPE_CFG = PipelineConfig()

# Fill in the threshold grid when the user did not supply one.
if LABEL_CFG.threshold_grid is None:
    if LABEL_CFG.use_dynamic_threshold:
        LABEL_CFG.threshold_grid = np.round(np.linspace(0.6, 1.2, 13), 3)  # multiples k of the rolling volatility
    else:
        LABEL_CFG.threshold_grid = np.round(np.linspace(0.004, 0.012, 17), 4)  # fixed return thresholds

# Default to non-overlapping test windows when no step size is given.
if PIPE_CFG.step_size is None:
    PIPE_CFG.step_size = PIPE_CFG.test_size

In [None]:
# %% [markdown]
# ## 2. 数据获取

# %%
def fetch_prices(ticker: str = "2800.HK", period: str = "1y", interval: str = "1d") -> pd.DataFrame:
    """Download adjusted OHLCV bars for ``ticker`` through yfinance.

    The ``period`` request is tried first; if it yields nothing, an explicit
    start/end range covering the last ``3 * 370`` days is requested instead.

    Returns:
        A copy of the Open/High/Low/Close/Volume columns with a tz-naive index.

    Raises:
        RuntimeError: if both requests return no data.
    """
    def _no_data(frame) -> bool:
        return frame is None or frame.empty

    handle = yf.Ticker(ticker)
    bars = handle.history(period=period, interval=interval, auto_adjust=True)

    if _no_data(bars):
        # Fallback: ask for an explicit date range ending today.
        range_end = pd.Timestamp.today().normalize()
        range_start = range_end - pd.Timedelta(days=3 * 370)
        bars = handle.history(start=range_start, end=range_end, interval=interval, auto_adjust=True)
        if _no_data(bars):
            raise RuntimeError(f"无法获取 {ticker} 数据。")

    # Drop timezone information from the index.
    bars.index = pd.to_datetime(bars.index).tz_localize(None)
    wanted = ["Open", "High", "Low", "Close", "Volume"]
    return bars[wanted].copy()

# Download the configured instrument and show the covered date range plus the latest bars.
df_raw = fetch_prices(ticker=PIPE_CFG.ticker, period=PIPE_CFG.period, interval=PIPE_CFG.interval)
print(f"数据范围: {df_raw.index.min().date()} ~ {df_raw.index.max().date()}, 共 {len(df_raw)} 根K线")
display(df_raw.tail())

# %% [markdown]
# ## 3. 特征工程
# %%
def calculate_rsi(series: pd.Series, period: int = 14) -> pd.Series:
    """Relative Strength Index using Wilder-style exponential smoothing.

    Args:
        series: price series.
        period: smoothing period; the EWM uses ``alpha = 1 / period``.

    Returns:
        RSI values in [0, 100]. Entries are NaN where no price change is
        available yet, or where both the average gain and the average loss
        are zero (flat prices, RSI undefined).
    """
    delta = series.diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    avg_gain = gains.ewm(alpha=1 / period, adjust=False).mean()
    avg_loss = losses.ewm(alpha=1 / period, adjust=False).mean()

    # Hide zero average losses from the division, then handle them explicitly below.
    rs = avg_gain / avg_loss.where(avg_loss != 0)
    rsi = 100 - (100 / (1 + rs))

    # No losses but some gains: RSI is 100 by definition (previously left as NaN).
    only_gains = (avg_loss == 0) & (avg_gain > 0)
    return rsi.mask(only_gains, 100.0)


def calculate_macd(close: pd.Series, fast=12, slow=26, signal=9):
    """Compute the MACD line, its signal line and the histogram.

    Args:
        close: price series.
        fast: span of the fast EMA.
        slow: span of the slow EMA.
        signal: span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd_line, signal_line, histogram)`` where the histogram is
        ``macd_line - signal_line``.
    """
    def _ema(values: pd.Series, span: int) -> pd.Series:
        return values.ewm(span=span, adjust=False).mean()

    macd = _ema(close, fast) - _ema(close, slow)
    trigger = _ema(macd, signal)
    return macd, trigger, macd - trigger


def build_features(df: pd.DataFrame, horizon: int = 1, use_log_return: bool = True):
    """Build the feature matrix and the forward-return target from OHLCV bars.

    Args:
        df: price frame with a ``Close`` column, optionally ``Volume``, and a
            DatetimeIndex (calendar features are read from the index).
        horizon: number of bars ahead used for the ``next_return`` target.
        use_log_return: if True the target is ``log(close[t+h] / close[t])``,
            otherwise the simple percentage change over ``horizon`` bars.

    Returns:
        ``(data, feature_cols)`` where ``data`` holds all feature columns plus
        ``next_return`` with every row containing NaN removed, and
        ``feature_cols`` lists the feature column names in creation order.
    """
    close = df["Close"].copy()
    feat = pd.DataFrame(index=df.index)

    # Trailing returns over several horizons.
    feat["ret_1d"] = close.pct_change(1)
    feat["ret_2d"] = close.pct_change(2)
    feat["ret_5d"] = close.pct_change(5)
    feat["ret_10d"] = close.pct_change(10)

    # Rolling volatility of daily returns.
    feat["vol_5d"] = feat["ret_1d"].rolling(5).std()
    feat["vol_10d"] = feat["ret_1d"].rolling(10).std()
    feat["vol_20d"] = feat["ret_1d"].rolling(20).std()

    # Simple moving averages (the previously computed 50-day SMA was never used and is dropped).
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()

    # Previous close relative to the moving averages, and moving-average spreads.
    feat["close_to_sma5"] = close.shift(1) / sma_5 - 1
    feat["close_to_sma10"] = close.shift(1) / sma_10 - 1
    feat["close_to_sma20"] = close.shift(1) / sma_20 - 1
    feat["sma5_sma10"] = sma_5 / sma_10 - 1
    feat["sma10_sma20"] = sma_10 / sma_20 - 1

    # Oscillators computed on the previous close.
    feat["rsi_14"] = calculate_rsi(close.shift(1), 14)
    macd_line, signal_line, histogram = calculate_macd(close.shift(1))
    feat["macd"] = macd_line
    feat["macd_signal"] = signal_line
    feat["macd_hist"] = histogram

    # Distance of the previous close to the 2-sigma Bollinger bands, relative to that close.
    bb_period = 20
    bb_std = close.rolling(bb_period).std()
    bb_mean = close.rolling(bb_period).mean()
    feat["bb_upper"] = (bb_mean + 2 * bb_std - close.shift(1)) / close.shift(1)
    feat["bb_lower"] = (close.shift(1) - (bb_mean - 2 * bb_std)) / close.shift(1)

    if "Volume" in df.columns:
        vol_roll = df["Volume"].rolling(20).mean().shift(1)
        feat["volume_ratio"] = (df["Volume"].shift(1) / vol_roll)
        feat["volume_change"] = df["Volume"].pct_change().shift(1)

    # Calendar features taken from the index.
    feat["day_of_week"] = df.index.dayofweek
    feat["month"] = df.index.month
    feat["quarter"] = df.index.quarter

    # Lagged daily returns.
    for lag in [1, 2, 3, 5, 10]:
        feat[f"ret_lag_{lag}"] = feat["ret_1d"].shift(lag)

    # Forward return used as the prediction target.
    if use_log_return:
        next_ret = np.log(close.shift(-horizon) / close)
    else:
        next_ret = close.pct_change(horizon).shift(-horizon)

    # Infinite feature values (e.g. division by zero volume) are treated as missing.
    feat = feat.replace([np.inf, -np.inf], np.nan)
    data = feat.copy()
    data["next_return"] = next_ret

    data = data.dropna().copy()
    feature_cols = [c for c in data.columns if c != "next_return"]
    return data, feature_cols

# Build the feature table with a 1-bar-ahead log-return target and preview the latest rows.
feat_df, feature_cols = build_features(df_raw, horizon=1, use_log_return=True)
print(f"清洗后样本数: {len(feat_df)}, 特征数: {len(feature_cols)}")
display(feat_df.tail())

In [None]:
# %% [markdown]
# ## 4. 标签生成（支持动态阈值）

# %%
# Integer ids of the three target classes; they also index the model's output logits/probabilities.
CLASS_ID_DOWN = 0
CLASS_ID_NEUTRAL = 1
CLASS_ID_UP = 2
NUM_CLASSES = 3

def apply_labeling(feat_df: pd.DataFrame,
                   base_threshold: float,
                   label_cfg: LabelingConfig) -> pd.Series:
    """Label every row as down / neutral / up from its ``next_return``.

    With ``label_cfg.use_dynamic_threshold`` the cut-off is ``base_threshold``
    times the rolling std of ``ret_1d`` (shifted by one row), floored at
    ``label_cfg.min_vol``; otherwise ``base_threshold`` is used directly.

    Returns:
        Float Series aligned with ``feat_df``; rows whose cut-off is NaN stay NaN.
    """
    if label_cfg.use_dynamic_threshold:
        trailing_vol = feat_df["ret_1d"].rolling(label_cfg.vol_window).std().shift(1)
        cutoff = (base_threshold * trailing_vol).clip(lower=label_cfg.min_vol)
    else:
        cutoff = pd.Series(base_threshold, index=feat_df.index, dtype=float)

    fwd = feat_df["next_return"]
    is_up = (fwd >= cutoff).to_numpy()
    is_down = (fwd <= -cutoff).to_numpy()
    is_neutral = ((fwd > -cutoff) & (fwd < cutoff)).to_numpy()

    # First match wins: neutral, then down, then up; rows matching nothing remain NaN.
    codes = np.select(
        [is_neutral, is_down, is_up],
        [CLASS_ID_NEUTRAL, CLASS_ID_DOWN, CLASS_ID_UP],
        default=np.nan,
    )
    return pd.Series(codes, index=feat_df.index, dtype=float)


def make_sequences(
    feat_df: pd.DataFrame,
    feature_cols: List[str],
    lookback: int,
    label_cols: List[str]
) -> Tuple[np.ndarray, Dict[str, np.ndarray], pd.DatetimeIndex]:
    """Slice a feature table into overlapping look-back windows.

    Sequence ``i`` contains rows ``i .. i + lookback - 1`` of ``feature_cols``;
    its labels and its date are taken from the last row of that window.

    Args:
        feat_df: table holding the feature and label columns, indexed by date.
        feature_cols: columns forming the model input.
        lookback: number of rows per sequence (>= 1).
        label_cols: columns whose last-row value is returned per sequence.

    Returns:
        ``(X_seq, labels_dict, seq_index)`` where ``X_seq`` is float32 with shape
        ``(N - lookback + 1, lookback, len(feature_cols))``, ``labels_dict`` maps
        every label column to a 1-D array (in the column's own dtype) and
        ``seq_index`` holds the date of each window's last row.

    Raises:
        ValueError: if ``lookback < 1`` or there are fewer rows than ``lookback``.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")
    X_2d = feat_df[feature_cols].to_numpy(dtype=np.float32)
    N = len(feat_df)
    if N < lookback:
        raise ValueError(f"样本不足: N={N} < lookback={lookback}")

    # Row i of window_idx lists the source rows of sequence i; one fancy-index
    # gather replaces the former per-row Python loop.
    n_seq = N - lookback + 1
    window_idx = np.arange(n_seq)[:, None] + np.arange(lookback)[None, :]
    X_seq = X_2d[window_idx]

    # Labels and dates belong to the last row of each window.
    last_row = lookback - 1
    labels_dict = {col: feat_df[col].to_numpy()[last_row:].copy() for col in label_cols}
    seq_index = pd.DatetimeIndex(feat_df.index[last_row:])
    return X_seq, labels_dict, seq_index

# %% [markdown]
# ## 5. 标准化工具

# %%
def fit_scaler_3d(X_train: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Estimate per-feature mean and std over all samples and time steps.

    Args:
        X_train: 3-D array whose last axis indexes the features.

    Returns:
        ``(mean, std)`` as float32 vectors; std values below 1e-12 are raised
        to 1e-12 so that later division is safe.
    """
    assert X_train.ndim == 3
    n_features = X_train.shape[-1]
    stacked = X_train.reshape(-1, n_features)
    mu = stacked.mean(axis=0)
    sigma = np.maximum(stacked.std(axis=0), 1e-12)
    return mu.astype(np.float32), sigma.astype(np.float32)

def transform_3d(X: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
    """Standardise ``X`` by broadcasting ``mean`` and ``std`` over its two leading axes."""
    centre = mean[np.newaxis, np.newaxis, :]
    scale = std[np.newaxis, np.newaxis, :]
    shifted = X - centre
    return shifted / scale

In [None]:
# %% [markdown]
# ## 6. LSTM 模型与损失
# %%
class LSTMMultiClassifier(nn.Module):
    """LSTM encoder followed by a small MLP head that outputs ``NUM_CLASSES`` logits."""

    def __init__(self, input_size: int, hidden_size: int = 64, num_layers: int = 2, dropout: float = 0.4):
        """
        Args:
            input_size: number of features per time step.
            hidden_size: LSTM hidden width, also the width of the dense layer.
            num_layers: number of stacked LSTM layers.
            dropout: dropout probability applied to the last LSTM output
                (the LSTM itself is created without inter-layer dropout).
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.act = nn.ReLU()
        self.out = nn.Linear(hidden_size, NUM_CLASSES)

    def forward(self, x):
        """Return raw (un-normalised) class logits for a batch-first input ``x``."""
        out, _ = self.lstm(x)
        # Keep only the output of the last time step.
        h = out[:, -1, :]
        h = self.dropout(h)
        h = self.act(self.fc1(h))
        logits = self.out(h)
        return logits


class FocalLoss(nn.Module):
    """Multi-class focal loss: ``alpha[y] * (1 - p_y) ** gamma * (-log p_y)``.

    Args:
        alpha: optional per-class weight tensor of shape ``(num_classes,)``.
        gamma: focusing parameter; ``gamma = 0`` gives (weighted) cross-entropy.
        reduction: ``"mean"``, ``"sum"``, or anything else for per-sample losses.
    """

    def __init__(self, alpha: Optional[torch.Tensor] = None, gamma: float = 2.0, reduction: str = "mean"):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, target):
        """Compute the focal loss for ``logits`` (N, C) and integer ``target`` (N,)."""
        # p_y must come from the UNWEIGHTED cross-entropy. Taking exp(-ce) of the
        # class-weighted loss would give p_y ** alpha and distort the modulating factor.
        ce = nn.functional.cross_entropy(logits, target, reduction="none")
        pt = torch.exp(-ce)
        loss = ((1 - pt) ** self.gamma) * ce

        # Class weights are applied after the modulating factor.
        if self.alpha is not None:
            alpha = self.alpha.to(device=logits.device, dtype=logits.dtype)
            loss = alpha[target] * loss

        if self.reduction == "mean":
            return loss.mean()
        elif self.reduction == "sum":
            return loss.sum()
        return loss

def make_loader_classification(
    X: np.ndarray,
    y: np.ndarray,
    batch_size: int,
    shuffle: bool,
    sample_weights: Optional[np.ndarray] = None,
    use_weighted_sampler: bool = False
) -> DataLoader:
    """Wrap arrays in a ``DataLoader`` yielding (float32 inputs, int64 labels).

    When ``use_weighted_sampler`` is True and ``sample_weights`` is given, batches
    are drawn with replacement by a ``WeightedRandomSampler`` and ``shuffle`` is
    ignored; otherwise a plain loader honouring ``shuffle`` is returned.
    """
    dataset = TensorDataset(
        torch.from_numpy(X.astype(np.float32)),
        torch.from_numpy(y.astype(np.int64)),
    )
    shared_kwargs = dict(
        batch_size=batch_size,
        num_workers=0,
        pin_memory=torch.cuda.is_available(),
    )

    wants_sampler = use_weighted_sampler and sample_weights is not None
    if not wants_sampler:
        return DataLoader(dataset, shuffle=shuffle, **shared_kwargs)

    weighted = WeightedRandomSampler(
        weights=torch.from_numpy(sample_weights.astype(np.float32)),
        num_samples=len(sample_weights),
        replacement=True,
    )
    return DataLoader(dataset, sampler=weighted, **shared_kwargs)

def train_classifier(model: nn.Module,
                     train_loader: DataLoader,
                     val_loader: DataLoader,
                     cfg: TrainConfig,
                     criterion: nn.Module) -> Tuple[nn.Module, Dict[str, List[float]]]:
    """Train ``model`` with Adam, LR reduction on plateau and early stopping.

    Args:
        model: network to train; it is moved to the global ``device``.
        train_loader: batches of (inputs, labels) used for optimisation.
        val_loader: batches used to compute the validation loss each epoch.
        cfg: training hyper-parameters (epochs, lr, patience, grad_clip, ...).
        criterion: loss applied to both training and validation batches.

    Returns:
        ``(model, history)`` where the model holds the weights of the epoch with
        the lowest validation loss and ``history`` contains the per-epoch
        ``train_loss`` and ``val_loss`` lists.
    """
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
    # Retry without ``min_lr`` if the scheduler constructor rejects that argument.
    try:
        scheduler = ReduceLROnPlateau(
            optimizer, mode='min',
            factor=cfg.lr_factor, patience=cfg.lr_patience,
            min_lr=cfg.lr_min
        )
    except TypeError:
        scheduler = ReduceLROnPlateau(
            optimizer, mode='min',
            factor=cfg.lr_factor, patience=cfg.lr_patience
        )

    # Snapshot of the best weights seen so far (starts as the initial weights).
    best_state = copy.deepcopy(model.state_dict())
    best_val = float('inf')
    no_improve = 0
    history = {"train_loss": [], "val_loss": []}

    for epoch in range(1, cfg.epochs + 1):
        # ---- training pass ----
        model.train()
        train_loss = 0.0
        for xb, yb in train_loader:
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)
            optimizer.zero_grad()
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            if cfg.grad_clip is not None:
                nn.utils.clip_grad_norm_(model.parameters(), cfg.grad_clip)
            optimizer.step()
            # Weight the batch loss by batch size so the epoch figure is a per-sample average.
            train_loss += loss.item() * xb.size(0)
        train_loss /= len(train_loader.dataset)

        # ---- validation pass ----
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb = xb.to(device, non_blocking=True)
                yb = yb.to(device, non_blocking=True)
                logits = model(xb)
                loss = criterion(logits, yb)
                val_loss += loss.item() * xb.size(0)
        val_loss /= len(val_loader.dataset)
        scheduler.step(val_loss)

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)

        if cfg.verbose and (epoch % 10 == 0 or epoch == 1):
            cur_lr = optimizer.param_groups[0]["lr"]
            print(f"Epoch {epoch:03d} | train {train_loss:.6f} | val {val_loss:.6f} | lr {cur_lr:.2e}")

        # Early stopping: keep the best weights, stop after ``cfg.patience`` epochs without improvement.
        if val_loss + 1e-12 < best_val:
            best_val = val_loss
            best_state = copy.deepcopy(model.state_dict())
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= cfg.patience:
                if cfg.verbose:
                    print(f"Early stop at epoch {epoch}, best_val={best_val:.6f}")
                break

    # Restore the best checkpoint before returning.
    model.load_state_dict(best_state)
    return model, history

def predict_logits(model: nn.Module, X: np.ndarray, batch_size: int = 256) -> np.ndarray:
    """Run ``model`` over ``X`` in mini-batches (eval mode, no gradients).

    Returns:
        The logits of all batches concatenated along axis 0 as a NumPy array.
    """
    model = model.to(device)
    model.eval()

    def _forward(chunk: np.ndarray) -> np.ndarray:
        batch = torch.from_numpy(chunk.astype(np.float32)).to(device)
        return model(batch).cpu().numpy()

    with torch.no_grad():
        pieces = [
            _forward(X[offset:offset + batch_size])
            for offset in range(0, len(X), batch_size)
        ]
    return np.concatenate(pieces, axis=0)

def softmax_np(logits: np.ndarray) -> np.ndarray:
    """Row-wise softmax of a 2-D array, shifted by the row maximum for numerical stability."""
    row_max = np.max(logits, axis=1, keepdims=True)
    weights = np.exp(logits - row_max)
    totals = np.sum(weights, axis=1, keepdims=True)
    return weights / totals

# %% [markdown]
# ## 7. 固定窗口交叉验证（多类别 + 类别权重）

# %%
def fixed_window_cv_lstm_classifier(
    X_seq_all: np.ndarray,
    y_all: np.ndarray,
    returns_all: np.ndarray,
    seq_index: pd.DatetimeIndex,
    test_size: int,
    train_window: int,
    val_size: int,
    step_size: int,
    cfg: TrainConfig,
    collect_predictions: bool = False,
    verbose: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Walk-forward evaluation with a fixed-length train / validation / test layout.

    Each fold uses ``train_window`` samples for training, the next ``val_size``
    for early stopping and the following ``test_size`` for out-of-sample
    scoring; consecutive folds are shifted by ``step_size`` samples. When
    ``step_size < test_size`` the test windows overlap, so a date can appear
    in several folds of the returned predictions.

    Args:
        X_seq_all: input sequences, shape (N, lookback, features).
        y_all: integer class labels per sequence.
        returns_all: realised forward return per sequence.
        seq_index: date of each sequence.
        test_size, train_window, val_size, step_size: fold geometry in samples.
        cfg: training configuration (loss choice, ``focal_gamma``, sampler, ...).
        collect_predictions: if True, also return per-sample test predictions.
        verbose: print skipped folds and the final fold count.

    Returns:
        ``(metrics_df, preds_df)``: one metrics row per evaluated fold, and the
        concatenated test predictions (empty frame unless requested).
    """
    metrics = []
    preds_list = []
    N = len(X_seq_all)
    start_index = train_window + val_size
    fold_id = 0

    for test_start in range(start_index, N - test_size + 1, step_size):
        train_end = test_start
        val_start = train_end - val_size
        train_start = val_start - train_window
        test_end = test_start + test_size
        if train_start < 0:
            continue

        X_train = X_seq_all[train_start:val_start]
        y_train = y_all[train_start:val_start]
        X_val = X_seq_all[val_start:train_end]
        y_val = y_all[val_start:train_end]
        X_test = X_seq_all[test_start:test_end]
        y_test = y_all[test_start:test_end]
        r_test = returns_all[test_start:test_end]
        test_dates = seq_index[test_start:test_end]

        train_counts = np.bincount(y_train, minlength=NUM_CLASSES).astype(np.float32)
        val_counts = np.bincount(y_val, minlength=NUM_CLASSES).astype(np.float32)
        test_counts = np.bincount(y_test, minlength=NUM_CLASSES).astype(np.float32)

        # Skip folds whose training window lacks enough samples of some class.
        if (train_counts < cfg.min_class_freq).any():
            if verbose:
                print(f"Fold {fold_id} 训练集类别不足，跳过。Counts={train_counts}")
            fold_id += 1
            continue

        # The scaler is fitted on the training window only and reused for val/test.
        mean, std = fit_scaler_3d(X_train)
        X_train_sc = transform_3d(X_train, mean, std)
        X_val_sc = transform_3d(X_val, mean, std)
        X_test_sc = transform_3d(X_test, mean, std)

        # Inverse-frequency class weights: total / (num_classes * count).
        weights_per_class = train_counts.sum() / (NUM_CLASSES * np.maximum(train_counts, 1.0))
        class_weights_tensor = torch.tensor(weights_per_class, device=device).float()

        sample_weights = None
        if cfg.use_weighted_sampler:
            sample_weights = weights_per_class[y_train]

        train_loader = make_loader_classification(
            X_train_sc, y_train, cfg.batch_size,
            shuffle=not cfg.use_weighted_sampler,
            sample_weights=sample_weights,
            use_weighted_sampler=cfg.use_weighted_sampler
        )
        val_loader = make_loader_classification(
            X_val_sc, y_val, cfg.batch_size, shuffle=False
        )

        model = LSTMMultiClassifier(input_size=X_train_sc.shape[-1], hidden_size=48, dropout=0.4)
        if cfg.use_focal_loss:
            # Honour the configured focusing parameter (it used to be hard-coded to 2.0).
            criterion = FocalLoss(alpha=class_weights_tensor, gamma=cfg.focal_gamma)
        else:
            criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

        model, _ = train_classifier(model, train_loader, val_loader, cfg, criterion)

        logits_test = predict_logits(model, X_test_sc, batch_size=256)
        proba_test = softmax_np(logits_test)
        y_pred = proba_test.argmax(axis=1)

        acc = accuracy_score(y_test, y_pred)
        try:
            bacc = balanced_accuracy_score(y_test, y_pred)
        except Exception:
            # Best effort: a fold where the score cannot be computed is recorded as NaN.
            bacc = np.nan
        macro_f1 = f1_score(y_test, y_pred, average="macro", zero_division=0)
        f1_per_class = f1_score(y_test, y_pred, average=None, labels=[0, 1, 2], zero_division=0)

        fold_metrics = {
            "fold": fold_id,
            "acc": acc,
            "bacc": bacc,
            "macro_f1": macro_f1,
            "train_size": len(X_train_sc),
            "val_size": len(X_val_sc),
            "test_size": len(X_test_sc),
            "test_start": test_dates[0],
            "test_end": test_dates[-1],
            "train_cls_0": train_counts[0],
            "train_cls_1": train_counts[1],
            "train_cls_2": train_counts[2],
            "test_cls_0": test_counts[0],
            "test_cls_1": test_counts[1],
            "test_cls_2": test_counts[2],
            "f1_down": f1_per_class[0],
            "f1_neutral": f1_per_class[1],
            "f1_up": f1_per_class[2],
        }
        metrics.append(fold_metrics)

        if collect_predictions:
            fold_pred = pd.DataFrame({
                "fold": fold_id,
                "date": test_dates,
                "actual_class": y_test,
                "pred_class": y_pred,
                "actual_return": r_test,
            })
            for cls in range(NUM_CLASSES):
                fold_pred[f"proba_{cls}"] = proba_test[:, cls]
            preds_list.append(fold_pred)

        fold_id += 1
        # Release the fold's model (and cached GPU memory) before the next fold.
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    metrics_df = pd.DataFrame(metrics)
    preds_df = pd.concat(preds_list, ignore_index=True) if (collect_predictions and preds_list) else pd.DataFrame()
    if verbose:
        print(f"共生成 {len(metrics_df)} 个折。")
    return metrics_df, preds_df

# %% [markdown]
# ## 8. 阈值搜索与选择（Macro-F1 优先）

# %%
def run_threshold_search(
    feat_df: pd.DataFrame,
    feature_cols: List[str],
    lookback: int,
    thresholds: List[float],
    cfg: PipelineConfig,
    train_cfg: TrainConfig,
    label_cfg: LabelingConfig,
) -> pd.DataFrame:
    """Score every candidate labelling threshold with walk-forward cross-validation.

    For each threshold the data is re-labelled, sequences are rebuilt and the
    fixed-window CV is run. Candidates that leave a class with fewer than
    ``train_cfg.min_class_freq`` samples, raise ``ValueError`` during CV, or
    produce no folds are skipped.

    Returns:
        One row per usable threshold with fold-averaged metrics, sorted by
        mean macro-F1 (desc), mean balanced accuracy (desc) and accuracy std (asc).
        Empty if no threshold was usable.
    """
    class_ids = [0, 1, 2]
    rows = []

    for candidate in thresholds:
        frame = feat_df.copy()
        frame["target_class"] = apply_labeling(frame, candidate, label_cfg)
        frame = frame.dropna(subset=["target_class"]).copy()
        frame["target_class"] = frame["target_class"].astype(int)

        # Every class needs a minimum number of samples overall.
        per_class = frame["target_class"].value_counts().reindex(class_ids, fill_value=0)
        if (per_class.values < train_cfg.min_class_freq).any():
            continue

        X_seq, targets, seq_index = make_sequences(
            frame,
            feature_cols=feature_cols,
            lookback=lookback,
            label_cols=["target_class", "next_return"],
        )
        y_all = targets["target_class"].astype(np.int64)
        returns_all = targets["next_return"].astype(np.float32)

        try:
            fold_scores, _ = fixed_window_cv_lstm_classifier(
                X_seq, y_all, returns_all, seq_index,
                test_size=cfg.test_size,
                train_window=cfg.train_window,
                val_size=cfg.val_size,
                step_size=cfg.step_size,
                cfg=train_cfg,
                collect_predictions=False,
                verbose=False,
            )
        except ValueError:
            continue

        if fold_scores.empty:
            continue

        rows.append({
            "threshold": candidate,
            "mean_acc": fold_scores["acc"].mean(),
            "std_acc": fold_scores["acc"].std(ddof=0),
            "mean_bacc": fold_scores["bacc"].mean(),
            "mean_macro_f1": fold_scores["macro_f1"].mean(),
            "mean_f1_down": fold_scores["f1_down"].mean(),
            "mean_f1_up": fold_scores["f1_up"].mean(),
            "folds": len(fold_scores),
            **{
                f"avg_train_cls_{k}": fold_scores[f"train_cls_{k}"].mean()
                for k in class_ids
            },
        })

    results_df = pd.DataFrame(rows)
    if results_df.empty:
        return results_df

    ranked = results_df.sort_values(
        ["mean_macro_f1", "mean_bacc", "std_acc"],
        ascending=[False, False, True],
    )
    return ranked.reset_index(drop=True)

def select_best_threshold(results_df: pd.DataFrame) -> pd.Series:
    """Return the first (best-ranked) row of an already sorted search result.

    Raises:
        ValueError: if ``results_df`` is empty.
    """
    has_candidates = not results_df.empty
    if has_candidates:
        return results_df.iloc[0]
    raise ValueError("阈值搜索无可用结果，请调整阈值范围或窗口参数。")

# %% [markdown]
# ## 9. 分类结果汇总工具

# %%
def summarize_classification(preds_df: pd.DataFrame, class_names: List[str]) -> Dict[str, object]:
    """Aggregate classification quality over all collected predictions.

    Args:
        preds_df: frame with ``actual_class`` and ``pred_class`` columns.
        class_names: display names in the order down, neutral, up.

    Returns:
        Dict with ``accuracy``, ``balanced_accuracy``, ``macro_f1``, the text
        ``classification_report`` and the ``confusion_matrix`` as a labelled DataFrame.
    """
    label_order = [CLASS_ID_DOWN, CLASS_ID_NEUTRAL, CLASS_ID_UP]
    truth = preds_df["actual_class"].astype(int).to_numpy()
    guess = preds_df["pred_class"].astype(int).to_numpy()

    summary = {
        "accuracy": accuracy_score(truth, guess),
        "balanced_accuracy": balanced_accuracy_score(truth, guess),
        "macro_f1": f1_score(truth, guess, average="macro", zero_division=0),
    }
    summary["classification_report"] = classification_report(
        truth, guess,
        labels=label_order,
        target_names=class_names,
        digits=3,
        zero_division=0,
    )
    # Rows are true classes, columns are predicted classes.
    matrix = confusion_matrix(truth, guess, labels=label_order)
    summary["confusion_matrix"] = pd.DataFrame(matrix, index=class_names, columns=class_names)
    return summary

# %% [markdown]
# ## 10. 交易策略：置信阈 & 最小持有期

# %%
def generate_positions_from_proba(proba: np.ndarray,
                                  cfg: StrategyConfig) -> np.ndarray:
    """Turn class probabilities into a -1 / 0 / +1 position per row.

    A long (short) signal fires when the up (down) probability reaches
    ``cfg.trade_proba_threshold`` and is at least (strictly) larger than the
    opposite side. Once a position is opened it is held for
    ``cfg.min_holding_period`` rows in total, during which new signals are
    ignored; afterwards the position follows the current signal (flat if none).
    """
    p_down = proba[:, CLASS_ID_DOWN]
    p_up = proba[:, CLASS_ID_UP]
    min_conf = cfg.trade_proba_threshold

    go_long = (p_up >= min_conf) & (p_up >= p_down)
    go_short = (p_down >= min_conf) & (p_down > p_up)
    # The two masks are mutually exclusive, so the nesting order does not matter.
    signals = np.where(go_short, -1, np.where(go_long, 1, 0))

    positions = np.zeros(len(proba), dtype=int)
    locked_rows = 0   # rows left during which the current position may not change
    held = 0
    for row, signal in enumerate(signals):
        if locked_rows > 0:
            locked_rows -= 1
        else:
            held = signal
            locked_rows = max(cfg.min_holding_period - 1, 0) if signal != 0 else 0
        positions[row] = held

    return positions

def simulate_significant_strategy(
    preds_df: pd.DataFrame,
    strategy_cfg: StrategyConfig
) -> Tuple[pd.DataFrame, Dict[str, object]]:
    """Backtest the probability-threshold strategy on out-of-sample predictions.

    Args:
        preds_df: predictions with ``date``, ``actual_class``, ``pred_class``,
            ``actual_return`` (log return of the following period) and
            ``proba_*`` columns; an optional ``fold`` column identifies the
            cross-validation fold.
        strategy_cfg: confidence threshold, minimum holding period and
            transaction cost in basis points.

    Returns:
        ``(sim_df, stats)``: the per-date simulation frame (positions, returns,
        costs, equity curves) and a dict of performance statistics.

    Raises:
        ValueError: if ``preds_df`` is empty.
    """
    if preds_df.empty:
        raise ValueError("预测结果为空，无法回测策略。")

    # Overlapping test windows (step_size < test_size) produce several rows per
    # date. Compounding all of them would count the same day's return multiple
    # times, so keep one row per date: the one from the latest fold, i.e. the
    # most recently trained model.
    sort_keys = ["date", "fold"] if "fold" in preds_df.columns else ["date"]
    df = (
        preds_df.sort_values(sort_keys, kind="mergesort")
        .drop_duplicates(subset="date", keep="last")
        .copy()
    )
    proba_cols = [c for c in df.columns if c.startswith("proba_")]
    proba_array = df[proba_cols].values
    df["position"] = generate_positions_from_proba(proba_array, strategy_cfg)

    # actual_return is a log return; convert to a simple return before compounding.
    df["simple_return"] = np.exp(df["actual_return"]) - 1.0
    df["strategy_simple_return"] = df["position"] * df["simple_return"]

    # Costs are charged on every change in position, including the initial entry.
    position_change = df["position"].diff().abs()
    position_change.iloc[0] = abs(df["position"].iloc[0])
    cost_rate = strategy_cfg.transaction_cost_bp / 10000.0
    df["transaction_cost"] = position_change * cost_rate
    df["strategy_simple_return_after_cost"] = df["strategy_simple_return"] - df["transaction_cost"]

    df["strategy_equity"] = (1 + df["strategy_simple_return_after_cost"]).cumprod()
    df["buyhold_equity"] = (1 + df["simple_return"]).cumprod()

    # ``fillna(method="ffill")`` is deprecated in recent pandas; ``ffill()`` is the equivalent.
    df["strategy_equity"] = df["strategy_equity"].replace([np.inf, -np.inf], np.nan).ffill()
    df["buyhold_equity"] = df["buyhold_equity"].replace([np.inf, -np.inf], np.nan).ffill()

    stats: Dict[str, object] = {}
    total_days = len(df)

    strategy_final = df["strategy_equity"].iloc[-1]
    buyhold_final = df["buyhold_equity"].iloc[-1]
    stats["total_return"] = strategy_final - 1
    stats["buyhold_total_return"] = buyhold_final - 1

    # Annualise assuming 252 trading days per year.
    years = max(total_days / 252, 1 / 252)
    stats["annualized_return"] = strategy_final ** (1 / years) - 1
    stats["buyhold_annual_return"] = buyhold_final ** (1 / years) - 1

    daily_mean = df["strategy_simple_return_after_cost"].mean()
    daily_std = df["strategy_simple_return_after_cost"].std()
    stats["annualized_volatility"] = daily_std * np.sqrt(252)
    stats["sharpe_ratio"] = (daily_mean / daily_std) * np.sqrt(252) if daily_std > 1e-8 else np.nan

    cum_max = df["strategy_equity"].cummax()
    drawdown = df["strategy_equity"] / cum_max - 1
    stats["max_drawdown"] = drawdown.min()

    trade_mask = df["position"] != 0
    if trade_mask.any():
        # NOTE(review): the hit rate compares the arg-max predicted class with the
        # actual class on days with an open position; it does not check whether the
        # held position itself was profitable.
        stats["hit_rate_on_trades"] = (df.loc[trade_mask, "actual_class"] == df.loc[trade_mask, "pred_class"]).mean()
        stats["avg_trade_return"] = df.loc[trade_mask, "strategy_simple_return_after_cost"].mean()
        stats["num_trading_days"] = int(trade_mask.sum())
    else:
        stats["hit_rate_on_trades"] = np.nan
        stats["avg_trade_return"] = np.nan
        stats["num_trading_days"] = 0

    pos_dist = df["position"].value_counts(normalize=True).sort_index()
    stats["position_distribution"] = {
        "short_pct": pos_dist.get(-1, 0.0),
        "flat_pct": pos_dist.get(0, 0.0),
        "long_pct": pos_dist.get(1, 0.0),
    }
    stats["transaction_cost_bp"] = strategy_cfg.transaction_cost_bp
    stats["trade_proba_threshold"] = strategy_cfg.trade_proba_threshold
    stats["min_holding_period"] = strategy_cfg.min_holding_period

    return df, stats

def plot_equity_curves(sim_df: pd.DataFrame, threshold: float, class_names: List[str], strategy_cfg: StrategyConfig):
    """Plot equity curves, positions and class probabilities in three stacked panels.

    Args:
        sim_df: simulation frame with ``date``, ``strategy_equity``,
            ``buyhold_equity``, ``position``, ``proba_0`` and ``proba_2`` columns.
        threshold: labelling threshold, shown in the title only.
        class_names: display names; entries 0 and 2 label the probability lines.
        strategy_cfg: supplies the confidence threshold line and title details.
    """
    fig, axes = plt.subplots(3, 1, figsize=(12, 10), sharex=True,
                             gridspec_kw={"height_ratios": [3, 1, 1]})

    ax_price = axes[0]
    ax_pos = axes[1]
    ax_prob = axes[2]

    # Top panel: strategy equity versus buy-and-hold equity.
    ax_price.plot(sim_df["date"], sim_df["strategy_equity"], label="策略权益", color="tab:blue", linewidth=2)
    ax_price.plot(sim_df["date"], sim_df["buyhold_equity"], label="买入持有权益", color="tab:orange", linestyle="--")
    ax_price.set_ylabel("累计净值")
    ax_price.set_title(
        f"策略 vs 买入持有（阈值 = {threshold:.3f}，trade_thr={strategy_cfg.trade_proba_threshold:.2f}, "
        f"hold≥{strategy_cfg.min_holding_period}）"
    )
    ax_price.legend(loc="best")
    ax_price.grid(True, alpha=0.3)

    # Middle panel: position over time (-1 short, 0 flat, +1 long).
    ax_pos.step(sim_df["date"], sim_df["position"], where="post", color="tab:green", linewidth=1.5)
    ax_pos.set_ylabel("仓位")
    ax_pos.set_yticks([-1, 0, 1])
    ax_pos.set_yticklabels(["做空", "空仓", "做多"])
    ax_pos.grid(True, alpha=0.3)

    # Bottom panel: predicted up/down probabilities against the trading confidence threshold.
    ax_prob.plot(sim_df["date"], sim_df["proba_2"], label=class_names[2], color="tab:blue")
    ax_prob.plot(sim_df["date"], sim_df["proba_0"], label=class_names[0], color="tab:red")
    ax_prob.axhline(strategy_cfg.trade_proba_threshold, color="gray", linestyle="--", alpha=0.6, label="置信阈")
    ax_prob.set_ylabel("概率")
    ax_prob.set_xlabel("日期")
    ax_prob.legend(loc="upper right")
    ax_prob.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# %% [markdown]
# ## 11. 阈值搜索

# %%
# Evaluate every candidate threshold with walk-forward CV (trains one model per fold per threshold).
threshold_results = run_threshold_search(
    feat_df=feat_df,
    feature_cols=feature_cols,
    lookback=PIPE_CFG.lookback,
    thresholds=LABEL_CFG.threshold_grid.tolist(),
    cfg=PIPE_CFG,
    train_cfg=TRAIN_CFG,
    label_cfg=LABEL_CFG
)

print("阈值搜索结果（按 Macro-F1/平衡准确率排序）:")
display(threshold_results)

# The results are already sorted, so the first row is the selected threshold.
best_threshold_info = select_best_threshold(threshold_results)
best_threshold = float(best_threshold_info["threshold"])
print(f"\n选定阈值: {best_threshold:.3f}")
print(best_threshold_info)

# %% [markdown]
# ## 12. 使用最佳阈值训练 + OOF 预测

# %%
# Re-label the data with the selected threshold and drop rows without a label.
feat_df_best = feat_df.copy()
feat_df_best["target_class"] = apply_labeling(feat_df_best, best_threshold, LABEL_CFG)
feat_df_best = feat_df_best.dropna(subset=["target_class"]).copy()
feat_df_best["target_class"] = feat_df_best["target_class"].astype(int)

X_seq_best, label_dict_best, seq_index_best = make_sequences(
    feat_df_best,
    feature_cols=feature_cols,
    lookback=PIPE_CFG.lookback,
    label_cols=["target_class", "next_return"]
)

y_best = label_dict_best["target_class"].astype(np.int64)
returns_best = label_dict_best["next_return"].astype(np.float32)

# Walk-forward CV again, this time collecting the out-of-sample predictions of every fold.
cv_metrics, cv_preds = fixed_window_cv_lstm_classifier(
    X_seq_best, y_best, returns_best, seq_index_best,
    test_size=PIPE_CFG.test_size,
    train_window=PIPE_CFG.train_window,
    val_size=PIPE_CFG.val_size,
    step_size=PIPE_CFG.step_size,
    cfg=TRAIN_CFG,
    collect_predictions=True,
    verbose=True
)

print("交叉验证折指标（前 5 行）:")
display(cv_metrics.head())

summary_table = cv_metrics[["acc", "bacc", "macro_f1", "f1_down", "f1_neutral", "f1_up"]].agg(["mean", "std", "min", "max"])
print("\n交叉验证指标汇总:")
display(summary_table)

# Warn (without failing) when the average macro-F1 is below the configured reference level.
avg_macro_f1 = summary_table.loc["mean", "macro_f1"]
if avg_macro_f1 < PIPE_CFG.desired_macro_f1:
    print(f"⚠️ Macro-F1({avg_macro_f1:.3f}) 低于期望值 {PIPE_CFG.desired_macro_f1:.2f}，建议继续优化标签或模型。")

# %% [markdown]
# ## 13. 分类表现汇总

# %%
# Display names in class-id order: down, neutral, up.
class_names = [
    f"显著下跌 (≤ -阈值)",
    "无显著波动",
    f"显著上涨 (≥ 阈值)",
]

# Pooled metrics over all collected out-of-sample predictions.
classification_summary = summarize_classification(cv_preds, class_names)
print("整体准确率:", f"{classification_summary['accuracy']:.3f}")
print("整体平衡准确率:", f"{classification_summary['balanced_accuracy']:.3f}")
print("Macro-F1:", f"{classification_summary['macro_f1']:.3f}")
print("\n分类报告:")
print(classification_summary["classification_report"])
print("混淆矩阵:")
display(classification_summary["confusion_matrix"])

# %% [markdown]
# ## 14. 策略回测（含置信阈 & 最小持有期）

# %%
# Backtest the confidence-threshold strategy on the out-of-sample predictions.
sim_df, perf_stats = simulate_significant_strategy(
    cv_preds,
    strategy_cfg=STRATEGY_CFG
)

print("策略样本预览:")
display(sim_df.head())

# Show the statistics as a single-column table (one metric per row).
print("\n策略表现指标:")
perf_table = pd.DataFrame([perf_stats])
display(perf_table.T.rename(columns={0: "值"}))

plot_equity_curves(sim_df, threshold=best_threshold, class_names=class_names, strategy_cfg=STRATEGY_CFG)

# %% [markdown]
# ## 15. 报告要点
# - 关注 `cv_metrics` 中各折类分布，避免孤立折驱动结果。
# - 若 Macro-F1 / F1_up / F1_down 仍低，可：
#   - 调整 `LABEL_CFG.threshold_grid` 范围或是否使用动态阈值；
#   - 启用 `TRAIN_CFG.use_focal_loss = True`；
#   - 延长 `PIPE_CFG.period`、增大 `train_window`；
#   - 调高 `STRATEGY_CFG.trade_proba_threshold` 控制噪声交易；
#   - 引入更多外部特征或采用两阶段模型。