# (Legacy) xgboost_CVGrid.ipynb

XGBoost baseline + time-respecting grid search。建议保留作参考，但后续如要系统化对比，最好抽成模块/脚本并统一输出格式。



In [None]:
# %% [markdown]
# # Fixed-window time-series modeling with XGBoost + Grid Search (Robust) + Last-fold Plot
#
# This notebook implements a strict fixed-window time-series cross-validation (CV):
# - Each fold trains only on the most recent `train_window` observations (e.g., 20 trading days),
# - Validates on the next `val_size` days (for early stopping and hyperparameter selection),
# - And tests on the subsequent `test_size` days.
#
# We add a time-respecting Grid Search with robust fallbacks:
# - For every outer fold, we run a small grid search using ONLY the contiguous validation window
#   (immediately following the training window) to select hyperparameters.
# - We handle edge-cases where a window has only one class by safely falling back to balanced accuracy
#   and accuracy, and by using constant models when needed.
# - We always return a valid model from the grid search (never None) to avoid runtime errors.
#
# Notes:
# - Feature engineering uses ≤ 20-day lookback to match the assumption that the process
#   depends only on recent history (~20 days).
# - We assume XGBoost is available.
# - Default grids are intentionally small to keep runtime reasonable. You can expand them.

# %%
# Silence every warning for a cleaner notebook output.
# NOTE(review): this is a blanket filter, so deprecation and numerical
# warnings raised by the libraries below are hidden as well.
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional, Any
from itertools import product
from IPython.display import display

import matplotlib.pyplot as plt
import seaborn as sns

# Use the seaborn whitegrid style when this matplotlib build ships it, else fall back to ggplot.
plt.style.use('seaborn-v0_8-whitegrid') if 'seaborn-v0_8-whitegrid' in plt.style.available else plt.style.use('ggplot')
sns.set_palette("Set2")

import yfinance as yf

from sklearn.metrics import (
    accuracy_score, balanced_accuracy_score, roc_auc_score,
    mean_absolute_error, mean_squared_error, r2_score
)

from xgboost import XGBClassifier, XGBRegressor

# Single seed shared by NumPy and (via `random_state`) the XGBoost estimators defined later.
SEED = 42
np.random.seed(SEED)
# Always prints True: reaching this line means the xgboost import above succeeded.
print("XGBoost available:", True)

In [None]:
# %% [markdown]
# ## Data: 2800.HK, daily (1 year requested below; `fetch_prices` defaults to 3 years)
# Pull adjusted OHLCV data using `yfinance`. If the `period` request returns no data, fall back to explicit start/end dates.

In [None]:
# %%
def fetch_prices(ticker: str = "2800.HK", period: str = "3y", interval: str = "1d") -> pd.DataFrame:
    """Download auto-adjusted OHLCV bars for `ticker` through yfinance.

    The first attempt uses the relative `period` string. If that yields nothing,
    a second attempt is made with an explicit window of 3 * 370 calendar days
    ending today (this fallback window does not depend on `period`).

    Args:
        ticker: Symbol passed to `yf.Ticker`.
        period: Relative look-back string for the first request.
        interval: Bar size passed to both requests.

    Returns:
        A copy of the Open/High/Low/Close/Volume columns with a
        timezone-naive index.

    Raises:
        RuntimeError: If both requests come back empty.
    """
    def _is_missing(frame) -> bool:
        # Treat both "no object" and "no rows" as a failed download.
        return frame is None or frame.empty

    source = yf.Ticker(ticker)
    history = source.history(period=period, interval=interval, auto_adjust=True)

    if _is_missing(history):
        window_end = pd.Timestamp.today().normalize()
        window_start = window_end - pd.Timedelta(days=3*370)
        history = source.history(start=window_start, end=window_end, interval=interval, auto_adjust=True)

    if _is_missing(history):
        raise RuntimeError(f"Cannot fetch data for {ticker}")

    # Strip timezone information so later date lookups use naive timestamps.
    history.index = pd.to_datetime(history.index).tz_localize(None)
    ohlcv = history[["Open", "High", "Low", "Close", "Volume"]]
    return ohlcv.copy()

# Instrument analysed throughout the notebook.
ticker = "2800.HK"
# NOTE: one year of daily bars is requested here, overriding fetch_prices' "3y" default.
df_raw = fetch_prices(ticker=ticker, period="1y", interval="1d")
# Quick sanity report: covered date range, row count and the most recent bars.
print(f"Data range: {df_raw.index.min().date()} to {df_raw.index.max().date()}")
print(f"Total samples: {len(df_raw)}")
display(df_raw.tail())

In [None]:
# %% [markdown]
# ## Feature engineering (max 20-day lookback)
# - Use only past information up to and including time t (no leakage).
# - Constrain all rolling windows to ≤ 20 trading days to match the “depends only on last 20 days” assumption.
# - Labels:
#   - Regression: `next_return` = log return from t to t+1 (`use_log_return=True`).
#   - Classification: `target_up` = 1 if `next_return` > 0 else 0.

In [None]:
# %%
def calculate_rsi_rolling(series: pd.Series, period: int = 14) -> pd.Series:
    """RSI computed with rolling simple means to ensure finite memory.

    Args:
        series: Price series (e.g. closes) ordered in time.
        period: Number of one-step changes averaged in each window.

    Returns:
        A Series aligned with `series`:
        - NaN until `period` one-step changes are available,
        - 100 when the window contains gains and no losses,
        - NaN when the window is completely flat (no gains and no losses),
        - the usual ``100 - 100 / (1 + RS)`` otherwise.
    """
    delta = series.diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    avg_gain = gains.rolling(window=period, min_periods=period).mean()
    avg_loss = losses.rolling(window=period, min_periods=period).mean()

    # Guard the division: a zero average loss would otherwise divide by zero.
    rs = avg_gain / avg_loss.replace(0, np.nan)
    rsi = 100 - (100 / (1 + rs))

    # A window with gains but no losses is the RSI upper bound, not "missing".
    # Leaving it as NaN would make downstream dropna() discard valid samples.
    gains_only = (avg_loss == 0) & (avg_gain > 0)
    return rsi.mask(gains_only, 100.0)

def calculate_macd_sma(close: pd.Series, fast: int = 8, slow: int = 16, signal: int = 6):
    """MACD-style oscillator built from simple (not exponential) moving averages.

    Args:
        close: Price series.
        fast: Window of the short moving average.
        slow: Window of the long moving average.
        signal: Window of the moving average applied to the MACD line.

    Returns:
        Tuple ``(macd_line, signal_line, histogram)`` where the histogram is
        the MACD line minus the signal line. Each average requires a full
        window, so the leading values are NaN.
    """
    def _sma(values: pd.Series, span: int) -> pd.Series:
        # Full-window simple mean: NaN until `span` observations are present.
        return values.rolling(window=span, min_periods=span).mean()

    macd = _sma(close, fast) - _sma(close, slow)
    smoothed = _sma(macd, signal)
    return macd, smoothed, macd - smoothed

def build_features_and_labels(df: pd.DataFrame,
                              horizon: int = 1,
                              use_log_return: bool = True,
                              max_lookback: int = 20) -> Tuple[pd.DataFrame, List[str]]:
    """Assemble the feature matrix and the two prediction targets.

    Args:
        df: Price frame with a "Close" column and, optionally, "Volume".
            Calendar features are read from its index.
        horizon: Number of rows ahead used for the forward return label.
        use_log_return: If True the label is ``log(close[t+h] / close[t])``,
            otherwise the simple percentage change over the same span.
        max_lookback: Upper bound applied to the Bollinger-band window only;
            every other window in this function is a fixed constant (≤ 20).

    Returns:
        ``(data, feature_cols)`` where `data` holds the features followed by
        `next_return` and `target_up` (1 when `next_return` > 0), with all rows
        containing NaN/inf removed, and `feature_cols` lists the feature
        column names in column order.
    """
    close = df["Close"].copy()
    volume = df["Volume"].copy() if "Volume" in df.columns else None
    feats = pd.DataFrame(index=df.index)

    # Trailing returns ending at t.
    for span in (1, 2, 5, 10):
        feats[f"ret_{span}d"] = close.pct_change(span)
    daily_ret = feats["ret_1d"]

    # Realized volatility of daily returns.
    for span in (5, 10, 20):
        feats[f"vol_{span}d"] = daily_ret.rolling(span, min_periods=span).std()

    # Simple moving averages and the price / average spreads derived from them.
    sma = {span: close.rolling(span, min_periods=span).mean() for span in (5, 10, 20)}
    for span in (5, 10, 20):
        feats[f"close_to_sma{span}"] = close / sma[span] - 1
    feats["sma5_sma10"] = sma[5] / sma[10] - 1
    feats["sma10_sma20"] = sma[10] / sma[20] - 1

    # Oscillators.
    feats["rsi_14"] = calculate_rsi_rolling(close, 14)
    macd_parts = calculate_macd_sma(close, fast=8, slow=16, signal=6)
    for name, values in zip(("macd", "macd_signal", "macd_hist"), macd_parts):
        feats[name] = values

    # Bollinger-band distances, expressed relative to the current close.
    band_window = min(20, max_lookback)
    band_roll = close.rolling(band_window, min_periods=band_window)
    band_sd = band_roll.std()
    band_mid = band_roll.mean()
    feats["bb_upper"] = (band_mid + 2 * band_sd - close) / close
    feats["bb_lower"] = (close - (band_mid - 2 * band_sd)) / close

    # Volume features are skipped entirely when the input has no Volume column.
    if volume is not None:
        feats["volume_ratio"] = volume / volume.rolling(20, min_periods=20).mean()
        feats["volume_change"] = volume.pct_change()

    # Calendar features taken from the index.
    calendar = {
        "day_of_week": df.index.dayofweek,
        "month": df.index.month,
        "quarter": df.index.quarter,
    }
    for name, values in calendar.items():
        feats[name] = values

    # Lagged daily returns.
    for lag in (1, 2, 3, 5, 10, 15, 20):
        feats[f"ret_lag_{lag}"] = daily_ret.shift(lag)

    # Forward-looking targets.
    next_ret = (
        np.log(close.shift(-horizon) / close)
        if use_log_return
        else close.pct_change(horizon).shift(-horizon)
    )
    went_up = (next_ret > 0).astype(int)

    # Infinite feature values become NaN so that dropna() removes those rows too.
    data = (
        feats.replace([np.inf, -np.inf], np.nan)
        .assign(next_return=next_ret, target_up=went_up)
        .dropna()
        .copy()
    )

    feature_cols = data.columns.drop(["next_return", "target_up"]).tolist()
    return data, feature_cols

# Build features & labels for the downloaded prices: one-step-ahead log returns,
# with the Bollinger window capped at 20 rows.
feat_df, feature_cols = build_features_and_labels(df_raw, horizon=1, use_log_return=True, max_lookback=20)
# Report how many rows survive NaN removal and how many feature columns were produced.
print(f"Samples after cleaning: {len(feat_df)}, Features: {len(feature_cols)}")
display(feat_df.tail())

In [None]:
# %% [markdown]
# ## Grid Search helpers (time-respecting, robust to single-class windows)
# - Train on training window, evaluate on contiguous validation window.
# - Classifier selection: prefer AUC if defined; otherwise fallback to Balanced Accuracy, then Accuracy.
# - Regressor selection: lowest RMSE on validation.
# - Always return a model (never None). For single-class training, use a constant-probability classifier.

In [None]:
# %%
def param_grid_product(param_grid: Dict[str, List[Any]]) -> List[Dict[str, Any]]:
    """Expand a parameter grid into every combination of its values.

    Args:
        param_grid: Mapping from parameter name to the list of values to try.

    Returns:
        One dict per combination, with keys in the grid's own order and the
        last key varying fastest. An empty list is returned when `param_grid`
        is not a dict or is empty.
    """
    if not (isinstance(param_grid, dict) and param_grid):
        return []
    names = list(param_grid)
    return [dict(zip(names, choice)) for choice in product(*param_grid.values())]

# Small, time-friendly grids (expand if needed).
# Each grid below expands to 2 * 2 * 2 * 1 * 2 * 2 * 1 = 32 combinations,
# all of which are fitted once per fold by the grid-search helpers.
DEFAULT_GRID_CLS: Dict[str, List[Any]] = {
    "learning_rate": [0.03, 0.07],
    "max_depth": [3, 4],
    "min_child_weight": [1, 3],
    "subsample": [0.8],           # can try [0.7, 0.9]
    "colsample_bytree": [0.8, 1.0],
    "reg_alpha": [0.0, 0.5],
    "reg_lambda": [1.0],          # can try [0.5, 1.0, 2.0]
}
# Regressor grid: currently the same values as the classifier grid.
DEFAULT_GRID_REG: Dict[str, List[Any]] = {
    "learning_rate": [0.03, 0.07],
    "max_depth": [3, 4],
    "min_child_weight": [1, 3],
    "subsample": [0.8],
    "colsample_bytree": [0.8, 1.0],
    "reg_alpha": [0.0, 0.5],
    "reg_lambda": [1.0],
}

# Keyword arguments shared by every XGBClassifier fit; grid values are layered on top.
# n_estimators is an upper bound when early stopping is enabled by the fit helper.
BASE_XGB_CLS = dict(
    n_estimators=500,
    random_state=SEED,
    eval_metric="logloss",
    verbosity=0,
    n_jobs=-1,
    tree_method="hist",
)
# Same baseline for XGBRegressor, monitored with RMSE instead of log-loss.
BASE_XGB_REG = dict(
    n_estimators=500,
    random_state=SEED,
    eval_metric="rmse",
    verbosity=0,
    n_jobs=-1,
    tree_method="hist",
)

def is_single_class(y: np.ndarray) -> bool:
    """Tell whether `y` holds fewer than two distinct values.

    An empty input counts as single-class. If the distinct values cannot be
    computed at all, the input is also reported as single-class so that
    callers take their degenerate-window fallback.
    """
    try:
        distinct = np.unique(y)
    except Exception:
        return True
    return distinct.size < 2

class ConstantProbaClassifier:
    """Stand-in classifier that predicts one fixed probability for class 1.

    Used when a training window cannot support a real model (e.g. it contains
    a single class). Exposes `fit`, `predict_proba` and `classes_` so it can be
    used wherever the notebook expects a fitted binary classifier.
    """

    def __init__(self, p: float):
        """Store the class-1 probability.

        Args:
            p: Probability of class 1. It is clipped to [1e-6, 1 - 1e-6].
               NaN (which is what the mean of an empty label array produces)
               is replaced by the uninformative value 0.5 so that
               `predict_proba` never emits NaN.
        """
        p = float(p)
        if np.isnan(p):
            p = 0.5
        self.p = float(np.clip(p, 1e-6, 1 - 1e-6))
        self.classes_ = np.array([0, 1])

    def fit(self, X, y):
        """No-op kept for interface compatibility; returns `self`."""
        return self

    def predict_proba(self, X):
        """Return an array of shape (len(X), 2) with columns [P(class 0), P(class 1)]."""
        n = len(X)
        p1 = np.full(n, self.p, dtype=float)
        p0 = 1.0 - p1
        return np.column_stack([p0, p1])

class ConstantMeanRegressor:
    """Stand-in regressor that outputs one fixed value for every row.

    The value is supplied by the caller at construction time (the notebook
    passes the mean of the training targets); `fit` does not change it.
    """

    def __init__(self, mu: float):
        """Store the constant prediction as a Python float."""
        self.mu = float(mu)

    def fit(self, X, y):
        """No-op kept for interface compatibility; returns `self`."""
        return self

    def predict(self, X):
        """Return a float array of length len(X) filled with the stored value."""
        constant = np.empty(len(X), dtype=float)
        constant.fill(self.mu)
        return constant

def fit_xgb_classifier(X_train, y_train, X_val=None, y_val=None, params: Optional[Dict[str, Any]] = None):
    """Train an XGBClassifier, or a constant model when the labels are degenerate.

    Args:
        X_train, y_train: Training features and binary labels.
        X_val, y_val: Optional validation window. When both are given and
            `X_val` is non-empty, it is used as the evaluation set with
            early stopping after 30 rounds without improvement.
        params: Hyperparameters layered on top of `BASE_XGB_CLS`.

    Returns:
        The fitted XGBClassifier, or a `ConstantProbaClassifier` predicting the
        training-label mean when `y_train` contains fewer than two classes.
    """
    if is_single_class(y_train):
        return ConstantProbaClassifier(p=float(np.mean(y_train)))

    model = XGBClassifier(**{**BASE_XGB_CLS, **(params or {})})

    fit_options: Dict[str, Any] = {"verbose": False}
    use_early_stopping = X_val is not None and y_val is not None and len(X_val) > 0
    if use_early_stopping:
        # Early stopping needs an evaluation set, so both are configured together.
        model.set_params(early_stopping_rounds=30)
        fit_options["eval_set"] = [(X_val, y_val)]

    model.fit(X_train, y_train, **fit_options)
    return model

def fit_xgb_regressor(X_train, y_train, X_val=None, y_val=None, params: Optional[Dict[str, Any]] = None):
    """Train an XGBRegressor, or a constant model when the targets are degenerate.

    Args:
        X_train, y_train: Training features and continuous targets.
        X_val, y_val: Optional validation window. When both are given and
            `X_val` is non-empty, it is used as the evaluation set with
            early stopping after 30 rounds without improvement.
        params: Hyperparameters layered on top of `BASE_XGB_REG`.

    Returns:
        The fitted XGBRegressor, or a `ConstantMeanRegressor` when `y_train`
        is empty (predicting 0.0) or has zero NaN-aware standard deviation
        (predicting its NaN-aware mean).
    """
    if len(y_train) == 0:
        return ConstantMeanRegressor(mu=0.0)
    if np.nanstd(y_train) == 0:
        return ConstantMeanRegressor(mu=float(np.nanmean(y_train)))

    model = XGBRegressor(**{**BASE_XGB_REG, **(params or {})})

    fit_options: Dict[str, Any] = {"verbose": False}
    use_early_stopping = X_val is not None and y_val is not None and len(X_val) > 0
    if use_early_stopping:
        # Early stopping needs an evaluation set, so both are configured together.
        model.set_params(early_stopping_rounds=30)
        fit_options["eval_set"] = [(X_val, y_val)]

    model.fit(X_train, y_train, **fit_options)
    return model

def safe_classification_score(y_true: np.ndarray, proba: np.ndarray) -> Tuple[float, Dict[str, float]]:
    """Score class-1 probabilities for model selection without ever raising.

    The selection score is the first metric that can be computed, in the order
    ROC AUC (only when `y_true` has both classes), balanced accuracy, accuracy.
    The latter two use hard predictions thresholded at 0.5.

    Args:
        y_true: True binary labels of the validation window.
        proba: Predicted probabilities of class 1.

    Returns:
        ``(score, metrics)`` where `metrics` always contains "val_auc",
        "val_bacc" and "val_acc" (NaN for any that could not be computed) and
        `score` is ``-inf`` when no metric yielded a finite value.
    """
    report: Dict[str, float] = {}
    selection = -np.inf

    # Preferred criterion: AUC, which is undefined for a single-class window.
    try:
        if is_single_class(y_true):
            report["val_auc"] = np.nan
        else:
            auc_value = roc_auc_score(y_true, proba)
            report["val_auc"] = float(auc_value)
            selection = float(auc_value)
    except Exception:
        report["val_auc"] = np.nan

    # Threshold-based metrics: always recorded, and used as the selection
    # score only while no earlier metric has produced a finite value.
    threshold_metrics = (
        ("val_bacc", balanced_accuracy_score),
        ("val_acc", accuracy_score),
    )
    for key, metric_fn in threshold_metrics:
        try:
            hard_pred = (proba >= 0.5).astype(int)
            value = metric_fn(y_true, hard_pred)
            report[key] = float(value)
            if not np.isfinite(selection):
                selection = float(value)
        except Exception:
            report[key] = np.nan

    # Normalise NaN (or any other non-finite value) to the "worst" score.
    return (selection if np.isfinite(selection) else -np.inf), report

def grid_search_classifier(
    X_train, y_train, X_val, y_val,
    param_grid: Dict[str, List[Any]] = DEFAULT_GRID_CLS
) -> Tuple[Any, Dict[str, Any], Dict[str, float]]:
    """Fit one classifier per grid combination and keep the best on validation.

    Candidates are ranked by the score from `safe_classification_score`
    (AUC, else balanced accuracy, else accuracy); ties keep the earliest
    combination.

    Args:
        X_train, y_train: Training window.
        X_val, y_val: Validation window used for early stopping and ranking.
        param_grid: Grid expanded with `param_grid_product`.

    Returns:
        ``(model, params, metrics)``. A model is always returned:
        - the best-scoring candidate when any candidate got a score above -inf,
        - otherwise the first candidate fitted (with NaN metrics),
        - otherwise (empty grid) a `ConstantProbaClassifier` at the training
          label mean with empty params.
        `metrics` always has the keys "val_auc", "val_bacc" and "val_acc".
    """
    top_score = -np.inf
    winner: Optional[Tuple[Any, Dict[str, Any], Dict[str, float]]] = None
    fallback: Optional[Tuple[Any, Dict[str, Any]]] = None

    for idx, combo in enumerate(param_grid_product(param_grid)):
        candidate = fit_xgb_classifier(X_train, y_train, X_val, y_val, params=combo)
        if idx == 0:
            # Remember the first fit in case no candidate can be scored.
            fallback = (candidate, combo)

        if len(X_val) > 0:
            val_proba = candidate.predict_proba(X_val)[:, 1]
            score, val_metrics = safe_classification_score(y_val, val_proba)
        else:
            score, val_metrics = -np.inf, {}

        # Strict comparison: a -inf score never wins, and ties keep the earlier combo.
        if score > top_score:
            top_score = score
            winner = (candidate, combo, val_metrics)

    if winner is None:
        if fallback is not None:
            winner = (*fallback, {})
        else:
            winner = (ConstantProbaClassifier(p=float(np.mean(y_train))), {}, {})

    model, chosen_params, metrics = winner
    for key in ("val_auc", "val_bacc", "val_acc"):
        metrics.setdefault(key, np.nan)

    return model, chosen_params, metrics

def grid_search_regressor(
    X_train, y_train, X_val, y_val,
    param_grid: Dict[str, List[Any]] = DEFAULT_GRID_REG
) -> Tuple[Any, Dict[str, Any], Dict[str, float]]:
    """Fit one regressor per grid combination and keep the lowest validation RMSE.

    Args:
        X_train, y_train: Training window.
        X_val, y_val: Validation window used for early stopping and ranking.
        param_grid: Grid expanded with `param_grid_product`.

    Returns:
        ``(model, params, {"val_rmse": value})``. A model is always returned:
        - the candidate with the lowest finite RMSE (ties keep the earliest),
        - otherwise the first candidate fitted, with a NaN RMSE,
        - otherwise (empty grid) a `ConstantMeanRegressor` at the training
          mean (0.0 for an empty window) with empty params and a NaN RMSE.
    """
    lowest_rmse = np.inf
    winner: Optional[Tuple[Any, Dict[str, Any]]] = None
    fallback: Optional[Tuple[Any, Dict[str, Any]]] = None

    for idx, combo in enumerate(param_grid_product(param_grid)):
        candidate = fit_xgb_regressor(X_train, y_train, X_val, y_val, params=combo)
        if idx == 0:
            # Remember the first fit in case no candidate can be scored.
            fallback = (candidate, combo)

        if len(X_val) > 0:
            rmse = np.sqrt(mean_squared_error(y_val, candidate.predict(X_val)))
        else:
            rmse = np.inf

        if np.isfinite(rmse) and rmse < lowest_rmse:
            lowest_rmse = rmse
            winner = (candidate, combo)

    if winner is not None:
        model, chosen_params = winner
        return model, chosen_params, {"val_rmse": lowest_rmse}

    if fallback is not None:
        model, chosen_params = fallback
    else:
        model = ConstantMeanRegressor(mu=float(np.mean(y_train) if len(y_train) else 0.0))
        chosen_params = {}
    return model, chosen_params, {"val_rmse": np.nan}

In [None]:
# %% [markdown]
# ## Fixed-window time-series CV with Grid Search
# - For each fold:
#   - Train on `train_window`, validate on `val_size`, test on `test_size`.
#   - Run grid search on the training/validation split to select hyperparameters.
#   - Evaluate on the held-out test window.
# - We record:
#   - Fold metrics (classification and regression),
#   - The best params selected for the fold,
#   - Validation scores (AUC/BalancedAcc/Acc for classifier, RMSE for regressor).

In [None]:
# %%
def eval_fold(clf, reg, X_test: np.ndarray, y_cls_test: np.ndarray, y_reg_test: np.ndarray) -> Dict[str, float]:
    """Compute test-window metrics for one classifier/regressor pair.

    Args:
        clf: Fitted classifier exposing `predict_proba`.
        reg: Fitted regressor exposing `predict`.
        X_test: Test features.
        y_cls_test: True binary labels.
        y_reg_test: True continuous targets.

    Returns:
        Dict with classification metrics ("cls_acc", "cls_bacc", "cls_auc";
        class predictions use a 0.5 probability threshold and AUC is NaN when
        it cannot be computed) and regression metrics ("reg_mae", "reg_rmse",
        "reg_r2", plus "reg_dir_acc", the share of rows where predicted and
        true targets have the same sign).
    """
    up_proba = clf.predict_proba(X_test)[:, 1]
    up_pred = (up_proba >= 0.5).astype(int)

    results: Dict[str, float] = {
        "cls_acc": accuracy_score(y_cls_test, up_pred),
        "cls_bacc": balanced_accuracy_score(y_cls_test, up_pred),
    }
    try:
        results["cls_auc"] = roc_auc_score(y_cls_test, up_proba)
    except Exception:
        # AUC is not defined for every window (e.g. a single class present).
        results["cls_auc"] = np.nan

    ret_pred = reg.predict(X_test)
    results["reg_mae"] = mean_absolute_error(y_reg_test, ret_pred)
    results["reg_rmse"] = np.sqrt(mean_squared_error(y_reg_test, ret_pred))
    results["reg_r2"] = r2_score(y_reg_test, ret_pred)
    results["reg_dir_acc"] = (np.sign(y_reg_test) == np.sign(ret_pred)).mean()
    return results

def fixed_window_cv_with_grid(feat_df: pd.DataFrame, feature_cols: List[str],
                              test_size: int = 14,
                              train_window: int = 30,
                              val_size: int = 10,
                              step_size: Optional[int] = None,
                              grid_cls: Dict[str, List[Any]] = DEFAULT_GRID_CLS,
                              grid_reg: Dict[str, List[Any]] = DEFAULT_GRID_REG) -> pd.DataFrame:
    """Walk forward through `feat_df` with fixed-length train/val/test windows.

    Each fold is laid out as consecutive row ranges
    ``[train | val | test]`` of lengths `train_window`, `val_size` and
    `test_size`. A classifier and a regressor are selected by grid search on
    the train/val pair and then scored on the test range.

    Args:
        feat_df: Frame containing `feature_cols`, "target_up" and "next_return".
        feature_cols: Feature column names.
        test_size: Rows per test window.
        train_window: Rows per training window.
        val_size: Rows per validation window.
        step_size: Rows between consecutive test starts; defaults to
            `test_size`, giving non-overlapping test windows.
        grid_cls: Classifier grid.
        grid_reg: Regressor grid.

    Returns:
        One row per fold with the `eval_fold` metrics, fold bookkeeping
        (sizes and first/last test index labels), the selected parameters and
        the validation scores. Empty when no fold fits in the data.
    """
    stride = test_size if step_size is None else step_size

    X_all = feat_df[feature_cols].astype(float).values
    y_cls_all = feat_df["target_up"].astype(int).values
    y_reg_all = feat_df["next_return"].astype(float).values

    n_rows = len(feat_df)
    first_test_start = train_window + val_size
    records: List[Dict[str, Any]] = []

    for test_start in range(first_test_start, n_rows - test_size + 1, stride):
        val_start = test_start - val_size
        train_start = val_start - train_window
        if train_start < 0:
            continue  # not enough history
        test_end = test_start + test_size

        # The three windows are adjacent, in chronological order.
        train_rows = slice(train_start, val_start)
        val_rows = slice(val_start, test_start)
        test_rows = slice(test_start, test_end)

        X_train, X_val, X_test = X_all[train_rows], X_all[val_rows], X_all[test_rows]

        best_clf, best_clf_params, clf_scores = grid_search_classifier(
            X_train, y_cls_all[train_rows], X_val, y_cls_all[val_rows], param_grid=grid_cls
        )
        best_reg, best_reg_params, reg_scores = grid_search_regressor(
            X_train, y_reg_all[train_rows], X_val, y_reg_all[val_rows], param_grid=grid_reg
        )

        record = eval_fold(best_clf, best_reg, X_test, y_cls_all[test_rows], y_reg_all[test_rows])
        record.update({
            # Folds are numbered in the order they are recorded.
            "fold": len(records),
            "train_size": len(X_train),
            "val_size": len(X_val),
            "test_size": len(X_test),
            "test_start": feat_df.index[test_start],
            "test_end": feat_df.index[test_end - 1],
            "clf_params": best_clf_params,
            "reg_params": best_reg_params,
            "clf_val_auc": clf_scores.get("val_auc", np.nan),
            "clf_val_bacc": clf_scores.get("val_bacc", np.nan),
            "clf_val_acc": clf_scores.get("val_acc", np.nan),
            "reg_val_rmse": reg_scores.get("val_rmse", np.nan),
        })
        records.append(record)

    return pd.DataFrame(records)

# Run fixed-window CV with grid search.
# Each fold: 60 training rows, 10 validation rows, 7 test rows; the window
# advances by 7 rows so consecutive test windows do not overlap.
# NOTE: train_window=60 is larger than the 20-day example given in the notebook header.
cv_results = fixed_window_cv_with_grid(
    feat_df, feature_cols,
    test_size=7,
    train_window=60,
    val_size=10,
    step_size=7,
    grid_cls=DEFAULT_GRID_CLS,
    grid_reg=DEFAULT_GRID_REG
)

print(f"Folds: {len(cv_results)}")
# An empty result has no columns, so only select the summary columns when they exist.
if {"clf_val_auc","reg_val_rmse"}.issubset(cv_results.columns):
    display(cv_results[["fold","test_start","test_end","train_size","val_size","test_size","clf_val_auc","reg_val_rmse"]].head())
else:
    display(cv_results.head())

In [None]:
# %% [markdown]
# ## Summaries

In [None]:
# %%
def summarize_metrics(df: pd.DataFrame, cols: List[str]) -> pd.DataFrame:
    """Return mean, std, min and max (as rows, in that order) for the columns in `cols`."""
    statistics = ['mean', 'std', 'min', 'max']
    selected = df[cols]
    return selected.agg(statistics)

# Show the raw per-fold table first, then the hyperparameters chosen in the earliest folds.
print("\n=== Per-fold metrics (head) ===")
display(cv_results.head())

print("\n=== Selected params (first 3 folds) ===")
display(cv_results[["fold", "clf_params", "reg_params"]].head(3))

# Across-fold mean/std/min/max of the test-window metrics.
print("\n=== Summary (Classification) ===")
display(summarize_metrics(cv_results, ["cls_acc", "cls_bacc", "cls_auc"]))

print("\n=== Summary (Regression on next_return) ===")
display(summarize_metrics(cv_results, ["reg_mae", "reg_rmse", "reg_r2", "reg_dir_acc"]))

# Naive baselines over the full sample.
# NOTE: these use every row of feat_df, including rows that never fall inside a
# test window, so they are a rough reference rather than a like-for-like comparison.
naive_mae = feat_df["next_return"].abs().mean()              # Predict 0 for regression
naive_dir_acc = max((feat_df["next_return"] > 0).mean(),     # Predict majority sign for direction
                    (feat_df["next_return"] <= 0).mean())

print("\n=== Naive baselines over full sample ===")
print(f"Regression MAE baseline (predict 0): {naive_mae:.6f}")
print(f"Direction baseline (majority sign): {naive_dir_acc:.3f}")

In [None]:
# %% [markdown]
# ## Visualization: predictions on the last fold
# This section:
# - Rebuilds the exact train/validation/test windows for the last CV fold
#   using the dates and sizes stored in `cv_results`.
# - Re-runs the per-fold grid search (time-respecting) to obtain the best models.
# - Plots:
#   1) Actual vs Predicted next_return over the test window,
#   2) Reconstructed price paths (actual vs predicted) starting from the
#      previous close before the test window (assuming log-returns).
# - Prints key metrics for the last fold.

In [None]:
# %%
from sklearn.metrics import r2_score  # already imported above; re-import harmless

def reconstruct_price_path(start_price: float, returns: np.ndarray, assume_log_return: bool = True) -> np.ndarray:
    """Compound a return series into price levels.

    Args:
        start_price: Price level before the first return is applied.
        returns: Per-step returns, in order.
        assume_log_return: If True the returns are log returns (summed and
            exponentiated); otherwise simple returns (``1 + r`` multiplied).

    Returns:
        Array with one price per return; element i is the price after
        applying returns 0..i. `start_price` itself is not included.
    """
    growth = np.exp(np.cumsum(returns)) if assume_log_return else np.cumprod(1.0 + returns)
    return start_price * growth

def plot_last_fold_predictions(feat_df: pd.DataFrame,
                               feature_cols: list,
                               df_raw: pd.DataFrame,
                               cv_results: pd.DataFrame,
                               assume_log_return: bool = True,
                               grid_cls: Optional[dict] = None,
                               grid_reg: Optional[dict] = None):
    """
    Re-train best models on the last fold's train/val windows (via grid search)
    and visualize predictions vs. actuals on the test window.

    The last row of `cv_results` supplies the test dates and window sizes; the
    train/val/test row ranges are rebuilt from them, the grid search is re-run,
    and two panels are drawn:
      1) actual vs predicted `next_return` on the test dates,
      2) price paths obtained by compounding those returns.

    Price-path alignment: `next_return` on row t is the return from t to the
    next bar, so the path is anchored at the close ON the first test date and
    each compounded point is plotted on the bar FOLLOWING its test date.
    This assumes the labels were built with horizon=1 and that the test rows
    are consecutive bars of `df_raw`.

    Args:
        feat_df: Feature frame used for the CV (features, "target_up", "next_return").
        feature_cols: Feature column names.
        df_raw: Raw price frame with a "Close" column; its index must contain
            the test dates for the price panel to be drawn.
        cv_results: Output of `fixed_window_cv_with_grid`.
        assume_log_return: Whether `next_return` holds log returns.
        grid_cls: Classifier grid; defaults to `DEFAULT_GRID_CLS`.
        grid_reg: Regressor grid; defaults to `DEFAULT_GRID_REG`.

    Raises:
        ValueError: If `cv_results` is empty, the fold dates are not in
            `feat_df.index`, or there is not enough history before the test window.
    """
    if cv_results is None or len(cv_results) == 0:
        raise ValueError("cv_results is empty. Run the CV first.")

    # Identify the last fold by row order
    row = cv_results.iloc[-1]
    train_size = int(row["train_size"])
    val_size = int(row["val_size"])
    test_size = int(row["test_size"])

    all_dates = feat_df.index
    # Map dates back to positional indices
    try:
        test_start = all_dates.get_loc(row["test_start"])
        test_end = all_dates.get_loc(row["test_end"]) + 1  # right-open slice
    except KeyError as err:
        raise ValueError("Fold dates not found in feat_df.index. Ensure cv_results matches feat_df.") from err

    # Recover train/val positions from sizes
    train_end = test_start
    val_start = train_end - val_size
    train_start = val_start - train_size
    if train_start < 0:
        raise ValueError("Not enough history to reconstruct the last fold windows.")

    # Slice arrays for the three windows
    X_all = feat_df[feature_cols].astype(float).values
    y_cls_all = feat_df["target_up"].astype(int).values
    y_reg_all = feat_df["next_return"].astype(float).values

    X_train = X_all[train_start: val_start]
    y_cls_train = y_cls_all[train_start: val_start]
    y_reg_train = y_reg_all[train_start: val_start]

    X_val = X_all[val_start: train_end]
    y_cls_val = y_cls_all[val_start: train_end]
    y_reg_val = y_reg_all[val_start: train_end]

    X_test = X_all[test_start: test_end]
    y_cls_test = y_cls_all[test_start: test_end]
    y_reg_test = y_reg_all[test_start: test_end]
    test_dates = all_dates[test_start: test_end]

    # Run per-fold grid search again to obtain best models (time-respecting)
    best_clf, best_clf_params, clf_scores = grid_search_classifier(
        X_train, y_cls_train, X_val, y_cls_val,
        param_grid=(grid_cls if grid_cls is not None else DEFAULT_GRID_CLS)
    )
    best_reg, best_reg_params, reg_scores = grid_search_regressor(
        X_train, y_reg_train, X_val, y_reg_val,
        param_grid=(grid_reg if grid_reg is not None else DEFAULT_GRID_REG)
    )

    # Test metrics come from the same helper the CV loop uses, so the numbers
    # shown here are computed exactly like those stored in cv_results.
    fold_metrics = eval_fold(best_clf, best_reg, X_test, y_cls_test, y_reg_test)
    acc = fold_metrics["cls_acc"]
    bacc = fold_metrics["cls_bacc"]
    auc = fold_metrics["cls_auc"]
    mae = fold_metrics["reg_mae"]
    rmse = float(fold_metrics["reg_rmse"])
    r2 = fold_metrics["reg_r2"]
    dir_acc = float(fold_metrics["reg_dir_acc"])

    # Regression predictions are needed again for plotting.
    y_pred_reg = best_reg.predict(X_test)

    # Plot: returns and reconstructed price paths
    fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
    ax1, ax2 = axes

    # Panel 1: next_return (actual vs predicted)
    ax1.plot(test_dates, y_reg_test, label='Actual next_return', color='tab:blue')
    ax1.plot(test_dates, y_pred_reg, label='Predicted next_return', color='tab:orange', alpha=0.85)
    ax1.axhline(0, color='gray', linestyle='--', alpha=0.5)
    ax1.set_title(f'Last Fold: next_return (Actual vs Predicted)\n'
                  f'Cls Acc={acc:.3f}, BAcc={bacc:.3f}, AUC={auc:.3f} | '
                  f'Reg MAE={mae:.6f}, RMSE={rmse:.6f}, R2={r2:.3f}, DirAcc={dir_acc:.3f}')
    ax1.set_ylabel('Return')
    ax1.legend(loc='best')
    ax1.grid(True, alpha=0.3)

    # Panel 2: reconstructed price paths.
    # Locate the test dates in the raw price index so that (a) the anchor is the
    # close on the first test date and (b) every compounded point can be placed
    # on the bar its return ends on (the bar after the test date).
    price_dates = None
    start_close = float("nan")
    try:
        raw_pos = df_raw.index.get_indexer(test_dates)
        aligned = (
            len(raw_pos) > 0
            and bool((raw_pos >= 0).all())
            and raw_pos[-1] + 1 < len(df_raw)
        )
        if aligned:
            start_close = float(df_raw["Close"].iloc[raw_pos[0]])
            price_dates = [test_dates[0]] + list(df_raw.index[raw_pos + 1])
    except (KeyError, IndexError, ValueError, pd.errors.InvalidIndexError):
        # Missing "Close" column or an index that cannot be aligned: skip the panel.
        price_dates = None

    if price_dates is not None:
        actual_path = reconstruct_price_path(start_close, y_reg_test, assume_log_return=assume_log_return)
        pred_path = reconstruct_price_path(start_close, y_pred_reg, assume_log_return=assume_log_return)

        ax2.plot(price_dates, [start_close] + list(actual_path), label='Actual price (reconstructed)', color='tab:green')
        ax2.plot(price_dates, [start_close] + list(pred_path), label='Predicted price (reconstructed)', color='tab:red', alpha=0.85)
        ax2.set_title('Last Fold: Reconstructed price path (from close on first test date)')
        ax2.set_ylabel('Price')
        ax2.legend(loc='best')
        ax2.grid(True, alpha=0.3)
        ax2.set_xlabel('Date')
    else:
        ax2.set_visible(False)
        # With sharex=True the upper panel's tick labels are hidden by default;
        # restore them because it is now the only visible panel.
        ax1.tick_params(labelbottom=True)
        ax1.set_xlabel('Date')

    plt.tight_layout()
    plt.show()

    # Print key info for traceability
    print("Last fold window sizes:", f"train={train_size}, val={val_size}, test={test_size}")
    print("Selected classifier params:", best_clf_params)
    print("Selected regressor params:", best_reg_params)
    print("Validation (from grid search) - cls:", clf_scores, "| reg:", reg_scores)
    print("Test metrics:",
          f"Acc={acc:.3f}, BAcc={bacc:.3f}, AUC={auc:.3f} | "
          f"MAE={mae:.6f}, RMSE={rmse:.6f}, R2={r2:.3f}, DirAcc={dir_acc:.3f}")

# Run the visualization for the last fold.
# No grids are passed, so the function falls back to DEFAULT_GRID_CLS / DEFAULT_GRID_REG.
plot_last_fold_predictions(
    feat_df=feat_df,
    feature_cols=feature_cols,
    df_raw=df_raw,
    cv_results=cv_results,
    assume_log_return=True  # set False if you engineered simple returns
)