In [2]:
import json
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.isotonic import IsotonicRegression

from sklearn.svm import SVC, SVR
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import GridSearchCV

In [3]:
# NOTE(review): absolute, user-specific path — anyone else running this
# notebook has to edit it before the data can be loaded.
DATA_DIR = Path("/Users/nitinlodha/Desktop/ML/ML_Project/Bybit_CSV_Data")
# Symbol -> CSV file. An entry is passed as `path` to run_partA_for_symbol,
# which reads it with pd.read_csv.
FILES = {
    "BTC": DATA_DIR / "Bybit_BTC.csv",
    "ETH": DATA_DIR / "Bybit_ETH.csv",
    "SOL": DATA_DIR / "Bybit_SOL.csv",
    "XRP": DATA_DIR / "Bybit_XRP.csv",
    "DOGE": DATA_DIR / "Bybit_DOGE.csv",
}

# Forecast horizons in bars; one model is trained per horizon.
HORIZONS = [1, 3, 6]
# Trading cost per horizon in basis points (converted via bp_to_logret before use).
DEFAULT_COST_BP = {1: 8.0, 3: 10.0, 6: 12.0}

def bp_to_logret(bp: float) -> float:
    """Convert a quantity in basis points to log-return units (1 bp = 1e-4)."""
    per_basis_point = 1e-4
    return bp * per_basis_point

# Policy thresholds
TAU_P = 0.60  # Probability gate for P(edge > cost)
TAU_MU = 0.0005  # Expected-return gate (log-return)
LAM = 2.0  # Kelly-lite multiplier
W_MAX = 0.50  # Max gross position (50% notional)

# Model version string.
MODEL_VERSION = "svm_rbf_multiH_v1.0"

In [None]:
def _find_close_column(df: pd.DataFrame) -> str:
    lower = {c.lower(): c for c in df.columns}
    for key in ("close", "closing_price", "close_price", "price_close", "last", "c"):
        if key in lower:
            return lower[key]
    float_cols = [c for c in df.columns if pd.api.types.is_float_dtype(df[c])]
    if len(float_cols) == 1:
        return float_cols[0]
    raise ValueError("Cannot identify 'close' column.")

def make_feature_table(close: pd.Series):
    """Build the feature table and feature matrix from close prices.

    Features (all computed from data up to and including the current row):
      - ret_1 / ret_3 / ret_6: log return over the last 1, 3 and 6 rows
      - vol_6 / vol_12: rolling standard deviation of ret_1
      - ma_ratio: log of the 10-row mean price over the 20-row mean price

    Rows containing any NaN (the warm-up period) are dropped.

    Returns:
        (table, X): `table` keeps the original index and holds `price` plus
        the feature columns; `X` is the float feature matrix without `price`,
        row-aligned with `table`.
    """
    table = pd.DataFrame(index=close.index)
    table["price"] = close.astype(float)
    px = table["price"]

    # Lagged log returns
    for column, lag in (("ret_1", 1), ("ret_3", 3), ("ret_6", 6)):
        table[column] = np.log(px / px.shift(lag))

    # Rolling volatility of the one-row return
    one_row_ret = table["ret_1"]
    for column, window in (("vol_6", 6), ("vol_12", 12)):
        table[column] = one_row_ret.rolling(window).std()

    # Short vs. long moving average, as a log ratio
    short_ma = px.rolling(10).mean()
    long_ma = px.rolling(20).mean()
    table["ma_ratio"] = np.log(short_ma / long_ma)

    table = table.dropna()

    feature_names = [name for name in table.columns if name != "price"]
    return table, table[feature_names].values.astype(float)

In [None]:
@dataclass
class SVMSnapshot:
    """Holds a trained SVM and its preprocessing for one horizon.

    Built by fit_svm_classifier / fit_svm_regressor and consumed by
    forecast_multi_horizon_svm, which scales inputs with `scaler` and
    dispatches on `model_type`.
    """
    # Fitted estimator. The classification path stores the
    # CalibratedClassifierCV wrapper, the regression path stores the SVR.
    clf: SVC | SVR | CalibratedClassifierCV
    # Scaler fitted on the training features; reused at forecast time.
    scaler: StandardScaler
    model_type: str  # 'classification' or 'regression'
    # Forecast horizon in bars that this model was trained for.
    horizon: int
    # Hyper-parameters the estimator was built with (kept for bookkeeping).
    C: float = 1.0
    gamma: float | str = 'scale'

def fit_svm_classifier(X_train: np.ndarray, y_train: np.ndarray,
                       horizon: int, random_state: int = 123,
                       C: float = 1.0, gamma: float | str = 'scale') -> SVMSnapshot:
    """
    Fit a probability-calibrated RBF SVM for binary edge detection.

    Features are standardised with a scaler fitted on `X_train`; an SVC
    (balanced class weights) is then wrapped in CalibratedClassifierCV with
    isotonic calibration over 3 folds.

    Args:
        X_train: Feature matrix (T, D)
        y_train: Binary labels, 1 if return > cost, else 0
        horizon: Forecast horizon in bars (stored on the snapshot only)
        random_state: Seed forwarded to the underlying SVC
        C: SVM regularization parameter
        gamma: RBF kernel parameter

    Returns:
        SVMSnapshot holding the calibrated classifier and the fitted scaler

    NOTE(review): the scaler is fitted on all of X_train before the
    calibration folds are formed, and an integer `cv` is not a time-ordered
    split — confirm both are acceptable for this time-series use.
    """
    feature_scaler = StandardScaler()
    feature_scaler.fit(X_train)
    Z_train = feature_scaler.transform(X_train)

    # probability=False: probabilities come from the calibration wrapper instead.
    svc_settings = dict(
        kernel='rbf',
        C=C,
        gamma=gamma,
        class_weight='balanced',
        random_state=random_state,
        probability=False,
        cache_size=500,  # MB of kernel cache
    )
    calibrated = CalibratedClassifierCV(
        SVC(**svc_settings),
        method='isotonic',
        cv=3,
        n_jobs=-1
    )
    calibrated.fit(Z_train, y_train)

    return SVMSnapshot(
        clf=calibrated,
        scaler=feature_scaler,
        model_type='classification',
        horizon=horizon,
        C=C,
        gamma=gamma
    )

def fit_svm_regressor(X_train: np.ndarray, y_train: np.ndarray,
                      horizon: int, random_state: int = 123,
                      C: float = 1.0, gamma: float | str = 'scale',
                      epsilon: float = 0.1) -> SVMSnapshot:
    """
    Fit an RBF-kernel SVR that predicts continuous returns directly.

    Args:
        X_train: Feature matrix
        y_train: Continuous log returns
        horizon: Forecast horizon in bars (stored on the snapshot only)
        random_state: Accepted for signature parity with fit_svm_classifier;
            it is not used anywhere in this function
        C: SVR regularization parameter
        gamma: RBF kernel parameter
        epsilon: Epsilon-insensitive loss parameter

    Returns:
        SVMSnapshot holding the fitted regressor and the fitted scaler
    """
    feature_scaler = StandardScaler()
    feature_scaler.fit(X_train)
    Z_train = feature_scaler.transform(X_train)

    regressor = SVR(kernel='rbf', C=C, gamma=gamma, epsilon=epsilon, cache_size=500)
    regressor.fit(Z_train, y_train)

    return SVMSnapshot(
        clf=regressor,
        scaler=feature_scaler,
        model_type='regression',
        horizon=horizon,
        C=C,
        gamma=gamma
    )

def forecast_multi_horizon_svm(
    snapshots: dict[int, SVMSnapshot],
    X_seg: np.ndarray,
    price_seg: pd.Series,
    horizons: list[int],
    cost_bp: dict[int, float] | None = None,
    n_bootstrap: int = 0  # Set to 0 to disable bootstrap (faster)
):
    """
    Generate multi-horizon forecasts using trained SVMs.

    For every horizon h that has a snapshot, the first ``len(X_seg) - h`` rows
    of the segment are scaled with the snapshot's scaler and scored. The last
    h rows are left out, so every forecast row has a realised h-bar return
    inside the same segment.

    Classification snapshots produce ``p_edge_raw`` from ``predict_proba`` and
    a two-valued ``mu`` derived from the hard prediction; regression snapshots
    produce ``mu`` from ``predict`` and ``p_edge_raw`` from a logistic squash
    of ``mu - cost`` (slope 10). With ``n_bootstrap == 0`` the ``std`` and
    quantile columns are fixed placeholder offsets around ``mu``, not
    estimated uncertainty.

    Args:
        snapshots: Dict mapping horizon -> trained SVMSnapshot
        X_seg: Feature matrix for forecast segment
        price_seg: Corresponding price series (row-aligned with X_seg)
        horizons: List of forecast horizons
        cost_bp: Trading costs in basis points; defaults to DEFAULT_COST_BP
            with 8.0 bp for horizons it does not list
        n_bootstrap: Number of bootstrap samples (0 = disabled)

    Returns:
        out: dict[horizon] -> DataFrame indexed like price_seg (minus the last
            h rows) with columns mu, std, p_edge_raw, ret_q10/q50/q90,
            price_pred and price_q10/q50/q90. Horizons without a snapshot are
            absent; horizons longer than the segment map to an empty frame.
        cost_log: dict[horizon] -> cost in log-return units

    Raises:
        ValueError: if a snapshot's model_type is neither 'classification'
            nor 'regression'.
    """
    if cost_bp is None:
        cost_bp = {h: DEFAULT_COST_BP.get(h, 8.0) for h in horizons}
    cost_log = {h: bp_to_logret(float(cost_bp[h])) for h in horizons}

    Tseg = X_seg.shape[0]
    idx = price_seg.index
    out = {}

    for h in horizons:
        if h not in snapshots:
            print(f"Warning: No model for horizon {h}, skipping")
            continue

        snap = snapshots[h]
        out_h = pd.DataFrame(index=idx[:-h] if h < Tseg else idx[:0])
        T_h = Tseg - h

        if T_h <= 0:
            out[h] = out_h
            continue

        # Scale features
        X_scaled = snap.scaler.transform(X_seg[:T_h])

        if snap.model_type == 'classification':
            # ========================================
            # Classification: Predict P(return > cost)
            # ========================================

            # Calibrated probability of the second class column
            # (assumes labels are {0, 1} and both were seen in training).
            p_edge = snap.clf.predict_proba(X_scaled)[:, 1]

            # Expected return estimation
            # Option 1: Threshold-based (simple)
            y_pred = snap.clf.predict(X_scaled)
            mu = np.where(y_pred == 1,
                         cost_log[h] + 0.002,  # Small positive edge
                         -cost_log[h] - 0.001)  # Small negative edge

            # Option 2: Probability-weighted (better)
            # mu = p_edge * (cost_log[h] + 0.002) + (1 - p_edge) * (-cost_log[h] - 0.001)

            # Uncertainty via bootstrap (optional)
            if n_bootstrap > 0:
                std_h, q10, q50, q90 = _bootstrap_uncertainty_classification(
                    snap, X_scaled, n_bootstrap, cost_log[h]
                )
            else:
                std_h = np.full(T_h, 0.01)  # Default uncertainty
                q10 = mu - 0.02
                q50 = mu
                q90 = mu + 0.02

        elif snap.model_type == 'regression':
            # ========================================
            # Regression: Predict continuous returns
            # ========================================

            mu = snap.clf.predict(X_scaled)

            # Probability via sigmoid transform
            p_edge = 1.0 / (1.0 + np.exp(-10 * (mu - cost_log[h])))

            if n_bootstrap > 0:
                std_h, q10, q50, q90 = _bootstrap_uncertainty_regression(
                    snap, X_scaled, n_bootstrap
                )
            else:
                std_h = np.full(T_h, 0.015)
                q10 = mu - 0.025
                q50 = mu
                q90 = mu + 0.025

        else:
            # Without this guard an unknown type would either hit a NameError
            # or silently reuse the previous horizon's arrays below.
            raise ValueError(
                f"Unknown model_type {snap.model_type!r} for horizon {h}; "
                "expected 'classification' or 'regression'"
            )

        # Populate DataFrame
        p_now = price_seg.iloc[:T_h].values

        out_h['mu'] = mu
        out_h['std'] = std_h
        out_h['p_edge_raw'] = p_edge
        out_h['ret_q10'] = q10
        out_h['ret_q50'] = q50
        out_h['ret_q90'] = q90
        out_h['price_pred'] = p_now * np.exp(mu)
        out_h['price_q10'] = p_now * np.exp(q10)
        out_h['price_q50'] = p_now * np.exp(q50)
        out_h['price_q90'] = p_now * np.exp(q90)

        out[h] = out_h

    return out, cost_log

def _bootstrap_uncertainty_classification(snap, X_scaled, n_bootstrap, cost_threshold):
    """Bootstrap resampling for classification uncertainty."""
    T = len(X_scaled)
    rng = np.random.default_rng(42)
    probs = []

    for _ in range(n_bootstrap):
        boot_idx = rng.choice(T, size=T, replace=True)
        X_boot = X_scaled[boot_idx]
        p_boot = snap.clf.predict_proba(X_boot)[:, 1]
        probs.append(p_boot[:T])

    probs = np.array(probs)
    std = np.std(probs, axis=0)

    # Convert probabilities to return estimates for quantiles
    # This is approximate - better to train a separate regressor
    mu_samples = probs * (cost_threshold + 0.002) + (1 - probs) * (-cost_threshold - 0.001)

    q10 = np.percentile(mu_samples, 10, axis=0)
    q50 = np.percentile(mu_samples, 50, axis=0)
    q90 = np.percentile(mu_samples, 90, axis=0)

    return std, q10, q50, q90

def _bootstrap_uncertainty_regression(snap, X_scaled, n_bootstrap):
    """Bootstrap resampling for regression uncertainty."""
    T = len(X_scaled)
    rng = np.random.default_rng(42)
    preds = []

    for _ in range(n_bootstrap):
        boot_idx = rng.choice(T, size=T, replace=True)
        X_boot = X_scaled[boot_idx]
        pred_boot = snap.clf.predict(X_boot)
        preds.append(pred_boot[:T])

    preds = np.array(preds)
    std = np.std(preds, axis=0)
    q10 = np.percentile(preds, 10, axis=0)
    q50 = np.percentile(preds, 50, axis=0)
    q90 = np.percentile(preds, 90, axis=0)

    return std, q10, q50, q90

In [None]:
@dataclass
class ProbCalibrator:
    """Post-hoc probability calibrator returned by fit_prob_calibrator_isotonic."""
    # "isotonic" when a fitted model is attached, "identity" for a pass-through.
    method: str
    # Fitted isotonic model; None when method is "identity".
    iso: IsotonicRegression | None = None

def fit_prob_calibrator_isotonic(p_raw: np.ndarray, y: np.ndarray,
                                 min_points: int = 30) -> ProbCalibrator:
    """Fit an isotonic map from raw probabilities to observed outcomes.

    Non-finite (probability, outcome) pairs are discarded first. If fewer
    than `min_points` pairs remain, or the remaining probabilities take fewer
    than three distinct values, an identity calibrator is returned instead.
    """
    probs = np.asarray(p_raw, float)
    outcomes = np.asarray(y, float)

    usable = np.isfinite(probs) & np.isfinite(outcomes)
    probs, outcomes = probs[usable], outcomes[usable]

    too_few = probs.size < min_points
    if too_few or np.unique(probs).size < 3:
        return ProbCalibrator(method="identity", iso=None)

    # Predictions outside the fitted range are clipped to its endpoints.
    fitted = IsotonicRegression(out_of_bounds="clip")
    fitted.fit(probs, outcomes)
    return ProbCalibrator(method="isotonic", iso=fitted)

def apply_prob_calibrator(cal: ProbCalibrator, p_raw: np.ndarray) -> np.ndarray:
    """Map raw probabilities through `cal`; any non-isotonic calibrator is a pass-through."""
    raw = np.asarray(p_raw, float)
    if cal.method != "isotonic":
        return raw
    return cal.iso.predict(raw)

@dataclass
class IntervalCalibrator:
    """Symmetric prediction-interval calibrator returned by fit_conformal_interval."""
    # Label of the fitting method ("conformal_abs" when built by fit_conformal_interval).
    method: str
    # Half-width added to / subtracted from the point forecast.
    q_alpha: float
    # Miscoverage level the half-width was fitted for (e.g. 0.2 -> 80% target).
    alpha: float

def fit_conformal_interval(residuals: np.ndarray, alpha: float = 0.2) -> IntervalCalibrator:
    """Derive a symmetric interval half-width from calibration residuals.

    The half-width is the (1 - alpha) quantile of the absolute finite
    residuals, or 0.0 when no finite residual is available.

    NOTE(review): this is the plain empirical quantile; no finite-sample
    (n + 1) adjustment is applied — confirm whether one is wanted.
    """
    finite_resid = np.asarray(residuals, float)
    finite_resid = finite_resid[np.isfinite(finite_resid)]

    if finite_resid.size > 0:
        half_width = float(np.quantile(np.abs(finite_resid), 1 - alpha))
    else:
        half_width = 0.0

    return IntervalCalibrator(method="conformal_abs", q_alpha=half_width, alpha=alpha)

def apply_conformal_interval(cal: IntervalCalibrator, mu: np.ndarray):
    """Return (lower, upper) bounds: `mu` shifted down and up by the calibrated half-width."""
    center = np.asarray(mu, float)
    half_width = cal.q_alpha
    lower = center - half_width
    upper = center + half_width
    return lower, upper

def cumulative_log_returns(price: pd.Series, h: int) -> pd.Series:
    """Forward log return log(P[t+h] / P[t]) indexed at t, with NaN rows dropped.

    The last `h` rows have no future price and therefore disappear from the
    result.
    """
    future_price = price.shift(-h)
    forward_ret = np.log(future_price / price)
    return forward_ret.dropna()

def brier_score(y: np.ndarray, p: np.ndarray) -> float:
    """Mean squared difference between outcomes `y` and probabilities `p`."""
    gap = y - p
    return float(np.mean(gap ** 2))

def expected_calibration_error(y: np.ndarray, p: np.ndarray, bins: int = 10) -> float:
    """Expected Calibration Error over equal-width probability bins on [0, 1].

    For each non-empty bin the absolute gap between the mean outcome and the
    mean predicted probability is weighted by the share of samples in it.

    Args:
        y: Binary outcomes (0/1), same length as `p`
        p: Predicted probabilities
        bins: Number of equal-width bins

    Returns:
        The weighted average gap; 0.0 for empty input.
    """
    y = np.asarray(y, float)
    p = np.asarray(p, float)
    if p.size == 0:
        return 0.0

    edges = np.linspace(0, 1, bins + 1)
    ece = 0.0
    for i in range(bins):
        # Bins are half-open [lo, hi) except the last, which is closed so that
        # p == 1.0 (common after isotonic calibration) is not silently dropped.
        below_hi = (p <= edges[i + 1]) if i == bins - 1 else (p < edges[i + 1])
        m = (p >= edges[i]) & below_hi
        n_bin = int(m.sum())
        if n_bin == 0:
            continue
        ece += (n_bin / p.size) * np.abs(np.mean(y[m]) - np.mean(p[m]))
    return float(ece)


In [None]:
def purged_walkforward_slices(n: int, n_folds: int = 3, embargo: int = 24):
    """Generate expanding-window (train, val, test) index ranges.

    The data is cut into ``n_folds + 2`` equal blocks. Fold k trains on
    everything from row 0 up to the end of block k, validates on the next
    block and tests on the one after, with `embargo` rows skipped before the
    validation and before the test range. Generation stops at the first fold
    whose test range is shorter than half a block.

    Returns:
        List of ((train_start, train_end), (val_start, val_end),
        (test_start, test_end)) tuples with end-exclusive bounds.
    """
    block = n // (n_folds + 2)
    shortest_test = block // 2
    folds = []

    for k in range(1, n_folds + 1):
        train_stop = k * block
        val_lo = train_stop + embargo
        val_hi = val_lo + block
        test_lo = val_hi + embargo
        test_hi = min(test_lo + block, n)

        if test_hi - test_lo < shortest_test:
            break

        folds.append(((0, train_stop), (val_lo, val_hi), (test_lo, test_hi)))

    return folds

In [None]:
def run_partA_for_symbol(symbol: str, path: Path,
                         horizons: list[int] = HORIZONS,
                         n_folds: int = 3,
                         embargo: int = 24,
                         model_type: str = 'classification',
                         n_bootstrap: int = 0):
    """
    Train and evaluate SVM models for one symbol with walk-forward folds.

    Per fold: fit one model per horizon on the training range, forecast the
    validation and test ranges, fit an isotonic probability calibrator and an
    absolute-residual interval on the validation forecasts, then apply both
    to the test forecasts.

    Args:
        symbol: Asset symbol (used in log messages only)
        path: Path to CSV file
        horizons: Forecast horizons in bars
        n_folds: Number of walk-forward folds
        embargo: Embargo period between folds
        model_type: 'classification'; any other value selects regression
        n_bootstrap: Bootstrap samples for uncertainty (0 = disabled)

    Returns:
        dict[horizon] -> {"val": DataFrame, "test": DataFrame, "diag": list}.
        "val"/"test" are the per-fold frames concatenated and sorted by index
        (empty DataFrame when nothing was produced); "diag" holds one dict per
        fold with brier_val, ece_val and pi_coverage_val.

    Note:
        The diagnostics are computed on the same validation rows that the
        calibrators were fitted on, so they are in-sample figures.
    """
    # Load data
    df_raw = pd.read_csv(path)
    close_col = _find_close_column(df_raw)
    close = pd.Series(df_raw[close_col].astype(float).values,
                      index=pd.RangeIndex(len(df_raw)), name="close")

    feat_df, X = make_feature_table(close)
    price = feat_df["price"]
    n = len(price)

    folds = purged_walkforward_slices(n, n_folds=n_folds, embargo=embargo)

    # Label threshold per horizon. Uses the same fallback (8.0 bp) as
    # forecast_multi_horizon_svm so horizons missing from DEFAULT_COST_BP
    # train and evaluate against the same cost instead of raising KeyError.
    label_cost_log = {h: bp_to_logret(DEFAULT_COST_BP.get(h, 8.0)) for h in horizons}

    results = {h: {"val": [], "test": [], "diag": []} for h in horizons}

    print(f"\n{'='*60}")
    print(f"Training SVM for {symbol}")
    print(f"Model type: {model_type}")
    print(f"Horizons: {horizons}")
    print(f"Folds: {n_folds}")
    print(f"{'='*60}\n")

    for fold_idx, ((s0,e0), (s1,e1), (s2,e2)) in enumerate(folds):
        print(f"Fold {fold_idx + 1}/{len(folds)}: Train[{s0}:{e0}] Val[{s1}:{e1}] Test[{s2}:{e2}]")

        # ========================================
        # KEY CHANGE: Train one SVM per horizon
        # ========================================
        snapshots = {}

        for h in horizons:
            print(f"  Training h={h}...", end=" ")

            # Create labels for this horizon
            ret_train = cumulative_log_returns(price.iloc[s0:e0], h)

            # Align features and labels (the last h training rows have no label)
            n_train = min(len(X[s0:e0]), len(ret_train))
            X_train_aligned = X[s0:s0+n_train]
            ret_train_aligned = ret_train.iloc[:n_train]

            if len(X_train_aligned) < 50:
                print("SKIP (insufficient data)")
                continue

            if model_type == 'classification':
                # Binary classification: profitable vs not
                y_train = (ret_train_aligned.values > label_cost_log[h]).astype(int)

                # Check class balance
                pos_frac = y_train.mean()
                if pos_frac < 0.1 or pos_frac > 0.9:
                    print(f"WARN (imbalanced: {pos_frac:.2%} positive)")

                snap = fit_svm_classifier(
                    X_train_aligned, y_train,
                    horizon=h,
                    random_state=123 + fold_idx,
                    C=10.0,  # Can tune this
                    gamma='scale'
                )
            else:
                # Regression: predict actual returns
                y_train = ret_train_aligned.values
                snap = fit_svm_regressor(
                    X_train_aligned, y_train,
                    horizon=h,
                    random_state=123 + fold_idx,
                    C=10.0,
                    gamma='scale',
                    epsilon=0.01
                )

            snapshots[h] = snap
            print("✓")

        if not snapshots:
            print("  No models trained, skipping fold")
            continue

        # Forecast on validation and test
        print("  Forecasting validation...", end=" ")
        out_val_raw, cost_log = forecast_multi_horizon_svm(
            snapshots=snapshots,
            X_seg=X[s1:e1],
            price_seg=price.iloc[s1:e1],
            horizons=horizons,
            n_bootstrap=n_bootstrap
        )
        print("✓")

        print("  Forecasting test...", end=" ")
        out_test_raw, _ = forecast_multi_horizon_svm(
            snapshots=snapshots,
            X_seg=X[s2:e2],
            price_seg=price.iloc[s2:e2],
            horizons=horizons,
            n_bootstrap=n_bootstrap
        )
        print("✓")

        # Calibration (same as HMM version)
        for h in horizons:
            if h not in out_val_raw or h not in out_test_raw:
                continue

            ret_val = cumulative_log_returns(price.iloc[s1:e1], h)
            idx_common = out_val_raw[h].index.intersection(ret_val.index)

            if len(idx_common) == 0:
                continue

            dfV = out_val_raw[h].loc[idx_common]
            maskV = np.isfinite(dfV["p_edge_raw"].values) & np.isfinite(dfV["mu"].values)
            # Copy after filtering so the column assignments below act on an
            # independent frame rather than on a filtered selection.
            dfV = dfV[maskV].copy()

            if len(dfV) < 20:
                continue

            ret_val_aligned = ret_val.loc[dfV.index]
            y_val = (ret_val_aligned.values > cost_log[h]).astype(int)
            p_raw_val = dfV["p_edge_raw"].values
            mu_val = dfV["mu"].values

            # Fit calibrators
            cal_prob = fit_prob_calibrator_isotonic(p_raw_val, y_val, min_points=20)
            resid_val = ret_val_aligned.values - mu_val
            cal_pi = fit_conformal_interval(resid_val, alpha=0.2)

            # Apply to test
            ret_test = cumulative_log_returns(price.iloc[s2:e2], h)
            idx_test_common = out_test_raw[h].index.intersection(ret_test.index)
            dfT = out_test_raw[h].loc[idx_test_common]

            maskT = np.isfinite(dfT["p_edge_raw"].values) & np.isfinite(dfT["mu"].values)
            dfT = dfT[maskT].copy()

            if len(dfT) == 0:
                continue

            dfT["p_edge"] = apply_prob_calibrator(cal_prob, dfT["p_edge_raw"].values)
            mu_test = dfT["mu"].values
            lo, hi = apply_conformal_interval(cal_pi, mu_test)
            dfT["ret_lo"] = lo
            dfT["ret_hi"] = hi

            p_now = price.loc[dfT.index].values
            dfT["price_lo"] = p_now * np.exp(lo)
            dfT["price_hi"] = p_now * np.exp(hi)

            dfT["edge"] = dfT["mu"] - cost_log[h]
            dfT["risk_edge"] = (dfT["mu"] - cost_log[h]) / (dfT["std"] + 1e-12)

            results[h]["test"].append(dfT)

            # Diagnostics (in-sample: same rows the calibrators were fitted on)
            if cal_prob.method == "isotonic":
                p_cal_val = apply_prob_calibrator(cal_prob, p_raw_val)
                brier = brier_score(y_val, p_cal_val)
                ece = expected_calibration_error(y_val, p_cal_val)
            else:
                brier = brier_score(y_val, p_raw_val)
                ece = expected_calibration_error(y_val, p_raw_val)

            coverage = float(np.mean((resid_val >= -cal_pi.q_alpha) & (resid_val <= cal_pi.q_alpha)))

            diag = {
                "h": h,
                "brier_val": float(brier),
                "ece_val": float(ece),
                "pi_coverage_val": coverage
            }
            results[h]["diag"].append(diag)

            # Store validation
            dfV["p_edge"] = apply_prob_calibrator(cal_prob, dfV["p_edge_raw"].values)
            loV, hiV = apply_conformal_interval(cal_pi, mu_val)
            dfV["ret_lo"], dfV["ret_hi"] = loV, hiV
            results[h]["val"].append(dfV)

    # Concatenate folds
    for h in horizons:
        for split in ("val", "test"):
            if results[h][split]:
                results[h][split] = pd.concat(results[h][split]).sort_index()
            else:
                results[h][split] = pd.DataFrame()

    print(f"\nCompleted {symbol}\n")
    return results


In [None]:
if __name__ == "__main__":
    # Driver: run the walk-forward pipeline for one symbol and print a
    # per-horizon summary of the test forecasts and validation diagnostics.
    symbol = "BTC"
    results = run_partA_for_symbol(
        symbol=symbol,
        path=FILES[symbol],
        horizons=HORIZONS,
        n_folds=3,
        embargo=24,
        model_type='classification',  # or 'regression'
        n_bootstrap=0  # Set to 50 for uncertainty estimates
    )

    # Print summary ("\n" is a real newline here; the doubled backslash used
    # before printed the two characters backslash + n).
    print(f"\n{'='*60}")
    print(f"RESULTS FOR {symbol}")
    print(f"{'='*60}")

    for h in HORIZONS:
        test_df = results[h]['test']
        diag_list = results[h]['diag']

        if len(test_df) > 0:
            print(f"\nHorizon {h}:")
            print(f"  Test samples: {len(test_df)}")
            print(f"  Mean p_edge: {test_df['p_edge'].mean():.3f}")
            print(f"  Mean mu: {test_df['mu'].mean():.4f}")

            if diag_list:
                # Diagnostics are averaged across folds.
                avg_brier = np.mean([d['brier_val'] for d in diag_list])
                avg_ece = np.mean([d['ece_val'] for d in diag_list])
                avg_cov = np.mean([d['pi_coverage_val'] for d in diag_list])
                print(f"  Avg Brier: {avg_brier:.4f}")
                print(f"  Avg ECE: {avg_ece:.4f}")
                print(f"  Avg PI coverage: {avg_cov:.2%}")


Training SVM for BTC
Model type: classification
Horizons: [1, 3, 6]
Folds: 3
Fold 1/3: Train[0:1749] Val[1773:3522] Test[3546:5295]
  Training h=1... ✓
  Training h=3... ✓
  Training h=6... ✓
  Forecasting validation... ✓
  Forecasting test... ✓
Fold 2/3: Train[0:3498] Val[3522:5271] Test[5295:7044]
  Training h=1... ✓
  Training h=3... ✓
  Training h=6... ✓
  Forecasting validation... ✓
  Forecasting test... ✓
Fold 3/3: Train[0:5247] Val[5271:7020] Test[7044:8747]
  Training h=1... ✓
  Training h=3... ✓
  Training h=6... ✓
  Forecasting validation... ✓
  Forecasting test... ✓
\nCompleted BTC\n
RESULTS FOR BTC
\nHorizon 1:
  Test samples: 5198
  Mean p_edge: 0.458
  Mean mu: -0.0012
  Avg Brier: 0.2450
  Avg ECE: 0.0000
  Avg PI coverage: 79.98%
\nHorizon 3:
  Test samples: 5192
  Mean p_edge: 0.469
  Mean mu: -0.0012
  Avg Brier: 0.2463
  Avg ECE: 0.0000
  Avg PI coverage: 80.01%
\nHorizon 6:
  Test samples: 5183
  Mean p_edge: 0.487
  Mean mu: -0.0019
  Avg Brier: 0.2472
  Avg ECE: 0

In [None]:
#!/usr/bin/env python3
"""
SVM Multi-Horizon Trading Signal Generator
Complete implementation for cryptocurrency trading with SVM classification/regression

This replaces the HMM-based approach with Support Vector Machines.
Key features:
- One SVM model per horizon (1, 3, 6 bars)
- Calibrated probabilities for edge detection
- Walk-forward cross-validation
- Isotonic + conformal calibration
- JSON feed output for trading agent
"""

import json
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.isotonic import IsotonicRegression
from sklearn.svm import SVC, SVR
from sklearn.calibration import CalibratedClassifierCV


# =========================================
# CONFIGURATION
# =========================================
# NOTE(review): absolute, user-specific path — anyone else running this
# script has to edit it before the data can be loaded.
# These settings repeat the configuration cell near the top of the notebook;
# when both cells run, the later assignment wins.
DATA_DIR = Path("/Users/nitinlodha/Desktop/ML/ML_Project/Bybit_CSV_Data")
# Symbol -> CSV file inside DATA_DIR.
FILES = {
    "BTC": DATA_DIR / "Bybit_BTC.csv",
    "ETH": DATA_DIR / "Bybit_ETH.csv",
    "SOL": DATA_DIR / "Bybit_SOL.csv",
    "XRP": DATA_DIR / "Bybit_XRP.csv",
    "DOGE": DATA_DIR / "Bybit_DOGE.csv",
}

HORIZONS = [1, 3, 6]  # Forecast horizons in 4-hour bars
DEFAULT_COST_BP = {1: 8.0, 3: 10.0, 6: 12.0}  # Trading costs in basis points

# Policy thresholds
TAU_P = 0.60        # Probability gate for P(edge > cost)
TAU_MU = 0.0005     # Expected-return gate (log-return)
LAM = 2.0           # Kelly-lite multiplier
W_MAX = 0.50        # Max gross position (50% notional)

# Version labels for the model and for the calibration scheme.
MODEL_VERSION = "svm_rbf_multiH_v1.0"
CALIBRATION_VERSION = "iso+conformal_v1"


# =========================================
# UTILITY FUNCTIONS
# =========================================
def bp_to_logret(bp: float) -> float:
    """Express a basis-point amount in log-return units (1 bp = 1e-4)."""
    scale = 1e-4  # one basis point
    return bp * scale


def _find_close_column(df: pd.DataFrame) -> str:
    """Find the close price column in a dataframe."""
    lower = {c.lower(): c for c in df.columns}
    for key in ("close", "closing_price", "close_price", "price_close", "last", "c"):
        if key in lower:
            return lower[key]
    # Fallback: any single float column
    float_cols = [c for c in df.columns if pd.api.types.is_float_dtype(df[c])]
    if len(float_cols) == 1:
        return float_cols[0]
    raise ValueError("Cannot identify 'close' column.")


def cumulative_log_returns(price: pd.Series, h: int) -> pd.Series:
    """Compute log(P_{t+h}/P_t) aligned to t; rows without a valid value are dropped."""
    price_ahead = price.shift(-h)
    ratio = price_ahead / price
    return np.log(ratio).dropna()


def brier_score(y: np.ndarray, p: np.ndarray) -> float:
    """Brier score: the mean of the squared (outcome - probability) errors."""
    squared_error = (y - p) ** 2
    return float(np.mean(squared_error))


def expected_calibration_error(y: np.ndarray, p: np.ndarray, bins: int = 10) -> float:
    """Expected Calibration Error (ECE) over equal-width bins on [0, 1].

    For each non-empty bin the absolute gap between the mean outcome and the
    mean predicted probability is weighted by the share of samples in it.

    Args:
        y: Binary outcomes (0/1), same length as `p`
        p: Predicted probabilities
        bins: Number of equal-width bins

    Returns:
        The weighted average gap; 0.0 for empty input.
    """
    y = np.asarray(y, float)
    p = np.asarray(p, float)
    if p.size == 0:
        return 0.0

    edges = np.linspace(0, 1, bins + 1)
    ece = 0.0
    for i in range(bins):
        # Bins are half-open [lo, hi) except the last, which is closed so that
        # p == 1.0 (common after isotonic calibration) is not silently dropped.
        below_hi = (p <= edges[i + 1]) if i == bins - 1 else (p < edges[i + 1])
        m = (p >= edges[i]) & below_hi
        n_bin = int(m.sum())
        if n_bin == 0:
            continue
        ece += (n_bin / p.size) * np.abs(np.mean(y[m]) - np.mean(p[m]))
    return float(ece)


# =========================================
# FEATURE ENGINEERING
# =========================================
def make_feature_table(close: pd.Series):
    """
    Derive the model features from a close-price series.

    Features:
    - ret_1 / ret_3 / ret_6: log return over the last 1, 3 and 6 bars
    - vol_6 / vol_12: rolling standard deviation of ret_1
    - ma_ratio: log of the 10-bar mean price over the 20-bar mean price

    Rows with any NaN (the warm-up period) are removed.

    Returns:
        df: DataFrame with price and features, on the original index
        X: Feature matrix (numpy array) without the price column
    """
    def log_ratio(numerator, denominator):
        return np.log(numerator / denominator)

    frame = pd.DataFrame(index=close.index)
    frame["price"] = close.astype(float)
    level = frame["price"]

    # Log returns
    frame["ret_1"] = log_ratio(level, level.shift(1))
    frame["ret_3"] = log_ratio(level, level.shift(3))
    frame["ret_6"] = log_ratio(level, level.shift(6))

    # Volatility of the one-bar return
    bar_return = frame["ret_1"]
    frame["vol_6"] = bar_return.rolling(6).std()
    frame["vol_12"] = bar_return.rolling(12).std()

    # Moving average ratio
    frame["ma_ratio"] = log_ratio(level.rolling(10).mean(), level.rolling(20).mean())

    frame = frame.dropna()

    # Everything except the raw price is a model input.
    inputs = [name for name in frame.columns if name != "price"]
    matrix = frame[inputs].values.astype(float)

    return frame, matrix


# =========================================
# WALK-FORWARD CV
# =========================================
def purged_walkforward_slices(n: int, n_folds: int = 3, embargo: int = 24):
    """
    Build expanding-window (train, val, test) index ranges with an embargo.

    The samples are divided into ``n_folds + 2`` equal blocks. Each fold
    trains from row 0, validates on the following block and tests on the one
    after that. Folds stop being produced once a test range would be shorter
    than half a block.

    Args:
        n: Total number of samples
        n_folds: Number of folds
        embargo: Gap between train/val and val/test (in bars)

    Returns:
        List of ((train_start, train_end), (val_start, val_end), (test_start, test_end))
    """
    step = n // (n_folds + 2)
    result = []

    for fold_no in range(n_folds):
        train_range = (0, (fold_no + 1) * step)

        val_begin = train_range[1] + embargo
        val_range = (val_begin, val_begin + step)

        test_begin = val_range[1] + embargo
        test_range = (test_begin, min(test_begin + step, n))

        test_len = test_range[1] - test_range[0]
        if test_len < step // 2:
            break

        result.append((train_range, val_range, test_range))

    return result


# =========================================
# SVM MODEL
# =========================================
@dataclass
class SVMSnapshot:
    """Container for trained SVM model and preprocessing.

    Returned by fit_svm_classifier / fit_svm_regressor; the forecasting code
    scales inputs with `scaler` and dispatches on `model_type`.
    """
    # Fitted estimator. The classification path stores the
    # CalibratedClassifierCV wrapper, the regression path stores the SVR.
    clf: SVC | SVR | CalibratedClassifierCV
    # Scaler fitted on the training features; reused at forecast time.
    scaler: StandardScaler
    model_type: str  # 'classification' or 'regression'
    # Forecast horizon in bars that this model was trained for.
    horizon: int
    # Hyper-parameters the estimator was built with (kept for bookkeeping).
    C: float = 1.0
    gamma: float | str = 'scale'


def fit_svm_classifier(X_train: np.ndarray, y_train: np.ndarray,
                       horizon: int, random_state: int = 123,
                       C: float = 10.0, gamma: float | str = 'scale') -> SVMSnapshot:
    """
    Fit a probability-calibrated RBF SVM for binary edge detection.

    Args:
        X_train: Feature matrix (T, D)
        y_train: Binary labels (1 if return > cost, else 0)
        horizon: Forecast horizon in bars (stored on the snapshot only)
        random_state: Seed forwarded to the underlying SVC
        C: SVM regularization parameter (higher = less regularization)
        gamma: RBF kernel parameter ('scale', 'auto', or float)

    Returns:
        SVMSnapshot with calibrated classifier and the fitted scaler

    NOTE(review): the scaler is fitted on all of X_train before the
    calibration folds are formed, and an integer `cv` is not a time-ordered
    split — confirm both are acceptable for this time-series use.
    """
    # Standardise with statistics learned from the training features.
    standardizer = StandardScaler()
    standardizer.fit(X_train)
    X_std = standardizer.transform(X_train)

    # RBF SVC; probabilities are supplied by the calibration wrapper below.
    rbf_svc = SVC(kernel='rbf', C=C, gamma=gamma,
                  class_weight='balanced',
                  random_state=random_state,
                  probability=False,
                  cache_size=500)

    # Isotonic calibration over 3 folds, fitted in parallel.
    model = CalibratedClassifierCV(rbf_svc, method='isotonic', cv=3, n_jobs=-1)
    model.fit(X_std, y_train)

    snapshot_fields = dict(
        clf=model,
        scaler=standardizer,
        model_type='classification',
        horizon=horizon,
        C=C,
        gamma=gamma,
    )
    return SVMSnapshot(**snapshot_fields)


def fit_svm_regressor(X_train: np.ndarray, y_train: np.ndarray,
                      horizon: int, random_state: int = 123,
                      C: float = 10.0, gamma: float | str = 'scale',
                      epsilon: float = 0.01) -> SVMSnapshot:
    """
    Fit an RBF-kernel SVR that predicts continuous returns directly.

    Args:
        X_train: Feature matrix
        y_train: Continuous log returns
        horizon: Forecast horizon in bars (stored on the snapshot only)
        random_state: Accepted for signature parity with fit_svm_classifier;
            it is not used anywhere in this function
        C: SVR regularization parameter
        gamma: RBF kernel parameter
        epsilon: Epsilon-insensitive loss parameter

    Returns:
        SVMSnapshot with trained regressor and the fitted scaler
    """
    standardizer = StandardScaler()
    standardizer.fit(X_train)
    X_std = standardizer.transform(X_train)

    svr_settings = dict(kernel='rbf', C=C, gamma=gamma, epsilon=epsilon, cache_size=500)
    model = SVR(**svr_settings)
    model.fit(X_std, y_train)

    snapshot_fields = dict(
        clf=model,
        scaler=standardizer,
        model_type='regression',
        horizon=horizon,
        C=C,
        gamma=gamma,
    )
    return SVMSnapshot(**snapshot_fields)


# =========================================
# FORECASTING
# =========================================
def forecast_multi_horizon_svm(
    snapshots: dict[int, SVMSnapshot],
    X_seg: np.ndarray,
    price_seg: pd.Series,
    horizons: list[int],
    cost_bp: dict[int, float] | None = None,
    n_bootstrap: int = 0
):
    """
    Generate multi-horizon forecasts using trained SVMs.

    For every horizon h that has a snapshot, the first ``len(X_seg) - h``
    rows of the segment are scored; the last h rows are left out.

    Args:
        snapshots: Dict mapping horizon -> trained SVMSnapshot
        X_seg: Feature matrix for forecast segment
        price_seg: Corresponding price series
        horizons: List of forecast horizons
        cost_bp: Trading costs in basis points (defaults to DEFAULT_COST_BP,
            falling back to 8.0 bp for horizons it does not list)
        n_bootstrap: Bootstrap samples for uncertainty (0 = disabled, in
            which case fixed placeholder widths are used)

    Returns:
        out: dict[horizon] -> DataFrame with columns mu, std, p_edge_raw,
            ret_q10/ret_q50/ret_q90, price_pred, price_q10/price_q50/price_q90.
            Horizons without a snapshot are omitted; a horizon whose segment
            has no more than h rows maps to an empty DataFrame.
        cost_log: dict[horizon] -> cost in log-return units

    Raises:
        ValueError: If a snapshot's ``model_type`` is neither
            'classification' nor 'regression'.
    """
    if cost_bp is None:
        cost_bp = {h: DEFAULT_COST_BP.get(h, 8.0) for h in horizons}
    cost_log = {h: bp_to_logret(float(cost_bp[h])) for h in horizons}

    Tseg = X_seg.shape[0]
    idx = price_seg.index
    out = {}

    for h in horizons:
        if h not in snapshots:
            print(f"Warning: No model for horizon {h}, skipping")
            continue

        snap = snapshots[h]
        out_h = pd.DataFrame(index=idx[:-h] if h < Tseg else idx[:0])
        T_h = Tseg - h

        if T_h <= 0:
            # Segment too short for this horizon: return the empty frame
            out[h] = out_h
            continue

        # Scale features with the scaler fitted at training time
        X_scaled = snap.scaler.transform(X_seg[:T_h])

        if snap.model_type == 'classification':
            # Positive-class probability from the calibrated classifier
            p_edge = snap.clf.predict_proba(X_scaled)[:, 1]

            # Map probability to a return using fixed payoffs:
            # (cost + 0.002) on a hit, -(cost + 0.001) on a miss
            mu = p_edge * (cost_log[h] + 0.002) + (1 - p_edge) * (-cost_log[h] - 0.001)

            # Uncertainty
            if n_bootstrap > 0:
                std_h, q10, q50, q90 = _bootstrap_uncertainty_classification(
                    snap, X_scaled, n_bootstrap, cost_log[h]
                )
            else:
                # Fixed placeholder widths when bootstrap is disabled
                std_h = np.full(T_h, 0.01)
                q10 = mu - 0.02
                q50 = mu
                q90 = mu + 0.02

        elif snap.model_type == 'regression':
            # Direct return prediction
            mu = snap.clf.predict(X_scaled)

            # Pseudo-probability: logistic of (mu - cost) with slope 10
            p_edge = 1.0 / (1.0 + np.exp(-10 * (mu - cost_log[h])))

            if n_bootstrap > 0:
                std_h, q10, q50, q90 = _bootstrap_uncertainty_regression(
                    snap, X_scaled, n_bootstrap
                )
            else:
                # Fixed placeholder widths when bootstrap is disabled
                std_h = np.full(T_h, 0.015)
                q10 = mu - 0.025
                q50 = mu
                q90 = mu + 0.025

        else:
            # Without this guard an unknown type would either raise NameError
            # or silently reuse mu/std_h left over from the previous horizon.
            raise ValueError(
                f"Unknown model_type {snap.model_type!r} for horizon {h}; "
                "expected 'classification' or 'regression'"
            )

        # Populate DataFrame
        p_now = price_seg.iloc[:T_h].values

        out_h['mu'] = mu
        out_h['std'] = std_h
        out_h['p_edge_raw'] = p_edge
        out_h['ret_q10'] = q10
        out_h['ret_q50'] = q50
        out_h['ret_q90'] = q90
        # Log-return forecasts converted back to price levels
        out_h['price_pred'] = p_now * np.exp(mu)
        out_h['price_q10'] = p_now * np.exp(q10)
        out_h['price_q50'] = p_now * np.exp(q50)
        out_h['price_q90'] = p_now * np.exp(q90)

        out[h] = out_h

    return out, cost_log


def _bootstrap_uncertainty_classification(snap, X_scaled, n_bootstrap, cost_threshold):
    """Bootstrap resampling for classification uncertainty.

    Rows of ``X_scaled`` are resampled with replacement ``n_bootstrap`` times
    (fixed seed 42) and the positive-class probabilities of the resampled rows
    are stacked into an (n_bootstrap, T) matrix.

    The fitted model is applied once and its predictions are resampled,
    rather than re-running the model on every resampled matrix; the same
    rows are drawn either way, but the model is evaluated only once.

    NOTE(review): the resampling is over rows of the forecast segment, not
    over the training set, so column i of the sample matrix holds predictions
    for randomly drawn rows rather than repeated estimates for row i. The
    returned statistics therefore describe the spread of predictions across
    the segment, not per-row model uncertainty -- confirm this is intended.

    Args:
        snap: Object whose ``clf`` exposes ``predict_proba``
        X_scaled: Already-scaled feature matrix (T rows)
        n_bootstrap: Number of resampling rounds
        cost_threshold: Cost in log-return units used in the payoff mapping

    Returns:
        (std, q10, q50, q90): column-wise std of the sampled probabilities and
        the 10th/50th/90th percentiles of the probability-weighted returns.
    """
    T = len(X_scaled)
    rng = np.random.default_rng(42)

    # Predict once; every bootstrap round only re-indexes these values.
    p_all = np.asarray(snap.clf.predict_proba(X_scaled))[:, 1]
    probs = np.array([
        p_all[rng.choice(T, size=T, replace=True)]
        for _ in range(n_bootstrap)
    ])

    std = np.std(probs, axis=0)

    # Convert probabilities to return estimates using the same fixed payoffs
    # as the point forecast: (cost + 0.002) on a hit, -(cost + 0.001) on a miss
    mu_samples = probs * (cost_threshold + 0.002) + (1 - probs) * (-cost_threshold - 0.001)

    q10 = np.percentile(mu_samples, 10, axis=0)
    q50 = np.percentile(mu_samples, 50, axis=0)
    q90 = np.percentile(mu_samples, 90, axis=0)

    return std, q10, q50, q90


def _bootstrap_uncertainty_regression(snap, X_scaled, n_bootstrap):
    """Bootstrap resampling for regression uncertainty.

    Rows of ``X_scaled`` are resampled with replacement ``n_bootstrap`` times
    (fixed seed 42) and the predictions of the resampled rows are stacked into
    an (n_bootstrap, T) matrix.

    The fitted model is applied once and its predictions are resampled,
    rather than re-running the model on every resampled matrix; the same
    rows are drawn either way, but the model is evaluated only once.

    NOTE(review): the resampling is over rows of the forecast segment, not
    over the training set, so column i of the sample matrix holds predictions
    for randomly drawn rows rather than repeated estimates for row i. The
    returned statistics therefore describe the spread of predictions across
    the segment, not per-row model uncertainty -- confirm this is intended.

    Args:
        snap: Object whose ``clf`` exposes ``predict``
        X_scaled: Already-scaled feature matrix (T rows)
        n_bootstrap: Number of resampling rounds

    Returns:
        (std, q10, q50, q90): column-wise std and 10th/50th/90th percentiles
        of the sampled predictions.
    """
    T = len(X_scaled)
    rng = np.random.default_rng(42)

    # Predict once; every bootstrap round only re-indexes these values.
    pred_all = np.asarray(snap.clf.predict(X_scaled))
    preds = np.array([
        pred_all[rng.choice(T, size=T, replace=True)]
        for _ in range(n_bootstrap)
    ])

    std = np.std(preds, axis=0)
    q10 = np.percentile(preds, 10, axis=0)
    q50 = np.percentile(preds, 50, axis=0)
    q90 = np.percentile(preds, 90, axis=0)

    return std, q10, q50, q90


# =========================================
# CALIBRATION
# =========================================
@dataclass
class ProbCalibrator:
    """Probability calibrator using isotonic regression.

    Plain container describing how raw probabilities should be mapped:
    either through a fitted isotonic model or passed through unchanged.
    """
    # "isotonic" when a fitted model is attached; "identity" means pass-through
    method: str
    # Fitted isotonic model; None when no model was fitted
    iso: IsotonicRegression | None = None


def fit_prob_calibrator_isotonic(p_raw: np.ndarray, y: np.ndarray,
                                 min_points: int = 30) -> ProbCalibrator:
    """Fit an isotonic map from raw probabilities to observed outcomes.

    Pairs where either value is non-finite are discarded first. If fewer than
    ``min_points`` pairs remain, or the remaining raw probabilities take fewer
    than three distinct values, an "identity" calibrator (no model) is
    returned instead of fitting.
    """
    scores = np.asarray(p_raw, float)
    targets = np.asarray(y, float)

    finite = np.isfinite(scores) & np.isfinite(targets)
    scores, targets = scores[finite], targets[finite]

    too_few = scores.size < min_points
    if too_few or np.unique(scores).size < 3:
        return ProbCalibrator(method="identity", iso=None)

    # Inputs outside the fitted range are clipped to the nearest endpoint
    model = IsotonicRegression(out_of_bounds="clip").fit(scores, targets)
    return ProbCalibrator(method="isotonic", iso=model)


def apply_prob_calibrator(cal: ProbCalibrator, p_raw: np.ndarray) -> np.ndarray:
    """Map raw probabilities through ``cal``.

    When ``cal.method`` is "isotonic" the fitted model's ``predict`` is used;
    for any other method the input is returned as a float array unchanged.
    """
    scores = np.asarray(p_raw, float)
    if cal.method != "isotonic":
        return scores
    return cal.iso.predict(scores)


@dataclass
class IntervalCalibrator:
    """Conformal prediction interval calibrator.

    Plain container for a symmetric interval half-width and the settings
    it was derived with.
    """
    # Label of the fitting procedure that produced this calibrator
    method: str
    # Half-width subtracted from / added to a point forecast
    q_alpha: float
    # Miscoverage level the half-width was fitted for
    alpha: float


def fit_conformal_interval(residuals: np.ndarray, alpha: float = 0.2) -> IntervalCalibrator:
    """Derive a symmetric interval half-width from residuals.

    The half-width is the plain empirical (1 - alpha) quantile of the absolute
    finite residuals; no finite-sample correction is applied. With no finite
    residuals the half-width is 0.0.
    """
    errs = np.asarray(residuals, float)
    errs = errs[np.isfinite(errs)]

    if errs.size > 0:
        half_width = float(np.quantile(np.abs(errs), 1 - alpha))
    else:
        half_width = 0.0

    return IntervalCalibrator(method="conformal_abs", q_alpha=half_width, alpha=alpha)


def apply_conformal_interval(cal: IntervalCalibrator, mu: np.ndarray):
    """Return ``(lower, upper)`` bounds: ``mu`` shifted by -/+ ``cal.q_alpha``."""
    centre = np.asarray(mu, float)
    half_width = cal.q_alpha
    lower = centre - half_width
    upper = centre + half_width
    return lower, upper


# =========================================
# MAIN TRAINING PIPELINE
# =========================================
def run_svm_for_symbol(symbol: str, path: Path,
                       horizons: list[int] = HORIZONS,
                       n_folds: int = 3,
                       embargo: int = 24,
                       model_type: str = 'classification',
                       n_bootstrap: int = 0):
    """
    Train and evaluate SVM models for one symbol.

    For each walk-forward fold: one model per horizon is fitted on the train
    slice, forecasts are produced for the validation and test slices, a
    probability calibrator and a conformal interval are fitted on validation
    and then applied to both validation and test forecasts.

    Args:
        symbol: Asset symbol
        path: Path to CSV file
        horizons: Forecast horizons in bars
        n_folds: Number of walk-forward folds
        embargo: Embargo period between folds
        model_type: 'classification' trains classifiers; any other value
            trains regressors
        n_bootstrap: Bootstrap samples for uncertainty (0 = disabled)

    Returns:
        results: dict[horizon] -> dict with 'val' and 'test' DataFrames
            (concatenated over folds, empty DataFrame if nothing was
            produced) and 'diag', a list with one diagnostics dict per fold.
            Note that the diagnostics are computed on the same validation
            rows the calibrators were fitted on, so they are in-sample.
    """
    # Load data
    df_raw = pd.read_csv(path)
    close_col = _find_close_column(df_raw)
    close = pd.Series(df_raw[close_col].astype(float).values,
                      index=pd.RangeIndex(len(df_raw)), name="close")

    feat_df, X = make_feature_table(close)
    price = feat_df["price"]
    n = len(price)

    folds = purged_walkforward_slices(n, n_folds=n_folds, embargo=embargo)

    results = {h: {"val": [], "test": [], "diag": []} for h in horizons}

    print(f"\n{'='*60}")
    print(f"Training SVM for {symbol}")
    print(f"Model type: {model_type}")
    print(f"Horizons: {horizons}")
    print(f"Folds: {n_folds}")
    print(f"{'='*60}\n")

    for fold_idx, ((s0,e0), (s1,e1), (s2,e2)) in enumerate(folds):
        print(f"Fold {fold_idx + 1}/{len(folds)}: Train[{s0}:{e0}] Val[{s1}:{e1}] Test[{s2}:{e2}]")

        # Train one SVM per horizon
        snapshots = {}

        for h in horizons:
            print(f"  Training h={h}...", end=" ")

            # Create labels for this horizon
            ret_train = cumulative_log_returns(price.iloc[s0:e0], h)

            # Align features and labels
            n_train = min(len(X[s0:e0]), len(ret_train))
            X_train_aligned = X[s0:s0+n_train]
            ret_train_aligned = ret_train.iloc[:n_train]

            if len(X_train_aligned) < 50:
                print("SKIP (insufficient data)")
                continue

            if model_type == 'classification':
                # Binary label: return exceeds the cost threshold. The same
                # 8.0 bp fallback as the forecast step is used so horizons
                # missing from DEFAULT_COST_BP do not raise KeyError and the
                # training threshold matches the one used for validation.
                train_cost = bp_to_logret(DEFAULT_COST_BP.get(h, 8.0))
                y_train = (ret_train_aligned.values > train_cost).astype(int)

                # Check class balance
                pos_frac = y_train.mean()
                if pos_frac < 0.1 or pos_frac > 0.9:
                    print(f"WARN (imbalanced: {pos_frac:.2%} positive)")

                snap = fit_svm_classifier(
                    X_train_aligned, y_train,
                    horizon=h,
                    random_state=123 + fold_idx,
                    C=10.0,
                    gamma='scale'
                )
            else:
                # Regression on the raw cumulative log returns
                y_train = ret_train_aligned.values
                snap = fit_svm_regressor(
                    X_train_aligned, y_train,
                    horizon=h,
                    random_state=123 + fold_idx,
                    C=10.0,
                    gamma='scale',
                    epsilon=0.01
                )

            snapshots[h] = snap
            print("✓")

        if not snapshots:
            print("  No models trained, skipping fold")
            continue

        # Forecast on validation and test
        print("  Forecasting validation...", end=" ")
        out_val_raw, cost_log = forecast_multi_horizon_svm(
            snapshots=snapshots,
            X_seg=X[s1:e1],
            price_seg=price.iloc[s1:e1],
            horizons=horizons,
            n_bootstrap=n_bootstrap
        )
        print("✓")

        print("  Forecasting test...", end=" ")
        out_test_raw, _ = forecast_multi_horizon_svm(
            snapshots=snapshots,
            X_seg=X[s2:e2],
            price_seg=price.iloc[s2:e2],
            horizons=horizons,
            n_bootstrap=n_bootstrap
        )
        print("✓")

        # Calibration
        for h in horizons:
            if h not in out_val_raw or h not in out_test_raw:
                continue

            ret_val = cumulative_log_returns(price.iloc[s1:e1], h)
            idx_common = out_val_raw[h].index.intersection(ret_val.index)

            if len(idx_common) == 0:
                continue

            dfV = out_val_raw[h].loc[idx_common].copy()
            maskV = np.isfinite(dfV["p_edge_raw"].values) & np.isfinite(dfV["mu"].values)
            dfV = dfV[maskV]

            # Too few validation rows to calibrate on
            if len(dfV) < 20:
                continue

            ret_val_aligned = ret_val.loc[dfV.index]
            y_val = (ret_val_aligned.values > cost_log[h]).astype(int)
            p_raw_val = dfV["p_edge_raw"].values
            mu_val = dfV["mu"].values

            # Fit calibrators on validation
            cal_prob = fit_prob_calibrator_isotonic(p_raw_val, y_val, min_points=20)
            resid_val = ret_val_aligned.values - mu_val
            cal_pi = fit_conformal_interval(resid_val, alpha=0.2)

            # Apply to test
            ret_test = cumulative_log_returns(price.iloc[s2:e2], h)
            idx_test_common = out_test_raw[h].index.intersection(ret_test.index)
            dfT = out_test_raw[h].loc[idx_test_common].copy()

            maskT = np.isfinite(dfT["p_edge_raw"].values) & np.isfinite(dfT["mu"].values)
            dfT = dfT[maskT]

            # With no usable test rows nothing (including validation output
            # and diagnostics) is recorded for this fold/horizon
            if len(dfT) == 0:
                continue

            dfT["p_edge"] = apply_prob_calibrator(cal_prob, dfT["p_edge_raw"].values)
            mu_test = dfT["mu"].values
            lo, hi = apply_conformal_interval(cal_pi, mu_test)
            dfT["ret_lo"] = lo
            dfT["ret_hi"] = hi

            p_now = price.loc[dfT.index].values
            dfT["price_lo"] = p_now * np.exp(lo)
            dfT["price_hi"] = p_now * np.exp(hi)

            # Expected return net of cost, and the same scaled by std
            dfT["edge"] = dfT["mu"] - cost_log[h]
            dfT["risk_edge"] = (dfT["mu"] - cost_log[h]) / (dfT["std"] + 1e-12)

            results[h]["test"].append(dfT)

            # Diagnostics (computed on the rows the calibrators were fitted on)
            if cal_prob.method == "isotonic":
                p_cal_val = apply_prob_calibrator(cal_prob, p_raw_val)
                brier = brier_score(y_val, p_cal_val)
                ece = expected_calibration_error(y_val, p_cal_val)
            else:
                brier = brier_score(y_val, p_raw_val)
                ece = expected_calibration_error(y_val, p_raw_val)

            coverage = float(np.mean((resid_val >= -cal_pi.q_alpha) & (resid_val <= cal_pi.q_alpha)))

            diag = {
                "h": h,
                "brier_val": float(brier),
                "ece_val": float(ece),
                "pi_coverage_val": coverage
            }
            results[h]["diag"].append(diag)

            # Store validation
            dfV["p_edge"] = apply_prob_calibrator(cal_prob, dfV["p_edge_raw"].values)
            loV, hiV = apply_conformal_interval(cal_pi, mu_val)
            dfV["ret_lo"], dfV["ret_hi"] = loV, hiV
            results[h]["val"].append(dfV)

    # Concatenate folds
    for h in horizons:
        for split in ("val", "test"):
            if results[h][split]:
                results[h][split] = pd.concat(results[h][split]).sort_index()
            else:
                results[h][split] = pd.DataFrame()

    print(f"\nCompleted {symbol}\n")
    return results


# =========================================
# JSON EXPORT
# =========================================
def build_json_records(all_outputs: dict,
                       model_version: str = MODEL_VERSION,
                       calibration_version: str = CALIBRATION_VERSION,
                       horizons: list[int] = HORIZONS):
    """Build JSONL records for trading agent.

    Args:
        all_outputs: dict[symbol] -> dict[horizon] -> dict whose 'test' entry
            is a DataFrame (or a list/tuple of DataFrames, concatenated here)
            with at least the columns mu, std, p_edge, edge, price_pred,
            price_q10, price_q50 and price_q90
        model_version: Version tag copied into every record
        calibration_version: Calibration tag copied into every record
        horizons: Horizons to export; horizons a symbol has no entry for,
            or whose list of test frames is empty, are skipped

    Returns:
        List of JSON-serializable dicts, one per test row.
    """
    records = []
    for sym, res in all_outputs.items():
        for h in horizons:
            if h not in res:
                # Symbol was run with a different horizon set
                continue

            df = res[h]["test"]
            if isinstance(df, (list, tuple)):
                if len(df) == 0:
                    # pd.concat raises on an empty sequence
                    continue
                df = pd.concat(df).sort_index()

            for t, row in df.iterrows():
                # Both gates must pass before any directional action
                gate_open = row["p_edge"] >= TAU_P and row["edge"] >= TAU_MU
                if gate_open and row["mu"] > 0:
                    action = "buy"
                elif gate_open and row["mu"] < 0:
                    # NOTE(review): where edge is computed as mu - cost with a
                    # non-negative cost and TAU_MU >= 0, a negative mu cannot
                    # pass the edge gate, so this branch looks unreachable --
                    # confirm the intended short-side rule.
                    action = "sell"
                else:
                    action = "hold"

                rec = {
                    "timestamp_index": int(t),
                    "symbol": sym,
                    "horizon_bars": int(h),
                    "model_version": model_version,
                    "calibration_version": calibration_version,
                    "signals": {
                        "expected_return": float(row["mu"]),
                        "stdev_return": float(row["std"]),
                        "p_edge_gt_cost": float(row["p_edge"]),
                        "predicted_price": float(row["price_pred"]),
                        "price_PI": {
                            "p10": float(row["price_q10"]),
                            "p50": float(row["price_q50"]),
                            "p90": float(row["price_q90"])
                        }
                    },
                    "policy_suggestions": {
                        "gate_threshold_p": TAU_P,
                        "gate_threshold_edge": TAU_MU,
                        "suggested_action": action
                    }
                }
                records.append(rec)
    return records


# =========================================
# MAIN EXECUTION
# =========================================
if __name__ == "__main__":
    banner = "=" * 60

    # Run the pipeline for every symbol whose CSV is present
    all_outputs = {}

    for symbol, path in FILES.items():
        if not path.exists():
            print(f"Warning: {path} not found, skipping {symbol}")
            continue

        results = run_svm_for_symbol(
            symbol=symbol,
            path=path,
            horizons=HORIZONS,
            n_folds=3,
            embargo=24,
            model_type='classification',  # or 'regression'
            n_bootstrap=0,  # Set to 50 for uncertainty estimates
        )
        all_outputs[symbol] = results

        # Per-symbol summary
        print(f"\n{banner}")
        print(f"RESULTS FOR {symbol}")
        print(f"{banner}")

        for h in HORIZONS:
            test_df = results[h]['test']
            diag_list = results[h]['diag']

            # Nothing to report for horizons without test rows
            if len(test_df) == 0:
                continue

            print(f"\nHorizon {h}:")
            print(f"  Test samples: {len(test_df)}")
            print(f"  Mean p_edge: {test_df['p_edge'].mean():.3f}")
            print(f"  Mean mu: {test_df['mu'].mean():.4f}")

            if not diag_list:
                continue

            # Fold-averaged validation diagnostics
            avg_brier = np.mean([d['brier_val'] for d in diag_list])
            avg_ece = np.mean([d['ece_val'] for d in diag_list])
            avg_cov = np.mean([d['pi_coverage_val'] for d in diag_list])
            print(f"  Avg Brier: {avg_brier:.4f}")
            print(f"  Avg ECE: {avg_ece:.4f}")
            print(f"  Avg PI coverage: {avg_cov:.2%}")

    # Export one JSON object per line
    json_records = build_json_records(all_outputs)
    json_path = DATA_DIR / "trader_feed_svm_multiH.jsonl"
    with open(json_path, "w") as f:
        f.writelines(json.dumps(r) + "\n" for r in json_records)

    print(f"\n{banner}")
    print(f"Wrote {len(json_records)} records to: {json_path}")
    print(f"{banner}\n")


Training SVM for BTC
Model type: classification
Horizons: [1, 3, 6]
Folds: 3

Fold 1/3: Train[0:1749] Val[1773:3522] Test[3546:5295]
  Training h=1... ✓
  Training h=3... ✓
  Training h=6... ✓
  Forecasting validation... ✓
  Forecasting test... ✓
Fold 2/3: Train[0:3498] Val[3522:5271] Test[5295:7044]
  Training h=1... ✓
  Training h=3... ✓
  Training h=6... ✓
  Forecasting validation... ✓
  Forecasting test... ✓
Fold 3/3: Train[0:5247] Val[5271:7020] Test[7044:8747]
  Training h=1... ✓
  Training h=3... ✓
  Training h=6... ✓
  Forecasting validation... ✓
  Forecasting test... ✓

Completed BTC


RESULTS FOR BTC

Horizon 1:
  Test samples: 5198
  Mean p_edge: 0.458
  Mean mu: 0.0002
  Avg Brier: 0.2450
  Avg ECE: 0.0000
  Avg PI coverage: 79.98%

Horizon 3:
  Test samples: 5192
  Mean p_edge: 0.469
  Mean mu: 0.0003
  Avg Brier: 0.2463
  Avg ECE: 0.0000
  Avg PI coverage: 80.01%

Horizon 6:
  Test samples: 5183
  Mean p_edge: 0.487
  Mean mu: 0.0002
  Avg Brier: 0.2472
  Avg ECE: 0.0000