# In [10]:
import numpy as np
from sklearn.linear_model import LinearRegression

def generate_data(n, d=5, sigma=1.0, seed=None):
    """Draw a synthetic regression sample whose mean is mildly non-linear.

    Features are i.i.d. Uniform(-1, 1). The response is a random linear
    signal, plus quadratic terms in the first two features, plus Gaussian
    noise -- so a purely linear model fitted to it is misspecified.

    Args:
        n: Number of samples to draw.
        d: Number of features. Columns 0 and 1 are indexed for the
            quadratic terms.
        sigma: Standard deviation of the additive Gaussian noise.
        seed: Seed forwarded to ``np.random.default_rng``.

    Returns:
        Tuple ``(X, y, beta)``: features of shape ``(n, d)``, responses of
        shape ``(n,)`` and the linear coefficients of shape ``(d,)``.
    """
    gen = np.random.default_rng(seed)

    # The draw order (features, coefficients, noise) determines the sample
    # produced for a given seed, so it must not be rearranged.
    features = gen.uniform(-1, 1, size=(n, d))
    coefs = gen.normal(size=d)
    noise = gen.normal(scale=sigma, size=n)

    first_col, second_col = features[:, 0], features[:, 1]
    quadratic_part = 2.0 * (first_col ** 2) + 1.0 * (second_col ** 2)
    linear_part = features @ coefs

    response = linear_part + quadratic_part + noise
    return features, response, coefs

def split_conformal_interval_auto(
    X_train_all, y_train_all, X_test,
    alpha=0.1, cal_ratio=0.1, seed=None
):
    """Split conformal prediction intervals around a linear-regression fit.

    A random ``cal_ratio`` fraction of the supplied data (at least one
    sample) is held out as the calibration set; the remainder trains the
    base ``LinearRegression`` model. The interval half-width is the
    ``ceil((n_cal + 1) * (1 - alpha))``-th smallest absolute calibration
    residual.

    Args:
        X_train_all: Feature array; rows are samples (indexed with integer
            index arrays, so it must support NumPy fancy indexing).
        y_train_all: Responses aligned with ``X_train_all``.
        X_test: Feature array of the points to build intervals for.
        alpha: Miscoverage level, strictly between 0 and 1.
        cal_ratio: Fraction of the data reserved for calibration.
        seed: Seed forwarded to ``np.random.default_rng`` for the split.

    Returns:
        Tuple ``(lower, upper)`` of arrays with one entry per test point.
        Both are infinite when the calibration set is too small for the
        requested level (``ceil((n_cal + 1) * (1 - alpha)) > n_cal``).

    Raises:
        ValueError: If ``alpha`` is not in (0, 1), or if the split would
            leave no samples to train the base model.
    """
    if not 0.0 < alpha < 1.0:
        raise ValueError(f"alpha must lie strictly between 0 and 1, got {alpha!r}")

    rng = np.random.default_rng(seed)
    n = X_train_all.shape[0]
    n_cal = max(1, int(np.floor(n * cal_ratio)))
    if n_cal >= n:
        raise ValueError(
            f"calibration split of {n_cal} sample(s) leaves no training data "
            f"(n = {n}, cal_ratio = {cal_ratio!r})"
        )

    idx = rng.permutation(n)
    cal_idx = idx[:n_cal]
    train_idx = idx[n_cal:]

    X_train = X_train_all[train_idx]
    y_train = y_train_all[train_idx]
    X_cal = X_train_all[cal_idx]
    y_cal = y_train_all[cal_idx]

    # Fit the base model on the training part only.
    model = LinearRegression().fit(X_train, y_train)

    # Conformity scores: absolute residuals on the held-out calibration set.
    residuals = np.abs(y_cal - model.predict(X_cal))

    # Rank of the conformal quantile among the n_cal calibration scores.
    k = int(np.ceil((n_cal + 1) * (1 - alpha)))
    if k > n_cal:
        # Too few calibration points for this level: the finite-sample
        # quantile is +inf (indexing the sorted scores would overflow).
        q_hat = np.inf
    else:
        q_hat = np.sort(residuals)[k - 1]

    # Symmetric interval around the point prediction for every test point.
    y_test_pred = model.predict(X_test)
    lower = y_test_pred - q_hat
    upper = y_test_pred + q_hat
    return lower, upper

# def bootstrap_interval(X_train, y_train, X_test, alpha=0.1, B=300, seed=None):
#     rng = np.random.default_rng(seed)
#     n = X_train.shape[0]

#     base_model = LinearRegression().fit(X_train, y_train)
#     y_train_hat = base_model.predict(X_train)
#     residuals = y_train - y_train_hat
#     y_test_hat = base_model.predict(X_test)

#     B_preds = np.zeros((B, X_test.shape[0]))

#     for b in range(B):
#         e_star = rng.choice(residuals, size=n, replace=True)
#         y_star = y_train_hat + e_star
#         model_b = LinearRegression().fit(X_train, y_star)
#         B_preds[b] = model_b.predict(X_test)

#     deltas = B_preds - y_test_hat[None, :]
#     q_low = np.quantile(deltas, alpha / 2, axis=0)
#     q_high = np.quantile(deltas, 1 - alpha / 2, axis=0)

#     lower = y_test_hat + q_low
#     upper = y_test_hat + q_high
#     return lower, upper

def bootstrap_interval_predictive(X_train, y_train, X_test,
                                  alpha=0.1, B=300, seed=None):
    """Residual-bootstrap prediction interval for Y (not for E[Y | X]).

    Procedure:
    1. Fit a linear regression on the original data and keep the fitted
       training values together with the residuals ``y - fitted``.
    2. For each of the ``B`` replicates:
       - resample the training residuals with replacement and add them to
         the fitted values to obtain pseudo-responses;
       - refit the regression on ``(X_train, pseudo-responses)``;
       - predict the mean at ``X_test`` with the refitted model;
       - add a second, independent resample of the residuals (one per test
         point) to turn the mean prediction into a simulated response.
    3. For every test point take the ``alpha / 2`` and ``1 - alpha / 2``
       empirical quantiles of its ``B`` simulated responses.

    Args:
        X_train: Training features; rows are samples.
        y_train: Training responses aligned with ``X_train``.
        X_test: Features of the points to build intervals for.
        alpha: Miscoverage level of the two-sided interval.
        B: Number of bootstrap replicates.
        seed: Seed forwarded to ``np.random.default_rng``.

    Returns:
        Tuple ``(lower, upper)`` of arrays with one entry per test point.
    """
    gen = np.random.default_rng(seed)
    n_fit, n_eval = X_train.shape[0], X_test.shape[0]

    # Reference fit: its residuals serve as the empirical noise distribution.
    fitted = LinearRegression().fit(X_train, y_train).predict(X_train)
    resid = y_train - fitted

    # Every row is overwritten below, so no zero-initialisation is needed.
    simulated = np.empty((B, n_eval))
    for rep in range(B):
        # Training side: resample residuals, rebuild responses, refit.
        train_noise = gen.choice(resid, size=n_fit, replace=True)
        refit = LinearRegression().fit(X_train, fitted + train_noise)

        # Test side: mean prediction plus a fresh draw of noise.
        mean_at_test = refit.predict(X_test)
        test_noise = gen.choice(resid, size=n_eval, replace=True)
        simulated[rep, :] = mean_at_test + test_noise

    lower, upper = np.quantile(simulated, [alpha / 2, 1 - alpha / 2], axis=0)
    return lower, upper


def eval_coverage_len(lower, upper, y_true):
    """Return the empirical coverage and mean width of a set of intervals.

    Args:
        lower: Array of lower interval bounds.
        upper: Array of upper interval bounds.
        y_true: Array of observed responses aligned with the bounds.

    Returns:
        Tuple ``(coverage, avg_len)``: the fraction of responses lying in
        their closed interval ``[lower, upper]`` and the mean of
        ``upper - lower``.
    """
    at_or_above_lower = y_true >= lower
    at_or_below_upper = y_true <= upper
    hit_rate = (at_or_above_lower & at_or_below_upper).mean()
    mean_width = np.mean(upper - lower)
    return hit_rate, mean_width

def run_experiment(
    n_train_total=100,
    n_test=500,
    d=5,
    sigma=1.0,
    alpha=0.1,
    B=1000,
    cal_ratio=0.1,
    n_rep=50,
    seed=0
):
    """Compare split conformal and residual-bootstrap intervals by simulation.

    Each repetition draws a fresh data set, builds both kinds of interval
    from the same training sample, and records coverage and average width
    on the same test sample. A summary (mean ± standard deviation over the
    repetitions) is printed at the end.

    Args:
        n_train_total: Training samples per repetition (split conformal
            carves its calibration set out of these).
        n_test: Test samples per repetition.
        d: Number of features.
        sigma: Noise standard deviation passed to ``generate_data``.
        alpha: Target miscoverage level for both methods.
        B: Number of bootstrap replicates.
        cal_ratio: Calibration fraction for split conformal.
        n_rep: Number of Monte Carlo repetitions.
        seed: Master seed; per-call seeds are drawn from it.
    """
    rng = np.random.default_rng(seed)

    cov_cp, len_cp = [], []
    cov_boot, len_boot = [], []

    for _ in range(n_rep):
        # Generate training and test data in a single draw, then slice.
        X_all, y_all, _ = generate_data(n_train_total + n_test,
                                        d=d, sigma=sigma,
                                        seed=rng.integers(1e9))
        X_train_all = X_all[:n_train_total]
        y_train_all = y_all[:n_train_total]
        X_test = X_all[n_train_total:]
        y_test = y_all[n_train_total:]

        # Split conformal: holds out `cal_ratio` of the training data
        # internally for calibration.
        lower_cp, upper_cp = split_conformal_interval_auto(
            X_train_all, y_train_all, X_test,
            alpha=alpha, cal_ratio=cal_ratio,
            seed=rng.integers(1e9)
        )
        c1, l1 = eval_coverage_len(lower_cp, upper_cp, y_test)
        cov_cp.append(c1)
        len_cp.append(l1)

        # Bootstrap: uses the full training set (no calibration hold-out).
        lower_bt, upper_bt = bootstrap_interval_predictive(
            X_train_all, y_train_all,
            X_test, alpha=alpha, B=B,
            seed=rng.integers(1e9)
        )
        c2, l2 = eval_coverage_len(lower_bt, upper_bt, y_test)
        cov_boot.append(c2)
        len_boot.append(l2)

    print(f"=== alpha = {alpha}, d = {d}, sigma = {sigma}, "
          f"n_train_total = {n_train_total}, cal_ratio = {cal_ratio} ===")
    print(f"Split Conformal : coverage = {np.mean(cov_cp):.3f} ± {np.std(cov_cp):.3f}, "
          f"avg_len = {np.mean(len_cp):.3f} ± {np.std(len_cp):.3f}")
    # The coverage spread must come from cov_boot (it was previously taken
    # from len_boot, duplicating the interval-length spread).
    print(f"Bootstrap       : coverage = {np.mean(cov_boot):.3f} ± {np.std(cov_boot):.3f}, "
          f"avg_len = {np.mean(len_boot):.3f} ± {np.std(len_boot):.3f}")

# Script entry point: run the comparison with its default settings.
if __name__ == "__main__":
    run_experiment()


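# Recorded output of the cell above. NOTE: the run that produced it printed
# np.std(len_boot) as the Bootstrap coverage spread, so the "± 0.342" after the
# Bootstrap coverage is the std of the interval lengths, not of the coverage.
# === alpha = 0.1, d = 5, sigma = 1.0, n_train_total = 100, cal_ratio = 0.1 ===
# Split Conformal : coverage = 0.927 ± 0.063, avg_len = 4.825 ± 1.145
# Bootstrap       : coverage = 0.884 ± 0.342, avg_len = 3.911 ± 0.342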
