In [None]:
import numpy as np
from sklearn.model_selection import KFold, RepeatedKFold, train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Demo: compare the error estimate from one random hold-out split with the
# averaged estimate from repeated K-fold cross-validation on the same data.

# 1. Simulate 100 data points.
# A dedicated, seeded Generator keeps the dataset identical across runs, so the
# seeded repeated K-fold in step 3 is actually reproducible. The global NumPy
# random state is deliberately left untouched, so the unseeded split in step 2
# still changes from run to run (which is the point of the comparison).
rng = np.random.default_rng(42)
X = rng.random((100, 1))
y = 3 * X + rng.standard_normal((100, 1)) * 0.1  # linear relationship with noise

# 2. Single validation: one random 80/20 hold-out split.
# No random_state is passed on purpose; the score depends on which rows happen
# to land in the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = LinearRegression().fit(X_train, y_train)
single_score = mean_squared_error(y_test, model.predict(X_test))
print(f"单次验证误差: {single_score:.6f} (可能会因为随机分割而剧烈波动)")

# 3. Repeated K-fold cross-validation.
# 5 folds repeated 10 times -> 50 train/test partitions, seeded for
# reproducibility.
rkf = RepeatedKFold(n_splits=5, n_repeats=10, random_state=42)
scores = []

for train_index, test_index in rkf.split(X):
    X_train_cv, X_test_cv = X[train_index], X[test_index]
    y_train_cv, y_test_cv = y[train_index], y[test_index]

    # Fit a fresh model per partition so no state leaks between folds.
    cv_model = LinearRegression().fit(X_train_cv, y_train_cv)
    scores.append(mean_squared_error(y_test_cv, cv_model.predict(X_test_cv)))

# Mean MSE over all partitions, plus its spread across partitions.
print(f"重复交叉验证平均误差: {np.mean(scores):.6f}")
print(f"误差的标准差: {np.std(scores):.6f} (标准差越小，说明你的模型评估越可靠)")