In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import (
    get_scorer, accuracy_score, recall_score, precision_score,
    roc_auc_score, matthews_corrcoef, average_precision_score
)
from tqdm import tqdm
import pandas as pd
import numpy as np
import scipy.stats
import copy
from joblib import Parallel, delayed  # 导入并行计算库

class Scrambler:
    """Label-permutation ("scrambling") harness around an estimator.

    A private deep copy of the supplied model is fitted on the real labels
    and then evaluated ``iterations`` more times on randomly permuted labels.

    NOTE(review): metric computation and result post-processing are elided in
    this file (placeholder comments mark the gaps). As written, ``validate``
    returns ``None`` and ``__evaluate_model`` returns an empty dict.
    """

    def __init__(self, model, iterations=100):
        """Store a deep copy of ``model`` and the number of scrambled runs.

        Args:
            model: Estimator exposing ``fit`` and ``predict``. It is deep-copied,
                so the caller's object is never fitted by this class.
            iterations: Number of label-permuted evaluations to run.
        """
        self.base_model = copy.deepcopy(model)
        self.iterations = iterations
        self.progress_bar = False

    def validate(self, X, Y, method="train_test_split", scoring="accuracy", cross_val_score_aggregator="mean", pvalue_threshold=0.05, cv_kfolds=5, as_df=False, validation_data=None, progress_bar=False):
        """Evaluate the model on real labels and on scrambled labels.

        Args:
            X: Training features.
            Y: Training labels.
            method: ``"cross_validation"`` selects the cross-validated path;
                any other value selects the hold-out path.
            scoring: Scorer name passed to ``sklearn.metrics.get_scorer``.
            cross_val_score_aggregator: Accepted but not used by the code
                visible here.
            pvalue_threshold: Accepted but not used by the code visible here.
            cv_kfolds: Forwarded as ``cv`` to ``cross_val_predict`` in
                cross-validation mode.
            as_df: Accepted but not used by the code visible here.
            validation_data: Optional ``(X_test, Y_test)`` pair. When given
                (and truthy), all of ``X``/``Y`` is used for training;
                otherwise a random 75/25 split of ``X``/``Y`` is made.
            progress_bar: Forwarded to ``__evaluate_model``, which does not
                use it in the code visible here.

        Returns:
            None. The post-processing of ``result`` is elided in this excerpt.
        """
        model_scorer = get_scorer(scoring)
        result = None

        # In cross-validation mode, let cross_val_predict use every core (n_jobs=-1).
        if method == "cross_validation":
            result = self.__evaluate_model(X, Y, X, Y, model_scorer, progress_bar, cross_val=True, cv_kfolds=cv_kfolds, n_jobs=-1)
        else:
            # The hold-out path has no per-fold work to parallelise, so n_jobs keeps its default.
            if validation_data:
                X_train, Y_train = X, Y
                X_test, Y_test = validation_data
            else:
                X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)
            result = self.__evaluate_model(X_train, Y_train, X_test, Y_test, model_scorer, progress_bar)

        # NOTE(review): the remainder of this method is elided in the source
        # ("rest of the code unchanged"); `result` is not returned here.

    def __evaluate_model(self, X_train, Y_train, X_test, Y_test, scorer, progress_bar, cross_val=False, cv_kfolds=5, n_jobs=1):
        """Fit on the real labels, predict, then run the scrambled iterations.

        Args:
            X_train, Y_train: Data used to fit ``self.base_model``.
            X_test: Data predicted on in hold-out mode.
            Y_test: Not used by the code visible here.
            scorer: Not used by the code visible here.
            progress_bar: Not used by the code visible here.
            cross_val: When true, predictions come from ``cross_val_predict``
                on the training data instead of ``predict(X_test)``.
            cv_kfolds: ``cv`` argument for ``cross_val_predict``.
            n_jobs: ``n_jobs`` argument for ``cross_val_predict``.

        Returns:
            The ``metrics`` dict (empty here; metric computation is elided).
        """
        # Leaves self.base_model fitted on the real labels.
        self.base_model.fit(X_train, Y_train)
        metrics = {
            # Metric storage; populated by code elided from this excerpt.
        }

        if cross_val:
            # In cross-validation mode, forward n_jobs to cross_val_predict.
            Y_pred = cross_val_predict(self.base_model, X_train, Y_train, cv=cv_kfolds, n_jobs=n_jobs)
            # Metric computation elided in this excerpt.
        else:
            # The hold-out path has nothing to parallelise at this level.
            Y_pred = self.base_model.predict(X_test)
            # Metric computation elided in this excerpt.

        # Fan the scrambled iterations out over all cores. Each task works on
        # its own copy of the model (see _evaluate_scrambled), so tasks cannot
        # interfere with each other or with self.base_model.
        # NOTE(review): in cross-validation mode every task also forwards
        # n_jobs to cross_val_predict, i.e. parallel calls are nested --
        # confirm that this is intended.
        scrambled_results = Parallel(n_jobs=-1)(
            delayed(self._evaluate_scrambled)(i, X_train, Y_train, X_test, cross_val, cv_kfolds, n_jobs)
            for i in range(self.iterations)
        )
        # Merging and processing of scrambled_results is elided in this excerpt.

        return metrics

    def _evaluate_scrambled(self, index, X_train, Y_train, X_test, cross_val=False, cv_kfolds=5, n_jobs=1):
        """Run one evaluation on randomly permuted training labels.

        The permutation is seeded with ``index`` so a given iteration always
        scrambles the labels the same way. The run uses a private deep copy of
        ``self.base_model``; the shared model stays fitted on the real labels.

        Kept as a single-underscore method (not name-mangled) so the bound
        method can be handed to joblib workers without name-lookup surprises.

        Args:
            index: Iteration number, used as the NumPy seed.
            X_train, Y_train: Training data; ``Y_train`` is permuted.
            X_test: Data predicted on in hold-out mode.
            cross_val: Use ``cross_val_predict`` instead of fit/predict.
            cv_kfolds: ``cv`` argument for ``cross_val_predict``.
            n_jobs: ``n_jobs`` argument for ``cross_val_predict``.

        Returns:
            None. Metric computation for the scrambled run is elided in this
            excerpt.
        """
        # Reseeds NumPy's global RNG in whichever process executes this task.
        np.random.seed(index)
        Y_train_scrambled = np.random.permutation(Y_train)

        # Never refit the shared estimator on scrambled labels.
        scrambled_model = copy.deepcopy(self.base_model)
        if cross_val:
            # cross_val_predict fits per fold itself, so no explicit fit is needed.
            Y_pred_scrambled = cross_val_predict(scrambled_model, X_train, Y_train_scrambled, cv=cv_kfolds, n_jobs=n_jobs)
        else:
            scrambled_model.fit(X_train, Y_train_scrambled)
            Y_pred_scrambled = scrambled_model.predict(X_test)
        # Metric computation on Y_pred_scrambled is elided in this excerpt.
