# 构建基于WGCNA的基础模型

In [1]:
import optuna
from optuna.samplers import TPESampler
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import xgboost as xgb
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

np.random.seed(42)

# 加载数据集

data1 = pd.read_csv('wgcna_10.csv')
X = data1.iloc[:, 1:]
y = data1.iloc[:, 0]  # 假设标签在 data1 的第一列

# 数据标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 拆分数据集
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

In [2]:
# 5折交叉验证
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 目标函数
def objective(trial, model_name):
    if model_name == 'et':
        model = ExtraTreesClassifier(
            max_depth=trial.suggest_int('max_depth', 5, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            random_state=42
        )
    elif model_name == 'knn':
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int('n_neighbors', 3, 20),
            leaf_size=trial.suggest_int('leaf_size', 5, 50),
            weights=trial.suggest_categorical('weights', ['uniform', 'distance'])
        )
    elif model_name == 'logistic':
        model = LogisticRegression(
            C=trial.suggest_float('C', 1e-4, 1e2, log=True),
            penalty='l1',
            solver='saga',
            random_state=42
        )
    elif model_name == 'xgb':
        model = xgb.XGBClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            use_label_encoder=False,
            eval_metric='logloss',
            random_state=42
        )
    elif model_name == 'rf':
        model = RandomForestClassifier(
            max_depth=trial.suggest_int('max_depth', 5, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            random_state=42
        )
    elif model_name == 'gnb':
        model = GaussianNB(
            var_smoothing=trial.suggest_float('var_smoothing', 1e-12, 1e-7, log=True)
        )
    elif model_name == 'lda':
        model = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    elif model_name == 'ada':
        model = AdaBoostClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 2),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            algorithm='SAMME',
            random_state=42
        )
    elif model_name == 'gb':
        model = GradientBoostingClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            subsample=trial.suggest_float('subsample', 0.5, 1.0),
            random_state=42
        )
    elif model_name == 'svm':
        model = SVC(
            C=trial.suggest_float('C', 1e-3, 1e2, log=True),
            probability=True,
            random_state=42
        )

    # 5折交叉验证计算 AUC 均值
    auc_scores = []
    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_val = X_train[train_idx], X_train[val_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model.fit(X_tr, y_tr)
        y_pred = model.predict_proba(X_val)[:, 1]
        auc_scores.append(roc_auc_score(y_val, y_pred))

    return np.mean(auc_scores)

# 运行 Optuna 进行超参数优化
optimized_models = {}
best_aucs = {}

for model_name in ['et', 'knn', 'logistic', 'xgb', 'rf', 'gnb', 'lda', 'ada', 'gb', 'svm']:
    study = optuna.create_study(direction='maximize', sampler=TPESampler())
    study.optimize(lambda trial: objective(trial, model_name), n_trials=50)

    optimized_models[model_name] = study.best_params
    best_aucs[model_name] = study.best_value

    print(f"Best AUC for {model_name}: {study.best_value}")
    print(f"Best parameters for {model_name}: {study.best_params}")

# 输出每个模型的最佳 AUC
print("\nModel Best AUCs:")
for model_name, auc in best_aucs.items():
    print(f"{model_name}: {auc}")
    



[I 2025-04-03 23:05:15,433] A new study created in memory with name: no-name-8de4cb4f-e2eb-4341-abde-87dded844995
[I 2025-04-03 23:05:16,132] Trial 0 finished with value: 0.6192847593582887 and parameters: {'max_depth': 14, 'min_samples_split': 11, 'n_estimators': 225}. Best is trial 0 with value: 0.6192847593582887.
[I 2025-04-03 23:05:16,971] Trial 1 finished with value: 0.6256461675579322 and parameters: {'max_depth': 12, 'min_samples_split': 15, 'n_estimators': 262}. Best is trial 1 with value: 0.6256461675579322.
[I 2025-04-03 23:05:17,143] Trial 2 finished with value: 0.6106283422459893 and parameters: {'max_depth': 16, 'min_samples_split': 15, 'n_estimators': 53}. Best is trial 1 with value: 0.6256461675579322.
[I 2025-04-03 23:05:17,899] Trial 3 finished with value: 0.6254790552584669 and parameters: {'max_depth': 12, 'min_samples_split': 9, 'n_estimators': 235}. Best is trial 1 with value: 0.6256461675579322.
[I 2025-04-03 23:05:18,835] Trial 4 finished with value: 0.621028297

[I 2025-04-03 23:05:33,086] Trial 40 finished with value: 0.6295844474153298 and parameters: {'max_depth': 5, 'min_samples_split': 17, 'n_estimators': 220}. Best is trial 19 with value: 0.6538992869875222.
[I 2025-04-03 23:05:33,298] Trial 41 finished with value: 0.6417502228163994 and parameters: {'max_depth': 6, 'min_samples_split': 20, 'n_estimators': 64}. Best is trial 19 with value: 0.6538992869875222.
[I 2025-04-03 23:05:33,530] Trial 42 finished with value: 0.6516934046345811 and parameters: {'max_depth': 6, 'min_samples_split': 19, 'n_estimators': 72}. Best is trial 19 with value: 0.6538992869875222.
[I 2025-04-03 23:05:33,842] Trial 43 finished with value: 0.6325200534759359 and parameters: {'max_depth': 7, 'min_samples_split': 19, 'n_estimators': 101}. Best is trial 19 with value: 0.6538992869875222.
[I 2025-04-03 23:05:34,227] Trial 44 finished with value: 0.6267658199643493 and parameters: {'max_depth': 15, 'min_samples_split': 18, 'n_estimators': 122}. Best is trial 19 wit

Best AUC for et: 0.6538992869875222
Best parameters for et: {'max_depth': 6, 'min_samples_split': 20, 'n_estimators': 88}


[I 2025-04-03 23:05:36,451] Trial 14 finished with value: 0.6415886809269162 and parameters: {'n_neighbors': 17, 'leaf_size': 30, 'weights': 'distance'}. Best is trial 5 with value: 0.6415886809269162.
[I 2025-04-03 23:05:36,469] Trial 15 finished with value: 0.6075144830659537 and parameters: {'n_neighbors': 14, 'leaf_size': 22, 'weights': 'distance'}. Best is trial 5 with value: 0.6415886809269162.
[I 2025-04-03 23:05:36,484] Trial 16 finished with value: 0.5974988859180035 and parameters: {'n_neighbors': 10, 'leaf_size': 41, 'weights': 'distance'}. Best is trial 5 with value: 0.6415886809269162.
[I 2025-04-03 23:05:36,503] Trial 17 finished with value: 0.6075144830659537 and parameters: {'n_neighbors': 14, 'leaf_size': 34, 'weights': 'distance'}. Best is trial 5 with value: 0.6415886809269162.
[I 2025-04-03 23:05:36,521] Trial 18 finished with value: 0.5543894830659537 and parameters: {'n_neighbors': 7, 'leaf_size': 17, 'weights': 'distance'}. Best is trial 5 with value: 0.641588680

[I 2025-04-03 23:05:37,149] Trial 2 finished with value: 0.5 and parameters: {'C': 0.02743812394833149}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,160] Trial 3 finished with value: 0.5 and parameters: {'C': 0.007291402131558983}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,178] Trial 4 finished with value: 0.6833054812834225 and parameters: {'C': 25.223648089330222}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,193] Trial 5 finished with value: 0.6624554367201426 and parameters: {'C': 0.4588823440766385}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,204] Trial 6 finished with value: 0.5 and parameters: {'C': 0.0013147173902157752}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,214] Trial 7 finished with value: 0.5 and parameters: {'C': 0.0004964278727490588}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,226] Trial 8 finished with

Best AUC for knn: 0.649935940285205
Best parameters for knn: {'n_neighbors': 17, 'leaf_size': 5, 'weights': 'uniform'}


[I 2025-04-03 23:05:37,314] Trial 12 finished with value: 0.6812889928698752 and parameters: {'C': 6.21860178725321}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,337] Trial 13 finished with value: 0.6823585115864527 and parameters: {'C': 7.2083269616448025}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,356] Trial 14 finished with value: 0.6694685828877005 and parameters: {'C': 0.9057469706280038}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,376] Trial 15 finished with value: 0.6833054812834225 and parameters: {'C': 16.364562877566456}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,396] Trial 16 finished with value: 0.67512811942959 and parameters: {'C': 1.2055433888858513}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,410] Trial 17 finished with value: 0.5 and parameters: {'C': 0.054969325397994165}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-0

[I 2025-04-03 23:05:37,513] Trial 22 finished with value: 0.679083110516934 and parameters: {'C': 8.666916415043458}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,528] Trial 23 finished with value: 0.6607063279857397 and parameters: {'C': 0.23561132970409115}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,549] Trial 24 finished with value: 0.6833054812834225 and parameters: {'C': 25.170469689573977}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,568] Trial 25 finished with value: 0.6804088680926916 and parameters: {'C': 3.510215254315108}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,589] Trial 26 finished with value: 0.6833054812834225 and parameters: {'C': 14.978511456381282}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,609] Trial 27 finished with value: 0.6821691176470589 and parameters: {'C': 45.365872537741346}. Best is trial 1 with value: 0.683400178253

[I 2025-04-03 23:05:37,719] Trial 33 finished with value: 0.5 and parameters: {'C': 0.006604115348384208}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,740] Trial 34 finished with value: 0.679083110516934 and parameters: {'C': 8.321395136760543}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,756] Trial 35 finished with value: 0.6570298573975045 and parameters: {'C': 0.4020262360113578}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,777] Trial 36 finished with value: 0.6804088680926916 and parameters: {'C': 3.6281328674794717}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,790] Trial 37 finished with value: 0.5 and parameters: {'C': 0.017246301181955793}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,812] Trial 38 finished with value: 0.6833054812834225 and parameters: {'C': 12.540676841880092}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:

[I 2025-04-03 23:05:37,948] Trial 45 finished with value: 0.6833054812834225 and parameters: {'C': 15.272982528921498}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,967] Trial 46 finished with value: 0.6824532085561497 and parameters: {'C': 4.441536403244497}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:37,988] Trial 47 finished with value: 0.6821691176470589 and parameters: {'C': 57.619183764745706}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:38,001] Trial 48 finished with value: 0.5 and parameters: {'C': 0.0019959021146582004}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:38,023] Trial 49 finished with value: 0.6772003119429589 and parameters: {'C': 1.3848221288660265}. Best is trial 1 with value: 0.6834001782531194.
[I 2025-04-03 23:05:38,024] A new study created in memory with name: no-name-4f05d744-ef2f-4abb-8fe5-f165c3a85e93


Best AUC for logistic: 0.6834001782531194
Best parameters for logistic: {'C': 9.969827526090993}


[I 2025-04-03 23:05:38,419] Trial 0 finished with value: 0.5018605169340463 and parameters: {'learning_rate': 0.05288898406350987, 'max_depth': 9, 'n_estimators': 207}. Best is trial 0 with value: 0.5018605169340463.
[I 2025-04-03 23:05:38,765] Trial 1 finished with value: 0.5059157754010695 and parameters: {'learning_rate': 0.13740048525932128, 'max_depth': 6, 'n_estimators': 295}. Best is trial 1 with value: 0.5059157754010695.
[I 2025-04-03 23:05:39,106] Trial 2 finished with value: 0.5010082442067737 and parameters: {'learning_rate': 0.08984938025338193, 'max_depth': 9, 'n_estimators': 158}. Best is trial 1 with value: 0.5059157754010695.
[I 2025-04-03 23:05:39,529] Trial 3 finished with value: 0.49407865418894825 and parameters: {'learning_rate': 0.04566295509785478, 'max_depth': 8, 'n_estimators': 269}. Best is trial 1 with value: 0.5059157754010695.
[I 2025-04-03 23:05:40,050] Trial 4 finished with value: 0.5059881907308378 and parameters: {'learning_rate': 0.06087798203240794, 

[I 2025-04-03 23:05:46,219] Trial 38 finished with value: 0.5184046345811052 and parameters: {'learning_rate': 0.028324707706595433, 'max_depth': 4, 'n_estimators': 68}. Best is trial 15 with value: 0.5516571969696968.
[I 2025-04-03 23:05:46,531] Trial 39 finished with value: 0.5217635918003565 and parameters: {'learning_rate': 0.12082594263077928, 'max_depth': 3, 'n_estimators': 293}. Best is trial 15 with value: 0.5516571969696968.
[I 2025-04-03 23:05:46,683] Trial 40 finished with value: 0.5057987967914438 and parameters: {'learning_rate': 0.15027228338593307, 'max_depth': 4, 'n_estimators': 111}. Best is trial 15 with value: 0.5516571969696968.
[I 2025-04-03 23:05:46,877] Trial 41 finished with value: 0.5233177361853832 and parameters: {'learning_rate': 0.03154337392960842, 'max_depth': 3, 'n_estimators': 174}. Best is trial 15 with value: 0.5516571969696968.
[I 2025-04-03 23:05:47,004] Trial 42 finished with value: 0.5312221479500892 and parameters: {'learning_rate': 0.06924554185

Best AUC for xgb: 0.5516571969696968
Best parameters for xgb: {'learning_rate': 0.014866156596317159, 'max_depth': 3, 'n_estimators': 65}


[I 2025-04-03 23:05:48,261] Trial 0 finished with value: 0.6123440285204991 and parameters: {'max_depth': 15, 'min_samples_split': 12, 'n_estimators': 69}. Best is trial 0 with value: 0.6123440285204991.
[I 2025-04-03 23:05:49,538] Trial 1 finished with value: 0.6051581996434938 and parameters: {'max_depth': 8, 'min_samples_split': 11, 'n_estimators': 283}. Best is trial 0 with value: 0.6123440285204991.
[I 2025-04-03 23:05:50,394] Trial 2 finished with value: 0.6184826203208555 and parameters: {'max_depth': 6, 'min_samples_split': 17, 'n_estimators': 193}. Best is trial 2 with value: 0.6184826203208555.
[I 2025-04-03 23:05:50,628] Trial 3 finished with value: 0.6153854723707666 and parameters: {'max_depth': 6, 'min_samples_split': 19, 'n_estimators': 53}. Best is trial 2 with value: 0.6184826203208555.
[I 2025-04-03 23:05:51,944] Trial 4 finished with value: 0.610650623885918 and parameters: {'max_depth': 9, 'min_samples_split': 11, 'n_estimators': 298}. Best is trial 2 with value: 0.

[I 2025-04-03 23:06:26,267] Trial 40 finished with value: 0.6281584224598931 and parameters: {'max_depth': 16, 'min_samples_split': 19, 'n_estimators': 237}. Best is trial 26 with value: 0.6296457219251337.
[I 2025-04-03 23:06:27,289] Trial 41 finished with value: 0.6292947860962567 and parameters: {'max_depth': 16, 'min_samples_split': 19, 'n_estimators': 235}. Best is trial 26 with value: 0.6296457219251337.
[I 2025-04-03 23:06:28,312] Trial 42 finished with value: 0.6304311497326203 and parameters: {'max_depth': 16, 'min_samples_split': 19, 'n_estimators': 240}. Best is trial 42 with value: 0.6304311497326203.
[I 2025-04-03 23:06:29,438] Trial 43 finished with value: 0.6229110962566845 and parameters: {'max_depth': 14, 'min_samples_split': 17, 'n_estimators': 253}. Best is trial 42 with value: 0.6304311497326203.
[I 2025-04-03 23:06:30,651] Trial 44 finished with value: 0.6205269607843137 and parameters: {'max_depth': 12, 'min_samples_split': 20, 'n_estimators': 276}. Best is trial 

Best AUC for rf: 0.6304311497326203
Best parameters for rf: {'max_depth': 16, 'min_samples_split': 19, 'n_estimators': 240}


[I 2025-04-03 23:06:36,224] Trial 17 finished with value: 0.6498440285204989 and parameters: {'var_smoothing': 4.2010669310570245e-11}. Best is trial 0 with value: 0.6498440285204989.
[I 2025-04-03 23:06:36,236] Trial 18 finished with value: 0.6498440285204989 and parameters: {'var_smoothing': 5.022938560988949e-10}. Best is trial 0 with value: 0.6498440285204989.
[I 2025-04-03 23:06:36,247] Trial 19 finished with value: 0.6498440285204989 and parameters: {'var_smoothing': 4.7257778973897454e-11}. Best is trial 0 with value: 0.6498440285204989.
[I 2025-04-03 23:06:36,261] Trial 20 finished with value: 0.6498440285204989 and parameters: {'var_smoothing': 1.6942825120366754e-10}. Best is trial 0 with value: 0.6498440285204989.
[I 2025-04-03 23:06:36,272] Trial 21 finished with value: 0.6498440285204989 and parameters: {'var_smoothing': 3.271268358891591e-08}. Best is trial 0 with value: 0.6498440285204989.
[I 2025-04-03 23:06:36,284] Trial 22 finished with value: 0.6498440285204989 and p

Best AUC for gnb: 0.6498440285204989
Best parameters for gnb: {'var_smoothing': 7.562491618313689e-10}


[I 2025-04-03 23:06:36,829] Trial 11 finished with value: 0.6994318181818182 and parameters: {}. Best is trial 0 with value: 0.6994318181818182.
[I 2025-04-03 23:06:36,844] Trial 12 finished with value: 0.6994318181818182 and parameters: {}. Best is trial 0 with value: 0.6994318181818182.
[I 2025-04-03 23:06:36,859] Trial 13 finished with value: 0.6994318181818182 and parameters: {}. Best is trial 0 with value: 0.6994318181818182.
[I 2025-04-03 23:06:36,874] Trial 14 finished with value: 0.6994318181818182 and parameters: {}. Best is trial 0 with value: 0.6994318181818182.
[I 2025-04-03 23:06:36,888] Trial 15 finished with value: 0.6994318181818182 and parameters: {}. Best is trial 0 with value: 0.6994318181818182.
[I 2025-04-03 23:06:36,905] Trial 16 finished with value: 0.6994318181818182 and parameters: {}. Best is trial 0 with value: 0.6994318181818182.
[I 2025-04-03 23:06:36,919] Trial 17 finished with value: 0.6994318181818182 and parameters: {}. Best is trial 0 with value: 0.699

Best AUC for lda: 0.6994318181818182
Best parameters for lda: {}


[I 2025-04-03 23:06:37,968] Trial 0 finished with value: 0.5141042780748664 and parameters: {'learning_rate': 1.620978791911401, 'n_estimators': 99}. Best is trial 0 with value: 0.5141042780748664.
[I 2025-04-03 23:06:38,762] Trial 1 finished with value: 0.5684770499108736 and parameters: {'learning_rate': 1.6607679690213386, 'n_estimators': 142}. Best is trial 1 with value: 0.5684770499108736.
[I 2025-04-03 23:06:40,193] Trial 2 finished with value: 0.574777183600713 and parameters: {'learning_rate': 1.1089462436484177, 'n_estimators': 257}. Best is trial 2 with value: 0.574777183600713.
[I 2025-04-03 23:06:41,401] Trial 3 finished with value: 0.5878676470588236 and parameters: {'learning_rate': 1.1532229564233214, 'n_estimators': 224}. Best is trial 3 with value: 0.5878676470588236.
[I 2025-04-03 23:06:42,962] Trial 4 finished with value: 0.5627450980392157 and parameters: {'learning_rate': 1.203731983806368, 'n_estimators': 290}. Best is trial 3 with value: 0.5878676470588236.
[I 20

[I 2025-04-03 23:07:11,565] Trial 41 finished with value: 0.6189561051693404 and parameters: {'learning_rate': 0.12698003747539394, 'n_estimators': 299}. Best is trial 32 with value: 0.6559157754010696.
[I 2025-04-03 23:07:12,319] Trial 42 finished with value: 0.649713123885918 and parameters: {'learning_rate': 0.03469999123600159, 'n_estimators': 136}. Best is trial 32 with value: 0.6559157754010696.
[I 2025-04-03 23:07:12,755] Trial 43 finished with value: 0.6516349153297682 and parameters: {'learning_rate': 0.11892011019006053, 'n_estimators': 80}. Best is trial 32 with value: 0.6559157754010696.
[I 2025-04-03 23:07:14,237] Trial 44 finished with value: 0.6474403966131906 and parameters: {'learning_rate': 0.01852719434679447, 'n_estimators': 278}. Best is trial 32 with value: 0.6559157754010696.
[I 2025-04-03 23:07:15,553] Trial 45 finished with value: 0.5931651069518716 and parameters: {'learning_rate': 0.3157564291557847, 'n_estimators': 233}. Best is trial 32 with value: 0.655915

Best AUC for ada: 0.6569379456327986
Best parameters for ada: {'learning_rate': 0.012627466233997098, 'n_estimators': 129}


[I 2025-04-03 23:07:19,698] Trial 0 finished with value: 0.5408589572192513 and parameters: {'learning_rate': 0.19148807215433028, 'max_depth': 6, 'min_samples_split': 16, 'n_estimators': 277, 'subsample': 0.9207750601134792}. Best is trial 0 with value: 0.5408589572192513.
[I 2025-04-03 23:07:20,754] Trial 1 finished with value: 0.5479445187165777 and parameters: {'learning_rate': 0.1994208355717747, 'max_depth': 8, 'min_samples_split': 11, 'n_estimators': 169, 'subsample': 0.8447007857282027}. Best is trial 1 with value: 0.5479445187165777.
[I 2025-04-03 23:07:21,455] Trial 2 finished with value: 0.5708054812834223 and parameters: {'learning_rate': 0.03392399105243359, 'max_depth': 3, 'min_samples_split': 8, 'n_estimators': 207, 'subsample': 0.5408981814062714}. Best is trial 2 with value: 0.5708054812834223.
[I 2025-04-03 23:07:22,615] Trial 3 finished with value: 0.5450200534759357 and parameters: {'learning_rate': 0.10892651618142911, 'max_depth': 7, 'min_samples_split': 17, 'n_es

[I 2025-04-03 23:07:40,965] Trial 30 finished with value: 0.5585672905525846 and parameters: {'learning_rate': 0.11449569390193201, 'max_depth': 4, 'min_samples_split': 19, 'n_estimators': 182, 'subsample': 0.7115518802204074}. Best is trial 18 with value: 0.5969864081996434.
[I 2025-04-03 23:07:41,650] Trial 31 finished with value: 0.5590017825311944 and parameters: {'learning_rate': 0.022248211029582486, 'max_depth': 3, 'min_samples_split': 7, 'n_estimators': 207, 'subsample': 0.5411149558550765}. Best is trial 18 with value: 0.5969864081996434.
[I 2025-04-03 23:07:42,444] Trial 32 finished with value: 0.5782141265597148 and parameters: {'learning_rate': 0.03619172645058989, 'max_depth': 3, 'min_samples_split': 5, 'n_estimators': 233, 'subsample': 0.5137443790935132}. Best is trial 18 with value: 0.5969864081996434.
[I 2025-04-03 23:07:43,255] Trial 33 finished with value: 0.5374610071301247 and parameters: {'learning_rate': 0.0439012823096275, 'max_depth': 3, 'min_samples_split': 6,

Best AUC for gb: 0.5969864081996434
Best parameters for gb: {'learning_rate': 0.1714705440880487, 'max_depth': 3, 'min_samples_split': 18, 'n_estimators': 92, 'subsample': 0.6820183062649872}


[I 2025-04-03 23:07:58,940] Trial 9 finished with value: 0.5662377450980391 and parameters: {'C': 0.07855093106598364}. Best is trial 4 with value: 0.6832414215686273.
[I 2025-04-03 23:07:58,967] Trial 10 finished with value: 0.6698640819964349 and parameters: {'C': 12.921525310608022}. Best is trial 4 with value: 0.6832414215686273.
[I 2025-04-03 23:07:59,005] Trial 11 finished with value: 0.5150066844919785 and parameters: {'C': 75.73748835860916}. Best is trial 4 with value: 0.6832414215686273.
[I 2025-04-03 23:07:59,029] Trial 12 finished with value: 0.6786597593582888 and parameters: {'C': 5.06780622537644}. Best is trial 4 with value: 0.6832414215686273.
[I 2025-04-03 23:07:59,055] Trial 13 finished with value: 0.6818488190730838 and parameters: {'C': 3.366941319396143}. Best is trial 4 with value: 0.6832414215686273.
[I 2025-04-03 23:07:59,092] Trial 14 finished with value: 0.5152350713012479 and parameters: {'C': 73.63380949749425}. Best is trial 4 with value: 0.683241421568627

Best AUC for svm: 0.6832414215686273
Best parameters for svm: {'C': 3.9949312416946654}

Model Best AUCs:
et: 0.6538992869875222
knn: 0.649935940285205
logistic: 0.6834001782531194
xgb: 0.5516571969696968
rf: 0.6304311497326203
gnb: 0.6498440285204989
lda: 0.6994318181818182
ada: 0.6569379456327986
gb: 0.5969864081996434
svm: 0.6832414215686273


In [3]:
import joblib
from itertools import combinations
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier

# 使用最佳参数初始化模型
optimized_models_instances = {
    'et': ExtraTreesClassifier(**optimized_models['et'], random_state=42),
    'knn': KNeighborsClassifier(**optimized_models['knn']),
    'logistic': LogisticRegression(**optimized_models['logistic'], random_state=42),
    'xgb': xgb.XGBClassifier(**optimized_models['xgb'], random_state=42),
    'rf': RandomForestClassifier(**optimized_models['rf'], random_state=42),
    #'bagging': BaggingClassifier(**optimized_models['bagging'], random_state=42),
    'gnb': GaussianNB(**optimized_models['gnb']),
    'lda': LinearDiscriminantAnalysis(**optimized_models['lda']),
    'ada': AdaBoostClassifier(**optimized_models['ada'], random_state=42, algorithm='SAMME'),
    'gb': GradientBoostingClassifier(**optimized_models['gb'], random_state=42),
    'svm': SVC(**optimized_models['svm'], probability=True, random_state=42)
}

# 保存每个优化后的模型
for model_name, model in optimized_models_instances.items():
    joblib.dump(model, f'C:\\Users\\一个大活人\\ppi-wgcna-sssgsea-4.3\\basemodel\\wgcna\\{model_name}_wgcna.joblib')
    print(f"Model {model_name} saved successfully.")



Model et saved successfully.
Model knn saved successfully.
Model logistic saved successfully.
Model xgb saved successfully.
Model rf saved successfully.
Model gnb saved successfully.
Model lda saved successfully.
Model ada saved successfully.
Model gb saved successfully.
Model svm saved successfully.
