# 构建基于PPI的基础模型

In [1]:
import optuna
from optuna.samplers import TPESampler
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import xgboost as xgb
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

np.random.seed(42)

# 加载数据集

data1 = pd.read_csv('ppi_10.csv')
X = data1.iloc[:, 1:]
y = data1.iloc[:, 0]  # 假设标签在 data1 的第一列

# 数据标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 拆分数据集
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)



In [2]:
# 5折交叉验证
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 目标函数
def objective(trial, model_name):
    if model_name == 'et':
        model = ExtraTreesClassifier(
            max_depth=trial.suggest_int('max_depth', 5, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            random_state=42
        )
    elif model_name == 'knn':
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int('n_neighbors', 3, 20),
            leaf_size=trial.suggest_int('leaf_size', 5, 50),
            weights=trial.suggest_categorical('weights', ['uniform', 'distance'])
        )
    elif model_name == 'logistic':
        model = LogisticRegression(
            C=trial.suggest_float('C', 1e-4, 1e2, log=True),
            penalty='l1',
            solver='saga',
            random_state=42
        )
    elif model_name == 'xgb':
        model = xgb.XGBClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            use_label_encoder=False,
            eval_metric='logloss',
            random_state=42
        )
    elif model_name == 'rf':
        model = RandomForestClassifier(
            max_depth=trial.suggest_int('max_depth', 5, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            random_state=42
        )
    elif model_name == 'gnb':
        model = GaussianNB(
            var_smoothing=trial.suggest_float('var_smoothing', 1e-12, 1e-7, log=True)
        )
    elif model_name == 'lda':
        model = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    elif model_name == 'ada':
        model = AdaBoostClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 2),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            algorithm='SAMME',
            random_state=42
        )
    elif model_name == 'gb':
        model = GradientBoostingClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            subsample=trial.suggest_float('subsample', 0.5, 1.0),
            random_state=42
        )
    elif model_name == 'svm':
        model = SVC(
            C=trial.suggest_float('C', 1e-3, 1e2, log=True),
            probability=True,
            random_state=42
        )

    # 5折交叉验证计算 AUC 均值
    auc_scores = []
    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_val = X_train[train_idx], X_train[val_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model.fit(X_tr, y_tr)
        y_pred = model.predict_proba(X_val)[:, 1]
        auc_scores.append(roc_auc_score(y_val, y_pred))

    return np.mean(auc_scores)

# 运行 Optuna 进行超参数优化
optimized_models = {}
best_aucs = {}

for model_name in ['et', 'knn', 'logistic', 'xgb', 'rf', 'gnb', 'lda', 'ada', 'gb', 'svm']:
    study = optuna.create_study(direction='maximize', sampler=TPESampler())
    study.optimize(lambda trial: objective(trial, model_name), n_trials=50)

    optimized_models[model_name] = study.best_params
    best_aucs[model_name] = study.best_value

    print(f"Best AUC for {model_name}: {study.best_value}")
    print(f"Best parameters for {model_name}: {study.best_params}")

# 输出每个模型的最佳 AUC
print("\nModel Best AUCs:")
for model_name, auc in best_aucs.items():
    print(f"{model_name}: {auc}")
    



[I 2025-04-03 23:10:04,623] A new study created in memory with name: no-name-18c26293-8811-40fe-a2e4-cbc5157002e3
[I 2025-04-03 23:10:05,528] Trial 0 finished with value: 0.6759413992869876 and parameters: {'max_depth': 16, 'min_samples_split': 16, 'n_estimators': 296}. Best is trial 0 with value: 0.6759413992869876.
[I 2025-04-03 23:10:06,291] Trial 1 finished with value: 0.6921290106951872 and parameters: {'max_depth': 11, 'min_samples_split': 5, 'n_estimators': 247}. Best is trial 1 with value: 0.6921290106951872.
[I 2025-04-03 23:10:06,599] Trial 2 finished with value: 0.6871323529411766 and parameters: {'max_depth': 17, 'min_samples_split': 8, 'n_estimators': 99}. Best is trial 1 with value: 0.6921290106951872.
[I 2025-04-03 23:10:07,149] Trial 3 finished with value: 0.6986965240641712 and parameters: {'max_depth': 18, 'min_samples_split': 2, 'n_estimators': 171}. Best is trial 3 with value: 0.6986965240641712.
[I 2025-04-03 23:10:07,390] Trial 4 finished with value: 0.68843025846

[I 2025-04-03 23:10:26,758] Trial 40 finished with value: 0.6917780748663102 and parameters: {'max_depth': 15, 'min_samples_split': 5, 'n_estimators': 270}. Best is trial 28 with value: 0.7057486631016043.
[I 2025-04-03 23:10:27,358] Trial 41 finished with value: 0.6842858734402852 and parameters: {'max_depth': 18, 'min_samples_split': 3, 'n_estimators': 179}. Best is trial 28 with value: 0.7057486631016043.
[I 2025-04-03 23:10:27,859] Trial 42 finished with value: 0.6937305035650624 and parameters: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 151}. Best is trial 28 with value: 0.7057486631016043.
[I 2025-04-03 23:10:28,524] Trial 43 finished with value: 0.6926860516934046 and parameters: {'max_depth': 17, 'min_samples_split': 4, 'n_estimators': 202}. Best is trial 28 with value: 0.7057486631016043.
[I 2025-04-03 23:10:28,708] Trial 44 finished with value: 0.6779133244206774 and parameters: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 52}. Best is trial 28 wit

Best AUC for et: 0.7057486631016043
Best parameters for et: {'max_depth': 19, 'min_samples_split': 4, 'n_estimators': 143}


[I 2025-04-03 23:10:31,551] Trial 15 finished with value: 0.6296847147950089 and parameters: {'n_neighbors': 14, 'leaf_size': 33, 'weights': 'distance'}. Best is trial 10 with value: 0.6529133244206774.
[I 2025-04-03 23:10:31,567] Trial 16 finished with value: 0.6222398618538325 and parameters: {'n_neighbors': 3, 'leaf_size': 18, 'weights': 'distance'}. Best is trial 10 with value: 0.6529133244206774.
[I 2025-04-03 23:10:31,585] Trial 17 finished with value: 0.6529133244206774 and parameters: {'n_neighbors': 8, 'leaf_size': 6, 'weights': 'distance'}. Best is trial 10 with value: 0.6529133244206774.
[I 2025-04-03 23:10:31,602] Trial 18 finished with value: 0.6429924242424242 and parameters: {'n_neighbors': 20, 'leaf_size': 18, 'weights': 'distance'}. Best is trial 10 with value: 0.6529133244206774.
[I 2025-04-03 23:10:31,617] Trial 19 finished with value: 0.6245933600713013 and parameters: {'n_neighbors': 15, 'leaf_size': 31, 'weights': 'distance'}. Best is trial 10 with value: 0.652913

[I 2025-04-03 23:10:32,199] Trial 1 finished with value: 0.5 and parameters: {'C': 0.00025582219429886346}. Best is trial 0 with value: 0.6750111408199643.
[I 2025-04-03 23:10:32,211] Trial 2 finished with value: 0.46951871657754013 and parameters: {'C': 0.06310545868356429}. Best is trial 0 with value: 0.6750111408199643.
[I 2025-04-03 23:10:32,228] Trial 3 finished with value: 0.6725211675579322 and parameters: {'C': 1.3286954693119983}. Best is trial 0 with value: 0.6750111408199643.
[I 2025-04-03 23:10:32,239] Trial 4 finished with value: 0.5 and parameters: {'C': 0.000480106439056567}. Best is trial 0 with value: 0.6750111408199643.
[I 2025-04-03 23:10:32,257] Trial 5 finished with value: 0.6747938948306595 and parameters: {'C': 1.692882782094645}. Best is trial 0 with value: 0.6750111408199643.
[I 2025-04-03 23:10:32,271] Trial 6 finished with value: 0.6478108288770054 and parameters: {'C': 0.17864021879385233}. Best is trial 0 with value: 0.6750111408199643.
[I 2025-04-03 23:10:

Best AUC for knn: 0.6558294340463459
Best parameters for knn: {'n_neighbors': 5, 'leaf_size': 10, 'weights': 'distance'}


[I 2025-04-03 23:10:32,384] Trial 13 finished with value: 0.676052807486631 and parameters: {'C': 5.71263317783063}. Best is trial 9 with value: 0.6875779857397504.
[I 2025-04-03 23:10:32,405] Trial 14 finished with value: 0.675891265597148 and parameters: {'C': 2.78201968330082}. Best is trial 9 with value: 0.6875779857397504.
[I 2025-04-03 23:10:32,421] Trial 15 finished with value: 0.6389872994652406 and parameters: {'C': 0.157194644150318}. Best is trial 9 with value: 0.6875779857397504.
[I 2025-04-03 23:10:32,442] Trial 16 finished with value: 0.676052807486631 and parameters: {'C': 8.878127991434619}. Best is trial 9 with value: 0.6875779857397504.
[I 2025-04-03 23:10:32,455] Trial 17 finished with value: 0.5 and parameters: {'C': 0.006205271313403954}. Best is trial 9 with value: 0.6875779857397504.
[I 2025-04-03 23:10:32,476] Trial 18 finished with value: 0.690435606060606 and parameters: {'C': 0.5501450455013761}. Best is trial 18 with value: 0.690435606060606.
[I 2025-04-03 2

[I 2025-04-03 23:10:32,582] Trial 24 finished with value: 0.46951871657754013 and parameters: {'C': 0.05234228495290247}. Best is trial 19 with value: 0.6926693404634581.
[I 2025-04-03 23:10:32,602] Trial 25 finished with value: 0.6843694295900178 and parameters: {'C': 0.3022162610190531}. Best is trial 19 with value: 0.6926693404634581.
[I 2025-04-03 23:10:32,616] Trial 26 finished with value: 0.5 and parameters: {'C': 0.0021179940533181145}. Best is trial 19 with value: 0.6926693404634581.
[I 2025-04-03 23:10:32,636] Trial 27 finished with value: 0.6749832887700534 and parameters: {'C': 20.266512735897116}. Best is trial 19 with value: 0.6926693404634581.
[I 2025-04-03 23:10:32,658] Trial 28 finished with value: 0.6749164438502675 and parameters: {'C': 3.5064269204859895}. Best is trial 19 with value: 0.6926693404634581.
[I 2025-04-03 23:10:32,679] Trial 29 finished with value: 0.6840352049910874 and parameters: {'C': 0.927357173828365}. Best is trial 19 with value: 0.692669340463458

[I 2025-04-03 23:10:32,855] Trial 39 finished with value: 0.4829434046345812 and parameters: {'C': 0.08224998030175633}. Best is trial 31 with value: 0.6935606060606061.
[I 2025-04-03 23:10:32,878] Trial 40 finished with value: 0.6750111408199643 and parameters: {'C': 36.81911230331545}. Best is trial 31 with value: 0.6935606060606061.
[I 2025-04-03 23:10:32,897] Trial 41 finished with value: 0.6925189393939394 and parameters: {'C': 0.5124849117495399}. Best is trial 31 with value: 0.6935606060606061.
[I 2025-04-03 23:10:32,917] Trial 42 finished with value: 0.6893939393939393 and parameters: {'C': 0.5552312867209735}. Best is trial 31 with value: 0.6935606060606061.
[I 2025-04-03 23:10:32,937] Trial 43 finished with value: 0.6748217468805704 and parameters: {'C': 2.9201193095918816}. Best is trial 31 with value: 0.6935606060606061.
[I 2025-04-03 23:10:32,957] Trial 44 finished with value: 0.6767435383244207 and parameters: {'C': 0.9865165998467954}. Best is trial 31 with value: 0.6935

[I 2025-04-03 23:10:33,053] A new study created in memory with name: no-name-93132481-cb96-4069-bd74-7e048655d7d9


Best AUC for logistic: 0.6935606060606061
Best parameters for logistic: {'C': 0.48776008229191153}


[I 2025-04-03 23:10:33,312] Trial 0 finished with value: 0.6658812388591799 and parameters: {'learning_rate': 0.13666772704747748, 'max_depth': 4, 'n_estimators': 93}. Best is trial 0 with value: 0.6658812388591799.
[I 2025-04-03 23:10:33,531] Trial 1 finished with value: 0.6145889037433154 and parameters: {'learning_rate': 0.2746048672575809, 'max_depth': 10, 'n_estimators': 82}. Best is trial 0 with value: 0.6658812388591799.
[I 2025-04-03 23:10:33,928] Trial 2 finished with value: 0.6466577540106953 and parameters: {'learning_rate': 0.18378135873738172, 'max_depth': 10, 'n_estimators': 288}. Best is trial 0 with value: 0.6658812388591799.
[I 2025-04-03 23:10:34,193] Trial 3 finished with value: 0.6370376559714794 and parameters: {'learning_rate': 0.2871914944817568, 'max_depth': 10, 'n_estimators': 290}. Best is trial 0 with value: 0.6658812388591799.
[I 2025-04-03 23:10:34,333] Trial 4 finished with value: 0.6378620766488413 and parameters: {'learning_rate': 0.2247410900656459, 'ma

[I 2025-04-03 23:10:41,892] Trial 38 finished with value: 0.6570131461675579 and parameters: {'learning_rate': 0.089122473713552, 'max_depth': 9, 'n_estimators': 65}. Best is trial 28 with value: 0.6842190285204991.
[I 2025-04-03 23:10:42,027] Trial 39 finished with value: 0.6387979055258468 and parameters: {'learning_rate': 0.2624623441010243, 'max_depth': 5, 'n_estimators': 96}. Best is trial 28 with value: 0.6842190285204991.
[I 2025-04-03 23:10:42,211] Trial 40 finished with value: 0.6411876114081997 and parameters: {'learning_rate': 0.21833551658650274, 'max_depth': 7, 'n_estimators': 124}. Best is trial 28 with value: 0.6842190285204991.
[I 2025-04-03 23:10:42,343] Trial 41 finished with value: 0.6629512032085562 and parameters: {'learning_rate': 0.04545649390954552, 'max_depth': 5, 'n_estimators': 78}. Best is trial 28 with value: 0.6842190285204991.
[I 2025-04-03 23:10:42,489] Trial 42 finished with value: 0.6684881907308379 and parameters: {'learning_rate': 0.0310794202496248,

Best AUC for xgb: 0.6904188948306597
Best parameters for xgb: {'learning_rate': 0.023868161584308278, 'max_depth': 6, 'n_estimators': 57}


[I 2025-04-03 23:10:44,903] Trial 0 finished with value: 0.6479835115864527 and parameters: {'max_depth': 19, 'min_samples_split': 14, 'n_estimators': 235}. Best is trial 0 with value: 0.6479835115864527.
[I 2025-04-03 23:10:46,007] Trial 1 finished with value: 0.6445020053475936 and parameters: {'max_depth': 5, 'min_samples_split': 6, 'n_estimators': 244}. Best is trial 0 with value: 0.6479835115864527.
[I 2025-04-03 23:10:46,446] Trial 2 finished with value: 0.6484625668449198 and parameters: {'max_depth': 6, 'min_samples_split': 6, 'n_estimators': 97}. Best is trial 2 with value: 0.6484625668449198.
[I 2025-04-03 23:10:47,261] Trial 3 finished with value: 0.6502952317290552 and parameters: {'max_depth': 18, 'min_samples_split': 12, 'n_estimators': 184}. Best is trial 3 with value: 0.6502952317290552.
[I 2025-04-03 23:10:48,059] Trial 4 finished with value: 0.6641209893048128 and parameters: {'max_depth': 12, 'min_samples_split': 19, 'n_estimators': 177}. Best is trial 4 with value: 

[I 2025-04-03 23:11:06,729] Trial 40 finished with value: 0.6605169340463458 and parameters: {'max_depth': 6, 'min_samples_split': 3, 'n_estimators': 125}. Best is trial 28 with value: 0.6883021390374331.
[I 2025-04-03 23:11:07,064] Trial 41 finished with value: 0.685366532976827 and parameters: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 64}. Best is trial 28 with value: 0.6883021390374331.
[I 2025-04-03 23:11:07,385] Trial 42 finished with value: 0.6875724153297682 and parameters: {'max_depth': 9, 'min_samples_split': 2, 'n_estimators': 63}. Best is trial 28 with value: 0.6883021390374331.
[I 2025-04-03 23:11:07,719] Trial 43 finished with value: 0.6875724153297682 and parameters: {'max_depth': 9, 'min_samples_split': 2, 'n_estimators': 63}. Best is trial 28 with value: 0.6883021390374331.
[I 2025-04-03 23:11:08,045] Trial 44 finished with value: 0.6875724153297682 and parameters: {'max_depth': 9, 'min_samples_split': 2, 'n_estimators': 63}. Best is trial 28 with value:

Best AUC for rf: 0.6883021390374331
Best parameters for rf: {'max_depth': 8, 'min_samples_split': 2, 'n_estimators': 69}


[I 2025-04-03 23:11:11,649] Trial 18 finished with value: 0.6926526292335116 and parameters: {'var_smoothing': 8.474067492681141e-08}. Best is trial 0 with value: 0.6926526292335116.
[I 2025-04-03 23:11:11,661] Trial 19 finished with value: 0.6926526292335116 and parameters: {'var_smoothing': 1.2037620881676708e-11}. Best is trial 0 with value: 0.6926526292335116.
[I 2025-04-03 23:11:11,672] Trial 20 finished with value: 0.6926526292335116 and parameters: {'var_smoothing': 1.893340389661181e-08}. Best is trial 0 with value: 0.6926526292335116.
[I 2025-04-03 23:11:11,683] Trial 21 finished with value: 0.6926526292335116 and parameters: {'var_smoothing': 1.4126687165608273e-10}. Best is trial 0 with value: 0.6926526292335116.
[I 2025-04-03 23:11:11,695] Trial 22 finished with value: 0.6926526292335116 and parameters: {'var_smoothing': 3.201960424860924e-10}. Best is trial 0 with value: 0.6926526292335116.
[I 2025-04-03 23:11:11,706] Trial 23 finished with value: 0.6926526292335116 and pa

Best AUC for gnb: 0.6926526292335116
Best parameters for gnb: {'var_smoothing': 2.164907962800091e-11}


[I 2025-04-03 23:11:12,227] Trial 11 finished with value: 0.6904244652406417 and parameters: {}. Best is trial 0 with value: 0.6904244652406417.
[I 2025-04-03 23:11:12,242] Trial 12 finished with value: 0.6904244652406417 and parameters: {}. Best is trial 0 with value: 0.6904244652406417.
[I 2025-04-03 23:11:12,257] Trial 13 finished with value: 0.6904244652406417 and parameters: {}. Best is trial 0 with value: 0.6904244652406417.
[I 2025-04-03 23:11:12,271] Trial 14 finished with value: 0.6904244652406417 and parameters: {}. Best is trial 0 with value: 0.6904244652406417.
[I 2025-04-03 23:11:12,287] Trial 15 finished with value: 0.6904244652406417 and parameters: {}. Best is trial 0 with value: 0.6904244652406417.
[I 2025-04-03 23:11:12,301] Trial 16 finished with value: 0.6904244652406417 and parameters: {}. Best is trial 0 with value: 0.6904244652406417.
[I 2025-04-03 23:11:12,315] Trial 17 finished with value: 0.6904244652406417 and parameters: {}. Best is trial 0 with value: 0.690

Best AUC for lda: 0.6904244652406417
Best parameters for lda: {}


[I 2025-04-03 23:11:13,151] Trial 0 finished with value: 0.6085282976827094 and parameters: {'learning_rate': 1.1359705479781719, 'n_estimators': 64}. Best is trial 0 with value: 0.6085282976827094.
[I 2025-04-03 23:11:13,602] Trial 1 finished with value: 0.6350629456327986 and parameters: {'learning_rate': 0.3558489721350431, 'n_estimators': 81}. Best is trial 1 with value: 0.6350629456327986.
[I 2025-04-03 23:11:13,929] Trial 2 finished with value: 0.6171930704099822 and parameters: {'learning_rate': 0.1748985417719543, 'n_estimators': 62}. Best is trial 1 with value: 0.6350629456327986.
[I 2025-04-03 23:11:14,283] Trial 3 finished with value: 0.6167335115864526 and parameters: {'learning_rate': 1.1046476447576643, 'n_estimators': 65}. Best is trial 1 with value: 0.6350629456327986.
[I 2025-04-03 23:11:15,045] Trial 4 finished with value: 0.5722593582887701 and parameters: {'learning_rate': 1.5561195693849603, 'n_estimators': 145}. Best is trial 1 with value: 0.6350629456327986.
[I 2

[I 2025-04-03 23:11:45,618] Trial 41 finished with value: 0.6442959001782531 and parameters: {'learning_rate': 0.4849078091323872, 'n_estimators': 115}. Best is trial 28 with value: 0.6574448529411765.
[I 2025-04-03 23:11:46,259] Trial 42 finished with value: 0.6533923796791444 and parameters: {'learning_rate': 0.5804950054028094, 'n_estimators': 118}. Best is trial 28 with value: 0.6574448529411765.
[I 2025-04-03 23:11:46,990] Trial 43 finished with value: 0.6436915106951873 and parameters: {'learning_rate': 0.348715459102363, 'n_estimators': 140}. Best is trial 28 with value: 0.6574448529411765.
[I 2025-04-03 23:11:47,833] Trial 44 finished with value: 0.6387422014260251 and parameters: {'learning_rate': 0.593398471712301, 'n_estimators': 154}. Best is trial 28 with value: 0.6574448529411765.
[I 2025-04-03 23:11:48,775] Trial 45 finished with value: 0.6111185383244206 and parameters: {'learning_rate': 0.8924134346524709, 'n_estimators': 178}. Best is trial 28 with value: 0.6574448529

Best AUC for ada: 0.6574448529411765
Best parameters for ada: {'learning_rate': 0.3855833760795085, 'n_estimators': 145}


[I 2025-04-03 23:11:51,707] Trial 0 finished with value: 0.6690006684491978 and parameters: {'learning_rate': 0.13702653811920046, 'max_depth': 10, 'min_samples_split': 11, 'n_estimators': 101, 'subsample': 0.7091554599643576}. Best is trial 0 with value: 0.6690006684491978.
[I 2025-04-03 23:11:52,548] Trial 1 finished with value: 0.6379177807486631 and parameters: {'learning_rate': 0.195832018093705, 'max_depth': 3, 'min_samples_split': 19, 'n_estimators': 243, 'subsample': 0.803607263179579}. Best is trial 0 with value: 0.6690006684491978.
[I 2025-04-03 23:11:54,381] Trial 2 finished with value: 0.6848707664884135 and parameters: {'learning_rate': 0.04073104058180706, 'max_depth': 9, 'min_samples_split': 6, 'n_estimators': 299, 'subsample': 0.6251469955080247}. Best is trial 2 with value: 0.6848707664884135.
[I 2025-04-03 23:11:54,761] Trial 3 finished with value: 0.6462622549019608 and parameters: {'learning_rate': 0.14478643513321154, 'max_depth': 3, 'min_samples_split': 2, 'n_esti

[I 2025-04-03 23:12:22,045] Trial 30 finished with value: 0.6810494652406416 and parameters: {'learning_rate': 0.16684658718690332, 'max_depth': 7, 'min_samples_split': 4, 'n_estimators': 188, 'subsample': 0.8822241909984181}. Best is trial 21 with value: 0.698117201426025.
[I 2025-04-03 23:12:23,444] Trial 31 finished with value: 0.6843192959001783 and parameters: {'learning_rate': 0.23528677078516747, 'max_depth': 8, 'min_samples_split': 2, 'n_estimators': 278, 'subsample': 0.8192903490815826}. Best is trial 21 with value: 0.698117201426025.
[I 2025-04-03 23:12:24,714] Trial 32 finished with value: 0.6864471925133689 and parameters: {'learning_rate': 0.24874648119698736, 'max_depth': 10, 'min_samples_split': 3, 'n_estimators': 252, 'subsample': 0.7837891992656376}. Best is trial 21 with value: 0.698117201426025.
[I 2025-04-03 23:12:25,991] Trial 33 finished with value: 0.6913213012477718 and parameters: {'learning_rate': 0.2766081914712293, 'max_depth': 8, 'min_samples_split': 2, 'n_

Best AUC for gb: 0.7144607843137254
Best parameters for gb: {'learning_rate': 0.27824447470120994, 'max_depth': 9, 'min_samples_split': 3, 'n_estimators': 272, 'subsample': 0.8850316686412799}


[I 2025-04-03 23:12:43,433] Trial 9 finished with value: 0.6738803475935828 and parameters: {'C': 0.8464474159823367}. Best is trial 2 with value: 0.6828988413547237.
[I 2025-04-03 23:12:43,456] Trial 10 finished with value: 0.6826815953654191 and parameters: {'C': 0.009422981089100854}. Best is trial 2 with value: 0.6828988413547237.
[I 2025-04-03 23:12:43,478] Trial 11 finished with value: 0.6818293226381462 and parameters: {'C': 0.06946219072148478}. Best is trial 2 with value: 0.6828988413547237.
[I 2025-04-03 23:12:43,499] Trial 12 finished with value: 0.6806929590017825 and parameters: {'C': 0.03231779515440809}. Best is trial 2 with value: 0.6828988413547237.
[I 2025-04-03 23:12:43,525] Trial 13 finished with value: 0.645727495543672 and parameters: {'C': 5.200303244452213}. Best is trial 2 with value: 0.6828988413547237.
[I 2025-04-03 23:12:43,546] Trial 14 finished with value: 0.6806929590017825 and parameters: {'C': 0.034255814766687015}. Best is trial 2 with value: 0.6828988

Best AUC for svm: 0.6860405525846702
Best parameters for svm: {'C': 0.38995453773975636}

Model Best AUCs:
et: 0.7057486631016043
knn: 0.6558294340463459
logistic: 0.6935606060606061
xgb: 0.6904188948306597
rf: 0.6883021390374331
gnb: 0.6926526292335116
lda: 0.6904244652406417
ada: 0.6574448529411765
gb: 0.7144607843137254
svm: 0.6860405525846702


In [3]:
import joblib
from itertools import combinations
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier

# 使用最佳参数初始化模型
optimized_models_instances = {
    'et': ExtraTreesClassifier(**optimized_models['et'], random_state=42),
    'knn': KNeighborsClassifier(**optimized_models['knn']),
    'logistic': LogisticRegression(**optimized_models['logistic'], random_state=42),
    'xgb': xgb.XGBClassifier(**optimized_models['xgb'], random_state=42),
    'rf': RandomForestClassifier(**optimized_models['rf'], random_state=42),
    #'bagging': BaggingClassifier(**optimized_models['bagging'], random_state=42),
    'gnb': GaussianNB(**optimized_models['gnb']),
    'lda': LinearDiscriminantAnalysis(**optimized_models['lda']),
    'ada': AdaBoostClassifier(**optimized_models['ada'], random_state=42, algorithm='SAMME'),
    'gb': GradientBoostingClassifier(**optimized_models['gb'], random_state=42),
    'svm': SVC(**optimized_models['svm'], probability=True, random_state=42)
}

# 保存每个优化后的模型
for model_name, model in optimized_models_instances.items():
    joblib.dump(model, f'C:\\Users\\一个大活人\\ppi-wgcna-sssgsea-4.3\\basemodel\\ppi\\{model_name}_ppi.joblib')  
    print(f"Model {model_name} saved successfully.")



Model et saved successfully.
Model knn saved successfully.
Model logistic saved successfully.
Model xgb saved successfully.
Model rf saved successfully.
Model gnb saved successfully.
Model lda saved successfully.
Model ada saved successfully.
Model gb saved successfully.
Model svm saved successfully.
