# 构建基于SSGSEA的基础模型

In [1]:
import optuna
from optuna.samplers import TPESampler
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import xgboost as xgb
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

np.random.seed(42)

# 加载数据集

data1 = pd.read_csv('ssgsea_10.csv')
X = data1.iloc[:, 1:]
y = data1.iloc[:, 0]  # 假设标签在 data1 的第一列

# 数据标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 拆分数据集
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

In [2]:
# 5折交叉验证
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 目标函数
def objective(trial, model_name):
    if model_name == 'et':
        model = ExtraTreesClassifier(
            max_depth=trial.suggest_int('max_depth', 5, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            random_state=42
        )
    elif model_name == 'knn':
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int('n_neighbors', 3, 20),
            leaf_size=trial.suggest_int('leaf_size', 5, 50),
            weights=trial.suggest_categorical('weights', ['uniform', 'distance'])
        )
    elif model_name == 'logistic':
        model = LogisticRegression(
            C=trial.suggest_float('C', 1e-4, 1e2, log=True),
            penalty='l1',
            solver='saga',
            random_state=42
        )
    elif model_name == 'xgb':
        model = xgb.XGBClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            use_label_encoder=False,
            eval_metric='logloss',
            random_state=42
        )
    elif model_name == 'rf':
        model = RandomForestClassifier(
            max_depth=trial.suggest_int('max_depth', 5, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            random_state=42
        )
    elif model_name == 'gnb':
        model = GaussianNB(
            var_smoothing=trial.suggest_float('var_smoothing', 1e-12, 1e-7, log=True)
        )
    elif model_name == 'lda':
        model = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    elif model_name == 'ada':
        model = AdaBoostClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 2),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            algorithm='SAMME',
            random_state=42
        )
    elif model_name == 'gb':
        model = GradientBoostingClassifier(
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            subsample=trial.suggest_float('subsample', 0.5, 1.0),
            random_state=42
        )
    elif model_name == 'svm':
        model = SVC(
            C=trial.suggest_float('C', 1e-3, 1e2, log=True),
            probability=True,
            random_state=42
        )

    # 5折交叉验证计算 AUC 均值
    auc_scores = []
    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_val = X_train[train_idx], X_train[val_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model.fit(X_tr, y_tr)
        y_pred = model.predict_proba(X_val)[:, 1]
        auc_scores.append(roc_auc_score(y_val, y_pred))

    return np.mean(auc_scores)

# 运行 Optuna 进行超参数优化
optimized_models = {}
best_aucs = {}

for model_name in ['et', 'knn', 'logistic', 'xgb', 'rf', 'gnb', 'lda', 'ada', 'gb', 'svm']:
    study = optuna.create_study(direction='maximize', sampler=TPESampler())
    study.optimize(lambda trial: objective(trial, model_name), n_trials=50)

    optimized_models[model_name] = study.best_params
    best_aucs[model_name] = study.best_value

    print(f"Best AUC for {model_name}: {study.best_value}")
    print(f"Best parameters for {model_name}: {study.best_params}")

# 输出每个模型的最佳 AUC
print("\nModel Best AUCs:")
for model_name, auc in best_aucs.items():
    print(f"{model_name}: {auc}")
    



[I 2025-04-03 22:59:13,939] A new study created in memory with name: no-name-eee2070a-4472-48de-a31c-675ad0b6a5f8
[I 2025-04-03 22:59:14,429] Trial 0 finished with value: 0.6577261586452763 and parameters: {'max_depth': 16, 'min_samples_split': 4, 'n_estimators': 147}. Best is trial 0 with value: 0.6577261586452763.
[I 2025-04-03 22:59:14,865] Trial 1 finished with value: 0.6637979055258467 and parameters: {'max_depth': 7, 'min_samples_split': 4, 'n_estimators': 141}. Best is trial 1 with value: 0.6637979055258467.
[I 2025-04-03 22:59:15,522] Trial 2 finished with value: 0.6383801247771835 and parameters: {'max_depth': 10, 'min_samples_split': 4, 'n_estimators': 219}. Best is trial 1 with value: 0.6637979055258467.
[I 2025-04-03 22:59:15,753] Trial 3 finished with value: 0.6483400178253118 and parameters: {'max_depth': 16, 'min_samples_split': 10, 'n_estimators': 77}. Best is trial 1 with value: 0.6637979055258467.
[I 2025-04-03 22:59:16,280] Trial 4 finished with value: 0.641243315508

[I 2025-04-03 22:59:33,131] Trial 40 finished with value: 0.6382520053475936 and parameters: {'max_depth': 11, 'min_samples_split': 6, 'n_estimators': 151}. Best is trial 10 with value: 0.6682542335115864.
[I 2025-04-03 22:59:33,808] Trial 41 finished with value: 0.6478163992869875 and parameters: {'max_depth': 7, 'min_samples_split': 4, 'n_estimators': 227}. Best is trial 10 with value: 0.6682542335115864.
[I 2025-04-03 22:59:34,440] Trial 42 finished with value: 0.651721256684492 and parameters: {'max_depth': 6, 'min_samples_split': 3, 'n_estimators': 209}. Best is trial 10 with value: 0.6682542335115864.
[I 2025-04-03 22:59:35,007] Trial 43 finished with value: 0.6527183600713012 and parameters: {'max_depth': 7, 'min_samples_split': 4, 'n_estimators': 188}. Best is trial 10 with value: 0.6682542335115864.
[I 2025-04-03 22:59:35,807] Trial 44 finished with value: 0.6385639483065954 and parameters: {'max_depth': 8, 'min_samples_split': 17, 'n_estimators': 269}. Best is trial 10 with v

Best AUC for et: 0.6682542335115864
Best parameters for et: {'max_depth': 5, 'min_samples_split': 8, 'n_estimators': 55}


[I 2025-04-03 22:59:38,536] Trial 13 finished with value: 0.6129706996434937 and parameters: {'n_neighbors': 15, 'leaf_size': 32, 'weights': 'uniform'}. Best is trial 2 with value: 0.6325646167557932.
[I 2025-04-03 22:59:38,555] Trial 14 finished with value: 0.608706550802139 and parameters: {'n_neighbors': 8, 'leaf_size': 9, 'weights': 'distance'}. Best is trial 2 with value: 0.6325646167557932.
[I 2025-04-03 22:59:38,573] Trial 15 finished with value: 0.6300077985739752 and parameters: {'n_neighbors': 20, 'leaf_size': 22, 'weights': 'uniform'}. Best is trial 2 with value: 0.6325646167557932.
[I 2025-04-03 22:59:38,591] Trial 16 finished with value: 0.6177389705882353 and parameters: {'n_neighbors': 14, 'leaf_size': 30, 'weights': 'uniform'}. Best is trial 2 with value: 0.6325646167557932.
[I 2025-04-03 22:59:38,609] Trial 17 finished with value: 0.6086174242424243 and parameters: {'n_neighbors': 10, 'leaf_size': 38, 'weights': 'distance'}. Best is trial 2 with value: 0.63256461675579

[I 2025-04-03 22:59:39,248] Trial 1 finished with value: 0.5 and parameters: {'C': 0.0014864932671244968}. Best is trial 0 with value: 0.6546958556149731.
[I 2025-04-03 22:59:39,260] Trial 2 finished with value: 0.5 and parameters: {'C': 0.006815412572753202}. Best is trial 0 with value: 0.6546958556149731.
[I 2025-04-03 22:59:39,270] Trial 3 finished with value: 0.5 and parameters: {'C': 0.006025655219546574}. Best is trial 0 with value: 0.6546958556149731.
[I 2025-04-03 22:59:39,288] Trial 4 finished with value: 0.676754679144385 and parameters: {'C': 13.37454324405568}. Best is trial 4 with value: 0.676754679144385.
[I 2025-04-03 22:59:39,316] Trial 5 finished with value: 0.6778910427807486 and parameters: {'C': 11.43209933832456}. Best is trial 5 with value: 0.6778910427807486.
[I 2025-04-03 22:59:39,335] Trial 6 finished with value: 0.6435216131907308 and parameters: {'C': 0.5013598396043255}. Best is trial 5 with value: 0.6778910427807486.
[I 2025-04-03 22:59:39,345] Trial 7 fini

Best AUC for knn: 0.6325646167557932
Best parameters for knn: {'n_neighbors': 20, 'leaf_size': 33, 'weights': 'distance'}


[I 2025-04-03 22:59:39,425] Trial 12 finished with value: 0.6779578877005348 and parameters: {'C': 5.366522760456447}. Best is trial 12 with value: 0.6779578877005348.
[I 2025-04-03 22:59:39,444] Trial 13 finished with value: 0.6768215240641712 and parameters: {'C': 4.810152933203402}. Best is trial 12 with value: 0.6779578877005348.
[I 2025-04-03 22:59:39,457] Trial 14 finished with value: 0.5203208556149732 and parameters: {'C': 0.07040549452845753}. Best is trial 12 with value: 0.6779578877005348.
[I 2025-04-03 22:59:39,476] Trial 15 finished with value: 0.6723150623885917 and parameters: {'C': 2.378277834777443}. Best is trial 12 with value: 0.6779578877005348.
[I 2025-04-03 22:59:39,490] Trial 16 finished with value: 0.5902852049910873 and parameters: {'C': 0.14147838888872352}. Best is trial 12 with value: 0.6779578877005348.
[I 2025-04-03 22:59:39,510] Trial 17 finished with value: 0.676754679144385 and parameters: {'C': 14.398392767185927}. Best is trial 12 with value: 0.677957

[I 2025-04-03 22:59:39,632] Trial 24 finished with value: 0.6768215240641712 and parameters: {'C': 4.924603404534279}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,648] Trial 25 finished with value: 0.6411263368983957 and parameters: {'C': 0.45446709489374537}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,669] Trial 26 finished with value: 0.6768215240641712 and parameters: {'C': 25.173382374655027}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,688] Trial 27 finished with value: 0.6756573083778965 and parameters: {'C': 4.030196975383225}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,708] Trial 28 finished with value: 0.6592134581105169 and parameters: {'C': 0.9007397462823447}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,728] Trial 29 finished with value: 0.6768883689839572 and parameters: {'C': 97.36943214248102}. Best is trial 19 with value: 0.679027

[I 2025-04-03 22:59:39,828] Trial 34 finished with value: 0.6778910427807486 and parameters: {'C': 9.377398693553737}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,847] Trial 35 finished with value: 0.6768215240641712 and parameters: {'C': 26.464628656490458}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,867] Trial 36 finished with value: 0.6723150623885917 and parameters: {'C': 2.900497569055685}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,884] Trial 37 finished with value: 0.626431595365419 and parameters: {'C': 0.2910390730073431}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,896] Trial 38 finished with value: 0.5 and parameters: {'C': 0.023838620272049284}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:39,915] Trial 39 finished with value: 0.6589739304812834 and parameters: {'C': 1.260252822521158}. Best is trial 19 with value: 0.6790274064171122.
[I 

[I 2025-04-03 22:59:40,011] Trial 44 finished with value: 0.6768215240641712 and parameters: {'C': 53.92533456150084}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:40,030] Trial 45 finished with value: 0.6768215240641712 and parameters: {'C': 46.65171266541472}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:40,042] Trial 46 finished with value: 0.5 and parameters: {'C': 0.00014233510283931394}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:40,062] Trial 47 finished with value: 0.6778910427807486 and parameters: {'C': 20.638471771348026}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:40,081] Trial 48 finished with value: 0.6723150623885917 and parameters: {'C': 3.403857203057831}. Best is trial 19 with value: 0.6790274064171122.
[I 2025-04-03 22:59:40,100] Trial 49 finished with value: 0.6768883689839572 and parameters: {'C': 62.27612716695227}. Best is trial 19 with value: 0.6790274064171122.
[

Best AUC for logistic: 0.6790274064171122
Best parameters for logistic: {'C': 7.917731414421977}


[I 2025-04-03 22:59:40,385] Trial 0 finished with value: 0.635745320855615 and parameters: {'learning_rate': 0.21628804443049446, 'max_depth': 7, 'n_estimators': 94}. Best is trial 0 with value: 0.635745320855615.
[I 2025-04-03 22:59:40,695] Trial 1 finished with value: 0.6402852049910873 and parameters: {'learning_rate': 0.10313663361433269, 'max_depth': 5, 'n_estimators': 88}. Best is trial 1 with value: 0.6402852049910873.
[I 2025-04-03 22:59:41,033] Trial 2 finished with value: 0.6365975935828877 and parameters: {'learning_rate': 0.10757275872379897, 'max_depth': 5, 'n_estimators': 259}. Best is trial 1 with value: 0.6402852049910873.
[I 2025-04-03 22:59:41,292] Trial 3 finished with value: 0.6492201426024955 and parameters: {'learning_rate': 0.17519400402517846, 'max_depth': 6, 'n_estimators': 185}. Best is trial 3 with value: 0.6492201426024955.
[I 2025-04-03 22:59:41,664] Trial 4 finished with value: 0.6515040106951873 and parameters: {'learning_rate': 0.024095205811770794, 'max

[I 2025-04-03 22:59:51,330] Trial 38 finished with value: 0.6293950534759358 and parameters: {'learning_rate': 0.12275801748853185, 'max_depth': 4, 'n_estimators': 170}. Best is trial 7 with value: 0.6845198306595364.
[I 2025-04-03 22:59:51,551] Trial 39 finished with value: 0.6464962121212121 and parameters: {'learning_rate': 0.2985418566460074, 'max_depth': 6, 'n_estimators': 193}. Best is trial 7 with value: 0.6845198306595364.
[I 2025-04-03 22:59:51,967] Trial 40 finished with value: 0.647448752228164 and parameters: {'learning_rate': 0.06577604461223685, 'max_depth': 3, 'n_estimators': 284}. Best is trial 7 with value: 0.6845198306595364.
[I 2025-04-03 22:59:52,151] Trial 41 finished with value: 0.6756461675579323 and parameters: {'learning_rate': 0.03363066331707401, 'max_depth': 3, 'n_estimators': 116}. Best is trial 7 with value: 0.6845198306595364.
[I 2025-04-03 22:59:52,380] Trial 42 finished with value: 0.6724153297682709 and parameters: {'learning_rate': 0.02616741575787253

Best AUC for xgb: 0.6845198306595364
Best parameters for xgb: {'learning_rate': 0.03135215760522494, 'max_depth': 3, 'n_estimators': 226}


[I 2025-04-03 22:59:55,480] Trial 0 finished with value: 0.6514148841354723 and parameters: {'max_depth': 13, 'min_samples_split': 5, 'n_estimators': 276}. Best is trial 0 with value: 0.6514148841354723.
[I 2025-04-03 22:59:56,018] Trial 1 finished with value: 0.6545510249554367 and parameters: {'max_depth': 10, 'min_samples_split': 6, 'n_estimators': 120}. Best is trial 1 with value: 0.6545510249554367.
[I 2025-04-03 22:59:56,585] Trial 2 finished with value: 0.6501002673796791 and parameters: {'max_depth': 5, 'min_samples_split': 4, 'n_estimators': 123}. Best is trial 1 with value: 0.6545510249554367.
[I 2025-04-03 22:59:57,707] Trial 3 finished with value: 0.6589237967914439 and parameters: {'max_depth': 13, 'min_samples_split': 20, 'n_estimators': 248}. Best is trial 3 with value: 0.6589237967914439.
[I 2025-04-03 22:59:58,403] Trial 4 finished with value: 0.6485572638146168 and parameters: {'max_depth': 7, 'min_samples_split': 5, 'n_estimators': 152}. Best is trial 3 with value: 0

[I 2025-04-03 23:00:35,632] Trial 41 finished with value: 0.6547125668449197 and parameters: {'max_depth': 19, 'min_samples_split': 8, 'n_estimators': 256}. Best is trial 11 with value: 0.66326871657754.
[I 2025-04-03 23:00:36,720] Trial 42 finished with value: 0.6618315508021391 and parameters: {'max_depth': 20, 'min_samples_split': 10, 'n_estimators': 239}. Best is trial 11 with value: 0.66326871657754.
[I 2025-04-03 23:00:37,957] Trial 43 finished with value: 0.647827540106952 and parameters: {'max_depth': 17, 'min_samples_split': 7, 'n_estimators': 280}. Best is trial 11 with value: 0.66326871657754.
[I 2025-04-03 23:00:39,169] Trial 44 finished with value: 0.6587733957219252 and parameters: {'max_depth': 14, 'min_samples_split': 9, 'n_estimators': 264}. Best is trial 11 with value: 0.66326871657754.
[I 2025-04-03 23:00:40,145] Trial 45 finished with value: 0.6555425579322638 and parameters: {'max_depth': 10, 'min_samples_split': 20, 'n_estimators': 214}. Best is trial 11 with valu

Best AUC for rf: 0.66326871657754
Best parameters for rf: {'max_depth': 9, 'min_samples_split': 20, 'n_estimators': 233}


[I 2025-04-03 23:00:45,088] Trial 17 finished with value: 0.6480782085561498 and parameters: {'var_smoothing': 1.2871590371595608e-12}. Best is trial 0 with value: 0.6480782085561498.
[I 2025-04-03 23:00:45,100] Trial 18 finished with value: 0.6480782085561498 and parameters: {'var_smoothing': 2.051178030432357e-09}. Best is trial 0 with value: 0.6480782085561498.
[I 2025-04-03 23:00:45,111] Trial 19 finished with value: 0.6480782085561498 and parameters: {'var_smoothing': 2.0105368988823822e-10}. Best is trial 0 with value: 0.6480782085561498.
[I 2025-04-03 23:00:45,124] Trial 20 finished with value: 0.6480782085561498 and parameters: {'var_smoothing': 9.624085811911296e-11}. Best is trial 0 with value: 0.6480782085561498.
[I 2025-04-03 23:00:45,136] Trial 21 finished with value: 0.6480782085561498 and parameters: {'var_smoothing': 4.533588542651417e-11}. Best is trial 0 with value: 0.6480782085561498.
[I 2025-04-03 23:00:45,150] Trial 22 finished with value: 0.6480782085561498 and pa

Best AUC for gnb: 0.6480782085561498
Best parameters for gnb: {'var_smoothing': 1.1010653759168197e-09}


[I 2025-04-03 23:00:45,687] Trial 11 finished with value: 0.6780971479500891 and parameters: {}. Best is trial 0 with value: 0.6780971479500891.
[I 2025-04-03 23:00:45,704] Trial 12 finished with value: 0.6780971479500891 and parameters: {}. Best is trial 0 with value: 0.6780971479500891.
[I 2025-04-03 23:00:45,718] Trial 13 finished with value: 0.6780971479500891 and parameters: {}. Best is trial 0 with value: 0.6780971479500891.
[I 2025-04-03 23:00:45,732] Trial 14 finished with value: 0.6780971479500891 and parameters: {}. Best is trial 0 with value: 0.6780971479500891.
[I 2025-04-03 23:00:45,746] Trial 15 finished with value: 0.6780971479500891 and parameters: {}. Best is trial 0 with value: 0.6780971479500891.
[I 2025-04-03 23:00:45,763] Trial 16 finished with value: 0.6780971479500891 and parameters: {}. Best is trial 0 with value: 0.6780971479500891.
[I 2025-04-03 23:00:45,779] Trial 17 finished with value: 0.6780971479500891 and parameters: {}. Best is trial 0 with value: 0.678

Best AUC for lda: 0.6780971479500891
Best parameters for lda: {}


[I 2025-04-03 23:00:46,524] Trial 0 finished with value: 0.6668560606060605 and parameters: {'learning_rate': 1.188629556498112, 'n_estimators': 51}. Best is trial 0 with value: 0.6668560606060605.
[I 2025-04-03 23:00:47,532] Trial 1 finished with value: 0.5557096702317291 and parameters: {'learning_rate': 1.7532326581759081, 'n_estimators': 192}. Best is trial 0 with value: 0.6668560606060605.
[I 2025-04-03 23:00:48,754] Trial 2 finished with value: 0.6081327985739751 and parameters: {'learning_rate': 1.9203434653286593, 'n_estimators': 227}. Best is trial 0 with value: 0.6668560606060605.
[I 2025-04-03 23:00:49,832] Trial 3 finished with value: 0.6514928698752229 and parameters: {'learning_rate': 0.08295431110163655, 'n_estimators': 196}. Best is trial 0 with value: 0.6668560606060605.
[I 2025-04-03 23:00:50,607] Trial 4 finished with value: 0.5849153297682709 and parameters: {'learning_rate': 1.6565248684817893, 'n_estimators': 147}. Best is trial 0 with value: 0.6668560606060605.
[

[I 2025-04-03 23:01:25,282] Trial 41 finished with value: 0.699777183600713 and parameters: {'learning_rate': 0.4646013521513544, 'n_estimators': 183}. Best is trial 12 with value: 0.7018549465240642.
[I 2025-04-03 23:01:26,160] Trial 42 finished with value: 0.5002952317290552 and parameters: {'learning_rate': 1.9211528134750737, 'n_estimators': 161}. Best is trial 12 with value: 0.7018549465240642.
[I 2025-04-03 23:01:27,298] Trial 43 finished with value: 0.6844474153297682 and parameters: {'learning_rate': 0.3315140032636859, 'n_estimators': 200}. Best is trial 12 with value: 0.7018549465240642.
[I 2025-04-03 23:01:28,595] Trial 44 finished with value: 0.5953264260249554 and parameters: {'learning_rate': 1.6053097216892631, 'n_estimators': 225}. Best is trial 12 with value: 0.7018549465240642.
[I 2025-04-03 23:01:29,548] Trial 45 finished with value: 0.684569964349376 and parameters: {'learning_rate': 0.6485581368598531, 'n_estimators': 174}. Best is trial 12 with value: 0.7018549465

Best AUC for ada: 0.7018549465240642
Best parameters for ada: {'learning_rate': 0.6260213017597845, 'n_estimators': 100}


[I 2025-04-03 23:01:35,507] Trial 0 finished with value: 0.6544841800356507 and parameters: {'learning_rate': 0.14885111562535483, 'max_depth': 6, 'min_samples_split': 15, 'n_estimators': 260, 'subsample': 0.8274934895209494}. Best is trial 0 with value: 0.6544841800356507.
[I 2025-04-03 23:01:36,516] Trial 1 finished with value: 0.6545120320855615 and parameters: {'learning_rate': 0.297885012679625, 'max_depth': 10, 'min_samples_split': 7, 'n_estimators': 283, 'subsample': 0.9597074288466136}. Best is trial 1 with value: 0.6545120320855615.
[I 2025-04-03 23:01:37,054] Trial 2 finished with value: 0.6617479946524064 and parameters: {'learning_rate': 0.2550541487932265, 'max_depth': 4, 'min_samples_split': 12, 'n_estimators': 130, 'subsample': 0.7366898831447546}. Best is trial 2 with value: 0.6617479946524064.
[I 2025-04-03 23:01:37,981] Trial 3 finished with value: 0.6774286987522282 and parameters: {'learning_rate': 0.2982126926437912, 'max_depth': 9, 'min_samples_split': 20, 'n_esti

[I 2025-04-03 23:02:07,122] Trial 30 finished with value: 0.655899064171123 and parameters: {'learning_rate': 0.20370230295565955, 'max_depth': 10, 'min_samples_split': 11, 'n_estimators': 245, 'subsample': 0.8519724188231967}. Best is trial 7 with value: 0.6829768270944742.
[I 2025-04-03 23:02:08,470] Trial 31 finished with value: 0.6891822638146168 and parameters: {'learning_rate': 0.16249215747972517, 'max_depth': 9, 'min_samples_split': 14, 'n_estimators': 247, 'subsample': 0.9162276243567948}. Best is trial 31 with value: 0.6891822638146168.
[I 2025-04-03 23:02:09,872] Trial 32 finished with value: 0.6716466131907308 and parameters: {'learning_rate': 0.16086232607829581, 'max_depth': 9, 'min_samples_split': 12, 'n_estimators': 292, 'subsample': 0.905644192716891}. Best is trial 31 with value: 0.6891822638146168.
[I 2025-04-03 23:02:11,128] Trial 33 finished with value: 0.6724821746880572 and parameters: {'learning_rate': 0.18425477788220385, 'max_depth': 10, 'min_samples_split': 1

Best AUC for gb: 0.6954211229946523
Best parameters for gb: {'learning_rate': 0.29583429084563645, 'max_depth': 10, 'min_samples_split': 8, 'n_estimators': 300, 'subsample': 0.8110367950160445}


[I 2025-04-03 23:02:30,367] Trial 8 finished with value: 0.5559826203208557 and parameters: {'C': 0.07393801565407988}. Best is trial 3 with value: 0.5606617647058824.
[I 2025-04-03 23:02:30,398] Trial 9 finished with value: 0.591583110516934 and parameters: {'C': 58.16973627701003}. Best is trial 9 with value: 0.591583110516934.
[I 2025-04-03 23:02:30,430] Trial 10 finished with value: 0.5937667112299465 and parameters: {'C': 97.57484803138352}. Best is trial 10 with value: 0.5937667112299465.
[I 2025-04-03 23:02:30,460] Trial 11 finished with value: 0.5928475935828876 and parameters: {'C': 90.82706154301144}. Best is trial 10 with value: 0.5937667112299465.
[I 2025-04-03 23:02:30,493] Trial 12 finished with value: 0.5960561497326203 and parameters: {'C': 83.15005240861056}. Best is trial 12 with value: 0.5960561497326203.
[I 2025-04-03 23:02:30,522] Trial 13 finished with value: 0.5424520944741533 and parameters: {'C': 7.409624581479115}. Best is trial 12 with value: 0.59605614973262

Best AUC for svm: 0.5976659982174688
Best parameters for svm: {'C': 65.92742338289928}

Model Best AUCs:
et: 0.6682542335115864
knn: 0.6325646167557932
logistic: 0.6790274064171122
xgb: 0.6845198306595364
rf: 0.66326871657754
gnb: 0.6480782085561498
lda: 0.6780971479500891
ada: 0.7018549465240642
gb: 0.6954211229946523
svm: 0.5976659982174688


In [3]:
import joblib
from itertools import combinations
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier

# 使用最佳参数初始化模型
optimized_models_instances = {
    'et': ExtraTreesClassifier(**optimized_models['et'], random_state=42),
    'knn': KNeighborsClassifier(**optimized_models['knn']),
    'logistic': LogisticRegression(**optimized_models['logistic'], random_state=42),
    'xgb': xgb.XGBClassifier(**optimized_models['xgb'], random_state=42),
    'rf': RandomForestClassifier(**optimized_models['rf'], random_state=42),
    #'bagging': BaggingClassifier(**optimized_models['bagging'], random_state=42),
    'gnb': GaussianNB(**optimized_models['gnb']),
    'lda': LinearDiscriminantAnalysis(**optimized_models['lda']),
    'ada': AdaBoostClassifier(**optimized_models['ada'], random_state=42, algorithm='SAMME'),
    'gb': GradientBoostingClassifier(**optimized_models['gb'], random_state=42),
    'svm': SVC(**optimized_models['svm'], probability=True, random_state=42)
}

# 保存每个优化后的模型
for model_name, model in optimized_models_instances.items():
    joblib.dump(model, f'C:\\Users\\一个大活人\\ppi-wgcna-sssgsea-4.3\\basemodel\\ssgsea\\{model_name}_ssgsea.joblib') 
    print(f"Model {model_name} saved successfully.")



Model et saved successfully.
Model knn saved successfully.
Model logistic saved successfully.
Model xgb saved successfully.
Model rf saved successfully.
Model gnb saved successfully.
Model lda saved successfully.
Model ada saved successfully.
Model gb saved successfully.
Model svm saved successfully.
