In [None]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier

# Registry of candidate classifiers, keyed by the short name that is later
# used in log messages and in the saved model's file name.
# All estimators use library defaults except XGBoost, which is told to skip
# its label encoder and to evaluate with log-loss.
# NOTE(review): `use_label_encoder` appears to be deprecated in recent XGBoost
# releases; confirm against the installed version before removing it.
models = dict(
    RF=RandomForestClassifier(),
    ET=ExtraTreesClassifier(),
    HistGB=HistGradientBoostingClassifier(),
    XGBoost=XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
)


In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, matthews_corrcoef
import pickle
import pandas as pd
import numpy as np
import os

# Load the feature table. Besides the feature columns it must contain the
# 'SMILES', 'Label' and 'ref' columns, which are referenced below.
data_path = "Features.csv"
data = pd.read_csv(data_path)

# Rows whose 'ref' is 'DILIrank' form the held-out test set; every other row
# is used for training.
is_dilirank = data['ref'] == 'DILIrank'
train_data = data[~is_dilirank]
test_data = data[is_dilirank]

# Everything except these three columns is treated as a model input;
# 'Label' is the prediction target.
non_feature_cols = ['SMILES', 'Label', 'ref']
X_train, y_train = train_data.drop(columns=non_feature_cols), train_data['Label']
X_test, y_test = test_data.drop(columns=non_feature_cols), test_data['Label']

# Directory that receives the pickled models; created if it does not exist.
save_path = "Model/"
os.makedirs(save_path, exist_ok=True)

# Train every candidate in `models` several times with different random
# states, score each run on (X_test, y_test), and persist the run with the
# highest AUC as Model/best_model_<name>.pkl.
#
# Reproducibility: the per-run random states are drawn from a generator that
# is seeded with RANDOM_SEED, so re-running this cell (or Restart & Run All)
# yields the same sequence of seeds instead of depending on NumPy's unseeded
# global state.
#
# NOTE(review): the "best" run is selected by AUC on the same test split that
# is reported, so the printed best AUC is optimistically biased. Consider
# selecting on a validation split carved out of the training data.
RANDOM_SEED = 42   # master seed for the per-run random_state draws
N_RUNS = 5         # number of re-fits per model

seed_rng = np.random.default_rng(RANDOM_SEED)

for model_name, model in models.items():
    print(f"正在训练模型: {model_name}")

    best_auc = -np.inf
    best_model = None  # pickled bytes of the best run so far

    for i in range(N_RUNS):
        print(f"第 {i + 1} 次训练...")

        # Cast to a plain int so every estimator receives an ordinary Python
        # integer, not a NumPy scalar.
        run_seed = int(seed_rng.integers(0, 10000))
        model.set_params(random_state=run_seed)

        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        # Column 1 of predict_proba is used as the score for the AUC.
        y_prob = model.predict_proba(X_test)[:, 1]

        acc = accuracy_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_prob)
        mcc = matthews_corrcoef(y_test, y_pred)

        print(f"第 {i + 1} 次训练结果 - {model_name}: ACC: {acc:.4f}, AUC: {auc:.4f}, MCC: {mcc:.4f}")

        if auc > best_auc:
            best_auc = auc
            # Serialize immediately: `model` is the same object in every
            # iteration and will be refit (overwritten) by the next run.
            best_model = pickle.dumps(model)

    # If no run produced a comparable AUC (e.g. every AUC was NaN), fail
    # before touching the file system so no empty .pkl is left behind.
    if best_model is None:
        raise RuntimeError(f"模型 {model_name} 没有得到有效的AUC，未保存任何模型")

    model_file = os.path.join(save_path, f"best_model_{model_name}.pkl")

    with open(model_file, 'wb') as f:
        f.write(best_model)
    print(f"模型 {model_name} 的最佳AUC: {best_auc:.4f}，已保存为 {model_file}")
