In [8]:
import sklearn
import numpy as np
print(f"scikit-learn版本: {sklearn.__version__}")
print(f"numpy版本: {np.__version__}")

import time
import os
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor  
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
# 导入 LinearRegression
from sklearn.linear_model import LinearRegression  
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import StackingRegressor
from scipy.stats import randint as sp_randint
from scipy.stats import uniform

# Base configuration.
# Create the directory that is later passed to CatBoost as `train_dir`.
os.makedirs('catboost_info', exist_ok=True)
TREE_THREADS = 2  # Thread count handed to RF/XGB (n_jobs) and CatBoost (thread_count)

# Regression metric helper
def calculate_metrics(actual, predicted):
    """Compute the regression metrics reported by this script.

    Args:
        actual: Observed target values.
        predicted: Model predictions aligned with ``actual``.

    Returns:
        A tuple ``(r2, rmse, rrmse, mae, rho)`` where ``rrmse`` is the RMSE
        divided by the mean of ``actual`` (``np.inf`` when that mean is zero)
        and ``rho`` is ``rrmse / (1 + sqrt(max(r2, 0.001)))``.
    """
    r2 = r2_score(actual, predicted)
    squared_error = mean_squared_error(actual, predicted)
    rmse = np.sqrt(squared_error)

    # Relative RMSE is undefined for a zero-mean target; report infinity.
    target_mean = np.mean(actual)
    if target_mean != 0:
        rrmse = rmse / target_mean
    else:
        rrmse = np.inf

    mae = mean_absolute_error(actual, predicted)

    # R² is floored at 0.001 so the square root is always defined.
    floored_r2 = max(r2, 0.001)
    rho = rrmse / (1 + np.sqrt(floored_r2))
    return r2, rmse, rrmse, mae, rho

if __name__ == "__main__":
    # pandas is used for the Excel I/O below but is not among the imports
    # visible at the top of this cell, so bring it into scope here.
    import pandas as pd

    def _scaled(model):
        """Wrap *model* in a Pipeline that min-max scales the features first.

        The step names 'scaler' and 'model' are referenced by the
        hyper-parameter keys in ``param_dist`` and must not change.
        """
        return Pipeline([('scaler', MinMaxScaler()), ('model', model)])

    start_total = time.perf_counter()

    # 1. Load the data; rows containing any missing value are dropped.
    print("1. 加载数据...")
    data = pd.read_excel('CS.xlsx').dropna()
    X = data.iloc[:, 0:9].values  # columns 0-8 are used as features
    y = data.iloc[:, 9].values    # column 9 is used as the target
    print(f"数据规模：{X.shape[0]}样本 × {X.shape[1]}特征\n")

    # 2. Split into 70% training / 30% test data with a fixed seed.
    print("2. 划分训练集(70%)/测试集(30%)...")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0, shuffle=True
    )

    # 3. The six base learners, each behind its own MinMaxScaler.
    print("3. 初始化基学习器：RF、SVR、MLP、XGB、CatBoost、AdaBoost...\n")
    estimators = [
        ('rf', _scaled(RandomForestRegressor(
            random_state=0, n_jobs=TREE_THREADS,
            max_features='sqrt', max_depth=8
        ))),
        ('svr', _scaled(SVR(kernel='rbf', cache_size=100))),
        ('mlp', _scaled(MLPRegressor(
            random_state=0, early_stopping=True,
            max_iter=300, batch_size=32, hidden_layer_sizes=(30, 20)
        ))),
        ('xgb', _scaled(XGBRegressor(
            random_state=0, n_jobs=TREE_THREADS,
            max_depth=6, subsample=0.8, verbosity=0
        ))),
        ('cat', _scaled(CatBoostRegressor(
            random_state=0, verbose=0,
            train_dir='catboost_info', depth=6, thread_count=TREE_THREADS
        ))),
        ('ada', _scaled(AdaBoostRegressor(
            random_state=0,
            estimator=DecisionTreeRegressor(max_depth=3),
            n_estimators=100
        ))),
    ]

    # 4. Stacking ensemble with a LinearRegression meta-learner.
    stacking_model = StackingRegressor(
        estimators=estimators,
        final_estimator=LinearRegression(),
        cv=3, n_jobs=-1, passthrough=False
    )

    # 5. Hyper-parameter search space.
    # NOTE: scipy's randint(low, high) excludes `high`, and uniform(loc, scale)
    # samples from [loc, loc + scale]. So e.g. sp_randint(4, 8) yields 4..7 and
    # uniform(0.05, 0.15) yields values up to 0.20. The bounds are kept as they
    # were; adjust them if a different range was intended.
    param_dist = {
        # RF
        'rf__model__n_estimators': sp_randint(50, 200),
        'rf__model__max_depth': sp_randint(4, 8),
        # SVR
        'svr__model__C': uniform(1, 50),
        'svr__model__epsilon': uniform(0.05, 0.3),
        # MLP
        'mlp__model__hidden_layer_sizes': [(30,), (30, 20), (50,)],
        'mlp__model__alpha': uniform(0.001, 0.01),
        # XGB
        'xgb__model__n_estimators': sp_randint(50, 200),
        'xgb__model__max_depth': sp_randint(4, 6),
        'xgb__model__learning_rate': uniform(0.05, 0.15),
        # CatBoost
        'cat__model__iterations': sp_randint(50, 200),
        'cat__model__depth': sp_randint(4, 6),
        'cat__model__learning_rate': uniform(0.05, 0.15),
        # AdaBoost
        'ada__model__n_estimators': sp_randint(50, 200),
        'ada__model__learning_rate': uniform(0.05, 0.15),
        # Meta-learner: the only LinearRegression option searched
        'final_estimator__fit_intercept': [True, False]
    }

    # 6. Randomized search: 30 candidates, 3-fold CV, scored by R².
    print("4. 开始参数搜索（30次迭代 + 3折交叉验证）...")
    random_search = RandomizedSearchCV(
        estimator=stacking_model,
        param_distributions=param_dist,
        n_iter=30, cv=3,
        scoring='r2', random_state=0,
        verbose=1, n_jobs=-1
    )

    # 7. Fit; if the parallel search fails, retry once on a single worker.
    start_search = time.perf_counter()
    try:
        random_search.fit(X_train, y_train)
    except Exception as e:
        print(f"\n⚠️ 训练警告: {str(e)}，切换单线程重试...")
        random_search.n_jobs = 1
        random_search.fit(X_train, y_train)
    end_search = time.perf_counter()
    best_model = random_search.best_estimator_

    # Report the winning parameters whichever fit attempt succeeded
    # (previously they were printed only when the first attempt worked).
    print("\n✅ 最佳参数组合（可直接用于复现）：")
    for param, value in random_search.best_params_.items():
        print(f"  {param}: {value}")

    # 8. Predict. The timer covers only the predict calls, so the Excel
    # writes below no longer inflate the reported prediction time.
    print("\n5. 进行预测...")
    start_predict = time.perf_counter()
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)
    end_predict = time.perf_counter()

    # Combined train + test arrays, reused for the Excel export and metrics.
    full_actual = np.concatenate([y_train, y_test])
    full_predicted = np.concatenate([y_train_pred, y_test_pred])

    # Save the training-set predictions
    train_path = 'Stacking-RS-train.xlsx'
    pd.DataFrame({'Actual': y_train, 'Predicted': y_train_pred}).to_excel(train_path, index=False)
    print(f"  ✅ 训练集预测结果已保存：{train_path}")

    # Save the test-set predictions
    test_path = 'Stacking-RS-test.xlsx'
    pd.DataFrame({'Actual': y_test, 'Predicted': y_test_pred}).to_excel(test_path, index=False)
    print(f"  ✅ 测试集预测结果已保存：{test_path}")

    # Save the combined (train followed by test) predictions
    full_path = 'Stacking-RS-full.xlsx'
    pd.DataFrame({'Actual': full_actual, 'Predicted': full_predicted}).to_excel(full_path, index=False)
    print(f"  ✅ 整体数据集预测结果已保存：{full_path}")

    # 9. Metrics for each subset
    metrics_train = calculate_metrics(y_train, y_train_pred)
    metrics_test = calculate_metrics(y_test, y_test_pred)
    metrics_full = calculate_metrics(full_actual, full_predicted)

    # 10. Timing and metric summary
    total_time = time.perf_counter() - start_total
    search_time = end_search - start_search
    predict_time = end_predict - start_predict

    print("\n==================== 结果汇总 ====================")
    print("时间统计：")
    print(f"  - 参数搜索: {search_time:.2f}秒（{search_time/60:.2f}分钟）")
    print(f"  - 预测耗时: {predict_time:.2f}秒")
    print(f"  - 总运行时间: {total_time:.2f}秒（{total_time/60:.2f}分钟）\n")

    metric_report = (
        ("训练集指标：", metrics_train),
        ("测试集指标：", metrics_test),
        ("完整数据集指标：", metrics_full),
    )
    for title, metrics in metric_report:
        print(title)
        print(f"  R²={metrics[0]:.4f}, RMSE={metrics[1]:.4f}, RRMSE={metrics[2]:.4f}, MAE={metrics[3]:.4f}, ρ={metrics[4]:.4f}")
    print("===================================================")

scikit-learn版本: 1.6.1
numpy版本: 1.23.5
1. 加载数据...
数据规模：375样本 × 9特征

2. 划分训练集(70%)/测试集(30%)...
3. 初始化基学习器：RF、SVR、MLP、XGB、CatBoost、AdaBoost...

4. 开始参数搜索（30次迭代 + 3折交叉验证）...
Fitting 3 folds for each of 30 candidates, totalling 90 fits

✅ 最佳参数组合（可直接用于复现）：
  ada__model__learning_rate: 0.1418143584083632
  ada__model__n_estimators: 67
  cat__model__depth: 5
  cat__model__iterations: 182
  cat__model__learning_rate: 0.1522730448655225
  final_estimator__fit_intercept: True
  mlp__model__alpha: 0.007130634578841324
  mlp__model__hidden_layer_sizes: (30, 20)
  rf__model__max_depth: 5
  rf__model__n_estimators: 107
  svr__model__C: 34.338335772283386
  svr__model__epsilon: 0.2511913608854478
  xgb__model__learning_rate: 0.08155738416107614
  xgb__model__max_depth: 5
  xgb__model__n_estimators: 178

5. 进行预测...
  ✅ 训练集预测结果已保存：Stacking-RS-train.xlsx
  ✅ 测试集预测结果已保存：Stacking-RS-test.xlsx
  ✅ 整体数据集预测结果已保存：Stacking-RS-full.xlsx

时间统计：
  - 参数搜索: 9.60秒（0.16分钟）
  - 预测耗时: 0.10秒
  - 总运行时间: 9.74秒（0.16分钟）

训练集

In [9]:
# 导入 joblib 模块
from joblib import dump

# Persist the tuned CS stacking model with joblib
model_file = 'CS-Stacking-model.joblib'
print("\n保存最佳CS-Stacking模型...")
dump(best_model, model_file)
print(f"模型已成功保存为 {model_file}")


保存最佳CS-Stacking模型...
模型已成功保存为 CS-Stacking-model.joblib


In [None]:
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import numpy as np
from joblib import load
import warnings

class featureNameSetter:
    """Hold a model object together with a collection of feature names.

    Both values are stored exactly as given; no validation is performed.
    """

    def __init__(self, model=None, feature_names=None):
        """Store *model* and *feature_names* (both default to None)."""
        self.model, self.feature_names = model, feature_names

    def set_feature_names(self, feature_names):
        """Replace the stored feature names with *feature_names*."""
        self.feature_names = feature_names

# Create the main application window
predict_root = tk.Tk()
predict_root.title("Optimization Prediction Interface")
predict_root.geometry("1600x800")

# Silence UserWarning messages for the rest of the session
warnings.filterwarnings('ignore', category=UserWarning)

# Global ttk styling
style = ttk.Style(predict_root)
available_themes = style.theme_names()
print("可用主题:", available_themes)

try:
    style.theme_use('classic')  # alternatives: 'clam', 'alt'
except tk.TclError:
    # Tk reports an unknown theme as TclError; keep the default theme then.
    # The message now names the theme that was actually requested.
    print("classic主题不可用，使用默认主题")

# Widget styles
style.configure("TEntry",
                font=('Times New Roman', 20, "bold"),  # enlarged font, used to test whether the style applies
                foreground='black',
                padding=5)

style.configure("TFrame", borderwidth=5, relief="groove")
style.configure("TLabel", font=('Times New Roman', 16, "bold"))
style.configure("TButton",
                background='#0078D4',
                foreground='black',
                bordercolor='#0078D4',
                borderwidth=1,
                font=('Times New Roman', 18, "bold"))
# Button colours for the hover ('active') and 'pressed' states
style.map('TButton',
          background=[('active', '#005A9E'), ('pressed', '#004A8C')],
          foreground=[('active', 'white'), ('pressed', 'white')])

# Fallback for when the ttk "TEntry" style does not take effect: build a classic tk.Entry
def create_styled_entry(parent, textvariable=None, width=12):
    """Return a classic ``tk.Entry`` with this application's fixed look.

    Args:
        parent: Widget that will contain the entry.
        textvariable: Optional Tk variable bound to the entry's text.
        width: Entry width in characters.

    Returns:
        The newly created ``tk.Entry``; the caller is responsible for
        placing it with a geometry manager.
    """
    entry_options = {
        'textvariable': textvariable,
        'width': width,
        'font': ('Times New Roman', 16, 'bold'),  # font set directly on the widget
        'bg': 'white',
        'fg': 'black',
        'relief': 'solid',
        'bd': 1,
    }
    return tk.Entry(parent, **entry_options)

# Objective function: predict CS from the nine named input features
def aim_function(**kwargs):
    """Predict CS with the stacking model stored in 'CS-Stacking-model.joblib'.

    Args:
        **kwargs: One numeric value per feature name: 'MPD', 'Si/Al', 'W/B',
            'AC', 'SM', 'CA', 'IT', 'TT' and 'VD'. Extra keys are ignored.

    Returns:
        A one-element list ``[cs]`` with the prediction. On any failure an
        error dialog is shown and ``[0.0]`` is returned instead; no exception
        escapes this function.
    """
    # Accepted (low, high) range per feature. The dict order is also the
    # column order in which the values are handed to the model.
    feature_bounds = {
        'MPD': (0, 247.45),
        'Si/Al': (0.98, 5.61),
        'W/B': (0.04, 0.491),
        'AC': (0, 0.155),
        'SM': (0, 3.34),
        'CA': (0, 108),
        'IT': (-196, 120),
        'TT': (-196, 600),
        'VD': (0.2, 101325),
    }
    feature_names = list(feature_bounds)

    try:
        # Validate the inputs first: this is cheap and avoids reading the
        # model from disk when the request is going to be rejected anyway.
        missing = [name for name in feature_names if name not in kwargs]
        if missing:
            raise ValueError(f"缺少输入特征: {', '.join(missing)}")

        # `not (low <= value <= high)` also rejects NaN.
        out_of_range = [
            f"{name}={kwargs[name]} (允许范围 {low}~{high})"
            for name, (low, high) in feature_bounds.items()
            if not (low <= kwargs[name] <= high)
        ]
        if out_of_range:
            raise ValueError(
                "输入特征超出合理范围，请检查输入数据。\n" + "\n".join(out_of_range)
            )

        # Load the CS model; a load failure gets its own, more specific dialog.
        try:
            model = load('CS-Stacking-model.joblib')
        except Exception as e:
            messagebox.showerror("模型加载错误", f"无法加载模型文件: {str(e)}\n请确保模型文件 'CS-Stacking-model.joblib' 存在。")
            return [0.0]

        # Make sure the loaded object is usable as a predictor.
        if not hasattr(model, 'predict'):
            raise ValueError("模型文件加载失败，请检查模型文件是否正确。")

        # Compare the feature count with what the model was fitted on, when
        # the model exposes that information.
        expected_features = getattr(model, 'n_features_in_', None)
        if expected_features is not None and expected_features != len(feature_names):
            raise ValueError("特征映射长度不正确，请检查特征映射是否与模型训练时一致。")

        # Predict on a single-row 2-D array.
        cs_features = [kwargs[name] for name in feature_names]
        cs = model.predict(np.array(cs_features).reshape(1, -1))[0]

        return [cs]

    except Exception as e:
        messagebox.showerror("Error", f"模型预测错误: {str(e)}")
        return [0.0]

# Callback for the "Predict" button
def update_predict():
    """Read the input fields, run the prediction and display the result.

    Every input is parsed with ``float``. 'AC' is entered as a percentage
    (its label carries the ' (%)' unit), so it is divided by 100 before being
    passed on. Parsing errors and any other failure are reported in an error
    dialog; nothing is raised to the caller.

    NOTE: ``aim_function`` reports its own failures by returning ``[0.0]``,
    which is then displayed as "0.00".
    """
    try:
        input_values = {
            var_name: float(var.get()) for var_name, var in input_vars.items()
        }
        # Percent -> fraction, e.g. 7.50 becomes 0.075. The old fallback that
        # claimed to accept fraction input could never succeed: it repeated
        # the float() call that had just raised ValueError.
        if 'AC' in input_values:
            input_values['AC'] = input_values['AC'] / 100

        # Run the prediction
        results = aim_function(**input_values)

        # Write each result into its output entry with two decimals
        for result_entry, result in zip(output_vars, results):
            formatted_result = f"{result:.2f}"
            result_entry.delete(0, tk.END)
            result_entry.insert(0, formatted_result)

    except ValueError as e:
        messagebox.showerror("Error", f"输入数据格式错误: {str(e)}")
    except Exception as e:
        messagebox.showerror("Error", f"预测过程中发生错误: {str(e)}")

# Callback for the "Clear" button
def clear_entries():
    """Blank every input variable and empty every output entry."""
    for input_var in input_vars.values():
        input_var.set("")
    for output_entry in output_vars:
        output_entry.delete(0, tk.END)

# Callback for the "Save" button
def save_entries():
    """Ask for a file name and write the current inputs and outputs to it.

    The file holds one ``<label><unit>: <value>`` line per input, a blank
    line, then one such line per output. Nothing happens if the dialog is
    cancelled. A write failure is reported in an error dialog.
    """
    file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")])
    if not file_path:
        return

    # Input section
    input_data = "".join(
        f"{var_name}{input_units[var_name]}: {var.get()}\n"
        for var_name, var in input_vars.items()
    )
    # Output section
    output_data = "".join(
        f"{label}{output_units[label]}: {entry.get()}\n"
        for label, entry in zip(output_labels, output_vars)
    )

    try:
        # The unit strings contain non-ASCII characters ('µ', '°'). Without an
        # explicit encoding open() uses the locale's default, which may be
        # unable to encode them and would raise UnicodeEncodeError.
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(input_data + "\n" + output_data)
    except OSError as e:
        messagebox.showerror("Error", f"保存文件失败: {str(e)}")
        return

    # Confirm only after the file has been written and closed.
    messagebox.showinfo("保存成功", "数据已成功保存到文件！")

# Callback for the "Exit" button
def return_main_menu():
    """Destroy the main window (``predict_root``)."""
    predict_root.destroy()

# Unit suffix appended to each input label, both in the GUI and in the saved text file
input_units = {
    'MPD': ' (µm)',
    'Si/Al': ' (--)',
    'W/B': ' (--)',
    'AC': ' (%)',
    'SM': ' (--)',
    'CA': ' (days)',
    'IT': ' (°C)',
    'TT': ' (°C)',
    'VD': ' (Pa)'
}

# Unit suffix appended to each output label
output_units = {
    'CS': ' (MPa)'
}

# Title banner in grid row 0 of the main window
title_label = tk.Label(predict_root, text="Optimization Prediction Interface", font=('Times New Roman', 24, "bold"))
title_label.grid(row=0, column=0, columnspan=3, sticky="ew", padx=10, pady=10)

# Outer frame of the input panel (grid row 1, column 0)
input_outer_frame = ttk.Frame(predict_root, padding="0 0 0 0", style="TFrame")
input_outer_frame.grid(row=1, column=0, sticky="nsew", padx=(5, 5))

# Heading of the input panel
input_title_label = ttk.Label(input_outer_frame, text="Input panel", font=('Times New Roman', 18, "bold"))
input_title_label.pack(fill="x", padx=300, pady=10)

# Inner frame that holds the label/entry grid
input_frame = ttk.Frame(input_outer_frame, padding="6 6 6 15")
input_frame.pack(fill="both", expand=True, padx=6, pady=5)

# Maps each input label to the tk.StringVar backing its entry widget
input_vars = {}
# NOTE(review): original_input_labels duplicates input_labels and is not referenced elsewhere in the visible code
original_input_labels = ['MPD', 'Si/Al', 'W/B', 'AC', 'SM', 'CA', 'IT', 'TT', 'VD']
input_labels = ['MPD', 'Si/Al', 'W/B', 'AC', 'SM', 'CA', 'IT', 'TT', 'VD']

# Default text pre-filled into each input entry
default_values = {
    'MPD': '38.20',
    'Si/Al': '2.32',
    'W/B': '0.30',
    'AC': '10.00',
    'SM': '0',
    'CA': '3.00',
    'IT': '93.00',
    'TT': '99.60',
    'VD': '101325'
}

# Build the input widgets: three label/entry pairs per row (divmod(i, 3) gives row and pair index)
for i, label in enumerate(input_labels):
    row, col = divmod(i, 3)
    # Label text is the feature name followed by its unit suffix
    full_label = f"{label}{input_units[label]}"
    ttk.Label(input_frame, text=full_label).grid(row=row, column=col*2, padx=7, pady=15, sticky="ew")
    input_vars[label] = tk.StringVar(value=default_values.get(label, ""))
    
    # Use the custom tk.Entry factory so the font settings take effect
    entry = create_styled_entry(input_frame, textvariable=input_vars[label], width=13)
    entry.grid(row=row, column=col*2+1, padx=8, pady=30, sticky="ew")

# Outer frame of the output panel (grid row 1, column 2)
output_outer_frame = ttk.Frame(predict_root, padding="0 0 0 0", style="TFrame")
output_outer_frame.grid(row=1, column=2, sticky="nsew", padx=(5, 10))

# Heading of the output panel
output_title_label = ttk.Label(output_outer_frame, text="Output panel", font=('Times New Roman', 18, "bold"))
output_title_label.pack(fill="x", padx=140, pady=10)

# Inner frame that holds the output label/entry pairs
output_frame = ttk.Frame(output_outer_frame, padding="6 6 6 6", width=180)
output_frame.pack(fill="both", expand=True, padx=5, pady=5)

# Column weights on the outer output frame
# NOTE(review): the children of output_outer_frame are placed with pack(), so these grid weights presumably have no visible effect — confirm
output_outer_frame.columnconfigure(0, weight=4)
output_outer_frame.columnconfigure(1, weight=1)

# Output labels, and the list that collects one entry widget per label (same order)
output_labels = ['CS']
output_vars = []

# Build one label/entry pair per output, one pair per row
for i, label in enumerate(output_labels):
    full_label = f"{label}{output_units[label]}"
    ttk.Label(output_frame, text=full_label).grid(row=i, column=0, padx=50, pady=100, sticky="ew")
    
    # Use the custom tk.Entry factory so the font settings take effect
    entry = create_styled_entry(output_frame, width=12)
    entry.grid(row=i, column=1, padx=20, pady=80, sticky="ew")
    output_vars.append(entry)

# Outer frame of the control panel (grid row 2, spanning all three columns)
control_panel_frame = ttk.Frame(predict_root, padding="0 0 0 0", style="TFrame")
control_panel_frame.grid(row=2, column=0, columnspan=3, sticky="ew", padx=10, pady=5)

# Heading of the control panel
control_panel_title_label = ttk.Label(control_panel_frame, text="Control panel", font=('Times New Roman', 18, "bold"))
control_panel_title_label.pack(fill="x", padx=590, pady=10)

# Inner frame that holds the buttons, packed left to right
control_panel_inner_frame = ttk.Frame(control_panel_frame, padding="0 0 0 0")
control_panel_inner_frame.pack(fill="x", padx=5, pady=5)

# "Predict" button: runs update_predict
button_predict = ttk.Button(control_panel_inner_frame, text="Predict", command=update_predict, width=18)
button_predict.pack(side="left", padx=(55, 12), pady=20, anchor="n")

# "Clear" button: runs clear_entries
button_clear = ttk.Button(control_panel_inner_frame, text="Clear", command=clear_entries, width=18)
button_clear.pack(side="left", padx=(55, 12), pady=20, anchor="n")

# "Save" button: runs save_entries
button_save = ttk.Button(control_panel_inner_frame, text="Save", command=save_entries, width=18)
button_save.pack(side="left", padx=(55, 12), pady=20, anchor="n")

# "Exit" button: runs return_main_menu, which destroys the main window
button_main_menu = ttk.Button(control_panel_inner_frame, text="Exit", command=return_main_menu, width=18)
button_main_menu.pack(side="left", padx=(55, 12), pady=20, anchor="n")

# Debug helper: print the style settings seen by ttk and tk entries
def check_entry_style():
    """Print the configured "TEntry" style options and a tk.Entry's font.

    Two throw-away entry widgets are created under ``predict_root`` and
    destroyed again; nothing is returned.
    """
    probe_ttk = ttk.Entry(predict_root)
    ttk_options = style.configure("TEntry")
    print("TEntry样式选项:", ttk_options)
    probe_ttk.destroy()

    probe_font = ('Times New Roman', 20, 'bold')
    probe_tk = tk.Entry(predict_root, font=probe_font)
    reported_font = probe_tk.cget('font')
    print("tk.Entry字体:", reported_font)
    probe_tk.destroy()

# Print the style diagnostics once at start-up
check_entry_style()

# Start the Tk event loop for the main window
predict_root.mainloop()

可用主题: ('winnative', 'clam', 'alt', 'default', 'classic', 'vista', 'xpnative')
TEntry样式选项: {'foreground': 'black', 'font': '{Times New Roman} 20 bold', 'padding': [5], 'relief': 'sunken'}
tk.Entry字体: {Times New Roman} 20 bold
