In [1]:
import pandas as pd
import numpy as np
import json
from typing import List, Dict, Any

print("✅ 库导入完成")

# Load the experiment data CSV into a DataFrame.
data_file = "1728_BMS_experiments_yield_and_cost.csv"
df = pd.read_csv(data_file)

# Report row count, column names, a preview and summary statistics.
# (The first message's leading emoji was mojibake in the source and has been restored.)
print(f"📊 数据加载完成，共{len(df)}行数据")
print(f"📋 列名: {list(df.columns)}")
print("\n🔍 数据预览:")
print(df.head())
print("\n📈 数据统计:")
print(df.describe())

✅ 库导入完成
📊 数据加载完成，共1728行数据
📋 列名: ['new_index', 'base', 'ligand', 'solvent', 'concentration', 'temperature', 'yield', 'cost']

🔍 数据预览:
   new_index  base      ligand solvent  concentration  temperature  yield  \
0          0  KOAc   BrettPhos    DMAc            0.1          105   5.47   
1          1  KOAc     PPhtBu2    DMAc            0.1          105   0.00   
2          2  KOAc  tBPh-CPhos    DMAc            0.1          105  78.95   
3          3  KOAc   PCy3 HBF4    DMAc            0.1          105   7.26   
4          4  KOAc        PPh3    DMAc            0.1          105  28.15   

       cost  
0  0.145775  
1  0.043201  
2  0.269140  
3  0.032181  
4  0.026373  

📈 数据统计:
         new_index  concentration  temperature        yield         cost
count  1728.000000    1728.000000  1728.000000  1728.000000  1728.000000
mean    863.500000       0.103333   105.000000    19.374705     0.135069
std     498.974949       0.039274    12.250994    24.603949     0.111972
min       0.000000

In [2]:
# Columns that span the search space vs. columns that hold the measured objectives.
parameter_columns = ['base', 'ligand', 'solvent', 'concentration', 'temperature']
objective_columns = ['yield', 'cost']

print("🔍 参数列分析:")
for col in parameter_columns:
    unique_values = df[col].unique()
    print(f"  {col}: {len(unique_values)}个唯一值")
    # More than 10 distinct values: summarise by min/max instead of listing them all.
    if len(unique_values) > 10:
        print(f"    值范围: {min(unique_values)} - {max(unique_values)}")
        continue
    print(f"    值: {list(unique_values)}")

print("\n🎯 目标列分析:")
for col in objective_columns:
    objective = df[col]
    print(f"  {col}: 范围 {objective.min():.3f} - {objective.max():.3f}, 均值 {objective.mean():.3f}")

🔍 参数列分析:
  base: 4个唯一值
    值: ['KOAc', 'KOPiv', 'CsOAc', 'CsOPiv']
  ligand: 12个唯一值
    值范围: BrettPhos - tBPh-CPhos
  solvent: 4个唯一值
    值: ['DMAc', 'BuCN', 'BuOAc', 'p-Xylene']
  concentration: 3个唯一值
    值: [0.1, 0.057, 0.153]
  temperature: 3个唯一值
    值: [105, 90, 120]

🎯 目标列分析:
  yield: 范围 0.000 - 100.000, 均值 19.375
  cost: 范围 0.021 - 0.483, 均值 0.135


In [3]:
import json
import numpy as np

def build_parameter_space(df, parameter_columns):
    """Build a JSON-serialisable parameter space in which every parameter is a "choice".

    Args:
        df: DataFrame holding one column per parameter.
        parameter_columns: Names of the columns to turn into parameters.

    Returns:
        A list with one dict per column, in the order given:
        ``{"name": <column>, "type": "choice", "values": [<distinct values>]}``.
        Values keep their order of first appearance and are native Python
        objects. Missing values (NaN/None) are never offered as choices.
    """
    parameter_space = []

    for col in parameter_columns:
        # dropna(): a missing cell is not a selectable level, and NaN cannot be
        # encoded as standard JSON.
        unique_values = df[col].dropna().unique()

        # NumPy scalars (np.int64 / np.float64, ...) are not JSON-serialisable,
        # so unwrap them to native Python types.
        converted_values = [
            v.item() if isinstance(v, np.generic) else v for v in unique_values
        ]

        parameter_space.append({
            "name": col,
            "type": "choice",
            "values": converted_values
        })

    return parameter_space

# Derive the parameter space from the loaded data, then show it as a compact
# per-parameter listing followed by pretty-printed JSON.
parameter_space = build_parameter_space(df, parameter_columns)

print("🔧 构建的参数空间:")
for param in parameter_space:
    print(f"  {param['name']}: {param['type']} - {param['values']}")

space_as_json = json.dumps(parameter_space, indent=2, ensure_ascii=False)
print(f"\n📋 参数空间JSON格式:")
print(space_as_json)

🔧 构建的参数空间:
  base: choice - ['KOAc', 'KOPiv', 'CsOAc', 'CsOPiv']
  ligand: choice - ['BrettPhos', 'PPhtBu2', 'tBPh-CPhos', 'PCy3 HBF4', 'PPh3', 'X-Phos', 'P(fur)3', 'PPh2Me', 'GorlosPhos HBF4', 'JackiePhos', 'CgMe-PPh', 'PPhMe2']
  solvent: choice - ['DMAc', 'BuCN', 'BuOAc', 'p-Xylene']
  concentration: choice - [0.1, 0.057, 0.153]
  temperature: choice - [105, 90, 120]

📋 参数空间JSON格式:
[
  {
    "name": "base",
    "type": "choice",
    "values": [
      "KOAc",
      "KOPiv",
      "CsOAc",
      "CsOPiv"
    ]
  },
  {
    "name": "ligand",
    "type": "choice",
    "values": [
      "BrettPhos",
      "PPhtBu2",
      "tBPh-CPhos",
      "PCy3 HBF4",
      "PPh3",
      "X-Phos",
      "P(fur)3",
      "PPh2Me",
      "GorlosPhos HBF4",
      "JackiePhos",
      "CgMe-PPh",
      "PPhMe2"
    ]
  },
  {
    "name": "solvent",
    "type": "choice",
    "values": [
      "DMAc",
      "BuCN",
      "BuOAc",
      "p-Xylene"
    ]
  },
  {
    "name": "concentration",
    "type": "choic

In [4]:
# Optimisation direction of each objective.
objectives = {
    "yield": {"minimize": False},  # maximise yield
    "cost": {"minimize": True}     # minimise cost
}

print("🎯 优化目标:")
for obj, obj_settings in objectives.items():
    if obj_settings["minimize"]:
        direction = "最小化"
    else:
        direction = "最大化"
    print(f"  {obj}: {direction}")

# Relative weights of the objectives (optional).
objective_weights = {
    "yield": 0.7,  # yield counts for 70%
    "cost": 0.3    # cost counts for 30%
}

print(f"\n⚖️ 目标权重: {objective_weights}")

# Bundle all settings in one dict for later use (kept in memory only).
config = dict(
    parameter_space=parameter_space,
    objectives=objectives,
    objective_weights=objective_weights,
    parameter_columns=parameter_columns,
    objective_columns=objective_columns,
)

print("\n✅ 配置已保存，可以进入下一步测试API接口")

🎯 优化目标:
  yield: 最大化
  cost: 最小化

⚖️ 目标权重: {'yield': 0.7, 'cost': 0.3}

✅ 配置已保存，可以进入下一步测试API接口


In [5]:
import requests
import time

# Endpoints of the optimisation service; both routes hang off one base URL.
API_BASE_URL = "http://localhost:3320"
INIT_ENDPOINT = API_BASE_URL + "/init"
UPDATE_ENDPOINT = API_BASE_URL + "/update"

# Echo the resolved endpoints so the reader can see what will be called.
print("🔧 API配置:")
print(f"   基础URL: {API_BASE_URL}")
print(f"   Init接口: {INIT_ENDPOINT}")
print(f"   Update接口: {UPDATE_ENDPOINT}")

# 测试API连接
def test_api_connection(timeout=5):
    """Check whether the API server answers on its ``/docs`` page.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on an unresponsive
            host, which would freeze the notebook at this probe.

    Returns:
        True if the request returned HTTP 200; False for any other status code
        or if the request raised (including a timeout).
    """
    try:
        response = requests.get(f"{API_BASE_URL}/docs", timeout=timeout)
        if response.status_code == 200:
            print("✅ API服务器连接正常")
            return True
        else:
            print(f"❌ API服务器响应异常: {response.status_code}")
            return False
    except Exception as e:
        # Best-effort probe: report the failure instead of raising.
        print(f"❌ API连接失败: {str(e)}")
        return False

# Probe the server once; if it is unreachable, tell the user how to start it.
api_connected = test_api_connection()
if not api_connected:
    startup_hint = "⚠️ 请确保API服务器正在运行: python api_parameter_optimizer_v3.py"
    print(startup_hint)

🔧 API配置:
   基础URL: http://localhost:3320
   Init接口: http://localhost:3320/init
   Update接口: http://localhost:3320/update
✅ API服务器连接正常


In [6]:
def call_init_api(parameter_space, objectives, batch_size=5, seed=2025,
                  sampling_method="lhs", timeout=60):
    """Call the ``/init`` endpoint to obtain the first batch of parameter combinations.

    Args:
        parameter_space: Parameter definitions, sent as ``parameter_space``.
        objectives: Mapping keyed by objective name; only the names are sent.
        batch_size: Number of combinations requested (sent as ``batch``).
        seed: Random seed forwarded to the service.
        sampling_method: Sampling strategy forwarded to the service. Defaults to
            ``"lhs"``, the value this function previously hard-coded (the old
            inline comment said "sobol", which did not match the value sent).
        timeout: Seconds to wait for the HTTP response; ``None`` waits forever.

    Returns:
        The decoded JSON response on HTTP 200; ``None`` on any other status
        code or if calling the API / reading the response raised.
    """

    # Build the request payload.
    init_request = {
        "parameter_space": parameter_space,
        "objectives": list(objectives.keys()),
        "batch": batch_size,
        "seed": seed,
        "sampling_method": sampling_method
    }

    print(f"🚀 调用init接口，批次大小: {batch_size}")
    print(f"📤 请求数据: {json.dumps(init_request, indent=2, ensure_ascii=False)}")

    try:
        # timeout: never let an unresponsive server hang the notebook.
        response = requests.post(INIT_ENDPOINT, json=init_request, timeout=timeout)

        if response.status_code == 200:
            result = response.json()
            print(f"✅ Init接口调用成功")
            print(f"🎲 采样方法: {result['sampling_method']}")
            print(f"📈 生成参数组合数: {len(result['results'])}")
            print(f"💬 消息: {result['message']}")
            return result
        else:
            print(f"❌ Init接口调用失败: {response.status_code}")
            print(f"📄 错误信息: {response.text}")
            return None

    except Exception as e:
        # Best-effort call: report the problem and let the caller handle None.
        print(f"❌ Init接口调用异常: {str(e)}")
        return None

# Request the initial batch and list the suggested parameter combinations.
init_result = call_init_api(parameter_space, objectives, batch_size=5)

if init_result:
    # (The leading emoji of this message was mojibake in the source and has been restored.)
    print("\n📋 生成的参数组合:")
    for i, params in enumerate(init_result['results'], 1):
        print(f"  {i}. {params}")

🚀 调用init接口，批次大小: 5
📤 请求数据: {
  "parameter_space": [
    {
      "name": "base",
      "type": "choice",
      "values": [
        "KOAc",
        "KOPiv",
        "CsOAc",
        "CsOPiv"
      ]
    },
    {
      "name": "ligand",
      "type": "choice",
      "values": [
        "BrettPhos",
        "PPhtBu2",
        "tBPh-CPhos",
        "PCy3 HBF4",
        "PPh3",
        "X-Phos",
        "P(fur)3",
        "PPh2Me",
        "GorlosPhos HBF4",
        "JackiePhos",
        "CgMe-PPh",
        "PPhMe2"
      ]
    },
    {
      "name": "solvent",
      "type": "choice",
      "values": [
        "DMAc",
        "BuCN",
        "BuOAc",
        "p-Xylene"
      ]
    },
    {
      "name": "concentration",
      "type": "choice",
      "values": [
        0.1,
        0.057,
        0.153
      ]
    },
    {
      "name": "temperature",
      "type": "choice",
      "values": [
        105,
        90,
        120
      ]
    }
  ],
  "objectives": [
    "yield",
    "cost"
 

In [7]:
def _experiment_record(params, row):
    """Package the yield/cost of a data row as the experiment result for ``params``."""
    return {
        "parameters": params,
        "metrics": {
            "yield": float(row['yield']),
            "cost": float(row['cost'])
        }
    }


def _rows_equal_to(df, constraints):
    """Return the rows of ``df`` whose columns equal every value in ``constraints``."""
    mask = None
    for key, value in constraints.items():
        hit = df[key] == value
        mask = hit if mask is None else mask & hit
    # No constraints at all means nothing is filtered out.
    return df if mask is None else df[mask]


def _nearest_row(df, params):
    """Return the row closest to ``params``, or None when no row qualifies.

    Non-numeric values must match exactly; among the remaining rows the one with
    the smallest summed absolute difference over the numeric values wins (the
    first such row on ties).
    """
    numeric = {
        key: value for key, value in params.items()
        if isinstance(value, (int, float)) and not isinstance(value, bool)
    }
    exact = {key: value for key, value in params.items() if key not in numeric}

    candidates = _rows_equal_to(df, exact)
    if len(candidates) == 0:
        return None

    distance = None
    for key, target in numeric.items():
        # Positional index so that idxmin() below yields an .iloc position.
        gap = (candidates[key] - target).abs().reset_index(drop=True)
        distance = gap if distance is None else distance + gap

    if distance is None:
        return candidates.iloc[0]
    if not distance.notna().any():
        # Every distance is NaN: nothing can be ranked.
        return None
    return candidates.iloc[int(distance.idxmin())]


def simulate_experiment_results(params_list, df):
    """Look up the recorded outcome of each parameter combination in ``df``.

    For every combination the lookup tries, in order:
      1. the first row whose parameter columns all equal the requested values;
      2. otherwise the nearest row (see ``_nearest_row``);
      3. otherwise a randomly sampled row.

    Rows are selected with boolean masks rather than a generated ``df.query``
    string, so values containing quotes and column names that are not valid
    identifiers are compared verbatim instead of breaking the query.

    NOTE(review): step 3 - also taken when the lookup raises - attaches a
    measurement that does not belong to the requested parameters. It is kept
    because callers expect one result per input, but such entries feed made-up
    data to the optimiser; consider skipping them instead.

    Args:
        params_list: Iterable of dicts mapping column name -> requested value.
        df: DataFrame with those columns plus ``yield`` and ``cost``.

    Returns:
        One dict per input combination, in input order:
        ``{"parameters": params, "metrics": {"yield": float, "cost": float}}``.
    """
    results = []

    for params in params_list:
        try:
            matched_data = _rows_equal_to(df, params)

            if len(matched_data) > 0:
                # Take the first exact match.
                row = matched_data.iloc[0]
                print(f"✅ 找到完全匹配: {params} -> yield={row['yield']:.2f}, cost={row['cost']:.3f}")
            else:
                print(f"⚠️ 未找到完全匹配: {params}")
                print(f"   尝试查找最接近的参数组合...")
                row = _nearest_row(df, params)
                if row is not None:
                    print(f"✅ 找到最接近匹配: {params} -> yield={row['yield']:.2f}, cost={row['cost']:.3f}")
                else:
                    print(f"❌ 未找到任何匹配: {params}")
        except Exception as e:
            print(f"❌ 查询失败: {str(e)}")
            row = None

        if row is None:
            # Last resort (should be rare): substitute a random row.
            row = df.sample(1).iloc[0]
            print(f"⚠️ 使用随机结果: {params} -> yield={row['yield']:.2f}, cost={row['cost']:.3f}")

        results.append(_experiment_record(params, row))

    return results

# Simulate the first round: look up outcomes for the initial suggestions.
# The locals() check lets this cell be skipped when the init cell has not run.
if 'init_result' in locals() and init_result:
    print("\n🧪 模拟第一轮实验结果:")
    first_round_results = simulate_experiment_results(init_result['results'], df)

    # (The leading emoji of this message was mojibake in the source and has been restored.)
    print("\n📊 第一轮实验结果:")
    for i, result in enumerate(first_round_results, 1):
        params = result['parameters']
        metrics = result['metrics']
        print(f"  {i}. 参数: {params}")
        print(f"     结果: yield={metrics['yield']:.2f}, cost={metrics['cost']:.3f}")


🧪 模拟第一轮实验结果:
✅ 找到完全匹配: {'base': 'CsOPiv', 'ligand': 'PPhMe2', 'solvent': 'p-Xylene', 'concentration': 0.1, 'temperature': 105} -> yield=0.00, cost=0.117
✅ 找到完全匹配: {'base': 'KOPiv', 'ligand': 'P(fur)3', 'solvent': 'DMAc', 'concentration': 0.153, 'temperature': 105} -> yield=64.46, cost=0.033
✅ 找到完全匹配: {'base': 'KOAc', 'ligand': 'PPhtBu2', 'solvent': 'p-Xylene', 'concentration': 0.057, 'temperature': 120} -> yield=0.00, cost=0.057
✅ 找到完全匹配: {'base': 'CsOAc', 'ligand': 'JackiePhos', 'solvent': 'BuCN', 'concentration': 0.153, 'temperature': 120} -> yield=27.41, cost=0.409
✅ 找到完全匹配: {'base': 'CsOAc', 'ligand': 'PCy3 HBF4', 'solvent': 'BuCN', 'concentration': 0.1, 'temperature': 90} -> yield=0.00, cost=0.095

📊 第一轮实验结果:
  1. 参数: {'base': 'CsOPiv', 'ligand': 'PPhMe2', 'solvent': 'p-Xylene', 'concentration': 0.1, 'temperature': 105}
     结果: yield=0.00, cost=0.117
  2. 参数: {'base': 'KOPiv', 'ligand': 'P(fur)3', 'solvent': 'DMAc', 'concentration': 0.153, 'temperature': 105}
     结果: yield=64.

In [9]:
def call_update_api(parameter_space, objectives, completed_experiments, batch_size=3,
                    use_weights=False, timeout=300):
    """Call the ``/update`` endpoint to get the next batch of suggestions.

    Args:
        parameter_space: Parameter definitions, sent as ``parameter_space``.
        objectives: Objective configuration, sent unchanged.
        completed_experiments: Results gathered so far, sent unchanged.
        batch_size: Number of new combinations requested (sent as ``batch``).
        use_weights: When true, the module-level ``objective_weights`` dict is
            sent as ``objective_weights``; otherwise ``None`` is sent.
        timeout: Seconds to wait for the HTTP response; ``None`` waits forever.

    Returns:
        The decoded JSON response on HTTP 200; ``None`` on any other status
        code or if calling the API / reading the response raised.
    """

    # Build the request payload.
    update_request = {
        "parameter_space": parameter_space,
        "objectives": objectives,
        "completed_experiments": completed_experiments,
        "batch": batch_size,
        "use_weights": use_weights,
        # Reads the notebook-global objective_weights (only when requested).
        "objective_weights": objective_weights if use_weights else None,
        "additional_metrics": []
    }

    print(f"🔄 调用update接口，批次大小: {batch_size}")
    print(f"📊 已完成实验数: {len(completed_experiments)}")
    print(f"⚖️ 使用权重: {use_weights}")

    try:
        # timeout: never let an unresponsive server hang the notebook.
        response = requests.post(UPDATE_ENDPOINT, json=update_request, timeout=timeout)

        if response.status_code == 200:
            result = response.json()
            print(f"✅ Update接口调用成功")
            print(f"📈 推荐参数组合数: {len(result['results'])}")
            print(f"💬 消息: {result['message']}")
            return result
        else:
            print(f"❌ Update接口调用失败: {response.status_code}")
            print(f"📄 错误信息: {response.text}")
            return None

    except Exception as e:
        # Best-effort call: report the problem and let the caller handle None.
        print(f"❌ Update接口调用异常: {str(e)}")
        return None

# Ask the optimiser for the next batch, based on the first-round results.
# The locals() check skips the call when the first round has not been simulated.
if 'first_round_results' in locals():
    update_result = call_update_api(
        parameter_space,
        objectives,
        first_round_results,
        batch_size=3,
    )

    if update_result:
        print("\n🔍 推荐的下一轮参数组合:")
        suggestions = update_result['results']
        for i, params in enumerate(suggestions, start=1):
            print(f"  {i}. {params}")

🔄 调用update接口，批次大小: 3
📊 已完成实验数: 5
⚖️ 使用权重: False
✅ Update接口调用成功
📈 推荐参数组合数: 3
💬 消息: 成功推荐3个参数组合，使用默认配置

🔍 推荐的下一轮参数组合:
  1. {'base': 'KOPiv', 'ligand': 'GorlosPhos HBF4', 'solvent': 'p-Xylene', 'concentration': 0.153, 'temperature': 105}
  2. {'base': 'KOPiv', 'ligand': 'P(fur)3', 'solvent': 'DMAc', 'concentration': 0.153, 'temperature': 120}
  3. {'base': 'KOPiv', 'ligand': 'CgMe-PPh', 'solvent': 'BuOAc', 'concentration': 0.153, 'temperature': 105}


In [10]:
# NOTE(review): call_update_api is also defined in the previous cell; this later
# definition is the one in effect after both cells run. Consider deleting one.
def call_update_api(parameter_space, objectives, completed_experiments, batch_size=3,
                    use_weights=False, timeout=300):
    """Call the ``/update`` endpoint to get the next batch of suggestions.

    Args:
        parameter_space: Parameter definitions, sent as ``parameter_space``.
        objectives: Objective configuration, sent unchanged.
        completed_experiments: Results gathered so far, sent unchanged.
        batch_size: Number of new combinations requested (sent as ``batch``).
        use_weights: When true, the module-level ``objective_weights`` dict is
            sent as ``objective_weights``; otherwise ``None`` is sent.
        timeout: Seconds to wait for the HTTP response; ``None`` waits forever.

    Returns:
        The decoded JSON response on HTTP 200; ``None`` on any other status
        code or if calling the API / reading the response raised.
    """

    # Build the request payload.
    update_request = {
        "parameter_space": parameter_space,
        "objectives": objectives,
        "completed_experiments": completed_experiments,
        "batch": batch_size,
        "use_weights": use_weights,
        # Reads the notebook-global objective_weights (only when requested).
        "objective_weights": objective_weights if use_weights else None,
        "additional_metrics": []
    }

    print(f"🔄 调用update接口，批次大小: {batch_size}")
    print(f"📊 已完成实验数: {len(completed_experiments)}")
    print(f"⚖️ 使用权重: {use_weights}")

    try:
        # timeout: never let an unresponsive server hang the notebook.
        response = requests.post(UPDATE_ENDPOINT, json=update_request, timeout=timeout)

        if response.status_code == 200:
            result = response.json()
            print(f"✅ Update接口调用成功")
            print(f"📈 推荐参数组合数: {len(result['results'])}")
            print(f"💬 消息: {result['message']}")
            return result
        else:
            print(f"❌ Update接口调用失败: {response.status_code}")
            print(f"📄 错误信息: {response.text}")
            return None

    except Exception as e:
        # Best-effort call: report the problem and let the caller handle None.
        print(f"❌ Update接口调用异常: {str(e)}")
        return None

# Ask the optimiser for the next batch, based on the first-round results.
# The locals() check skips the call when the first round has not been simulated.
if 'first_round_results' in locals():
    update_result = call_update_api(
        parameter_space,
        objectives,
        first_round_results,
        batch_size=3,
    )

    if update_result:
        print("\n🔍 推荐的下一轮参数组合:")
        suggestions = update_result['results']
        for i, params in enumerate(suggestions, start=1):
            print(f"  {i}. {params}")

🔄 调用update接口，批次大小: 3
📊 已完成实验数: 5
⚖️ 使用权重: False
✅ Update接口调用成功
📈 推荐参数组合数: 3
💬 消息: 成功推荐3个参数组合，使用默认配置

🔍 推荐的下一轮参数组合:
  1. {'base': 'KOPiv', 'ligand': 'GorlosPhos HBF4', 'solvent': 'p-Xylene', 'concentration': 0.153, 'temperature': 105}
  2. {'base': 'KOPiv', 'ligand': 'P(fur)3', 'solvent': 'DMAc', 'concentration': 0.153, 'temperature': 120}
  3. {'base': 'KOPiv', 'ligand': 'CgMe-PPh', 'solvent': 'BuOAc', 'concentration': 0.153, 'temperature': 105}


In [None]:
def _summarize_round(round_num, round_type, parameters, round_results):
    """Build the history entry of one round, including its best yield and cost."""
    yields = [r['metrics']['yield'] for r in round_results]
    costs = [r['metrics']['cost'] for r in round_results]
    return {
        'round': round_num,
        'type': round_type,
        'parameters': parameters,
        'results': round_results,
        # default=nan: a round that produced no results must not abort the loop
        # with "max() arg is an empty sequence".
        'best_yield': max(yields, default=float('nan')),
        'best_cost': min(costs, default=float('nan'))
    }


def run_optimization_loop(parameter_space, objectives, df, n_rounds=8, init_batch=10,
                          update_batch=5, seed=42):
    """Run one initial sampling round followed by Bayesian-optimisation rounds.

    Round 1 requests ``init_batch`` combinations via ``call_init_api``. Every
    later round sends all results collected so far to ``call_update_api`` and
    requests ``update_batch`` new combinations. Outcomes for each batch come
    from ``simulate_experiment_results(..., df)``.

    Args:
        parameter_space: Parameter definitions forwarded to both API calls.
        objectives: Objective configuration forwarded to both API calls.
        df: DataFrame used to look up experiment outcomes.
        n_rounds: Total number of rounds, including the initial one.
        init_batch: Batch size of the initial round.
        update_batch: Batch size of every later round.
        seed: Seed passed to ``call_init_api`` (42 was previously hard-coded).

    Returns:
        ``(all_experiments, optimization_history)``. ``(None, None)`` if the
        initial call fails. If an update call fails, the loop stops early and
        whatever was collected up to that point is returned.
    """

    all_experiments = []
    optimization_history = []

    print(f"🔄 开始{n_rounds}轮优化循环")
    print(f"📊 初始批次: {init_batch}, 更新批次: {update_batch}")

    # Round 1: initial sampling.
    print("\n=== 第1轮：初始化 ===")
    init_result = call_init_api(parameter_space, objectives, batch_size=init_batch, seed=seed)

    if not init_result:
        print("❌ 初始化失败，退出优化循环")
        return None, None

    round_results = simulate_experiment_results(init_result['results'], df)
    all_experiments.extend(round_results)
    optimization_history.append(
        _summarize_round(1, 'init', init_result['results'], round_results)
    )

    print(f"📈 第1轮最佳结果: yield={optimization_history[-1]['best_yield']:.2f}, cost={optimization_history[-1]['best_cost']:.3f}")

    # Remaining rounds: Bayesian optimisation on everything measured so far.
    for round_num in range(2, n_rounds + 1):
        print(f"\n=== 第{round_num}轮：贝叶斯优化 ===")

        update_result = call_update_api(parameter_space, objectives, all_experiments, batch_size=update_batch)

        if not update_result:
            print(f"❌ 第{round_num}轮更新失败，退出优化循环")
            break

        round_results = simulate_experiment_results(update_result['results'], df)
        all_experiments.extend(round_results)
        optimization_history.append(
            _summarize_round(round_num, 'update', update_result['results'], round_results)
        )

        print(f"📈 第{round_num}轮最佳结果: yield={optimization_history[-1]['best_yield']:.2f}, cost={optimization_history[-1]['best_cost']:.3f}")

        # Throttle consecutive requests; no need to wait after the final round.
        if round_num < n_rounds:
            time.sleep(1)

    return all_experiments, optimization_history

# Run the optimisation loop with its default round and batch settings, then
# unpack the collected experiments and the per-round history.
loop_output = run_optimization_loop(parameter_space, objectives, df)
all_experiments, optimization_history = loop_output

🔄 开始8轮优化循环
📊 初始批次: 10, 更新批次: 5

=== 第1轮：初始化 ===
🚀 调用init接口，批次大小: 10
📤 请求数据: {
  "parameter_space": [
    {
      "name": "base",
      "type": "choice",
      "values": [
        "KOAc",
        "KOPiv",
        "CsOAc",
        "CsOPiv"
      ]
    },
    {
      "name": "ligand",
      "type": "choice",
      "values": [
        "BrettPhos",
        "PPhtBu2",
        "tBPh-CPhos",
        "PCy3 HBF4",
        "PPh3",
        "X-Phos",
        "P(fur)3",
        "PPh2Me",
        "GorlosPhos HBF4",
        "JackiePhos",
        "CgMe-PPh",
        "PPhMe2"
      ]
    },
    {
      "name": "solvent",
      "type": "choice",
      "values": [
        "DMAc",
        "BuCN",
        "BuOAc",
        "p-Xylene"
      ]
    },
    {
      "name": "concentration",
      "type": "choice",
      "values": [
        0.1,
        0.057,
        0.153
      ]
    },
    {
      "name": "temperature",
      "type": "choice",
      "values": [
        105,
        90,
        120
      ]
    }

In [12]:
# 将优化历史结果保存到CSV文件
def save_optimization_history_to_csv(optimization_history, filename="optimization_results.csv"):
    """Flatten the optimisation history to one row per experiment and write it as CSV.

    Each row holds the round number and type, an ``R<round>_E<n>`` experiment
    id, one column per parameter of that experiment (whatever the parameter
    names are), the measured ``yield`` and ``cost``, and the round's best
    yield/cost.

    Args:
        optimization_history: List of round dicts with the keys ``round``,
            ``type``, ``parameters``, ``results``, ``best_yield`` and
            ``best_cost``.
        filename: Path of the CSV file to write (UTF-8, no index column).

    Returns:
        The DataFrame that was written, or ``None`` when there is nothing to
        save (no rounds, or rounds without experiments); no file is written then.
    """

    if not optimization_history:
        print("❌ 没有优化历史数据可保存")
        return None

    # One record per (parameters, result) pair of every round.
    csv_data = []

    for round_data in optimization_history:
        round_num = round_data['round']
        round_type = round_data['type']
        best_yield = round_data['best_yield']
        best_cost = round_data['best_cost']

        for i, (params, result) in enumerate(zip(round_data['parameters'], round_data['results']), 1):
            csv_data.append({
                'round': round_num,
                'type': round_type,
                'experiment_id': f"R{round_num}_E{i}",
                # Spread the parameters instead of naming columns, so any
                # parameter space can be saved, not only base/ligand/solvent/...
                **params,
                'yield': result['metrics']['yield'],
                'cost': result['metrics']['cost'],
                'best_yield_in_round': best_yield,
                'best_cost_in_round': best_cost
            })

    if not csv_data:
        # Rounds exist but none holds an experiment: the statistics below
        # would fail on a column-less frame.
        print("❌ 没有优化历史数据可保存")
        return None

    df_results = pd.DataFrame(csv_data)
    df_results.to_csv(filename, index=False, encoding='utf-8')

    print(f"✅ 优化结果已保存到: {filename}")
    print(f"📊 共保存 {len(df_results)} 个实验结果")
    print(f"🔄 包含 {len(optimization_history)} 轮优化")

    # Preview of the saved data.
    print("\n📋 数据预览:")
    print(df_results.head())

    # Summary statistics over all experiments.
    print("\n📈 统计信息:")
    print(f"  总实验数: {len(df_results)}")
    print(f"  总轮次: {df_results['round'].nunique()}")
    print(f"  最佳产率: {df_results['yield'].max():.2f}")
    print(f"  最佳成本: {df_results['cost'].min():.3f}")
    print(f"  平均产率: {df_results['yield'].mean():.2f}")
    print(f"  平均成本: {df_results['cost'].mean():.3f}")

    return df_results

# Persist the optimisation history: a detailed per-experiment CSV plus a
# per-round summary CSV. Skipped when the loop cell has not produced a history.
if 'optimization_history' in locals() and optimization_history:
    print("💾 保存优化历史结果到CSV文件...")
    results_df = save_optimization_history_to_csv(optimization_history, "bayesian_optimization_results.csv")

    # Additionally save one summary row per round.
    summary_data = []
    for round_data in optimization_history:
        summary_row = {
            'round': round_data['round'],
            'type': round_data['type'],
            'experiments_count': len(round_data['parameters']),
            'best_yield': round_data['best_yield'],
            'best_cost': round_data['best_cost'],
            'avg_yield': np.mean([r['metrics']['yield'] for r in round_data['results']]),
            'avg_cost': np.mean([r['metrics']['cost'] for r in round_data['results']])
        }
        summary_data.append(summary_row)

    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv("optimization_summary.csv", index=False, encoding='utf-8')
    print("✅ 优化汇总数据已保存到: optimization_summary.csv")

    # (The leading emoji of this message was mojibake in the source and has been restored.)
    print("\n📁 生成的文件:")
    print("  - bayesian_optimization_results.csv: 详细实验结果")
    print("  - optimization_summary.csv: 轮次汇总数据")

else:
    print("❌ 没有找到optimization_history数据，请先运行优化循环")

💾 保存优化历史结果到CSV文件...
✅ 优化结果已保存到: bayesian_optimization_results.csv
📊 共保存 49 个实验结果
🔄 包含 40 轮优化

📋 数据预览:
   round  type experiment_id    base      ligand   solvent  concentration  \
0      1  init         R1_E1    KOAc      PPh2Me      DMAc          0.057   
1      1  init         R1_E2    KOAc   BrettPhos      DMAc          0.153   
2      1  init         R1_E3   KOPiv  tBPh-CPhos  p-Xylene          0.153   
3      1  init         R1_E4   CsOAc    CgMe-PPh     BuOAc          0.100   
4      1  init         R1_E5  CsOPiv        PPh3      DMAc          0.153   

   temperature  yield      cost  best_yield_in_round  best_cost_in_round  
0          105  14.36  0.037519                 58.4            0.037519  
1          105   5.49  0.141125                 58.4            0.037519  
2           90  24.02  0.264828                 58.4            0.037519  
3          120  57.84  0.115306                 58.4            0.037519  
4          120  58.40  0.108183                 58.4        

In [13]:
# Display the per-experiment results table returned by
# save_optimization_history_to_csv in the previous cell.
results_df

Unnamed: 0,round,type,experiment_id,base,ligand,solvent,concentration,temperature,yield,cost,best_yield_in_round,best_cost_in_round
0,1,init,R1_E1,KOAc,PPh2Me,DMAc,0.057,105,14.36,0.037519,58.4,0.037519
1,1,init,R1_E2,KOAc,BrettPhos,DMAc,0.153,105,5.49,0.141125,58.4,0.037519
2,1,init,R1_E3,KOPiv,tBPh-CPhos,p-Xylene,0.153,90,24.02,0.264828,58.4,0.037519
3,1,init,R1_E4,CsOAc,CgMe-PPh,BuOAc,0.1,120,57.84,0.115306,58.4,0.037519
4,1,init,R1_E5,CsOPiv,PPh3,DMAc,0.153,120,58.4,0.108183,58.4,0.037519
5,1,init,R1_E6,KOAc,X-Phos,BuCN,0.1,90,26.5,0.052515,58.4,0.037519
6,1,init,R1_E7,KOPiv,PPhtBu2,BuCN,0.057,90,0.0,0.082787,58.4,0.037519
7,1,init,R1_E8,CsOPiv,PPhMe2,p-Xylene,0.153,105,0.0,0.111903,58.4,0.037519
8,1,init,R1_E9,CsOPiv,P(fur)3,BuOAc,0.1,105,14.51,0.136764,58.4,0.037519
9,1,init,R1_E10,CsOAc,JackiePhos,BuOAc,0.1,120,4.06,0.413251,58.4,0.037519
