In [1]:
import numpy as np
import dill
import torch
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split

# Load the training and test sets.
# NOTE(review): dill.load, like pickle.load, can execute arbitrary code while
# deserializing. Only open train.pkl / test.pkl when they come from a trusted
# source.
with open('train.pkl', 'rb') as f:
    train_data = dill.load(f)

with open('test.pkl', 'rb') as f:
    test_data = dill.load(f)

train_packed_fps = train_data['packed_fp']
train_values = train_data['values']

test_packed_fps = test_data['packed_fp']
test_values = test_data['values']

# Optional downsampling of the training data (currently disabled).
# sample_ratio = 0.5
# train_packed_fps, _, train_values, _ = train_test_split(train_packed_fps, train_values, test_size=sample_ratio, random_state=42)

# np.unpackbits expands every packed fingerprint into its individual bits;
# the bits are cast to float32 and stacked into one array for the regressor.
train_fingerprints = np.array(
    [np.unpackbits(packed_fp).astype(np.float32) for packed_fp in train_packed_fps]
)
train_values = np.array(train_values, dtype=np.float32)
train_values = train_values.ravel()  # flatten the targets into a 1-D array

# Train the MLP model.
# NOTE(review): no random_state is passed, so training is not seeded here and
# the fitted weights (and printed totals) may differ between runs.
model_mlp = MLPRegressor(hidden_layer_sizes=(50, 50))  # adjust the hidden layer sizes/count as needed
model_mlp.fit(train_fingerprints, train_values)

# Predict the test set with the MLP model, unpacking the test fingerprints
# the same way as the training fingerprints.
test_fingerprints = np.array(
    [np.unpackbits(packed_fp).astype(np.float32) for packed_fp in test_packed_fps]
)

test_values = np.array(test_values, dtype=np.float32)  # convert test_values to a NumPy array
test_predictions_mlp = model_mlp.predict(test_fingerprints)

# Predict the cost of multiple molecules using Equation 3.1.
def predict_multiple_molecules(molecules, model):
    """Predict one cost per molecule, calling ``model.predict`` once each.

    Args:
        molecules: Iterable of packed fingerprints that ``np.unpackbits``
            accepts (presumably uint8 arrays -- confirm against the data).
        model: Object exposing ``predict``. It is called with a one-element
            list holding the unpacked float32 fingerprint.

    Returns:
        A list with the first element of each ``predict`` result, in the
        same order as ``molecules``.
    """
    # The one-element list wrapper is kept on purpose: the model receives a
    # plain list, not an ndarray, exactly as before.
    return [
        model.predict([np.unpackbits(packed).astype(np.float32)])[0]
        for packed in molecules
    ]

# Pick a few test molecules (replace with other molecules as needed).
test_molecules = [test_packed_fps[idx] for idx in range(3)]

# Predict the cost of each selected molecule using Equation 3.1.
predicted_costs_mlp = predict_multiple_molecules(test_molecules, model_mlp)

# Sum the per-molecule predictions and report the total.
total_cost_eq_3_1 = sum(predicted_costs_mlp)
print("使用方程 3.1 的总成本:", total_cost_eq_3_1)


# Predict the cost of multiple molecules using Equation 3.2.
def predict_total_cost(molecules, model):
    """Predict all molecules in one ``model.predict`` call and sum the result.

    Args:
        molecules: Iterable of packed fingerprints that ``np.unpackbits``
            accepts (presumably uint8 arrays -- confirm against the data).
        model: Object exposing ``predict``. It is called once with an array
            built from every unpacked float32 fingerprint.

    Returns:
        The value of ``.sum()`` over the predictions returned by the model.
    """
    # Unpack every fingerprint, then stack them so the model sees one batch.
    batch = np.array(
        [np.unpackbits(packed).astype(np.float32) for packed in molecules]
    )
    return model.predict(batch).sum()

# Predict the total cost of the selected molecules using Equation 3.2.
total_cost_eq_3_2 = predict_total_cost(
    molecules=test_molecules,
    model=model_mlp,
)
print("使用方程 3.2 的总成本:", total_cost_eq_3_2)




使用方程 3.1 的总成本: 1.918793797492981
使用方程 3.2 的总成本: 1.9187933
