In [9]:
import os   # 文件系统操作
import pickle  # 模块 Python 标准库
import pandas as pd  #  表格数据 处理
import numpy as np  # 模块 科学计算库   数组计算

import networkx as nx # 复杂网络分析库（图数据）

import torch
import torch.nn.functional as F # PyTorch 的函数（如激活函数）
from torch import nn           # PyTorch 的神经网络层（类）
from torch.utils.data import Dataset, DataLoader, random_split # 数据加载类

from torch_geometric.data import Data, DataLoader as PyGDataLoader # PyTorch Geometric（图神经网络库），用于 GNN 计算
from torch_geometric.utils import from_networkx # 函数，用于从 networkx 转换为 torch_geometric 格式
from torch_geometric.nn import GCNConv, GINEConv, global_mean_pool, BatchNorm # 图神经网络层的类 
from torch_geometric.nn.conv.gcn_conv import gcn_norm # 函数，用于对 GCN 进行归一化处理

from sklearn.metrics import r2_score, mean_squared_error



In [2]:
import warnings
warnings.filterwarnings('ignore')


### 常见特殊方法

- 实例方法（Instance Method）：定义在类中的函数，属于某个对象（实例）

1.必须定义在类中

2.必须接受 self（指向当前实例）

3.用于实例的索引访问

特殊方法	作用
- __init__	构造函数，创建对象时调用
- __str__	定义 print(obj) 时的字符串表示
- __repr__	定义 repr(obj) 时的字符串表示

__len__	让对象支持 len(obj)

__getitem__	让对象支持索引访问 obj[key]

__setitem__	让对象支持索引赋值 obj[key] = value

__delitem__	让对象支持 del obj[key]

__iter__	让对象支持 for item in obj 迭代

__contains__	让对象支持 in 关键字，如 item in obj

__call__	让对象像函数一样调用，如 obj()

__eq__	定义 == 比较

__add__	让对象支持 + 运算符

### ---


---
### 1. Standard training set

In [None]:
from sklearn.preprocessing import StandardScaler # 类，用于标准化数据（均值为 0，标准差为 1）
import torch
from torch.utils.data import Dataset

class MolDataset(Dataset):
    def __init__(self, raw_dataframe, nx_graph_dict, *, component_col: str, global_state_cols: list[str], label_col: str, transform=None):
        self.raw_dataframe = raw_dataframe
        self.nx_graph_dict = nx_graph_dict
        self.component_col = [component_col] if isinstance(component_col, str) else component_col
        self.global_state_cols = global_state_cols
        self.label_col = [label_col] if isinstance(label_col, str) else label_col
        self.transform = transform
        
        required_cols = set(self.global_state_cols + self.label_col + self.component_col)
        for col in required_cols:
            if col not in self.raw_dataframe.columns:
                raise ValueError(f"Missing column in DataFrame: '{col}'")

        # Create uninitialized scalers
        self.node_scaler = StandardScaler()
        self.edge_scaler = StandardScaler()
        self.env_scaler = StandardScaler()

    def fit_standardizers(self, train_indices):
        """ Fit scalers only using training data """
        node_features, edge_features, env_features = [], [], []

        for idx in train_indices:
            row = self.raw_dataframe.iloc[idx]
            component_name = row[self.component_col[0]]
            pyg_data = self.nx_graph_dict[component_name]

            if pyg_data.x is not None:
                node_features.append(pyg_data.x.numpy())
            if pyg_data.edge_attr is not None:
                edge_features.append(pyg_data.edge_attr.numpy())
            env_features.append(row[self.global_state_cols].values.astype(float))

        # Fit scalers only on training data
        if node_features:
            all_node_features = np.vstack(node_features)
            self.node_scaler.fit(all_node_features)

        if edge_features:
            all_edge_features = np.vstack(edge_features)
            self.edge_scaler.fit(all_edge_features)

        all_env_features = np.vstack(env_features)
        self.env_scaler.fit(all_env_features)

    def __getitem__(self, idx):
        row = self.raw_dataframe.iloc[idx]
        component_name = row[self.component_col[0]]
        pyg_data = self.nx_graph_dict[component_name]

        # Standardize node features
        if pyg_data.x is not None:
            pyg_data.x = torch.tensor(self.node_scaler.transform(pyg_data.x.numpy()), dtype=torch.float)

        # Standardize edge features
        if pyg_data.edge_attr is not None:
            pyg_data.edge_attr = torch.tensor(self.edge_scaler.transform(pyg_data.edge_attr.numpy()), dtype=torch.float)

        # Standardize environmental data
        externals = row[self.global_state_cols].values.astype(float)
        externals = torch.tensor(self.env_scaler.transform([externals])[0], dtype=torch.float).unsqueeze(0)
        pyg_data.externals = externals  

        # Prepare label
        label = torch.tensor([row[self.label_col][0]], dtype=torch.float)
        pyg_data.y = label  

        if self.transform:
            pyg_data = self.transform(pyg_data)

        return pyg_data




In [22]:

def networkx_to_pyg(nx_graph):
    """
    Convert a networkx graph to a torch_geometric.data.Data object.
    This is a basic template; adjust for your actual node/edge features.
    """
    # Sort nodes to ensure consistent ordering
    # e.g. node 0, node 1, ...
    # In some networkx graphs, node labels might be strings. We’ll map them to integers.
    # 字典推导式（dictionary comprehension）
    node_mapping = {node: i for i, node in enumerate(nx_graph.nodes())} 

    # 节点映射
    # 由于 networkx 允许字符串节点名称，但 PyG 需要整数索引，因此创建 node_mapping 确保一致性。

    # Build lists for PyG
    x_list = []
    edge_index_list = []
    edge_attr_list = []

    for node in nx_graph.nodes(data=True): # 遍历 networkx 图中的所有节点及其属性
        # original_id = node[0]
        attrs = node[1]
        # Example: 'symbol' might be in attrs, etc.
        # For demonstration, let's store only "symbol" as a simple categorical embedding
        # You might do something more sophisticated (e.g., one-hot) for real usage
        symbol = attrs.get("symbol", "C")  # 获取 symbol 属性，若不存在则默认 "C"。

        symbol_id = 0 if symbol == "C" else 1 if symbol == "H" else 2

        # 将原子符号（如 "C", "H"）转换为整数 ID
        
        x_list.append([symbol_id]) # 构建节点特征矩阵

    for u, v, edge_attrs in nx_graph.edges(data=True): # 遍历边
        u_idx = node_mapping[u] # 使用 node_mapping 将节点名称转换为整数索引
        v_idx = node_mapping[v]
        edge_index_list.append((u_idx, v_idx)) #tuple
        # Possibly store bond features: "bond_index", "bde_pred", etc.
        bde_pred = edge_attrs.get("bde_pred", 0.0)
        if bde_pred is None:
            bde_pred = 0.0
        bdfe_pred = edge_attrs.get("bdfe_pred", 0.0)
        if bdfe_pred is None:
            bdfe_pred = 0.0
        edge_attr_list.append([bde_pred, bdfe_pred])
    
    # Convert to torch tensors
    x = torch.tensor(x_list, dtype=torch.float)  # shape [num_nodes, num_node_features]
    edge_index = torch.tensor(edge_index_list, dtype=torch.long).t().contiguous()  # shape [2, num_edges]
    edge_attr = torch.tensor(edge_attr_list, dtype=torch.float)  # shape [num_edges, edge_feat_dim]

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
    return data


nn.Sequential(...)：
将多个层串联，形成一个前馈神经网络（MLP）。
nn.Linear(edge_in_dim, hidden_dim)：
线性变换，将 edge_in_dim 维的边特征转换为 hidden_dim 维度。
nn.ReLU()：
激活函数，引入非线性。
nn.Linear(hidden_dim, hidden_dim)：

GINEConv 是 Graph Isomorphism Network with Edge Features (GINE) 的卷积层，用于处理带边特征的图。
nn 是 GINEConv 内部的神经网络（MLP），用于消息传递（Message Passing） 和特征更新。
net 是你自己定义的 神经网络（MLP），作为 nn 传入 GINEConv，用于节点特征的转换。

1. 层数收敛
（64 -32 -16）

2. optuna生成梯形层（其他GNN结构）
3. 图形GUM

In [10]:
class GINE_Regression(nn.Module):
    def __init__(self,
                 node_in_dim: int,
                 edge_in_dim: int,
                 external_in_dim: int,
                 hidden_dim: int = 128, # 32 64
                 num_layers: int = 3, # 5 4
                 dropout: float = 0.1): # 0.2 0.3
        """
        A more 'realistic' GNN for regression, using GINEConv layers + edge attributes.
        
        Args:
            node_in_dim (int): Dim of node features (e.g. 1 or 3).
            edge_in_dim (int): Dim of edge features (e.g. 2 for [bde_pred, bdfe_pred]).
            external_in_dim (int): Dim of external factor features (e.g. 6).
            hidden_dim (int): Hidden embedding size for GNN layers.
            num_layers (int): Number of GNN layers.
            dropout (float): Dropout probability.
        """
        super().__init__() 

        # 调用 nn.Module 的 __init__()，确保模型参数被 PyTorch 正确管理

        # nn.Sequential 将多个层串联
        
        # A learnable linear transform for edge features (required by GINEConv's "nn" argument):
        # Typically GINEConv uses a small MLP to incorporate edge_attr into the message.
        self.edge_encoder = nn.Sequential(
            nn.Linear(edge_in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim)
        )
        
        # A learnable linear transform for node features:
        self.node_encoder = nn.Linear(node_in_dim, hidden_dim) 

        # 线性变换，将 node_in_dim 维的节点特征转换为 hidden_dim 维
        
        # Create multiple GINEConv layers
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()

        # 用于存储多个 GINEConv 层和 BatchNorm 层，使模型可以迭代访问这些层。
        
        for _ in range(num_layers): # 循环创建 num_layers 层 GINEConv
            # GINEConv requires an MLP for node update:
            # We'll use a simple 2-layer 
            # MLP GINEConv 需要 MLP 作为 nn 参数，这里创建一个两层的 MLP
            net = nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            )
            conv = GINEConv(nn=net)
            self.convs.append(conv)
            self.bns.append(BatchNorm(hidden_dim))  # batch norm for stability

        self.dropout = nn.Dropout(p=dropout) # 防止过拟合，随机丢弃 dropout 比例的神经元。

        # An MLP to process external factors
        self.externals_mlp = nn.Sequential(
            nn.Linear(external_in_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, hidden_dim)
        )

        # Final regression MLP after pooling + external embedding
        # 最终将 graph_emb + ext_emb 组合后进行回归预测。
        self.final_regressor = nn.Sequential(
            nn.Linear(hidden_dim + hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, data):
        """
        Args:
            data: PyG Data object, expected fields:
                - x: Node features [num_nodes, node_in_dim]
                - edge_index: [2, num_edges]
                - edge_attr: [num_edges, edge_in_dim]
                - batch: [num_nodes] mapping each node to a graph ID
                - externals: [batch_size, external_in_dim]
        Returns:
            A tensor of shape [batch_size], the predicted regression value.
        """
        x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch
        
        # 1) Encode node features and edge features
        x = self.node_encoder(x)                 # [num_nodes, hidden_dim]
        edge_emb = self.edge_encoder(edge_attr)  # [num_edges, hidden_dim]
        
        # 2) Pass through multiple GINEConv layers 
        #    进行 BatchNorm + ReLU + Dropout
        for conv, bn in zip(self.convs, self.bns):
            x = conv(x, edge_index, edge_emb)
            x = bn(x)
            x = F.relu(x)
            x = self.dropout(x)

        # 3) Global pooling to get graph embedding
        graph_emb = global_mean_pool(x, batch)  # [batch_size, hidden_dim]

        # 4) Process external factors
        ext_emb = self.externals_mlp(data.externals)  # [batch_size, hidden_dim]

        # 5) Combine + final regression
        combined = torch.cat([graph_emb, ext_emb], dim=-1)  # [batch_size, hidden_dim * 2]
        out = self.final_regressor(combined).squeeze(-1)    # [batch_size]
        return out


batch_data 是 PyG 的 Data 对象，包含：
batch_data.x：节点特征。
batch_data.edge_index：边索引。
batch_data.edge_attr：边特征。
batch_data.y：标签（回归目标）。
batch_data.num_graphs：该 batch 中的图数量。

In [5]:
def train_one_epoch(model, loader, optimizer, criterion, device): #参数
    # 前向传播 ➝ 计算损失 ➝ 反向传播 ➝ 更新模型参数
    model.train() # PyTorch nn.Module 的方法，将模型设置为训练模式 
    total_loss = 0.0 # 初始化损失值
    count = 0
    for batch_data in loader: # 遍历 loader 里的 每个 batch
        batch_data = batch_data.to(device)
        optimizer.zero_grad() # 梯度清零
        preds = model(batch_data)               # [batch_size] 前
        y = batch_data.y.to(device).view(-1)    # [batch_size] label
        loss = criterion(preds, y)   
        loss.backward() # 反向传播
        optimizer.step() # 更新
        total_loss += loss.item() * batch_data.num_graphs
        count += batch_data.num_graphs
    return total_loss / count if count > 0 else 0.0

def validate(model, loader, criterion, device): # 禁用 Dropout/BatchNorm
    model.eval()
    total_loss = 0.0
    count = 0
    with torch.no_grad():
        for batch_data in loader:
            batch_data = batch_data.to(device) 
            preds = model(batch_data) # 前向传播，但不计算梯度
            y = batch_data.y.to(device).view(-1)
            loss = criterion(preds, y)
            total_loss += loss.item() * batch_data.num_graphs
            count += batch_data.num_graphs
    return total_loss / count if count > 0 else 0.0


In [7]:


def evaluate_model(model, loader, device):
    """
    Evaluate the model on a dataset loader and compute R² and RMSE.

    Args:
        model (nn.Module): The trained GNN model.
        loader (DataLoader): The PyG DataLoader for the evaluation dataset.
        device (torch.device): The device to run on.
    
    Returns:
        r2 (float): Coefficient of determination.
        rmse (float): Root Mean Squared Error.
    """
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            preds = model(batch)
            y_true.append(batch.y.cpu())
            y_pred.append(preds.cpu())

    # If your labels are stored as tensors with an extra dimension, use .squeeze() if needed.
    y_true = torch.cat(y_true).numpy().squeeze()
    y_pred = torch.cat(y_pred).numpy().squeeze()

    r2 = r2_score(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    print(f"R²: {r2:.4f}")
    print(f"RMSE: {rmse:.4f}")

    return r2, rmse


In [16]:
env_file = r"F:\2025 energing\PYTHON\GNN_chemicalENV-main\GNN molecules\graph_pickles\dataset02.xlsx"

data = pd.read_excel(env_file, engine='openpyxl').dropna(subset=['degradation_rate'])
data['seawater'] = data['seawater'].map({'art': 1, 'sea': 0})

In [17]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1023 entries, 0 to 1039
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   data number       1023 non-null   float64
 1   temperature       1023 non-null   float64
 2   seawater          1023 non-null   int64  
 3   concentration     1023 non-null   int64  
 4   time              1023 non-null   int64  
 5   component         1023 non-null   object 
 6   BDE               1023 non-null   float64
 7   BDFE              1023 non-null   float64
 8   energy            1023 non-null   float64
 9   degradation_rate  1023 non-null   float64
dtypes: float64(6), int64(3), object(1)
memory usage: 87.9+ KB


In [18]:
folder_path = r"F:\2025 energing\PYTHON\GNN_chemicalENV-main\GNN molecules\graph_pickles\molecules"
graph_pickles = [f for f in os.listdir(folder_path) if f.endswith(".pkl")]

#graph_pickles = [f for f in os.listdir('./molecules') if f.endswith('.pkl')]

In [19]:
import os

base_dir = r"F:\2025 energing\PYTHON\GNN_chemicalENV-main\GNN molecules\graph_pickles\molecules"

if os.path.exists(base_dir):
    print("Directory exists:", base_dir)
    print("Files in directory:", os.listdir(base_dir))
else:
    print(f"Error: Directory {base_dir} does not exist!")

Directory exists: F:\2025 energing\PYTHON\GNN_chemicalENV-main\GNN molecules\graph_pickles\molecules
Files in directory: ['gpickle_graph_0.pkl', 'gpickle_graph_1.pkl', 'gpickle_graph_10.pkl', 'gpickle_graph_11.pkl', 'gpickle_graph_12.pkl', 'gpickle_graph_13.pkl', 'gpickle_graph_14.pkl', 'gpickle_graph_15.pkl', 'gpickle_graph_16.pkl', 'gpickle_graph_17.pkl', 'gpickle_graph_18.pkl', 'gpickle_graph_19.pkl', 'gpickle_graph_2.pkl', 'gpickle_graph_3.pkl', 'gpickle_graph_4.pkl', 'gpickle_graph_5.pkl', 'gpickle_graph_6.pkl', 'gpickle_graph_7.pkl', 'gpickle_graph_8.pkl', 'gpickle_graph_9.pkl']


In [23]:
compounds = data.component.unique()
graphs_dict = {}

for compound, graph_pickle in zip(compounds, graph_pickles):
    #with open(f'./molecules/{graph_pickle}', 'rb') as f:
    with open(os.path.join(base_dir, graph_pickle), 'rb') as f:

        graph = pickle.load(f)
        graphs_dict[compound] = networkx_to_pyg(graph)


In [24]:
from torch_geometric.loader import DataLoader as PyGDataLoader

# train set st.
dataset = MolDataset(raw_dataframe=data, nx_graph_dict=graphs_dict, component_col="component", global_state_cols=["temperature", "concentration", "time", "seawater"], label_col="degradation_rate")

# Split dataset into training and testing
train_size = int(0.8 * len(data))
test_size = len(data) - train_size
indices = list(range(len(data)))
train_indices, test_indices = random_split(indices, [train_size, test_size])
# random_split() 需要的数据类型是 Dataset 对象

dataset.fit_standardizers(train_indices)

train_dataset = torch.utils.data.Subset(dataset, train_indices)
test_dataset = torch.utils.data.Subset(dataset, test_indices)

train_loader = PyGDataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader   = PyGDataLoader(test_dataset, batch_size=16, shuffle=False)


---
---

In [33]:
def __len__(self):
    return self.raw_dataframe.shape[0]


In [35]:
dataset = MolDataset(raw_dataframe=data, nx_graph_dict=graphs_dict, 
                     component_col="component", 
                     global_state_cols=["temperature", "concentration", "time", "seawater"], 
                     label_col="degradation_rate")

print("Dataset length:", len(dataset))


Dataset length: 1023


In [37]:
def fit_standardizers(self, train_indices):
    """ Fit scalers only using training data """
    node_features, edge_features, env_features = [], [], []

    for idx in train_indices:
        row = self.raw_dataframe.iloc[idx]
        component_name = row[self.component_col[0]]
        pyg_data = self.nx_graph_dict[component_name]

        # Clean and collect node features
        if pyg_data.x is not None:
            # Convert to numpy, replace NaN/Inf with finite numbers
            node_feats = np.nan_to_num(pyg_data.x.numpy(), nan=0.0, posinf=1e6, neginf=-1e6)
            node_features.append(node_feats)

        # Clean and collect edge features
        if pyg_data.edge_attr is not None:
            edge_feats = np.nan_to_num(pyg_data.edge_attr.numpy(), nan=0.0, posinf=1e6, neginf=-1e6)
            edge_features.append(edge_feats)

        # Collect environmental features (if needed, clean them too)
        env = row[self.global_state_cols].values.astype(float)
        env = np.nan_to_num(env, nan=0.0, posinf=1e6, neginf=-1e6)
        env_features.append(env)

    # Fit scalers only on training data
    if node_features:
        all_node_features = np.vstack(node_features)
        self.node_scaler.fit(all_node_features)

    if edge_features:
        all_edge_features = np.vstack(edge_features)
        self.edge_scaler.fit(all_edge_features)

    all_env_features = np.vstack(env_features)
    self.env_scaler.fit(all_env_features)


In [38]:
from sklearn.model_selection import KFold

# 3) k-Fold Cross-Validation Setup
# ---------------------------------------------------
k_folds = 5
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

all_r2_scores = []
all_rmse_scores = []

for fold, (train_indices, test_indices) in enumerate(kf.split(range(len(dataset)))):
    print(f"\n=== Fold {fold+1}/{k_folds} ===")

    # 3.1) Fit scalers on training fold only
    dataset.fit_standardizers(train_indices)

    # 3.2) Create subset objects
    train_subset = Subset(dataset, train_indices)
    test_subset  = Subset(dataset, test_indices)

    # 3.3) Create DataLoaders
    train_loader = PyGDataLoader(train_subset, batch_size=16, shuffle=True)
    test_loader  = PyGDataLoader(test_subset, batch_size=16, shuffle=False)

    # 3.4) Instantiate a new model for each fold
    model = GINE_Regression(
        node_in_dim=1,
        edge_in_dim=2,
        external_in_dim=4,
        hidden_dim=16,
        num_layers=5,
        dropout=0.0
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    criterion = nn.MSELoss()

    # 3.5) Train for some epochs
    num_epochs = 50
    for epoch in range(1, num_epochs+1):
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
        val_loss   = validate(model, test_loader, criterion, device)
        if epoch % 10 == 0:
            print(f"[Epoch {epoch}] train_loss: {train_loss:.4f}, val_loss: {val_loss:.4f}")

    # 3.6) Evaluate on test fold
    r2, rmse = evaluate_model(model, test_loader, device)
    print(f"Fold {fold+1} -> R²: {r2:.4f}, RMSE: {rmse:.4f}")

    all_r2_scores.append(r2)
    all_rmse_scores.append(rmse)

# ---------------------------------------------------
# 4) Aggregate and Print Final Results
# ---------------------------------------------------
mean_r2  = np.mean(all_r2_scores)
std_r2   = np.std(all_r2_scores)
mean_rmse = np.mean(all_rmse_scores)
std_rmse  = np.std(all_rmse_scores)

print("\n=== Cross-Validation Results ===")
print(f"R²:   {mean_r2:.4f} ± {std_r2:.4f}")
print(f"RMSE: {mean_rmse:.4f} ± {std_rmse:.4f}")


=== Fold 1/5 ===


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


ValueError: Input X contains infinity or a value too large for dtype('float32').

---

In [12]:

# -----------------------------------
# 2) Instantiate model + optimizer
# -----------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [13]:
model = GINE_Regression(
    node_in_dim=1,
    edge_in_dim=2,
    external_in_dim=4,
    hidden_dim=16, # 32 64
    num_layers=5,
    dropout=0 # 0.1 0.5 
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
criterion = torch.nn.MSELoss()


In [None]:
import optuna
import torch
from torch_geometric.nn import GINEConv, global_mean_pool



[I 2025-03-05 15:52:50,764] A new study created in memory with name: no-name-5b09dcce-30ff-4c5f-8ba7-142f9f7cd54b
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-3)
  dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
  label = torch.tensor([row[self.label_col][0]], dtype=torch.float)
[W 2025-03-05 15:52:50,779] Trial 0 failed with parameters: {'lr': 0.00010390339340463158, 'hidden_dim': 64, 'dropout': 0.17339195269394883, 'num_layers': 5} because of the following error: ValueError("Input X contains infinity or a value too large for dtype('float32').").
Traceback (most recent call last):
  File "f:\Miniconda3\envs\alfabet_env\lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\kang_\AppData\Local\Temp\ipykernel_34408\3231929643.py", line 27, in objective
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
  File "C:\Users\kang_\AppData\Local\Temp\ipykernel_34408\561660198.py", line 212

ValueError: Input X contains infinity or a value too large for dtype('float32').

In [None]:
if __name__ == "__main__":
    study = optuna.create_study(
        storage="sqlite:///db_test.sqlite3",  # Specify the storage URL here.
        study_name="quadratic-simple"
    )
    study.optimize(objective, n_trials=100)
    print(f"Best value: {study.best_value} (params: {study.best_params})")

In [None]:

# Example usage after training:
r2, rmse = evaluate_model(model, val_loader, device)

R²: 0.5258
RMSE: 0.1728


1.配合optuna 参数搜索基于交叉验证后的数据集
2. GINE 换成 Gcon
3. 梯形GNN
4. 调参组合