In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datetime import datetime # 导入 datetime
from torch.utils.tensorboard import SummaryWriter

In [2]:
# =============================================================================
# Cell 2: hyperparameters, unique TensorBoard log directory, data loading
# =============================================================================

# --- 1. Experiment name and hyperparameters ---
data_name = 'zoo'

# Edge-weight thresholds for the two adjacency matrices, followed by the
# optimiser and model settings used further down the notebook.
hparams = dict(
    threshold_pos=4000,
    threshold_neg=49000,
    learning_rate=0.005,
    weight_decay=5e-4,
    hidden_channels=16,
    epochs=150,
)

# --- 2. Unique TensorBoard log directory ---
# The run name encodes both thresholds, the learning rate and the weight decay;
# the timestamp keeps repeated runs with identical settings apart.
hparam_str = f"tp={hparams['threshold_pos']}_tn={hparams['threshold_neg']}_lr={hparams['learning_rate']}_wd={hparams['weight_decay']}"
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = f"../runs/{data_name}_{hparam_str}_{timestamp}"

# Every scalar and hyperparameter record written later goes through this writer.
writer = SummaryWriter(log_dir)
print(f"TensorBoard 日志将保存在: {log_dir}")


# --- 3. Data loading and preprocessing ---

def _sparsify_adjacency(adj, threshold):
    """Threshold a dense weighted adjacency matrix in place and turn it into edge lists.

    Entries less than or equal to ``threshold`` and all diagonal entries are
    set to zero; whatever stays non-zero becomes a weighted edge.

    Args:
        adj: dense float tensor of edge weights; modified in place.
        threshold: weights at or below this value are dropped.

    Returns:
        ``(edge_index, edge_attr, num_edges)``. The first two are the result of
        ``dense_to_sparse(adj)``; ``num_edges`` is the number of non-zero
        entries left in ``adj`` (every matrix entry is counted, so a symmetric
        matrix contributes two entries per undirected edge).
    """
    adj[adj <= threshold] = 0
    adj.fill_diagonal_(0)  # drop self-loops stored in the file
    num_edges = torch.count_nonzero(adj).item()
    edge_index, edge_attr = dense_to_sparse(adj)
    return edge_index, edge_attr, num_edges


# a) Node features (both branches share the same feature matrix)
features_path = '../data/' + data_name + '/' + data_name + '.data.cleaned.csv'
x_numpy = np.loadtxt(features_path, delimiter=',')
x_features = torch.tensor(x_numpy, dtype=torch.float)
num_nodes = x_features.shape[0]

# b) Positive concept-lattice adjacency matrix (branch one)
adj_matrix_pos_path = '../data/' + data_name + '/' + data_name + '_A_plus_UG.csv'
a_plus_pos_numpy = np.loadtxt(adj_matrix_pos_path, delimiter=',')
a_plus_pos = torch.tensor(a_plus_pos_numpy, dtype=torch.float)
edge_index_pos, edge_attr_pos, pos_edge_num = _sparsify_adjacency(
    a_plus_pos, hparams['threshold_pos'])
print(f"--- 正概念图 ---")
print(f"阈值化后保留 {pos_edge_num} 条边。")


# c) Negative concept-lattice adjacency matrix (branch two)
adj_matrix_neg_path = '../data/' + data_name + '/' + data_name + '_A_negative_UG.csv'
a_plus_neg_numpy = np.loadtxt(adj_matrix_neg_path, delimiter=',')
a_plus_neg = torch.tensor(a_plus_neg_numpy, dtype=torch.float)
edge_index_neg, edge_attr_neg, neg_edge_num = _sparsify_adjacency(
    a_plus_neg, hparams['threshold_neg'])
print(f"--- 负概念图 ---")
print(f"阈值化后保留 {neg_edge_num} 条边。")

# d) Record the resulting edge counts next to the hyperparameters so they are
#    logged together with the run.
hparams['pos_edge_num'] = pos_edge_num
hparams['neg_edge_num'] = neg_edge_num

# e) Load the class labels y
labels_path = '../data/' + data_name + '/' + data_name + '.data'
# Column layout used to parse the raw file; only the "type" column is read below.
column_names = [
    "animal_name", "hair", "feathers", "eggs", "milk", "airborne", 
    "aquatic", "predator", "toothed", "backbone", "breathes", 
    "venomous", "fins", "legs", "tail", "domestic", "catsize", "type"
]
data_df = pd.read_csv(labels_path, header=None, names=column_names)
species_labels = data_df['type'].values
# Map the raw class ids onto contiguous integers 0..K-1 for the loss function.
encoder = LabelEncoder()
y_numpy = encoder.fit_transform(species_labels)
y = torch.tensor(y_numpy, dtype=torch.long)

# Reconcile the label count with the feature matrix loaded earlier.
if len(y) < num_nodes:
    # Slicing cannot repair this case; stop here instead of failing later
    # when the split masks are used to index y.
    raise ValueError(
        f"标签数 ({len(y)}) 少于节点数 ({num_nodes})，无法与节点特征对齐。"
    )
if len(y) > num_nodes:
    # NOTE(review): keeping the first num_nodes labels presumes the cleaned
    # feature file holds the first num_nodes rows of the raw file in the same
    # order -- confirm against the cleaning step.
    print(f"警告: 标签数 ({len(y)}) 多于节点数 ({num_nodes})，仅保留前 {num_nodes} 个标签。")
    y = y[:num_nodes]

TensorBoard 日志将保存在: ../runs/zoo_tp=4000_tn=49000_lr=0.005_wd=0.0005_20250926-144126
--- 正概念图 ---
阈值化后保留 1288 条边。
--- 负概念图 ---
阈值化后保留 916 条边。


In [39]:
# e) Bundle node features, labels and both edge sets into one Data object
data = Data(x=x_features, y=y,
            edge_index_pos=edge_index_pos, edge_attr_pos=edge_attr_pos,
            edge_index_neg=edge_index_neg, edge_attr_neg=edge_attr_neg)

# f) Random 60% / 20% / remainder split into train / validation / test nodes.
#    The permutation comes from a dedicated, seeded generator so the split is
#    the same on every run of this cell; add a 'seed' entry to hparams to
#    change it.
split_seed = hparams.get('seed', 42)
split_generator = torch.Generator().manual_seed(split_seed)

num_train = int(num_nodes * 0.6)
num_val = int(num_nodes * 0.2)
num_test = num_nodes - num_train - num_val
indices = torch.randperm(num_nodes, generator=split_generator)
data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.train_mask[indices[:num_train]] = True
data.val_mask[indices[num_train:num_train + num_val]] = True
data.test_mask[indices[num_train + num_val:]] = True

# Sanity checks: the three masks have the intended sizes and every node
# belongs to exactly one of them.
assert int(data.train_mask.sum()) == num_train
assert int(data.val_mask.sum()) == num_val
assert int(data.test_mask.sum()) == num_test
assert bool(((data.train_mask.long() + data.val_mask.long() + data.test_mask.long()) == 1).all())

print("\n--- 数据加载完成 (双概念格分支模型) ---")
data


--- 数据加载完成 (双概念格分支模型) ---


Data(x=[101, 43], y=[101], edge_index_pos=[2, 2118], edge_attr_pos=[2118], edge_index_neg=[2, 916], edge_attr_neg=[916], train_mask=[101], val_mask=[101], test_mask=[101])

In [40]:
# --- 2. Dual-branch GNN over the positive and negative concept-lattice graphs ---
class DualConceptGCN(nn.Module):
    """Two parallel single-layer GCN branches whose outputs are fused by a linear layer.

    Both branches receive the same node features but different edge sets: one
    branch convolves over the positive concept-lattice graph, the other over
    the negative one. Each branch output goes through ReLU and dropout, the two
    results are concatenated per node, and a linear layer maps them to class
    logits.

    Args:
        num_node_features: size of each input node feature vector.
        hidden_channels: output size of each GCN branch.
        num_classes: number of output logits per node.
        dropout: dropout probability applied after each branch while training.
            Defaults to 0.5, the value that was previously hard-coded.

    Raises:
        ValueError: if ``dropout`` is outside ``[0, 1]``.
    """

    def __init__(self, num_node_features, hidden_channels, num_classes, dropout=0.5):
        super().__init__()
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        self.dropout = dropout

        # Branch one: positive concept-lattice graph
        self.pos_conv = GCNConv(num_node_features, hidden_channels)

        # Branch two: negative concept-lattice graph
        self.neg_conv = GCNConv(num_node_features, hidden_channels)

        # Fusion layer: concatenated branch outputs -> class logits
        self.fusion_layer = nn.Linear(hidden_channels * 2, num_classes)

    def _branch(self, conv, x, edge_index, edge_attr):
        """Run one branch: graph convolution, then ReLU, then dropout."""
        # NOTE(review): edge_attr is passed as the convolution's third
        # positional argument, i.e. presumably its edge weights. The notebook
        # feeds raw weights above the thresholds (thousands); confirm the
        # layer's normalisation behaves as intended at that scale.
        h = F.relu(conv(x, edge_index, edge_attr))
        return F.dropout(h, p=self.dropout, training=self.training)

    def forward(self, x, edge_index_pos, edge_attr_pos, edge_index_neg, edge_attr_neg):
        """Return per-node class logits.

        Args:
            x: node feature matrix shared by both branches.
            edge_index_pos, edge_attr_pos: edges and weights of the positive graph.
            edge_index_neg, edge_attr_neg: edges and weights of the negative graph.
        """
        # The positive branch runs first so dropout masks are drawn in the
        # same order as before.
        h_pos = self._branch(self.pos_conv, x, edge_index_pos, edge_attr_pos)
        h_neg = self._branch(self.neg_conv, x, edge_index_neg, edge_attr_neg)

        # Feature fusion: concatenate along the feature dimension
        h_combined = torch.cat([h_pos, h_neg], dim=1)
        return self.fusion_layer(h_combined)

In [41]:
# --- 3. Training and evaluation ---
# Seed torch's global RNG before the model is built so that parameter
# initialisation and the dropout masks drawn during training repeat across
# runs; add a 'seed' entry to hparams to change it.
torch.manual_seed(hparams.get('seed', 42))

model = DualConceptGCN(num_node_features=data.num_node_features, 
                       hidden_channels=hparams['hidden_channels'], 
                       # One logit per distinct encoded label value.
                       num_classes=len(np.unique(y_numpy)))

optimizer = torch.optim.Adam(model.parameters(), lr=hparams['learning_rate'], weight_decay=hparams['weight_decay'])
criterion = torch.nn.CrossEntropyLoss()

def train(epoch):
    """Run one full-graph optimisation step and log the training loss.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``data``
    and ``writer``.

    Args:
        epoch: step index under which the loss is written to TensorBoard.

    Returns:
        The loss over the training-mask nodes as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(
        data.x,
        data.edge_index_pos, data.edge_attr_pos,
        data.edge_index_neg, data.edge_attr_neg,
    )

    # Only nodes selected by the training mask contribute to the loss.
    train_nodes = data.train_mask
    loss = criterion(logits[train_nodes], data.y[train_nodes])
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    writer.add_scalar('Loss/train', loss_value, epoch)
    return loss_value

def _masked_accuracy(pred, target, mask):
    """Return the fraction of mask-selected nodes whose prediction equals the target.

    An empty mask yields NaN instead of raising ZeroDivisionError, so a split
    that ends up with zero nodes does not abort evaluation.
    """
    total = mask.sum().item()
    if total == 0:
        return float('nan')
    return (pred[mask] == target[mask]).sum().item() / total


def evaluate(epoch):
    """Compute train/validation/test accuracy and log all three to TensorBoard.

    Uses the module-level ``model``, ``data`` and ``writer``. The model is put
    into eval mode and the forward pass runs without gradient tracking.

    Args:
        epoch: step index under which the accuracies are written.

    Returns:
        ``(train_acc, val_acc, test_acc)`` as Python floats; an entry is NaN
        when the corresponding mask selects no nodes.
    """
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index_pos, data.edge_attr_pos, 
                    data.edge_index_neg, data.edge_attr_neg)
        # Predicted class = index of the largest logit for each node.
        pred = out.argmax(dim=1)

        train_acc = _masked_accuracy(pred, data.y, data.train_mask)
        val_acc = _masked_accuracy(pred, data.y, data.val_mask)
        test_acc = _masked_accuracy(pred, data.y, data.test_mask)

        writer.add_scalar('Accuracy/train', train_acc, epoch)
        writer.add_scalar('Accuracy/validation', val_acc, epoch)
        writer.add_scalar('Accuracy/test', test_acc, epoch)

        return train_acc, val_acc, test_acc

print("\n--- 开始训练 (双概念格分支 GNN) ---")

# Evaluation runs (and is logged) every epoch; only the console print is
# throttled. 1 prints every epoch, matching the previous output.
print_every = 1

final_accs = None
try:
    for epoch in range(1, hparams['epochs'] + 1):
        loss = train(epoch)
        final_accs = evaluate(epoch)
        train_acc, val_acc, test_acc = final_accs
        if epoch % print_every == 0:
            print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

    # --- After training ---
    # 1. Final metrics. The model does not change after the last epoch's
    #    evaluation, so that result is reused; evaluating again would only
    #    write a second set of scalars at the same step. With zero epochs
    #    there is no such result, so evaluate the untrained model once.
    if final_accs is None:
        final_accs = evaluate(hparams['epochs'])
    final_train_acc, final_val_acc, final_test_acc = final_accs
    print(f'--- 训练完成 ---')
    print(f'最终测试集准确率 (双概念格分支 GNN): {final_test_acc:.4f}')

    # 2. Final metrics to store next to the hyperparameters
    metrics = {
        'accuracy/final_train': final_train_acc,
        'accuracy/final_validation': final_val_acc,
        'accuracy/final_test': final_test_acc
    }

    # 3. Record this run's hyperparameters together with its results
    writer.add_hparams(hparams, metrics)
finally:
    # 4. Close the writer even if training or evaluation raised.
    writer.close()

print(f"\nTensorBoard 日志和超参数已写入 '{log_dir}' 文件夹。")


--- 开始训练 (双概念格分支 GNN) ---
Epoch: 001, Loss: 2.0871, Train Acc: 0.0167, Val Acc: 0.1500, Test Acc: 0.0952
Epoch: 002, Loss: 2.0130, Train Acc: 0.1167, Val Acc: 0.3000, Test Acc: 0.0952
Epoch: 003, Loss: 1.8661, Train Acc: 0.4500, Val Acc: 0.5000, Test Acc: 0.1905
Epoch: 004, Loss: 1.7803, Train Acc: 0.6833, Val Acc: 0.6000, Test Acc: 0.3810
Epoch: 005, Loss: 1.7276, Train Acc: 0.8000, Val Acc: 0.6500, Test Acc: 0.5714
Epoch: 006, Loss: 1.6535, Train Acc: 0.8000, Val Acc: 0.7000, Test Acc: 0.5714
Epoch: 007, Loss: 1.5695, Train Acc: 0.7833, Val Acc: 0.7000, Test Acc: 0.5714
Epoch: 008, Loss: 1.5585, Train Acc: 0.7833, Val Acc: 0.7000, Test Acc: 0.5714
Epoch: 009, Loss: 1.4996, Train Acc: 0.7667, Val Acc: 0.7000, Test Acc: 0.5714
Epoch: 010, Loss: 1.4374, Train Acc: 0.7333, Val Acc: 0.7000, Test Acc: 0.5238
Epoch: 011, Loss: 1.3623, Train Acc: 0.7167, Val Acc: 0.7000, Test Acc: 0.5238
Epoch: 012, Loss: 1.3331, Train Acc: 0.7167, Val Acc: 0.7000, Test Acc: 0.5238
Epoch: 013, Loss: 1.2466,