In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Dataset name; it selects the sub-folder and the file prefix under ../data.
data_name = 'zoo'

# --- Instantiate the SummaryWriter ---
writer = SummaryWriter(f'../runs/9_2_{data_name}')

# --- 1. Data loading and preprocessing ---

# Cut-offs for the two adjacency matrices: entries less than or equal to the
# respective value are zeroed before the graphs are built.
threshold_pos = 3000
threshold_neg = 49000

# a) Load the node features (both branches share the same node features)
features_path = f'../data/{data_name}/{data_name}.data.cleaned.csv'
x_numpy = np.loadtxt(features_path, delimiter=',')
x_features = torch.tensor(x_numpy, dtype=torch.float)
# One row of the feature matrix per graph node.
num_nodes = x_features.size(0)

def _threshold_to_graph(adj_numpy, threshold):
    """Convert a dense weighted adjacency array into a thresholded sparse graph.

    Entries less than or equal to ``threshold`` and all diagonal entries are
    set to zero; the remaining matrix is converted with ``dense_to_sparse``.

    Args:
        adj_numpy: Dense adjacency values as loaded from the CSV file.
        threshold: Cut-off; only entries strictly greater than it survive.

    Returns:
        A tuple ``(adj, edge_index, edge_attr)``: the thresholded dense float
        tensor followed by the two values returned by ``dense_to_sparse``.
    """
    adj = torch.tensor(adj_numpy, dtype=torch.float)
    adj[adj <= threshold] = 0
    adj.fill_diagonal_(0)
    edge_index, edge_attr = dense_to_sparse(adj)
    return adj, edge_index, edge_attr


# b) Load and process the positive concept-lattice adjacency matrix (branch one)
adj_matrix_pos_path = f'../data/{data_name}/{data_name}_A_plus_UG.csv'
a_plus_pos_numpy = np.loadtxt(adj_matrix_pos_path, delimiter=',')
a_plus_pos, edge_index_pos, edge_attr_pos = _threshold_to_graph(
    a_plus_pos_numpy, threshold_pos
)
print(f"--- 正概念图 ---")
print(f"阈值化后保留 {torch.count_nonzero(a_plus_pos)} 条边。")


# c) [New] Load and process the negative concept-lattice adjacency matrix (branch two)
adj_matrix_neg_path = f'../data/{data_name}/{data_name}_A_negative_UG.csv'
a_plus_neg_numpy = np.loadtxt(adj_matrix_neg_path, delimiter=',')
a_plus_neg, edge_index_neg, edge_attr_neg = _threshold_to_graph(
    a_plus_neg_numpy, threshold_neg
)
print(f"--- 负概念图 ---")
print(f"阈值化后保留 {torch.count_nonzero(a_plus_neg)} 条边。")


# d) Load the labels y
labels_path = '../data/' + data_name + '/' + data_name + '.data'
column_names = [
    "animal_name", "hair", "feathers", "eggs", "milk", "airborne",
    "aquatic", "predator", "toothed", "backbone", "breathes",
    "venomous", "fins", "legs", "tail", "domestic", "catsize", "type"
]
data_df = pd.read_csv(labels_path, header=None, names=column_names)
# The "type" column is the classification target; LabelEncoder maps its
# distinct values to consecutive integer class ids.
species_labels = data_df['type'].values
encoder = LabelEncoder()
y_numpy = encoder.fit_transform(species_labels)
y = torch.tensor(y_numpy, dtype=torch.long)

# The label file and the cleaned feature file are read independently, so
# their row counts can disagree.
if len(y) < num_nodes:
    # Slicing cannot repair this case: some nodes would have no label and the
    # boolean masks built from num_nodes would not fit y. Fail here with a
    # clear message instead of an obscure indexing error during training.
    raise ValueError(
        f"Label file '{labels_path}' has {len(y)} rows but the feature matrix "
        f"has {num_nodes} nodes; every node needs a label."
    )
if len(y) > num_nodes:
    # Keep only the first num_nodes labels.
    # NOTE(review): this assumes the cleaned feature rows correspond to the
    # leading rows of the label file - confirm against the cleaning step.
    y = y[:num_nodes]

In [None]:
# e) Build the Data object that carries both graphs and the shared features
data = Data(x=x_features, y=y,
            edge_index_pos=edge_index_pos, edge_attr_pos=edge_attr_pos,
            edge_index_neg=edge_index_neg, edge_attr_neg=edge_attr_neg)

# f) Train / validation / test split (60% / 20% / remainder)
SPLIT_SEED = 42
num_train = int(num_nodes * 0.6)
num_val = int(num_nodes * 0.2)
num_test = num_nodes - num_train - num_val

# A dedicated, seeded generator makes the split identical on every
# "Restart Kernel -> Run All" without touching the global RNG state that
# weight initialisation and dropout draw from.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(num_nodes, generator=split_generator)

# Boolean node masks; each node index lands in exactly one of the three
# consecutive slices of the permutation.
data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.train_mask[indices[:num_train]] = True
data.val_mask[indices[num_train:num_train + num_val]] = True
data.test_mask[indices[num_train + num_val:]] = True

print("\n--- 数据加载完成 (双概念格分支模型) ---")
data

In [None]:
# --- 2. Define the dual concept-lattice branch GNN model (DualConceptGCN) ---
class DualConceptGCN(nn.Module):
    """Two parallel single-layer GCN branches fused by a linear classifier.

    One ``GCNConv`` processes the positive concept-lattice graph and another
    processes the negative one. Both read the same node features. Their
    activations are concatenated along the feature dimension and mapped to
    class logits by a linear layer.

    Args:
        num_node_features: Size of each input node feature vector.
        hidden_channels: Output width of each GCN branch.
        num_classes: Number of output logits per node.
        dropout: Dropout probability applied to each branch after the ReLU.
            Defaults to 0.5, the value that was previously hard-coded.
    """

    def __init__(self, num_node_features, hidden_channels, num_classes, dropout=0.5):
        super().__init__()
        self.dropout = dropout

        # Branch one: positive concept-lattice graph
        self.pos_conv = GCNConv(num_node_features, hidden_channels)

        # Branch two: negative concept-lattice graph
        self.neg_conv = GCNConv(num_node_features, hidden_channels)

        # Fusion layer: consumes the concatenation of both branch outputs
        self.fusion_layer = nn.Linear(hidden_channels * 2, num_classes)

    def _branch(self, conv, x, edge_index, edge_attr):
        """Apply one branch: graph convolution, then ReLU, then dropout."""
        h = F.relu(conv(x, edge_index, edge_attr))
        # Dropout is only active while the module is in training mode.
        return F.dropout(h, p=self.dropout, training=self.training)

    def forward(self, x, edge_index_pos, edge_attr_pos, edge_index_neg, edge_attr_neg):
        """Return per-node class logits.

        Args:
            x: Node features shared by both branches.
            edge_index_pos, edge_attr_pos: Edges of the positive graph and the
                values passed as the third positional argument of ``GCNConv``.
            edge_index_neg, edge_attr_neg: Same for the negative graph.

        Returns:
            The output of the fusion layer applied to the concatenated branch
            activations.
        """
        # The positive branch runs first so the order in which the two
        # dropout masks are drawn from the RNG stays the same as before.
        h_pos = self._branch(self.pos_conv, x, edge_index_pos, edge_attr_pos)
        h_neg = self._branch(self.neg_conv, x, edge_index_neg, edge_attr_neg)

        # Feature fusion: concatenate along the feature dimension.
        h_combined = torch.cat([h_pos, h_neg], dim=1)
        return self.fusion_layer(h_combined)

In [None]:
# --- 3. Training and evaluation ---
HIDDEN_CHANNELS = 16

# One output logit per distinct encoded label value.
model = DualConceptGCN(
    data.num_node_features,
    HIDDEN_CHANNELS,
    np.unique(y_numpy).size,
)

# Adam with weight decay; cross-entropy computed on the model's raw outputs.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.005, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

def train(epoch):
    """Run one full-graph optimisation step and log the training loss.

    Args:
        epoch: Step index under which the loss is written to TensorBoard.

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(
        data.x,
        data.edge_index_pos, data.edge_attr_pos,
        data.edge_index_neg, data.edge_attr_neg,
    )

    # Only the nodes selected by the training mask contribute to the loss.
    mask = data.train_mask
    batch_loss = criterion(logits[mask], data.y[mask])
    batch_loss.backward()
    optimizer.step()

    loss_value = batch_loss.item()
    writer.add_scalar('Loss/train', loss_value, epoch)
    return loss_value

def evaluate(epoch):
    """Compute train/validation/test accuracy and log each to TensorBoard.

    Args:
        epoch: Step index under which the three accuracies are logged.

    Returns:
        A tuple ``(train_acc, val_acc, test_acc)`` of Python floats.
    """
    model.eval()
    with torch.no_grad():
        logits = model(
            data.x,
            data.edge_index_pos, data.edge_attr_pos,
            data.edge_index_neg, data.edge_attr_neg,
        )
        predicted = logits.argmax(dim=1)

        def accuracy(mask):
            # Fraction of the masked nodes whose predicted class equals the label.
            hits = (predicted[mask] == data.y[mask]).sum().item()
            return hits / mask.sum().item()

        train_acc = accuracy(data.train_mask)
        val_acc = accuracy(data.val_mask)
        test_acc = accuracy(data.test_mask)

        for tag, value in (
            ('Accuracy/train', train_acc),
            ('Accuracy/validation', val_acc),
            ('Accuracy/test', test_acc),
        ):
            writer.add_scalar(tag, value, epoch)

    return train_acc, val_acc, test_acc

print("\n--- 开始训练 (双概念格分支 GNN) ---")
NUM_EPOCHS = 150
for epoch in range(1, NUM_EPOCHS + 1):
    loss = train(epoch)
    # Evaluate after every epoch so each step has a full set of curves.
    train_acc, val_acc, test_acc = evaluate(epoch)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

# The last loop iteration already evaluated the final model, so its test
# accuracy is reused. Calling evaluate() again after closing the writer would
# log the last step a second time through a writer that is never closed again.
final_test_acc = test_acc
writer.close()

print('--- 训练完成 ---')
print(f'最终测试集准确率 (双概念格分支 GNN): {final_test_acc:.4f}')
# Report the directory the writer actually used rather than a hand-typed path.
print(f"\nTensorBoard 日志已写入 '{writer.log_dir}' 文件夹。")