In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.tensorboard import SummaryWriter

# --- Instantiate the SummaryWriter ---
# Dedicated TensorBoard log directory for the full dual-branch model experiment.
writer = SummaryWriter('../runs/iris_clgcn_full_model')

# --- 1. Data loading and preprocessing ---

# a) Node features (binarized features used by the FCA branch).
#    The number of rows in this file defines the number of graph nodes.
fca_features_path = '../data/iris.data.cleaned.csv'
x_fca_numpy = np.loadtxt(fca_features_path, delimiter=',')
x_features = torch.tensor(x_fca_numpy, dtype=torch.float)
num_nodes = x_features.shape[0]

# b) Concept-lattice adjacency matrix (FCA branch).
#    Entries at or below the threshold are dropped, the diagonal (self-loops)
#    is cleared, and the remaining dense weights become a sparse edge list.
adj_matrix_fca_path = '../data/iris_A_plus.csv'
a_plus_fca_numpy = np.loadtxt(adj_matrix_fca_path, delimiter=',')
a_plus_fca = torch.tensor(a_plus_fca_numpy, dtype=torch.float)
threshold = 10  # same threshold as in the earlier experiments
a_plus_fca[a_plus_fca <= threshold] = 0
a_plus_fca.fill_diagonal_(0)
edge_index_fca, edge_attr_fca = dense_to_sparse(a_plus_fca)

# c) Raw continuous features, used only to build the cosine-similarity graph.
labels_path = '../data/iris.data'
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris_df = pd.read_csv(labels_path, header=None, names=column_names)
# Keep at most `num_nodes` rows so the cosine graph can never reference node
# ids that do not exist in `x_features`. This mirrors the label truncation in
# step e); when both files have the same number of rows the slice is a no-op.
x_continuous_numpy = iris_df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values[:num_nodes]
x_continuous = torch.tensor(x_continuous_numpy, dtype=torch.float)

# d) Cosine-similarity adjacency matrix (Cosine branch).
print("--- 正在构建余弦相似度图结构 ---")
# Rows are L2-normalised, so the Gram matrix holds pairwise cosine similarities.
x_normalized = F.normalize(x_continuous, p=2, dim=1)
adj_matrix_cos = x_normalized @ x_normalized.T
adj_matrix_cos.fill_diagonal_(0)  # remove self-loops
adj_matrix_cos[adj_matrix_cos < 0] = 0  # keep only positively correlated pairs
edge_index_cos, edge_attr_cos = dense_to_sparse(adj_matrix_cos)
print(f"已生成一个包含 {edge_index_cos.shape[1]} 条边的余弦相似度图。")

# e) Labels: species names encoded as integer class ids.
species_labels = iris_df['species'].values
encoder = LabelEncoder()
y_numpy = encoder.fit_transform(species_labels)
y = torch.tensor(y_numpy, dtype=torch.long)
if num_nodes != len(y):
    # Best-effort alignment: keep only the first `num_nodes` labels.
    y = y[:num_nodes]

# f) One Data object carrying the node features, the labels and the edge
#    lists/weights of both graphs, so everything is managed in one place.
data = Data(x=x_features, y=y,
            edge_index_fca=edge_index_fca, edge_attr_fca=edge_attr_fca,
            edge_index_cos=edge_index_cos, edge_attr_cos=edge_attr_cos)

# g) Random 60% / 20% / remainder split into train / validation / test masks.
num_train = int(num_nodes * 0.6)
num_val = int(num_nodes * 0.2)
num_test = num_nodes - num_train - num_val
indices = torch.randperm(num_nodes)
data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.train_mask[indices[:num_train]] = True
data.val_mask[indices[num_train:num_train + num_val]] = True
data.test_mask[indices[num_train + num_val:]] = True

print("\n--- 数据加载完成 (双分支模型) ---")
print(data)

# --- 2. 【核心修改】定义双分支 GNN 模型 (CLGCN) ---
class CLGCN(nn.Module):
    """Dual-branch GCN classifier.

    The same node features are passed through two independent GCN layers,
    one per graph (concept-lattice graph and cosine-similarity graph). The
    two hidden representations are concatenated along the feature dimension
    and mapped to class logits by a single linear layer.

    Args:
        num_node_features: Size of each input node feature vector.
        hidden_channels: Output width of each GCN branch.
        num_classes: Number of output logits per node.
    """

    def __init__(self, num_node_features, hidden_channels, num_classes):
        super().__init__()

        # Branch 1: GCN layer applied on the concept-lattice graph.
        self.fca_conv = GCNConv(num_node_features, hidden_channels)

        # Branch 2: GCN layer applied on the cosine-similarity graph.
        self.cos_conv = GCNConv(num_node_features, hidden_channels)

        # Fusion: the concatenated branch outputs (2 * hidden_channels wide)
        # are classified by one linear layer.
        self.fusion_layer = nn.Linear(2 * hidden_channels, num_classes)

    def _encode(self, conv, x, edge_index, edge_weight):
        """Run one branch: graph convolution, then ReLU, then dropout (p=0.5)."""
        activated = F.relu(conv(x, edge_index, edge_weight))
        return F.dropout(activated, p=0.5, training=self.training)

    def forward(self, x, edge_index_fca, edge_attr_fca, edge_index_cos, edge_attr_cos):
        """Return the fused output for every node.

        The `edge_attr_*` tensors are passed to the convolutions as their
        third positional argument (the edge weights).
        """
        # The FCA branch is evaluated before the cosine branch, as a list
        # literal evaluates its elements left to right.
        branch_outputs = [
            self._encode(self.fca_conv, x, edge_index_fca, edge_attr_fca),
            self._encode(self.cos_conv, x, edge_index_cos, edge_attr_cos),
        ]

        # Concatenate along the feature dimension and classify.
        return self.fusion_layer(torch.cat(branch_outputs, dim=1))

# --- 3. Training and evaluation ---
HIDDEN_CHANNELS = 16  # hidden width of each GCN branch

# Input width comes from the Data object; the class count is the number of
# distinct encoded labels.
model = CLGCN(
    data.num_node_features,
    HIDDEN_CHANNELS,
    np.unique(y_numpy).size,
)

# Adam with weight decay, and cross-entropy on the model outputs.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

def train(epoch):
    """Run one full-graph optimisation step and log the training loss.

    Uses the module-level `model`, `optimizer`, `criterion`, `data` and
    `writer`. The loss is computed on the nodes selected by
    `data.train_mask` only.

    Args:
        epoch: Step index under which the loss is written to TensorBoard.

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    # Both graphs (edge lists and edge weights) are fed to the model.
    logits = model(
        data.x,
        data.edge_index_fca, data.edge_attr_fca,
        data.edge_index_cos, data.edge_attr_cos,
    )

    mask = data.train_mask
    step_loss = criterion(logits[mask], data.y[mask])
    step_loss.backward()
    optimizer.step()

    loss_value = step_loss.item()
    writer.add_scalar('Loss/train', loss_value, epoch)
    return loss_value

def evaluate(epoch):
    """Compute and log accuracy on the train, validation and test masks.

    Uses the module-level `model`, `data` and `writer`. The model is put in
    eval mode and run once without gradient tracking.

    Args:
        epoch: Step index under which the accuracies are written to
            TensorBoard.

    Returns:
        A `(train_acc, val_acc, test_acc)` tuple of floats.
    """
    model.eval()
    with torch.no_grad():
        # Both graphs (edge lists and edge weights) are fed to the model.
        logits = model(
            data.x,
            data.edge_index_fca, data.edge_attr_fca,
            data.edge_index_cos, data.edge_attr_cos,
        )
        predicted = logits.argmax(dim=1)

        def masked_accuracy(mask):
            # Fraction of the nodes selected by `mask` that are predicted correctly.
            hits = (predicted[mask] == data.y[mask]).sum().item()
            return hits / mask.sum().item()

        # All three accuracies are computed before anything is logged.
        train_acc = masked_accuracy(data.train_mask)
        val_acc = masked_accuracy(data.val_mask)
        test_acc = masked_accuracy(data.test_mask)

        scalars = (
            ('Accuracy/train', train_acc),
            ('Accuracy/validation', val_acc),
            ('Accuracy/test', test_acc),
        )
        for tag, value in scalars:
            writer.add_scalar(tag, value, epoch)

        return train_acc, val_acc, test_acc

# Train for 100 epochs; every epoch is evaluated, logged and printed.
print("\n--- 开始训练 (CLGCN 双分支模型) ---")
for epoch in range(1, 101):
    loss = train(epoch)
    train_acc, val_acc, test_acc = evaluate(epoch)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

# The model is not updated after the last evaluate() call above, so its test
# accuracy is the final one. Calling evaluate() again here would only write
# duplicate step-100 scalars, and doing so after writer.close() would log
# through a writer that has already been closed.
final_test_acc = test_acc

# Close the writer only once all logging is done so everything is flushed.
writer.close()

print('--- 训练完成 ---')
print(f'最终测试集准确率 (CLGCN 双分支模型): {final_test_acc:.4f}')
print("\nTensorBoard 日志已写入 'runs/iris_clgcn_full_model' 文件夹。")


--- 正在构建余弦相似度图结构 ---
已生成一个包含 22350 条边的余弦相似度图。

--- 数据加载完成 (双分支模型) ---
Data(x=[150, 126], y=[150], edge_index_fca=[2, 653], edge_attr_fca=[653], edge_index_cos=[2, 22350], edge_attr_cos=[22350], train_mask=[150], val_mask=[150], test_mask=[150])

--- 开始训练 (CLGCN 双分支模型) ---
Epoch: 001, Loss: 1.0865, Train Acc: 0.3444, Val Acc: 0.2667, Test Acc: 0.3333
Epoch: 002, Loss: 1.0895, Train Acc: 0.3667, Val Acc: 0.2667, Test Acc: 0.3333
Epoch: 003, Loss: 1.0615, Train Acc: 0.3667, Val Acc: 0.2667, Test Acc: 0.3333
Epoch: 004, Loss: 1.0695, Train Acc: 0.4000, Val Acc: 0.2667, Test Acc: 0.3333
Epoch: 005, Loss: 1.0636, Train Acc: 0.4222, Val Acc: 0.2667, Test Acc: 0.3333
Epoch: 006, Loss: 1.0646, Train Acc: 0.4556, Val Acc: 0.2667, Test Acc: 0.3667
Epoch: 007, Loss: 1.0442, Train Acc: 0.4889, Val Acc: 0.2667, Test Acc: 0.3667
Epoch: 008, Loss: 1.0392, Train Acc: 0.5222, Val Acc: 0.3000, Test Acc: 0.4000
Epoch: 009, Loss: 1.0311, Train Acc: 0.5444, Val Acc: 0.3000, Test Acc: 0.4333
Epoch: 010, Loss