In [2]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.tensorboard import SummaryWriter

# --- Create the SummaryWriter ---
# This control experiment logs to its own TensorBoard run directory.
writer = SummaryWriter(log_dir='../runs/iris_random_gnn_comparison')

# --- 1. Data loading and preprocessing ---

# a) Node feature matrix X, read from a comma-separated numeric file
#    (one row per node).
features_path = '../data/iris.data.cleaned.csv'
x_numpy = np.loadtxt(features_path, delimiter=',')
x = torch.as_tensor(x_numpy, dtype=torch.float32)
num_nodes = x.size(0)

# =================================================================
# ======  [Core change] use a random adjacency structure  ======
# =================================================================
# In 7.ipynb the thresholded adjacency had 653 effective edges (non-zero
# entries). For a fair comparison this cell draws roughly as many random edges.

print("--- 正在生成随机图结构 ---")
num_edges_from_fca = 653  # taken from the output of 7.ipynb

# Draw the endpoints of each edge independently: sources first, then targets.
source_nodes = torch.randint(low=0, high=num_nodes, size=(num_edges_from_fca,))
target_nodes = torch.randint(low=0, high=num_nodes, size=(num_edges_from_fca,))

# (Optional) drop self-loops so that no node points at itself; this is why the
# final edge count can be slightly below num_edges_from_fca.
is_self_loop = source_nodes == target_nodes
random_edge_index = torch.stack((source_nodes, target_nodes))[:, ~is_self_loop]

# (Optional) edge weights: every surviving edge gets weight 1, on the premise
# that the existence of an edge matters more than its weight.
random_edge_attr = torch.ones(random_edge_index.shape[1], dtype=torch.float)

print(f"已生成一个包含 {random_edge_index.shape[1]} 条随机边的图。")
# =================================================================
# =================================================================

# c) Load the labels y: the last column of the raw iris file holds the species
#    name, which is encoded into integer class ids.
labels_path = '../data/iris.data'
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris_df = pd.read_csv(labels_path, header=None, names=column_names)
species_labels = iris_df['species'].values
encoder = LabelEncoder()
y_numpy = encoder.fit_transform(species_labels)
y = torch.tensor(y_numpy, dtype=torch.long)

# The label file and the feature file are read independently, so their row
# counts may disagree.
if len(y) < num_nodes:
    # Too few labels cannot be repaired by slicing; fail here with a clear
    # message instead of at the first mask-indexing operation later on.
    raise ValueError(
        f"标签数 ({len(y)}) 少于节点数 ({num_nodes})，无法为每个节点分配标签。"
    )
if len(y) > num_nodes:
    # Keep the original best-effort behaviour (use the first num_nodes labels)
    # but make it visible: truncation is only correct if the extra labels are
    # at the end of the file.
    # NOTE(review): confirm that feature rows and label rows are aligned.
    print(
        f"警告: 标签数 ({len(y)}) 多于节点数 ({num_nodes})，"
        f"仅保留前 {num_nodes} 个标签；请确认标签与特征行一一对应。"
    )
    y = y[:num_nodes]

# d) Build the Data object from the features, the labels and the randomly
#    generated edge_index / edge_attr.
data = Data(x=x, edge_index=random_edge_index, edge_attr=random_edge_attr, y=y)

# e) Random 60% / 20% / remainder split into train / validation / test nodes.
num_train = int(num_nodes * 0.6)
num_val = int(num_nodes * 0.2)
num_test = num_nodes - num_train - num_val
indices = torch.randperm(num_nodes)

# Each split owns one contiguous slice of the shuffled node indices; the masks
# are attached in train / val / test order.
split_bounds = {
    'train_mask': (0, num_train),
    'val_mask': (num_train, num_train + num_val),
    'test_mask': (num_train + num_val, num_nodes),
}
for mask_name, (start, stop) in split_bounds.items():
    mask = torch.zeros(num_nodes, dtype=torch.bool)
    mask[indices[start:stop]] = True
    setattr(data, mask_name, mask)

print("\n--- 数据加载完成 (使用随机图) ---")
print(data)
print(f"训练节点数: {data.train_mask.sum().item()}, 验证节点数: {data.val_mask.sum().item()}, 测试节点数: {data.test_mask.sum().item()}")

# --- GNN model definition ---
class GCN(torch.nn.Module):
    """Two-layer graph convolutional classifier.

    Pipeline: GCNConv(num_node_features -> 16) -> ReLU -> dropout(p=0.5)
    -> GCNConv(16 -> num_classes). The output of the second layer is returned
    unnormalised (no softmax is applied here).
    """

    def __init__(self, num_node_features, num_classes):
        """Create the two convolution layers.

        Args:
            num_node_features: Size of each input node feature vector.
            num_classes: Number of output scores produced per node.
        """
        super().__init__()
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, x, edge_index):
        """Return per-node class scores for features ``x`` on ``edge_index``."""
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)

# --- Training and evaluation setup ---
# One output score per distinct label value found in y_numpy.
model = GCN(
    num_node_features=data.num_node_features,
    num_classes=np.unique(y_numpy).size,
)
# Adam with learning rate 0.01 and weight decay 5e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train(epoch):
    """Run one full-graph optimisation step and log the training loss.

    The forward pass covers every node, but only nodes selected by
    ``data.train_mask`` contribute to the loss.

    Args:
        epoch: Step index under which the loss is written to TensorBoard.

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(data.x, data.edge_index)
    train_nodes = data.train_mask
    loss = criterion(logits[train_nodes], data.y[train_nodes])

    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    writer.add_scalar('Loss/train_random_adj', loss_value, epoch)
    return loss_value

def evaluate(epoch):
    """Compute and log accuracy on the train, validation and test nodes.

    The model is switched to eval mode and run once over the whole graph
    without gradient tracking; each node's prediction is the argmax of its
    output row.

    Args:
        epoch: Step index under which the accuracies are written to TensorBoard.

    Returns:
        Tuple ``(train_acc, val_acc, test_acc)`` of fractions of correctly
        predicted nodes in each split.
    """
    model.eval()
    with torch.no_grad():
        predictions = model(data.x, data.edge_index).argmax(dim=1)

    def _accuracy(mask):
        # Correct predictions among the masked nodes / number of masked nodes.
        hits = (predictions[mask] == data.y[mask]).sum().item()
        return hits / mask.sum().item()

    train_acc = _accuracy(data.train_mask)
    val_acc = _accuracy(data.val_mask)
    test_acc = _accuracy(data.test_mask)

    scalars = (
        ('Accuracy/train_random_adj', train_acc),
        ('Accuracy/validation_random_adj', val_acc),
        ('Accuracy/test_random_adj', test_acc),
    )
    for tag, value in scalars:
        writer.add_scalar(tag, value, epoch)

    return train_acc, val_acc, test_acc

# --- Training loop and final report ---
# Train for num_epochs steps, evaluating (and logging accuracies) every
# eval_interval epochs.
print("\n--- 开始训练 (使用随机图) ---")
num_epochs = 200
eval_interval = 20
last_eval_epoch = None
for epoch in range(1, num_epochs + 1):
    loss = train(epoch)
    if epoch % eval_interval == 0:
        train_acc, val_acc, test_acc = evaluate(epoch)
        last_eval_epoch = epoch
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

# evaluate() writes scalars to the TensorBoard writer, so the final accuracy
# must be obtained before the writer is closed. If the last epoch was already
# evaluated inside the loop, reuse that result: the model has not changed
# since, and re-running evaluate() would log the same step a second time.
if last_eval_epoch == num_epochs:
    final_test_acc = test_acc
else:
    final_test_acc = evaluate(num_epochs)[2]

# Close the writer only after every scalar has been logged.
writer.close()

print('--- 训练完成 ---')
print(f'最终测试集准确率 (使用随机图): {final_test_acc:.4f}')
print("\nTensorBoard 日志已写入 'runs/iris_random_gnn_comparison' 文件夹。")

--- 正在生成随机图结构 ---
已生成一个包含 650 条随机边的图。

--- 数据加载完成 (使用随机图) ---
Data(x=[150, 126], edge_index=[2, 650], edge_attr=[650], y=[150], train_mask=[150], val_mask=[150], test_mask=[150])
训练节点数: 90, 验证节点数: 30, 测试节点数: 30

--- 开始训练 (使用随机图) ---
Epoch: 020, Loss: 0.9324, Train Acc: 0.6556, Val Acc: 0.3333, Test Acc: 0.3333
Epoch: 040, Loss: 0.6931, Train Acc: 0.7444, Val Acc: 0.4000, Test Acc: 0.2667
Epoch: 060, Loss: 0.5413, Train Acc: 0.8111, Val Acc: 0.4000, Test Acc: 0.3000
Epoch: 080, Loss: 0.4809, Train Acc: 0.9000, Val Acc: 0.3333, Test Acc: 0.3000
Epoch: 100, Loss: 0.3773, Train Acc: 0.9222, Val Acc: 0.3667, Test Acc: 0.3667
Epoch: 120, Loss: 0.4011, Train Acc: 0.9889, Val Acc: 0.3333, Test Acc: 0.3333
Epoch: 140, Loss: 0.3269, Train Acc: 0.9778, Val Acc: 0.3000, Test Acc: 0.3667
Epoch: 160, Loss: 0.3287, Train Acc: 1.0000, Val Acc: 0.3000, Test Acc: 0.3333
Epoch: 180, Loss: 0.3127, Train Acc: 1.0000, Val Acc: 0.3000, Test Acc: 0.3333
Epoch: 200, Loss: 0.2342, Train Acc: 1.0000, Val Acc: 0.