In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.tensorboard import SummaryWriter

# --- Create the SummaryWriter ---
# Logs for this experiment go into their own run directory.
writer = SummaryWriter(log_dir='../runs/iris_fca_gnn_multi_curve')


def _load_csv_matrix(path):
    """Read a comma-separated numeric file; return (ndarray, float32 tensor copy)."""
    array = np.loadtxt(path, delimiter=',')
    return array, torch.tensor(array, dtype=torch.float32)


# --- Data loading and preprocessing ---
# Node features: the number of rows defines the number of graph nodes below.
features_path = '../data/iris.data.cleaned.csv'
x_numpy, x = _load_csv_matrix(features_path)

# Dense adjacency matrix: entries at or below the threshold and the diagonal
# are zeroed, then the remaining non-zero entries become the edge list
# (edge_index) with their values as edge attributes (edge_attr).
adj_matrix_path = '../data/iris_A_plus.csv'
a_plus_numpy, a_plus = _load_csv_matrix(adj_matrix_path)
threshold = 10
a_plus.masked_fill_(a_plus <= threshold, 0)
a_plus.fill_diagonal_(0)
edge_index, edge_attr = dense_to_sparse(a_plus)

# Class labels: the species column is integer-encoded with LabelEncoder.
# NOTE(review): labels come from the raw file while features come from the
# cleaned CSV; this assumes both list the samples in the same order - confirm.
labels_path = '../data/iris.data'
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris_df = pd.read_csv(labels_path, names=column_names, header=None)
species_labels = iris_df['species'].values
encoder = LabelEncoder()
y_numpy = encoder.fit_transform(species_labels)
y = torch.tensor(y_numpy, dtype=torch.long)

# Best effort: if the label count differs from the node count, keep only the
# first num_nodes labels.
num_nodes = x.size(0)
if len(y) != num_nodes:
    y = y[:num_nodes]

data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

# --- Data split: train (60%), validation (20%), test (20%) ---
# The permutation is drawn from a dedicated, seeded generator so that every run
# uses the same train/validation/test nodes and logged curves are comparable
# across runs. This fixes the split only; weight initialisation and dropout
# still use the global RNG.
split_seed = 42
split_generator = torch.Generator().manual_seed(split_seed)

num_train = int(num_nodes * 0.6)
num_val = int(num_nodes * 0.2)
# Whatever is left after the train and validation shares goes to the test set.
num_test = num_nodes - num_train - num_val
indices = torch.randperm(num_nodes, generator=split_generator)

# Boolean node masks: the shuffled indices are cut into three consecutive,
# non-overlapping slices.
data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.train_mask[indices[:num_train]] = True
data.val_mask[indices[num_train:num_train + num_val]] = True
data.test_mask[indices[num_train + num_val:]] = True

print("--- 数据加载完成 ---")
print(f"训练节点数: {data.train_mask.sum().item()}, 验证节点数: {data.val_mask.sum().item()}, 测试节点数: {data.test_mask.sum().item()}")

# --- GNN model definition ---
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node class logits.

    Args:
        num_node_features: Size of each input node feature vector.
        num_classes: Number of output classes (size of the logit vector).
        hidden_channels: Width of the hidden layer between the two
            convolutions. Defaults to 16.
        dropout: Dropout probability applied to the hidden activations while
            the module is in training mode. Defaults to 0.5.
    """

    def __init__(self, num_node_features, num_classes, hidden_channels=16, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index, edge_weight=None):
        """Return raw (un-normalised) class scores for every node.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to both convolutions.
            edge_weight: Optional per-edge weights forwarded to both
                convolutions. ``None`` (the default) leaves the edges
                unweighted, exactly as when the argument is omitted.
        """
        x = self.conv1(x, edge_index, edge_weight)
        x = F.relu(x)
        # Dropout is only active while self.training is True.
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.conv2(x, edge_index, edge_weight)

# --- Training and evaluation setup ---
# The number of classes is the count of distinct encoded labels.
num_classes = len(np.unique(y_numpy))
model = GCN(
    num_node_features=data.num_node_features,
    num_classes=num_classes,
)
# Adam with L2 regularisation (weight decay) on all model parameters.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)
criterion = torch.nn.CrossEntropyLoss()

def train(epoch):
    """Run one full-graph optimisation step and log the training loss.

    The forward pass covers the whole graph, but the loss is computed on the
    nodes selected by ``data.train_mask`` only. The loss is written to
    TensorBoard under ``Loss/train`` at step ``epoch``.

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    logits = model(data.x, data.edge_index)
    step_loss = criterion(logits[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()

    # Only the training-set loss is recorded here.
    loss_value = step_loss.item()
    writer.add_scalar('Loss/train', loss_value, epoch)
    return loss_value

# --- Evaluation: computes per-split loss and accuracy and logs them ---
def evaluate(epoch):
    """Score the model on the train/validation/test masks and log the results.

    Switches the model to eval mode and runs one gradient-free forward pass
    over the whole graph. Validation and test loss, plus the accuracy of all
    three splits, are written to TensorBoard at step ``epoch``. The train loss
    is computed here but only returned, not logged.

    Returns:
        Tuple ``(train_acc, val_acc, test_acc, train_loss)`` of Python floats.
    """
    model.eval()
    with torch.no_grad():
        logits = model(data.x, data.edge_index)
        is_correct = logits.argmax(dim=1) == data.y

        def split_loss(mask):
            # Criterion value restricted to the nodes selected by mask.
            return criterion(logits[mask], data.y[mask]).item()

        def split_accuracy(mask):
            # Fraction of the selected nodes whose arg-max class is correct.
            return int(is_correct[mask].sum()) / int(mask.sum())

        # --- Loss ---
        train_loss = split_loss(data.train_mask)
        val_loss = split_loss(data.val_mask)
        test_loss = split_loss(data.test_mask)

        # --- Accuracy ---
        train_acc = split_accuracy(data.train_mask)
        val_acc = split_accuracy(data.val_mask)
        test_acc = split_accuracy(data.test_mask)

        # --- Write everything to TensorBoard ---
        # Tags sharing the "Loss" / "Accuracy" prefix are grouped together so
        # the curves of the different splits can be compared side by side.
        scalars = (
            ('Loss/validation', val_loss),
            ('Loss/test', test_loss),
            ('Accuracy/train', train_acc),
            ('Accuracy/validation', val_acc),
            ('Accuracy/test', test_acc),
        )
        for tag, value in scalars:
            writer.add_scalar(tag, value, epoch)

        return train_acc, val_acc, test_acc, train_loss

# Number of training epochs; also the last TensorBoard step that gets logged.
num_epochs = 200

print("\n--- 开始训练 ---")
try:
    for epoch in range(1, num_epochs + 1):
        # train() only returns the loss; accuracy is computed in evaluate().
        loss = train(epoch)

        # Evaluate (and log) after every epoch.
        train_acc, val_acc, test_acc, _ = evaluate(epoch)

        if epoch % 20 == 0:
            print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')
finally:
    # Close the writer exactly once, after the last scalar has been written and
    # even if training is interrupted, so the event file is always flushed.
    writer.close()

# Reuse the test accuracy from the last epoch's evaluation as the final result.
# Calling evaluate() again here would log to the writer after close(), which
# makes SummaryWriter open a new event file that is never closed and repeats
# the final step's data points.
final_test_acc = test_acc
print('--- 训练完成 ---')
print(f'最终测试集准确率: {final_test_acc:.4f}')
print("\nTensorBoard 日志已写入 'runs' 文件夹。")

--- 数据加载完成 ---
训练节点数: 90, 验证节点数: 30, 测试节点数: 30

--- 开始训练 ---
Epoch: 020, Loss: 0.1459, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 040, Loss: 0.0179, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 060, Loss: 0.0066, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 080, Loss: 0.0091, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 100, Loss: 0.0200, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 120, Loss: 0.0341, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 140, Loss: 0.0240, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 160, Loss: 0.0085, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 180, Loss: 0.0165, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
Epoch: 200, Loss: 0.0263, Train Acc: 1.0000, Val Acc: 0.9667, Test Acc: 0.9667
--- 训练完成 ---
最终测试集准确率: 0.9667

TensorBoard 日志已写入 'runs' 文件夹。
