In [16]:
import torch
from torch_geometric.datasets import TUDataset

In [22]:
# `NormalizeFeatures` was never imported in this notebook, so on a fresh kernel
# this cell raised NameError. Import it here, next to its only use.
from torch_geometric.transforms import NormalizeFeatures

# Load the ENZYMES graph-classification benchmark from the TU collection.
# NOTE(review): `root` is an absolute, machine-specific path; consider a path
# relative to the notebook so the cell runs unchanged on other machines.
dataset = TUDataset(
    root='C:/folder/code/jupyter/PyG/data',
    name='ENZYMES',
    transform=NormalizeFeatures(),
)

# Summary of the whole dataset.
print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')
print('====================')

# Inspect the first graph object.
data = dataset[0]
print(data)



Dataset: ENZYMES(600):
Number of graphs: 600
Number of features: 3
Number of classes: 6
Data(edge_index=[2, 168], x=[37, 3], y=[1])


In [20]:
# The full dataset is used for training; no held-out split is created in this cell.
train_dataset = dataset
print(f'Number of training graphs: {len(train_dataset)}')

Number of training graphs: 600


In [25]:
from torch_geometric.loader import DataLoader

# Graphs can be mini-batched too: the per-graph edge indices and node features
# are concatenated into one large batch object instead of being padded to a
# common size.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

# Show what each mini-batch looks like (one report per batch).
for step, data in enumerate(train_loader):
    print(
        f'Step {step + 1}:',
        '=======',
        f'Number of graphs in the current batch: {data.num_graphs}',
        data,
        '',
        sep='\n',
    )

Step 1:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 7984], x=[2147, 3], y=[64], batch=[2147], ptr=[65])

Step 2:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 7394], x=[1985, 3], y=[64], batch=[1985], ptr=[65])

Step 3:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 8618], x=[2257, 3], y=[64], batch=[2257], ptr=[65])

Step 4:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 7190], x=[1933, 3], y=[64], batch=[1933], ptr=[65])

Step 5:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 8504], x=[2298, 3], y=[64], batch=[2298], ptr=[65])

Step 6:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 7632], x=[1966, 3], y=[64], batch=[1966], ptr=[65])

Step 7:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 8336], x=[2151, 3], y=[64], batch=[2151], ptr=[65])

Step 8:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 8426], x=[2155, 3], y=[64], b

图分类仍然是对节点进行嵌入，只不过最后增加一个聚合节点特征的操作，
把各个节点特征汇总成全局特征就相当于得到了整个图的编码：

In [26]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

In [30]:
class GCN(torch.nn.Module):
    """Graph classifier: three GCN layers, mean pooling, then a linear head.

    Node embeddings are computed by three stacked ``GCNConv`` layers (ReLU after
    the first two), averaged per graph with ``global_mean_pool``, passed through
    dropout, and mapped to class scores by a linear layer.
    """

    def __init__(self, hidden_channels, in_channels=None, num_classes=None):
        """Build the layers.

        Args:
            hidden_channels: Width of every GCN layer's output.
            in_channels: Number of input node features. Defaults to
                ``dataset.num_node_features`` (the notebook-level dataset).
            num_classes: Number of output classes. Defaults to
                ``dataset.num_classes`` (the notebook-level dataset).
        """
        super().__init__()
        # Seed before creating any layer so the initial weights are reproducible.
        # Note that this resets the global torch RNG as a side effect.
        torch.manual_seed(1)

        # Only fall back to the notebook global when the caller gave no size,
        # so the model can also be built without `dataset` in scope.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if num_classes is None:
            num_classes = dataset.num_classes

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Return one row of class scores per graph in the batch.

        Args:
            x: Node feature matrix of the (batched) graph.
            edge_index: Edge index tensor of the (batched) graph.
            batch: Vector assigning every node to its graph in the batch.
        """
        # 1. Encode every node.
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Average the node embeddings of each graph.
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Classify; dropout is active only while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x


In [31]:
# Classification loss over the raw class scores produced by the model.
criterion = torch.nn.CrossEntropyLoss()

# Network with 64 hidden channels, optimised by Adam at a learning rate of 0.01.
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [35]:
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``train_loader``, ``criterion`` and
    ``optimizer``.

    Returns:
        float: The loss averaged over all graphs seen in this pass, or ``0.0``
        when the loader yields no batches.
    """
    model.train()

    loss_sum = 0.0
    graphs_seen = 0
    for data in train_loader:  # Iterate in batches over the training dataset.
        # Clear gradients before the backward pass, so gradients left behind by
        # an interrupted earlier run cannot leak into this update.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)  # Single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.

        # Weight the batch loss by its label count to get a per-graph average.
        batch_graphs = data.y.size(0)
        loss_sum += loss.item() * batch_graphs
        graphs_seen += batch_graphs

    return loss_sum / graphs_seen if graphs_seen else 0.0

In [38]:
def test(loader):
    model.eval()

    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch)  
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.

In [39]:
# Number of passes over the training data.
NUM_EPOCHS = 170

# Train, then report accuracy after every epoch. The accuracy is measured on
# the same loader that was used for training, so it is a training accuracy.
for epoch in range(1, NUM_EPOCHS + 1):
    train()
    train_acc = test(train_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}')

Epoch: 001, Train Acc: 0.1933
Epoch: 002, Train Acc: 0.2217
Epoch: 003, Train Acc: 0.2450
Epoch: 004, Train Acc: 0.2450
Epoch: 005, Train Acc: 0.2600
Epoch: 006, Train Acc: 0.2667
Epoch: 007, Train Acc: 0.2733
Epoch: 008, Train Acc: 0.2450
Epoch: 009, Train Acc: 0.2833
Epoch: 010, Train Acc: 0.2850
Epoch: 011, Train Acc: 0.2833
Epoch: 012, Train Acc: 0.2817
Epoch: 013, Train Acc: 0.2733
Epoch: 014, Train Acc: 0.3000
Epoch: 015, Train Acc: 0.2900
Epoch: 016, Train Acc: 0.2833
Epoch: 017, Train Acc: 0.3033
Epoch: 018, Train Acc: 0.3000
Epoch: 019, Train Acc: 0.2867
Epoch: 020, Train Acc: 0.2817
Epoch: 021, Train Acc: 0.3133
Epoch: 022, Train Acc: 0.2900
Epoch: 023, Train Acc: 0.3033
Epoch: 024, Train Acc: 0.2833
Epoch: 025, Train Acc: 0.2783
Epoch: 026, Train Acc: 0.2833
Epoch: 027, Train Acc: 0.3283
Epoch: 028, Train Acc: 0.2650
Epoch: 029, Train Acc: 0.3150
Epoch: 030, Train Acc: 0.2983
Epoch: 031, Train Acc: 0.2867
Epoch: 032, Train Acc: 0.3300
Epoch: 033, Train Acc: 0.3183
Epoch: 034