In [1]:
import torch
import numpy as np
from torch_geometric.data import InMemoryDataset, Data
def load_raw(data_path="dataset/raw/data.npy", label_path="dataset/raw/label.npy"):
    """Load the raw sample and label arrays and encode the labels.

    Args:
        data_path: Path of the ``.npy`` file holding the samples. Defaults to the
            location the notebook has always used (relative to the working directory).
        label_path: Path of the ``.npy`` file holding one label per sample.

    Returns:
        tuple: ``(data, label)`` where ``data`` keeps only the first three entries
        along its last axis and ``label`` is the ``LabelBinarizer`` encoding of the
        raw labels.
    """
    # Imported locally, as in the original notebook, so scikit-learn is only
    # required when the raw arrays actually have to be processed.
    from sklearn.preprocessing import LabelBinarizer

    # Only the first three components of the last axis are used as features.
    data = np.load(data_path)[..., :3]

    # NOTE(review): scikit-learn's LabelBinarizer emits a single column when the
    # label set has exactly two classes -- confirm there are more than two classes
    # if a full one-hot matrix is required downstream.
    label = LabelBinarizer().fit_transform(np.load(label_path))

    print(data.shape)
    print(label.shape)
    return data, label
# Directed edge list shared by every graph, written as (source, target) pairs and
# grouped by source node. Each connection appears in both directions, giving 42
# directed edges over nodes 0-20. Transposing yields the [2, num_edges] layout
# (row 0 = sources, row 1 = targets).
CONNECTION = torch.tensor(
    [
        (0, 1), (0, 5), (0, 17),
        (1, 0), (1, 2),
        (2, 1), (2, 3),
        (3, 2), (3, 4),
        (4, 3),
        (5, 0), (5, 9), (5, 6),
        (6, 5), (6, 7),
        (7, 8), (7, 6),
        (8, 7),
        (9, 5), (9, 10), (9, 13),
        (10, 9), (10, 11),
        (11, 10), (11, 12),
        (12, 11),
        (13, 9), (13, 14), (13, 17),
        (14, 13), (14, 15),
        (15, 14), (15, 16),
        (16, 15),
        (17, 0), (17, 13), (17, 18),
        (18, 17), (18, 19),
        (19, 18), (19, 20),
        (20, 19),
    ],
    dtype=torch.long,
).t().contiguous()

# Comments are included below to make the dataset class easier to follow.
class MyOwnDataset(InMemoryDataset):
    """In-memory graph dataset built from the arrays returned by ``load_raw``.

    Every sample becomes one ``Data`` graph whose node features come from
    ``data[i]``, whose edges are the shared ``CONNECTION`` index, and whose
    target ``y`` is the sample's encoded label stored as a single row.

    Args:
        root: Dataset root directory, forwarded to ``InMemoryDataset``.
        transform: Optional transform forwarded to ``InMemoryDataset``.
        pre_transform: Optional transform applied to each graph in ``process``
            before the dataset is saved.
        pre_filter: Optional predicate applied in ``process``; graphs for which
            it returns a falsy value are dropped before saving.
    """

    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        # pre_filter is forwarded so the filtering branch in process() can be used.
        super().__init__(root, transform, pre_transform, pre_filter)
        # NOTE(review): PyTorch >= 2.6 defaults torch.load to weights_only=True,
        # which may refuse to unpickle the collated Data object; recent PyG
        # releases offer self.load()/self.save() instead -- confirm against the
        # installed versions before changing this call.
        self.data, self.slices = torch.load(self.processed_paths[0])
        # Quick sanity check: shape of the first graph's node-feature matrix.
        print(self[0].x.shape)

    @property
    def raw_file_names(self):
        """Names of the raw source files, relative to ``self.raw_dir``."""
        # Bare file names only. The previous entries repeated the root path, and
        # the 'dataset\raw\label.npy' literal contained a '\r' escape, i.e. a
        # carriage-return character instead of a path separator.
        return ['data.npy', 'label.npy']

    @property
    def processed_file_names(self):
        """Names of the files ``process`` writes; the saved dataset uses exactly these."""
        return ['data.pt']

    def process(self):
        """Convert the raw arrays into ``Data`` graphs, collate them and save the result.

        Raises:
            ValueError: If the number of samples and the number of labels differ.
        """
        # load_raw() is called without arguments, so it reads from its own
        # default locations rather than from self.raw_paths.
        samples, labels = load_raw()
        if len(samples) != len(labels):
            raise ValueError(
                f"Got {len(samples)} samples but {len(labels)} labels; they must match."
            )

        # One graph per sample. Both x and y are stored as float32 (previously y
        # was float64 because the builtin `float` was passed as dtype); y keeps
        # its leading axis of size 1 so that batching stacks one row per graph.
        # Already-saved processed files are not rewritten by this change.
        data_list = [
            Data(
                x=torch.tensor(sample, dtype=torch.float),
                edge_index=CONNECTION,
                y=torch.tensor(target, dtype=torch.float).unsqueeze(0),
            )
            for sample, target in zip(samples, labels)
        ]

        if self.pre_filter is not None:
            data_list = [graph for graph in data_list if self.pre_filter(graph)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(graph) for graph in data_list]

        collated, slices = self.collate(data_list)
        torch.save((collated, slices), self.processed_paths[0])
# Build (or load the cached, already-processed) dataset rooted at ./dataset.
dataset = MyOwnDataset(root="./dataset")

torch.Size([21, 3])


In [2]:
from torch_geometric.loader import DataLoader

SPLIT_SEED = 12345    # seeds the shuffle so the split is the same on every fresh run
TRAIN_SIZE = 240000   # number of graphs used for training; the remainder is the test split
BATCH_SIZE = 64

# dataset.shuffle() draws from torch's global RNG, so seed it first to make the
# train/test split reproducible under Restart Kernel -> Run All.
# Note: re-running only this cell shuffles the already shuffled dataset again;
# re-run from the cell that constructs `dataset` to get the same split.
torch.manual_seed(SPLIT_SEED)
dataset = dataset.shuffle()
train_dataset = dataset[:TRAIN_SIZE]
test_dataset = dataset[TRAIN_SIZE:]

# Fail early with a clear message instead of a ZeroDivisionError during evaluation.
if len(test_dataset) == 0:
    raise ValueError(
        f"Dataset has only {len(dataset)} graphs; TRAIN_SIZE={TRAIN_SIZE} leaves no test data."
    )

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [3]:

import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv, global_mean_pool

class GCN(torch.nn.Module):
    """Graph classifier: three GCNConv layers, mean-pool readout, linear head.

    Args:
        hidden_channels: Output width of each of the three GCNConv layers.
        in_channels: Size of the per-node input features. When omitted it is
            read from the notebook-level ``dataset.num_node_features``.
        out_channels: Number of output logits. When omitted it is read from the
            notebook-level ``dataset.num_classes``.
        dropout: Dropout probability applied to the pooled graph embedding
            before the final linear layer.
    """

    def __init__(self, hidden_channels=64, in_channels=None, out_channels=None, dropout=0.5):
        super().__init__()
        # Fall back to the global dataset only when sizes are not given, so the
        # model can also be built without that notebook state.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        # Re-seeds torch's global RNG right before the layers are created, so
        # the initial weights are the same on every construction.
        torch.manual_seed(12345)
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = nn.Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Return one row of class logits per graph in the batch.

        Args:
            x: Node feature matrix for all nodes in the batch.
            edge_index: Edge index of the batched graphs.
            batch: Vector assigning each node to its graph, used by the readout.
        """
        # 1. Obtain node embeddings (no activation after the last convolution).
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer: average node embeddings per graph.
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply the final classifier; dropout is only active in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lin(x)

        return x
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Network with its default configuration, moved onto the selected device.
model = GCN()
model = model.to(device)

# Adam over every model parameter, with cross-entropy as the training loss.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one optimisation epoch over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and ``device``.

    Returns:
        float: Mean training loss per graph over the epoch (``0.0`` when the
        training set is empty).
    """
    model.train()

    total_loss = 0.0
    for data in train_loader:  # Iterate in batches over the training dataset.
        data = data.to(device)
        # Clear gradients up front so nothing stale is accumulated into this step.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)  # Single forward pass.
        # Targets are per-class rows (test() takes their argmax); cast them to the
        # logits' dtype so the loss is not silently promoted to a wider float type.
        loss = criterion(out, data.y.to(out.dtype))
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        # The criterion averages over the batch, so weight by batch size to get
        # a correct per-graph mean even when the last batch is smaller.
        total_loss += loss.item() * data.num_graphs

    return total_loss / max(len(train_loader.dataset), 1)

def test(loader):
    """Compute classification accuracy of the global ``model`` over ``loader``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch`` and
            ``y``; it must also provide ``loader.dataset`` for the sample count.

    Returns:
        float: Fraction of graphs whose predicted class equals the argmax of
        their target row.

    Raises:
        ZeroDivisionError: If ``loader.dataset`` is empty.
    """
    model.eval()

    correct = 0
    # Evaluation needs no gradients; disabling autograd avoids building a graph
    # for every batch, which saves memory and time.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            data = data.to(device)
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Class with the highest logit.
            # Targets are per-class rows, so their argmax is the true class index.
            correct += int((pred == data.y.argmax(dim=1)).sum())
    return correct / len(loader.dataset)  # Ratio of correct predictions.

# Train for nine epochs (numbered 1-9) and report the accuracy on both splits
# after each one.
NUM_EPOCHS = 9
for epoch in range(1, NUM_EPOCHS + 1):
    train()
    train_acc, test_acc = test(train_loader), test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.9036, Test Acc: 0.9070
Epoch: 002, Train Acc: 0.9267, Test Acc: 0.9282
Epoch: 003, Train Acc: 0.9339, Test Acc: 0.9353
Epoch: 004, Train Acc: 0.9258, Test Acc: 0.9260
Epoch: 005, Train Acc: 0.9377, Test Acc: 0.9381
Epoch: 006, Train Acc: 0.9314, Test Acc: 0.9322
Epoch: 007, Train Acc: 0.9347, Test Acc: 0.9340
Epoch: 008, Train Acc: 0.9392, Test Acc: 0.9402
Epoch: 009, Train Acc: 0.9403, Test Acc: 0.9422
