https://colab.research.google.com/drive/1I8a0DfQ3fI7Njc62__mVXUlcAleUclnb?usp=sharing#scrollTo=cNgkR8SRaU_P

In [6]:
import random

import torch

# `DataLoader` lives in `torch_geometric.loader` since PyG 2.0; importing it
# from `torch_geometric.data` is deprecated there. Fall back to the old
# location so the notebook still runs on PyG 1.x.
try:
    from torch_geometric.loader import DataLoader
except ImportError:
    from torch_geometric.data import DataLoader

# Path of the serialized dataset that is read with `torch.load`.
DATA_PATH = 'YOUR DATA PATH'

In [7]:
# Load the serialized dataset from DATA_PATH.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only point DATA_PATH at files you trust.
# The loaded object is shuffled in place and sliced by the following cells, so
# it is presumably a plain list of graph objects -- TODO confirm.
train_dataset = torch.load(DATA_PATH)

In [8]:
# Shuffle in place with a fixed seed so that a fresh "Restart & Run All"
# always produces the same ordering (and therefore the same train/test split).
# A private Random instance is used so the global `random` state is untouched.
# Note: re-running this cell without reloading shuffles the list again.
random.Random(12345).shuffle(train_dataset)

In [18]:
# Hold-out split: the first 80% of the samples go to `train_set`, the
# remaining 20% to `test_set`.
n_train = int(len(train_dataset) * 0.8)
train_set, test_set = train_dataset[:n_train], train_dataset[n_train:]

In [11]:
# Wrap both splits in mini-batch loaders; only the training loader reshuffles.
train_loader = DataLoader(train_set, shuffle=True, batch_size=64)
test_loader = DataLoader(test_set, shuffle=False, batch_size=64)

# Sanity check: walk over the training batches once and print what each holds.
for step, data in enumerate(train_loader):
    print(
        f'Step {step + 1}:',
        '=======',
        f'Number of graphs in the current batch: {data.num_graphs}',
        sep='\n',
    )
    # Batch summary followed by one blank separator line.
    print(data, end='\n\n')

Step 1:
Number of graphs in the current batch: 64
Batch(batch=[4757], edge_index=[2, 614402], ptr=[65], x=[4757, 1], y=[64])

Step 2:
Number of graphs in the current batch: 64
Batch(batch=[3879], edge_index=[2, 278816], ptr=[65], x=[3879, 1], y=[64])

Step 3:
Number of graphs in the current batch: 64
Batch(batch=[4375], edge_index=[2, 588736], ptr=[65], x=[4375, 1], y=[64])

Step 4:
Number of graphs in the current batch: 64
Batch(batch=[5438], edge_index=[2, 997796], ptr=[65], x=[5438, 1], y=[64])

Step 5:
Number of graphs in the current batch: 64
Batch(batch=[4667], edge_index=[2, 591492], ptr=[65], x=[4667, 1], y=[64])

Step 6:
Number of graphs in the current batch: 64
Batch(batch=[4521], edge_index=[2, 510468], ptr=[65], x=[4521, 1], y=[64])

Step 7:
Number of graphs in the current batch: 64
Batch(batch=[4198], edge_index=[2, 379736], ptr=[65], x=[4198, 1], y=[64])

Step 8:
Number of graphs in the current batch: 64
Batch(batch=[4637], edge_index=[2, 516726], ptr=[65], x=[4637, 1], y

In [14]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Graph-level classifier: three GCN layers, mean-pool readout, linear head.

    A small baseline that is meant to be edited while searching for better
    hyperparameters. The defaults reproduce the original configuration:
    1 input feature per node, 64 hidden channels, 3 output classes and a
    dropout probability of 0.5.

    Args:
        hidden_channels: Output width of each of the three GCNConv layers.
        in_channels: Number of input features per node.
        num_classes: Number of logits produced per graph.
        dropout: Dropout probability applied to the pooled graph embedding
            (only active in training mode).
    """

    def __init__(self, hidden_channels=64, in_channels=1, num_classes=3, dropout=0.5):
        super().__init__()
        # Re-seed torch's global RNG so every construction starts from the
        # same initial weights.
        torch.manual_seed(12345)
        self.dropout = dropout

        # GCNConv.forward(x, edge_index, edge_weight=None) -> Tensor
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Compute one row of class logits per graph in the batch.

        Args:
            x: Node feature matrix passed to the first GCNConv layer.
            edge_index: Graph connectivity passed to every GCNConv layer.
            batch: Per-node graph assignment used by ``global_mean_pool``.

        Returns:
            Tensor of logits with one row per graph and ``num_classes`` columns.
        """
        # 1. Obtain node embeddings.
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()

        # 2. Readout layer: average node embeddings per graph
        #    -> [batch_size, hidden_channels].
        x = global_mean_pool(x, batch)

        # 3. Apply the final classifier.
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.lin(x)

In [15]:
# Build a model with the default settings and print the name of each direct
# child module as a quick structural sanity check.
model = GCN()
for n, m in model.named_children():
    print(n)

conv1
conv2
conv3
lin


In [16]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Current Device : {device}")

# Fresh model with default settings, moved onto the selected device.
model = GCN().to(device)

# Loss and optimizer; the optimizer type and learning rate are free to tune.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


def train():
    """Run one optimisation pass over every batch in ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``device``. Puts the model in training mode and performs one optimizer
    step per batch. Returns nothing.
    """
    model.train()
    for data in train_loader:  # Iterate in batches over the training dataset.
        # Rebind the result instead of relying on `.to()` mutating in place.
        data = data.to(device)

        # Clear gradients *before* the forward/backward pass so that anything
        # left over (e.g. from an interrupted cell) cannot leak into this step.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)  # Single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.


def test(loader):
    model.eval()

    correct = 0
    for i, data in enumerate(loader):  # Iterate in batches over the training/test dataset.
        data.to(device)
        out = model(data.x, data.edge_index, data.batch)  
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += (pred == data.y).sum() # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.

Current Device : cuda:0


In [17]:
# Number of full passes over the training set. The inclusive upper bound makes
# the loop run exactly NUM_EPOCHS times with a 1-based epoch counter
# (`range(1, 50)` would stop after 49 epochs).
NUM_EPOCHS = 50

for epoch in range(1, NUM_EPOCHS + 1):
    train()
    # Accuracy is measured on both splits after every epoch.
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f} ')


Epoch: 001, Train Acc: 0.5426, Test Acc: 0.5568 
Epoch: 002, Train Acc: 0.6131, Test Acc: 0.6148 
Epoch: 003, Train Acc: 0.6088, Test Acc: 0.6057 
Epoch: 004, Train Acc: 0.6074, Test Acc: 0.6057 
Epoch: 005, Train Acc: 0.6102, Test Acc: 0.6091 
Epoch: 006, Train Acc: 0.6153, Test Acc: 0.6136 
Epoch: 007, Train Acc: 0.6102, Test Acc: 0.6114 
Epoch: 008, Train Acc: 0.6071, Test Acc: 0.6000 
Epoch: 009, Train Acc: 0.6114, Test Acc: 0.6102 
Epoch: 010, Train Acc: 0.6148, Test Acc: 0.6068 
Epoch: 011, Train Acc: 0.6114, Test Acc: 0.6080 
Epoch: 012, Train Acc: 0.6142, Test Acc: 0.6080 
Epoch: 013, Train Acc: 0.6165, Test Acc: 0.6091 
Epoch: 014, Train Acc: 0.6099, Test Acc: 0.6080 
Epoch: 015, Train Acc: 0.6094, Test Acc: 0.6080 
Epoch: 016, Train Acc: 0.6077, Test Acc: 0.6080 
Epoch: 017, Train Acc: 0.6111, Test Acc: 0.6102 
Epoch: 018, Train Acc: 0.6139, Test Acc: 0.6034 
Epoch: 019, Train Acc: 0.6102, Test Acc: 0.6091 
Epoch: 020, Train Acc: 0.6099, Test Acc: 0.6057 
Epoch: 021, Train Ac