In [1]:
# Import torch & Check CUDA availability
import torch

# Report, in order: CUDA availability, number of visible devices, and the
# index of the currently selected device.
for cuda_query in (
    torch.cuda.is_available,
    torch.cuda.device_count,
    torch.cuda.current_device,
):
    print(cuda_query())

True
1
0


In [2]:
# Get CUDA device name
# NOTE(review): printing torch.cuda.device(...) only shows an object repr
# (see the recorded output); get_device_name() is the call that yields the
# human-readable GPU model.
gpu_index = 0
print(torch.cuda.device(gpu_index))
print(torch.cuda.get_device_name(gpu_index))

<torch.cuda.device object at 0x7efc70752610>
NVIDIA A100-SXM4-80GB


#### Import Graph500_Scale24_EdgeFactor64

In [3]:
# Load the graph from a saved PyTorch dataset file.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
dataset_path = "/mnt/ephemeral/gnn/dataset/Graph500/graph500_scale24_ef64.pt"
data = torch.load(dataset_path)

In [4]:
# Convert the graph's edges into a sparse adjacency matrix (stored as `adj_t`)
# and pin the resulting graph via pin_memory().
# NOTE(review): this overwrites `data`, so the cell is not meant to be re-run
# without reloading the dataset first.
import torch_geometric.transforms as T

to_sparse = T.ToSparseTensor()
data = to_sparse(data).pin_memory()

#### Graph Information

In [5]:
# Print a summary of the whole graph object
print('Graph: {}'.format(data))

Graph: Data(x=[16777216, 64], y=[16777216], train_mask=[16777216], adj_t=[16777216, 16777216, nnz=1044447191])


In [6]:
# Node feature matrix: shape first, then a preview of the values
node_features = data.x
print('x = {}'.format(node_features.shape))
print(node_features)

x = torch.Size([16777216, 64])
tensor([[ 0.9196,  1.7667, -0.4883,  ...,  0.1658,  0.1360, -2.3290],
        [-0.8109, -2.0677,  2.3122,  ...,  1.4025, -0.3883, -1.6758],
        [-1.8271,  1.6633,  0.3773,  ...,  2.1587, -1.9037,  0.0145],
        ...,
        [ 2.4858, -1.1205, -1.6414,  ...,  0.3470, -1.7955,  2.0294],
        [ 1.0389, -0.9825,  2.2535,  ...,  0.1453, -2.1524, -0.9722],
        [-2.1791,  1.0309, -1.9018,  ...,  2.2616, -0.4018, -1.8365]])


In [7]:
# Sparse adjacency matrix holding the edges
adjacency = data.adj_t
print(adjacency)

SparseTensor(row=tensor([       0,        0,        0,  ..., 16777215, 16777215, 16777215]),
             col=tensor([       0,        1,        2,  ..., 16147194, 16285027, 16745422]),
             size=(16777216, 16777216), nnz=1044447191, density=0.00%)


In [8]:
# Ground-truth labels: shape first, then a preview of the values
labels = data.y
print('y = {}'.format(labels.shape))
print(labels)

y = torch.Size([16777216])
tensor([50, 44,  8,  ..., 32, 45, 51])


In [9]:
# Boolean training mask: shape first, then a preview of the values
mask = data.train_mask
print('train_mask = {}'.format(mask.shape))
print(mask)

train_mask = torch.Size([16777216])
tensor([False, False,  True,  ..., False,  True, False])


#### Single-layer GraphSAGE

In [10]:
# Create a simple GraphSAGE with only one GraphSAGE layer
import torch.nn.functional as F

from torch.nn import Linear
from torch_geometric.nn import SAGEConv

class GCN(torch.nn.Module):
    """Single-layer GraphSAGE model.

    Despite the class name, the only layer is a ``SAGEConv``; the name is kept
    so existing cells that call ``GCN()`` keep working.

    Args:
        in_channels: Size of each input node feature vector (default 64).
        out_channels: Size of each output vector produced by the layer
            (default 64).
        lr: Learning rate of the Adam optimizer stored on the model
            (default 0.02).
    """

    def __init__(self, in_channels=64, out_channels=64, lr=0.02):
        super().__init__()
        self.sage1 = SAGEConv(in_channels, out_channels)
        # The optimizer lives on the model so that train() can read it back
        # as ``model.optimizer``.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

    def forward(self, x, adj_t):
        """Apply the SAGE layer.

        Args:
            x: Node feature matrix.
            adj_t: Adjacency passed straight through to ``SAGEConv``.

        Returns:
            ``(h, z)``: the raw layer output and its row-wise log-softmax.
        """
        h = self.sage1(x, adj_t)
        z = F.log_softmax(h, dim=1)
        return h, z

#### Train with mini-batches

In [11]:
def accuracy(pred_y, y):
    """Return the fraction of predictions equal to their labels as a Python number."""
    num_correct = (pred_y == y).sum()
    fraction = num_correct / len(y)
    return fraction.item()

def train(model, train_loader, epochs=5):
    """Train a GNN model on mini-batches and return the trained model.

    Args:
        model: Module exposing an ``optimizer`` attribute and a
            ``forward(x, adj_t)`` that returns ``(h, out)``.
        train_loader: Iterable of mini-batches providing ``x``, ``adj_t``,
            ``y`` and a ``to(device, non_blocking=...)`` method
            (e.g. a ``NeighborLoader``).
        epochs: Number of passes over ``train_loader``. Defaults to 5, the
            value that used to be hard-coded.

    Returns:
        ``(model, h, out)`` where ``h`` and ``out`` are the model outputs for
        the last processed batch, or ``None`` when no batch was processed.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = model.optimizer

    # Follow the device the model already lives on instead of hard-coding
    # 'cuda:0'; fall back to 'cuda:0' for a parameter-less model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda:0')

    # Pre-bind the outputs so an empty loader (or epochs == 0) returns None
    # instead of raising UnboundLocalError at the return statement.
    h = out = None

    model.train()
    for epoch in range(epochs):
        # Training on batches
        for batch in train_loader:
            batch = batch.to(device, non_blocking=True)
            optimizer.zero_grad()
            h, out = model(batch.x, batch.adj_t)

            # Neighbor-sampled batches place the seed nodes first and expose
            # their count as ``batch_size``; the remaining rows are sampled
            # neighbours whose own neighbourhoods are incomplete, so they are
            # excluded from the loss. Batches without ``batch_size`` keep the
            # full-batch loss.
            num_seeds = getattr(batch, 'batch_size', None)
            if num_seeds is None:
                loss = criterion(out, batch.y)
            else:
                loss = criterion(out[:num_seeds], batch.y[:num_seeds])

            loss.backward()
            optimizer.step()

    return model, h, out

#### batch_size = 131072

##### num_workers = 32

In [12]:
from torch_geometric.loader import NeighborLoader

# NeighborLoader: 131072 seed nodes per mini-batch, every neighbour (-1) for
# the single hop, batches prepared by 32 loader workers.
loader_options = dict(
    num_neighbors=[-1],
    batch_size=131072,
    pin_memory=True,
    num_workers=32,
)
train_loader = NeighborLoader(data, **loader_options)

In [13]:
# Number of mini-batches the loader yields per pass
print(len(train_loader))

128


In [14]:
# CUDA events used to time the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Make sure the end event has completed before reading the elapsed time
torch.cuda.synchronize()
elapsed_time = start.elapsed_time(end)  # milliseconds, converted to seconds below
# train() runs 5 epochs, so the label says 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (sage1): SAGEConv(64, 64, aggr=mean)
)

Elapsed Time (100 Epochs): 211.42334375000002 seconds


##### num_workers = 16

In [15]:
from torch_geometric.loader import NeighborLoader

# NeighborLoader: 131072 seed nodes per mini-batch, every neighbour (-1) for
# the single hop, batches prepared by 16 loader workers.
loader_options = dict(
    num_neighbors=[-1],
    batch_size=131072,
    pin_memory=True,
    num_workers=16,
)
train_loader = NeighborLoader(data, **loader_options)

In [16]:
# Number of mini-batches the loader yields per pass
print(len(train_loader))

128


In [17]:
# CUDA events used to time the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Make sure the end event has completed before reading the elapsed time
torch.cuda.synchronize()
elapsed_time = start.elapsed_time(end)  # milliseconds, converted to seconds below
# train() runs 5 epochs, so the label says 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (sage1): SAGEConv(64, 64, aggr=mean)
)

Elapsed Time (100 Epochs): 173.9125 seconds


##### num_workers = 8

In [18]:
from torch_geometric.loader import NeighborLoader

# NeighborLoader: 131072 seed nodes per mini-batch, every neighbour (-1) for
# the single hop, batches prepared by 8 loader workers.
loader_options = dict(
    num_neighbors=[-1],
    batch_size=131072,
    pin_memory=True,
    num_workers=8,
)
train_loader = NeighborLoader(data, **loader_options)

In [19]:
# Number of mini-batches the loader yields per pass
print(len(train_loader))

128


In [20]:
# CUDA events used to time the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Make sure the end event has completed before reading the elapsed time
torch.cuda.synchronize()
elapsed_time = start.elapsed_time(end)  # milliseconds, converted to seconds below
# train() runs 5 epochs, so the label says 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (sage1): SAGEConv(64, 64, aggr=mean)
)

Elapsed Time (100 Epochs): 177.258671875 seconds


##### num_workers = 4

In [21]:
from torch_geometric.loader import NeighborLoader

# NeighborLoader: 131072 seed nodes per mini-batch, every neighbour (-1) for
# the single hop, batches prepared by 4 loader workers.
loader_options = dict(
    num_neighbors=[-1],
    batch_size=131072,
    pin_memory=True,
    num_workers=4,
)
train_loader = NeighborLoader(data, **loader_options)

In [22]:
# Number of mini-batches the loader yields per pass
print(len(train_loader))

128


In [23]:
# CUDA events used to time the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Make sure the end event has completed before reading the elapsed time
torch.cuda.synchronize()
elapsed_time = start.elapsed_time(end)  # milliseconds, converted to seconds below
# train() runs 5 epochs, so the label says 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (sage1): SAGEConv(64, 64, aggr=mean)
)

Elapsed Time (100 Epochs): 261.037703125 seconds


##### num_workers = 2

In [12]:
from torch_geometric.loader import NeighborLoader

# NeighborLoader: 131072 seed nodes per mini-batch, every neighbour (-1) for
# the single hop, batches prepared by 2 loader workers.
loader_options = dict(
    num_neighbors=[-1],
    batch_size=131072,
    pin_memory=True,
    num_workers=2,
)
train_loader = NeighborLoader(data, **loader_options)

In [13]:
# Number of mini-batches the loader yields per pass
print(len(train_loader))

128


In [14]:
# CUDA events used to time the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Make sure the end event has completed before reading the elapsed time
torch.cuda.synchronize()
elapsed_time = start.elapsed_time(end)  # milliseconds, converted to seconds below
# train() runs 5 epochs, so the label says 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (sage1): SAGEConv(64, 64, aggr=mean)
)

Elapsed Time (100 Epochs): 456.25625 seconds


##### num_workers = 0

In [15]:
from torch_geometric.loader import NeighborLoader

# NeighborLoader: 131072 seed nodes per mini-batch, every neighbour (-1) for
# the single hop. num_workers=0 is the loader default, written out here to
# match the section heading.
loader_options = dict(
    num_neighbors=[-1],
    batch_size=131072,
    pin_memory=True,
    num_workers=0,
)
train_loader = NeighborLoader(data, **loader_options)

In [16]:
# Number of mini-batches the loader yields per pass
print(len(train_loader))

128


In [17]:
# CUDA events used to time the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Make sure the end event has completed before reading the elapsed time
torch.cuda.synchronize()
elapsed_time = start.elapsed_time(end)  # milliseconds, converted to seconds below
# train() runs 5 epochs, so the label says 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (sage1): SAGEConv(64, 64, aggr=mean)
)

Elapsed Time (100 Epochs): 703.126125 seconds
