In [1]:
# Import torch & Check CUDA availability
import torch

# Report, in order: whether CUDA is usable, how many devices are visible,
# and the index of the currently selected device.
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())

True
1
0


In [2]:
# Get CUDA device name
# NOTE: printing torch.cuda.device(0) only shows the object's repr (type and
# memory address); the human-readable name comes from get_device_name(0).
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7fbc8ed2af10>
NVIDIA A100-SXM4-80GB


#### Import Twitter Dataset

In [3]:
# Load the graph object from a file previously saved with PyTorch.
# NOTE(review): torch.load unpickles the file, so only load files from a
# trusted source; the absolute path below is specific to the original machine.
data = torch.load("/mnt/ephemeral/gnn/dataset/Twitter/twitter_sparse.pt")

#### Graph Information

In [4]:
# Print a summary of the whole graph object (attribute names and their shapes)
print(f'Graph: {data}')

Graph: Data(x=[41652230, 16], y=[41652230], adj_t=[41652230, 41652230, nnz=2405026390])


In [5]:
# Node feature matrix: print its shape, then the (truncated) values
print(f'x = {data.x.shape}')
print(data.x)

x = torch.Size([41652230, 16])
tensor([[-0.2245,  1.3572, -0.0271,  ...,  2.3032, -1.8811, -2.3928],
        [-0.3052,  0.9633,  2.2129,  ...,  0.6190, -1.5212, -0.6997],
        [-2.2178, -1.8188,  1.8149,  ..., -1.1661,  2.0380,  2.2890],
        ...,
        [-0.2966, -0.4907, -1.2873,  ...,  0.6229,  1.6473, -0.0802],
        [ 2.0246, -1.1070, -2.4220,  ...,  0.2497,  1.7722, -0.9513],
        [ 0.9942, -1.6089,  0.6479,  ..., -0.5970,  1.7722, -0.1598]])


In [6]:
# Sparse adjacency matrix holding the edges (row/col indices, size, nnz)
print(data.adj_t)

SparseTensor(row=tensor([       0,        0,        0,  ..., 41652228, 41652228, 41652229]),
             col=tensor([       1,        2,        3,  ..., 22209873, 41478284, 41650839]),
             size=(41652230, 41652230), nnz=2405026390, density=0.00%)


In [7]:
# Ground-truth labels: print the shape, then the (truncated) integer labels
print(f'y = {data.y.shape}')
print(data.y)

y = torch.Size([41652230])
tensor([ 4, 10,  8,  ..., 14, 11,  2])


#### Single-layer GCN

In [8]:
# Create a simple GCN with only one GCN layer
import torch.nn.functional as F

from torch.nn import Linear
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Graph convolutional network made of a single ``GCNConv`` layer.

    Args:
        in_channels: Size of each input node feature vector (default 16).
        out_channels: Size of each output vector, i.e. the number of
            scores produced per node (default 16).
        lr: Learning rate of the Adam optimizer attached to the model
            (default 0.02).

    Attributes:
        gcn1: The only graph convolution layer.
        optimizer: Adam optimizer over this module's parameters; ``train``
            reads it through ``model.optimizer``.
    """

    def __init__(self, in_channels=16, out_channels=16, lr=0.02):
        super().__init__()
        self.gcn1 = GCNConv(in_channels, out_channels)
        # Kept on the module so the training loop does not have to build one.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

    def forward(self, x, adj_t):
        """Run the layer and return ``(raw_output, log_probabilities)``.

        Args:
            x: Node feature matrix passed to the convolution.
            adj_t: Adjacency structure passed to the convolution.

        Returns:
            Tuple of the raw layer output and its ``log_softmax`` taken
            along dimension 1.
        """
        h = self.gcn1(x, adj_t)
        z = F.log_softmax(h, dim=1)
        return h, z

#### Train with mini-batches

In [9]:
def accuracy(pred_y, y):
    """Return the fraction of predictions equal to their labels as a float."""
    matches = pred_y == y
    hit_count = matches.sum()
    fraction = hit_count / len(y)
    return fraction.item()

def train(model, train_loader, epochs=5, device='cuda:0'):
    """Train a GNN model on mini-batches and return the trained model.

    Args:
        model: Module with an ``optimizer`` attribute whose forward pass
            ``model(x, adj_t)`` returns a pair ``(embeddings, scores)``.
            It must already live on ``device``.
        train_loader: Iterable of mini-batches exposing ``x``, ``adj_t``,
            ``y`` and ``to(device, non_blocking=...)``.
        epochs: Number of passes over ``train_loader`` (default 5).
        device: Device every batch is moved to (default ``'cuda:0'``).

    Returns:
        Tuple ``(model, h, out)`` where ``h`` and ``out`` are the two model
        outputs for the last processed batch. Both are ``None`` when no
        batch was processed (empty loader or ``epochs <= 0``).
    """
    # CrossEntropyLoss applies log_softmax itself; it also accepts the
    # log-probabilities returned by the GCN defined in this notebook.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = model.optimizer
    # Pre-bind the results so an empty loader does not fail at the return.
    h = out = None

    model.train()
    for _ in range(epochs):
        # Training on batches
        for batch in train_loader:
            batch = batch.to(device, non_blocking=True)
            optimizer.zero_grad()
            h, out = model(batch.x, batch.adj_t)
            # NeighborLoader puts the seed nodes first and stores their
            # count in `batch_size`; the remaining rows are sampled
            # neighbors whose own neighborhoods are incomplete, so only the
            # seed rows enter the loss. Batches without that attribute give
            # None here, and `[:None]` keeps every row.
            num_seeds = getattr(batch, 'batch_size', None)
            loss = criterion(out[:num_seeds], batch.y[:num_seeds])
            loss.backward()
            optimizer.step()

    return model, h, out

#### batch_size = 131072

##### num_workers = 32

In [10]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 131072 seed nodes per batch, 32 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=131072,
    num_neighbors=[-1],
    num_workers=32,
    pin_memory=True,
)

In [11]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

318


In [12]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 554.0041875000001 seconds


##### num_workers = 16

In [13]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 131072 seed nodes per batch, 16 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=131072,
    num_neighbors=[-1],
    num_workers=16,
    pin_memory=True,
)

In [14]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

318


In [15]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 515.42 seconds


##### num_workers = 8

In [16]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 131072 seed nodes per batch, 8 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=131072,
    num_neighbors=[-1],
    num_workers=8,
    pin_memory=True,
)

In [17]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

318


In [18]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
# train() runs 5 epochs, so the label reports 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (100 Epochs): 510.18850000000003 seconds


##### num_workers = 4

In [19]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 131072 seed nodes per batch, 4 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=131072,
    num_neighbors=[-1],
    num_workers=4,
    pin_memory=True,
)

In [20]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

318


In [21]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
# train() runs 5 epochs, so the label reports 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (100 Epochs): 644.4569375 seconds


##### num_workers = 2

In [22]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 131072 seed nodes per batch, 2 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=131072,
    num_neighbors=[-1],
    num_workers=2,
    pin_memory=True,
)

In [23]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

318


In [24]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
# train() runs 5 epochs, so the label reports 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (100 Epochs): 1164.026 seconds


##### num_workers = 0

In [25]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 131072 seed nodes per batch, pinned host memory, no loader workers.
train_loader = NeighborLoader(
    data,
    num_neighbors=[-1],
    batch_size=131072,
    pin_memory=True,
    # Spelled out (0 is the DataLoader default) so this cell matches its
    # "num_workers = 0" heading and the sibling configurations.
    num_workers=0,
)

In [26]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

318


In [27]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
# train() runs 5 epochs, so the label reports 5 (it previously claimed 100)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (100 Epochs): 1747.88375 seconds


#### batch_size = 262144

##### num_workers = 32

In [28]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 262144 seed nodes per batch, 32 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=262144,
    num_neighbors=[-1],
    num_workers=32,
    pin_memory=True,
)

In [29]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

159


In [30]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 504.7018125 seconds


##### num_workers = 16

In [37]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 262144 seed nodes per batch, 16 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=262144,
    num_neighbors=[-1],
    num_workers=16,
    pin_memory=True,
)

In [38]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

159


In [39]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 487.49878125000004 seconds


#### batch_size = 524288

##### num_workers = 32

In [31]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 524288 seed nodes per batch, 32 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=524288,
    num_neighbors=[-1],
    num_workers=32,
    pin_memory=True,
)

In [32]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

80


In [33]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 502.61262500000004 seconds


##### num_workers = 16

In [40]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 524288 seed nodes per batch, 16 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=524288,
    num_neighbors=[-1],
    num_workers=16,
    pin_memory=True,
)

In [41]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

80


In [42]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 476.39862500000004 seconds


#### batch_size = 1048576

##### num_workers = 32

In [34]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 1048576 seed nodes per batch, 32 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=1048576,
    num_neighbors=[-1],
    num_workers=32,
    pin_memory=True,
)

In [35]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

40


In [36]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 541.912 seconds


##### num_workers = 16

In [43]:
from torch_geometric.loader import NeighborLoader

# Mini-batch loader: one-hop sampling that keeps every neighbor (-1),
# 1048576 seed nodes per batch, 16 loader workers, pinned host memory.
train_loader = NeighborLoader(
    data,
    batch_size=1048576,
    num_neighbors=[-1],
    num_workers=16,
    pin_memory=True,
)

In [44]:
# Length of the loader, i.e. the number of mini-batches per epoch
print(len(train_loader))

40


In [45]:
# CUDA events with timing enabled, used to bracket the training run
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# Create GCN model
gcn = GCN()
print(gcn)
print()

# Train
start.record()
gcn_model, gcn_output, final_output = train(gcn.to('cuda:0'), train_loader)
end.record()
# Wait for outstanding GPU work so both events are complete before reading them
torch.cuda.synchronize()
# elapsed_time() is in milliseconds; the factor 0.001 below converts to seconds
elapsed_time = start.elapsed_time(end)
print('Elapsed Time (5 Epochs):', elapsed_time*0.001, 'seconds')

GCN(
  (gcn1): GCNConv(16, 16)
)

Elapsed Time (5 Epochs): 487.27059375 seconds
