In [1]:
# Import torch & Check CUDA availability
import torch

print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())

True
1
0


In [2]:
# Get CUDA device name
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7f5f0523a650>
NVIDIA A30


In [3]:
from torch_geometric.datasets import Reddit

# Import dataset from PyTorch Geometric
dataset = Reddit(root="/dfs6/pub/seminl1/Reddit")
data = dataset[0]

# Store the dataset to GPU
data = data.pin_memory()
data = data.to('cuda:0', non_blocking=True)

# Print information about the dataset
print(f'Dataset: {dataset}')
print('-------------------')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of nodes: {data.x.shape[0]}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Print information about the graph
print(f'\nGraph:')
print('------')
print(f'Edges are directed: {data.is_directed()}')
print(f'Graph has isolated nodes: {data.has_isolated_nodes()}')
print(f'Graph has loops: {data.has_self_loops()}')

Dataset: Reddit()
-------------------
Number of graphs: 1
Number of nodes: 232965
Number of features: 602
Number of classes: 41

Graph:
------
Edges are directed: False
Graph has isolated nodes: False
Graph has loops: False


In [4]:
# Check whether the dataset is stored on the GPU or not
print(f'Graph is stored on the GPU: {data.is_cuda}')

Graph is stored on the GPU: True


In [5]:
# Print first element
print(f'Graph: {data}')

Graph: Data(x=[232965, 602], edge_index=[2, 114615892], y=[232965], train_mask=[232965], val_mask=[232965], test_mask=[232965])


In [7]:
# Node feature matrix information
print(f'x = {data.x.shape}')
print(data.x.dtype)
print(data.x)

x = torch.Size([232965, 602])
torch.float32
tensor([[ 1.2334,  9.0430, -0.9233,  ..., -0.2579,  0.3112, -0.3772],
        [-0.1386, -0.2022,  0.1277,  ...,  0.1563,  0.1048, -0.6534],
        [-0.1330, -0.1962, -0.0296,  ...,  0.0358,  0.2864,  0.2744],
        ...,
        [-0.0614, -0.2022,  0.9698,  ...,  1.1064, -1.4323, -0.2398],
        [-0.1606, -0.2022, -0.0892,  ...,  0.7440, -0.5046, -2.2288],
        [ 0.0929,  0.2822,  0.1768,  ...,  0.2196,  0.5967,  0.5588]],
       device='cuda:0')


In [8]:
# Edge index information
print(f'edge_index = {data.edge_index.shape}')
print(data.edge_index.dtype)
print(data.edge_index)

edge_index = torch.Size([2, 114615892])
torch.int64
tensor([[     0,      0,      0,  ..., 232964, 232964, 232964],
        [   242,    249,    524,  ..., 231806, 232594, 232634]],
       device='cuda:0')


In [13]:
# Try to get the number of addition operation
start_point = 0
numAddition = 0
numEdges = 114615892
for i in range(numEdges-1):
    if data.edge_index[0][i+1] == start_point:
        numAddition += 1
    else:
        start_point = data.edge_index[0][i+1]
        if data.edge_index[0][i] % 1000 == 0:
            print(data.edge_index[0][i])
print(numAddition)

tensor(0, device='cuda:0')
tensor(1000, device='cuda:0')
tensor(2000, device='cuda:0')
tensor(3000, device='cuda:0')
tensor(4000, device='cuda:0')
tensor(5000, device='cuda:0')
tensor(6000, device='cuda:0')
tensor(7000, device='cuda:0')
tensor(8000, device='cuda:0')
tensor(9000, device='cuda:0')
tensor(10000, device='cuda:0')
tensor(11000, device='cuda:0')
tensor(12000, device='cuda:0')
tensor(13000, device='cuda:0')
tensor(14000, device='cuda:0')
tensor(15000, device='cuda:0')
tensor(16000, device='cuda:0')
tensor(17000, device='cuda:0')
tensor(18000, device='cuda:0')
tensor(19000, device='cuda:0')
tensor(20000, device='cuda:0')
tensor(21000, device='cuda:0')
tensor(22000, device='cuda:0')
tensor(23000, device='cuda:0')
tensor(24000, device='cuda:0')
tensor(25000, device='cuda:0')
tensor(26000, device='cuda:0')
tensor(27000, device='cuda:0')
tensor(28000, device='cuda:0')
tensor(29000, device='cuda:0')
tensor(30000, device='cuda:0')
tensor(31000, device='cuda:0')
tensor(32000, device=

In [9]:
# Ground-truth labels
print(f'y = {data.y.shape}')
print(data.y.dtype)
print(data.y)

y = torch.Size([232965])
torch.int64
tensor([30, 17, 18,  ...,  3, 13, 13], device='cuda:0')


In [10]:
# Train mask
print(f'train_mask = {data.train_mask.shape}')
print(data.train_mask.dtype)
print(data.train_mask)

train_mask = torch.Size([232965])
torch.bool
tensor([False,  True, False,  ...,  True,  True, False], device='cuda:0')


In [10]:
# Create a simple GCN with only one GCN layer
import torch.nn.functional as F

from torch.nn import Linear
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.gcn1 = GCNConv(dataset.num_features, dataset.num_classes)
        self.optimizer = torch.optim.Adam(self.parameters(),
                                          lr=0.01,
                                          weight_decay=5e-4)

    def forward(self, x, adj_t):
        x = self.gcn1(x, adj_t)
        z = F.log_softmax(x, dim=1)
        return x, z

In [11]:
def accuracy(pred_y, y):
    """Calculate accuracy."""
    return ((pred_y == y).sum() / len(y)).item()

def train(model, data):
    """Train a GNN model and return the trained model."""
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = model.optimizer
    epochs = 200

    model.train()
    for epoch in range(epochs+1):
        # Training
        optimizer.zero_grad()
        h, out = model(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Print metrics every 10 epochs
        if(epoch % 10 == 0):
            print(f'Epoch {epoch:>3} | Train Loss: {loss:.3f} | Train Acc: {acc*100:>6.2f}%')
          
    return model, h, out

In [12]:
%%time
# Create GCN model
gcn = GCN()
print(gcn)

# Train and test
# Train
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gcn_model, gcn_output, final_output = train(gcn.to(device), data.to(device))

GCN(
  (gcn1): GCNConv(602, 41)
)


OutOfMemoryError: CUDA out of memory. Tried to allocate 17.54 GiB. GPU 0 has a total capacty of 23.50 GiB of which 17.43 GiB is free. Including non-PyTorch memory, this process has 6.05 GiB memory in use. Of the allocated memory 4.42 GiB is allocated by PyTorch, and 1.37 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF