## Data

In [1]:
from torch_geometric.datasets import OGB_MAG


# Load OGB-MAG from ./data. NOTE(review): preprocess='metapath2vec' presumably
# supplies features for node types that have none of their own -- confirm
# against the OGB_MAG documentation.
dataset = OGB_MAG(
    root='./data',
    preprocess='metapath2vec',
)
# Work with the graph object stored at index 0 of the dataset.
data = dataset[0]

In [2]:
# Display the graph summary: every node/edge store with the shapes of its tensors.
data

HeteroData(
  paper={
    x=[736389, 128],
    year=[736389],
    y=[736389],
    train_mask=[736389],
    val_mask=[736389],
    test_mask=[736389],
  },
  author={ x=[1134649, 128] },
  institution={ x=[8740, 128] },
  field_of_study={ x=[59965, 128] },
  (author, affiliated_with, institution)={ edge_index=[2, 1043998] },
  (author, writes, paper)={ edge_index=[2, 7145660] },
  (paper, cites, paper)={ edge_index=[2, 5416271] },
  (paper, has_topic, field_of_study)={ edge_index=[2, 7505078] }
)

In [5]:
# List each edge type together with the store holding its `edge_index` tensor.
data.edge_items()

[(('author', 'affiliated_with', 'institution'),
  {'edge_index': tensor([[      0,       1,       2,  ..., 1134645, 1134647, 1134648],
          [    845,     996,    3197,  ...,    5189,    4668,    4668]])}),
 (('author', 'writes', 'paper'),
  {'edge_index': tensor([[      0,       0,       0,  ..., 1134647, 1134648, 1134648],
          [  19703,  289285,  311768,  ...,  657395,  671118,  719594]])}),
 (('paper', 'cites', 'paper'),
  {'edge_index': tensor([[     0,      0,      0,  ..., 736388, 736388, 736388],
          [    88,  27449, 121051,  ..., 421711, 427339, 439864]])}),
 (('paper', 'has_topic', 'field_of_study'),
  {'edge_index': tensor([[     0,      0,      0,  ..., 736388, 736388, 736388],
          [   145,   2215,   3205,  ...,  21458,  22283,  31934]])})]

In [6]:
# A node store is looked up by its node type.
paper_node_data = data['paper']
# The next three lines each rebind `cites_edge_data`. They appear to be
# alternative spellings for the same ('paper', 'cites', 'paper') edge store:
# the full triplet, the (source, destination) pair, and the relation name alone.
# NOTE(review): confirm the shorthand resolution against the HeteroData docs.
cites_edge_data = data['paper', 'cites', 'paper']
cites_edge_data = data['paper', 'paper']
cites_edge_data = data['cites']

In [7]:
# metadata() is unpacked into the node types and the edge types of the graph.
node_types, edge_types = data.metadata()
# One collection per output line: node types first, then edge types.
print(node_types, edge_types, sep='\n')

['paper', 'author', 'institution', 'field_of_study']
[('author', 'affiliated_with', 'institution'), ('author', 'writes', 'paper'), ('paper', 'cites', 'paper'), ('paper', 'has_topic', 'field_of_study')]


In [8]:
# Structural sanity checks, printed one per line in this order:
# isolated nodes, self-loops, undirectedness.
print(
    data.has_isolated_nodes(),
    data.has_self_loops(),
    data.is_undirected(),
    sep='\n',
)

False
False
False


## GNN (GraphSAGE)

In [3]:
import torch
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv, to_hetero
import torch.nn.functional as F
import time
import psutil

In [17]:
# Reload OGB-MAG, this time passing the ToUndirected transform to the dataset.
dataset = OGB_MAG(
    root='./data',
    preprocess='metapath2vec',
    transform=T.ToUndirected(),
)
# Work with the graph object stored at index 0 of the dataset.
data = dataset[0]

In [33]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
data = data.to(device)

In [18]:
class GNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


model = GNN(hidden_channels=64, out_channels=dataset.num_classes)
model = to_hetero(model, data.metadata(), aggr='sum')

In [23]:
def train():
    """Run one full-batch optimisation step on the training papers.

    Uses the notebook-level `model`, `optimizer` and `data`.

    Returns:
        The cross-entropy loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    # Forward pass over the whole graph; only 'paper' nodes carry labels.
    paper_logits = model(data.x_dict, data.edge_index_dict)['paper']
    paper_store = data['paper']
    train_mask = paper_store.train_mask

    step_loss = F.cross_entropy(paper_logits[train_mask], paper_store.y[train_mask])
    step_loss.backward()
    optimizer.step()
    return float(step_loss)

In [26]:
@torch.no_grad()
def test():
    """Evaluate the notebook-level `model` on the test papers.

    Gradient tracking is disabled for the whole call: nothing is
    back-propagated here, so building an autograd graph for the full-batch
    forward pass would only cost memory and time.

    Returns:
        Fraction of test-mask 'paper' nodes whose arg-max prediction equals
        the label, as a Python float.
    """
    model.eval()
    out = model(data.x_dict, data.edge_index_dict)
    mask = data['paper'].test_mask
    # Predicted class = index of the largest score along the class dimension.
    pred = out['paper'][mask].argmax(dim=1)
    correct = pred.eq(data['paper'].y[mask])
    acc = correct.sum() / mask.sum()
    return float(acc)

In [24]:
# Adam over every model parameter with a fixed learning rate of 0.01.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
)

In [30]:
def get_memory_usage():
    """Return a one-line, human-readable memory report.

    When CUDA is available the report contains the values of
    `torch.cuda.memory_allocated()` and `torch.cuda.memory_reserved()`;
    otherwise it contains the `rss` figure psutil reports for
    `psutil.Process()`. All figures are divided by 1024**2 and shown with
    two decimals.
    """
    bytes_per_mb = 1024 ** 2

    # CPU-only machines: report the process memory and stop.
    if not torch.cuda.is_available():
        rss_mb = psutil.Process().memory_info().rss / bytes_per_mb
        return f'CPU Memory: {rss_mb:.2f} MB'

    allocated_mb = torch.cuda.memory_allocated() / bytes_per_mb
    reserved_mb = torch.cuda.memory_reserved() / bytes_per_mb
    return f'GPU Allocated: {allocated_mb:.2f} MB, GPU Reserved: {reserved_mb:.2f} MB'

In [32]:
# Full-batch training: each epoch is one optimisation step followed by one
# evaluation on the test mask.
for epoch in range(50):
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() follows the wall clock and can jump if the system time changes.
    start_time = time.perf_counter()  # Start time for epoch
    loss = train()
    test_acc = test()
    end_time = time.perf_counter()  # End time for epoch

    # Calculate time and memory usage
    epoch_time = end_time - start_time
    memory_usage = get_memory_usage()

    # Print epoch details
    print(f'Epoch {epoch}, Loss: {loss:.4f}, Test Accuracy: {test_acc:.4f}, Time: {epoch_time:.2f} sec, {memory_usage}')

Epoch 0, Loss: 2.9511, Test Accuracy: 0.3033, Time: 59.69 sec, CPU Memory: 265.44 MB
Epoch 1, Loss: 2.8744, Test Accuracy: 0.3075, Time: 48.99 sec, CPU Memory: 238.12 MB
Epoch 2, Loss: 2.8177, Test Accuracy: 0.3089, Time: 50.16 sec, CPU Memory: 262.48 MB
Epoch 3, Loss: 2.7696, Test Accuracy: 0.3116, Time: 48.58 sec, CPU Memory: 265.83 MB
Epoch 4, Loss: 2.7260, Test Accuracy: 0.3257, Time: 49.06 sec, CPU Memory: 262.64 MB
Epoch 5, Loss: 2.6832, Test Accuracy: 0.3313, Time: 48.56 sec, CPU Memory: 240.11 MB
Epoch 6, Loss: 2.6470, Test Accuracy: 0.3280, Time: 49.65 sec, CPU Memory: 242.88 MB
Epoch 7, Loss: 2.6156, Test Accuracy: 0.3237, Time: 49.95 sec, CPU Memory: 263.06 MB
Epoch 8, Loss: 2.5875, Test Accuracy: 0.3305, Time: 49.49 sec, CPU Memory: 263.09 MB
Epoch 9, Loss: 2.5594, Test Accuracy: 0.3374, Time: 48.45 sec, CPU Memory: 262.86 MB
Epoch 10, Loss: 2.5373, Test Accuracy: 0.3392, Time: 48.53 sec, CPU Memory: 267.41 MB


KeyboardInterrupt: 

## GAT

In [12]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import OGB_MAG
import time
from torch_geometric.nn import GATConv, Linear, to_hetero
from torch_geometric.transforms import ToUndirected
import pandas

In [13]:
# Reload OGB-MAG with the ToUndirected transform. The transform is referenced
# through the name imported at the top of this section, so the GAT section
# does not rely on the `T` alias imported by the earlier GNN section.
dataset = OGB_MAG(root='./data', preprocess='metapath2vec', transform=ToUndirected())
# Work with the graph object stored at index 0 of the dataset.
data = dataset[0]

In [14]:
class GAT(torch.nn.Module):
    """Two GATConv layers, each summed with a Linear projection of its input.

    Written against a single edge type; the notebook converts it with
    `to_hetero` before use.

    Args:
        hidden_channels: Output width of the first conv/linear pair.
        out_channels: Output width of the second conv/linear pair.
    """

    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        # First stage. Both convolutions are created with add_self_loops=False.
        # NOTE(review): presumably because most edge types join two different
        # node types -- confirm against the GATConv docs.
        self.conv1 = GATConv((-1, -1), hidden_channels, add_self_loops=False)
        self.lin1 = Linear(-1, hidden_channels)
        # Second stage, producing the final per-node outputs.
        self.conv2 = GATConv((-1, -1), out_channels, add_self_loops=False)
        self.lin2 = Linear(-1, out_channels)

    def forward(self, x, edge_index):
        """Return conv2(h) + lin2(h), where h = relu(conv1(x) + lin1(x))."""
        hidden = self.conv1(x, edge_index) + self.lin1(x)
        hidden = hidden.relu()
        return self.conv2(hidden, edge_index) + self.lin2(hidden)

# Build the homogeneous GAT template and convert it in one step using the
# graph's node and edge types; aggr='sum' is the aggregation argument handed
# to to_hetero.
model = to_hetero(
    GAT(hidden_channels=64, out_channels=dataset.num_classes),
    data.metadata(),
    aggr='sum',
)

In [15]:
# Pick the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Graph tensors and model parameters are placed on the same device.
data = data.to(device)
model = model.to(device)
# Adam over every model parameter with a fixed learning rate of 0.01.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [16]:
# Training step
def train():
    """Run one full-batch optimisation step on the training papers.

    Uses the notebook-level `model`, `optimizer` and `data`.

    Returns:
        The cross-entropy loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    # Forward pass over the whole graph; only 'paper' nodes carry labels.
    paper_logits = model(data.x_dict, data.edge_index_dict)['paper']
    paper_store = data['paper']
    train_mask = paper_store.train_mask

    step_loss = F.cross_entropy(paper_logits[train_mask], paper_store.y[train_mask])
    step_loss.backward()
    optimizer.step()
    return float(step_loss)

# Evaluation step
@torch.no_grad()
def test():
    """Evaluate the notebook-level `model` on the test papers.

    Gradient tracking is disabled for the whole call: nothing is
    back-propagated here, so building an autograd graph for the full-batch
    forward pass would only cost memory and time.

    Returns:
        Fraction of test-mask 'paper' nodes whose arg-max prediction equals
        the label, as a Python float.
    """
    model.eval()
    out = model(data.x_dict, data.edge_index_dict)
    mask = data['paper'].test_mask
    # Predicted class = index of the largest score along the class dimension.
    pred = out['paper'][mask].argmax(dim=1)
    correct = pred.eq(data['paper'].y[mask])
    acc = correct.sum() / mask.sum()
    return float(acc)

In [None]:
# Full-batch training: each epoch is one optimisation step followed by one
# evaluation on the test mask.
for epoch in range(10):
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() follows the wall clock and can jump if the system time changes.
    start_time = time.perf_counter()
    loss = train()
    test_acc = test()
    end_time = time.perf_counter()
    epoch_time = end_time - start_time
    print(f'Epoch {epoch}, Loss: {loss:.4f}, Test Accuracy: {test_acc:.4f}, Time: {epoch_time:.2f} seconds')
