<a href="https://colab.research.google.com/github/QingfangLiu/DS_learning/blob/main/DDI_GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# check pytorch version and its cuda support
import torch
print(torch.__version__)

2.6.0+cu124


In [2]:
%%bash
TORCH_VER=2.6.0
CUDA_VER=cu124

# Install dependencies
pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH_VER}+${CUDA_VER}.html
pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH_VER}+${CUDA_VER}.html
pip install torch-geometric
pip install ogb

Looking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html
Looking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html


In [3]:
# imports
import torch
import torch_geometric
print(torch.__version__)
print(torch_geometric.__version__)

2.6.0+cu124
2.6.1


In [4]:
# imports
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.loader import DataLoader
from torch_geometric.nn import SAGEConv
from torch_geometric.utils import negative_sampling
from tqdm import trange

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [7]:
# load ogbl-ddi dataset
from ogb.linkproppred import PygLinkPropPredDataset
import torch
import functools

# Temporarily patch torch.load to set weights_only=False
original_torch_load = torch.load
def patched_torch_load(*args, **kwargs):
    kwargs['weights_only'] = False
    return original_torch_load(*args, **kwargs)

torch.load = patched_torch_load

try:
    dataset = PygLinkPropPredDataset(name='ogbl-ddi')
finally:
    # Restore the original torch.load function
    torch.load = original_torch_load

In [8]:
print(dataset)

PygLinkPropPredDataset()


In [9]:
print(len(dataset))

1


In [11]:
# This graph only contains training edges but no node features
ddi_graph = dataset[0]

print(f'DDI graph object: {ddi_graph}')
print(f'Number of nodes |V|: {ddi_graph.num_nodes}')
print(f'Number of (training) edges |E|: {ddi_graph.num_edges}')
print(f'Is undirected: {ddi_graph.is_undirected()}')
# Note that since the graph is undirected, PyG includes both (u, v) and (v, u) as edges
print(f'Average node degree: {ddi_graph.num_edges / ddi_graph.num_nodes:.2f}')
print(f'Number of node features: {ddi_graph.num_node_features}')
print(f'Has isolated nodes: {ddi_graph.has_isolated_nodes()}')
print(f'Has self-loops: {ddi_graph.has_self_loops()}')

DDI graph object: Data(num_nodes=4267, edge_index=[2, 2135822])
Number of nodes |V|: 4267
Number of (training) edges |E|: 2135822
Is undirected: True
Average node degree: 500.54
Number of node features: 0
Has isolated nodes: False
Has self-loops: False


In [16]:
# dataset split using protein-target split
# Temporarily patch torch.load to set weights_only=False for get_edge_split as well
original_torch_load = torch.load
def patched_torch_load(*args, **kwargs):
    kwargs['weights_only'] = False
    return original_torch_load(*args, **kwargs)

torch.load = patched_torch_load

try:
    split_edge = dataset.get_edge_split()
finally:
    # Restore the original torch.load function
    torch.load = original_torch_load

train_edges = split_edge['train']
valid_edges = split_edge['valid']
test_edges = split_edge['test']

In [17]:
print(f'Number of training positive edges: {train_edges["edge"].shape[0]}')
print(f'Number of validation positive edges: {valid_edges["edge"].shape[0]}')
print(f'Number of validation negative edges: {valid_edges["edge_neg"].shape[0]}')
print(f'Number of test positive edges: {test_edges["edge"].shape[0]}')
print(f'Number of test negative edges: {valid_edges["edge_neg"].shape[0]}')

Number of training positive edges: 1067911
Number of validation positive edges: 133489
Number of validation negative edges: 101882
Number of test positive edges: 133489
Number of test negative edges: 101882


In [None]:
# build graphsage class
class GraphSAGE(torch.nn.Module):
    def __init__(self, conv, in_channels, hidden_channels, out_channels, num_layers, dropout):
        super().__init__()
        self.convs = torch.nn.ModuleList() # initialize an ordered list of GNN layers
        assert (num_layers >= 2), "Number of layers must be at least 2"
        self.convs.append(conv(in_channels, hidden_channels, normalize=True)) # L2 normalization of output embeddings
        for _ in range(num_layers - 2):
            self.convs.append(conv(hidden_channels, hidden_channels, normalize=True))
        self.convs.append(conv(hidden_channels, out_channels, normalize=True))

        self.num_layers = num_layers
        self.dropout = dropout
