# GraphSage with Cora dataset

## Import necessary libraries

In [23]:
import copy
import os.path as osp
import time

import torch
import torch.nn.functional as F
from tqdm import tqdm


from torch_geometric.datasets import Planetoid, Reddit
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import SAGEConv

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')


Using device: cpu


## Store the data

In [24]:
path = osp.join('data', 'cora')
#dataset = Reddit(path)
dataset = Planetoid(path, name = 'cora')

# Already send node features/labels to GPU for faster access during sampling
data = dataset[0].to(device, 'x', 'y')

# Define parameters for the NeighborLoader
kwargs = {'batch_size': 1024, 'num_workers': 6, 'persistent_workers': True}
train_loader = NeighborLoader(data, input_nodes=data.train_mask,
                              num_neighbors=[25, 10], shuffle=True, **kwargs)

subgraph_loader = NeighborLoader(copy.copy(data), input_nodes=None,
                                 num_neighbors=[-1], shuffle=False, **kwargs)

# No need to maintain these features during evaluation:
del subgraph_loader.data.x, subgraph_loader.data.y
# Add global node index information.
subgraph_loader.data.num_nodes = data.num_nodes
subgraph_loader.data.n_id = torch.arange(data.num_nodes)



## Graph statistics 

In [18]:
# Check some graph statistics of Cora graph
print("Number of nodes in the graph:", data.num_nodes)
print("Number of edges in the graph:", data.num_edges)
print("Node feature matrix with shape:", data.x.shape) # [num_nodes, num_node_features]
print("Graph connectivity in COO format with shape:", data.edge_index.shape) # [2, num_edges]
print("Target to train against :", data.y.shape) 
print("Node feature length", dataset.num_features)

Number of nodes in the graph: 2708
Number of edges in the graph: 10556
Node feature matrix with shape: torch.Size([2708, 1433])
Graph connectivity in COO format with shape: torch.Size([2, 10556])
Target to train against : torch.Size([2708])
Node feature length 1433


## Check the number of unique labels

In [26]:
data.y.unique()

tensor([0, 1, 2, 3, 4, 5, 6])

In [27]:
class SAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

    def forward(self, x, edge_index):
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i < len(self.convs) - 1:
                x = x.relu_()
                x = F.dropout(x, p=0.5, training=self.training)
        return x


    #Compute representations of nodes layer by layer
    #This leads to faster computation in contrast to immediately computing the final representations of each batch
    @torch.no_grad()
    def inference(self, x_all, subgraph_loader):
        # Initialize a progress bar
        pbar = tqdm(total=len(subgraph_loader.dataset) * len(self.convs))
        pbar.set_description('Evaluating')

        # Forward pass through each SAGEConv layer
        for i, conv in enumerate(self.convs):
            xs = []
            for batch in subgraph_loader:
                # Get the node features for the current batch
                x = x_all[batch.n_id.to(x_all.device)].to(device)
                # Apply the current SAGEConv layer
                x = conv(x, batch.edge_index.to(device))
                if i < len(self.convs) - 1:
                    x = x.relu_()
                xs.append(x[:batch.batch_size].cpu())
                pbar.update(batch.batch_size)
            x_all = torch.cat(xs, dim=0)
        pbar.close()
        return x_all

## Construct SAGE model with (in_channels, hidden_channels, out_channels)

In [28]:
model = SAGE(dataset.num_features, 256, dataset.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [29]:
def train(epoch):
    model.train()

    pbar = tqdm(total=int(len(train_loader.dataset)))
    pbar.set_description(f'Epoch {epoch:02d}')

    total_loss = total_correct = total_examples = 0
    for batch in train_loader:
        optimizer.zero_grad()
        y = batch.y[:batch.batch_size]
        y_hat = model(batch.x, batch.edge_index.to(device))[:batch.batch_size]
        loss = F.cross_entropy(y_hat, y)
        loss.backward()
        optimizer.step()

        total_loss += float(loss) * batch.batch_size
        total_correct += int((y_hat.argmax(dim=-1) == y).sum())
        total_examples += batch.batch_size
        pbar.update(batch.batch_size)
    pbar.close()

    return total_loss / total_examples, total_correct / total_examples


In [30]:
@torch.no_grad()
def test():
    model.eval()
    y_hat = model.inference(data.x, subgraph_loader).argmax(dim=-1)
    y = data.y.to(y_hat.device)

    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        accs.append(int((y_hat[mask] == y[mask]).sum()) / int(mask.sum()))
    return accs

In [31]:
times = []
for epoch in range(1, 5):
    start = time.time()
    loss, acc = train(epoch)
    print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {acc:.4f}')
    train_acc, val_acc, test_acc = test()
    print(f'Epoch: {epoch:02d}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, '
          f'Test: {test_acc:.4f}')
    times.append(time.time() - start)
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")

Epoch 01: 100%|██████████| 140/140 [00:00<00:00, 1745.62it/s]


Epoch 01, Loss: 1.9509, Approx. Train: 0.1929


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 54798.67it/s]


Epoch: 01, Train: 1.0000, Val: 0.7440, Test: 0.7580


Epoch 02: 100%|██████████| 140/140 [00:00<00:00, 6126.71it/s]


Epoch 02, Loss: 1.3975, Approx. Train: 0.9929


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 176343.17it/s]


Epoch: 02, Train: 0.9929, Val: 0.7680, Test: 0.7650


Epoch 03: 100%|██████████| 140/140 [00:00<00:00, 8311.78it/s]


Epoch 03, Loss: 0.6457, Approx. Train: 1.0000


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 158475.48it/s]


Epoch: 03, Train: 1.0000, Val: 0.7640, Test: 0.7790


Epoch 04: 100%|██████████| 140/140 [00:00<00:00, 9729.47it/s]


Epoch 04, Loss: 0.1990, Approx. Train: 1.0000


Evaluating: 100%|██████████| 5416/5416 [00:00<00:00, 181090.47it/s]

Epoch: 04, Train: 1.0000, Val: 0.7660, Test: 0.7900
Median time per epoch: 0.0537s





## Save the model checkpoint

In [10]:
fp = 'Graphsage_model.pt'
torch.save(model, './model.pt')
torch.save(model, fp)

# GraphSAINT model

In [1]:
import argparse
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Flickr
from torch_geometric.loader import GraphSAINTRandomWalkSampler
from torch_geometric.nn import GraphConv
from torch_geometric.typing import WITH_TORCH_SPARSE
from torch_geometric.utils import degree


In [2]:
from torch_geometric.datasets import Planetoid, Reddit, Flickr
if not WITH_TORCH_SPARSE:
    quit("This example requires 'torch-sparse'")
    
path = osp.join('data', 'cora')
dataset = Planetoid(path, name=  'cora')
data = dataset[0]
row, col = data.edge_index
data.edge_weight = 1. / degree(col, data.num_nodes)[col]  # Norm by in-degree.

In [3]:
# Check some graph statistics of Cora graph
print("Number of nodes in the graph:", data.num_nodes)
print("Number of edges in the graph:", data.num_edges)
print("Node feature matrix with shape:", data.x.shape) # [num_nodes, num_node_features]
print("Graph connectivity in COO format with shape:", data.edge_index.shape) # [2, num_edges]
print("Target to train against :", data.y.shape) 
print("Node feature length", dataset.num_features)

Number of nodes in the graph: 2708
Number of edges in the graph: 10556
Node feature matrix with shape: torch.Size([2708, 1433])
Graph connectivity in COO format with shape: torch.Size([2, 10556])
Target to train against : torch.Size([2708])
Node feature length 1433


In [5]:
import argparse

# Define the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('--use_normalization', required=False, action='store_true')
parser.add_argument("-f", required=False)  # This is to handle the Jupyter Notebook's additional argument

# Parse known arguments to avoid errors in Jupyter Notebook
args, unknown = parser.parse_known_args()

# Define the model
loader = GraphSAINTRandomWalkSampler(data, batch_size=16, walk_length=2,
                                     num_steps=5, sample_coverage=10,
                                     save_dir=dataset.processed_dir)


Compute GraphSAINT normalization:   0%|          | 0/27080 [00:00<?, ?it/s]

Compute GraphSAINT normalization: : 27094it [00:00, 527343.77it/s]         


In [6]:
class Net(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        # Get the number of input features and output classes from the dataset
        in_channels = dataset.num_node_features
        out_channels = dataset.num_classes

        self.conv1 = GraphConv(in_channels, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        # Define a linear layer to combine the outputs of the three convolution layers
        self.lin = torch.nn.Linear(3 * hidden_channels, out_channels)

    def set_aggr(self, aggr):
        # Set the aggregation method for each convolution layer
        self.conv1.aggr = aggr
        self.conv2.aggr = aggr
        self.conv3.aggr = aggr

    def forward(self, x0, edge_index, edge_weight=None):
        # Apply the first GraphConv layer
        x1 = F.relu(self.conv1(x0, edge_index, edge_weight))
        x1 = F.dropout(x1, p=0.2, training=self.training)

        x2 = F.relu(self.conv2(x1, edge_index, edge_weight))
        x2 = F.dropout(x2, p=0.2, training=self.training)
        
        x3 = F.relu(self.conv3(x2, edge_index, edge_weight))
        x3 = F.dropout(x3, p=0.2, training=self.training)
        # Concatenate the outputs of the three convolution layers
        x = torch.cat([x1, x2, x3], dim=-1)
        x = self.lin(x)
        # Apply log softmax to the output
        return x.log_softmax(dim=-1)

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(hidden_channels=256).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [8]:
def train():
    # Set the model to training mode
    model.train()

    # Set the aggregation method based on the use_normalization flag
    model.set_aggr('add' if args.use_normalization else 'mean')

    total_loss = total_examples = 0
    # Iterate over the data loader
    for data in loader:
        data = data.to(device)
        # Zero the gradients
        optimizer.zero_grad()

        if args.use_normalization:
            # Compute edge weights if normalization is used
            edge_weight = data.edge_norm * data.edge_weight
            # Forward pass through the model with edge weights
            out = model(data.x, data.edge_index, edge_weight)
            # Compute the loss with no reduction
            loss = F.nll_loss(out, data.y, reduction='none')
            # Apply node normalization and sum the loss for the training mask
            loss = (loss * data.node_norm)[data.train_mask].sum()
        else:
            # Forward pass through the model without edge weights
            out = model(data.x, data.edge_index)
            # Compute the loss for the training mask
            loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])

        # Backward pass to compute the gradients
        loss.backward()
        # Update the model parameters
        optimizer.step()
        # Accumulate the total loss and the number of examples
        total_loss += loss.item() * data.num_nodes
        total_examples += data.num_nodes
        
    return total_loss / total_examples


In [9]:
@torch.no_grad()
def test():
    # Set the model to evaluation mode
    model.eval()
    # Set the aggregation method to 'mean' for evaluation
    model.set_aggr('mean')
    # Perform a forward pass through the model
    out = model(data.x.to(device), data.edge_index.to(device))
    # Get the predicted class by taking the argmax of the output
    pred = out.argmax(dim=-1)
    # Check which predictions are correct
    correct = pred.eq(data.y.to(device))

    accs = []
    # Calculate accuracy for train, validation, and test masks
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        accs.append(correct[mask].sum().item() / mask.sum().item())
        
    return accs

In [10]:
for epoch in range(1, 5):
    loss = train()
    accs = test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {accs[0]:.4f}, '
          f'Val: {accs[1]:.4f}, Test: {accs[2]:.4f}')

Epoch: 01, Loss: 1.9535, Train: 0.1643, Val: 0.0880, Test: 0.1010
Epoch: 02, Loss: 1.8895, Train: 0.1500, Val: 0.0720, Test: 0.0910
Epoch: 03, Loss: 1.9626, Train: 0.1571, Val: 0.0800, Test: 0.0940
Epoch: 04, Loss: 1.8472, Train: 0.2786, Val: 0.1380, Test: 0.1460


# Cluster_GCN with Reddit graph

## Import libraries

In [11]:
import time

import torch
import torch.nn.functional as F
from torch.nn import ModuleList
from tqdm import tqdm

from torch_geometric.datasets import Planetoid, Reddit
from torch_geometric.loader import ClusterData, ClusterLoader, NeighborLoader
from torch_geometric.nn import SAGEConv

## Import graph datasets: cora/citeseer/pubmed/ Reddit

In [12]:
path = osp.join('data', 'cora')
dataset = Planetoid(path,name = 'cora')
data = dataset[0]

cluster_data = ClusterData(data, num_parts=10, recursive=False, save_dir=dataset.processed_dir)
train_loader = ClusterLoader(cluster_data, batch_size=20, shuffle=True,
                             num_workers=12)

subgraph_loader = NeighborLoader(data, num_neighbors=[-1], batch_size=1024,
                                 shuffle=False, num_workers=12)


Computing METIS partitioning...
Done!


## Construct the model

In [13]:
class Net(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Initialize a list of SAGEConv layers
        self.convs = ModuleList(
            [SAGEConv(in_channels, 128), # First SAGEConv layer
             SAGEConv(128, out_channels)]) # Second SAGEConv layer

    def forward(self, x, edge_index):
        # Forward pass through each SAGEConv layer
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i != len(self.convs) - 1:
                x = F.relu(x)
                x = F.dropout(x, p=0.5, training=self.training)
        return F.log_softmax(x, dim=-1)

    def inference(self, x_all):
        # Initialize a progress bar
        pbar = tqdm(total=x_all.size(0) * len(self.convs))
        pbar.set_description('Evaluating')

        # Compute representations of nodes layer by layer, using *all*
        # available edges. This leads to faster computation in contrast to
        # immediately computing the final representations of each batch.
        for i, conv in enumerate(self.convs):
            xs = []
            for batch in subgraph_loader:
                edge_index = batch.edge_index.to(device)
                x = x_all[batch.n_id].to(device)
                # Get the target node features for the current batch
                x_target = x[:batch.batch_size]
                # Apply the current SAGEConv layer
                x = conv((x, x_target), edge_index)
                if i != len(self.convs) - 1:
                    # Apply ReLU activation after each layer except the last one
                    x = F.relu(x)
                xs.append(x.cpu())

                pbar.update(batch.batch_size)

            x_all = torch.cat(xs, dim=0)

        pbar.close()

        return x_all


## Initialize the model

In [14]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(dataset.num_features, dataset.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

## Define the train function

In [15]:
def train():
    model.train()

    total_loss = total_nodes = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = F.nll_loss(out[batch.train_mask], batch.y[batch.train_mask])
        loss.backward()
        optimizer.step()

        nodes = batch.train_mask.sum().item()
        total_loss += loss.item() * nodes
        total_nodes += nodes

    return total_loss / total_nodes


## Define the test function

In [16]:

@torch.no_grad()
def test():  # Inference should be performed on the full graph.
    model.eval()

    out = model.inference(data.x)
    y_pred = out.argmax(dim=-1)

    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        correct = y_pred[mask].eq(data.y[mask]).sum().item()
        accs.append(correct / mask.sum().item())
    return accs

## Train the model

In [17]:
times = []
for epoch in range(1, 5):
    start = time.time()
    loss = train()
    if epoch % 5 == 0:
        train_acc, val_acc, test_acc = test()
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, '
              f'Val: {val_acc:.4f}, test: {test_acc:.4f}')
    else:
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')
    times.append(time.time() - start)
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")

Epoch: 01, Loss: 1.9491
Epoch: 02, Loss: 1.7128
Epoch: 03, Loss: 1.4123
Epoch: 04, Loss: 1.0836
Median time per epoch: 0.1457s
