### Assignment 1. Download "Citeseer" graph.
Questions: Construct GCN model by using dropping_path technique for node classification task

In [2]:
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import subgraph

# Load Cora dataset
dataset = Planetoid(root='.', name='Citeseer', transform=T.NormalizeFeatures())
data = dataset[0]
edge_index  = data.edge_index

In [3]:
from typing import List, Optional, Tuple, Union
from torch import Tensor
from torch_geometric.utils.num_nodes import maybe_num_nodes

# Function to extract the k-hop subgraph around a given node or set of nodes
def k_hop_subgraph(
    node_idx: Union[int, List[int], Tensor],  # The target node(s)
    num_hops: int,  # The number of hops k
    edge_index: Tensor,  # The edge indices
    relabel_nodes: bool = False,  # Whether to relabel nodes to a contiguous range
    num_nodes: Optional[int] = None,  # The number of nodes in the graph
    flow: str = 'source_to_target',  # The flow direction ('source_to_target' or 'target_to_source')
    directed: bool = False,  # Whether the graph is directed
) -> Tuple[Tensor, Tensor, Tensor, Tensor]:  # Returns the subgraph, edge indices, inverse mapping, and edge mask

    # Determine the number of nodes if not provided
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    # Ensure the flow direction is valid
    assert flow in ['source_to_target', 'target_to_source']
    if flow == 'target_to_source':
        row, col = edge_index
    else:
        col, row = edge_index

    # Initialize masks for nodes and edges
    node_mask = row.new_empty(num_nodes, dtype=torch.bool)
    edge_mask = row.new_empty(row.size(0), dtype=torch.bool)

    # Convert node_idx to a tensor if it is not already
    if isinstance(node_idx, (int, list, tuple)):
        node_idx = torch.tensor([node_idx], device=row.device).flatten()
    else:
        node_idx = node_idx.to(row.device)

    # List to store the subsets of nodes at each hop
    subsets = [node_idx]

    # Perform k-hop expansion
    for _ in range(num_hops):
        node_mask.fill_(False)
        node_mask[subsets[-1]] = True
        torch.index_select(node_mask, 0, row, out=edge_mask)
        subsets.append(col[edge_mask])

    # Concatenate all subsets and get unique nodes
    subset, inv = torch.cat(subsets).unique(return_inverse=True)
    inv = inv[:node_idx.numel()]

    # Create a mask for the subset of nodes
    node_mask.fill_(False)
    node_mask[subset] = True

    # If the graph is undirected, update the edge mask
    if not directed:
        edge_mask = node_mask[row] & node_mask[col]

    # Filter the edge index to include only the edges in the subgraph
    edge_index = edge_index[:, edge_mask]

    # Relabel nodes to a contiguous range if specified
    if relabel_nodes:
        node_idx = row.new_full((num_nodes, ), -1)
        node_idx[subset] = torch.arange(subset.size(0), device=row.device)
        edge_index = node_idx[edge_index]

    # Return the subset of nodes, the filtered edge index, the inverse mapping, and the edge mask
    return subset, edge_index, inv, edge_mask

In [4]:
from torch_geometric.nn import GCNConv
import torch.nn.functional as F


class GCN_dropout_path(torch.nn.Module):
    """Graph Convolutional Network"""

    def __init__(self, dim_in, dim_h, dim_out):
        super().__init__()
        self.gcn1 = GCNConv(dim_in, dim_h)
        self.gcn2 = GCNConv(dim_h, dim_out)
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.01, weight_decay=5e-4)

    def forward(self, x, edge_index):
        h = F.dropout(x, p=0.5, training=self.training)
        h = self.gcn1(h, edge_index)
        h = torch.relu(h)
        h = F.dropout(h, p=0.5, training=self.training)
        h = self.gcn2(h, edge_index)
        return h, F.log_softmax(h, dim=1)

In [5]:
from torch_geometric.utils import cumsum, degree, sort_edge_index, subgraph
from torch_geometric import is_compiling
import torch_geometric.typing

def dropout_path(edge_index: Tensor, p: float = 0.2, walks_per_node: int = 1,
                 walk_length: int = 3, num_nodes: Optional[int] = None,
                 is_sorted: bool = False, training: bool = True) -> Tuple[Tensor, Tensor]:
    # Ensure probability is within range
    if not (0.0 <= p <= 1.0):
        raise ValueError(f'Sample probability must be between 0 and 1 (got {p})')

    # Return unchanged edge_index if not in training mode or p=0
    if not training or p == 0.0:
        return edge_index, torch.ones(edge_index.size(1), dtype=torch.bool, device=edge_index.device)

    # Ensure required torch-cluster support is available
    if not torch_geometric.typing.WITH_TORCH_CLUSTER or is_compiling():
        raise ImportError('`dropout_path` requires `torch-cluster`.')

    # Sort edges if necessary
    num_nodes = maybe_num_nodes(edge_index, num_nodes)
    edge_orders = None
    if not is_sorted:
        edge_orders = torch.arange(edge_index.size(1), device=edge_index.device)
        edge_index, edge_orders = sort_edge_index(edge_index, edge_orders, num_nodes=num_nodes)

    # Randomly mask edges
    row, col = edge_index
    sample_mask = torch.rand(row.size(0), device=edge_index.device) <= p
    start = row[sample_mask].repeat(walks_per_node)

    # Perform random walk to determine paths
    rowptr = cumsum(degree(row, num_nodes=num_nodes, dtype=torch.long))
    n_id, e_id = torch.ops.torch_cluster.random_walk(rowptr, col, start, walk_length, 1.0, 1.0)
    e_id = e_id[e_id != -1].view(-1)  # Filter out illegal edges

    # Adjust for sorted edges if applicable
    if edge_orders is not None:
        e_id = edge_orders[e_id]

    # Apply mask to edges and return
    edge_mask = torch.ones(edge_index.size(1), dtype=torch.bool, device=edge_index.device)
    edge_mask[e_id] = False
    return edge_index[:, edge_mask], edge_mask


def accuracy(pred_y, y):
    """Calculate accuracy."""
    return ((pred_y == y).sum() / len(y)).item()


def train_dropout_path(model, data):
    """Train a GNN model and return the trained model."""
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = model.optimizer
    epochs = 5

    model.train()
    for epoch in range(epochs + 1):
        # Training
        optimizer.zero_grad()
        edge_index1, _ = dropout_path(data.edge_index, p=0.2, walks_per_node=1, walk_length=3)
        _, out = model(data.x, edge_index1)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Validation
        val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
        val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])

        # Print metrics every 10 epochs
        if (epoch % 1 == 0):
            print(f'Epoch {epoch:>3} | Train Loss: {loss:.3f} | Train Acc: '
                  f'{acc * 100:>6.2f}% | Val Loss: {val_loss:.2f} | '
                  f'Val Acc: {val_acc * 100:.2f}%')

    return model


def test(model, data):
    """Evaluate the model on test set and print the accuracy score."""
    model.eval()
    _, out = model(data.x, data.edge_index)
    acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
    return acc

import time
start_time = time.time()

# Set the device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create GCN model
gcn_dropout_path = GCN_dropout_path(dataset.num_features, 16, dataset.num_classes).to(device)
print(gcn_dropout_path)

# Train
train_dropout_path(gcn_dropout_path, data.to(device))

# Test
acc = test(gcn_dropout_path, data.to(device))
print(f'\nGCN test accuracy: {acc*100:.2f}%\n')

end_time = time.time()
print("Execution time:", end_time - start_time, "seconds")

GCN_dropout_path(
  (gcn1): GCNConv(3703, 16)
  (gcn2): GCNConv(16, 6)
)
Epoch   0 | Train Loss: 1.792 | Train Acc:  17.50% | Val Loss: 1.79 | Val Acc: 13.00%
Epoch   1 | Train Loss: 1.789 | Train Acc:  18.33% | Val Loss: 1.79 | Val Acc: 6.60%
Epoch   2 | Train Loss: 1.784 | Train Acc:  25.83% | Val Loss: 1.79 | Val Acc: 7.40%
Epoch   3 | Train Loss: 1.778 | Train Acc:  36.67% | Val Loss: 1.79 | Val Acc: 16.60%
Epoch   4 | Train Loss: 1.773 | Train Acc:  38.33% | Val Loss: 1.79 | Val Acc: 22.00%
Epoch   5 | Train Loss: 1.772 | Train Acc:  41.67% | Val Loss: 1.78 | Val Acc: 24.60%

GCN test accuracy: 36.00%

Execution time: 0.8125090599060059 seconds


### Assignment 2. Load the Cora dataset from Torch Geometric.

Questions: Train the Mixhop model on node classification task with 2-step transition and 3-step transition steps


In [6]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import subgraph

# Load Cora dataset
dataset = Planetoid(root='.', name='Cora', transform=T.NormalizeFeatures())
data = dataset[0]
edge_index  = data.edge_index

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [7]:
import torch
from torch import nn, Tensor
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.nn.dense.linear import Linear
from torch_geometric.nn.inits import zeros
from torch_geometric.utils import spmm
from typing import List, Optional

class MixHopConv(MessagePassing):
    def __init__(
        self,
        in_channels: int,  # Number of input features
        out_channels: int,  # Number of output features
        powers: Optional[List[int]] = None,  # List of powers for MixHop
        add_self_loops: bool = True,  # Whether to add self-loops
        bias: bool = True,  # Whether to add a bias term
        **kwargs,
    ):
        super().__init__(aggr='add', **kwargs)  # Initialize the MessagePassing class with 'add' aggregation
        
        self.powers = powers or [0, 1, 2]  # Default powers are [0, 1, 2]
        self.add_self_loops = add_self_loops  # Store the add_self_loops flag
        
        # Create a list of linear transformations for each power
        self.lins = nn.ModuleList([
            Linear(in_channels, out_channels, bias=False) if p in self.powers else nn.Identity()
            for p in range(max(self.powers) + 1)
        ])
        
        # Initialize the bias parameter if bias is True
        self.bias = Parameter(torch.empty(len(self.powers) * out_channels)) if bias else None
        self.reset_parameters()  # Reset parameters

    def reset_parameters(self):
        # Reset parameters of each linear transformation
        for lin in self.lins:
            if hasattr(lin, 'reset_parameters'):
                lin.reset_parameters()
        zeros(self.bias)  # Initialize the bias to zeros

    def forward(self, x: Tensor, edge_index, edge_weight=None) -> Tensor:
        # Normalize the edge index and edge weight using GCN normalization
        edge_index, edge_weight = gcn_norm(
            edge_index, edge_weight, x.size(0), False, self.add_self_loops, self.flow, x.dtype
        )
        
        # Initialize the output list with the transformed input features
        outs = [self.lins[0](x)]
        
        # Propagate the features through the graph for each power
        for lin in self.lins[1:]:
            x = self.propagate(edge_index, x=x, edge_weight=edge_weight)
            outs.append(lin(x))

        # Concatenate the outputs for each power along the feature dimension
        out = torch.cat([outs[p] for p in self.powers], dim=-1)
        
        # Add the bias term if it exists
        return out + self.bias if self.bias is not None else out

    def message(self, x_j: Tensor, edge_weight=None) -> Tensor:
        # Compute the message to be passed to the target nodes
        return x_j if edge_weight is None else edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t, x: Tensor) -> Tensor:
        # Perform sparse matrix multiplication to aggregate messages
        return spmm(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        # Return a string representation of the MixHopConv layer
        return f'{self.__class__.__name__}({self.in_channels}, {self.out_channels}, powers={self.powers})'

In [8]:
from torch_geometric.nn import BatchNorm, Linear

class MixHop(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # First MixHopConv layer with powers [0, 1, 2] and 60 output features
        self.conv1 = MixHopConv(dataset.num_features, 60, powers=[0, 1, 2])
        # Batch normalization for the first layer's output
        self.norm1 = BatchNorm(3 * 60)

        # Second MixHopConv layer with powers [0, 1, 2] and 60 output features
        self.conv2 = MixHopConv(3 * 60, 60, powers=[0, 1, 2])
        # Batch normalization for the second layer's output
        self.norm2 = BatchNorm(3 * 60)

        # Third MixHopConv layer with powers [0, 1, 2] and 60 output features
        self.conv3 = MixHopConv(3 * 60, 60, powers=[0, 1, 2])
        # Batch normalization for the third layer's output
        self.norm3 = BatchNorm(3 * 60)

        # Linear layer to map the final output to the number of classes
        self.lin = Linear(3 * 60, dataset.num_classes)

    def forward(self, x, edge_index):
        # Apply dropout to the input features
        x = F.dropout(x, p=0.7, training=self.training)

        # First MixHopConv layer
        x = self.conv1(x, edge_index)
        # Apply batch normalization
        x = self.norm1(x)
        # Apply dropout
        x = F.dropout(x, p=0.9, training=self.training)

        # Second MixHopConv layer
        x = self.conv2(x, edge_index)
        # Apply batch normalization
        x = self.norm2(x)
        # Apply dropout
        x = F.dropout(x, p=0.9, training=self.training)

        # Third MixHopConv layer
        x = self.conv3(x, edge_index)
        # Apply batch normalization
        x = self.norm3(x)
        # Apply dropout
        x = F.dropout(x, p=0.9, training=self.training)

        # Final linear layer to get the class scores
        return self.lin(x)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model, data = MixHop().to(device), data.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5, weight_decay=0.005)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.01)


In [10]:
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    scheduler.step()
    return float(loss)

In [11]:
@torch.no_grad()
def test():
    model.eval()
    pred = model(data.x, data.edge_index).argmax(dim=-1)

    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        accs.append(int((pred[mask] == data.y[mask]).sum()) / int(mask.sum()))
    return accs


In [12]:
best_val_acc = test_acc = 0

for epoch in range(1, 100):
    loss = train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, '
          f'Val: {val_acc:.4f}, Test: {tmp_test_acc:.4f}')


print(f'\nBest Validation Accuracy: {best_val_acc:.4f}')
print(f'Corresponding Test Accuracy: {test_acc:.4f}')


Epoch: 001, Loss: 3.0005, Train: 0.1429, Val: 0.0580, Test: 0.0640
Epoch: 002, Loss: 4.2431, Train: 0.1643, Val: 0.1360, Test: 0.1730
Epoch: 003, Loss: 3.7366, Train: 0.1429, Val: 0.3200, Test: 0.3200
Epoch: 004, Loss: 5.0231, Train: 0.2286, Val: 0.1100, Test: 0.1340
Epoch: 005, Loss: 5.0167, Train: 0.1786, Val: 0.3180, Test: 0.3170
Epoch: 006, Loss: 7.6017, Train: 0.2000, Val: 0.1340, Test: 0.1330
Epoch: 007, Loss: 7.5737, Train: 0.3714, Val: 0.2220, Test: 0.2400
Epoch: 008, Loss: 6.2074, Train: 0.2571, Val: 0.1300, Test: 0.1390
Epoch: 009, Loss: 7.0606, Train: 0.1929, Val: 0.0900, Test: 0.1060
Epoch: 010, Loss: 5.2631, Train: 0.1714, Val: 0.0760, Test: 0.1060
Epoch: 011, Loss: 8.0932, Train: 0.1643, Val: 0.3180, Test: 0.3230
Epoch: 012, Loss: 9.0790, Train: 0.1714, Val: 0.1740, Test: 0.1490
Epoch: 013, Loss: 5.7691, Train: 0.1643, Val: 0.0980, Test: 0.1070
Epoch: 014, Loss: 5.7827, Train: 0.1500, Val: 0.1600, Test: 0.1420
Epoch: 015, Loss: 4.7050, Train: 0.2143, Val: 0.1780, Test: 0.