In [1]:
# import os
# import torch
# os.environ['TORCH'] = torch.__version__
# print(torch.__version__)

# !pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
# !pip install ogb
# !pip install -U kaleido

In [2]:
# !git clone https://github.com/SamirMoustafa/visualization-loss-landscape-GNNs.git
# !pip install -e visualization-loss-landscape-GNNs

In [3]:
# On Colab, restart the runtime after running the install cells above.
from torch_landscape.directions import PcaDirections, RandomDirections, LearnableDirections
from torch_landscape.landscape_linear import LinearLandscapeCalculator
from torch_landscape.trajectory import TrajectoryCalculator
from torch_landscape.utils import clone_parameters, reset_parameters, seed_everything
from torch_landscape.visualize import Plotly2dVisualization, VisualizationData
from torch_landscape.visualize_options import VisualizationOptions

In [4]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
import numpy as np
from tqdm import tqdm
from os.path import join

In [5]:
class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers producing per-node log-probabilities.

    Args:
        num_layers: Requested number of graph-convolution layers. One input
            layer and one output layer are always created, plus
            ``num_layers - 2`` hidden layers, so any value below 2 still
            yields a two-layer network.
        in_channels: Size of the input node features.
        out_channels: Number of output classes.
        hidden_channels: Width of every hidden representation
            (default 128, the value that used to be hard-coded).
        dropout: Dropout probability applied after every non-final layer
            (default 0.5, the value that used to be hard-coded).
    """

    def __init__(self, num_layers, in_channels, out_channels,
                 hidden_channels=128, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.layers = torch.nn.ModuleList()
        self.layers.append(GCNConv(in_channels, hidden_channels))  # First layer

        for _ in range(num_layers - 2):  # Hidden layers
            self.layers.append(GCNConv(hidden_channels, hidden_channels))

        self.layers.append(GCNConv(hidden_channels, out_channels))  # Output layer

    def forward(self, data):
        """Return ``log_softmax`` scores over classes (dim=1) for ``data``.

        ``data`` must expose ``x`` and ``edge_index`` attributes, which are
        passed to every convolution.
        """
        x, edge_index = data.x, data.edge_index
        # ReLU + dropout after every layer except the last one.
        for layer in self.layers[:-1]:
            x = F.relu(layer(x, edge_index))
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.layers[-1](x, edge_index)
        return F.log_softmax(x, dim=1)

In [None]:
# Train a GCN on Cora several times and report mean/std of the test metrics
# taken at the epoch with the best validation accuracy of each run.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = dataset[0].to(device)

num_layers = 3

num_runs = 10
test_losses = []
test_accuracies = []

for run in range(num_runs):
    model = GCN(num_layers=num_layers,
                in_channels=dataset.num_features,
                out_channels=dataset.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    best_val_acc = 0
    best_test_loss = float('inf')
    best_test_acc = 0

    for epoch in range(200):
        # Re-enable training mode every epoch: the evaluation step below
        # switches the model to eval mode, which would otherwise leave
        # dropout disabled for all epochs after the first one.
        model.train()
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Evaluate with dropout off and without building an autograd graph.
        model.eval()
        with torch.no_grad():
            eval_out = model(data)
        _, pred = eval_out.max(dim=1)
        val_correct = float(pred[data.val_mask].eq(data.y[data.val_mask]).sum().item())
        val_acc = val_correct / data.val_mask.sum().item()
        # The test loss must come from the eval-mode output, not from the
        # train-mode (dropout, pre-update) forward pass used for the gradient.
        test_loss = F.nll_loss(eval_out[data.test_mask], data.y[data.test_mask]).item()
        test_correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
        test_acc = test_correct / data.test_mask.sum().item()

        # Model selection on validation accuracy; test metrics are only recorded.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_test_loss = test_loss
            best_test_acc = test_acc

    test_losses.append(best_test_loss)
    test_accuracies.append(best_test_acc  * 100)
    print(f'Run {run + 1}, Best Validation Accuracy: {best_val_acc}, Test Loss: {best_test_loss}, Test Accuracy: {best_test_acc}')

mean_test_loss = np.mean(test_losses)
std_test_loss = np.std(test_losses)
mean_test_acc = np.mean(test_accuracies)
std_test_acc = np.std(test_accuracies)

print(f'Mean Test Loss: {mean_test_loss}, Standard Deviation of Test Loss: {std_test_loss}')
print(f'Mean Test Accuracy: {mean_test_acc}, Standard Deviation of Test Accuracy: {std_test_acc}')


In [None]:
import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

In [None]:
class GCN_res(torch.nn.Module):
    """GCN with residual connections and a learned mix of layer outputs.

    The network is: input linear projection -> ``num_layers`` blocks of
    ``GCNConv`` + ``BatchNorm1d`` + ReLU + dropout with skip connections ->
    softmax-weighted sum of all block outputs -> output linear layer ->
    ``log_softmax``.

    Args:
        dataset: Object exposing ``num_node_features`` and ``num_classes``.
        hidden: Width of every hidden representation.
        num_layers: Number of convolution blocks.
    """

    def __init__(self, dataset, hidden=256, num_layers=6):
        super().__init__()

        self.num_layers = num_layers
        self.convs = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()

        self.input_fc = torch.nn.Linear(dataset.num_node_features, hidden)

        for _ in range(self.num_layers):
            self.convs.append(GCNConv(hidden, hidden))
            self.bns.append(torch.nn.BatchNorm1d(hidden))

        self.out_fc = torch.nn.Linear(hidden, dataset.num_classes)
        # One mixing logit per convolution block (softmax-ed in forward).
        self.weights = torch.nn.Parameter(torch.randn((len(self.convs))))

    def reset_parameters(self):
        """Re-initialise every sub-module and the layer-mixing logits."""
        for module in [*self.convs, *self.bns, self.input_fc, self.out_fc]:
            module.reset_parameters()
        torch.nn.init.normal_(self.weights)

    def forward(self, data):
        """Return per-node ``log_softmax`` scores (dim=1).

        ``data`` must expose ``x`` and ``adj_t`` attributes.
        """
        adj_t = data.adj_t
        projected = self.input_fc(data.x)

        hidden = projected
        block_outputs = []  # stores the result of every block
        for i in range(self.num_layers):
            hidden = self.bns[i](self.convs[i](hidden, adj_t))
            hidden = F.relu(hidden, inplace=True)
            hidden = F.dropout(hidden, p=0.5, training=self.training)

            # Skip connection from the projected input for every block ...
            hidden = hidden + 0.2 * projected
            # ... and from the previous block's output once there is one.
            if block_outputs:
                hidden = hidden + 0.5 * block_outputs[-1]
            block_outputs.append(hidden)

        # Combine the block outputs with softmax-normalised learned weights.
        mix = F.softmax(self.weights, dim=0)
        combined = sum([out * mix[i] for i, out in enumerate(block_outputs)])

        logits = self.out_fc(combined)
        return F.log_softmax(logits, dim=1)

def train(model, data, train_idx, optimizer):
    """Perform one full-batch optimisation step.

    Args:
        model: Module called as ``model(data)``; its output is fed to
            ``F.nll_loss``.
        data: Object with a ``y`` label tensor that is squeezed on dim 1.
        train_idx: Index selecting the rows used for the loss.
        optimizer: Optimizer over the model's parameters.

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(data)
    targets = data.y.squeeze(1)
    step_loss = F.nll_loss(log_probs[train_idx], targets[train_idx])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': data.y[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': data.y[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': data.y[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, valid_acc, test_acc

@torch.no_grad()
def evaluate(model):
    """Return the cross-entropy of ``model`` on the training split as a float.

    Reads the module-level ``data`` and ``split_idx`` variables and switches
    the model to eval mode before the forward pass.
    """
    model.eval()
    predictions = model(data)[split_idx['train']]
    targets = data.y.squeeze(1)[split_idx['train']]
    return F.cross_entropy(predictions, targets).item()

In [None]:
# Train GCN_res on ogbn-arxiv, recording parameter snapshots every 10 epochs
# and the weights/metrics of the epoch with the best validation accuracy.
num_layers = 8

device_id = 0
# NOTE: hidden_channels and dropout are not consumed in this cell; the model
# below is built with hidden=128 and GCN_res uses a fixed dropout of 0.5.
hidden_channels = 256
dropout = 0.5
lr = 0.01
epochs = 500

device = f'cuda:{device_id}' if torch.cuda.is_available() else 'cpu'
device = torch.device(device)

dataset = PygNodePropPredDataset(name='ogbn-arxiv', transform=T.ToSparseTensor())
data = dataset[0]
data.adj_t = data.adj_t.to_symmetric()  # make the sparse adjacency symmetric

split_idx = dataset.get_idx_split()
train_idx = split_idx['train'].to(device)

model = GCN_res(dataset=dataset, hidden=128, num_layers=num_layers).to(device)


data = data.to(device)

evaluator = Evaluator(name='ogbn-arxiv')

model.reset_parameters()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

intermediate_results = []
best_train_loss = 0
best_valid_acc = 0
best_test_acc = 0
best_model_state_dict = None

for epoch in tqdm(range(1, 1 + epochs)):
    loss = train(model, data, train_idx, optimizer)
    result = test(model, data, split_idx, evaluator)
    train_acc, valid_acc, test_acc = result

    # Keep a parameter snapshot (with its training loss) every 10 epochs.
    if epoch % 10 == 0:
        intermediate_results.append((clone_parameters(model.parameters()), loss))

    # Update the best validation and test accuracies
    if valid_acc > best_valid_acc:
        best_train_loss = loss
        best_valid_acc = valid_acc
        best_test_acc = test_acc
        # state_dict() hands back references to the live tensors, so clone
        # them; otherwise this "best" snapshot would keep changing as
        # training continues and end up equal to the final weights.
        best_model_state_dict = {
            key: value.detach().clone()
            for key, value in model.state_dict().items()
        }

# Print the best test accuracy based on the best validation accuracy
print(f'Best Test Accuracy based on Best Validation Accuracy: {100 * best_test_acc:.2f}%')

In [None]:
# --- Output naming -----------------------------------------------------------
filename_prefix = f"gcn_res-l{num_layers}-p{sum([p.numel() for p in model.parameters()])}"
title = "ogbn-arxiv"
output_folder = "./"
output_file = filename_prefix + title
file_path = join(output_folder, output_file)
title += f", Loss: {loss:.3f}, Accuracy: {best_valid_acc:.3f}"

# --- Directions from the recorded parameter snapshots ------------------------
directions_calculator = LearnableDirections(list(model.parameters()), intermediate_results)
directions = directions_calculator.calculate_directions()

# --- Project the training trajectory and size the plot range around it -------
options = VisualizationOptions(num_points=20)
trajectory_calculator = TrajectoryCalculator(list(model.parameters()), directions)
trajectory = trajectory_calculator.project_with_loss(intermediate_results)
trajectory.set_range_to_fit_trajectory(options)


def loss_at_current_parameters():
    """Loss callback for the landscape calculator: training-split loss of ``model``."""
    return evaluate(model)


# --- Evaluate the loss surface and write the figure --------------------------
landscape_calculator = LinearLandscapeCalculator(model.parameters(), directions, options=options)
landscape = landscape_calculator.calculate_loss_surface_data_model(model, loss_at_current_parameters)
plotter = Plotly2dVisualization(options)
plotter.plot(VisualizationData(landscape, trajectory), file_path, title, "pdf")