# Advanced Model Implementation
# GraphTransformer & DeeperGCN



In [None]:
!pip install torch_geometric





# Import Libraries and Helper Functions


In [None]:
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.datasets import Planetoid, WikipediaNetwork, WebKB
from torch_geometric.nn import TransformerConv, GENConv, DeepGCNLayer
from torch.nn import Linear, LayerNorm, BatchNorm1d
import time
import pandas as pd
import matplotlib.pyplot as plt

# Helper functions from previous steps

def load_dataset(dataset_name):
    """Return the PyTorch Geometric dataset registered under ``dataset_name``.

    Recognized names and the loader class used for each:

    * ``'Cora'``, ``'PubMed'``, ``'CiteSeer'`` -> ``Planetoid``
    * ``'Chameleon'``, ``'Squirrel'`` -> ``WikipediaNetwork``
    * ``'Texas'`` -> ``WebKB``

    ``./data/<dataset_name>`` is passed to the loader as its root directory.

    Raises:
        ValueError: If ``dataset_name`` is none of the names listed above.
    """
    # Citation graphs.
    if dataset_name in ('Cora', 'PubMed', 'CiteSeer'):
        return Planetoid(root=f'./data/{dataset_name}', name=dataset_name)

    # Wikipedia page-page graphs.
    if dataset_name in ('Chameleon', 'Squirrel'):
        return WikipediaNetwork(root=f'./data/{dataset_name}', name=dataset_name)

    # WebKB graphs (only Texas is enabled here).
    if dataset_name in ('Texas',):
        return WebKB(root=f'./data/{dataset_name}', name=dataset_name)

    raise ValueError("Dataset not recognized!")

def fix_masks(data):
    """Collapse multi-dimensional split masks to 1-D boolean masks, in place.

    For each of ``train_mask``, ``val_mask`` and ``test_mask`` on ``data``:
    when the mask has more than one dimension, only its first column
    (``mask[:, 0]``) is kept and cast to ``bool``. Masks that are already
    one-dimensional are left untouched.

    Returns:
        The same ``data`` object that was passed in.
    """
    for mask_name in ('train_mask', 'val_mask', 'test_mask'):
        mask = getattr(data, mask_name)
        if len(mask.shape) > 1:
            # Several splits are stored column-wise; use the first one.
            setattr(data, mask_name, mask[:, 0].bool())
    return data

def train_model(model, data, optimizer, criterion):
    """Perform a single full-graph training step.

    The model is switched to training mode, gradients are cleared, and the
    model is run on ``data.x`` / ``data.edge_index``. The loss is computed by
    ``criterion`` on the rows selected by ``data.train_mask`` only, with the
    targets ``data.y`` cast to ``long``. The loss is then back-propagated and
    one optimizer step is taken.

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(data.x, data.edge_index)

    # Only the training nodes contribute to the loss.
    train_nodes = data.train_mask
    targets = data.y[train_nodes].long()
    step_loss = criterion(predictions[train_nodes], targets)

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

@torch.no_grad()
def evaluate_model(model, data, mask):
    """Return the classification accuracy of ``model`` on the nodes in ``mask``.

    The model is switched to evaluation mode and run on ``data.x`` /
    ``data.edge_index`` with gradient tracking disabled (via the decorator).
    The predicted class of each node is the arg-max over dimension 1 of the
    model output.

    Args:
        model: Callable module taking ``(x, edge_index)``.
        data: Object providing ``x``, ``edge_index`` and ``y``.
        mask: Mask selecting the nodes to score.

    Returns:
        The fraction of selected nodes whose prediction equals ``data.y``,
        or ``0.0`` when ``mask`` selects no nodes.
    """
    model.eval()

    num_selected = int(mask.sum())
    if num_selected == 0:
        # An empty split would otherwise raise ZeroDivisionError below.
        return 0.0

    out = model(data.x, data.edge_index)  # Forward pass
    pred = out.argmax(dim=1)  # Predicted class per node
    correct = pred[mask] == data.y[mask]  # Compare predictions with ground truth
    return int(correct.sum()) / num_selected


# Implement Graph Transformer Model


In [None]:
class GraphTransformer(torch.nn.Module):
    """Node classifier built from a stack of ``TransformerConv`` layers.

    Pipeline: a linear embedding to ``hidden_dim``, then ``num_layers``
    blocks of (``TransformerConv`` -> ReLU -> add block input -> ``LayerNorm``),
    then a linear read-out to ``output_dim`` followed by ``log_softmax``.

    Args:
        input_dim: Size of the input node features.
        hidden_dim: Width used by the embedding and every block.
        output_dim: Number of output classes.
        num_layers: Number of attention blocks.
        heads: Number of attention heads passed to each ``TransformerConv``.
        dropout: Dropout value passed to each ``TransformerConv``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, heads=4, dropout=0.1):
        super().__init__()
        self.num_layers = num_layers

        self.embedding = Linear(input_dim, hidden_dim)

        # concat=False keeps every conv's output width at hidden_dim, so the
        # residual addition in forward() is shape-compatible.
        self.convs = torch.nn.ModuleList([
            TransformerConv(hidden_dim, hidden_dim, heads=heads, dropout=dropout, concat=False)
            for _ in range(num_layers)
        ])
        self.norms = torch.nn.ModuleList([
            LayerNorm(hidden_dim) for _ in range(num_layers)
        ])

        self.output_layer = Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        """Return per-node log-probabilities (``log_softmax`` over dim 1)."""
        h = self.embedding(x)
        for attention, layer_norm in zip(self.convs, self.norms):
            skip = h
            activated = F.relu(attention(h, edge_index))
            # Residual connection first, normalization second.
            h = layer_norm(activated + skip)
        logits = self.output_layer(h)
        return F.log_softmax(logits, dim=1)


# Implement DeeperGCN Model

In [None]:
class DeeperGCN(torch.nn.Module):
    """Node classifier built from a stack of ``DeepGCNLayer`` blocks.

    Pipeline: a linear embedding to ``hidden_dim``, then ``num_layers``
    ``DeepGCNLayer`` blocks (each wrapping a ``GENConv``, a normalization
    layer and an in-place ReLU, with block type ``'res+'``), then a linear
    read-out to ``output_dim`` followed by ``log_softmax``.

    Args:
        input_dim: Size of the input node features.
        hidden_dim: Width used by the embedding and every block.
        output_dim: Number of output classes.
        num_layers: Number of ``DeepGCNLayer`` blocks.
        aggr, t, learn_t, p, learn_p, msg_norm: Forwarded unchanged to
            every ``GENConv``.
        norm: ``'layer'`` or ``'batch'``. Forwarded to ``GENConv`` and also
            selects the normalization layer of each block.

    Raises:
        ValueError: If ``norm`` is neither ``'layer'`` nor ``'batch'`` (checked
            while building the first block, after its ``GENConv`` is created).
    """

    def __init__(
        self, input_dim, hidden_dim, output_dim, num_layers,
        aggr='softmax', t=1.0, learn_t=False, p=1.0, learn_p=False,
        msg_norm=True, norm='layer'
    ):
        super().__init__()
        self.num_layers = num_layers

        self.embedding = Linear(input_dim, hidden_dim)
        self.layers = torch.nn.ModuleList()

        # The same convolution settings are used for every block.
        conv_options = dict(
            aggr=aggr,
            t=t, learn_t=learn_t,
            p=p, learn_p=learn_p,
            msg_norm=msg_norm,
            norm=norm,
        )

        for _ in range(num_layers):
            graph_conv = GENConv(hidden_dim, hidden_dim, **conv_options)
            block_norm = self._make_norm(norm, hidden_dim)
            self.layers.append(
                DeepGCNLayer(
                    graph_conv,
                    norm=block_norm,
                    act=torch.nn.ReLU(inplace=True),
                    block='res+',  # Residual connection
                    dropout=0.0,
                    ckpt_grad=False,
                )
            )

        self.output_layer = Linear(hidden_dim, output_dim)

    @staticmethod
    def _make_norm(norm, hidden_dim):
        """Build the normalization layer named by ``norm`` for one block."""
        if norm == 'layer':
            return LayerNorm(hidden_dim)
        if norm == 'batch':
            return BatchNorm1d(hidden_dim)
        raise ValueError("norm should be 'layer' or 'batch'")

    def forward(self, x, edge_index):
        """Return per-node log-probabilities (``log_softmax`` over dim 1)."""
        h = self.embedding(x)
        for block in self.layers:
            h = block(h, edge_index)
        return F.log_softmax(self.output_layer(h), dim=1)


# Training Function

In [None]:
def train_and_log_model(model_class, dataset_name, num_layers, hidden_dim, model_name,
                        num_epochs=100, lr=0.005, weight_decay=5e-4, log_every=10):
    """Train one model configuration on one dataset and save per-epoch metrics.

    The dataset is loaded with ``load_dataset``, its masks are normalized with
    ``fix_masks``, and labels with more than one dimension are reduced with
    ``argmax(dim=1)``. The model is trained with Adam and ``NLLLoss``; after
    every epoch it is evaluated on the validation and test masks. Per-epoch
    metrics are written to
    ``<dataset>_<model>_<layers>_layers_<hidden>_hidden_results.csv`` in the
    current working directory.

    Args:
        model_class: Class called as ``model_class(input_dim=..., hidden_dim=...,
            output_dim=..., num_layers=...)``.
        dataset_name: Name understood by ``load_dataset``.
        num_layers: Forwarded to ``model_class``.
        hidden_dim: Forwarded to ``model_class``.
        model_name: Label used in log messages and in the CSV filename.
        num_epochs: Number of training epochs.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        log_every: Print progress every ``log_every`` epochs (starting with
            the first one); a falsy value disables progress printing.

    Returns:
        Tuple ``(best_val_acc, best_test_acc, total_time, num_params,
        hidden_dim)``. ``best_test_acc`` is the test accuracy at the first
        epoch that reached the best validation accuracy. ``total_time`` is
        the wall-clock duration of the epoch loop, evaluation included.
    """
    print(f"\n--- Training {model_name} on {dataset_name} with {num_layers} layers and hidden_dim={hidden_dim} ---\n")
    dataset = load_dataset(dataset_name)
    data = dataset[0]

    data = fix_masks(data)
    if len(data.y.shape) > 1:
        data.y = data.y.argmax(dim=1)

    # Move data to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data = data.to(device)

    model = model_class(
        input_dim=dataset.num_node_features,
        hidden_dim=hidden_dim,
        output_dim=dataset.num_classes,
        num_layers=num_layers
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = torch.nn.NLLLoss()

    metrics = {"epoch": [], "train_loss": [], "val_acc": [], "test_acc": []}
    start_time = time.time()

    best_val_acc = 0
    best_test_acc = 0
    for epoch in range(num_epochs):
        train_loss = train_model(model, data, optimizer, criterion)
        val_acc = evaluate_model(model, data, data.val_mask)
        test_acc = evaluate_model(model, data, data.test_mask)

        metrics["epoch"].append(epoch + 1)
        metrics["train_loss"].append(train_loss)
        metrics["val_acc"].append(val_acc)
        metrics["test_acc"].append(test_acc)

        # Model selection on validation accuracy; strict '>' keeps the
        # earliest epoch on ties.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        if log_every and epoch % log_every == 0:
            print(f"Epoch: {epoch+1}, Loss: {train_loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}")

    total_time = time.time() - start_time
    print(f"Training completed in {total_time:.2f}s for {dataset_name} with {num_layers} layers and hidden_dim={hidden_dim}")
    # Scalar entry: repeated on every row of the per-epoch table below.
    metrics["total_time"] = total_time

    # Compute the number of parameters
    num_params = sum(p.numel() for p in model.parameters())

    # Save metrics to a DataFrame; the filename is built once so the file
    # written and the path reported cannot drift apart.
    results_path = f"{dataset_name}_{model_name}_{num_layers}_layers_{hidden_dim}_hidden_results.csv"
    results_df = pd.DataFrame(metrics)
    results_df.to_csv(results_path, index=False)
    print(f"Results saved to {results_path}")

    # hidden_dim is returned as well so callers can record it with the scores
    return best_val_acc, best_test_acc, total_time, num_params, hidden_dim


# Training and Testing the Models

In [None]:
# Experiment sweep: every model x dataset x depth x hidden width is trained
# once and its best scores are collected into a single summary table.
#
# NOTE(review): GCN is not defined in the visible part of this notebook;
# presumably it comes from an earlier step. Confirm it is in scope before
# running this cell.
models = [
    {'name': 'GCN', 'class': GCN},
    {'name': 'GraphTransformer', 'class': GraphTransformer},
    {'name': 'DeeperGCN', 'class': DeeperGCN},
]

# Search grid
datasets = ['Cora', 'PubMed', 'CiteSeer', 'Squirrel', 'Chameleon', 'Texas']
num_layers_list = [2, 4, 8, 16]
hidden_dims = [8, 16, 24, 32, 40, 48, 56, 64]  # Example hidden dimensions

results = []

for model_info in models:
    model_name, model_class = model_info['name'], model_info['class']
    for dataset_name in datasets:
        for num_layers in num_layers_list:
            for hidden_dim in hidden_dims:
                # One training run; per-epoch metrics are written to CSV
                # by train_and_log_model itself.
                val_acc, test_acc, runtime, num_params, hidden_dim = train_and_log_model(
                    model_class=model_class,
                    dataset_name=dataset_name,
                    num_layers=num_layers,
                    hidden_dim=hidden_dim,
                    model_name=model_name,
                )
                results.append(dict(
                    model=model_name,
                    dataset=dataset_name,
                    num_layers=num_layers,
                    hidden_dim=hidden_dim,
                    num_params=num_params,
                    val_acc=val_acc,
                    test_acc=test_acc,
                    runtime=runtime,
                ))

# One row per configuration, written to a single summary file
summary_df = pd.DataFrame(results)
summary_df.to_csv("advanced_models_summary.csv", index=False)
print("Summary results saved to advanced_models_summary.csv")


Streaming output truncated to the last 5000 lines.
Epoch: 81, Loss: 0.0033, Val Acc: 0.7000, Test Acc: 0.7130
Epoch: 91, Loss: 0.0029, Val Acc: 0.7000, Test Acc: 0.7070
Training completed in 11.15s for PubMed with 8 layers and hidden_dim=24
Results saved to PubMed_GraphTransformer_8_layers_24_hidden_results.csv

--- Training GraphTransformer on PubMed with 8 layers and hidden_dim=32 ---

Epoch: 1, Loss: 1.1370, Val Acc: 0.3040, Test Acc: 0.3120
Epoch: 11, Loss: 0.0926, Val Acc: 0.6960, Test Acc: 0.6910
Epoch: 21, Loss: 0.0196, Val Acc: 0.6620, Test Acc: 0.6760
Epoch: 31, Loss: 0.0078, Val Acc: 0.6740, Test Acc: 0.6890
Epoch: 41, Loss: 0.0045, Val Acc: 0.6740, Test Acc: 0.6950
Epoch: 51, Loss: 0.0032, Val Acc: 0.6700, Test Acc: 0.6940
Epoch: 61, Loss: 0.0026, Val Acc: 0.6780, Test Acc: 0.6950
Epoch: 71, Loss: 0.0022, Val Acc: 0.6820, Test Acc: 0.6970
Epoch: 81, Loss: 0.0020, Val Acc: 0.6760, Test Acc: 0.6990
Epoch: 91, Loss: 0.0018, Val Acc: 0.6780, Test Acc: 0.6980
Traini