<a href="https://colab.research.google.com/github/AbhiJeet70/AirGCN/blob/main/AirGCN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torch_geometric
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Airports
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.nn import GCNConv
import numpy as np
import random

# Set random seed for reproducibility
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(20)

# Define function to load Airports data for a given country
def load_airports_data(country):
    dataset = Airports(root='/tmp/Airports', name=country, transform=NormalizeFeatures())
    data = dataset[0]
    return data

# Split data into train, validation, and test sets
def split_indices(num_nodes, train_ratio=0.7, val_ratio=0.1):
    indices = np.random.permutation(num_nodes)
    train_end = int(train_ratio * num_nodes)
    val_end = int((train_ratio + val_ratio) * num_nodes)
    train_idx = torch.tensor(indices[:train_end], dtype=torch.long)
    val_idx = torch.tensor(indices[train_end:val_end], dtype=torch.long)
    test_idx = torch.tensor(indices[val_end:], dtype=torch.long)
    return train_idx, val_idx, test_idx

# Define the GCN model with increased complexity and batch normalization
class GCNNet(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCNNet, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.conv4 = GCNConv(hidden_channels, hidden_channels)
        self.conv5 = GCNConv(hidden_channels, out_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.bn3 = torch.nn.BatchNorm1d(hidden_channels)
        self.bn4 = torch.nn.BatchNorm1d(hidden_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.conv2(x, edge_index)
        x = self.bn2(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.conv3(x, edge_index)
        x = self.bn3(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.conv4(x, edge_index)
        x = self.bn4(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.conv5(x, edge_index)
        return F.log_softmax(x, dim=1)

# Train and evaluate the model
def train_model(model, pyg_data, lr, weight_decay):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    pyg_data = pyg_data.to(device)
    model = model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    best_val_acc = 0
    patience = 50
    patience_counter = 0

    for epoch in range(1, 501):
        model.train()
        optimizer.zero_grad()
        out = model(pyg_data.x, pyg_data.edge_index)
        loss = F.cross_entropy(out[pyg_data.train_mask], pyg_data.y[pyg_data.train_mask])
        loss.backward()
        optimizer.step()

        model.eval()
        _, pred = model(pyg_data.x, pyg_data.edge_index).max(dim=1)
        val_correct = float(pred[pyg_data.val_mask].eq(pyg_data.y[pyg_data.val_mask]).sum().item())
        val_acc = val_correct / pyg_data.val_mask.sum().item()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            best_model_state = model.state_dict()
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch}')
            break

    model.load_state_dict(best_model_state)
    model.eval()
    _, pred = model(pyg_data.x, pyg_data.edge_index).max(dim=1)
    correct = float(pred[pyg_data.test_mask].eq(pyg_data.y[pyg_data.test_mask]).sum().item())
    acc = correct / pyg_data.test_mask.sum().item()
    print(f'Test Accuracy: {acc:.4f}')
    return acc

# Print dataset statistics
def print_dataset_statistics(data, country):
    num_nodes = data.num_nodes
    num_edges = data.num_edges
    num_features = data.num_node_features
    num_classes = data.y.max().item() + 1
    class_distribution = torch.bincount(data.y).cpu().numpy()
    print(f"Statistics for {country}:")
    print(f"  Number of nodes: {num_nodes}")
    print(f"  Number of edges: {num_edges}")
    print(f"  Number of features: {num_features}")
    print(f"  Number of classes: {num_classes}")
    print(f"  Class distribution: {class_distribution}")

# Hyperparameter grid search
hidden_channels_list = [64, 128, 256]
learning_rates = [0.005, 0.001, 0.0005]
weight_decays = [1e-4, 1e-5]

# List of countries to process
countries = ['USA', 'Brazil', 'Europe']

# Process each country and print accuracies
for country in countries:
    print(f'Processing country: {country}')
    data = load_airports_data(country)

    # Print dataset statistics
    print_dataset_statistics(data, country)

    # Prepare the masks
    train_idx, val_idx, test_idx = split_indices(data.num_nodes)
    data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    data.train_mask[train_idx] = True
    data.val_mask[val_idx] = True
    data.test_mask[test_idx] = True

    best_acc = 0
    best_params = None

    models = [GCNNet(data.num_node_features, hidden_channels, data.y.max().item() + 1) for hidden_channels in hidden_channels_list]
    for model in models:
        for lr in learning_rates:
            for weight_decay in weight_decays:
                print(f'Training with {model.__class__.__name__}, lr={lr}, weight_decay={weight_decay}')
                acc = train_model(model, data, lr, weight_decay)
                if acc > best_acc:
                    best_acc = acc
                    best_params = (model.__class__.__name__, model, lr, weight_decay)

    print(f'Best accuracy for {country}: {best_acc:.4f} with params {best_params}')


Collecting torch_geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_

Downloading https://github.com/leoribeiro/struc2vec/raw/master/graph/usa-airports.edgelist
Downloading https://github.com/leoribeiro/struc2vec/raw/master/graph/labels-usa-airports.txt
Processing...
Done!


Statistics for USA:
  Number of nodes: 1190
  Number of edges: 13599
  Number of features: 1190
  Number of classes: 4
  Class distribution: [297 297 297 299]
Training with GCNNet, lr=0.005, weight_decay=0.0001
Early stopping at epoch 52
Test Accuracy: 0.2636
Training with GCNNet, lr=0.005, weight_decay=1e-05
Early stopping at epoch 111
Test Accuracy: 0.4603
Training with GCNNet, lr=0.001, weight_decay=0.0001
Early stopping at epoch 52
Test Accuracy: 0.4728
Training with GCNNet, lr=0.001, weight_decay=1e-05
Early stopping at epoch 51
Test Accuracy: 0.4812
Training with GCNNet, lr=0.0005, weight_decay=0.0001
Early stopping at epoch 52
Test Accuracy: 0.4812
Training with GCNNet, lr=0.0005, weight_decay=1e-05
Early stopping at epoch 52
Test Accuracy: 0.4728
Training with GCNNet, lr=0.005, weight_decay=0.0001
Early stopping at epoch 51
Test Accuracy: 0.2594
Training with GCNNet, lr=0.005, weight_decay=1e-05
Early stopping at epoch 116
Test Accuracy: 0.5021
Training with GCNNet, lr=0.001, w

Downloading https://github.com/leoribeiro/struc2vec/raw/master/graph/brazil-airports.edgelist
Downloading https://github.com/leoribeiro/struc2vec/raw/master/graph/labels-brazil-airports.txt
Processing...
Done!


Statistics for Brazil:
  Number of nodes: 131
  Number of edges: 1074
  Number of features: 131
  Number of classes: 4
  Class distribution: [32 32 32 35]
Training with GCNNet, lr=0.005, weight_decay=0.0001
Early stopping at epoch 64
Test Accuracy: 0.3704
Training with GCNNet, lr=0.005, weight_decay=1e-05
Early stopping at epoch 78
Test Accuracy: 0.5185
Training with GCNNet, lr=0.001, weight_decay=0.0001
Early stopping at epoch 62
Test Accuracy: 0.3704
Training with GCNNet, lr=0.001, weight_decay=1e-05
Early stopping at epoch 75
Test Accuracy: 0.5185
Training with GCNNet, lr=0.0005, weight_decay=0.0001
Early stopping at epoch 66
Test Accuracy: 0.4074
Training with GCNNet, lr=0.0005, weight_decay=1e-05
Early stopping at epoch 53
Test Accuracy: 0.3704
Training with GCNNet, lr=0.005, weight_decay=0.0001
Early stopping at epoch 68
Test Accuracy: 0.4444
Training with GCNNet, lr=0.005, weight_decay=1e-05
Early stopping at epoch 51
Test Accuracy: 0.4815
Training with GCNNet, lr=0.001, weight_

Downloading https://github.com/leoribeiro/struc2vec/raw/master/graph/europe-airports.edgelist
Downloading https://github.com/leoribeiro/struc2vec/raw/master/graph/labels-europe-airports.txt
Processing...
Done!


Statistics for Europe:
  Number of nodes: 399
  Number of edges: 5995
  Number of features: 399
  Number of classes: 4
  Class distribution: [ 99  99  99 102]
Training with GCNNet, lr=0.005, weight_decay=0.0001
Early stopping at epoch 76
Test Accuracy: 0.4250
Training with GCNNet, lr=0.005, weight_decay=1e-05
Early stopping at epoch 51
Test Accuracy: 0.4125
Training with GCNNet, lr=0.001, weight_decay=0.0001
Early stopping at epoch 95
Test Accuracy: 0.4500
Training with GCNNet, lr=0.001, weight_decay=1e-05
Early stopping at epoch 55
Test Accuracy: 0.4375
Training with GCNNet, lr=0.0005, weight_decay=0.0001
Early stopping at epoch 51
Test Accuracy: 0.4625
Training with GCNNet, lr=0.0005, weight_decay=1e-05
Early stopping at epoch 51
Test Accuracy: 0.4625
Training with GCNNet, lr=0.005, weight_decay=0.0001
Early stopping at epoch 80
Test Accuracy: 0.4250
Training with GCNNet, lr=0.005, weight_decay=1e-05
Early stopping at epoch 78
Test Accuracy: 0.4625
Training with GCNNet, lr=0.001, wei