In [1]:
import sys
import os
import random

# Make the project's src directory the working directory and put it first on
# sys.path (presumably so that `from setup import *` below resolves there).
# NOTE: '../src/' in the abspath call is resolved *after* the chdir, i.e.
# relative to the new working directory. It therefore names the sibling
# directory "src" of the directory just entered, which is that same directory
# when it is itself named "src".
os.chdir('../src')
src_path = os.path.abspath('../src/')
sys.path.insert(0, src_path)

from setup import *

In [None]:
import os
import torch
import torch.multiprocessing as mp
from torch import nn
from torch.nn import functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv
from torch_geometric.transforms import AddRandomWalkPE
from torch_geometric.datasets import LRGBDataset


# Size PyTorch's CPU thread pools, leaving one core free for the rest of the
# system. The count is clamped to 1 because mp.cpu_count() - 1 is 0 on a
# single-core machine and both setters reject non-positive values.
# NOTE: per the PyTorch docs, set_num_interop_threads may only be called once
# and before any inter-op parallel work has started.
_NUM_TORCH_THREADS = max(1, mp.cpu_count() - 1)
torch.set_num_threads(_NUM_TORCH_THREADS)  # intra-op parallelism
torch.set_num_interop_threads(_NUM_TORCH_THREADS)  # inter-op parallelism

def load_cocosp(parallel=True, subset_ratio=1.0):
    """Load COCO-SP, add random-walk encodings and build train/test loaders.

    ``LRGBDataset`` is constructed without a ``split`` argument, so whichever
    split the library defaults to is loaded. Every graph is passed through
    ``AddRandomWalkPE`` (stored under ``pe``), labels are shifted so that the
    smallest label becomes 0, and the graphs are divided 80/20, in dataset
    order, into a train and a test set. No validation set is produced.

    Args:
        parallel: When true, both loaders use ``max(1, cpu_count - 2)`` worker
            processes; otherwise batches are built in the calling process.
        subset_ratio: Fraction of each split to keep. Values below 1.0 draw a
            reproducible random subset (seed 42) of at least one graph per
            split; values of 1.0 or more keep everything.

    Returns:
        A tuple ``(train_loader, test_loader, num_classes)`` where
        ``num_classes`` is the largest shifted label plus one.

    Raises:
        ValueError: If the dataset contains no labelled graph, or a split is
            empty when a subset is requested.
    """
    import random

    transform = AddRandomWalkPE(walk_length=20, attr_name='pe')
    dataset = LRGBDataset(root='/tmp/COCO-SP', name='COCO-SP')
    dataset = [transform(data) for data in dataset]

    # COCO-SP is a node-level task: data.y carries one label per node and the
    # training code scores every node, so the label tensors are kept whole
    # instead of being collapsed to the first node's label.
    min_label = min(int(data.y.min()) for data in dataset if data.y.numel() > 0)
    for data in dataset:
        # Out-of-place on purpose: the tensor may share storage with the
        # dataset's cached data, which must not be modified.
        data.y = data.y - min_label
    num_classes = max(int(data.y.max()) for data in dataset if data.y.numel() > 0) + 1

    # Train-test split (first 80% / last 20% in dataset order)
    train_split = int(len(dataset) * 0.8)
    train_dataset = dataset[:train_split]
    test_dataset = dataset[train_split:]

    # Reduce dataset size for faster tests
    if subset_ratio < 1.0:
        # A private generator yields the same draws as seeding the global one
        # with 42, without disturbing random state used elsewhere.
        rng = random.Random(42)
        train_dataset = rng.sample(train_dataset, max(1, int(len(train_dataset) * subset_ratio)))
        test_dataset = rng.sample(test_dataset, max(1, int(len(test_dataset) * subset_ratio)))

    # DataLoader
    num_workers = max(1, mp.cpu_count() - 2) if parallel else 0
    # Pinned host memory only speeds up host-to-GPU copies; skip it (and the
    # warning it triggers) when no CUDA device is available.
    loader_kwargs = dict(batch_size=64, num_workers=num_workers, pin_memory=torch.cuda.is_available())
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    return train_loader, test_loader, num_classes



class GPSNodeClassifier(torch.nn.Module):
    """Stack of GCNConv layers with ReLU activations and a linear output layer."""

    def __init__(self, num_node_features, hidden_channels, num_classes, num_layers):
        """Build the convolution stack and the output layer.

        Args:
            num_node_features: Input width of the first convolution.
            hidden_channels: Output width of every convolution.
            num_classes: Output width of the final linear layer.
            num_layers: Requested number of convolutions; at least one is
                always created.
        """
        super().__init__()
        # The first layer maps the input features to the hidden width; each of
        # the remaining num_layers - 1 layers keeps that width.
        input_widths = [num_node_features] + [hidden_channels] * (num_layers - 1)
        self.convs = nn.ModuleList(
            [GCNConv(width, hidden_channels) for width in input_widths]
        )
        self.lin = nn.Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        """Apply each convolution followed by ReLU, then the linear layer."""
        hidden = x
        for layer in self.convs:
            hidden = F.relu(layer(hidden, edge_index))
        return self.lin(hidden)


def train_gps_nodes(model, data_loader, optimizer, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module called as ``model(x, edge_index)`` whose output has one
            row of class scores per entry of ``y``.
        data_loader: Iterable of batches exposing ``x``, ``edge_index``, ``y``
            and ``to(device)``. A ``train_mask`` attribute is honoured when a
            batch has one.
        optimizer: Optimizer stepped once per batch.
        device: Device every batch is moved to.

    Returns:
        The mean cross-entropy loss over all batches as a float, or ``0.0``
        when the loader yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for data in data_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        # Batches assembled from whole training graphs carry no train_mask;
        # in that case every node contributes to the loss.
        mask = getattr(data, 'train_mask', None)
        if mask is None:
            loss = F.cross_entropy(out, data.y)
        else:
            loss = F.cross_entropy(out[mask], data.y[mask])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    # Counting batches (rather than len(data_loader)) also supports iterables
    # without a length and avoids dividing by zero on an empty loader.
    return total_loss / num_batches if num_batches else 0.0


@torch.no_grad()
def test_gps_nodes(model, data_loader, device):
    """Evaluate classification accuracy over a loader.

    Args:
        model: Module called as ``model(x, edge_index)`` whose output has one
            row of class scores per entry of ``y``.
        data_loader: Iterable of batches exposing ``x``, ``edge_index``, ``y``
            and ``to(device)``. A ``test_mask`` attribute is honoured when a
            batch has one.
        device: Device every batch is moved to.

    Returns:
        The fraction of evaluated entries whose arg-max prediction equals the
        label, or ``0.0`` when nothing was evaluated.
    """
    model.eval()
    correct = 0
    total = 0
    for data in data_loader:
        data = data.to(device)
        pred = model(data.x, data.edge_index).argmax(dim=1)
        # Batches assembled from whole test graphs carry no test_mask; in that
        # case every node is scored.
        mask = getattr(data, 'test_mask', None)
        if mask is None:
            hits = pred == data.y
        else:
            hits = pred[mask] == data.y[mask]
        correct += hits.sum().item()
        total += hits.numel()
    return correct / total if total else 0.0


# The name matches pytest's ``test_*`` pattern; opt out so this evaluation
# helper is never collected as a test case when imported into a test module.
test_gps_nodes.__test__ = False


def main(num_epochs=30, subset_ratio=0.1):
    """Train the node classifier on COCO-SP and print per-epoch metrics.

    Loads the data through ``load_cocosp``, builds a 5-layer classifier with
    128 hidden channels, and trains it with Adam. The learning rate is halved
    by ``ReduceLROnPlateau`` when the training loss stops improving for more
    than 5 epochs.

    Args:
        num_epochs: Number of training epochs to run.
        subset_ratio: Fraction of each split passed on to ``load_cocosp``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load data
    cocosp_train_loader, cocosp_test_loader, num_classes = load_cocosp(subset_ratio=subset_ratio)
    # Read the feature width from a single graph rather than pulling a whole
    # batch, which would start the loader's worker processes for a shape lookup.
    num_node_features = cocosp_train_loader.dataset[0].x.shape[1]

    # Model setup
    model = GPSNodeClassifier(
        num_node_features=num_node_features,
        hidden_channels=128,
        num_classes=num_classes,
        num_layers=5
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

    print('Beginning training...')
    # Training loop
    for epoch in range(num_epochs):
        train_loss = train_gps_nodes(model, cocosp_train_loader, optimizer, device)
        test_acc = test_gps_nodes(model, cocosp_test_loader, device)
        # The scheduler watches the training loss (mode='min').
        scheduler.step(train_loss)
        print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Test Acc={test_acc:.4f}")


# Start training only when this code is executed directly (a notebook cell or
# a script), never as a side effect of importing it. The guard also matters
# for DataLoader workers: processes started with the "spawn" method re-import
# the main module, and an unguarded call would run main() again in each one.
if __name__ == "__main__":
    main()