In [2]:
!pip install torch-scatter torch-sparse torch-cluster torch-geometric -f https://data.pyg.org/whl/torch-2.0.0+cpu.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_scatter-2.1.2%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (494 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.1/494.1 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_sparse-0.6.18%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m37.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-cluster
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_cluster-1.6.3%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (751 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m751.3/751.3 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K    

In [3]:
!pip install torch-geometric



# gnn

In [48]:
!rm -rf /content/drive/*

In [49]:
from google.colab import drive

# Mount Google Drive to a different directory
drive.mount('/content/drive_mount')

Drive already mounted at /content/drive_mount; to attempt to forcibly remount, call drive.mount("/content/drive_mount", force_remount=True).


In [64]:
import torch
import pandas as pd
import numpy as np
from torch_geometric.data import Data
from sklearn.metrics import accuracy_score
import torch.nn as nn
from torch_geometric.nn import GATConv
import torch.optim as optim

# Load METR-LA dataset
def load_metr_la_dataset(adj_path, time_series_path):
    adj_data = pd.read_pickle(adj_path)
    if isinstance(adj_data, list):
        adj_matrix = np.array([list(map(float, row)) for row in adj_data[0]], dtype=np.float32)
        node_mapping = adj_data[1]
    elif isinstance(adj_data, dict):
        adj_matrix = adj_data["adjacency_matrix"]
        node_mapping = adj_data["node_mapping"]
    else:
        raise ValueError("Unexpected structure in adj_METR-LA.pkl")

    if len(adj_matrix.shape) == 1:
        num_nodes = len(adj_matrix)
        adj_matrix = np.diag(adj_matrix)
    elif len(adj_matrix.shape) != 2:
        raise ValueError(f"Cannot convert adjacency matrix to 2D. Current shape: {adj_matrix.shape}")

    time_series_data = pd.read_hdf(time_series_path, "df")  # shape: (timesteps, nodes)
    return adj_matrix, node_mapping, time_series_data

# Generate static node features from time-series
def create_node_features(time_series_data, aggregation="mean"):
    if aggregation == "mean":
        node_features = time_series_data.mean(axis=0).values
    elif aggregation == "std":
        node_features = time_series_data.std(axis=0).values
    else:
        raise ValueError("Unsupported aggregation method.")
    node_features = np.array(node_features, dtype=np.float32)
    node_features = np.expand_dims(node_features, axis=1)
    return torch.tensor(node_features, dtype=torch.float)

# Convert adjacency matrix to edge list for PyTorch Geometric
def adjacency_to_edge_list(adj_matrix):
    if isinstance(adj_matrix, pd.DataFrame):
        adj_matrix = adj_matrix.values

    edge_indices = np.nonzero(adj_matrix)
    edge_index = np.stack(edge_indices, axis=0)
    edge_weight = adj_matrix[edge_indices]

    edge_index = torch.tensor(edge_index, dtype=torch.long)
    edge_weight = torch.tensor(edge_weight, dtype=torch.float)

    return edge_index, edge_weight

# Prepare PyTorch Geometric Data object
def prepare_graph_data(adj_matrix, node_features, labels, train_mask, test_mask):
    edge_index, edge_weight = adjacency_to_edge_list(adj_matrix)
    data = Data(
        x=node_features,
        edge_index=edge_index,
        edge_attr=edge_weight,
        y=torch.tensor(labels, dtype=torch.long),
        train_mask=train_mask,
        test_mask=test_mask
    )
    return data

# Define GNNEncoder
class GNNEncoder(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, out_dim):
        super().__init__()
        self.conv1 = GATConv(input_dim, hidden_dim, heads=4, dropout=0.5)
        self.conv2 = GATConv(hidden_dim * 4, out_dim, heads=1, concat=False, dropout=0.5)

    def forward(self, x, edge_index, edge_attr):
        x = self.conv1(x, edge_index, edge_attr).relu()
        x = self.conv2(x, edge_index, edge_attr)
        return x

# Define NodeClassifier
class NodeClassifier(torch.nn.Module):
    def __init__(self, input_dim, num_classes, dropout=0.5):
        super().__init__()
        self.fc = nn.Linear(input_dim, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.dropout(x)
        return self.fc(x)

# Define overall Model
class Model(torch.nn.Module):
    def __init__(self, encoder, classifier):
        super().__init__()
        self.encoder = encoder
        self.classifier = classifier

    def forward(self, data):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = self.encoder(x, edge_index, edge_attr)
        x = self.classifier(x)
        return x

# Training function
def train_model(model, data, optimizer, criterion, epochs=50, device="cpu"):
    model.to(device)
    data = data.to(device)
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

# Evaluation function
def evaluate_model(model, data, device="cpu"):
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
    pred = out.argmax(dim=1)
    accuracy_train = accuracy_score(data.y[data.train_mask].cpu(), pred[data.train_mask].cpu())
    accuracy_test = accuracy_score(data.y[data.test_mask].cpu(), pred[data.test_mask].cpu())
    print(f"Train Accuracy: {accuracy_train:.4f}")
    print(f"Test Accuracy: {accuracy_test:.4f}")

# Paths to METR-LA dataset
adj_path = "/content/drive_mount/MyDrive/archive (26)/adj_METR-LA.pkl"
time_series_path = "/content/drive_mount/MyDrive/archive (26)/METR-LA.h5"

# Load dataset
adj_matrix, node_mapping, time_series_data = load_metr_la_dataset(adj_path, time_series_path)

# Create node features
node_features = create_node_features(time_series_data, aggregation="mean")

# Fixed labels (replace with actual labels if available)
labels = np.array([0, 1, 2] * (node_features.shape[0] // 3))  # Example fixed labels

# Fixed train and test masks (replace with actual splits if available)
num_nodes = node_features.shape[0]
train_ratio = 0.8
train_size = int(train_ratio * num_nodes)
train_indices = np.arange(train_size)
test_indices = np.arange(train_size, num_nodes)

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_indices] = True
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[test_indices] = True

# Prepare graph data
graph_data = prepare_graph_data(adj_matrix, node_features, labels, train_mask, test_mask)

# Define model components
encoder = GNNEncoder(input_dim=1, hidden_dim=64, out_dim=32)
classifier = NodeClassifier(input_dim=32, num_classes=3)
model = Model(encoder, classifier)

# Define optimizer and criterion
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

# Determine device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train and evaluate the model
train_model(model, graph_data, optimizer, criterion, epochs=100, device=device)
evaluate_model(model, graph_data, device=device)

Epoch 1/100, Loss: 4.8729
Epoch 2/100, Loss: 8.1887
Epoch 3/100, Loss: 7.9535
Epoch 4/100, Loss: 4.4124
Epoch 5/100, Loss: 3.5589
Epoch 6/100, Loss: 4.1677
Epoch 7/100, Loss: 4.0039
Epoch 8/100, Loss: 3.0218
Epoch 9/100, Loss: 3.5662
Epoch 10/100, Loss: 2.1703
Epoch 11/100, Loss: 2.7774
Epoch 12/100, Loss: 2.4365
Epoch 13/100, Loss: 2.1918
Epoch 14/100, Loss: 1.6188
Epoch 15/100, Loss: 1.5268
Epoch 16/100, Loss: 1.4580
Epoch 17/100, Loss: 1.5869
Epoch 18/100, Loss: 1.4853
Epoch 19/100, Loss: 1.4066
Epoch 20/100, Loss: 1.2856
Epoch 21/100, Loss: 1.2876
Epoch 22/100, Loss: 1.3441
Epoch 23/100, Loss: 1.2927
Epoch 24/100, Loss: 1.1661
Epoch 25/100, Loss: 1.2311
Epoch 26/100, Loss: 1.2666
Epoch 27/100, Loss: 1.1796
Epoch 28/100, Loss: 1.1722
Epoch 29/100, Loss: 1.1455
Epoch 30/100, Loss: 1.1515
Epoch 31/100, Loss: 1.1389
Epoch 32/100, Loss: 1.1749
Epoch 33/100, Loss: 1.1754
Epoch 34/100, Loss: 1.1195
Epoch 35/100, Loss: 1.1277
Epoch 36/100, Loss: 1.1436
Epoch 37/100, Loss: 1.1319
Epoch 38/1