In [3]:
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_max_pool as gmp
from torch_geometric.nn import global_mean_pool as gap

path = 'data/TUDataset'
dataset = TUDataset(path, name='PROTEINS')
dataset = dataset.shuffle()
n = len(dataset) // 10
test_dataset = dataset[:n]
train_dataset = dataset[n:]
test_loader = DataLoader(test_dataset, batch_size=60)
train_loader = DataLoader(train_dataset, batch_size=60)


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = GraphConv(dataset.num_features, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)

        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, dataset.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = F.relu(self.conv1(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool3(x, edge_index, None, batch)
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = x1 + x2 + x3

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.log_softmax(self.lin3(x), dim=-1)

        return x


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)


def train(epoch):
    model.train()

    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, data.y)
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    return loss_all / len(train_dataset)


def test(loader):
    model.eval()

    correct = 0
    for data in loader:
        data = data.to(device)
        pred = model(data).max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)


for epoch in range(1, 201):
    loss = train(epoch)
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.5f}, Train Acc: {train_acc:.5f}, '
          f'Test Acc: {test_acc:.5f}')

Epoch: 001, Loss: 0.63994, Train Acc: 0.67166, Test Acc: 0.77477
Epoch: 002, Loss: 0.60602, Train Acc: 0.72555, Test Acc: 0.75676
Epoch: 003, Loss: 0.59269, Train Acc: 0.73553, Test Acc: 0.78378
Epoch: 004, Loss: 0.57202, Train Acc: 0.73353, Test Acc: 0.75676
Epoch: 005, Loss: 0.56646, Train Acc: 0.73553, Test Acc: 0.72973
Epoch: 006, Loss: 0.56377, Train Acc: 0.74451, Test Acc: 0.76577
Epoch: 007, Loss: 0.53971, Train Acc: 0.74950, Test Acc: 0.77477
Epoch: 008, Loss: 0.54838, Train Acc: 0.74750, Test Acc: 0.76577
Epoch: 009, Loss: 0.54948, Train Acc: 0.75349, Test Acc: 0.75676
Epoch: 010, Loss: 0.53813, Train Acc: 0.75349, Test Acc: 0.76577
Epoch: 011, Loss: 0.53843, Train Acc: 0.75150, Test Acc: 0.76577
Epoch: 012, Loss: 0.53526, Train Acc: 0.75649, Test Acc: 0.74775
Epoch: 013, Loss: 0.53497, Train Acc: 0.76347, Test Acc: 0.76577
Epoch: 014, Loss: 0.52893, Train Acc: 0.76447, Test Acc: 0.76577
Epoch: 015, Loss: 0.52412, Train Acc: 0.75848, Test Acc: 0.74775
Epoch: 016, Loss: 0.52143

In [4]:
from tqdm import tqdm
import time

n = 10

for i in tqdm(range(1, n+1), desc='Progress', unit='number'):
    print(i)
    time.sleep(0.1)


Progress:  10%|█         | 1/10 [00:00<00:00,  9.78number/s]

1
2


Progress:  30%|███       | 3/10 [00:00<00:00,  9.25number/s]

3
4


Progress:  50%|█████     | 5/10 [00:00<00:00,  9.19number/s]

5
6


Progress:  70%|███████   | 7/10 [00:00<00:00,  9.16number/s]

7
8


Progress:  90%|█████████ | 9/10 [00:00<00:00,  9.16number/s]

9
10


Progress: 100%|██████████| 10/10 [00:01<00:00,  9.19number/s]


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GATConv

# Define a simple GAT-based GNN model
class GATModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_heads):
        super(GATModel, self).__init__()
        self.conv1 = GATConv(input_size, hidden_size, heads=num_heads)
        self.conv2 = GATConv(hidden_size * num_heads, output_size, heads=1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)

        return x

# Function to compute feature importance using gradient
def compute_feature_importance_gat(model, data, target):
    model.eval()
    data.x.requires_grad_(True)

    optimizer = optim.SGD([data.x], lr=0.01)
    criterion = nn.MSELoss()

    # Forward pass
    output = model(data)
    loss = criterion(output, target)

    # Backward pass to compute gradients
    optimizer.zero_grad()
    loss.backward()

    # Compute feature importance as the absolute gradient
    # feature_importance = torch.abs(data.x.grad)
    feature_importance = data.x.grad

    return feature_importance

# Example usage
# Assume your GNN model takes a graph as input, represented using the PyTorch Geometric data structure.
# Here, we use a simple example with a random graph.

from torch_geometric.data import Data
input_size=15
output_size=1
hidden_size=64
# Generate a random graph
num_nodes = 20
num_edges = 15
data = Data(x=torch.randn((num_nodes, input_size), requires_grad=True),
            edge_index=torch.randint(0, num_nodes, (2, num_edges), dtype=torch.long),
            y=torch.randn((num_nodes, output_size)))

# Initialize the GAT-based GNN model
gat_model = GATModel(input_size, hidden_size, output_size, num_heads=2)

# Compute feature importance
feature_importance = compute_feature_importance_gat(gat_model, data, data.y)

print("Feature Importance:", feature_importance)


Feature Importance: tensor([[ 1.2275e-02, -4.3866e-04, -2.3747e-03,  5.9961e-06, -1.8322e-02,
          6.7758e-03, -3.3756e-03,  3.3045e-03, -4.1410e-04, -4.7517e-03,
          6.5441e-03,  1.1756e-04,  8.9861e-03, -6.2483e-03, -2.0754e-04],
        [ 9.7265e-04, -4.1853e-04,  8.4249e-04, -4.1004e-04, -1.3501e-03,
         -2.3933e-04, -1.1280e-03, -7.0684e-04,  3.6807e-04, -7.3343e-04,
          6.7297e-04,  1.7970e-05,  6.3480e-04, -1.7746e-04, -7.7775e-04],
        [ 7.6379e-04, -4.1705e-04,  5.0388e-03,  9.4725e-03,  1.3427e-03,
         -3.9083e-03, -7.4786e-04, -9.5126e-03,  2.7012e-03, -1.3322e-02,
         -2.7063e-03, -5.9831e-03, -1.0255e-02,  6.4585e-03, -2.2670e-03],
        [ 2.3280e-02, -1.9069e-03,  1.2953e-02, -1.7481e-03, -2.7319e-03,
          2.4886e-02, -1.2913e-02,  4.1282e-04, -2.1938e-02,  1.2066e-02,
          9.2277e-03,  1.7172e-02,  2.6745e-02, -1.2474e-02, -8.2982e-03],
        [ 7.0498e-03, -1.2809e-03, -4.0443e-03,  5.3280e-03, -1.0616e-03,
          5.35

In [12]:
print(type(feature_importance))

<class 'torch.Tensor'>


In [10]:
print(feature_importance.shape)

torch.Size([20, 15])


In [36]:
import torch
import torch.nn as nn
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
import torch.optim as optim
import shap

# Define a simple PyTorch GNN model for graph regression with GCNConv
class SimpleGNN(nn.Module):
    def __init__(self, in_feats, hidden_size, out_feats):
        super(SimpleGNN, self).__init__()
        self.conv1 = GCNConv(in_feats, hidden_size)
        self.conv2 = GCNConv(hidden_size, out_feats)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = torch.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return x

# Create a dummy graph dataset
# Note: In a real-world scenario, you would load your dataset using PyG's available datasets.
x = torch.randn((5, 1))  # Node features (5 nodes, 1 feature per node)
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3, 3, 4], [1, 0, 2, 1, 3, 2, 4, 3]], dtype=torch.long)
y = torch.randn((5, 1))  # Target values (regression task)

data = Data(x=x, edge_index=edge_index, y=y)

# Create a DataLoader
loader = DataLoader([data], batch_size=1, shuffle=False)

# Create a simple GNN model with GCNConv
model = SimpleGNN(in_feats=1, hidden_size=16, out_feats=1)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
data = data.to(device)

# Define a function for the GNN model
def model_predict(data):
    model.eval()
    with torch.no_grad():
        predictions = model(data)
    return predictions.cpu().numpy()

# Wrap the model_predict function with a shap.DeepExplainer
explainer = shap.DeepExplainer(model_predict, loader)

# Get Shapley values for a specific data point
shap_values = explainer.shap_values(data)

# Print Shapley values for each feature
print("Shapley Values:", shap_values)


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\bolou\miniconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\bolou\AppData\Local\Temp\ipykernel_19068\1474834421.py", line 48, in <module>
    explainer = shap.DeepExplainer(model_predict, loader)
  File "C:\Users\bolou\miniconda3\lib\site-packages\shap\explainers\_deep\__init__.py", line 84, in __init__
    self.explainer = TFDeep(model, data, session, learning_phase_flags)
  File "C:\Users\bolou\miniconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 110, in __init__
  File "C:\Users\bolou\miniconda3\lib\site-packages\shap\explainers\tf_utils.py", line 69, in _get_model_inputs
AssertionError: <class 'function'> is not currently a supported model type!

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\bolou\miniconda3\lib\site-packages\IPython\core\

In [1]:
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Planetoid
from torch_geometric.explain import Explainer, GNNExplainer
from torch_geometric.nn import GCNConv

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
dataset = Planetoid(path, dataset)
data = dataset[0]


class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

for epoch in range(1, 201):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

explainer = Explainer(
    model=model,
    algorithm=GNNExplainer(epochs=200),
    explanation_type='model',
    node_mask_type='attributes',
    edge_mask_type='object',
    model_config=dict(
        mode='multiclass_classification',
        task_level='node',
        return_type='log_probs',
    ),
)
node_index = 10
explanation = explainer(data.x, data.edge_index, index=node_index)
print(f'Generated explanations in {explanation.available_explanations}')

path = 'feature_importance.png'
explanation.visualize_feature_importance(path, top_k=10)
print(f"Feature importance plot has been saved to '{path}'")

path = 'subgraph.pdf'
explanation.visualize_graph(path)
print(f"Subgraph visualization plot has been saved to '{path}'")

ModuleNotFoundError: No module named 'torch_geometric'

In [2]:
from torch_geometric.datasets import TUDataset


ModuleNotFoundError: No module named 'torch_geometric'

In [3]:
data = [
    {'gnnConv1': ('linear', 2, 2), 'gnnConv2': ('SGConv', 11, 4), ... 'KendalTau': {0.4176}},
    {'gnnConv1': ('GATConv', 1, 1), 'gnnConv2': ('LEConv', 13, 6), ... 'KendalTau': {0}},
    {'gnnConv1': ('linear', 2, 2), 'gnnConv2': ('GENConv', 10, 3), ... 'KendalTau': {0.4851}},
    {'gnnConv1': ('SGConv', 4, 4), 'gnnConv2': ('GENConv', 10, 3), ... 'KendalTau': {0.4852}},
    {'gnnConv1': ('LEConv', 6, 6), 'gnnConv2': ('GCNConv', 7, 0), ... 'KendalTau': {0.2727}}
]

max_kendaltau_entry = max(data, key=lambda x: x['KendalTau'].pop())  # Pop the only element from the set

print("Entry with the highest KendalTau:")
print(max_kendaltau_entry)


SyntaxError: invalid syntax (2101667002.py, line 2)