In [None]:
!pip install dgl

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import dgl
from dgl.data import DGLDataset
import torch
import numpy as np

# edges = pd.read_csv('./graph_edges.csv')
# properties = pd.read_csv('./graph_properties.csv')

class SyntheticDataset(DGLDataset):
    """Graph-classification dataset assembled from three CSV tables.

    Files read by :meth:`process` (paths are relative to the working directory):

    * ``./graphs_edges.csv`` -- one row per edge, with columns ``graph_id``,
      ``src`` and ``dist`` (the destination endpoint).
    * ``./graph_properties.csv`` -- one row per graph, with columns
      ``graph_id``, ``label`` and ``num_nodes``.
    * ``./features-nodes.csv`` -- node feature rows, grouped by ``OBJECTID``
      (matched against ``graph_id``); every column from position 2 onwards is
      used as a node feature.

    Each item is a ``(graph, label)`` pair. Graphs carry ``ndata['feat']`` and a
    constant ``edata['edge_weight']`` of ones.
    """

    def __init__(self):
        super().__init__(name='synthetic')

    def process(self):
        """Read the CSV tables and build one DGL graph per ``graph_id``.

        Populates ``self.graphs`` (list of graphs) and ``self.labels``
        (1-D ``torch.long`` tensor, same order as ``self.graphs``).

        Raises:
            KeyError: if a graph id found in the edge table has no entry in the
                properties table or no rows in the feature table.
        """
        edges = pd.read_csv('./graphs_edges.csv')
        properties = pd.read_csv('./graph_properties.csv')
        features = pd.read_csv('./features-nodes.csv')
        self.graphs = []
        self.labels = []
        self.nodes = []  # never populated; kept so the attribute still exists

        # Per-graph lookups keyed by graph id. Building them column-wise keeps
        # the native column dtypes (iterrows would upcast mixed rows to float).
        label_dict = dict(zip(properties['graph_id'], properties['label']))
        num_nodes_dict = dict(zip(properties['graph_id'], properties['num_nodes']))

        features_group = features.groupby('OBJECTID')

        # One graph per graph id present in the edge table.
        for graph_id, edges_of_id in edges.groupby('graph_id'):
            feat_of_id = features_group.get_group(graph_id)

            # Unit weight on every edge.
            edge_features = torch.ones(len(edges_of_id), dtype=torch.int64)

            nodes_feat = torch.from_numpy(feat_of_id.iloc[:, 2:].to_numpy())

            # Map raw node ids to compact indices using ONE shared, sorted id
            # table for both endpoints. Encoding 'src' and 'dist' separately
            # would give the same raw id different indices whenever the two
            # columns do not contain exactly the same set of ids.
            src_ids = edges_of_id['src'].to_numpy()
            dst_ids = edges_of_id['dist'].to_numpy()
            node_ids = np.union1d(src_ids, dst_ids)
            src = torch.from_numpy(np.searchsorted(node_ids, src_ids))
            dst = torch.from_numpy(np.searchsorted(node_ids, dst_ids))

            num_nodes = int(num_nodes_dict[graph_id])
            label = label_dict[graph_id]

            # Create the graph and attach node / edge data.
            # NOTE(review): assumes the feature rows of a graph are ordered by
            # ascending node id, so that row k describes compact node k --
            # TODO confirm against features-nodes.csv.
            g = dgl.graph((src, dst), num_nodes=num_nodes)
            g.ndata['feat'] = nodes_feat
            g.edata['edge_weight'] = edge_features

            self.graphs.append(g)
            self.labels.append(label)

        # Convert the label list to a tensor for saving / batching.
        self.labels = torch.tensor(self.labels, dtype=torch.long)

    def __getitem__(self, i):
        """Return the ``(graph, label)`` pair at position ``i``."""
        return self.graphs[i], self.labels[i]

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return len(self.graphs)

# Instantiate the dataset, then pull out the first (graph, label) pair and
# print it as a quick sanity check of the node/edge schemes.
dataset = SyntheticDataset()
graph, label = dataset[0]
print(graph, label)

  return th.as_tensor(data, dtype=dtype)


Graph(num_nodes=1, num_edges=1,
      ndata_schemes={'feat': Scheme(shape=(19,), dtype=torch.float64)}
      edata_schemes={'edge_weight': Scheme(shape=(), dtype=torch.int64)}) tensor(0)


In [None]:
# nodes

In [None]:
import dgl.function as fn
import torch
import torch.nn as nn
from dgl.data import GINDataset
from dgl.dataloading import GraphDataLoader
from dgl.nn import AvgPooling, GNNExplainer

In [None]:
class Model(nn.Module):
    """Linear projection of node features, one round of sum aggregation over
    incoming edges, then average pooling to a per-graph output.

    ``forward`` accepts an optional ``eweight`` tensor; when given, each
    message is multiplied by its edge's weight before being summed.
    """

    def __init__(self, in_feats, out_feats):
        super().__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.pool = AvgPooling()

    def forward(self, graph, feat, eweight=None):
        """Return the pooled graph-level output for ``graph`` with node features ``feat``."""
        # Everything written to the graph below is discarded on scope exit.
        with graph.local_scope():
            graph.ndata['h'] = self.linear(feat)

            # Choose the message function: weighted when an edge weight is given.
            if eweight is not None:
                graph.edata['w'] = eweight
                message_fn = fn.u_mul_e('h', 'w', 'm')
            else:
                message_fn = fn.copy_u('h', 'm')

            graph.update_all(message_fn, fn.sum('m', 'h'))
            return self.pool(graph, graph.ndata['h'])

In [None]:
from dgl.nn import GraphConv
import torch.nn.functional as F

class GCN(nn.Module):
    """Two ``GraphConv`` layers followed by one sum-aggregation step and a
    mean-over-nodes readout, producing one row of ``num_classes`` scores per graph.

    Args:
        in_feats: size of each input node feature vector.
        h_feats: hidden size between the two convolution layers.
        num_classes: size of the output score vector.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, graph, feat, eweight=None):
        """Compute graph-level scores.

        Args:
            graph: a (possibly batched) DGL graph.
            feat: node feature tensor passed to the first convolution.
            eweight: optional per-edge weights; when given, messages in the
                final aggregation step are multiplied by them.

        Returns:
            The result of ``dgl.mean_nodes`` over the aggregated node scores.
        """
        # local_scope keeps the temporary 'h' / 'w' fields off the caller's
        # graph. Without it, every graph passed through forward (e.g. the one
        # handed to the explainer) would keep extra ndata/edata entries that
        # the other graphs in the dataset do not have.
        with graph.local_scope():
            h = self.conv1(graph, feat)
            h = F.relu(h)
            h = self.conv2(graph, h)
            graph.ndata['h'] = h
            # NOTE(review): eweight only affects this last aggregation; the two
            # GraphConv layers above run unweighted. Consider forwarding it via
            # GraphConv's edge_weight argument if edge masks should influence
            # the convolutions as well -- confirm intended behaviour.
            if eweight is None:
                graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            else:
                graph.edata['w'] = eweight
                graph.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h'))
            return dgl.mean_nodes(graph, 'h')

In [None]:
# Train the model
# Input width is read from the data instead of being hard-coded.
feat_size = dataset[0][0].ndata['feat'].shape[1]

dataloader = GraphDataLoader(dataset, batch_size=64, shuffle=True)
model = GCN(feat_size, 16, 5)
# Alternative baseline: model = Model(feat_size, 5)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

for epoch in range(50):
    # Counters are reset once per epoch so the printed value is the accuracy
    # over the whole epoch rather than over a single mini-batch.
    num_correct = 0
    num_tests = 0
    for bg, labels in dataloader:
        logits = model(bg, bg.ndata['feat'].float())
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        num_correct += (logits.argmax(1) == labels).sum().item()
        num_tests += len(labels)
    print('Train accuracy:', num_correct / num_tests)

# Final evaluation pass; no gradients are needed here.
# NOTE(review): this iterates the same dataloader that was used for training,
# so the number printed below is measured on training data, not on a held-out
# split -- consider evaluating on a separate test loader.
num_correct = 0
num_tests = 0
with torch.no_grad():
    for batched_graph, labels in dataloader:
        pred = model(batched_graph, batched_graph.ndata['feat'].float())
        num_correct += (pred.argmax(1) == labels).sum().item()
        num_tests += len(labels)

print('Test accuracy:', num_correct / num_tests)

Train accuracy: 0.28125
Train accuracy: 0.546875
Train accuracy: 0.515625
Train accuracy: 0.578125
Train accuracy: 0.703125
Train accuracy: 0.515625
Train accuracy: 0.640625
Train accuracy: 0.6875
Train accuracy: 0.515625
Train accuracy: 0.640625
Train accuracy: 0.59375
Train accuracy: 0.53125




Train accuracy: 0.6785714285714286
Train accuracy: 0.53125
Train accuracy: 0.5
Train accuracy: 0.5
Train accuracy: 0.59375
Train accuracy: 0.578125
Train accuracy: 0.6875
Train accuracy: 0.625
Train accuracy: 0.65625
Train accuracy: 0.5625
Train accuracy: 0.5625
Train accuracy: 0.59375
Train accuracy: 0.59375
Train accuracy: 0.6785714285714286
Train accuracy: 0.59375
Train accuracy: 0.546875
Train accuracy: 0.625
Train accuracy: 0.65625
Train accuracy: 0.5625
Train accuracy: 0.5
Train accuracy: 0.53125
Train accuracy: 0.59375
Train accuracy: 0.59375
Train accuracy: 0.578125
Train accuracy: 0.625
Train accuracy: 0.65625
Train accuracy: 0.5892857142857143
Train accuracy: 0.625
Train accuracy: 0.703125
Train accuracy: 0.546875
Train accuracy: 0.671875
Train accuracy: 0.5
Train accuracy: 0.5625
Train accuracy: 0.484375
Train accuracy: 0.578125
Train accuracy: 0.6875
Train accuracy: 0.59375
Train accuracy: 0.546875
Train accuracy: 0.625
Train accuracy: 0.5178571428571429
Train accuracy: 0.

In [None]:
# Explain the trained model's prediction for a single graph.
explainer = GNNExplainer(model, num_hops=1)
# Use the second graph in the dataset; its label is not needed here.
g, _ = dataset[1]
# Cast node features to float32, the same cast used when training the model.
features = g.ndata['feat'].float()
# print(features)
# explain_graph returns two masks: one over node-feature dimensions and one
# over the graph's edges (presumably importance scores -- see the DGL docs).
feat_mask, edge_mask = explainer.explain_graph(g, features)
print(feat_mask)
print(edge_mask)

Explain graph: 100%|██████████| 100/100 [00:00<00:00, 134.34it/s]

tensor([0.2493, 0.2884, 0.2688, 0.7402, 0.6590, 0.2856, 0.2557, 0.2550, 0.7395,
        0.2569, 0.7249, 0.2438, 0.2507, 0.2414, 0.2367, 0.7418, 0.2695, 0.2660,
        0.2575])
tensor([0.1640, 0.1588, 0.1567, 0.7979, 0.8135, 0.7832, 0.1672, 0.1801, 0.9173,
        0.8897, 0.8318, 0.7771, 0.7895, 0.1716, 0.8101, 0.8195, 0.8148, 0.7931,
        0.7717, 0.8607, 0.7585, 0.8141, 0.1303, 0.8422, 0.8345, 0.1647, 0.1188,
        0.8157, 0.9108, 0.8653, 0.1595, 0.1632, 0.7679, 0.7823, 0.7779, 0.7583,
        0.7601])



