In [1]:
from time import time
import numpy as np
import networkx as nx

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import svm

from grakel import datasets, GraphKernel, graph_from_networkx

### Load datasets

In [2]:
dataset_name = "hard_small" # options: {easy_small, easy, hard_small, hard}

# SECURITY NOTE: allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code -- only load .npz files from a trusted source.
# The context manager closes the archive once every array has been read.
with np.load('datasets/'+dataset_name+'.npz', allow_pickle=True) as loaded:
    A_train = list(loaded['tr_adj']) # list of adjacency matrices
    X_train = loaded['tr_feat'] # node features
    y_train = loaded['tr_class'] # class labels
    A_test = list(loaded['te_adj']) # list of adjacency matrices
    X_test = loaded['te_feat'] # node features
    y_test = loaded['te_class'] # class labels


def _sparse_to_graph(adjacency):
    """Build a networkx graph from one scipy sparse adjacency matrix.

    ``nx.from_scipy_sparse_matrix`` was deprecated in NetworkX 2.7 and removed
    in 3.0; ``nx.from_scipy_sparse_array`` is its replacement. Prefer the new
    name and fall back to the old one so the notebook runs on either version.
    """
    if hasattr(nx, "from_scipy_sparse_array"):
        return nx.from_scipy_sparse_array(adjacency)
    return nx.from_scipy_sparse_matrix(adjacency)


def _to_networkx(adjacencies, features):
    """Return one networkx graph per (adjacency, node-feature matrix) pair.

    Every node i of a graph receives a 'features' attribute holding row i of
    the matching feature matrix, converted to a tuple.
    """
    graphs = []
    for adjacency, node_features in zip(adjacencies, features):
        graph = _sparse_to_graph(adjacency)
        rows = tuple(map(tuple, node_features))
        nx.set_node_attributes(graph, dict(enumerate(rows)), 'features')
        graphs.append(graph)
    return graphs


# Convert to networkx format
G_tr = _to_networkx(A_train, X_train)
G_te = _to_networkx(A_test, X_test)

# Convert to GraKel format, using the per-node 'features' tuples as node labels
G_train = list(graph_from_networkx(G_tr, node_labels_tag='features'))
G_test = list(graph_from_networkx(G_te, node_labels_tag='features'))

# Reduce each label vector to the index of its largest entry
# (presumably the labels are stored one-hot -- TODO confirm against the dataset)
y_train = np.argmax(y_train, axis=-1)
y_test = np.argmax(y_test, axis=-1)

### Train and evaluate graph kernels

In [3]:
# Graph kernels to benchmark, evaluated in the order listed.
# More kernels and their options are documented at:
# https://ysig.github.io/GraKeL/dev/generated/grakel.GraphKernel.html#grakel.GraphKernel
kernel_names = [
    "shortest_path",
    "graphlet_sampling",
    "pyramid_match",
    "svm_theta",
    "neighborhood_hash",
    "subtree_wl",
    "odd_sth",
    "propagation",
    "vertex_histogram",
    "weisfeiler_lehman",
    "core_framework",
]

# General kernels: these are configured with "shortest_path" as a second entry
# in the kernel specification; every other name is used as a base kernel alone.
composite_kernels = ("weisfeiler_lehman", "core_framework")

for k_ in kernel_names:

    # The timer covers kernel computation, SVM fitting and prediction
    start = time()

    # Assemble the kernel specification, then build the (normalised) kernel
    kernel_spec = [{"name": k_}]
    if k_ in composite_kernels:
        kernel_spec.append({"name": "shortest_path"})
    gk = GraphKernel(kernel=kernel_spec, normalize=True)

    # Kernel matrices: fit on the training graphs, then transform the test graphs
    K_train = gk.fit_transform(G_train)
    K_test = gk.transform(G_test)

    # SVM on the precomputed kernel matrix
    clf = svm.SVC(kernel='precomputed', C=1)
    clf.fit(K_train, y_train)

    # Test-set predictions and their classification accuracy
    y_pred = clf.predict(K_test)
    acc = accuracy_score(y_test, y_pred)

    end = time()

    # Report accuracy as a percentage and elapsed wall-clock time in seconds
    acc_pct = str(round(acc*100, 2))
    elapsed = str(round(end - start, 2))
    print(k_, "-- Accuracy:", acc_pct, "% | Took:", elapsed, "s")

shortest_path -- Accuracy: 69.23 % | Took: 7.85 s
graphlet_sampling -- Accuracy: 38.46 % | Took: 37.84 s
pyramid_match -- Accuracy: 23.08 % | Took: 2.86 s
svm_theta -- Accuracy: 23.08 % | Took: 2.91 s
neighborhood_hash -- Accuracy: 69.23 % | Took: 2.71 s
subtree_wl -- Accuracy: 15.38 % | Took: 0.03 s
odd_sth -- Accuracy: 42.31 % | Took: 24.48 s
propagation -- Accuracy: 53.85 % | Took: 2.61 s
pyramid_match -- Accuracy: 23.08 % | Took: 3.47 s
vertex_histogram -- Accuracy: 15.38 % | Took: 0.01 s
weisfeiler_lehman -- Accuracy: 73.08 % | Took: 58.92 s
core_framework -- Accuracy: 69.23 % | Took: 18.62 s
