In [5]:
from time import time
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import svm

from grakel import datasets, GraphKernel, graph_from_networkx

### Load datasets

In [6]:
dataset_name = "hard_small"  # options: {easy_small, easy, hard_small, hard}

# Read the train/test graphs and their class arrays from the .npz archive.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects (it is
# needed here because the archive stores graph objects); only load archives
# that come from a trusted source.
# The context manager closes the archive's file handle as soon as the arrays
# have been read, instead of leaving it open for the lifetime of the kernel.
with np.load('datasets/' + dataset_name + '.npz', allow_pickle=True) as loaded:
    G_tr = list(loaded['tr_graphs'])  # networkx graphs
    y_train = loaded['tr_class']
    G_te = list(loaded['te_graphs'])  # networkx graphs
    y_test = loaded['te_class']

# Put the data in GraKeL format. graph_from_networkx is materialised into a
# list so the converted graphs can be reused for every kernel evaluated later.
G_train = list(graph_from_networkx(G_tr, node_labels_tag='features'))
G_test = list(graph_from_networkx(G_te, node_labels_tag='features'))

# Collapse the last axis of the class arrays into integer class indices
# (presumably the labels are stored one-hot encoded -- TODO confirm).
y_train = np.argmax(y_train, axis=-1)
y_test = np.argmax(y_test, axis=-1)

### Train and evaluate graph kernels

In [7]:
# Create a list with the graph kernels to evaluate
# For more kernels and information:
# https://ysig.github.io/GraKeL/dev/generated/grakel.GraphKernel.html#grakel.GraphKernel
# Each kernel appears exactly once so that no kernel matrix is computed (and
# reported) twice.
kernel_names = [
    "shortest_path",
    "graphlet_sampling",
    "pyramid_match",
    "svm_theta",
    "neighborhood_hash",
    "subtree_wl",
    "odd_sth",
    "propagation",
    "vertex_histogram",
    "weisfeiler_lehman",
    "core_framework",
]

# Kernels that are configured together with a second kernel entry
# ("propagation") rather than on their own.
framework_kernels = ("weisfeiler_lehman", "core_framework")

# NOTE(review): some kernels (e.g. "graphlet_sampling") presumably rely on
# random sampling, so accuracies may vary between runs -- consider fixing a
# seed if the installed GraKeL version supports it.
for k_ in kernel_names:

    # The reported time covers kernel computation, SVM fitting and prediction.
    start = time()

    # Framework kernels: paired with "propagation" as the second kernel entry
    if k_ in framework_kernels:
        gk = GraphKernel(kernel=[{"name": k_}, {"name": "propagation"}], normalize=True)

    # Base kernels: used on their own
    else:
        gk = GraphKernel(kernel=[{"name": k_}], normalize=True)

    # Calculate the kernel matrices: train-vs-train for fitting, then
    # test-vs-train for prediction
    K_train = gk.fit_transform(G_train)
    K_test = gk.transform(G_test)

    # Initialise an SVM on the precomputed kernel and fit
    clf = svm.SVC(kernel='precomputed', C=1)
    clf.fit(K_train, y_train)

    # Compute predictions on test set
    y_pred = clf.predict(K_test)

    # Calculate accuracy of classification
    acc = accuracy_score(y_test, y_pred)

    end = time()
    print(k_, "-- Accuracy:", str(round(acc*100, 2)), "% | Took:",
          str(round(end - start, 2)), "s")

shortest_path -- Accuracy: 72.22 % | Took: 22.68 s
graphlet_sampling -- Accuracy: 44.44 % | Took: 69.99 s
pyramid_match -- Accuracy: 36.11 % | Took: 5.68 s
svm_theta -- Accuracy: 33.33 % | Took: 7.04 s
neighborhood_hash -- Accuracy: 80.56 % | Took: 5.13 s
subtree_wl -- Accuracy: 30.56 % | Took: 0.04 s
odd_sth -- Accuracy: 44.44 % | Took: 51.12 s
propagation -- Accuracy: 36.11 % | Took: 5.52 s
pyramid_match -- Accuracy: 36.11 % | Took: 5.79 s
vertex_histogram -- Accuracy: 30.56 % | Took: 0.03 s
weisfeiler_lehman -- Accuracy: 27.78 % | Took: 104.29 s




core_framework -- Accuracy: 36.11 % | Took: 19.8 s
