<a href="https://colab.research.google.com/github/Patschwemm/Container-Localisation/blob/main/Exercise7b_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install igraph
!pip install grakel

In [21]:
import numpy as np
import igraph
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC 
import grakel
import grakel.graph as gg
from grakel.datasets import fetch_dataset
from grakel.utils import cross_validate_Kfold_SVM
from grakel.kernels import WeisfeilerLehman, VertexHistogram
import pickle
from pathlib import Path

In [20]:
# Loads the NCI1 and DHFR datasets
NCI1 = fetch_dataset("NCI1", verbose=False)
DHFR = fetch_dataset("DHFR", verbose=False)

# Parallel lists: G[k] holds the graphs of dataset k, y[k] the matching labels
G = [NCI1.data, DHFR.data]
y = [NCI1.target, DHFR.target]

# Fixed seed so the cross-validation folds (and the reported scores) are
# reproducible across runs
CV_SEED = 42

# The loop targets deliberately use their own names: iterating with
# `for G, y in zip(G, y)` would overwrite the dataset lists with a single
# dataset and leave misleading state behind for later cells.
for graphs, labels in zip(G, y):
    # Generates a list of kernel matrices using the Weisfeiler-Lehman subtree
    # kernel, one matrix per number of iterations (n_iter = 1, ..., 6)
    Ks = [
        WeisfeilerLehman(
            n_iter=n_iter, base_graph_kernel=VertexHistogram, normalize=True
        ).fit_transform(graphs)
        for n_iter in range(1, 7)
    ]

    # Cross-validates an SVM over the candidate kernel matrices, repeating the
    # experiment 10 times (n_iter=10); the fold count and the grid for C are
    # left at the function's defaults. Ks is passed as a single group, so the
    # scores of interest are read from accs[0].
    accs = cross_validate_Kfold_SVM([Ks], labels, n_iter=10, random_state=CV_SEED)
    print("Average accuracy:", str(round(np.mean(accs[0])*100, 2)) + "%")
    print("Standard deviation:", str(round(np.std(accs[0])*100, 2)) + "%")

Average accuracy: 84.7%
Standard deviation: 0.22%
Average accuracy: 81.03%
Standard deviation: 1.18%


In [31]:
# Loads the NCI1 and DHFR datasets
NCI1 = fetch_dataset("NCI1", verbose=False)
DHFR = fetch_dataset("DHFR", verbose=False)

# Parallel lists: G[k] holds the graphs of dataset k, y[k] the matching labels
G = [NCI1.data, DHFR.data]
y = [NCI1.target, DHFR.target]

# The loop targets use their own names so the dataset lists above are not
# overwritten by a single dataset while iterating.
for graphs, labels in zip(G, y):
    # Holds out 10% of the graphs for testing; the fixed random_state keeps
    # the split identical on every run
    G_train, G_test, y_train, y_test = train_test_split(
        graphs, labels, test_size=0.1, random_state=42
    )

    # Unnormalized VertexHistogram kernel: fitted on the training graphs, then
    # applied to the test graphs to obtain the matrix passed to predict()
    gk = VertexHistogram(normalize=False)
    K_train = gk.fit_transform(G_train)
    K_test = gk.transform(G_test)

    # Uses the SVM classifier on the precomputed kernel matrices
    clf = SVC(kernel="precomputed")
    clf.fit(K_train, y_train)
    y_pred = clf.predict(K_test)

    # Computes and prints the classification accuracy
    acc = accuracy_score(y_test, y_pred)
    print("Accuracy:", str(round(acc*100, 2)) + "%")

Accuracy: 65.94%
Accuracy: 59.21%
