# MUTAG Example

The goal of the example is not to achieve the best performance, but to show how the GraphESN classifier can be used for a graph-level task.

In [5]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the local source package) are reloaded before each cell executes and
# edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import os
import shutil
import statistics as stats
import sys

import numpy as np
import torch
import torch.nn.functional as F
from scipy.sparse.linalg import eigs
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.utils import to_scipy_sparse_matrix

# Put the sibling `src` directory on the import path so that the local
# `graphesn` module can be imported.
sys.path.append(
    os.path.abspath('../src')
)
from graphesn import GraphESN

# Make sure the directory the dataset is stored in exists.
os.makedirs('../data', exist_ok=True)

In [2]:
# Dataset.shuffle() draws its permutation from torch's global random number
# generator, so seed it first: the shuffled order (and every split taken from
# it) is then the same on every run instead of changing with each execution.
SEED = 42
torch.manual_seed(SEED)

dataset = TUDataset(root="../data", name="MUTAG")
dataset = dataset.shuffle()

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Processing...
Done!


In [3]:
# Sequential hold-out split: the first 80% of the dataset is the development
# (train + validation) portion, the remaining 20% is the test portion.
train_size = int(len(dataset) * 0.8)
train_set, test_set = dataset[:train_size], dataset[train_size:]

# Each loader serves its entire split as a single batch, in dataset order.
train_loader = DataLoader(train_set, shuffle=False, batch_size=train_size)
test_loader = DataLoader(test_set, shuffle=False, batch_size=len(test_set))

### Hyperparameters

In [14]:
def compute_spectral_radius(data):
    """Return the spectral radius of a graph's adjacency matrix.

    The spectral radius is the largest absolute value among the eigenvalues
    of the adjacency matrix built from the graph's edge list.

    Args:
        data: A graph object exposing ``edge_index`` and ``num_nodes``.

    Returns:
        The largest eigenvalue magnitude, as a NumPy floating-point scalar.
    """
    A = to_scipy_sparse_matrix(data.edge_index, num_nodes=data.num_nodes)

    if A.shape[0] < 3:
        # The sparse solver behind `eigs` needs k < N - 1, so it cannot handle
        # graphs with fewer than 3 nodes; a dense solve is trivial at this size.
        eigenvalues = np.linalg.eigvals(A.toarray())
    else:
        eigenvalues = eigs(A.tocsc(), k=1, which='LM', return_eigenvectors=False)

    # Eigenvalues may be complex and therefore cannot be ordered; compare
    # their magnitudes instead of sorting the raw values.
    return np.abs(eigenvalues).max()

# Spectral radius of every graph in the dataset; the mean is shown as the
# cell's output.
spectral_radii = list(map(compute_spectral_radius, dataset))
stats.mean(spectral_radii)

np.float32(2.467464)

In [20]:
# Sizes
INPUT_SIZE = dataset.num_features  # number of input features, taken from the dataset
RESERVOIR_SIZE = 100               # number of nodes in the reservoir

# Scaling factors
RHO = 1.2    # spectral radius used for re-scaling the reservoir weights
OMEGA = 0.5  # input scale for the input weights

# Names of the weight-initialisation schemes handed to the GraphESN constructor
INPUT_INITIALIZATION = 'sign'
REC_INITIALIZATION = 'ring'

In [21]:
# Build the model from the hyperparameter constants.
model = GraphESN(
    INPUT_SIZE,
    RESERVOIR_SIZE,
    RHO,
    OMEGA,
    INPUT_INITIALIZATION,
    REC_INITIALIZATION,
)

# Take the first batch produced by the training loader.
train_data = next(iter(train_loader))

model.fit(
    train_data.x,
    train_data.edge_index,
    train_data.batch,
    train_data.y,
)

In [27]:
def evaluate(model, data):
    """Run ``model`` on a batch and score its predictions against ``data.y``.

    Args:
        model: Callable taking ``(x, edge_index, batch)`` and returning the
            predicted labels.
        data: Batch exposing ``x``, ``edge_index``, ``batch`` and ``y``.

    Returns:
        A tuple ``(output, correct, accuracy)``: the model output, the number
        of predictions equal to their label, and that number divided by the
        number of labels.
    """
    output = model(data.x, data.edge_index, data.batch)
    correct = output.eq(data.y).sum().item()
    return output, correct, correct / len(data.y)


# Accuracy on the training data
train_output, correct, accuracy = evaluate(model, train_data)
print(f'Training Accuracy: {accuracy * 100:.2f}%')

# Accuracy on the test data (first batch produced by the test loader)
test_data = next(iter(test_loader))
test_output, correct, accuracy = evaluate(model, test_data)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Training Accuracy: 94.67%
Test Accuracy: 86.84%


### Clean All

In [None]:
# Delete the whole '../data' directory, including everything stored in it.
# The existence check keeps the cell re-runnable: without it, running the
# cell again after the directory is gone raises FileNotFoundError.
if os.path.isdir('../data'):
    shutil.rmtree('../data')