In [1]:
import numpy as np
import warnings 
from scipy.spatial.distance import pdist
from sklearn.metrics.pairwise import pairwise_kernels
import os
from nystrom import generate_nystrom_data, nystrom_kernel

# Suppress every warning for the rest of the kernel session. This is a blanket
# filter: it also hides deprecation and numerical warnings raised by numpy,
# scipy and scikit-learn, so remove it temporarily when debugging odd results.
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'nystrom'

### Get Data

In [None]:
# Build the sample set with the project-local `nystrom` helper.
# NOTE(review): `data` is later passed to `pdist` and `pairwise_kernels`, so it
# is presumably a 2-D (n_samples, n_features) array — confirm against
# `nystrom.generate_nystrom_data`.
data = generate_nystrom_data()

### Kernel Nyström Approximation

In [3]:
# Bandwidth heuristic: sigma is the mean pairwise Euclidean distance between
# samples; gamma is the RBF coefficient derived from it.
sigma = np.mean(pdist(data, metric='euclidean'))
gamma = 1 / (2 * sigma**2)
n_jobs = 1
kernel = 'rbf'

# Full kernel matrix of `data` against itself; the approximations below are
# scored against it.
K = pairwise_kernels(data, metric=kernel, n_jobs=n_jobs, gamma=gamma)

n_col_indices = 200 # number of columns to sample
n_components = 100  # rank
random_state = 123  # reproducibility

# One pass per SVD back-end, as (solver name, rank handed to the solver).
# The randomized solver gets the explicit rank; ARPACK is called with
# n_components=None. ARPACK runs last, so `svd` and the factor variables hold
# the ARPACK results once the loop finishes.
for svd, rank in (('randomized', n_components), ('arpack', None)):
    U_approx, D_approx, C = nystrom_kernel(
        K, n_col_indices,
        n_components=rank,
        random_state=random_state,
        svd=svd)

    # Rebuild the kernel from the returned factors: U * D * U^T.
    K_approx = U_approx.dot(D_approx).dot(U_approx.T)

    # Frobenius norm of the residual between exact and approximated kernel.
    err = np.linalg.norm(K - K_approx, 'fro')
    print('Error ({}): {:.3f}'.format(svd, err))

Error (randomized): 4.915
Error (arpack): 3.620


In [4]:
print('ARPACK Version')
%timeit nystrom_kernel(K, n_col_indices, n_components=None, random_state=random_state, svd=svd)

print('Randomized Version')
%timeit nystrom_kernel(K, n_col_indices, n_components=None, random_state=random_state, svd=svd)

ARPACK Version
53.2 ms ± 964 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Randomized Version
53.1 ms ± 1.48 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
