# Comparison of the decay of the approximation error for different kernels

In [None]:
from utils import plot_graph
from graph_loaders import load_graph
import matplotlib.pyplot as plt
import numpy as np
from approx import GBFInterpolant
from kernels import VarSpline, Diffusion, PolyDecay, Trivial
import networkx as nx

### Load a graph

We start by loading a pre-defined graph to be used as an example. 

In [None]:
# Name of the example graph handed to load_graph.  Alternatives that can be
# swapped in here: 'wbc', 'sensor2', 'sensor1', 'emptyset', '2moon',
# 'minnesota', 'rand', 'rand_sparse'.
graph_name = 'bunny'
G = load_graph(graph_name)

### Define a test set

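The signal `f` is a Gaussian centered at the point `(0.5, 0.5)` with a fixed width (scale factor `4`), evaluated on the 2D node positions. The center coincides with the middle of the graph only if the node positions are normalized to the unit square.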

In [None]:
def f(x):
    """Evaluate the Gaussian test signal at 2D points.

    Parameters
    ----------
    x : array_like of shape (n, 2)
        Point coordinates, one point per row.

    Returns
    -------
    numpy.ndarray of shape (n,)
        ``exp(-(4 * ||x_i - (0.5, 0.5)||) ** 2)`` for every row ``x_i``.
    """
    # np.asarray lets plain nested lists be passed as well as arrays
    # (a bare ``list - list`` would raise a TypeError).
    offsets = np.asarray(x) - [.5, .5]
    return np.exp(-(4 * np.linalg.norm(offsets, axis=1)) ** 2)

As a test set we use the entire set of nodes and we assign the train and test values by evaluating `f`.

In [None]:
# Node coordinates: the first two components of each node's 'pos' attribute,
# in the iteration order of the attribute dictionary.
# NOTE(review): this assumes that order matches the integer indices
# 0..len(G)-1 used in X_test -- confirm against load_graph.
node_pos = nx.get_node_attributes(G, 'pos')
pos = np.array([[p[0], p[1]] for p in node_pos.values()])

# Every node is a test node; the target values are the signal sampled at
# the node coordinates.
X_test = np.arange(len(G))
y_test = np.array(f(pos))

The signal looks as follows. 

In [None]:
# Single-axes figure showing the target values on the graph nodes.
fig, ax = plt.subplots(figsize=(7, 5))
plot_graph(G, values=y_test, ax=ax, cb_label='Target signal')

### Define a sequence of training sets

We compute the approximants on an increasing sequence of training sets with `nn[0]`, ..., `nn[-1]` nodes, where `nn` is a log-spaced sequence of at most `n_steps` distinct integers between `1` and `max_nodes`. 

The permutation `nodes_idx` is used to select the nodes.

In [None]:
# Largest training set: 90% of the nodes (rounded down).
n_steps = 20
max_nodes = int(0.9 * len(G))

# Log-spaced sizes from 1 to max_nodes, truncated to integers; np.unique
# drops the duplicates produced by truncation and returns them sorted.
log_sizes = np.logspace(0, np.log10(max_nodes), n_steps)
nn = np.unique(log_sizes.astype(int))

# Random ordering of all node indices; its leading entries are used as the
# training nodes.
nodes_idx = np.random.permutation(len(G))

### Define the kernels

We now pick some Graph Basis Functions as the kernels that will be used in the approximation. 

In [None]:
# Kernels to compare, keyed by the label used in titles and legends.
kernel = {
    'Diffusion': Diffusion(G, par=[-10]),
    'VarSpline': VarSpline(G, par=[-2.1, 0.01]),
    'PolyDecay': PolyDecay(G),
    'Trivial': Trivial(G),
}

We visualize a kernel translate for each kernel.

In [None]:
# Pick one random node as the centre of the kernel translate.  randint is
# called without `size` so that it returns a scalar: calling int() on a
# one-element array is deprecated since NumPy 1.25.
idx = int(np.random.randint(0, len(X_test)))

# Call each kernel's eval with all nodes and the selected centre node.
ker_eval = {}
for ker_id in kernel:
    ker_eval[ker_id] = kernel[ker_id].eval(X_test, X_test[idx])

# Two subplots per row, one subplot per kernel.
n_rows = int(np.ceil(len(kernel) / 2))
fig = plt.figure(figsize=(15, 7 * n_rows))
for i, ker_id in enumerate(kernel):
    ax = plt.subplot(n_rows, 2, i+1)
    # nodelist holds only the centre node (a length-1 slice of X_test).
    plot_graph(G, ax=ax, values=ker_eval[ker_id], nodelist=X_test[idx:idx+1], 
              cb_label='A GBF translate')
    ax.set_title(ker_id)

### Reconstruct the signal

We first initialize the approximants. 

In [None]:
# One interpolant per kernel, sharing the same regularization parameter and
# keyed by the kernel label.
model = {
    ker_id: GBFInterpolant(G, kernel=ker, reg_par=1e-15, verbose=False)
    for ker_id, ker in kernel.items()
}

We can now fit the approximants to the increasing sets of training data.

In [None]:
# NOTE(review): rel_err_tol is assigned here but not read anywhere in this
# cell -- kept in case a later cell depends on it.
rel_err_tol = 1e-10

# Error histories per kernel; entry k corresponds to nn[k] training nodes.
max_err_test = {ker_id: [] for ker_id in kernel}
rms_err_test = {ker_id: [] for ker_id in kernel}

# Normalization turning the 2-norm of the error vector into an RMS value.
rms_scale = np.sqrt(len(X_test))

for idx, n in enumerate(nn):
    print('[ %4d / %4d ] Fitting %2d models with %4d training nodes' %(idx + 1, len(nn), len(kernel), n))
    # The first n entries of the permutation select the training nodes.
    train_sel = nodes_idx[:n]
    X_train = X_test[train_sel]
    y_train = y_test[train_sel]
    for ker_id in kernel:
        # Refit on the current training set, then predict on every node.
        fitted = model[ker_id].fit(X_train, y_train)
        s_test = fitted.predict(X_test)
        abs_err_test = np.abs(y_test - s_test)
        max_err_test[ker_id].append(np.max(abs_err_test))
        rms_err_test[ker_id].append(np.linalg.norm(abs_err_test) / rms_scale)

### Visualize

We estimate the algebraic rate of decay of the various errors.

In [None]:
# Degree-1 least-squares fit of log(error) against log(n) for each kernel.
# Each entry is [slope, intercept]; the slope is the estimated algebraic
# decay rate.
log_nn = np.log(nn)
coeff_max = {
    ker_id: np.polyfit(log_nn, np.log(max_err_test[ker_id]), 1)
    for ker_id in kernel
}
coeff_rms = {
    ker_id: np.polyfit(log_nn, np.log(rms_err_test[ker_id]), 1)
    for ker_id in kernel
}

Finally, we visualize the decay of the max and RMS errors.

In [None]:
# Log-log plot of the max error against the number of training nodes,
# together with the fitted power law for each kernel.
fig = plt.figure(figsize=(7, 5))
fig.clf()
ax = fig.gca()

# Legend labels are collected in plotting order: for every kernel first the
# measured curve, then its fitted power law.
leg = []
for ker_id in kernel:
    slope, intercept = coeff_max[ker_id]
    a = ax.loglog(nn, max_err_test[ker_id], linewidth=2)
    # Dashed line: exp(intercept) * n**slope, in the colour of the data curve.
    ax.loglog(nn, np.exp(intercept) * nn ** slope, 
              '--', color=a[0].get_color())
    leg += [ker_id, '$n^{%2.2f}$' % slope]

ax.grid(True)
ax.legend(leg, fontsize=16, loc=(1.1, 0.1))
ax.set_xlabel('Number of nodes', fontsize=16)
ax.set_ylabel('Max Error', fontsize=16)
# Tick.label was removed in Matplotlib 3.8; tick_params sets the label size
# of the major ticks on both axes without touching individual Tick objects.
ax.tick_params(axis='both', which='major', labelsize=16)

In [None]:
# Log-log plot of the RMS error against the number of training nodes,
# together with the fitted power law for each kernel.
fig = plt.figure(figsize=(7, 5))
fig.clf()
ax = fig.gca()

# Legend labels are collected in plotting order: for every kernel first the
# measured curve, then its fitted power law.
leg = []
for ker_id in kernel:
    slope, intercept = coeff_rms[ker_id]
    a = ax.loglog(nn, rms_err_test[ker_id], linewidth=2)
    # Dashed line: exp(intercept) * n**slope, in the colour of the data curve.
    ax.loglog(nn, np.exp(intercept) * nn ** slope, 
              '--', color=a[0].get_color())
    leg += [ker_id, '$n^{%2.2f}$' % slope]

ax.grid(True)
ax.legend(leg, fontsize=16, loc=(1.1, 0.1))
ax.set_xlabel('Number of nodes', fontsize=16)
ax.set_ylabel('RMS Error', fontsize=16)
# Tick.label was removed in Matplotlib 3.8; tick_params sets the label size
# of the major ticks on both axes without touching individual Tick objects.
ax.tick_params(axis='both', which='major', labelsize=16)