# Introduction to the basic computation of a GBF approximant

In [None]:
from utils import plot_graph
from graph_loaders import load_graph
import matplotlib.pyplot as plt
import numpy as np
from approx import GBFInterpolant
from kernels import VarSpline, Diffusion
import networkx as nx

### Load a graph

We start by loading a pre-defined graph to be used as an example. 

All the following graphs have coordinate information for each node (stored as an attribute `pos` scaled to `[0, 1]^2`) that is used for visualization purposes. However, this information is neither required by nor used in the approximation process, since the main code only assumes that `G` is a `networkx` graph.

In [None]:
# Select one of the pre-defined example graphs by leaving exactly one
# `load_graph` call uncommented.
# G = load_graph('wbc')
# G = load_graph('sensor2')
# G = load_graph('sensor1')
# G = load_graph('emptyset')
# G = load_graph('2moon')
# G = load_graph('minnesota')
# G = load_graph('rand')
# G = load_graph('rand_sparse')
G = load_graph('bunny')

# Alternative: generate a graph with networkx and attach a spectral layout as
# the `pos` node attribute, which later cells read to define and plot the signal.
# G = nx.dorogovtsev_goltsev_mendes_graph(7)
# pos = nx.spectral_layout(G, center=[0.5, 0.5])
# nx.set_node_attributes(G, pos, 'pos')

In [None]:
# Number of nodes in the loaded graph (displayed as the cell output).
len(G)

### Define a training and a test set

We define a signal/function on the nodes using the `pos` attribute. This is an interesting test as the approximation process does not have access to this attribute, and it tries to reconstruct the signal by using only information on the nodes' connectivity.

The signal `f` is defined as a Gaussian centered at the point `(0.5, 0.5)`, i.e., the center of the unit square that contains the node positions.

In [None]:
def f(x):
    """Evaluate the Gaussian test signal exp(-(4 * r)**2) row by row.

    `x` is an array with one 2D point per row; `r` is the Euclidean distance
    of each row from the point (0.5, 0.5).
    """
    radius = np.linalg.norm(x - [.5, .5], axis=1)
    return np.exp(-(4 * radius) ** 2)

We extract a random subset of 10% of the nodes to be used as the training set, and as a test set we use the entire set of nodes. All node sets are represented by the list of their indices in the graph.

In [None]:
# Size of the training set: 10% of the nodes (rounded down).
n_train = int(len(G) * 0.1)
# Draw the training indices without replacement from all node indices
# 0, ..., len(G) - 1, so that the training set is a genuine subset: no node is
# repeated and node 0 is eligible like any other node.
X_train = np.random.choice(len(G), size=n_train, replace=False)
# The test set is the full set of node indices.
X_test = np.arange(len(G))

Then, we assign the train and test values by evaluating `f`.

In [None]:
# Collect the 2D coordinates stored in the `pos` node attribute, one row per node.
pos = np.array([
    [xy[0], xy[1]]
    for xy in nx.get_node_attributes(G, 'pos').values()
])

# Evaluate the signal on every node; the training values are the entries at
# the training indices.
# NOTE(review): this assumes the row order of `pos` matches the node indices
# used in X_train / X_test — confirm against the graph loaders.
y_test = np.array(f(pos))
y_train = y_test[X_train]

The signal looks as follows. The training nodes are highlighted.

In [None]:
# Draw the target signal on the graph, passing the training nodes as `nodelist`.
fig = plt.figure(figsize=(7, 5))
fig.clf()
ax = fig.gca()
plot_graph(
    G,
    ax=ax,
    values=y_test,
    nodelist=X_train,
    cb_label='Target signal',
)

### Define a kernel

We now pick a Graph Basis Function as the kernel that will be used in the approximation. 
Kernels need to be implementations of the abstract class `GraphKernel` that is defined in `kernels.py`. The file also contains the implementation of some concrete kernels.

In [None]:
# Graph basis function used as the kernel; the VarSpline line is an
# alternative choice that can be uncommented instead.
# NOTE(review): the meaning of the entries of `par` is kernel-specific —
# confirm against the kernel implementations.
# kernel = VarSpline(G, par=[-1.1, 0.01])
kernel = Diffusion(G, par=[-10])

We visualize a translate of the kernel centered at one of the training points.

In [None]:
# Pick one training node at random. Calling `randint` without a `size` returns
# a scalar, which avoids the deprecated conversion of a 1-element array to int.
idx = int(np.random.randint(0, len(X_train)))
# Evaluate the kernel between every test node and the selected training node.
ker_eval = kernel.eval(X_test, X_train[idx])

fig = plt.figure(figsize=(7, 5))
fig.clf()
ax = fig.gca()
# The slice keeps `nodelist` a length-1 array holding only the selected node.
plot_graph(G, ax=ax, values=ker_eval, nodelist=X_train[idx:idx+1],
           cb_label='A GBF translate')

### Reconstruct the signal

We first initialize the approximant. Several approximants are available, and they are all implementations of the abstract class `GBFApprox` (see `approx.py`).

These methods may be initialized by passing a `GraphKernel` object, or a string and a list of parameters.

In [None]:
# Build the interpolant from an existing kernel object, with regularization
# parameter `reg_par`.
model = GBFInterpolant(G, kernel=kernel, reg_par=1e-12)

# Alternative: select the kernel by name instead of passing a kernel object.
# model = GBFInterpolant(G, kernel='Diffusion', reg_par=1e-12)

Observe that a `kernel_par` argument can also be passed explicitly. In that case the parameters of an existing kernel are overwritten, but only once the `fit` method is called.

In [None]:
# Create a kernel with par=[-10], then hand the model a different `kernel_par`.
kernel_tmp = Diffusion(G, par=[-10])
print(f'Before: {kernel_tmp!s}')

model_tmp = GBFInterpolant(
    G,
    kernel=kernel_tmp,
    kernel_par=[-1],
    reg_par=1e-12,
)
print(f'After:  {model_tmp.kernel!s}')

# Fit with dummy data, then print the kernel again to compare its parameters.
model_tmp.fit([0], [0])
print(f'After fit:  {model_tmp.kernel!s}')

We can now fit the approximant to the training data.

In [None]:
# Fit the interpolant to the training node indices and their signal values.
model.fit(X_train, y_train)

### Compute the model predictions

Now that the model is trained, we can compute the predictions on the test set.

In [None]:
# Evaluate the fitted model on every test node index.
s_test = model.predict(X_test)

We then compute the absolute and relative errors. The denominator of the relative error is clipped from below to avoid dividing by zero.

In [None]:
# Lower bound applied to |y_test| in the relative-error denominator.
rel_err_tol = 1e-10

# Pointwise absolute error on the test nodes.
abs_err_test = np.abs(y_test - s_test)

# Relative error, with the denominator clipped from below at `rel_err_tol`
# so that nodes where the target is (almost) zero do not divide by zero.
denom = np.clip(np.abs(y_test), rel_err_tol, np.inf)
rel_err_test = abs_err_test / denom

### Visualize

Finally, we visualize some results: the original and the reconstructed signal.

In [None]:
# Side-by-side panels: target signal (left) and reconstruction (right), each
# drawn with the model centers passed as `nodelist`.
fig = plt.figure(figsize=(15, 5))
signal_panels = [
    (y_test, 'Target signal'),
    (s_test, 'Reconstructed signal'),
]
for panel_idx, (panel_values, panel_label) in enumerate(signal_panels, start=1):
    ax = plt.subplot(1, 2, panel_idx)
    plot_graph(G, ax=ax, values=panel_values, nodelist=model.ctrs_,
               cb_label=panel_label)

And the absolute and relative test errors.

In [None]:
# Side-by-side panels: absolute error (left) and clipped relative error
# (right), both drawn with `log_scale=True`.
fig = plt.figure(figsize=(15, 5))
error_panels = [
    (abs_err_test, 'Absolute Error'),
    (rel_err_test, 'Relative Error (clipped to %2.2e)' % rel_err_tol),
]
for panel_idx, (panel_values, panel_label) in enumerate(error_panels, start=1):
    ax = plt.subplot(1, 2, panel_idx)
    plot_graph(G, ax=ax, values=panel_values, nodelist=model.ctrs_,
               cb_label=panel_label, log_scale=True)