# Introduction to the point selection via P-greedy with parameter optimization

In [None]:
from utils import plot_graph
from graph_loaders import load_graph
import matplotlib.pyplot as plt
import numpy as np
from approx import GBFGreedy
from kernels import VarSpline, Diffusion
import networkx as nx
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from itertools import product

### Load a graph

We start by loading a pre-defined graph to be used as an example. 

In [None]:
# Pick one of the pre-defined example graphs by uncommenting the matching line.
# G = load_graph('wbc')
# G = load_graph('sensor2')
# G = load_graph('sensor1')
# G = load_graph('emptyset')
# G = load_graph('2moon')
# G = load_graph('minnesota')
# G = load_graph('rand')
# G = load_graph('rand_sparse')
G = load_graph('bunny')
# G = load_graph('star')

# Alternative: build a networkx generator graph and store a spectral layout
# as the 'pos' node attribute.
# G = nx.dorogovtsev_goltsev_mendes_graph(7)
# pos = nx.spectral_layout(G, center=[0.5, 0.5])
# nx.set_node_attributes(G, pos, 'pos')

### Define an optimization set

In this case the focus is on point selection only. This means that we can use all the nodes as a training set, but without the need to have target values `y_train`.

In [None]:
# Node indices 0, ..., len(G) - 1: all nodes of the graph form the training set.
X_train = np.arange(len(G))

We use a constant vector of ones as a target. This is used to choose the parameters in the next step.

In [None]:
# Placeholder target: one entry equal to 1 for every node of G.
y_train = np.ones(len(G))

The signal looks as follows. The training nodes are highlighted.

### Optimize the parameters and reconstruct the signal

We first define a metric to rank the performances of the different parameters. In this case the best model is the one providing the smallest mean error.

In [None]:
def mean_error(y_true, y_pred):
    """Return the mean absolute error between two signals.

    Parameters
    ----------
    y_true : array-like
        Reference values.
    y_pred : array-like
        Predicted values, broadcastable against `y_true`.

    Returns
    -------
    float
        Mean of the element-wise absolute differences.
    """
    # Coerce to arrays so that plain Python sequences are accepted too
    # (subtracting two lists directly raises a TypeError).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(np.abs(y_true - y_pred))

# Wrap the metric as a scikit-learn scorer. With greater_is_better=False the
# scorer sign-flips the metric, so a smaller mean error ranks higher.
scorer = make_scorer(mean_error, greater_is_better=False)

We define the parameters to be optimized.

In [None]:
n_grid = 5  # Number of grid values along each parameter axis

# Grid for the Diffusion kernel.
# Every entry is wrapped in a list because GridSearchCV reads each value of a
# parameter dict as a list of candidate settings; each dict built below is
# therefore a grid with a single point.
kernel = [['Diffusion']]
reg_par = [[x] for x in np.logspace(-15, 0, n_grid)]
kernel_par = [[-x] for x in np.logspace(-1, 2, n_grid)]
params_1 = [{'kernel': k, 'reg_par': r, 'kernel_par': kp}
            for k, r, kp in product(kernel, reg_par, kernel_par)]

# Grid for the VarSpline kernel (two kernel parameters per entry).
kernel = [['VarSpline']]
reg_par = [[x] for x in np.logspace(-15, 0, n_grid)]
# Both axes are sized by n_grid so the whole grid scales with that one knob.
kernel_par = [[-x, y]
              for x in np.logspace(-1, 2, n_grid)
              for y in np.linspace(0, 10, n_grid)]
# NOTE(review): each kernel_par entry here is the two-element list [-x, y].
# As a value of a GridSearchCV parameter dict it is read as two scalar
# candidates (-x and y), not as one parameter pair. If VarSpline expects a
# pair, the entries should be [[-x, y]] -- confirm against the kernel's API.
params_2 = [{'kernel': k, 'reg_par': r, 'kernel_par': kp}
            for k, r, kp in product(kernel, reg_par, kernel_par)]

# Join the two grids
params = params_1 + params_2

In [None]:
# Display the single-point grids built for the Diffusion kernel.
params_1

In this case, we wrap the approximation model into a `GridSearchCV`. We use all the available cores and run `cv=5`-fold cross validation, with final refitting. Here we turn off the regularization (i.e., `reg_par=0`) since we are interested purely in the variance minimization.

In [None]:
max_iter = 100  # Max number of points to be selected
tol_p = 1e-12   # Tolerance on the max of the squared power function
tol_f = 1e-12   # Tolerance on the residual (not passed to the model below)

# NOTE(review): `params` also scans nonzero `reg_par` values, while the
# accompanying text says regularization is turned off -- confirm which one
# is intended.
# n_jobs=-1 lets scikit-learn use every available core instead of a
# machine-specific worker count.
model = GridSearchCV(GBFGreedy(G, greedy_type='p_greedy',
                               max_iter=max_iter, tol_p=tol_p,
                               verbose=False),
                     params, scoring=scorer, n_jobs=-1, cv=5,
                     refit=True, verbose=1)

We can now fit the approximant to the training data.

In [None]:
# Run the cross-validated search over all candidate parameter settings;
# since the search was created with refit=True, the best one is refitted.
model.fit(X_train, y_train)

Finally, we visualize the selected parameters.

In [None]:
# Parameter setting that obtained the best cross-validation score.
model.best_params_

### Visualize the selected points and the decay of the power function

We visualize the training history.

In [None]:
# History stored by the refitted best model under the key 'p'
# (presumably the max of the power function per iteration -- TODO confirm).
p_max = model.best_estimator_.train_hist['p']

We estimate the algebraic rate of decay of the power function.

In [None]:
# 1-based iteration counter, and the number of trailing iterations
# (30% of the history) used for the fit.
nn = np.arange(1, len(p_max) + 1)
tail_size = int(0.3 * len(p_max))

# Least-squares line in log-log scale over the tail of the history:
# coeff_max[0] is the slope (the algebraic rate), coeff_max[1] the intercept.
coeff_max = np.polyfit(
    x=np.log(nn)[-tail_size:],
    y=np.log(p_max)[-tail_size:],
    deg=1,
)

In [None]:
fig = plt.figure(figsize=(7, 5))
ax = fig.gca()
# Plot against the 1-based iteration count `nn`, i.e. the same abscissa used
# for the fit; the default 0-based index would shift the curve and lose its
# first point on the logarithmic axis.
a = ax.loglog(nn, p_max)
# Fitted power law exp(intercept) * n^slope over the last 2 * tail_size
# iterations, drawn dashed in the colour of the data curve.
ax.loglog(nn[-2*tail_size:], np.exp(coeff_max[1]) * nn[-2*tail_size:] ** coeff_max[0],
          '--', color=a[0].get_color())
ax.legend(['Max of the power function', '$n^{%2.2f}$' % coeff_max[0]], fontsize=16, loc=(1.1, 0.1))
ax.set_xlabel('Number of nodes', fontsize=16)
# `Tick.label` no longer exists in recent Matplotlib releases; tick_params
# sets the label size on both axes in one call.
ax.tick_params(axis='both', which='major', labelsize=16)
ax.grid(True)

And the selected points. In this case we visualize the power function values as a signal.

In [None]:
# Power function of the refitted best model, evaluated on all training nodes.
p = model.best_estimator_.eval_power_fun(X_train)

In [None]:
# Single-axes figure showing the power function values as a graph signal;
# the nodes stored in `ctrs_` are passed as the node list.
fig, ax = plt.subplots(figsize=(7, 5))
plot_graph(
    G,
    ax=ax,
    values=p,
    nodelist=model.best_estimator_.ctrs_,
    cb_label='Power function',
)

We also visualize the error with respect to the constant signal of all ones.

In [None]:
# Prediction of the refitted best model on the training nodes ...
s_train = model.predict(X_train)
# ... and its pointwise absolute deviation from the target (despite the
# "_test" suffix, this is computed on the training nodes).
abs_err_test = np.absolute(y_train - s_train)

In [None]:
# Fresh single-axes figure showing the absolute error as a graph signal on a
# logarithmic colour scale; the nodes stored in `ctrs_` are passed as the
# node list.
fig, ax = plt.subplots(figsize=(7, 5))
plot_graph(
    G,
    ax=ax,
    values=abs_err_test,
    nodelist=model.best_estimator_.ctrs_,
    cb_label='Absolute Error',
    log_scale=True,
)