# Experiment 3

This id is used as a prefix for the figure names.

In [None]:
# Build a unique prefix for every figure file saved by this notebook.
from datetime import datetime

exp_id = 'experiment3'
# str(datetime.now()) contains ':' and '.', and ':' is not a valid file-name
# character on Windows, so the timestamp is formatted explicitly instead.
exp_id += '_' + datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

### Imports

In [None]:
import sys
sys.path.insert(1, '../')

In [None]:
from utils import plot_graph, independent_cascade_scorer
from graph_loaders import load_graph
import matplotlib.pyplot as plt
import numpy as np
from approx import GBFGreedy, GBFInterpolant
from kernels import VarSpline, Diffusion
import networkx as nx
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from external_algorithms import ICgreedy
# Set the default Matplotlib font size to 16 for all figures created below.
plt.rcParams.update({'font.size': 16})

### Load a graph

In [None]:
# Location handed to load_graph (the parent of the working directory).
path = '../'
# Exactly one load_graph call is active ('sensor1'); the commented-out lines
# are alternative data sets that can be enabled by swapping the comments.
# G = load_graph('sensor2', path=path)
G = load_graph('sensor1', path=path)
# G = load_graph('emptyset', path=path)
# G = load_graph('2moon', path=path)
# G = load_graph('minnesota', path=path)
# G = load_graph('rand', path=path)
# G = load_graph('rand_sparse', path=path)
# G = load_graph('bunny', path=path)

# Alternative: a synthetic networkx graph with a spectral layout stored in
# the 'pos' node attribute.
# G = nx.dorogovtsev_goltsev_mendes_graph(7)
# pos = nx.spectral_layout(G, center=[0.5, 0.5])
# nx.set_node_attributes(G, pos, 'pos')

In [None]:
# Show the size of the loaded graph as (len(G), number of edges).
(len(G), len(G.edges))

## Independent cascade

In [None]:
# Settings shared by the point selection methods and the IC scoring below.
max_iter = 10   # Max number of points to be selected
p = 0.2         # Propagation probability
mc = 500        # Number of Monte-Carlo simulations

In [None]:
# Baseline selection computed by the external ICgreedy routine.
# NOTE(review): presumably returns the selected node indices and the spread
# reached after each selection -- confirm against external_algorithms.ICgreedy.
idx_IC, spread = ICgreedy(G, max_iter, p, mc)

## PageRank

In [None]:
# Compute the PageRank score of every node and list the nodes from the
# highest score to the lowest (ties keep the order of the score dict).
pg = nx.link_analysis.pagerank(G)
idx_pagerank = sorted(pg, key=pg.get, reverse=True)

## $P$-greedy

Define an optimization set

In [None]:
# One sample per graph node (its integer index) with a constant target of one.
X_train = np.arange(len(G))
y_train = np.ones(X_train.shape)

Define the common params for the point selection and optimization

In [None]:
# Stopping tolerances for the greedy point selection.
tol_p = 1e-10  # Tolerance on the max of the squared power function
tol_f = 1e-12  # Tolerance on the residual

In [None]:
def mean_error(y_true, y_pred):
    """Return the mean absolute difference between targets and predictions.

    Both inputs are converted to float arrays first, so plain sequences are
    accepted and unsigned-integer or boolean arrays are subtracted as floats
    instead of wrapping around or raising.

    Args:
        y_true: array-like of reference values.
        y_pred: array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred|`` as a NumPy float.
    """
    residual = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(residual))


def max_error(y_true, y_pred):
    """Return the largest absolute difference between targets and predictions.

    Inputs are converted to float arrays in the same way as in ``mean_error``.

    Args:
        y_true: array-like of reference values.
        y_pred: array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The maximum of ``|y_true - y_pred|`` as a NumPy float.
    """
    residual = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.max(np.abs(residual))

# Wrap mean_error as a scikit-learn scorer. greater_is_better=False declares
# it a loss, so the grid search prefers parameters with a smaller mean error.
scorer = make_scorer(mean_error, greater_is_better=False)
cv = 5          # number of cross-validation folds
n_jobs = -1     # number of parallel jobs (-1: all available cores)
grid_size = 25   # number of points per dimension of the parameter grid

In [None]:
# Kernel instance handed to the greedy estimator.
# NOTE(review): the meaning of the two VarSpline parameters is defined in
# kernels.VarSpline -- confirm there before changing them.
kernel = VarSpline(G, par=[-1.1, 0.01])
# kernel = Diffusion(G, par=[-10])

Select the points for the optimized kernel

In [None]:
# Search grid: the kernel name plus every parameter pair [-x, y], where x runs
# over a log grid from 1e-1 to 1e1 and y over a log grid from 1e-16 to 1e0
# (grid_size points in each direction, x varying slowest).
params = dict(
    kernel=['VarSpline'],
    kernel_par=[
        [-x, y]
        for x in np.logspace(-1, 1, grid_size)
        for y in np.logspace(-16, 0, grid_size)
    ],
)

# params['diffusion'] = {
#         'kernel': ['Diffusion'],
#         'kernel_par': [[-x] for x in np.logspace(-2, 2, grid_size)]
# }

In [None]:
# Cross-validated grid search over `params` around a GBFGreedy estimator
# configured with greedy_type='p_greedy' and no regularization (reg_par=0).
# refit=True retrains the best parameter combination on all of X_train, which
# is what makes model.best_estimator_ available afterwards.
model = GridSearchCV(GBFGreedy(G, kernel=kernel, greedy_type='p_greedy', 
                                            reg_par=0, 
                                            max_iter=max_iter, tol_p=tol_p, tol_f=tol_f, 
                                            verbose=False), 
                                  param_grid=params, scoring=scorer, n_jobs=n_jobs, 
                                  cv=cv, refit=True, verbose=1)

# Run the search: every grid point is fitted on each of the cv folds.
model.fit(X_train, y_train)

Check the optimal parameters

In [None]:
# Kernel parameters of the estimator refitted by the grid search.
print(model.best_estimator_.kernel.par)

## Compare

In [None]:
# Flatten the ctrs_ attribute of the refitted estimator into a plain list
# (presumably the node indices it selected -- confirm against GBFGreedy).
idx_kernel = model.best_estimator_.ctrs_.flatten().tolist()
# idx_IC already ok
# Keep only the first max_iter entries of the PageRank ranking.
idx_pagerank = idx_pagerank[:max_iter]

### Visualize the selected points

In [None]:
import os

# One panel per selection method. In each panel the selected nodes are given
# the value 0 while all other nodes keep their y_train value, and only the
# selected nodes are passed to plot_graph as nodelist.
fig = plt.figure(figsize=(15, 5))
panels = (('kernel', idx_kernel), ('IC', idx_IC), ('pagerank', idx_pagerank))
for column, (title, selected) in enumerate(panels, start=1):
    ax = plt.subplot(1, 3, column)
    v = y_train.copy()
    v[selected] = 0
    plot_graph(G, ax=ax, values=v,
               nodelist=selected,
               show_cb=False)
    ax.set_title(title)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/' + exp_id + '_points' + '.pdf', bbox_inches='tight')

### Visualize the order of the first selected points

In [None]:
import pandas as pd

# One column per method, each listing that method's nodes in selection order.
# The 'degree' column holds the max_iter nodes with the highest degree.
data = {
    'kernel': idx_kernel,
    'IC': idx_IC,
    'pagerank': idx_pagerank,
    'degree': [
        node
        for node, _ in sorted(G.degree, key=lambda entry: entry[1], reverse=True)
    ][:max_iter],
}

points = pd.DataFrame(data)
points.head(10)

In [None]:
# Nodes selected by all three methods.
set(points['kernel']).intersection(points['IC'], points['pagerank'])

In [None]:
# Nodes selected by both the kernel method and IC.
set(points['kernel']).intersection(points['IC'])

In [None]:
# Nodes selected by both the kernel method and PageRank.
set(points['kernel']).intersection(points['pagerank'])

In [None]:
# Nodes selected by both IC and PageRank.
set(points['IC']).intersection(points['pagerank'])

### Visualize the IC score

In [None]:
# Score each method's selection with independent_cascade_scorer, using the
# same propagation probability p and number of Monte-Carlo runs mc.
# The methods are evaluated in the order kernel, IC, pagerank.
ICscore = {
    method_id: independent_cascade_scorer(G, selected, p, mc)
    for method_id, selected in (
        ('kernel', idx_kernel),
        ('IC', idx_IC),
        ('pagerank', idx_pagerank),
    )
}

In [None]:
import os

# Plot the IC score of every method, one curve per entry of ICscore.
fig = plt.figure(figsize=(6, 5))
ax = fig.gca()
for method_id, scores in ICscore.items():
    ax.plot(scores, 'o-', label=method_id)

ax.legend(fontsize=16)

ax.set_xlabel('Number of nodes', fontsize=16)
ax.set_ylabel('IC score', fontsize=16)

# Tick.label is no longer available in current Matplotlib releases;
# tick_params is the supported way to set the tick label size.
ax.tick_params(axis='both', which='major', labelsize=16)
ax.grid(True)
# The y axis is fixed to [0, 1]. An earlier set_ylim call based on
# ICscore['IC'] was always overridden by this one, so it has been dropped.
ax.set_ylim([0, 1])

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/' + exp_id + '_IC_score' + '.pdf', bbox_inches='tight')

### Visualize the variance

In [None]:
# Reuse the kernel of the refitted best estimator for the comparison below.
kernel = model.best_estimator_.kernel

In [None]:
def _power_function_history(indices):
    """Track the power function while the points in `indices` are added one by one.

    For every prefix indices[:k], k = 1..len(indices), a GBFInterpolant using
    the current `kernel` is fitted to constant-one data on that prefix and
    its power function is evaluated on X_train.

    Args:
        indices: sequence of node indices in selection order.

    Returns:
        A pair (maxima, means) of lists; entry k - 1 holds the max / mean of
        the power function over X_train after the first k points.
    """
    interpolant = GBFInterpolant(G, kernel=kernel, reg_par=0, verbose=False)
    maxima = []
    means = []
    for n_selected in range(1, len(indices) + 1):
        interpolant.fit(indices[:n_selected], np.ones(n_selected))
        power = interpolant.eval_power_fun(X_train)
        maxima.append(np.max(power))
        means.append(np.mean(power))
    return maxima, means


# Same computation for every method, keyed by method name.
p_max = {}
p_mean = {}
for method_id, indices in (('kernel', idx_kernel),
                           ('IC', idx_IC),
                           ('pagerank', idx_pagerank)):
    p_max[method_id], p_mean[method_id] = _power_function_history(indices)

In [None]:
import os

# Plot, for every method, the maximum of the power function normalised by
# its own largest value.
# NOTE(review): entry i of each curve belongs to i + 1 selected nodes, so the
# x axis is shifted by one relative to its 'Number of nodes' label.
fig = plt.figure(figsize=(6, 5))
ax = fig.gca()
# Iterate over p_max itself so the cell does not depend on the ICscore dict.
for method_id, values in p_max.items():
    ax.plot(np.asarray(values) / np.max(values), 'o-', label=method_id)

ax.legend(fontsize=16)

ax.set_xlabel('Number of nodes', fontsize=16)
ax.set_ylabel('Max standard deviation', fontsize=16)

# Tick.label is no longer available in current Matplotlib releases;
# tick_params is the supported way to set the tick label size.
ax.tick_params(axis='both', which='major', labelsize=16)
ax.grid(True)
ax.set_ylim([0, 1.1])

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/' + exp_id + '_p_max' + '.pdf', bbox_inches='tight')

In [None]:
import os

# Plot, for every method, the mean of the power function normalised by its
# own largest value.
# NOTE(review): entry i of each curve belongs to i + 1 selected nodes, so the
# x axis is shifted by one relative to its 'Number of nodes' label.
fig = plt.figure(figsize=(6, 5))
ax = fig.gca()
# Iterate over p_mean itself so the cell does not depend on the ICscore dict.
for method_id, values in p_mean.items():
    ax.plot(np.asarray(values) / np.max(values), 'o-', label=method_id)

ax.legend(fontsize=16)

ax.set_xlabel('Number of nodes', fontsize=16)
ax.set_ylabel('Mean standard deviation', fontsize=16)

# Tick.label is no longer available in current Matplotlib releases;
# tick_params is the supported way to set the tick label size.
ax.tick_params(axis='both', which='major', labelsize=16)
ax.grid(True)
ax.set_ylim([0, 1.1])

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/' + exp_id + '_p_mean' + '.pdf', bbox_inches='tight')