In [15]:
from cc_model.load_datasets import *
from cc_model.wl import *
from cc_model.utils import nx_to_gt
from cc_model.pagerank import all_pagerank
from cc_model.rewire import *

# Explicit imports are kept below the star imports so they win any name clash.
import os
import time
from pathlib import Path

import graph_tool.all as gt
import networkx as nx
import numpy as np

In [16]:
# Names of the datasets processed by this notebook.
# Commented-out entries are currently disabled; uncomment a name to include it.
datasets = [
    "karate",
    # "phonecalls",
    "HepPh",
    # "AstroPh", "web-Google", "soc-Pokec",
    # "deezer_HR", "deezer_HU", "deezer_RO", "tw_musae_DE",
    # "tw_musae_ENGB", "tw_musae_FR", "lastfm_asia", "fb_ath",
    # "fb_pol", "facebook_sc",
]

In [17]:
# Root directory that holds the datasets. The location is machine-specific, so it can be
# overridden with the CC_DATASET_PATH environment variable; when that variable is unset,
# the original hard-coded location is used.
dataset_path = Path(
    os.environ.get(
        "CC_DATASET_PATH",
        "/home/felix/projects/colorful_configuration/datasets",
    )
)

In [18]:
#edges, is_directed = load_dataset(dataset_path, "soc-Pokec")

In [19]:
# Settings forwarded as keyword arguments to every all_pagerank call in this notebook.
epsilon = 1e-14  # passed as `epsilon`; presumably the convergence tolerance (confirm in cc_model.pagerank)
max_iter = 300  # passed as `max_iter`; presumably the iteration cap (confirm in cc_model.pagerank)
alpha = 0.85  # passed as `alpha`; presumably the PageRank damping factor (confirm in cc_model.pagerank)

In [20]:
def run_pagerank(g, WL_round, outer_iter=10, verbosity=0):
    """Compare the PageRank of `g` with the PageRank of rewired samples of `g`.

    Samples are drawn from a LocalHistogramRewiring ensemble built from `g` and the
    vertex property ``color_<WL_round>`` (presumably the WL colors of that round).

    Parameters
    ----------
    g : graph exposing ``vp`` and ``is_directed()``
        Reference graph; must carry the vertex property ``color_<WL_round>``.
    WL_round : int
        Index selecting which ``color_<WL_round>`` vertex property is used.
    outer_iter : int
        Number of samples drawn from the ensemble.
    verbosity : int
        > 0 prints the error reported by all_pagerank for each sample and the
        per-sample maximum absolute deviation; > 4 also prints the sample index.

    Returns
    -------
    list
        One entry per sample: the sum of absolute differences between the PageRank
        of `g` and the PageRank of the sample (a sum, not a mean).
    """
    pagerank_mode = "theirs"
    reference = all_pagerank(g, pagerank_mode, epsilon=epsilon, max_iter=max_iter, alpha=alpha)
    # Unconditional output of the directedness of the input graph.
    print(g.is_directed())

    colors = g.vp[f"color_{WL_round}"].get_array()
    ensemble = LocalHistogramRewiring(g, colors)

    sampled_pageranks = []
    for sample_index in range(outer_iter):
        if verbosity > 4:
            print("    ", sample_index)

        sample = ensemble.get_sample()
        # Every sample has to keep the directedness of the reference graph.
        assert sample.is_directed() == g.is_directed()

        sample_pagerank, iteration_err = all_pagerank(
            sample,
            pagerank_mode,
            epsilon=epsilon,
            max_iter=max_iter,
            alpha=alpha,
            return_err=True,
        )
        if verbosity > 0:
            print("the error in pagerank iteration is:\r\n", iteration_err)
        sampled_pageranks.append(sample_pagerank)

    # Summed absolute deviation from the reference PageRank, one value per sample.
    summed_errors = []
    for sample_pagerank in sampled_pageranks:
        summed_errors.append(np.sum(np.abs(reference - sample_pagerank)))

    if verbosity > 0:
        largest_errors = [
            np.max(np.abs(reference - sample_pagerank))
            for sample_pagerank in sampled_pageranks
        ]
        print("max", largest_errors)
    return summed_errors

In [21]:
def get_MAE_for_iterations(g, n_graphs, verbosity=None):
    """Collect PageRank error statistics of rewired samples for every WL round of `g`.

    For each round returned by WL, a copy of `g` is passed to run_pagerank, which
    returns one error value per sampled graph. Despite the "MAE" in the name, those
    values are sums of absolute differences as computed by run_pagerank.

    Parameters
    ----------
    g : graph accepted by WL and gt.Graph
        Input graph. WL is called with ``add_labelings=True``, which presumably
        attaches the ``color_<round>`` vertex properties that run_pagerank reads.
    n_graphs : int
        Number of samples per round (forwarded to run_pagerank as ``outer_iter``).
    verbosity : int, optional
        > 0 prints the index of each round. When omitted, the module-level
        ``verbosity`` variable is used if it exists (this is what the function
        previously read implicitly); otherwise 0. Previously a missing global
        raised NameError.

    Returns
    -------
    (list, list)
        Mean and standard deviation of the per-sample errors, one entry per round.
    """
    if verbosity is None:
        # Backward compatible: honour a notebook-level `verbosity` if one was set.
        verbosity = globals().get("verbosity", 0)

    means = []
    stds = []
    WL_iterations, _ = WL(g, add_labelings=True)
    for WL_round in range(WL_iterations):
        # Hand a fresh Graph built from `g` to each round.
        g_rewire = gt.Graph(g)
        if verbosity > 0:
            print(WL_round)

        MAEs = run_pagerank(g_rewire, WL_round, outer_iter=n_graphs, verbosity=0)
        means.append(np.mean(MAEs))
        stds.append(np.std(MAEs))
    return means, stds

In [22]:
def compute_pagerank_on_all_Graphs(n_graphs, verbosity=0):
    """Run get_MAE_for_iterations on every dataset listed in the global `datasets`.

    Parameters
    ----------
    n_graphs : int
        Number of synthetic graphs sampled per WL round (forwarded to
        get_MAE_for_iterations).
    verbosity : int
        > 0 prints the dataset name; > 3 additionally prints loading progress and
        the repr of the loaded graph. Also forwarded to load_gt_dataset_cached.

    Returns
    -------
    (list, list)
        Per-dataset lists of means and standard deviations, in the order of
        `datasets`. A ``None`` entry in `datasets` yields an empty list in both,
        so the positions stay aligned with `datasets`.
    """
    list_means = []
    list_stds = []
    for dataset in datasets:
        if dataset is None:
            # Placeholder entry: keep the result lists aligned with `datasets`.
            list_means.append([])
            list_stds.append([])
            continue
        if verbosity > 0:
            print(dataset)
        if verbosity > 3:
            print("reading graph")
        # NOTE(review): force_reload=True is passed on every call; confirm this is
        # intended for a loader named "cached".
        G = load_gt_dataset_cached(dataset_path, dataset, verbosity=verbosity, force_reload=True)
        print(G.num_edges(), G.num_vertices())
        if verbosity > 3:
            print("done reading graph")
            print("starting WL")
            print(repr(G))
        means, stds = get_MAE_for_iterations(G, n_graphs=n_graphs)
        list_means.append(means)
        list_stds.append(stds)
    return list_means, list_stds

In [23]:
from collections import Counter

In [24]:
# Scratch check of Counter.most_common: index [1][1] is the count of the second most common element.
Counter([1,1,2]).most_common(2)[1][1]

1

In [25]:
# Load the snakeviz profiling extension; the run cell has a commented-out %%snakeviz magic that relies on it.
%load_ext snakeviz

The snakeviz extension is already loaded. To reload it, use:
  %reload_ext snakeviz


In [26]:
#%%snakeviz --new-tab

# NOTE: get_MAE_for_iterations reads this module-level `verbosity` to decide whether
# to print the index of each WL round; it is separate from the keyword argument below.
verbosity=1
# 42 is n_graphs: the number of synthetic graphs sampled per WL round and dataset.
list_means, list_stds = compute_pagerank_on_all_Graphs(42, verbosity=1)

karate
78 34
0
False
1
False
2
False
HepPh
421578 34546
0
True


KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

In [None]:
# One error-bar curve per dataset: mean error per WL round with its standard deviation.
fig, ax = plt.subplots(figsize=(10, 6))
for dataset_means, dataset_stds, dataset_name in zip(list_means, list_stds, datasets):
    wl_rounds = np.arange(len(dataset_means))
    # The tiny offset presumably keeps exact zeros representable on the log axis.
    shifted_means = np.array(dataset_means) + 1e-20
    ax.errorbar(x=wl_rounds, y=shifted_means, yerr=dataset_stds, label=dataset_name)
ax.set_ylabel("MAE of pagerank")
ax.set_xlabel("Iteration")
ax.set_yscale("log")
ax.legend()
ax.set_title("Convergence of pagerank for synthetic networks ")