In [1]:
import analysis_nx
import nx_approx_methods
import networkx as nx
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Read the soc-Epinions1 data file through the project loader and keep the
# result as `g`, the full graph every later cell derives its components from.
# NOTE(review): the loader's return type is not visible here; later cells treat
# the derived components as NetworkX graphs -- confirm in analysis_nx.
g = analysis_nx.load_graph('data/soc-Epinions1/soc-Epinions1.txt')

In [3]:
# Ask the project helper for the largest strongly connected component of `g`.
# NOTE(review): the `_view` suffix suggests the helper returns a subgraph view
# that still references `g` -- confirm in analysis_nx.
lscc_view = analysis_nx.calculate_largest_strongly_connected_comp(g)

In [4]:
# Turn the component into a standalone graph object: as_view=False asks for a
# real copy rather than another view onto the parent graph.
lscc = lscc_view.copy(as_view=False)

In [6]:
# Store the standalone LSCC on disk as a pickle file.
# NOTE(review): nx.write_gpickle was deprecated in NetworkX 2.6 and removed in
# 3.0, so this cell needs networkx < 3; the relative 'graphs/' directory
# presumably has to exist already -- confirm.
nx.write_gpickle(lscc, 'graphs/epi-lscc.gpickle')

In [7]:
# Ask the project helper for the largest weakly connected component of `g`.
# NOTE(review): the `_view` suffix suggests the helper returns a subgraph view
# that still references `g` -- confirm in analysis_nx.
lwcc_view = analysis_nx.calculate_largest_weakly_connected_comp(g)

In [8]:
# Turn the component into a standalone graph object: as_view=False asks for a
# real copy rather than another view onto the parent graph.
lwcc = lwcc_view.copy(as_view=False)

In [9]:
# Store the standalone LWCC on disk as a pickle file.
# NOTE(review): nx.write_gpickle was deprecated in NetworkX 2.6 and removed in
# 3.0, so this cell needs networkx < 3; the relative 'graphs/' directory
# presumably has to exist already -- confirm.
nx.write_gpickle(lwcc, 'graphs/epi-lwcc.gpickle')

In [10]:
# Sampling fractions to sweep. Each value is multiplied by a component's node
# count to decide how many nodes get sampled for that run.
acc_ar = [0.02, 0.03, 0.04, 0.05, 0.06]
# Alternative fractions left disabled in the notebook:
#   0.07, 0.08, 0.09, 0.1, 0.15, 0.2, 0.25, 0.3
#   0.35, 0.4, 0.45, 0.5, 0.6

# One list per statistic, filled with one entry per fraction in acc_ar.
# "s_" = largest strongly connected component (LSCC).
s_median_ar = []
s_mean_ar = []
s_diam_ar = []
s_eff_diam_ar = []

# "w_" = largest weakly connected component (LWCC).
w_median_ar = []
w_mean_ar = []
w_diam_ar = []
w_eff_diam_ar = []

In [None]:
%%time
for accuracy in acc_ar:

    print('=====LSCC=====')
    #print('LSCC edges: \t', lscc.num_edges())
    #print('LSCC nodes: \t', lscc.num_vertices())
    print('accuracy is ', accuracy * 100, '% corresponds to ', int(accuracy * lscc.number_of_nodes()), 'sampled number of nodes of LSCC')

    n_samples = int(accuracy * lscc.number_of_nodes())
    bfs_graph_lscc_view = nx_approx_methods.method_2(lscc, n_samples)
    bfs_graph_lscc = bfs_graph_lscc_view.copy(as_view=False)
    lscc_dists = analysis_nx.compute_shortest_path_distances_parallel_mp(bfs_graph_lscc)
    s_median, s_mean, s_diam, s_eff_diam = analysis_nx.compute_stats(lscc_dists)
    s_median_ar.append(s_median)
    s_mean_ar.append(s_mean)
    s_diam_ar.append(s_diam)
    s_eff_diam_ar.append(s_eff_diam)

    print('=====LWCC=====')
    #print('LWCC edges: \t', lwcc.num_edges())
    #print('LWCC nodes: \t', lwcc.num_vertices())
    print('accuracy is ', accuracy * 100, '% corresponds to ', int(accuracy * lwcc.number_of_nodes()), 'sampled number of nodes of LWCC')

    n_samples = int(accuracy * lwcc.number_of_nodes())
    bfs_graph_lwcc_view = nx_approx_methods.method_2(lwcc, n_samples)
    bfs_graph_lwcc = bfs_graph_lwcc_view.copy(as_view=False)
    lwcc_dists = analysis_nx.compute_shortest_path_distances_parallel_mp(bfs_graph_lwcc)
    s_median, s_mean, s_diam, s_eff_diam = analysis_nx.compute_stats(lwcc_dists)
    w_median_ar.append(s_median)
    w_mean_ar.append(s_mean)
    w_diam_ar.append(s_diam)
    w_eff_diam_ar.append(s_eff_diam)

=====LSCC=====
accuracy is  2.0 % corresponds to  644 sampled number of nodes of LSCC
taking 644 samples
4 cores used
=====LWCC=====
accuracy is  2.0 % corresponds to  1517 sampled number of nodes of LWCC
taking 1517 samples
4 cores used


In [None]:
# Dump the collected estimates: first line LSCC, second line LWCC, each in the
# order median, mean, diameter, effective diameter.
for stats_row in (
    (s_median_ar, s_mean_ar, s_diam_ar, s_eff_diam_ar),
    (w_median_ar, w_mean_ar, w_diam_ar, w_eff_diam_ar),
):
    print(*stats_row)

In [20]:
# enter exact values for soc-Epinions1:
s_ex_median, s_ex_mean, s_ex_dia, s_ex_eff_dia = 4, 4.405, 16, 6
w_ex_median, w_ex_mean, w_ex_dia, w_ex_eff_dia = 4, 4.308, 15, 5

In [None]:
# LSCC figure: approximate statistics per sampling fraction (solid lines)
# against the exact reference values (dashed lines of the same colour).
fig, ax = plt.subplots(figsize=(12, 6))

# (legend label, estimates per fraction, exact value, colour)
lscc_series = [
    ('Median distance', s_median_ar, s_ex_median, 'navy'),
    ('Mean distance', s_mean_ar, s_ex_mean, 'r'),
    ('Diameter', s_diam_ar, s_ex_dia, 'orange'),
    ('Effective Diameter', s_eff_diam_ar, s_ex_eff_dia, 'g'),
]
for label, estimates, exact, colour in lscc_series:
    ax.plot(acc_ar, estimates, label=label, color=colour)
    ax.axhline(exact, linestyle='dashed', color=colour)

ax.legend()
title = 'approximation method: sample random sources + BFS \n approximate network statistics as a function of the accuracy parameter for  the LSCC \n for the network: soc-Epinions1'
ax.set_title(title)
ax.set_xlabel('accuracy')
ax.set_ylabel('distance')
# `bbox_inches='tight'` trims surrounding whitespace; the previous keyword
# `bordes` is not a savefig option.
fig.savefig('2_2_' + 'method2_' + 'soc-Epinions1' + '_lscc', dpi=300, bbox_inches='tight')

In [None]:
# LWCC figure: approximate statistics per sampling fraction (solid lines)
# against the exact reference values (dashed lines of the same colour).
fig, ax = plt.subplots(figsize=(12, 6))

# (legend label, estimates per fraction, exact value, colour)
lwcc_series = [
    ('Median distance', w_median_ar, w_ex_median, 'navy'),
    ('Mean distance', w_mean_ar, w_ex_mean, 'r'),
    ('Diameter', w_diam_ar, w_ex_dia, 'orange'),
    ('Effective Diameter', w_eff_diam_ar, w_ex_eff_dia, 'g'),
]
for label, estimates, exact, colour in lwcc_series:
    ax.plot(acc_ar, estimates, label=label, color=colour)
    ax.axhline(exact, linestyle='dashed', color=colour)

ax.legend()
title = 'approximation method: sample random sources + BFS \n approximate network statistics as a function of the accuracy parameter for  the LWCC \n for the network: soc-Epinions1'
ax.set_title(title)
ax.set_xlabel('accuracy')
ax.set_ylabel('distance')
# Save under an LWCC-specific name: the previous '_lscc' suffix overwrote the
# LSCC figure. `bbox_inches='tight'` replaces the invalid `bordes` keyword.
fig.savefig('2_2_' + 'method2_' + 'soc-Epinions1' + '_lwcc', dpi=300, bbox_inches='tight')