# P04-03: Small-world networks

*November 16 2022*

In the final unit of this week we study properties of synthetic and empirical small-world networks and show how we can visualize path funnelling using `pathpy`. You can use this unit as a basis for your solutions to some of the tasks on exercise sheet 3.

In [1]:
import pathpy as pp
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from constants import ROOT_DIR

# Global plotting setup: select matplotlib's 'default' style first, then
# apply seaborn's "whitegrid" style on top (the order of the two calls is kept).
plt.style.use('default')
sns.set_style("whitegrid")

  import pandas.util.testing as tm


# Shortest Paths in random vs. small-world networks

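One implication of the small-world property is that the shortest paths from a node to the rest of the network tend to be funnelled through only a few of its neighbors. In the following, we visualize this path funnelling in random, regular and small-world networks.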

In [2]:
# Random network from pathpy's ER_np generator (n=50, p=0.15, no self-loops).
n_random = pp.generators.ER_np(n=50, p=0.15, loops=False)

# Plot styling, forwarded as keyword arguments to the network's plot() call.
style = {
    'edge_directed': False,
    'edge_color': 'gray',
    'node_color': 'CornflowerBlue',
    'node_label_size': 2,
    'node_size': 20,
    'edge_size': 1.0,
}

# Keyword arguments for pp.layout(): algorithm name, canvas size and force.
layout_style = {
    'layout': 'Fruchterman-Reingold',
    'height': 600,
    'width': 400,
    'force': 0.5,
}

# Compute the node positions once, then draw the network with them.
layout = pp.layout(n_random, **layout_style)
n_random.plot(**style, layout=layout)

In [3]:
def funelling_plot(n, style=None, layout=None, source=None):
    """Plot how shortest paths from one node are funnelled through its neighbors.

    Builds the shortest-path tree rooted at ``source`` and, for every
    successor of ``source`` in ``n``, looks up the size of that successor's
    subtree via ``pp.algorithms.tree_size``. The network is then plotted with
    ``source`` in red and its successors in green, labelled
    ``'<uid>: <tree size>'`` and with node and label size growing with the
    tree size. All remaining nodes are drawn small and blue, labelled with
    their uid.

    Parameters
    ----------
    n
        The pathpy network to analyse and plot.
    style : dict, optional
        Extra keyword arguments forwarded to ``n.plot``. The entries
        ``node_label_size``, ``node_size`` and ``node_color`` are always
        replaced by per-node values. The dict passed in is copied and never
        modified.
    layout : optional
        Precomputed layout forwarded to ``n.plot``. When ``None``, a layout is
        computed with ``pp.layout(n, layout='fr', force=0.5)``.
    source : optional
        Uid of the root node. When ``None``, a uid is drawn with
        ``np.random.choice`` and printed.
    """
    # Work on a private copy: the per-node dicts written below are keyed by
    # the uids of *this* network, so they must neither leak into the caller's
    # dict nor be shared between calls through a mutable default argument.
    style = dict(style) if style is not None else {}

    uids = list(n.nodes.uids)
    if source is None:
        source = np.random.choice(uids)
        print(source)

    # Baseline appearance for every node; the source and its successors are
    # overridden below.
    labels = {uid: uid for uid in uids}
    style['node_label_size'] = {uid: 2 for uid in uids}
    style['node_size'] = {uid: 20 for uid in uids}
    style['node_color'] = {uid: 'CornflowerBlue' for uid in uids}
    style['node_color'][source] = 'Red'

    tree = pp.algorithms.shortest_path_tree(n, source=source)
    # Sanity check on the computed structure, not on user input.
    assert pp.algorithms.check_tree(tree), 'shortest-path tree failed check_tree'

    # Scale and label each successor of the source by the size of its subtree.
    for v in n.successors[source]:
        size = pp.algorithms.tree_size(tree, v.uid)
        labels[v.uid] = f'{v.uid}: {size}'
        style['node_label_size'][v.uid] = 2 + 0.1 * size
        style['node_size'][v.uid] = 5 + 2 * size
        style['node_color'][v.uid] = 'Green'

    if layout is None:
        layout = pp.layout(n, layout='fr', force=0.5)

    n.plot(node_label=labels, node_id_as_label=False, **style, layout=layout)

In [4]:
# Rebind n_random to a network from the Watts-Strogatz generator with p=1
# (presumably: every link rewired, i.e. effectively random - confirm against
# the pathpy documentation). No source is passed, so funelling_plot picks a
# random node and prints its uid.
n_random = pp.generators.Watts_Strogatz(n=50, s=2, p=1, loops=False)
funelling_plot(n_random, style)

39


In [5]:
# Watts-Strogatz generator with p=0 (presumably: no rewiring, leaving the
# regular ring structure - confirm against the pathpy documentation), drawn
# with a one-dimensional 'lattice' layout instead of a force-directed one.
n_ring = pp.generators.Watts_Strogatz(n=50, s=2, p=0)
layout = pp.layout(n_ring, layout='lattice', dimension=1)
funelling_plot(n_ring, style, layout)

20


In [6]:
# Watts-Strogatz network with s=4 and a small p=0.05; no layout is passed, so
# funelling_plot computes its own layout.
n_ws = pp.generators.Watts_Strogatz(n=50, s=4, p=0.05)
funelling_plot(n_ws, style)

11


# Empirical small-world networks

Finally, we test which of the three empirical networks exhibit the small-world property. For this, we compare their average shortest path lengths and clustering coefficients to those of random networks with the same macrostate. If the average shortest path length is similar to that of a random network (or even smaller) and the clustering coefficient is much larger than expected at random, we call the network a small-world network.

In [7]:
# Load the three empirical networks from the SQLite database as undirected
# networks without self-loops, keeping only the largest connected component
# of each.
n_gentoo = pp.io.sql.read_network(
    f'{ROOT_DIR}/data/networks.db',
    sql='SELECT DISTINCT source, target FROM gentoo',
    directed=False,
    loops=False,
).largest_connected_component()

n_highschool = pp.io.sql.read_network(
    f'{ROOT_DIR}/data/networks.db',
    sql='SELECT DISTINCT source, target FROM highschool',
    directed=False,
    loops=False,
).largest_connected_component()

n_lotr = pp.io.sql.read_network(
    f'{ROOT_DIR}/data/networks.db',
    sql='SELECT DISTINCT source, target FROM lotr',
    directed=False,
    loops=False,
).largest_connected_component()



In [8]:
# Randomised counterpart of the gentoo network (ER_np_randomize presumably
# fits a random network to its size and density - confirm against the pathpy
# documentation); only its largest connected component is kept.
r_gentoo = pp.generators.ER_np_randomize(n_gentoo).largest_connected_component()

# Empirical (_e) vs. randomised (_r): average clustering coefficient (cc) and
# average shortest path length (<l>).
print('cc_e = ', pp.statistics.avg_clustering_coefficient(n_gentoo))
print('cc_r = ', pp.statistics.avg_clustering_coefficient(r_gentoo))
print('<l_e> = ', pp.algorithms.avg_path_length(n_gentoo))
print('<l_r> = ', pp.algorithms.avg_path_length(r_gentoo))

cc_e =  0.017357355654577633
cc_r =  0.0
<l_e> =  3.1096379146451367
<l_r> =  6.542920898738073


In [9]:
# Randomised counterpart of the highschool network (ER_np_randomize presumably
# fits a random network to its size and density - confirm against the pathpy
# documentation); only its largest connected component is kept.
r_highschool = pp.generators.ER_np_randomize(n_highschool).largest_connected_component()

# Empirical (_e) vs. randomised (_r): average clustering coefficient (cc) and
# average shortest path length (<l>).
print('cc_e = ', pp.statistics.avg_clustering_coefficient(n_highschool))
print('cc_r = ', pp.statistics.avg_clustering_coefficient(r_highschool))
print('<l_e> = ', pp.algorithms.avg_path_length(n_highschool))
print('<l_r> = ', pp.algorithms.avg_path_length(r_highschool))

cc_e =  0.446799543049543
cc_r =  0.05288245834464322
<l_e> =  5.362745098039215
<l_r> =  2.853154821250534


In [10]:
# Randomised counterpart of the lotr network (ER_np_randomize presumably fits
# a random network to its size and density - confirm against the pathpy
# documentation); only its largest connected component is kept.
r_lotr = pp.generators.ER_np_randomize(n_lotr).largest_connected_component()

# Empirical (_e) vs. randomised (_r): average clustering coefficient (cc) and
# average shortest path length (<l>).
print('cc_e = ', pp.statistics.avg_clustering_coefficient(n_lotr))
print('cc_r = ', pp.statistics.avg_clustering_coefficient(r_lotr))
print('<l_e> = ', pp.algorithms.avg_path_length(n_lotr))
print('<l_r> = ', pp.algorithms.avg_path_length(r_lotr))

cc_e =  0.5848854454659764
cc_r =  0.07093375603033196
<l_e> =  2.6724273369992146
<l_r> =  2.433958029401863


In [11]:
# Funnelling plot for the lotr network rooted at the node with uid 'Sam'; no
# style or layout is passed, so funelling_plot falls back to its defaults.
funelling_plot(n_lotr, source='Sam')