# Encontro 8: Centro e Periferia
## Elisa Malzoni e Bruna Kimura

In [1]:
import sys
sys.path.append('..')

from random import choice
from itertools import permutations
from scipy import stats

import pandas as pd
import networkx as nx

import socnet as sn

In [2]:
# Module-level drawing settings on socnet, set once before any graph is shown.
# NOTE(review): the colour tuples look like RGB (white nodes, light-grey
# edges) — confirm against socnet.
sn.node_size = 10
sn.node_color = (255, 255, 255)

sn.edge_width = 1
sn.edge_color = (192, 192, 192)

# Presumably where node labels are drawn relative to the node — confirm.
sn.node_label_position = 'top center'

In [3]:
# Load the graph used by every later cell (simul reads this global g).
# NOTE(review): has_pos=True presumably makes socnet read node positions
# stored in the GML file — confirm against socnet.load_graph.
g = sn.load_graph('Renaissance.gml', has_pos=True)

# Draw the graph; nlab=True presumably enables node labels — confirm.
sn.show_graph(g, nlab=True)

In [4]:
def set_geodesic_successors(g, s, t):
    """Store, on every node, its successors along shortest s-t paths.

    The 'geodesic_successors' attribute of every node is first reset to an
    empty set. Then, for each shortest path from s to t yielded by
    nx.all_shortest_paths, every node on the path receives the node that
    follows it. Nodes that lie on no shortest path keep an empty set.
    """
    for node in g.nodes:
        g.nodes[node]['geodesic_successors'] = set()

    for path in nx.all_shortest_paths(g, s, t):
        # Walk the path as consecutive (node, next node) pairs.
        for current, following in zip(path, path[1:]):
            g.nodes[current]['geodesic_successors'].add(following)

In [5]:
# Pense que o atributo 'passages' abaixo indica quantas
# vezes um fluxo já passou por um nó ou por uma aresta.

def random_geodesic_successor(g, n):
    """Return a random element of node n's 'geodesic_successors' set.

    Raises IndexError (from random.choice) when the set is empty.
    """
    candidates = list(g.nodes[n]['geodesic_successors'])
    return choice(candidates)

def random_path_successor(g, n):
    """Return a random neighbor of n whose node 'passages' counter is 0.

    Restricting to nodes not yet passed through keeps the flow on a path
    (no repeated nodes). Raises IndexError (from random.choice) when no
    neighbor qualifies.
    """
    fresh_neighbors = [
        neighbor
        for neighbor in g.neighbors(n)
        if g.nodes[neighbor]['passages'] == 0
    ]
    return choice(fresh_neighbors)

def random_trail_successor(g, n):
    """Return a random neighbor of n reachable over an unused edge.

    An edge counts as unused while its 'passages' counter is 0, which keeps
    the flow on a trail (no repeated edges). Raises IndexError (from
    random.choice) when every incident edge has already been used.
    """
    unused_edge_neighbors = [
        neighbor
        for neighbor in g.neighbors(n)
        if g.edges[n, neighbor]['passages'] == 0
    ]
    return choice(unused_edge_neighbors)

def random_walk_successor(g, n):
    """Return a random neighbor of n, with no restriction (random walk).

    Raises IndexError (from random.choice) when n has no neighbors.
    """
    return choice(list(g.neighbors(n)))

In [6]:
def simulate_single_flow(g, s, t, func_traj, difusao):
    """Simulate one flow from s towards t and return the number of steps.

    The node/edge 'passages' counters and the node 'owner' flags are reset
    on entry, so after the call they describe this run only.

    Parameters:
        g: graph exposing g.nodes and g.edges with per-item attribute dicts.
        s: source node; it starts as the only owner, with 'passages' == 1.
        t: target node.
        func_traj: callable (g, n) -> successor chosen for owner n. It must
            raise IndexError when n has no admissible successor.
        difusao: value written to an owner's 'owner' flag once it has been
            processed in a step. False means the owner gives the item away
            (transfer); True means it keeps it (diffusion).

    Returns:
        The number of steps taken when t is reached, or -1 when a step
        reaches no node at all (the flow died out).
    """
    # Reset the per-run state: nobody has been visited and nobody owns the
    # item, except the source.
    for n in g.nodes:
        g.nodes[n]['passages'] = 0
        g.nodes[n]['owner'] = False
    g.nodes[s]['passages'] = 1
    g.nodes[s]['owner'] = True

    for n, m in g.edges:
        g.edges[n, m]['passages'] = 0

    steps = 0

    while True:
        # Nodes the flow manages to reach during the current step.
        reached = set()

        # Snapshot the owners first, so flags changed during this step do
        # not affect who moves in it.
        owners = [n for n in g.nodes if g.nodes[n]['owner']]

        for n in owners:
            # Applied even when n turns out to have no successor below:
            # in transfer mode a stuck owner still loses the item.
            g.nodes[n]['owner'] = difusao

            try:
                # Pick one successor according to the trajectory rule.
                m = func_traj(g, n)
            except IndexError:
                # No admissible successor: this owner contributes nothing.
                continue

            g.nodes[m]['passages'] += 1
            g.edges[n, m]['passages'] += 1
            reached.add(m)

        # Every node reached in this step becomes an owner.
        for n in reached:
            g.nodes[n]['owner'] = True

        steps += 1

        # Success: the target was reached in this step.
        if t in reached:
            return steps

        # Failure: nothing was reached, so the flow cannot continue.
        if not reached:
            return -1

In [7]:
def simulate_successful_flow(g, s, t, func_traj, difusao):
    """Re-run simulate_single_flow until a run reaches t; return its steps.

    The geodesic successors for (s, t) are computed once up front, before
    the first attempt. Failed attempts (result -1) are discarded and
    retried, so this never returns if no run can ever reach t.
    """
    set_geodesic_successors(g, s, t)

    outcome = simulate_single_flow(g, s, t, func_traj, difusao)
    while outcome == -1:
        outcome = simulate_single_flow(g, s, t, func_traj, difusao)
    return outcome

In [8]:
def simulate_all_flows(g, func_traj, difusao):
    """Estimate closeness and betweenness by simulating every ordered pair.

    For each ordered pair (s, t) of distinct nodes one successful flow is
    simulated. The step count is added to s's 'closeness', and every node
    other than s and t adds its 'passages' counter from that run to its
    'betweenness'. Both attributes are then normalised in place.

    Parameters:
        g: graph whose nodes receive 'closeness' and 'betweenness'.
        func_traj: successor rule forwarded to the flow simulation.
        difusao: diffusion flag forwarded to the flow simulation.

    Graphs too small for the normalisation to be defined (fewer than two
    nodes for closeness, fewer than three for betweenness) get 0 instead
    of raising ZeroDivisionError.
    """
    for n in g.nodes:
        g.nodes[n]['closeness'] = 0
        g.nodes[n]['betweenness'] = 0

    for s, t in permutations(g.nodes, 2):
        steps = simulate_successful_flow(g, s, t, func_traj, difusao)

        g.nodes[s]['closeness'] += steps
        for n in g.nodes:
            if n != s and n != t:
                g.nodes[n]['betweenness'] += g.nodes[n]['passages']

    # Normalisations needed to compare with the analytical results:
    # closeness becomes (N - 1) / total steps, and betweenness is divided
    # by the number of ordered pairs that exclude the node itself.
    others = g.number_of_nodes() - 1
    ordered_pairs = others * (others - 1)

    for n in g.nodes:
        total_steps = g.nodes[n]['closeness']
        # total_steps is 0 only when no flow started at n (single node).
        g.nodes[n]['closeness'] = others / total_steps if total_steps else 0
        if ordered_pairs > 0:
            g.nodes[n]['betweenness'] /= ordered_pairs

In [9]:
def simul(TIMES, func_traj, difusao):
    """Repeat simulate_all_flows TIMES times on the global graph g.

    Parameters:
        TIMES: number of repetitions; must be positive (0 raises
            ZeroDivisionError when the means are computed).
        func_traj: successor rule forwarded to simulate_all_flows.
        difusao: diffusion flag forwarded to simulate_all_flows.

    Returns:
        A tuple (df, lista_closeness, lista_betweenness) where df has one
        row per node with its label and its mean simulated closeness and
        betweenness, and each list holds, per node in g.nodes order, the
        TIMES individual values observed for that node.

    Side effects: writes 'mean_closeness' and 'mean_betweenness' on every
    node of g, besides the attributes written by simulate_all_flows.
    """
    nodes = list(g.nodes)

    # One sample list per node, sized from the graph rather than fixed at 15.
    lista_closeness = [[] for _ in nodes]
    lista_betweenness = [[] for _ in nodes]

    for n in nodes:
        g.nodes[n]['mean_closeness'] = 0
        g.nodes[n]['mean_betweenness'] = 0

    for _ in range(TIMES):
        simulate_all_flows(g, func_traj, difusao)
        for i, n in enumerate(nodes):
            closeness = g.nodes[n]['closeness']
            betweenness = g.nodes[n]['betweenness']
            g.nodes[n]['mean_closeness'] += closeness
            g.nodes[n]['mean_betweenness'] += betweenness
            lista_closeness[i].append(closeness)
            lista_betweenness[i].append(betweenness)

    for n in nodes:
        g.nodes[n]['mean_closeness'] /= TIMES
        g.nodes[n]['mean_betweenness'] /= TIMES

    df = pd.DataFrame({
        'família': [g.nodes[n]['label'] for n in nodes],
        'closeness simulado': [g.nodes[n]['mean_closeness'] for n in nodes],
        'betweenness simulado': [g.nodes[n]['mean_betweenness'] for n in nodes],
    })

    return df, lista_closeness, lista_betweenness

In [10]:
# Closeness and betweenness centralities computed directly by networkx.
# NOTE(review): presumably the analytical reference for the simulated
# values; cc and bc are not referenced again in the visible cells.
cc = nx.closeness_centrality(g)
bc = nx.betweenness_centrality(g)

In [11]:
# Baseline: geodesic successors with difusao=False, averaged over 100 runs.
# bmcl / bmbl hold the per-node closeness / betweenness samples.
bm, bmcl, bmbl = simul(100, random_geodesic_successor, difusao=False)
bmc, bmb = bm["closeness simulado"], bm["betweenness simulado"]
bm.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,0.115751,0.411834
std,0.136491,0.074293
min,0.0,0.285714
25%,0.010687,0.359211
50%,0.088407,0.4
75%,0.131648,0.474713
max,0.521264,0.56


In [12]:
# Geodesic / difusao=False run, ranked by simulated betweenness (highest first).
bm.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,0.521264,0.56,medici
3,0.256923,0.466667,guadagni
2,0.213791,0.482759,albizzi
5,0.142857,0.388889,salviati
8,0.12044,0.4,bischeri
13,0.115165,0.4375,barbadori
9,0.08989,0.482759,ridolfi
14,0.088407,0.388889,castellan
7,0.086593,0.482759,tornabuon
11,0.07956,0.424242,strozzi


In [13]:
# Geodesic / difusao=False run, ranked by simulated closeness (highest first).
bm.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,0.521264,0.56,medici
2,0.213791,0.482759,albizzi
7,0.086593,0.482759,tornabuon
9,0.08989,0.482759,ridolfi
3,0.256923,0.466667,guadagni
13,0.115165,0.4375,barbadori
11,0.07956,0.424242,strozzi
8,0.12044,0.4,bischeri
5,0.142857,0.388889,salviati
14,0.088407,0.388889,castellan


In [14]:
# Geodesic successors with difusao=True, averaged over 100 runs.
# gdcl / gdbl hold the per-node closeness / betweenness samples.
gd, gdcl, gdbl = simul(100, random_geodesic_successor, difusao=True)
gdc, gdb = gd["closeness simulado"], gd["betweenness simulado"]
gd.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,0.31178,0.411834
std,0.364444,0.074293
min,0.0,0.285714
25%,0.027912,0.359211
50%,0.246319,0.4
75%,0.361923,0.474713
max,1.388901,0.56


In [15]:
# Geodesic / difusao=True run, ranked by simulated betweenness (highest first).
gd.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,1.388901,0.56,medici
3,0.690549,0.466667,guadagni
2,0.569945,0.482759,albizzi
5,0.406593,0.388889,salviati
13,0.317253,0.4375,barbadori
8,0.314396,0.4,bischeri
9,0.24978,0.482759,ridolfi
7,0.246319,0.482759,tornabuon
14,0.230659,0.388889,castellan
11,0.206484,0.424242,strozzi


In [16]:
# Geodesic / difusao=True run, ranked by simulated closeness (highest first).
gd.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,1.388901,0.56,medici
2,0.569945,0.482759,albizzi
7,0.246319,0.482759,tornabuon
9,0.24978,0.482759,ridolfi
3,0.690549,0.466667,guadagni
13,0.317253,0.4375,barbadori
11,0.206484,0.424242,strozzi
8,0.314396,0.4,bischeri
5,0.406593,0.388889,salviati
14,0.230659,0.388889,castellan


In [17]:
# Per-node two-sample t-test on closeness: geodesic with difusao=False
# (bmcl) against geodesic with difusao=True (gdcl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
# NOTE(review): a nan p-value presumably means both samples are constant.
for baseline, other in zip(bmcl, gdcl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
nan
1.0


In [18]:
# Per-node two-sample t-test on betweenness: geodesic with difusao=False
# (bmbl) against geodesic with difusao=True (gdbl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
# NOTE(review): a nan p-value presumably means both samples are constant.
for baseline, other in zip(bmbl, gdbl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

nan
nan
9.9370164795e-192
1.05975286461e-224
nan
0.0
2.77427131376e-287
4.22000905581e-129
8.74646045401e-198
8.76950893095e-153
nan
1.81545044607e-125
5.17497688063e-62
2.06443523688e-251
3.73524077568e-203


In [19]:
# Path successors (no repeated nodes) with difusao=False, 100 runs.
# ptcl / ptbl hold the per-node closeness / betweenness samples.
pt, ptcl, ptbl = simul(100, random_path_successor, difusao=False)
ptc, ptb = pt["closeness simulado"], pt["betweenness simulado"]
pt.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,0.247989,0.243166
std,0.188047,0.026215
min,0.0,0.199159
25%,0.071429,0.230646
50%,0.251703,0.243339
75%,0.372857,0.260056
max,0.625989,0.300898


In [20]:
# Path / difusao=False run, ranked by simulated betweenness (highest first).
pt.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,0.625989,0.300898,medici
3,0.406044,0.268405,guadagni
14,0.386319,0.251973,castellan
8,0.372967,0.248088,bischeri
11,0.372747,0.234746,strozzi
9,0.362308,0.260937,ridolfi
7,0.328022,0.259176,tornabuon
12,0.251703,0.226715,peruzzi
13,0.239396,0.237551,barbadori
2,0.231484,0.262252,albizzi


In [21]:
# Path / difusao=False run, ranked by simulated closeness (highest first).
pt.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,0.625989,0.300898,medici
3,0.406044,0.268405,guadagni
2,0.231484,0.262252,albizzi
9,0.362308,0.260937,ridolfi
7,0.328022,0.259176,tornabuon
14,0.386319,0.251973,castellan
8,0.372967,0.248088,bischeri
5,0.142857,0.243339,salviati
13,0.239396,0.237551,barbadori
11,0.372747,0.234746,strozzi


In [50]:
# Per-node two-sample t-test on closeness: geodesic baseline (bmcl) against
# path successors with difusao=False (ptcl). One p-value is printed per
# node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmcl, ptcl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

5.03343342125e-111
2.70078056996e-103
7.01137087543e-123
5.03676089487e-115
3.0156542478e-94
2.19618644374e-111
2.42561157302e-113
3.4973760646e-133
2.48826253134e-126
2.77183799291e-138
3.51124351924e-109
3.82959889312e-134
5.4497889182e-114
4.16609076872e-128
2.69997528986e-119


In [52]:
# Per-node two-sample t-test on betweenness: geodesic baseline (bmbl)
# against path successors with difusao=False (ptbl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
# NOTE(review): a nan p-value presumably means both samples are constant.
for baseline, other in zip(bmbl, ptbl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

nan
nan
1.80491807678e-10
1.6775059078e-114
nan
1.0
4.2917142854e-89
1.00794098946e-138
1.8990282668e-142
1.38395880037e-152
nan
1.93900805889e-159
9.51098519867e-150
1.14792115104e-92
4.2278250344e-165


In [24]:
# Path successors (no repeated nodes) with difusao=True, 100 runs.
# Named pd1 so the pandas alias pd is not shadowed.
pd1, pdcl, pdbl = simul(100, random_path_successor, difusao=True)
pdc, pdb = pd1["closeness simulado"], pd1["betweenness simulado"]
pd1.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,0.566894,0.25196
std,0.157851,0.021806
min,0.263462,0.203648
25%,0.420962,0.23572
50%,0.611044,0.257729
75%,0.680577,0.26462
max,0.82,0.281459


In [25]:
# Path / difusao=True run, ranked by simulated betweenness (highest first).
pd1.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,0.82,0.267349,medici
3,0.747253,0.265506,guadagni
9,0.700769,0.27903,ridolfi
7,0.683297,0.281459,tornabuon
11,0.677857,0.263734,strozzi
8,0.651703,0.263586,bischeri
2,0.619451,0.257729,albizzi
14,0.611044,0.256317,castellan
13,0.576099,0.263532,barbadori
12,0.520769,0.252796,peruzzi


In [26]:
# Path / difusao=True run, ranked by simulated closeness (highest first).
pd1.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
7,0.683297,0.281459,tornabuon
9,0.700769,0.27903,ridolfi
6,0.82,0.267349,medici
3,0.747253,0.265506,guadagni
11,0.677857,0.263734,strozzi
8,0.651703,0.263586,bischeri
13,0.576099,0.263532,barbadori
2,0.619451,0.257729,albizzi
14,0.611044,0.256317,castellan
12,0.520769,0.252796,peruzzi


In [27]:
# Per-node two-sample t-test on closeness: geodesic baseline (bmcl) against
# path successors with difusao=True (pdcl). One p-value is printed per
# node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmcl, pdcl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

3.31042974209e-158
6.89469905064e-144
3.08220504964e-190
8.64502537184e-175
9.25647956014e-148
7.72384556374e-178
1.67689539126e-180
1.11520949091e-168
5.00477673863e-170
3.03435511147e-171
6.21158069984e-154
6.71199571256e-175
9.9244807875e-159
2.03706299036e-175
2.55678840451e-162


In [28]:
# Per-node two-sample t-test on betweenness: geodesic baseline (bmbl)
# against path successors with difusao=True (pdbl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmbl, pdbl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

1.1225630912e-202
3.9289116351e-195
2.92936631329e-190
6.58031343295e-226
1.95790311592e-175
5.74893518843e-179
9.81790101315e-198
1.63373763671e-231
2.60854589533e-226
2.00507977074e-243
1.00161961464e-191
2.66879506336e-235
1.64743100326e-212
1.91826909472e-215
2.97410875988e-238


In [29]:
# Trail successors (no repeated edges) with difusao=False, 100 runs.
# ttcl / ttbl hold the per-node closeness / betweenness samples.
tt, ttcl, ttbl = simul(100, random_trail_successor, difusao=False)
ttc, ttb = tt["closeness simulado"], tt["betweenness simulado"]
tt.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,0.247908,0.239883
std,0.196324,0.026412
min,0.0,0.199654
25%,0.071429,0.222987
50%,0.269011,0.236827
75%,0.338297,0.260087
max,0.708187,0.28433


In [30]:
# Trail / difusao=False run, ranked by simulated betweenness (highest first).
tt.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,0.708187,0.28433,medici
3,0.409066,0.271274,guadagni
14,0.373132,0.226286,castellan
8,0.342253,0.227542,bischeri
9,0.334341,0.24272,ridolfi
11,0.329286,0.232599,strozzi
7,0.318791,0.248899,tornabuon
13,0.269011,0.273079,barbadori
2,0.26522,0.274036,albizzi
12,0.226484,0.243225,peruzzi


In [31]:
# Trail / difusao=False run, ranked by simulated closeness (highest first).
tt.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,0.708187,0.28433,medici
2,0.26522,0.274036,albizzi
13,0.269011,0.273079,barbadori
3,0.409066,0.271274,guadagni
7,0.318791,0.248899,tornabuon
12,0.226484,0.243225,peruzzi
9,0.334341,0.24272,ridolfi
5,0.142857,0.236827,salviati
11,0.329286,0.232599,strozzi
8,0.342253,0.227542,bischeri


In [32]:
# Per-node two-sample t-test on closeness: geodesic baseline (bmcl) against
# trail successors with difusao=False (ttcl). One p-value is printed per
# node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmcl, ttcl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

3.820558193e-106
9.67909492605e-108
6.10613402738e-109
1.19829604789e-112
2.51953526495e-89
7.58616942737e-116
5.03808031805e-120
5.60610128567e-117
5.25809149585e-124
2.14246948134e-146
5.8866455516e-121
7.18134700638e-135
5.67182559363e-99
6.6008426276e-97
5.80821774094e-114


In [33]:
# Per-node two-sample t-test on betweenness: geodesic baseline (bmbl)
# against trail successors with difusao=False (ttbl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
# NOTE(review): a nan p-value presumably means both samples are constant.
for baseline, other in zip(bmbl, ttbl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

nan
nan
2.77040377177e-42
7.44891638536e-105
nan
1.0
1.25510849677e-114
1.62572736753e-133
8.00290507672e-134
9.42934422633e-150
nan
1.07384016303e-139
1.69790921916e-142
2.7176251289e-106
2.39376836342e-155


In [34]:
# Trail successors (no repeated edges) with difusao=True, 100 runs.
# tdcl / tdbl hold the per-node closeness / betweenness samples.
td, tdcl, tdbl = simul(100, random_trail_successor, difusao=True)
tdc, tdb = td["closeness simulado"], td["betweenness simulado"]
td.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,0.68226,0.240454
std,0.281028,0.017399
min,0.262967,0.200601
25%,0.411511,0.228397
50%,0.681209,0.24705
75%,0.895742,0.251543
max,1.259615,0.263115


In [35]:
# Trail / difusao=True run, ranked by simulated betweenness (highest first).
td.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,1.259615,0.251388,medici
7,0.92522,0.263115,tornabuon
9,0.92522,0.261718,ridolfi
3,0.898791,0.250558,guadagni
11,0.892692,0.251698,strozzi
14,0.836264,0.243469,castellan
8,0.784451,0.24789,bischeri
2,0.681209,0.24705,albizzi
13,0.604121,0.25348,barbadori
12,0.596374,0.236884,peruzzi


In [36]:
# Trail / difusao=True run, ranked by simulated closeness (highest first).
td.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
7,0.92522,0.263115,tornabuon
9,0.92522,0.261718,ridolfi
13,0.604121,0.25348,barbadori
11,0.892692,0.251698,strozzi
6,1.259615,0.251388,medici
3,0.898791,0.250558,guadagni
8,0.784451,0.24789,bischeri
2,0.681209,0.24705,albizzi
14,0.836264,0.243469,castellan
12,0.596374,0.236884,peruzzi


In [37]:
# Per-node two-sample t-test on closeness: geodesic baseline (bmcl) against
# trail successors with difusao=True (tdcl). One p-value is printed per
# node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmcl, tdcl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

1.72634832253e-159
1.59123932973e-159
1.38297841799e-190
5.66601523762e-183
9.35428990255e-156
1.19011751616e-182
3.4636205889e-194
5.66958094965e-186
4.42694629673e-164
1.54012644667e-187
6.88841893186e-160
5.06629814624e-180
1.01095361474e-155
9.45271839935e-186
2.60697040089e-166


In [38]:
# Per-node two-sample t-test on betweenness: geodesic baseline (bmbl)
# against trail successors with difusao=True (tdbl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmbl, tdbl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

6.71164316208e-195
3.71688856508e-197
3.1651541742e-189
3.07407086955e-212
3.98605229292e-177
3.80132817163e-180
2.02694129916e-223
7.14910297343e-221
8.48184704046e-217
2.59175477201e-217
7.08585930007e-199
4.58191487269e-221
4.36611397421e-212
5.38202916242e-213
9.59052582954e-212


In [39]:
# Unrestricted random-walk successors with difusao=False, 100 runs.
# wtcl / wtbl hold the per-node closeness / betweenness samples.
wt, wtcl, wtbl = simul(100, random_walk_successor, difusao=False)
wtc, wtb = wt["closeness simulado"], wt["betweenness simulado"]
wt.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,2.060758,0.036571
std,1.170484,0.002284
min,0.707912,0.03362
25%,1.167115,0.034705
50%,2.429176,0.036487
75%,2.480687,0.038157
max,5.067363,0.040343


In [40]:
# Walk / difusao=False run, ranked by simulated betweenness (highest first).
wt.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,5.067363,0.03536,medici
3,3.308571,0.033829,guadagni
9,2.503462,0.038221,ridolfi
7,2.498516,0.03798,tornabuon
14,2.462857,0.036577,castellan
11,2.46011,0.034817,strozzi
8,2.440879,0.034458,bischeri
2,2.429176,0.034595,albizzi
13,1.623846,0.036487,barbadori
12,1.567143,0.03362,peruzzi


In [41]:
# Walk / difusao=False run, ranked by simulated closeness (highest first).
wt.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
0,0.742253,0.040343,ginori
4,0.707912,0.03998,pazzi
5,1.564286,0.03939,salviati
9,2.503462,0.038221,ridolfi
1,0.765055,0.038092,lambertes
7,2.498516,0.03798,tornabuon
14,2.462857,0.036577,castellan
13,1.623846,0.036487,barbadori
6,5.067363,0.03536,medici
11,2.46011,0.034817,strozzi


In [42]:
# Per-node two-sample t-test on closeness: geodesic baseline (bmcl) against
# random-walk successors with difusao=False (wtcl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmcl, wtcl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

4.57814272369e-241
3.02096775104e-232
3.65033343582e-293
3.8714914906e-280
2.35137665955e-240
2.13818179886e-256
7.42879869878e-314
9.30298920646e-286
9.73180354489e-279
1.33292999238e-276
7.63777985545e-269
1.58165723237e-275
6.45032399628e-258
9.88428874344e-258
4.10275767477e-268


In [43]:
# Per-node two-sample t-test on betweenness: geodesic baseline (bmbl)
# against random-walk successors with difusao=False (wtbl). One p-value is
# printed per node, in g.nodes order; pairing the lists avoids assuming
# 15 nodes.
for baseline, other in zip(bmbl, wtbl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

9.15859461306e-138
1.1084838515e-131
2.90053491329e-157
4.87686924516e-154
2.31961664976e-121
1.48202507584e-141
1.01388393761e-168
3.24069467763e-165
5.05071780482e-146
8.8401649536e-160
7.69696223744e-147
2.63827872762e-153
1.87262170601e-133
4.86803586225e-151
1.10660933933e-147


In [44]:
# Unrestricted random-walk successors with difusao=True, 100 runs.
# wdcl / wdbl hold the per-node closeness / betweenness samples.
wd, wdcl, wdbl = simul(100, random_walk_successor, difusao=True)
wdc, wdb = wd["closeness simulado"], wd["betweenness simulado"]
wd.describe()

Unnamed: 0,betweenness simulado,closeness simulado
count,15.0,15.0
mean,2.376227,0.155007
std,1.664823,0.012144
min,0.580495,0.12967
25%,1.140879,0.144617
50%,2.408736,0.156818
75%,2.890165,0.164908
max,6.892637,0.170188


In [45]:
# Walk / difusao=True run, ranked by simulated betweenness (highest first).
wd.sort_values(by='betweenness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
6,6.892637,0.165755,medici
3,4.464066,0.161312,guadagni
14,3.098462,0.151924,castellan
2,3.037582,0.16406,albizzi
8,2.742747,0.156818,bischeri
11,2.691978,0.159415,strozzi
9,2.543681,0.169521,ridolfi
7,2.408736,0.16872,tornabuon
5,1.783022,0.144994,salviati
12,1.663297,0.144239,peruzzi


In [46]:
# Walk / difusao=True run, ranked by simulated closeness (highest first).
wd.sort_values(by='closeness simulado', ascending=False)

Unnamed: 0,betweenness simulado,closeness simulado,família
13,1.493516,0.170188,barbadori
9,2.543681,0.169521,ridolfi
7,2.408736,0.16872,tornabuon
6,6.892637,0.165755,medici
2,3.037582,0.16406,albizzi
3,4.464066,0.161312,guadagni
11,2.691978,0.159415,strozzi
8,2.742747,0.156818,bischeri
10,0.580495,0.153091,acciaiuol
14,3.098462,0.151924,castellan


In [47]:
# Per-node two-sample t-test on closeness: geodesic baseline (bmcl) against
# random-walk successors with difusao=True (wdcl). One p-value is printed
# per node, in g.nodes order; pairing the lists avoids assuming 15 nodes.
for baseline, other in zip(bmcl, wdcl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

1.22702883614e-174
5.72445807777e-169
3.43142695707e-197
8.1653981681e-186
1.21128063939e-188
2.58227272812e-194
2.8198896909e-204
2.02273452684e-180
3.46357237212e-189
6.13009734901e-187
5.95958689735e-172
1.8698693904e-182
1.56020405181e-185
7.25365674584e-182
4.52379151122e-172


In [48]:
# Per-node two-sample t-test on betweenness: geodesic baseline (bmbl)
# against random-walk successors with difusao=True (wdbl). One p-value is
# printed per node, in g.nodes order; pairing the lists avoids assuming
# 15 nodes.
for baseline, other in zip(bmbl, wdbl):
    _, p_value = stats.ttest_ind(baseline, other)
    print(p_value)

1.92019199123e-158
1.62855881151e-164
9.27237617041e-169
8.28560313711e-174
9.86673951607e-135
6.63433195167e-144
2.05420845119e-178
6.4505995156e-180
6.13246546562e-170
1.51372345915e-167
3.6047776024e-156
7.83285556396e-166
2.21169177294e-164
1.08283292192e-157
1.09798064324e-165
