In [123]:
import pandas as pd
import numpy as np
import networkx as nx

In [124]:
g = nx.Graph()
g.name = 'copenhagen'

# Build the graph from the edge list. After the header row, every line holds
# four comma-separated fields, unpacked below as tid, a, b, rssi.
with open('data/copenhagen/bt_symmetric.csv') as f:
    next(f, None)  # skip the header row
    for line in f:  # stream the file instead of materialising it with readlines()
        line = line.rstrip()
        if not line:
            continue  # tolerate blank lines (e.g. a trailing newline at EOF)
        tid, a, b, _rssi = line.split(',')  # the fourth field is parsed but not stored
        g.add_edge(int(a), int(b), tid=tid)
print('loaded')

# Start every graph node at None so that nodes without a row in genders.csv
# remain identifiable afterwards.
attrs = {n: None for n in g.nodes()}
with open('data/copenhagen/genders.csv') as f:
    next(f, None)  # skip the header row
    for line in f:
        line = line.rstrip()
        if not line:
            continue
        node, gender = line.split(',')
        attrs[int(node)] = gender
# The file handle is not needed for this step, so it runs outside the `with` block.
nx.set_node_attributes(g, attrs, name='gender')
print('attributes')

loaded
attributes


In [125]:
# Nodes whose entry in `attrs` is still None never received a gender value;
# collect them and drop them from the graph in one call.
to_remove = [candidate for candidate, label in attrs.items() if label is None]

g.remove_nodes_from(to_remove)

In [126]:
# nx.info() was removed in NetworkX 3.0, so the summary is assembled from the
# graph's public API to keep this cell runnable on any NetworkX version.
n_nodes = g.number_of_nodes()
n_edges = g.number_of_edges()
print(f"Name: {g.name}")
print(f"Type: {type(g).__name__}")
print(f"Number of nodes: {n_nodes}")
print(f"Number of edges: {n_edges}")
if n_nodes:
    # g is an undirected nx.Graph: every edge adds 2 to the total degree.
    print(f"Average degree: {2 * n_edges / n_nodes:8.4f}")

Name: copenhagen
Type: Graph
Number of nodes: 673
Number of edges: 75124
Average degree: 223.2511


In [127]:
from collections import Counter
# Fraction of the graph's nodes carrying each gender label.
# Every label present in the data is normalised (not only the literal keys
# '0' and '1'), so a missing or additional label no longer raises KeyError
# or leaves a raw count mixed in with the fractions.
gender_counts = Counter(nx.get_node_attributes(g, 'gender').values())
sizes = {label: count / len(g) for label, count in gender_counts.items()}

In [128]:
# Show the gender proportions (label -> fraction of the nodes in g).
sizes

{'0': 0.7830609212481426, '1': 0.21693907875185736}

In [129]:
def homogeneity(node, attr, remove_ego=False):
    """Return the share of nodes in `node`'s ego network whose gender is `attr`.

    The ego network is the radius-1 neighbourhood of `node` in the global
    graph `g` (the node itself plus its direct neighbours).

    Parameters
    ----------
    node : hashable
        Node of `g` whose ego network is inspected.
    attr : object
        Gender label to count (compared against the 'gender' node attribute).
    remove_ego : bool, optional
        If True, `node` itself is excluded from both the match count and the
        network size. Default is False.

    Returns
    -------
    float
        Matching nodes divided by the network size, or NaN when the network
        is empty after removing the ego (isolated node with remove_ego=True).
    """
    egonet = nx.ego_graph(g, node)
    genders = nx.get_node_attributes(egonet, 'gender')
    # Index the Counter directly: it yields 0 for a label that is absent from
    # the ego network, whereas converting it to a plain dict raised KeyError.
    count = Counter(genders.values())[attr]
    size = len(egonet)
    if remove_ego:
        size -= 1
        # Only discount the ego from the matches when it actually carries
        # `attr`; otherwise it was never counted in the first place.
        if node in genders and genders[node] == attr:
            count -= 1
    if size == 0:
        # Isolated node with the ego removed: the ratio is undefined.
        return float('nan')
    return count / size


# Homogeneity of every node with respect to its own gender label (ego included).
hs = {n: homogeneity(n, attrs[n], remove_ego=False) for n in g.nodes()}
# Store the scores on the graph so they can be read back as node attributes.
nx.set_node_attributes(g, hs, 'homogeneity')

In [120]:
def purity(node):
    """Return the share of `node`'s ego network held by its most frequent gender.

    The ego network is taken from the global graph `g`; the count of the most
    common 'gender' value is divided by the number of nodes in that network.
    """
    neighbourhood = nx.ego_graph(g, node)
    label_counts = Counter(nx.get_node_attributes(neighbourhood, 'gender').values())
    top_count = label_counts.most_common(1)[0][1]
    return top_count / len(neighbourhood)

def second_order_homogeneity(node):
    """Return the mean homogeneity over all members of `node`'s ego network.

    Each member (the ego included) is scored with `homogeneity` against its
    own label from the global `attrs` mapping; the scores are averaged with
    `np.mean`.
    """
    member_scores = [
        homogeneity(member, attrs[member])
        for member in nx.ego_graph(g, node)
    ]
    return np.mean(member_scores)

def weighted_homogeneity(node, attr, remove_ego=False):
    """Return `homogeneity(node, attr)` divided by the global share of `attr`.

    The divisor is `sizes[attr]`, the fraction of the graph's nodes that carry
    the label, so values above 1 mean the label is over-represented in the
    ego network relative to the whole graph.
    """
    observed = homogeneity(node, attr, remove_ego=remove_ego)
    expected = sizes[attr]
    return observed / expected

In [122]:
for node in list(g.nodes())[:50]:
    node_attr = attrs[node]
    # The previous body only emitted blank lines and never used node_attr.
    # Print, per node: its gender label, its ego-network homogeneity, and that
    # homogeneity relative to the label's overall share in the graph.
    print(node_attr,
          homogeneity(node, node_attr),
          weighted_homogeneity(node, node_attr))
   

0 0.8170347003154574 1.0433858696628138 
0 0.7580645161290323 0.9680785946012119 
0 0.7530864197530864 0.9617213671609623 
0 0.8181818181818182 1.0448507848887356 
1 0.25936599423631124 1.1955706446646401 
1 0.313953488372093 1.447196559413826 
0 0.8985507246376812 1.1474850809889172 
0 1.0 1.2770398481973435 
0 0.7349397590361446 0.9385473583137103 
0 0.7821100917431193 0.9987857528332434 
0 0.7757201646090535 0.9906255612559639 
0 0.7976539589442815 1.0186358906442152 
0 0.7961538461538461 1.0167201868340388 
0 0.7725490196078432 0.9865758827250065 
1 0.2222222222222222 1.0243531202435312 
0 0.756578947368421 0.9661814640966743 
0 0.8211009174311926 1.0485785909510297 
0 0.847457627118644 1.0822371594892741 
0 0.7894736842105263 1.008189353840008 
0 0.6735751295336787 0.860182281169195 
0 0.7962962962962963 1.016902101342329 
0 0.7941176470588235 1.0141198794508315 
1 0.3034825870646766 1.3989300074967628 
0 0.6363636363636364 0.8126617215801276 
0 0.8148148148148148 1.04055098742005

In [115]:
# For the first 20 nodes, compare each node's own homogeneity with the mean of
# the 'homogeneity' attribute stored on the members of its ego network.
for node in list(g.nodes())[:20]:
    neighbourhood = nx.ego_graph(g, node)
    stored_scores = nx.get_node_attributes(neighbourhood, 'homogeneity')
    neighbourhood_mean = np.mean(list(stored_scores.values()))

    print(attrs[node], hs[node], neighbourhood_mean)

0 0.8170347003154574 0.6834622809165989
0 0.7580645161290323 0.6486752989992199
0 0.7530864197530864 0.646373473151727
0 0.8181818181818182 0.684656160157771
1 0.25936599423631124 0.6391783450142745
1 0.313953488372093 0.6054182014335772
0 0.8985507246376812 0.7453715402386006
0 1.0 1.0
0 0.7349397590361446 0.6324474070967165
0 0.7821100917431193 0.6648878213011667
0 0.7757201646090535 0.661044093283853
0 0.7976539589442815 0.6730996186171939
0 0.7961538461538461 0.6747745000712377
0 0.7725490196078432 0.6590079022186733
1 0.2222222222222222 0.6465605721801042
0 0.756578947368421 0.646208156211844
0 0.8211009174311926 0.6871210187042911
0 0.847457627118644 0.6918933253374321
0 0.7894736842105263 0.6714803600770461
0 0.6735751295336787 0.5994439482745674
