In [1]:
import pandas as pd
import numpy as np
import networkx as nx

In [2]:
# Build the undirected graph from the edge list in bt_symmetric.csv.
g = nx.Graph()
g.name = 'copenhagen'
with open('bt_symmetric.csv') as f:
    rows = f.readlines()
# The first row is skipped as a header. Every remaining row must hold exactly
# four comma-separated fields; the fourth (rssi) is read but not used.
for line in rows[1:]:
    tid, a, b, rssi = line.rstrip().split(',')
    # The first field is kept on the edge as the `tid` attribute.
    g.add_edge(int(a), int(b), tid=tid)
print('loaded')

# Every node starts with gender None (missing data); genders.csv then fills in
# the labels it knows about.
attrs = dict.fromkeys(g.nodes())
with open('genders.csv') as f:
    rows = f.readlines()
# The first row is skipped as a header; each remaining row is "node,gender".
for line in rows[1:]:
    node, gender = line.rstrip().split(',')
    attrs[int(node)] = gender
nx.set_node_attributes(g, attrs, name='gender')
print('attributes')

loaded
attributes


In [3]:
# Drop every node whose gender label is still None after loading genders.csv.
to_remove = [n for n, label in attrs.items() if label is None]
g.remove_nodes_from(to_remove)

In [4]:
# Summary of the graph. `nx.info` was removed in NetworkX 3.0, so the same
# fields are printed here from Graph methods that exist in every release.
n_nodes = g.number_of_nodes()
n_edges = g.number_of_edges()
print(f"Name: {g.name}")
print(f"Type: {type(g).__name__}")
print(f"Number of nodes: {n_nodes}")
print(f"Number of edges: {n_edges}")
if n_nodes > 0:
    # Undirected graph: each edge contributes 2 to the degree sum.
    print(f"Average degree: {2 * n_edges / n_nodes:8.4f}")

Name: copenhagen
Type: Graph
Number of nodes: 673
Number of edges: 75124
Average degree: 223.2511


In [5]:
from collections import Counter
#g = nx.convert_node_labels_to_integers(g)
# Count nodes per gender label, then turn the '0' and '1' counts into
# fractions of the total number of nodes.
sizes = dict(Counter(nx.get_node_attributes(g, 'gender').values()))
for label in ('0', '1'):
    sizes[label] = sizes[label] / len(g)

In [7]:
# Start from the per-label counts so every label is a key, then give '0' and
# '1' the complement of their share of the graph (1 - sizes[label]).
weights = dict(Counter(nx.get_node_attributes(g, 'gender').values()))
for label in ('0', '1'):
    weights[label] = 1 - sizes[label]

In [24]:
# Display the value stored in `sizes` for gender label '0'.
sizes['0']

0.7830609212481426

In [14]:
def homogeneity(node, attr, remove_ego=False):
    """Return the share of ``node``'s ego network whose gender label is ``attr``.

    The ego network is ``nx.ego_graph(g, node)`` on the module-level graph ``g``.

    Parameters
    ----------
    node : hashable
        A node of ``g``.
    attr : str
        Gender label to count.
    remove_ego : bool, default False
        If True, ``node`` itself is left out of both the count and the size.

    Returns
    -------
    float
        Number of nodes labelled ``attr`` divided by the number of nodes
        considered. NaN when no node is left to consider (an isolated node
        with ``remove_ego=True``).
    """
    egonet = nx.ego_graph(g, node)
    genders = nx.get_node_attributes(egonet, 'gender')
    # Counter yields 0 for a label that does not occur, instead of KeyError.
    count = Counter(genders.values())[attr]
    size = len(egonet)
    if remove_ego:
        size -= 1
        # Only un-count the ego if it was counted, i.e. its own label is attr.
        if node in genders and genders[node] == attr:
            count -= 1
    if size == 0:
        return float('nan')
    return count / size

def weighted_homogeneity(node, attr, remove_ego=False):
    """Return the weighted share of ``node``'s ego network labelled ``attr``.

    Nodes labelled ``attr`` are weighted by ``weights[attr]``; every other
    node in the ego network is weighted by the weight of the opposite label
    ('0' when ``attr`` is '1', otherwise '1'). ``g`` and ``weights`` are
    module-level objects.

    Parameters
    ----------
    node : hashable
        A node of ``g``.
    attr : str
        Gender label to count.
    remove_ego : bool, default False
        If True, ``node`` itself is left out of both the count and the size.

    Returns
    -------
    float
        ``count * w_attr / (count * w_attr + (size - count) * w_other)``.
        NaN when that denominator is zero (for example an isolated node with
        ``remove_ego=True``).
    """
    egonet = nx.ego_graph(g, node)
    genders = nx.get_node_attributes(egonet, 'gender')
    # Counter yields 0 for a label that does not occur, instead of KeyError.
    count = Counter(genders.values())[attr]
    size = len(egonet)
    if remove_ego:
        size -= 1
        # Only un-count the ego if it was counted, i.e. its own label is attr.
        if node in genders and genders[node] == attr:
            count -= 1

    other = '0' if attr == '1' else '1'

    same_mass = count * weights[attr]
    total_mass = same_mass + (size - count) * weights[other]
    if total_mass == 0:
        return float('nan')
    return same_mass / total_mass


#hs = {}
#for n in g.nodes():
 #   attr = attrs[n]
  #  hom = homogeneity(n, attr, remove_ego=False)
   # hs[n] = hom
#nx.set_node_attributes(g, hs, 'homogeneity')

In [15]:
def purity(node):
    """Return the share of ``node``'s ego network held by its most common gender label.

    The ego network is ``nx.ego_graph(g, node)`` on the module-level graph ``g``.
    """
    ego = nx.ego_graph(g, node)
    label_counts = Counter(nx.get_node_attributes(ego, 'gender').values())
    # most_common(1) gives [(label, count)] for the most frequent label.
    top_count = label_counts.most_common(1)[0][1]
    return top_count / len(ego)

def second_order_homogeneity(node):
    """Return the mean homogeneity over all members of ``node``'s ego network.

    Each member (the ego included) is scored with ``homogeneity`` against its
    own gender label taken from the module-level ``attrs`` dict.
    """
    members = nx.ego_graph(g, node)
    return np.mean([homogeneity(member, attrs[member]) for member in members])

#def old_weighted_homogeneity(node, attr, remove_ego=False):
 #   hom = homogeneity(node, attr, remove_ego=remove_ego)
  #  return hom * sizes[attr]

In [23]:
# Column-oriented records: each node's gender label and a binary target
# derived from its weighted homogeneity.
dict_to_df = {'Gender': [], 'Target': []}

print("ATTR - Hom - Weighted Hom")
for node in list(g.nodes()):
    attr = attrs[node]
    hom = homogeneity(node, attr)
    w_hom = weighted_homogeneity(node, attr)

    # Target is 0 when the weighted homogeneity exceeds 0.5, otherwise 1.
    target = 0 if w_hom > 0.5 else 1
    dict_to_df['Gender'].append(attr)
    dict_to_df['Target'].append(target)
    print(attr, hom, w_hom)

ATTR - Hom - Weighted Hom
0 0.8170347003154574 0.5529979526177246
0 0.7580645161290323 0.4646847700954832
0 0.7530864197530864 0.45798621824539754
0 0.8181818181818182 0.5548986486486487
1 0.25936599423631124 0.5583152839250399
1 0.313953488372093 0.6229041719563979
0 0.8985507246376812 0.7104622871046229
0 1.0 1.0
0 0.7349397590361446 0.43443902439024396
0 0.7821100917431193 0.49860291834833903
0 0.7757201646090535 0.4893274658843402
0 0.7976539589442815 0.5220111731843575
0 0.7961538461538461 0.5196980379344144
0 0.7725490196078432 0.4847963861920173
1 0.2222222222222222 0.5077071290944123
0 0.756578947368421 0.4626746396979801
0 0.8211009174311926 0.5597703857604901
0 0.847457627118644 0.606161255501121
0 0.7894736842105263 0.5095393206142392
0 0.6735751295336787 0.3637339261416991
0 0.7962962962962963 0.5199171842650104
0 0.7941176470588235 0.5165771196435592
1 0.3034825870646766 0.6113107802308555
0 0.6363636363636364 0.32651757188498404
0 0.8148148148148148 0.5493415426714555
1 0

0 0.7938461538461539 0.5161626265809776
0 0.7843137254901961 0.5018475552118243
1 0.2511013215859031 0.547567400061977
0 0.7635327635327636 0.47216691404505906
1 0.23481781376518218 0.5255502063273727
0 0.7327044025157232 0.4316292997348153
0 0.7866666666666666 0.5053384958348
1 0.26273458445040215 0.5626171074992374
0 0.8 0.5256525652565256
0 0.7883597883597884 0.5078675818275202
1 0.23 0.518811796430253
0 0.8064516129032258 0.5358191426893717
0 0.7326732673267327 0.43159030080294014
0 1.0 1.0
0 0.8252032520325203 0.5667029962331976
0 0.8778625954198473 0.6656886844818016
1 1.0 1.0
0 0.7373737373737373 0.43752052545155995
0 0.7908045977011494 0.5115450036157708
0 0.8173076923076923 0.5534495830174374
0 0.8440860215053764 0.5999738254155215
1 0.26440677966101694 0.5647359454855195
0 0.7659574468085106 0.47552700624264904
1 0.29743589743589743 0.604453409270685
0 0.7702702702702703 0.4815693536253689
0 0.8108108108108109 0.542818193084645
1 0.2838983050847458 0.5886501175333012
0 0.7831

0 0.7960199004975125 0.5194920719638846
0 0.8108108108108109 0.542818193084645
0 0.7544910179640718 0.4598655100867435
0 0.7154471544715447 0.4105710542293804
1 0.31693989071038253 0.626147164863979
0 0.8367346938775511 0.5867476965300922
0 1.0 1.0
0 0.8522167487684729 0.6150287328333496
0 0.76 0.46731805929919146
1 0.26339285714285715 0.5634525125491546
0 0.7974683544303798 0.521724333522405
0 0.776824034334764 0.49091584618242623
1 0.20087336244541484 0.47570643642072213
1 0.22088353413654618 0.5057669824983859
0 0.778816199376947 0.4937970967436449
0 0.7072243346007605 0.4009153318077803
0 0.8857142857142857 0.6822429906542056
0 0.8415841584158416 0.5954323001631321
0 0.72 0.4160202627829666
0 0.8068181818181818 0.5364036222509703
0 0.8333333333333334 0.5807478122513923
0 0.7412280701754386 0.44244804274929617
0 0.8309859154929577 0.5766501539697416
0 0.8188976377952756 0.5560886284563267
0 0.8121019108280255 0.5449116695695446
1 0.21839080459770116 0.5021312872975278
1 0.2134387351

0 0.7086614173228346 0.4025858635374859
0 0.6708860759493671 0.3609141791044776
1 0.2594594594594595 0.5584352510044595
0 0.7461139896373057 0.4487800712959208
0 0.8095238095238095 0.5407407407407407
0 1.0 1.0
0 0.8160919540229885 0.5514416427279498
0 1.0 1.0
0 0.7948717948717948 0.5177304964539007
0 0.7857142857142857 0.50392218387198
1 0.2676767676767677 0.5688478849717928
1 0.21666666666666667 0.4995989207321519
0 0.7509578544061303 0.4551542046412495
0 0.7908163265306123 0.5115627189908899
0 0.7716763005780347 0.48355765056130995
0 0.8666666666666667 0.6429539295392954
0 0.7843137254901961 0.5018475552118243
1 0.3333333333333333 0.6434676434676434
0 0.7974683544303798 0.5217243335224051
0 0.6780487804878049 0.3684726559663011
1 0.25 0.5461139896373057
1 0.43478260869565216 0.7352120535714286
0 1.0 1.0
0 1.0 1.0
0 0.6375838926174496 0.32767907767907767
1 1.0 1.0
0 0.6 0.2935656836461126
0 1.0 1.0


In [25]:
# Turn the Gender/Target records into a DataFrame and write it out as JSON.
# NOTE(review): the file is named "cambridge.json" although the graph is
# named 'copenhagen' -- confirm the filename is intended.
df = pd.DataFrame(dict_to_df)
df.to_json("cambridge.json")

In [13]:


# Homogeneity of every node with respect to its own gender label. It is
# computed in this cell so the loop below works on a fresh kernel instead of
# relying on an `hs` dict / 'homogeneity' node attribute from an earlier session.
hs = {n: homogeneity(n, attrs[n]) for n in g.nodes()}

# For the first 20 nodes: gender label, own homogeneity, and the mean
# homogeneity across the node's ego network (the ego itself included).
for node in list(g.nodes())[:20]:
    attr = attrs[node]
    egonet = nx.ego_graph(g, node)
    egonet_attrs = [hs[member] for member in egonet]
    avg = np.mean(egonet_attrs)

    print(attr, hs[node], avg)

0 0.8170347003154574 0.6834622809165989
0 0.7580645161290323 0.6486752989992199
0 0.7530864197530864 0.646373473151727
0 0.8181818181818182 0.684656160157771
1 0.25936599423631124 0.6391783450142745
1 0.313953488372093 0.6054182014335772
0 0.8985507246376812 0.7453715402386006
0 1.0 1.0
0 0.7349397590361446 0.6324474070967165
0 0.7821100917431193 0.6648878213011667
0 0.7757201646090535 0.661044093283853
0 0.7976539589442815 0.6730996186171939
0 0.7961538461538461 0.6747745000712377
0 0.7725490196078432 0.6590079022186733
1 0.2222222222222222 0.6465605721801042
0 0.756578947368421 0.646208156211844
0 0.8211009174311926 0.6871210187042911
0 0.847457627118644 0.6918933253374321
0 0.7894736842105263 0.6714803600770461
0 0.6735751295336787 0.5994439482745674
