In [1]:
import pandas as pd
import numpy as np
import networkx as nx

In [2]:
g = nx.Graph()
g.name = 'copenhagen'
with open('bt_symmetric.csv') as f:
    for l in f.readlines()[1:]:
        tid, a, b, rssi = l.rstrip().split(',')
        g.add_edge(int(a),int(b), tid=tid)
print('loaded')

attrs = {n: None for n in g.nodes()} # also fix missing data
with open('genders.csv') as f:
    for l in f.readlines()[1:]:
        node, gender = l.rstrip().split(',')
        attrs[int(node)] = gender
    nx.set_node_attributes(g, attrs, name='gender')
print('attributes')

loaded
attributes


In [3]:
to_remove = []
for n in attrs:
    if attrs[n] is None:
        to_remove.append(n)

g.remove_nodes_from(to_remove)

In [4]:
print(nx.info(g))

Name: copenhagen
Type: Graph
Number of nodes: 673
Number of edges: 75124
Average degree: 223.2511


In [5]:
from collections import Counter
#g = nx.convert_node_labels_to_integers(g)
sizes = dict(Counter(list(nx.get_node_attributes(g, 'gender').values())))
sizes['0'] = sizes['0'] / (len(g))
sizes['1'] = sizes['1'] / (len(g))

In [6]:
weights = dict(Counter(list(nx.get_node_attributes(g, 'gender').values())))
weights['0'] = 1 - sizes['0']
weights['1'] = 1 - sizes['1']

In [7]:
def homogeneity(node, attr, remove_ego=False):
    
    egonet = nx.ego_graph(g, node)
    egonet_attrs = list(nx.get_node_attributes(egonet, 'gender').values())
    count = dict(Counter(egonet_attrs))[attr]
    size = len(egonet)
    if size > 1:
        if remove_ego:
            count-=1
            size-=1
        return count/size

def weighted_homogeneity(node, attr, remove_ego=False):
    
    egonet = nx.ego_graph(g, node)
    egonet_attrs = list(nx.get_node_attributes(egonet, 'gender').values())
    count = dict(Counter(egonet_attrs))[attr]
    size = len(egonet)
    if remove_ego:
        count-=1
            
    if size > 1:
        return  count * weights[attr] / (count * weights[attr] + (size-count)* (1 - weights[attr]))


#hs = {}
#for n in g.nodes():
 #   attr = attrs[n]
  #  hom = homogeneity(n, attr, remove_ego=False)
   # hs[n] = hom
#nx.set_node_attributes(g, hs, 'homogeneity')

In [8]:
def purity(node):
    egonet = nx.ego_graph(g, node)
    egonet_attrs = list(nx.get_node_attributes(egonet, 'gender').values())
    count = Counter(egonet_attrs).most_common()[0][1]
    size = len(egonet)
    return count/size

def second_order_homogeneity(node):
    egonet = nx.ego_graph(g, node)
    homs = []
    for n2 in egonet:
        n2_attr = attrs[n2]
        homs.append(homogeneity(n2, n2_attr))
    return np.mean(homs)

#def old_weighted_homogeneity(node, attr, remove_ego=False):
 #   hom = homogeneity(node, attr, remove_ego=remove_ego)
  #  return hom * sizes[attr]

In [9]:
dict_to_df = {}
dict_to_df['Gender'] = []
dict_to_df['Target'] = []

print ("ATTR - Hom - Weighted Hom")
for node in list(g.nodes()):
    attr = attrs[node]
    hom = homogeneity(node, attr, True)
    w_hom = weighted_homogeneity(node, attr, True)
    
    if w_hom is not None:
        dict_to_df['Gender'].append(attr)
        if w_hom > 0.5:
            dict_to_df['Target'].append(0)
        else:
            dict_to_df['Target'].append(1)
        print(attr, hom, w_hom)
    else:
        print("Node with no links")


ATTR - Hom - Weighted Hom
0 0.8164556962025317 0.5478105321330404
0 0.7540983606557377 0.4433588592553473
0 0.7523219814241486 0.4538860103626943
0 0.8174603174603174 0.5483818032637432
1 0.25722543352601157 0.5545990942521668
1 0.3058823529411765 0.6100080135339684
0 0.8970588235294118 0.6787075140984606
Node with no links
0 0.7333333333333333 0.4269109011382035
0 0.7816091954022989 0.49525101763907736
0 0.7752577319587629 0.48638208140626943
0 0.7970588235294118 0.5175002615883646
0 0.7953667953667953 0.5138210270953635
0 0.7716535433070866 0.47925773334003247
1 0.20454545454545456 0.4743474347434743
0 0.7549668874172185 0.4538860103626943
0 0.8202764976958525 0.5521373332200221
0 0.8448275862068966 0.5758209916291049
0 0.7889447236180904 0.5057866922627126
0 0.671875 0.3583196986416042
0 0.794392523364486 0.5058905058905059
0 0.7934426229508197 0.5116130900666088
1 0.3 0.6056775083323755
0 0.631578947368421 0.31438697232066753
0 0.8142414860681114 0.5443050535119428
1 0.296116504854

0 0.7932098765432098 0.5114915891927261
0 0.7836065573770492 0.4970442858567297
1 0.24778761061946902 0.5417232644370205
0 0.7628571428571429 0.46825225225225225
1 0.23170731707317074 0.5198947714567577
0 0.7318611987381703 0.4277091698866076
0 0.785234899328859 0.49551823165955966
1 0.260752688172043 0.559197068314828
0 0.7994186046511628 0.5211578400830738
0 0.7872340425531915 0.5000115700566933
1 0.22807017543859648 0.5152732859859677
0 0.8059299191374663 0.5315555555555556
0 0.7313432835820896 0.42543659682438995
Node with no links
0 0.8244897959183674 0.559832953682612
0 0.8769230769230769 0.6500800687419442
1 1.0 0.7830609212481426
0 0.7360406091370558 0.4311521150282072
0 0.7903225806451613 0.5080862807167063
0 0.8164251207729468 0.5455590686979017
0 0.8432432432432433 0.5902658995490593
1 0.2619047619047619 0.560429240266825
0 0.7651245551601423 0.47062174845200083
1 0.29381443298969073 0.5985414549584553
0 0.7694915254237288 0.4768290051075462
0 0.8055555555555556 0.5010650887

0 0.8427672955974843 0.5881079781157939
0 0.795 0.5119079121460703
0 0.8101694915254237 0.5373846888331049
0 0.7530120481927711 0.451911648177496
0 0.7142857142857143 0.40576810075118713
1 0.3131868131868132 0.6201920099101889
0 0.8333333333333334 0.551828404044222
Node with no links
0 0.8514851485148515 0.6058529759463438
0 0.7593123209169055 0.46343654548721325
1 0.2600896860986547 0.5577533666654502
0 0.7966101694915254 0.5152522010099304
0 0.7758620689655172 0.4847724631532346
1 0.19736842105263158 0.4688704798434133
1 0.21774193548387097 0.4998946037099494
0 0.778125 0.4892998465638375
0 0.7061068702290076 0.3965294497621704
0 0.8840579710144928 0.652502014799619
0 0.84 0.5778636385053951
0 0.7171717171717171 0.40414830987562866
0 0.8060836501901141 0.5303996161491535
0 0.8 0.3565323565323565
0 0.7400881057268722 0.43684547980337674
0 0.8297872340425532 0.5645635720659683
0 0.8181818181818182 0.549580840501173
0 0.8115015974440895 0.5397647880763856
1 0.2153846153846154 0.49648396

0 0.8345864661654135 0.5721043527376707
0 0.8 0.5201694311389099
0 0.7735849056603774 0.4663083274908468
0 0.7922077922077922 0.5085171580289679
0 0.7063492063492064 0.393519079345851
0 0.6687898089171974 0.3543607406208826
1 0.2554347826086957 0.5514393214150544
0 0.7447916666666666 0.4420682645888032
0 0.8083832335329342 0.5312525268860677
Node with no links
0 0.813953488372093 0.5328744981490171
Node with no links
0 0.7938144329896907 0.5099453403188859
0 0.7846153846153846 0.4964110313562524
1 0.2639593908629442 0.56247947454844
1 0.21338912133891214 0.493418516274715
0 0.75 0.4501043445266553
0 0.7897435897435897 0.50392218387198
0 0.7710144927536232 0.4794804681712677
0 0.8571428571428571 0.5256525652565257
0 0.7828947368421053 0.4922928709055877
1 0.2 0.4192521877486078
0 0.7961783439490446 0.512050728094049
0 0.6764705882352942 0.36330850929549025
1 0.21052631578947367 0.47434743474347435
1 0.4222222222222222 0.7175206019347904
0 1.0 0.21693907875185736
Node with no links
0 0.6

In [10]:
df = pd.DataFrame(data=dict_to_df)
df
df.to_json("cambridge.json")

In [None]:
#for node in list(g.nodes())[:20]:
 #   attr = attrs[node]
  #  egonet = nx.ego_graph(g, node)
   # egonet_attrs = list(nx.get_node_attributes(egonet, 'homogeneity').values())
    #avg = np.mean(egonet_attrs)
    
    #print(attr, hs[node], avg)