In [4]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

In [5]:
# Load the combined Facebook ego-network edge list as an undirected graph
# whose node ids are parsed as integers.
G_fb = nx.read_edgelist(
    r'facebook_combined.txt',
    nodetype=int,
    create_using=nx.Graph(),
)

In [9]:
# Draw the whole network: node colour encodes degree, node size encodes
# betweenness centrality. The figure is written to G_fb.png and shown inline.

# spring_layout starts from random positions; a fixed seed keeps the saved
# figure identical across "Restart & Run All".
pos = nx.spring_layout(G_fb, seed=42)
betCent = nx.betweenness_centrality(G_fb, normalized=True, endpoints=True)

# Build both per-node lists by iterating G_fb itself so that colour and size
# are explicitly aligned node-for-node instead of relying on the ordering of
# betCent.values().
node_color = [20000.0 * G_fb.degree(v) for v in G_fb]
node_size = [betCent[v] * 10000 for v in G_fb]

plt.figure(figsize=(20,20))
nx.draw_networkx(
    G_fb,
    pos=pos,
    with_labels=False,
    node_color=node_color,
    node_size=node_size,
)
plt.axis('off')
plt.savefig("G_fb.png", format="PNG")
plt.show()

In [6]:
def _read_featnames(path):
    """Parse a .featnames file into {column index (str): [feature name, feature value]}.

    Each non-blank line looks like '0 birthday;anonymized feature 376': the
    text before the first space is the column index, the remainder is split at
    the first 'anonymized ' into the feature name ('birthday;') and the
    feature value ('anonymized feature 376'). Single quotes are dropped.

    Raises:
        ValueError: if a non-blank line has no space-separated index or no
            'anonymized ' marker.
    """
    featnames = {}
    # Context manager guarantees the handle is closed even if a line fails to parse.
    with open(path, 'r') as featnames_file:
        for line in featnames_file:
            line = line.replace('\n', '')
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            k, sep, v = line.partition(" ")
            v_name, marker, v_value = v.replace('\'', '').partition("anonymized ")
            if not sep or not marker:
                raise ValueError("Malformed featnames line in %s: %r" % (path, line))
            featnames[k.replace('\'', '')] = [v_name, "anonymized " + v_value]
    return featnames


# node id -> {feature name: feature value} for every feature flagged (>= 1) on that node
node_feats = {}

filenames_id = ['0','107','348','414','686','698','1684','1912','3437','3980']
for n_ in filenames_id:
    # Import .feat file: one row per node, column 0 is the node id, the
    # remaining columns are feature flags.
    feat_ = np.genfromtxt(r'features/'+n_+'.feat', delimiter=' ')
    if feat_.ndim == 1 and feat_.size:
        # genfromtxt collapses a single-row file to 1-D; restore the row axis
        # (assumes every .feat row has an id plus at least one flag column).
        feat_ = feat_.reshape(1, -1)

    # Import .featnames file
    featnames_ = _read_featnames(r'features/'+n_+'.featnames')

    # Fill in a dict with nodes as key and their features (i.e. feat == 1)
    for feat_row_ in feat_:
        node_ = int(feat_row_[0])
        # Reuse the dict of a node already seen in an earlier file, else start a new one
        feat_assigned_ = node_feats.setdefault(node_, {})
        # Flag column j (0-based, after the id column) corresponds to featnames key str(j)
        for col_, flag_ in enumerate(feat_row_[1:]):
            if int(flag_) >= 1:
                v_name, v_value = featnames_[str(col_)]
                # Keep the first value found for a feature name; later ones are ignored
                feat_assigned_.setdefault(v_name, v_value)

In [7]:
# Attach each node's feature dict (node_feats[node]) as node attributes on G_fb
nx.set_node_attributes(G_fb, values=node_feats)

In [14]:
# Spot check: display the attribute dict stored on node 200
G_fb.nodes[200]

{'birthday;': 'anonymized feature 7',
 'education;school;id;': 'anonymized feature 34',
 'education;type;': 'anonymized feature 53',
 'education;year;id;': 'anonymized feature 59',
 'gender;': 'anonymized feature 78',
 'languages;id;': 'anonymized feature 92',
 'locale;': 'anonymized feature 126',
 'location;id;': 'anonymized feature 132',
 'work;employer;id;': 'anonymized feature 50',
 'work;location;id;': 'anonymized feature 177',
 'work;start_date;': 'anonymized feature 168'}

In [19]:
# Mixing dictionary for 'gender;'; its keys reveal every value the attribute
# takes in the graph (None appears where a node lacks the attribute).
gender_mixing_ = nx.attribute_mixing_dict(G_fb, 'gender;')
print(gender_mixing_)

{'anonymized feature 78': {'anonymized feature 77': 36738, 'anonymized feature 78': 63528, None: 2336}, 'anonymized feature 77': {'anonymized feature 78': 36738, 'anonymized feature 77': 30980, None: 1822}, None: {'anonymized feature 78': 2336, 'anonymized feature 77': 1822, None: 168}}


In [20]:
# Fix the row/column order of the mixing matrix:
# index 0 -> feature 77, index 1 -> feature 78, index 2 -> attribute missing (None)
mapping_ = {
    value: index
    for index, value in enumerate(('anonymized feature 77', 'anonymized feature 78', None))
}
M_ = nx.attribute_mixing_matrix(
    G_fb,
    'gender;',
    mapping=mapping_,
    normalized=True,
)
print(M_)

[[0.17555591 0.20818505 0.01032482]
 [0.20818505 0.35999728 0.01323753]
 [0.01032482 0.01323753 0.00095201]]


In [11]:
# Reference value computed by NetworkX
print(nx.attribute_assortativity_coefficient(G_fb, 'gender;'))

# Same coefficient by hand from the normalized mixing matrix M_:
#   r = (trace(M) - s) / (1 - s), where s is the sum of all entries of M @ M
Msq_ = (M_ @ M_).sum()
tr_ = np.trace(M_)

coeff_ = (tr_ - Msq_) / (1 - Msq_)
print(coeff_)

0.0841148755492507
0.0841148755492507


In [21]:
# Build a list of all attribute names that occur on at least one node, in
# first-seen order. dict.fromkeys de-duplicates with constant-time lookups
# instead of rescanning a list for every attribute of every node.
attrs_ = list(dict.fromkeys(
    attr
    for feats in node_feats.values()
    for attr in feats
))

# Attribute assortativity coefficient of G_fb for every attribute name.
# A comprehension avoids overwriting the module-level coeff_ that holds the
# manually computed 'gender;' coefficient.
assort_coeff = {
    attr: nx.attribute_assortativity_coefficient(G_fb, attr)
    for attr in attrs_
}

# Show the attributes from most to least assortative
print(dict(sorted(assort_coeff.items(), key=lambda item: item[1], reverse=True)))

{'locale;': 0.30071730395411966, 'last_name;': 0.2353780905744854, 'hometown;id;': 0.20157834193137472, 'birthday;': 0.17350650604874907, 'education;school;id;': 0.16575681549827262, 'work;projects;id;': 0.1303199573309532, 'location;id;': 0.11518068705251638, 'gender;': 0.0841148755492507, 'education;type;': 0.08015756256815627, 'education;concentration;id;': 0.07808419543141708, 'education;degree;id;': 0.07255949027183224, 'work;employer;id;': 0.060529138443159575, 'education;year;id;': 0.05819649918934245, 'name;': 0.04859836520325402, 'first_name;': 0.048404511230639276, 'work;location;id;': 0.04450251816796201, 'education;classes;id;': 0.04060297765897203, 'languages;id;': 0.035788680613727045, 'work;position;id;': 0.033296847835467235, 'middle_name;': 0.030235546013299645, 'education;with;id;': 0.02521084319459844, 'work;from;id;': 0.024495513969905777, 'work;with;id;': 0.02020192113136017, 'work;end_date;': 0.019672905714052162, 'work;start_date;': 0.01736876283936075, 'religion