## Exercices 10 February 2021

In [3]:
# import networkx (and other useful packages)
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

<mark>ELNodeAttributes : afficher les données
Status : 
Bureau : 1, 2 ou 3 dans 3 villes des USA
Senior : nombre de données passées dans l'entreprise
Spec : 2 specialités
School : 3 écoles de droit des USA 
</mark>

In [4]:
########### Graphs from empirical data ##############

# generate new digraph by reading edgelist (from empirical data)
# it is a "DiGraph" (not "Graph") because it is directed
# comments : à ne pas prendre en compte node car ce sont des commentaires
# nodetype = int car les identifants des noeuds sont des nombres entiers
ELadviceG = nx.read_edgelist('ELadvice_edgelist.txt',comments="node",
                             create_using=nx.DiGraph(),nodetype=int)

In [5]:
# match nodes of digraph with attributes of nodes
ELnodes = pd.read_table("ELnodeAttributes.txt", sep='\t')
ELnodes.head()

Unnamed: 0,Node,Status,Gender,Office,Senior,Age,Spec,School
0,1,1,1,1,31,64,1,1
1,2,1,1,1,32,62,2,1
2,3,1,1,2,13,67,1,1
3,4,1,1,1,31,59,2,3
4,5,1,1,2,31,59,1,2


In [12]:
ELNodeData = ELnodes.set_index('Node').to_dict('index').items()
list(ELNodeData)[0]

(1,
 {'Status': 1,
  'Gender': 1,
  'Office': 1,
  'Senior': 31,
  'Age': 64,
  'Spec': 1,
  'School': 1})

In [18]:
ELadviceG.add_nodes_from(ELNodeData)

# view fist results
print(list(ELadviceG.nodes(data=True))[0])
print(list(ELadviceG.edges(data=True))[0])

(1, {'Status': 1, 'Gender': 1, 'Office': 1, 'Senior': 31, 'Age': 64, 'Spec': 1, 'School': 1})
(1, 2, {})


In [19]:
# see basic properties of graph
print(nx.info(ELadviceG))

Name: 
Type: DiGraph
Number of nodes: 71
Number of edges: 892
Average in degree:  12.5634
Average out degree:  12.5634


In [23]:
# to view attributes

# for example, 'Office'
offices = nx.get_node_attributes(ELadviceG,'Office')
print(offices)
# to view the value for a single node (for example, node n. 5)
offices[5]


{1: 1, 2: 1, 17: 1, 20: 1, 6: 2, 22: 1, 24: 1, 26: 1, 3: 2, 14: 2, 18: 2, 28: 2, 30: 2, 4: 1, 9: 1, 12: 1, 13: 1, 19: 1, 21: 1, 29: 1, 5: 2, 11: 1, 7: 2, 10: 1, 34: 1, 8: 1, 16: 1, 15: 3, 25: 2, 32: 2, 35: 2, 38: 1, 42: 1, 39: 1, 40: 1, 41: 1, 43: 1, 50: 2, 37: 3, 45: 1, 46: 2, 48: 1, 53: 1, 60: 1, 31: 2, 64: 1, 70: 1, 27: 1, 23: 1, 36: 1, 57: 1, 63: 2, 33: 2, 59: 2, 47: 3, 52: 1, 55: 1, 56: 1, 49: 1, 44: 3, 51: 2, 54: 1, 58: 2, 65: 1, 62: 1, 61: 1, 68: 1, 66: 1, 67: 1, 69: 1, 71: 1}


2

In [24]:
######################### Mini-digression ##############################

### A separate file with nodes (possibly with attributes)
### must be imported separately from the edge list, and 'added' to the graph
### if there are isolates (which do not appear in edgelists)

# For example, import friendship graph
ELfriendG = nx.read_edgelist('ELfriend_edgelist.txt',comments="node",
                             create_using=nx.DiGraph(),nodetype=int)
# see basic properties of this graph
print(nx.info(ELfriendG))

Name: 
Type: DiGraph
Number of nodes: 69
Number of edges: 575
Average in degree:   8.3333
Average out degree:   8.3333


In [22]:
# now add attributes (add nodes without links)
ELfriendG.add_nodes_from(ELNodeData)

# see basic properties of this graph again
print(nx.info(ELfriendG))

############### end of digression ##############

Name: 
Type: DiGraph
Number of nodes: 71
Number of edges: 575
Average in degree:   8.0986
Average out degree:   8.0986


In [25]:
nx.density(ELadviceG)

0.179476861167002

In [26]:
nx.density(ELfriendG)

0.12254901960784313

In [30]:
################## Basic connectivity measures

# is the graph connected?
# Tout le réseau est-il connectée de manière directe ? 
print(nx.is_strongly_connected(ELadviceG)) # "strongly" because it is a digraph
# with an undirected graph, nx.is_connected(G)

# if "strongly" is false, try with "weakly":
print(nx.is_weakly_connected(ELadviceG))

False
True


In [33]:
### find isolates (components consisting in 1 node)
list(nx.isolates(ELadviceG))

[]

In [34]:
### find isolates in friendship network
list(nx.isolates(ELfriendG))

[]

In [35]:
# the networkx algorithm is only for undirected graphs
# so we need to take undirected representation
ELadviceUG = nx.DiGraph.to_undirected(ELadviceG)

In [36]:
## diameter

# because graph is not strongly connected (infinite distances)
# we need to take its undirected representation
# ELadviceUG = nx.DiGraph.to_undirected(ELadviceG)
print(nx.diameter(ELadviceUG))


3


In [37]:
### find cliques 
# cliques de tailes 1, 2, 3 et 7...
list(nx.find_cliques(ELadviceUG))

[[3, 2, 19, 4, 17],
 [3, 2, 19, 4, 6],
 [3, 28, 17, 19, 4],
 [3, 28, 17, 19, 30],
 [3, 28, 17, 14, 25, 30],
 [3, 28, 17, 14, 4],
 [3, 28, 6, 18],
 [3, 28, 6, 4, 19],
 [3, 28, 6, 4, 14],
 [3, 28, 6, 30, 19],
 [3, 28, 6, 30, 14],
 [3, 23, 18],
 [3, 23, 4],
 [7, 34, 16, 64, 2, 10],
 [7, 34, 16, 28],
 [7, 34, 33],
 [7, 35, 33],
 [7, 35, 28],
 [7, 58, 33],
 [7, 58, 28],
 [14, 48, 34, 16, 17, 42, 12],
 [14, 20, 17, 25, 30],
 [14, 20, 17, 42, 4],
 [14, 20, 17, 42, 30],
 [14, 20, 35, 25, 30],
 [14, 20, 35, 4],
 [14, 20, 35, 15],
 [14, 28, 12, 16, 32, 50],
 [14, 28, 12, 16, 32, 15],
 [14, 28, 12, 16, 25, 17],
 [14, 28, 12, 16, 25, 50],
 [14, 28, 12, 16, 42, 17, 34],
 [14, 28, 12, 16, 42, 17, 4],
 [14, 28, 12, 16, 42, 50],
 [14, 28, 12, 16, 15, 34],
 [14, 28, 12, 35, 32, 50],
 [14, 28, 12, 35, 32, 15],
 [14, 28, 12, 35, 25, 50],
 [14, 28, 12, 35, 4],
 [14, 28, 63, 35, 32, 50, 30],
 [14, 28, 63, 35, 32, 15],
 [14, 28, 63, 35, 25, 50, 30],
 [14, 28, 6, 32, 16, 50],
 [14, 28, 6, 32, 16, 15],
 [14, 

In [38]:
## average shortest path length
nx.average_shortest_path_length(ELadviceG)

2.061368209255533

Le diamètre était de 3, pas énorme. Là c'est un peu plus faible que le diamètre mais ça va, c'est un graphe qui reste quand même assez bien connecté. 
La notion de réciprocité s'applique uniquement dans les graphes orientés, là c'est bon.

In [39]:
##### Reciprocity (only interesting in digraph; not relevant in undirected graphs)
nx.reciprocity(ELadviceG)

0.3923766816143498

39 % des liens dans les relations de conseil sont réciproques. 

In [41]:
##### Transitivity
print(nx.transitivity(ELadviceG)) # this is the global clustering coefficient

0.36453095819566156


36 % de transitivité. 

In [46]:
# Clustering
print(nx.clustering(ELadviceG))  # measure is by node (local clustering coefficient)

{1: 0.37542087542087543, 2: 0.3752913752913753, 17: 0.3455399061032864, 20: 0.3694817658349328, 6: 0.3619047619047619, 22: 0.266156462585034, 24: 0.30824372759856633, 26: 0.2898071625344353, 3: 0.43564356435643564, 14: 0.4302600472813239, 18: 0.3888888888888889, 28: 0.2970639032815199, 30: 0.2919207317073171, 4: 0.3993558776167472, 9: 0.5223880597014925, 12: 0.36310299869621904, 13: 0.29012345679012347, 19: 0.31296296296296294, 21: 0.4055876685934489, 29: 0.4671361502347418, 5: 0.2727272727272727, 11: 0.37683823529411764, 7: 0.3181818181818182, 10: 0.43154761904761907, 34: 0.34456521739130436, 8: 0.4423076923076923, 16: 0.30102996254681647, 15: 0.3352803738317757, 25: 0.47315436241610737, 32: 0.38497652582159625, 35: 0.3599562363238512, 38: 0.39558232931726905, 42: 0.369727047146402, 39: 0.32346491228070173, 40: 0.390595009596929, 41: 0.3267937219730942, 43: 0.4188191881918819, 50: 0.3888888888888889, 37: 0.2619047619047619, 45: 0.41721854304635764, 46: 0.5086206896551724, 48: 0.379227

In [47]:
## take the average:
print(nx.average_clustering(ELadviceG)) 

0.3678383808693976


Ici presque identique à la transité globale

In [48]:
################# Centrality

# degree centrality (without considering directed edges)
# sur un graphe orienté il fait la somme des in et out ?
print(nx.degree_centrality(ELadviceG))

# indegree centrality (incoming ties)
print(nx.in_degree_centrality(ELadviceG))

# indegree centrality (outgoing ties)
print(nx.out_degree_centrality(ELadviceG))

{1: 0.35714285714285715, 2: 0.42857142857142855, 17: 0.6714285714285714, 20: 0.4714285714285714, 6: 0.3, 22: 0.5, 24: 0.6857142857142857, 26: 0.8714285714285714, 3: 0.21428571428571427, 14: 0.42857142857142855, 18: 0.22857142857142856, 28: 0.7, 30: 0.5285714285714286, 4: 0.5142857142857142, 9: 0.24285714285714285, 12: 0.5714285714285714, 13: 0.7142857142857143, 19: 0.5857142857142857, 21: 0.4714285714285714, 29: 0.42857142857142855, 5: 0.3, 11: 0.34285714285714286, 7: 0.14285714285714285, 10: 0.2714285714285714, 34: 0.44285714285714284, 8: 0.21428571428571427, 16: 0.6714285714285714, 15: 0.42857142857142855, 25: 0.2571428571428571, 32: 0.42857142857142855, 35: 0.44285714285714284, 38: 0.32857142857142857, 42: 0.41428571428571426, 39: 0.44285714285714284, 40: 0.4714285714285714, 41: 0.6142857142857142, 43: 0.34285714285714286, 50: 0.3, 37: 0.09999999999999999, 45: 0.2571428571428571, 46: 0.22857142857142856, 48: 0.3, 53: 0.11428571428571428, 60: 0.2714285714285714, 31: 0.471428571428571

In [49]:
# Eigenvector centrality
# Non vu en cours 2
nx.eigenvector_centrality(ELadviceG)

# This measure works only with undirected graphs.
# For directed graphs, networkx takes the “left” eigenvector centrality
# which corresponds to the in-edges in the graph.
# For out-edges eigenvector centrality first reverse the graph with G.reverse()
# (it is the transpose of the adjacency matrix)

{1: 0.23611842910389202,
 2: 0.23446598758776463,
 17: 0.2833381019863146,
 20: 0.2136997733197734,
 6: 0.19327488034675078,
 22: 0.14595178248289495,
 24: 0.2055602752981124,
 26: 0.27940181999328895,
 3: 0.0690957806884336,
 14: 0.16316090141330242,
 18: 0.04920764169567497,
 28: 0.17088364259117622,
 30: 0.12626722622054937,
 4: 0.18440063516941513,
 9: 0.1455021929173895,
 12: 0.20937508725597115,
 13: 0.1906295020615452,
 19: 0.10463771646723334,
 21: 0.16924653664919967,
 29: 0.1725461832837216,
 5: 0.10119522090334503,
 11: 0.16883332400304915,
 7: 0.03881236845877199,
 10: 0.08151403900398026,
 34: 0.20047568885772762,
 8: 0.12393409482289403,
 16: 0.1897905283487608,
 15: 0.17812634571760497,
 25: 0.0860128649495746,
 32: 0.1489807061393299,
 35: 0.094819273236335,
 38: 0.10060646340894579,
 42: 0.03394025636663496,
 39: 0.07694921955197542,
 40: 0.14459839998092092,
 41: 0.11514251781480742,
 43: 0.06431875795214105,
 50: 0.07329090846328304,
 37: 0.02526698595493389,
 45: 0.

In [50]:
# Non vu en cours 2
# Closeness centrality
nx.closeness_centrality(ELadviceG)

# The closeness centrality uses inward distance to a node, not outward.
# If you want to use outword distances apply the function to G.reverse()

{1: 0.5763922518159806,
 2: 0.5863300492610838,
 17: 0.6018963337547408,
 20: 0.5763922518159806,
 6: 0.5691056910569106,
 22: 0.5813186813186814,
 24: 0.5813186813186814,
 26: 0.6603328710124827,
 3: 0.4304701627486438,
 14: 0.5075692963752666,
 18: 0.44165120593692025,
 28: 0.5667857142857142,
 30: 0.5272425249169435,
 4: 0.5529616724738676,
 9: 0.5075692963752666,
 12: 0.5621015348288075,
 13: 0.641644204851752,
 19: 0.482370820668693,
 21: 0.5574941451990633,
 29: 0.539795918367347,
 5: 0.5272425249169435,
 11: 0.5485023041474654,
 7: 0.4304701627486438,
 10: 0.4858163265306123,
 34: 0.5863300492610838,
 8: 0.5038095238095238,
 16: 0.5667857142857142,
 15: 0.5621015348288075,
 25: 0.4564717162032598,
 32: 0.5529616724738676,
 35: 0.45955598455598456,
 38: 0.5231868131868133,
 42: 0.40972461273666094,
 39: 0.5313616071428572,
 40: 0.5441142857142858,
 41: 0.5191930207197383,
 43: 0.45342857142857146,
 50: 0.489311408016444,
 37: 0.3931461601981834,
 45: 0.4658512720156556,
 46: 0.45

In [51]:
# Betweenness centrality
nx.betweenness_centrality(ELadviceG)

# Betweenness values are normalized by 2/((n-1)(n-2)) for graphs,
# and 1/((n-1)(n-2)) for directed graphs, where n is the number of nodes 

{1: 0.008833673173946111,
 2: 0.01009224654146655,
 17: 0.07855679481717165,
 20: 0.01139731429167709,
 6: 0.0,
 22: 0.025115281617500766,
 24: 0.05165160763525819,
 26: 0.06642682993590107,
 3: 0.004973753454634258,
 14: 0.01234010299412268,
 18: 0.005471541688690605,
 28: 0.06171447676778272,
 30: 0.07491023142150818,
 4: 0.013384164027975378,
 9: 0.0006494578186122967,
 12: 0.03889449310210418,
 13: 0.04394039301097058,
 19: 0.0341594801550794,
 21: 0.012734656353257365,
 29: 0.007984875916370418,
 5: 0.004435473932021742,
 11: 0.016778388440100718,
 7: 0.001329623311926803,
 10: 0.004740117693952743,
 34: 0.007839694186268205,
 8: 0.00020918292657423092,
 16: 0.0799836790470071,
 15: 0.014440526789132104,
 25: 0.003180226613076882,
 32: 0.01953798851844105,
 35: 0.01574651961242209,
 38: 0.003339991474684802,
 42: 0.011272989345195553,
 39: 0.07320791418995971,
 40: 0.009241482257118577,
 41: 0.0495274725774742,
 43: 0.009961798414741208,
 50: 0.009435751588984723,
 37: 0.014474282

In [None]:
#### Let us look at visualization

# draw graph
nx.draw(ELadviceG)
plt.show()

In [None]:
# improve visualization

# change layout, eg Fruchterman Reingold
# reduce node size, make edges more transparent to improve visibility
nx.draw(ELadviceG, with_labels=False, node_size=30, node_color="red", edge_color = "gray",
        pos=nx.fruchterman_reingold_layout(ELadviceG))

# autres layouts to try: spring_layout, spectral_layout, random_layout, circular_layout... 


In [None]:
### use different colors to represent attributes, eg Office
# Office has 3 values only (categorical variable)

# create empty list for node colors
node_color = []

# for each node in the graph
for node in ELadviceG.nodes(data=True):

    # if the node has the attribute Office 1
    if node[1]['Office']==1:
        node_color.append('blue')
        
    # if the node has the attribute Office 2
    elif node[1]['Office']==2:
        node_color.append('red')

    # if the node has the attribute office 3
    elif node[1]['Office']==3:
        node_color.append('yellow')

    # draw graph with node attribute color
nx.draw(ELadviceG, with_labels=False, node_size=25, node_color=node_color, edge_color = "gray")


In [None]:
# change node size to reflect indegree

indeg = ELadviceG.in_degree   # sets up the indegree view
indegs = [d for n, d in indeg]   # gets all nodes' degree values

# draw plot (color = office)
nx.draw(ELadviceG, pos=nx.fruchterman_reingold_layout(ELadviceG), with_labels=False,
        node_color=node_color, edge_color = "gray", 
        node_size=indegs)
plt.show()

# Exercise: do the same for outdegree

In [None]:
###### For better visualization
## export graph in Gephi format

nx.write_gexf(ELadviceG, 'ELadviceG.gexf')

## Homework for next time 

Re-do the same with ELfriend and ELcowork: create graph objects, add attributes, analyze

Careful: ELfriend is an asymmetric matrix (like ELadvice) while ELcowork is symmetric

Also: mind the presence of isolates

Email to paola.tubaro@lri.fr by Tuesday, 23 February 2021, 2pm

Creer graphe, rajouter les attributs, refaire les analyses (celles qu'on a faites en cours et pas les autres). Il y a des isolés, il faut y faire attention, parfois il peut être utile de les enlever, garder un oeil là-dessus. 