## Exercises 10 February 2021

In [None]:
# import networkx (and other useful packages)
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
########### Graphs from empirical data ##############

# Build the advice network by reading its edge list (empirical data).
# Ties are directed, so the container is a "DiGraph" rather than a "Graph".
# NOTE(review): comments="node" presumably makes the reader discard a header
# row beginning with "node" -- confirm against the data file.
ELadviceG = nx.read_edgelist(
    'ELadvice_edgelist.txt',
    comments="node",
    create_using=nx.DiGraph(),
    nodetype=int,
)

In [None]:
# Attach node attributes to the digraph.
# The attribute table is tab-separated; its 'Node' column is used as the key,
# producing (node, {attribute: value}) pairs that add_nodes_from understands.
ELnodes = pd.read_table("ELnodeAttributes.txt", sep='\t')
ELNodeData = (
    ELnodes
    .set_index('Node')
    .to_dict(orient='index')
    .items()
)
ELadviceG.add_nodes_from(ELNodeData)

# inspect nodes (with attributes) and edges
print(ELadviceG.nodes(data=True))
print(ELadviceG.edges(data=True))

In [None]:
# see basic properties of graph
# nx.info() was deprecated in networkx 2.7 and removed in 3.0, so the summary
# is built from accessors that exist in every networkx release.
print(
    f"{type(ELadviceG).__name__} with {ELadviceG.number_of_nodes()} nodes "
    f"and {ELadviceG.number_of_edges()} edges"
)

In [None]:
# Inspecting node attributes

# Example: collect the 'Office' attribute into a dictionary keyed by node
offices = nx.get_node_attributes(ELadviceG, name='Office')
print(offices)
# Look up the value of one node (here node n. 5)
offices[5]


In [None]:
######################### Mini-digression ##############################

### A separate file with nodes (possibly with attributes)
### must be imported separately from the edge list, and 'added' to the graph
### if there are isolates (which do not appear in edgelists)

# For example, import friendship graph (directed, like the advice graph)
ELfriendG = nx.read_edgelist(
    'ELfriend_edgelist.txt',
    comments="node",
    create_using=nx.DiGraph(),
    nodetype=int,
)
# see basic properties of this graph
# nx.info() was deprecated in networkx 2.7 and removed in 3.0, so the summary
# is built from accessors that exist in every networkx release.
print(
    f"{type(ELfriendG).__name__} with {ELfriendG.number_of_nodes()} nodes "
    f"and {ELfriendG.number_of_edges()} edges"
)

In [None]:
# now add attributes; nodes listed in the attribute table but absent from the
# edge list (isolates) are created by this call
ELfriendG.add_nodes_from(ELNodeData)

# see basic properties of this graph again (compare the node count with the
# summary printed before the attributes were added)
# nx.info() was deprecated in networkx 2.7 and removed in 3.0, so the summary
# is built from accessors that exist in every networkx release.
print(
    f"{type(ELfriendG).__name__} with {ELfriendG.number_of_nodes()} nodes "
    f"and {ELfriendG.number_of_edges()} edges"
)

############### end of digression ##############

In [None]:
################## Basic connectivity measures

# Is the graph connected?
# A digraph has two notions of connectivity: "strong" (edge direction is
# respected) and "weak" (edge direction is ignored). If the strong test is
# False, the weak one is the next thing to look at.
# For an undirected graph the corresponding test is nx.is_connected(G).
print(nx.is_strongly_connected(ELadviceG))
print(nx.is_weakly_connected(ELadviceG))

In [None]:
### isolates of the advice network (components made of a single node)
[node for node in nx.isolates(ELadviceG)]

In [None]:
### isolates of the friendship network
[node for node in nx.isolates(ELfriendG)]

In [None]:
### find cliques

# networkx's clique finder is only defined for undirected graphs,
# so work on the undirected representation of the advice digraph
ELadviceUG = ELadviceG.to_undirected()

[clique for clique in nx.find_cliques(ELadviceUG)]

In [None]:
## diameter

# The digraph is not strongly connected, so some directed distances are
# infinite; the diameter is therefore measured on the undirected
# representation ELadviceUG (built in the cliques cell with to_undirected()).
advice_diameter = nx.diameter(ELadviceUG)
print(advice_diameter)


In [None]:
## average shortest path length

# The average is only defined when every node can reach every other node.
# On a digraph that is not strongly connected, recent networkx releases raise
# NetworkXError, and older ones return a figure that ignores unreachable pairs.
# So, as for the diameter, fall back to the undirected representation whenever
# the digraph is not strongly connected.
nx.average_shortest_path_length(
    ELadviceG if nx.is_strongly_connected(ELadviceG) else ELadviceG.to_undirected()
)


In [None]:
##### Reciprocity
# Only meaningful for a digraph (not relevant in undirected graphs).
# nodes=None asks for the reciprocity of the whole graph.

nx.reciprocity(ELadviceG, nodes=None)

In [None]:
##### Transitivity and clustering

# global clustering coefficient (transitivity)
print(nx.transitivity(ELadviceG))

# local clustering coefficient: one value per node
print(nx.clustering(ELadviceG))

# mean of the local coefficients
print(nx.average_clustering(ELadviceG))

In [None]:
################# Centrality

# Printed in this order:
#   1. degree centrality (without considering the direction of edges)
#   2. indegree centrality (incoming ties)
#   3. outdegree centrality (outgoing ties)
for centrality_measure in (
    nx.degree_centrality,
    nx.in_degree_centrality,
    nx.out_degree_centrality,
):
    print(centrality_measure(ELadviceG))

In [None]:
# Eigenvector centrality
#
# On an undirected graph there is a single notion of eigenvector centrality.
# For directed graphs, networkx takes the "left" eigenvector centrality,
# which corresponds to the in-edges in the graph.
# For out-edges eigenvector centrality, first reverse the graph with
# G.reverse() (this is the transpose of the adjacency matrix).

nx.eigenvector_centrality(ELadviceG)

In [None]:
# Closeness centrality
#
# The closeness centrality uses inward distance to a node, not outward.
# To use outward distances instead, apply the function to G.reverse().

nx.closeness_centrality(ELadviceG)

In [None]:
# Betweenness centrality
#
# With normalized=True (the default), betweenness values are normalized by
# 2/((n-1)(n-2)) for graphs and 1/((n-1)(n-2)) for directed graphs,
# where n is the number of nodes.

nx.betweenness_centrality(ELadviceG, normalized=True)

In [None]:
#### Let us look at visualization

# first plot: every drawing option left at its default
# (pos=None lets networkx choose the node positions itself)
nx.draw(ELadviceG, pos=None)
plt.show()

In [None]:
# improve visualization

# - layout: Fruchterman-Reingold
# - smaller nodes, no labels, gray edges so that the structure is easier to see
nx.draw(
    ELadviceG,
    pos=nx.fruchterman_reingold_layout(ELadviceG),
    with_labels=False,
    node_size=30,
    node_color="red",
    edge_color="gray",
)

# other layouts to try: spring_layout, spectral_layout, random_layout, circular_layout...


In [None]:
### use different colors to represent attributes, eg Office
# Office has 3 values only (categorical variable)

# one color per Office value
OFFICE_COLORS = {1: 'blue', 2: 'red', 3: 'yellow'}
# Fallback for nodes with no 'Office' attribute or an unexpected value.
# Without it such a node would either raise KeyError or get no color at all,
# leaving the color list shorter than the node list and breaking nx.draw.
OFFICE_FALLBACK_COLOR = 'lightgray'

# exactly one color per node, in the graph's own node order
node_color = [
    OFFICE_COLORS.get(attributes.get('Office'), OFFICE_FALLBACK_COLOR)
    for _node, attributes in ELadviceG.nodes(data=True)
]

# draw graph with node attribute color
nx.draw(ELadviceG, with_labels=False, node_size=25, node_color=node_color, edge_color="gray")


In [None]:
# Node size proportional to indegree

indeg = ELadviceG.in_degree                      # indegree view: (node, indegree) pairs
indegs = [degree for _node, degree in indeg]     # indegree values, in node order

# draw plot (color = office, size = indegree)
# NOTE(review): a node with indegree 0 gets size 0 here -- confirm that it
# disappearing from the picture is intended.
nx.draw(
    ELadviceG,
    pos=nx.fruchterman_reingold_layout(ELadviceG),
    with_labels=False,
    node_color=node_color,
    edge_color="gray",
    node_size=indegs,
)
plt.show()

# Exercise: do the same for outdegree

In [None]:
###### For better visualization
## save the advice graph as a GEXF file, the format Gephi opens

nx.write_gexf(
    ELadviceG,
    'ELadviceG.gexf',
)

## Homework for next time 

Re-do the same with ELfriend and ELcowork: create graph objects, add attributes, analyze

Careful: ELfriend is an asymmetric matrix (like ELadvice) while ELcowork is symmetric

Also: mind the presence of isolates

Email to paola.tubaro@lri.fr by Tuesday, 23 February 2021, 2pm