# Tutorial, 14 February 2024

In [None]:
# import networkx (and other useful packages)
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Matching graphs and node attributes

In [None]:
########### Graphs from empirical data ##############

# generate new digraph by reading edgelist (from empirical data)
# it is a "DiGraph" (not "Graph") because it is directed
ELadviceG = nx.read_edgelist('ELadvice_edgelist.txt',comments="node",create_using=nx.DiGraph(),nodetype=int)

In [None]:
### Alternative way to read the edgelist (1st step)
advice = pd.read_csv('ELadvice_edgelist.txt', sep='\t')
advice


In [None]:
### Alternative way to read the edgelist (2nd step)
ELadviceG1 = nx.from_pandas_edgelist(advice, 'node1', 'node2', create_using=nx.DiGraph())

In [None]:
ELnodes = pd.read_table("ELnodeAttributes.txt", sep='\t')
print(ELnodes)

In [None]:
# match nodes of digraph with attributes of nodes
ELNodeData = ELnodes.set_index('Node').to_dict('index').items()
ELadviceG.add_nodes_from(ELNodeData)

# view results
print(ELadviceG.nodes(data=True))


In [None]:
# view results (2)
print(ELadviceG.edges(data=True))

In [None]:
# to view attributes

# for example, 'Office'
offices = nx.get_node_attributes(ELadviceG,'Office')
print(offices)
# to view the value for a single node (for example, node n. 4)
offices[4]


In [None]:
######################### Mini-digression ##############################

### A separate file with nodes (possibly with attributes)
### must be imported separately from the edge list, and 'added' to the graph
### if there are isolates (which do not appear in edgelists)

# For example, import friendship graph
ELfriendG = nx.read_edgelist('ELfriend_edgelist.txt',comments="node",create_using=nx.DiGraph(),nodetype=int)
# see nodes
ELfriendG.nodes()

In [None]:
# how many nodes?
len(ELfriendG.nodes())

In [None]:
# now add attributes
ELfriendG.add_nodes_from(ELNodeData)

# how many nodes now?
len(ELfriendG.nodes())

############### end of digression ##############

## Local structures

In [None]:
##### Isolates (components consisting in 1 node) ### 
list(nx.isolates(ELadviceG))

In [None]:
### NB but if we use the friendhsip graph instead... ### 
list(nx.isolates(ELfriendG))

In [None]:
##### Dyads: reciprocity (only interesting in digraph; not relevant in undirected graphs)

nx.reciprocity(ELadviceG)

In [None]:
##### Triads: global transitivity
print(nx.transitivity(ELadviceG)) # this is the global clustering coefficient

In [None]:
##### Triads: local transitivity
print(nx.clustering(ELadviceG))  # measure is by node (local clustering coefficient)
## take the average:
print(nx.average_clustering(ELadviceG)) 

## Global structures: cohesion

In [None]:
##### Density
nx.density(ELadviceG)

In [None]:
### find cliques 

# the networkx algorithm is only for undirected graphs
# so we need to take undirected representation
ELadviceUG = nx.DiGraph.to_undirected(ELadviceG)

list(nx.find_cliques(ELadviceUG))

## Global structures: connectivity

In [None]:
################## Basic connectivity measures

# is the graph connected?
print(nx.is_strongly_connected(ELadviceG)) # "strongly" because it is a digraph
# with an undirected graph, nx.is_connected(G)

# if "strongly" is false, try with "weakly":
print(nx.is_weakly_connected(ELadviceG))

In [None]:
## diameter

# because graph is not strongly connected (infinite distances)
# we need to take its undirected representation
# ELadviceUG = nx.DiGraph.to_undirected(ELadviceG)
print(nx.diameter(ELadviceUG))


In [None]:
## average shortest path length
# because graph is not strongly connected (infinite distances)
# we need to take its undirected representation
# ELadviceUG = nx.DiGraph.to_undirected(ELadviceG)
nx.average_shortest_path_length(ELadviceUG)


## Centrality

In [None]:
################# Centrality

# degree centrality (without considering directed edges)
print(nx.degree_centrality(ELadviceG))

# indegree centrality (incoming ties)
print(nx.in_degree_centrality(ELadviceG))

# indegree centrality (outgoing ties)
print(nx.out_degree_centrality(ELadviceG))

In [None]:
# Eigenvector centrality

nx.eigenvector_centrality(ELadviceG)

# This measure works only with undirected graphs.
# For directed graphs, networkx takes the “left” eigenvector centrality
# which corresponds to the in-edges in the graph.
# For out-edges eigenvector centrality first reverse the graph with G.reverse()
# (it is the transpose of the adjacency matrix)

In [None]:
# Closeness centrality
nx.closeness_centrality(ELadviceG)

# The closeness centrality uses inward distance to a node, not outward.
# If you want to use outword distances apply the function to G.reverse()

In [None]:
# Betweenness centrality
nx.betweenness_centrality(ELadviceG)

# Betweenness values are normalized by 2/((n-1)(n-2)) for graphs,
# and 1/((n-1)(n-2)) for directed graphs, where n is the number of nodes 

In [None]:
#### Let's briefly look at degree distributions
indegrees = [val for (node, val) in ELadviceG.in_degree()]
outdegrees = [val for (node, val) in ELadviceG.out_degree()]

In = np.array(indegrees)
Out = np.array(outdegrees)

plt.hist([In, Out], bins=5, label=['Indegrees', 'Outdegrees'])
plt.legend(loc='upper right')
plt.show()



## Visualization

In [None]:
#### Let us look at visualization

# draw graph
nx.draw(ELadviceG)
plt.show()

In [None]:
# improve visualization

# change layout, eg Fruchterman Reingold
# reduce node size, make edges more transparent to improve visibility
nx.draw(ELadviceG, with_labels=False, node_size=30, node_color="red", edge_color = "gray",
        pos=nx.fruchterman_reingold_layout(ELadviceG))

# autres layouts to try: spring_layout, spectral_layout, random_layout, circular_layout... 


In [None]:
### use different colors to represent attributes, eg Office
# Office has 3 values only (categorical variable)

# create empty list for node colors
node_color = []

# for each node in the graph
for node in ELadviceG.nodes(data=True):

    # if the node has the attribute Office 1
    if node[1]['Office']==1:
        node_color.append('blue')
        
    # if the node has the attribute Office 2
    elif node[1]['Office']==2:
        node_color.append('red')

    # if the node has the attribute office 3
    elif node[1]['Office']==3:
        node_color.append('yellow')

    # draw graph with node attribute color
nx.draw(ELadviceG, with_labels=False, node_size=25, node_color=node_color, edge_color = "gray")


In [None]:
# change node size to reflect indegree

indeg = ELadviceG.in_degree   # sets up the indegree view
indegs = [d for n, d in indeg]   # gets all nodes' degree values 

# draw plot (color = office)
nx.draw(ELadviceG, pos=nx.fruchterman_reingold_layout(ELadviceG), with_labels=False,
        node_color=node_color, edge_color = "gray", 
        node_size=indegs)
plt.show()

# Exercise: do the same for outdegree

In [None]:
###### For better visualization
## export graph in Gephi format

nx.write_gexf(ELadviceG, 'ELadviceG.gexf')

## Homework for next session

Re-do the same with ELfriend and ELcowork: create graph objects, add attributes, analyze

Careful: ELfriend is an asymmetric matrix (like ELadvice) while ELcowork is symmetric

Also: mind the presence of isolates


## Home exercises week 2

In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

### Matching graphs and node attributes

In [2]:
# Create directed graph
ELfriendG = nx.read_edgelist('ELfriend_edgelist.txt', comments="node", create_using=nx.DiGraph(), nodetype=int)
#Create undirected graph
ELcoworkG = nx.read_edgelist('ELcowork_edgelist.txt', comments="node", create_using=nx.Graph(), nodetype=int)

In [3]:
ELfriendG.number_of_nodes(), ELcoworkG.number_of_nodes()

(69, 70)

In [4]:
# read the attributes
ELnodes = pd.read_table("ELnodeAttributes.txt", sep='\t')

In [5]:
# match nodes of directed graph with attributes of nodes
ELnodeData = ELnodes.set_index('Node').to_dict('index').items()
ELfriendG.add_nodes_from(ELnodeData)
ELcoworkG.add_nodes_from(ELnodeData)

In [6]:
ELfriendG.number_of_nodes(), ELcoworkG.number_of_nodes()

(71, 71)

The number of nodes has changed after adding attributes, because both graphs have isolates (not in the edge list). This will have implications for parts of our analysis. For now, we consider graphs with isolates.

In [7]:
# let's look at isolates
list(nx.isolates(ELfriendG)), list(nx.isolates(ELcoworkG))

([44, 47], [8])

In [8]:
# let's look at attribute 'Gender' (same for both graphs)
gender = nx.get_node_attributes(ELfriendG,'Gender')
print(gender)

{1: 1, 2: 1, 4: 1, 8: 1, 17: 1, 16: 1, 22: 1, 26: 1, 3: 1, 9: 1, 11: 1, 12: 1, 14: 1, 19: 1, 21: 1, 23: 1, 27: 2, 29: 2, 5: 1, 7: 1, 18: 1, 25: 1, 24: 1, 10: 1, 13: 1, 34: 2, 38: 2, 15: 1, 20: 1, 41: 1, 43: 2, 52: 1, 6: 1, 28: 1, 50: 1, 37: 1, 32: 1, 39: 2, 40: 1, 31: 1, 33: 1, 35: 1, 51: 2, 58: 1, 46: 2, 60: 2, 36: 1, 57: 2, 30: 1, 42: 1, 54: 1, 56: 1, 48: 2, 49: 1, 64: 2, 45: 1, 55: 1, 65: 1, 62: 1, 59: 2, 53: 1, 61: 2, 63: 1, 66: 1, 69: 2, 70: 1, 71: 2, 68: 1, 67: 2, 44: 2, 47: 1}


In [9]:
# reciprocity (only interesting for ELfriend as it is an undirected graph)
nx.reciprocity(ELfriendG)
print(round(nx.reciprocity(ELfriendG)*100,1), "% of friendship relations are reciprocical.")

61.2 % of friendship relations are reciprocical.


In [10]:
# One can look at the local transitivity
print(nx.clustering(ELfriendG), nx.clustering(ELcoworkG))

{1: 0.6142857142857143, 2: 0.5229885057471264, 4: 0.37468671679197996, 8: 0.37037037037037035, 17: 0.31218905472636815, 16: 0.2533783783783784, 22: 0.5625, 26: 0.38100436681222705, 3: 0.25, 9: 0.5978260869565217, 11: 0.5301204819277109, 12: 0.3548009367681499, 14: 0.29069767441860467, 19: 0.28846153846153844, 21: 0.536734693877551, 23: 0.7037037037037037, 27: 0.4858490566037736, 29: 0.40710382513661203, 5: 0.17307692307692307, 7: 0.2, 18: 0.47093023255813954, 25: 0.3602941176470588, 24: 0.3542483660130719, 10: 0.5503355704697986, 13: 0.34392014519056263, 34: 0.29770992366412213, 38: 0.3235294117647059, 15: 0.38461538461538464, 20: 0.45481927710843373, 41: 0.3110831234256927, 43: 0.3220108695652174, 52: 0.444636678200692, 6: 0, 28: 0.40594059405940597, 50: 0.504424778761062, 37: 0, 32: 0.5098039215686274, 39: 0.2867647058823529, 40: 0.38513513513513514, 31: 0.22916666666666666, 33: 0.30097087378640774, 35: 0.37575757575757573, 51: 0.6506849315068494, 58: 0.4013840830449827, 46: 0.384615

In [11]:
# we expect a local transitivity of 0 for isolates:
print(nx.clustering(ELfriendG)[44], nx.clustering(ELfriendG)[47], nx.clustering(ELcoworkG)[8])

0 0 0


In [12]:
# Let's look at the average local transitivity
print(nx.average_clustering(ELfriendG), nx.average_clustering(ELcoworkG))
# Triads: global transitivity
print(nx.transitivity(ELfriendG), nx.transitivity(ELcoworkG))

0.3952045322118626 0.3852689534189992
0.3473431401981387 0.3069738480697385


One can notice average local transitivity is close to global transitivity for both graphs.

### Other indicators of cohesion

In [13]:
# Density
nx.density(ELfriendG), nx.density(ELcoworkG)

(0.11569416498993963, 0.15211267605633802)

In [14]:
# to find cliques that are only defined for undirected graphs, we need to convert ELfriendG to an undirected graph
ELfriendUG = nx.DiGraph.to_undirected(ELfriendG)
print(list(nx.find_cliques(ELfriendUG)))
print(list(nx.find_cliques(ELcoworkG)))

[[1, 17, 12, 8], [1, 17, 12, 2, 26, 4, 22], [1, 17, 39, 26], [2, 12, 17, 10, 24, 26], [2, 12, 17, 4, 22, 16], [2, 12, 17, 4, 22, 24, 26], [2, 53], [3, 14, 4], [3, 14, 28], [3, 7], [5, 33, 25], [5, 33, 18], [5, 33, 7], [5, 12, 25], [5, 20], [6, 18], [6, 14], [8, 41, 65], [8, 11, 65], [8, 11, 10, 17, 12], [9, 24, 27, 10, 12, 29, 26], [9, 24, 27, 10, 12, 13, 17, 26], [9, 24, 27, 10, 12, 13, 17, 11, 21], [9, 24, 27, 4, 17, 25, 12, 26], [9, 24, 27, 4, 17, 25, 12, 21], [9, 24, 27, 4, 17, 13, 26, 12], [9, 24, 27, 4, 17, 13, 26, 20], [9, 24, 27, 4, 17, 13, 11, 21, 12], [9, 24, 27, 4, 17, 13, 11, 21, 20], [9, 24, 27, 4, 17, 13, 23], [9, 24, 27, 4, 29, 26, 12], [10, 12, 34, 26, 17], [10, 12, 34, 26, 29], [10, 12, 38, 24, 26, 27, 13], [10, 12, 38, 24, 26, 27, 29], [11, 27, 40, 17, 21, 43], [11, 27, 40, 17, 21, 20], [11, 27, 13, 24, 43, 17, 21], [11, 27, 13, 24, 4, 65], [12, 34, 16, 17], [12, 4, 16, 13, 17, 21], [12, 4, 16, 13, 17, 22], [12, 4, 19, 22], [12, 4, 22, 13, 17, 24, 26], [12, 38, 16, 13

In [15]:
len(list(nx.find_cliques(ELfriendUG))), len(list(nx.find_cliques(ELcoworkG)))

(154, 205)

### Global structures: connectivity

Both graphs have isolates. Thus, both are disconnected graphs, for which the diameter and the average shortest path length are not defined:

In [16]:
print(nx.is_weakly_connected(ELfriendG))
print(nx.is_connected(ELcoworkG))

False
False


Let's now look at the connectivity of these graphs after removing isolates.

In [17]:
ELfriend_without_isolatesG = ELfriendG.copy() # we make a copy because we will need the initial graph afterwards
ELfriend_without_isolatesG.remove_nodes_from(list(nx.isolates(ELfriendG)))

In [18]:
ELfriend_without_isolatesG.number_of_nodes()

69

In [19]:
ELcowork_without_isolatesG = ELcoworkG.copy()
ELcowork_without_isolatesG.remove_nodes_from(list(nx.isolates(ELcoworkG)))

In [20]:
print(nx.is_strongly_connected(ELfriend_without_isolatesG))
print(nx.is_weakly_connected(ELfriend_without_isolatesG))

False
True


In [21]:
ELfriend_without_isolatesUG = nx.DiGraph.to_undirected(ELfriend_without_isolatesG)
print(nx.diameter(ELfriend_without_isolatesUG))
print(nx.average_shortest_path_length(ELfriend_without_isolatesG))

5


NetworkXError: Graph is not strongly connected.

In [22]:
print(nx.is_connected(ELcowork_without_isolatesG))
print(nx.diameter(ELcowork_without_isolatesG))
print(nx.average_shortest_path_length(ELcowork_without_isolatesG))

True
4
2.10351966873706


### Centrality

We are now again considering graphs with isolates.

In [23]:
# degree centrality (without considering directed edges)
print(nx.degree_centrality(ELfriendG))
print(nx.degree_centrality(ELcoworkG))

# indegree centrality (incoming ties, only for directed graphs)
print(nx.in_degree_centrality(ELfriendG))

# indegree centrality (outgoing ties)
print(nx.out_degree_centrality(ELfriendG))

{1: 0.12857142857142856, 2: 0.19999999999999998, 4: 0.41428571428571426, 8: 0.11428571428571428, 17: 0.5857142857142857, 16: 0.2571428571428571, 22: 0.19999999999999998, 26: 0.44285714285714284, 3: 0.05714285714285714, 9: 0.2857142857142857, 11: 0.2714285714285714, 12: 0.42857142857142855, 14: 0.19999999999999998, 19: 0.11428571428571428, 21: 0.32857142857142857, 23: 0.11428571428571428, 27: 0.42857142857142855, 29: 0.2857142857142857, 5: 0.11428571428571428, 7: 0.05714285714285714, 18: 0.19999999999999998, 25: 0.3, 24: 0.5714285714285714, 10: 0.2571428571428571, 13: 0.4857142857142857, 34: 0.24285714285714285, 38: 0.3, 15: 0.08571428571428572, 20: 0.2714285714285714, 41: 0.41428571428571426, 43: 0.39999999999999997, 52: 0.35714285714285715, 6: 0.02857142857142857, 28: 0.3, 50: 0.22857142857142856, 37: 0.014285714285714285, 32: 0.15714285714285714, 39: 0.3, 40: 0.2571428571428571, 31: 0.5714285714285714, 33: 0.21428571428571427, 35: 0.2714285714285714, 51: 0.18571428571428572, 58: 0.35

In [None]:
# Eigenvector centrality
print(nx.eigenvector_centrality(ELfriendG))
print(nx.eigenvector_centrality(ELfriendG.reverse()))
# corresponding to the in-edges of the graph and to the out-edges with G.reverse()
print(nx.eigenvector_centrality(ELcoworkG))

In [None]:
# Closeness centrality
print(nx.closeness_centrality(ELfriendG)) # uses the inward distance to a node
print(nx.closeness_centrality(ELcoworkG))

In [None]:
# Betweenness centrality
print(nx.betweenness_centrality(ELfriendG))
print(nx.betweenness_centrality(ELcoworkG))