In [1]:
import networkx as nx
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
# Read the edge list into a directed multigraph; the extra column of each row
# is parsed as an integer 'weight' edge attribute.
G_md = nx.read_edgelist(
    'email_network.txt',
    data=[('weight', int)],
    create_using=nx.MultiDiGraph(),
)
# Nodes are reported as employees and edges as emails.
employees_n, emails_n = len(G_md.nodes()), len(G_md.edges())
print(
    'Number of employees {}, and number of emails {}.'.format(
        employees_n, emails_n
    )
)

Number of employees 167, and number of emails 82927.


In [3]:
# Report strong connectivity first, then weak connectivity, of the full graph.
for template, is_connected in (
    ('G_md is strongly connected: {}', nx.is_strongly_connected),
    ('G_md is weakly connected: {}', nx.is_weakly_connected),
):
    print(template.format(is_connected(G_md)))

G_md is strongly connected: False
G_md is weakly connected: True


In [4]:
# Weakly connected components of G_md (each one a set of nodes), largest first.
# A bare sorted() on sets compares them by the subset relation, which is only a
# partial order and gives no meaningful ordering, so sort explicitly by size.
components = sorted(nx.weakly_connected_components(G_md), key=len, reverse=True)
print('Number of weakly connected components {}'.format(len(components)))
# After the size-descending sort the first entry is a largest component.
largest = components[0]
print('Number of nodes in the largest component {}'.format(len(largest)))

Number of weakly connected components 1
Number of nodes in the largest component 167


In [5]:
# Strongly connected components of G_md (each one a set of nodes), largest first.
# A bare sorted() on sets compares them by the subset relation, which is only a
# partial order and gives no meaningful ordering, so sort explicitly by size.
components = sorted(nx.strongly_connected_components(G_md), key=len, reverse=True)
print('Number of strongly connected components {}'.format(len(components)))
# After the size-descending sort the first entry is a largest component.
# NOTE: `largest` is read by the later cell that builds G_sc.
largest = components[0]
print('Number of nodes in the largest component {}'.format(len(largest)))

Number of strongly connected components 42
Number of nodes in the largest component 126


In [49]:
# The subgraph of G_md induced by the nodes of the largest strongly connected component.
# NOTE(review): `largest` is assigned by both the weakly- and the strongly-connected
# component cells; this cell expects the value left by the strongly connected one,
# so it must run after that cell.
G_sc = G_md.subgraph(largest)

In [10]:
# Distance statistics of G_sc (hop counts; no weight argument is passed).
avg_d = nx.average_shortest_path_length(G_sc)
print('Avg distance between nodes in G_sc is {:.5}'.format(avg_d))
# Compute every node's eccentricity once and hand it to both measures below;
# without `e`, nx.diameter and nx.radius each recompute it from scratch.
ecc = nx.eccentricity(G_sc)
# Diameter: the maximum eccentricity. `d` is reused by a later cell.
d = nx.diameter(G_sc, e=ecc)
print('Diameter of G_sc is {}'.format(d))
# Radius: the minimum eccentricity.
r = nx.radius(G_sc, e=ecc)
print('Radius of G_sc is {}'.format(r))

Avg distance between nodes in G_sc is 1.6462
Diameter of G_sc is 3
Radius of G_sc is 1


In [33]:
# Eccentricity of every node of G_sc, computed once and shared by both calls
# below (each would otherwise recompute it internally).
ecc = nx.eccentricity(G_sc)
# Nodes of G_sc that have eccentricity equal to the diameter of the subgraph, i.e., nodes on the periphery of the graph
nodes_dia = nx.periphery(G_sc, e=ecc)
# Nodes of G_sc that have eccentricity equal to the radius of the subgraph, i.e., nodes in the center of the graph
nodes_rad = nx.center(G_sc, e=ecc)

In [34]:
# Node which is connected to the most other nodes by a shortest path of length equal to the diameter of G_sc
# (`d` is the diameter computed in an earlier cell).
# One single-source search per node yields the distances to all reachable nodes,
# instead of running a separate pairwise search for every (n1, n2) combination.
# dict(...) accepts both a mapping and an iterable of (node, length) pairs.
count_connected = [
    (
        n1,
        sum(
            1
            for dist in dict(nx.single_source_shortest_path_length(G_sc, n1)).values()
            if dist == d
        ),
    )
    for n1 in G_sc.nodes()
]
max_connected = max(count for _, count in count_connected)
# On ties, keep the first node in G_sc's node iteration order.
node = [n for n, count in count_connected if count == max_connected][0]
node

'97'

In [40]:
# First node of the center of G_sc (a single node, despite the plural name).
nodes_center = nx.center(G_sc)[0]
# Minimum set of nodes separating the center node (source) from `node` (target),
# where `node` is the result of the previous cell.
# The computed values replace the hard-coded literals '38' and '97': '97' is the
# displayed value of `node`; '38' was presumably nx.center(G_sc)[0] - TODO confirm.
min_cut = nx.minimum_node_cut(G_sc, nodes_center, node)

In [15]:
# Undirected simple graph obtained from G_sc
G_mul = G_sc.to_undirected()
G = nx.Graph()
# nx.Graph keeps at most one edge per node pair, so adding an edge that already
# exists is a no-op; no has_edge() guard is needed to collapse parallel edges.
# Only the endpoints are copied, so G carries no edge attributes.
for u, v in G_mul.edges():
    G.add_edge(u, v)

# Clustering measures of the simple graph.
trans = nx.transitivity(G)
avg_clustering = nx.average_clustering(G)
print('Transitivity of G is {:.4}, and its average clustering coefficient is {:.4}'.format(trans, avg_clustering))

Transitivity of G is 0.5701, and its average clustering coefficient is 0.6975
