In [14]:
#Importing the necessary libraries
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random

In [16]:
def load_email_network(file_path):
    """Loads the email network from a text file into a NetworkX graph."""
    graph = nx.DiGraph()  # Undirected graph since roads are bidirectional

    with open(file_path, "r") as file:
        for line in file:
            if line.startswith("#"):  # Skip comment lines
                continue
            from_node, to_node = map(int, line.split())  # Extract node IDs
            graph.add_edge(from_node, to_node)  # Add edge to the graph

    return graph

# Example usage,en->emailnetwork
file_path = "Email-EuAll.txt" 
en = load_email_network(file_path)

# Print basic statistics
print(f"Loaded road network with {en.number_of_nodes()} nodes and {en.number_of_edges()} edges.")


Loaded road network with 265214 nodes and 420045 edges.


In [17]:
def sample_nodes_induced_subgraph(G, node_percentage):
    """Randomly samples a percentage of nodes and retains only edges between them."""
    sampled_nodes = set(random.sample(list(G.nodes()), int(len(G) * node_percentage)))
    sampled_G = G.subgraph(sampled_nodes).copy()  # subgraph only has edges for which nodes belong to sampled_nodes
    return sampled_G

# Sample nodes and retain edges(with 1% nodes)
sg = sample_nodes_induced_subgraph(en, node_percentage=0.01)

print(f"Sampled Graph: {sg.number_of_nodes()} nodes, {sg.number_of_edges()} edges")


Sampled Graph: 2652 nodes, 49 edges


In [None]:
#1% of the total size of nodes
plt.figure(figsize=(200,200))
plt.title("Visualization of the graph,size(200,200)")
nx.draw(sg,with_labels=True,node_color='red',edge_color='black',node_size=800)
plt.show()

In [18]:
#Exporting the nodes,edges to respective csv for visualization in gephi
def export_graph_to_csv(G, nodes_file="ENnodes.csv", edges_file="ENedges.csv"):
    """Exports a NetworkX graph to two CSV files for Gephi: one for nodes and one for edges."""
    
    # Export Nodes
    nodes_df = pd.DataFrame({"Id": list(G.nodes())})
    nodes_df.to_csv(nodes_file, index=False)
    
    # Export Edges
    edges_df = pd.DataFrame(G.edges(), columns=["Source", "Target"])
    edges_df.to_csv(edges_file, index=False)

# Example Usage
export_graph_to_csv(sg)  # Exports sampled graph to CSV


# Complete Network Statistics

In [23]:
#Average degree of the graph
avg_degree=0
len(en.degree)
for node,degree in dict(en.degree()).items():
    avg_degree+=degree/len(en.degree)

print(f'The average degree of the graph is {avg_degree}')

The average degree of the graph is 3.1675929626451564


In [24]:
#Density of a graph
max_edges=len(en.nodes)*(len(en.nodes)-1)/2
density=len(en.edges)/max_edges
print(f'The density of the given graph is {density}')

The density of the given graph is 1.1943581056223159e-05


In [30]:
#Find diameter, radius, and eccentricity of Graph.
eccentricity=nx.eccentricity(en)
radius=nx.radius(en)
diameter=nx.diameter(en)
print(f'The eccentricity of the given graph is {eccentricity}')
print(f'The radius of the given graph is {radius}')
print(f'The diameter of the given graph is {diameter}')

NetworkXError: Found infinite path length because the digraph is not strongly connected

Given that the diameter appears to infinite, it appears that there are nodes which have no edges to them.

In [31]:
#Top 10 nodes by degree
degree_dict=dict(en.degree())
sorted_list=sorted(degree_dict.items(),key=lambda item:item[1],reverse=True)
sorted_dict=dict(sorted_list)
for i,(key,value) in enumerate(sorted_dict.items()):
    if(i>=10):
        break
    print(f'{key}:{value}')

179170:7636
422:6436
30:6145
72:4314
298:4106
83:3626
485:3211
366:2801
192:2360
406:2142


In [37]:
# Global Clustering Coefficient
global_clustering = nx.transitivity(en)
print(f'Global Clustering Coefficient: {global_clustering}')

Global Clustering Coefficient: 0.02784941685410377


In [40]:
#Degree centralization
centrality=nx.degree_centrality(en)
num_nodes=len(en.nodes())
num_edges=len(en.edges())
maxval=max(value for _,value in centrality.items()) #max centrality
print(maxval)
p_sum=0
for _,degree in en.degree():
    p_sum+=(maxval-degree)

degree_centralization=p_sum/((num_nodes-1)*(num_nodes-2))
print(f'The degree centralizaiton for the given network is {degree_centralization}')

0.02879195212904345
The degree centralizaiton for the given network is -1.1835108688564075e-05


# Sampled Network(Random choice of nodes, 1% of the total)

In [32]:
#Average degree of the graph
avg_degree=0
len(sg.degree)
for node,degree in dict(sg.degree()).items():
    avg_degree+=degree/len(sg.degree)

print(f'The average degree of the graph is {avg_degree}')

The average degree of the graph is 0.036953242835595757


In [33]:
#Density of a graph
max_edges=len(sg.nodes)*(len(sg.nodes)-1)/2
density=len(sg.edges)/max_edges
print(f'The density of the given graph is {density}')

The density of the given graph is 1.3939359802186261e-05


In [34]:
#Find diameter, radius, and eccentricity of Graph.
eccentricity=nx.eccentricity(sg)
radius=nx.radius(sg)
diameter=nx.diameter(sg)
print(f'The eccentricity of the given graph is {eccentricity}')
print(f'The radius of the given graph is {radius}')
print(f'The diameter of the given graph is {diameter}')

NetworkXError: Found infinite path length because the digraph is not strongly connected

In [39]:
#Top 10 nodes by degree
degree_dict=dict(sg.degree())
sorted_list=sorted(degree_dict.items(),key=lambda item:item[1],reverse=True)
sorted_dict=dict(sorted_list)
for i,(key,value) in enumerate(sorted_dict.items()):
    if(i>=10):
        break
    print(f'{key}:{value}')

107:11
364:8
92476:8
4237:7
189839:4
26:3
20494:3
90281:2
16614:2
743:2


In [38]:
# Global Clustering Coefficient
global_clustering = nx.transitivity(sg)
print(f'Global Clustering Coefficient: {global_clustering}')

Global Clustering Coefficient: 0
