# Exploratory Data Analysis

- Global clustering coefficient
- Standard deviation
- Random walk mixing time
- Assortativity coefficient

In [1]:
# Imports

import numpy as np
import networkx as nx
from statistics import stdev

In [2]:
G = nx.read_edgelist('M1/edges.csv', delimiter = ',')
print(G)

Graph with 1429 nodes and 19357 edges


We will first find the global clustering coefficient:

In [35]:
# Transitivity = global clustering coefficient
global_clustering = nx.transitivity(G)
print('Global clustering coefficient:', global_clustering)

Global clustering coefficient: 0.2606387109920022


We will next compute the standard deviation:

In [36]:
# Calculate local clustering coefficients
local_clustering = nx.clustering(G)

# Compute standard deviation of the local clustering coefficients
local_clustering_values = list(local_clustering.values())
standard_deviation = stdev(local_clustering_values)

print('Standard deviation of clustering coefficients:', standard_deviation)

Standard deviation of clustering coefficients: 0.19210659033211586


We then find the random walk mixing time:

In [37]:
# Calculate the stochastic matrix P
P = nx.adjacency_matrix(G) # Initialize P as the adjecency matrix
P = P.astype(float).toarray() # Change dtype to float and convert to dense array
for row in P:
    row /= np.sum(row)

# Compute eigenvalues and eigenvectors of the stochastic matrix
eigenvalues, eigenvectors = np.linalg.eig(P)

# Find the second-largest eigenvalue and its corresponding eigenvector
lambda2 = np.sort(eigenvalues)[-2]
v2 = eigenvectors[:, np.argsort(eigenvalues)[-2]]

# Compute mixing time
mixing_time = -1 / np.log(1 - lambda2)

print('Random walk mixing time:', mixing_time)

Random walk mixing time: 0.2992249545530746


Lastly, we will compute the assortativity coefficient:

In [38]:
# Compute assortativity coefficient
assortativity = nx.degree_assortativity_coefficient(G)
print('Assortativity Coefficient:', assortativity)

Assortativity Coefficient: 0.11231425726944656


In [11]:
avg_shortest_path = nx.average_shortest_path_length(G)
print("average shortest path",avg_shortest_path)

# Calculate the average degree
average_degree = sum(dict(G.degree()).values()) / len(G)
print("average degree: ", average_degree)

# Calculate the diameter
diameter = nx.diameter(G)
print("diameter: ",diameter)

average shortest path 3.036131317467505
average degree:  27.091672498250524
diameter:  7


7
