In [6]:
import community as community_louvain
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import time
from scipy.cluster.hierarchy import dendrogram, linkage

In [7]:
def load_graph(graph_file):
    """Read a GraphML file and decode each node's ``pos`` attribute.

    Every node ends up with a ``pos`` attribute holding a list of ints: the
    comma-separated string stored in the file is split and converted, and a
    missing or empty value becomes an empty list. Progress and timing
    messages are printed before and after loading.

    Args:
        graph_file: Path of the GraphML file, passed to ``nx.read_graphml``.

    Returns:
        The loaded graph, with ``pos`` rewritten in place on every node.

    Raises:
        ValueError: If a non-empty ``pos`` string has a component that
            ``int`` cannot parse.
    """
    print(f"Loading graph from {graph_file}...")
    start_time = time.time()

    G = nx.read_graphml(graph_file)

    # GraphML stores the position as text ("x,y,..."); turn it back into ints.
    for node in G.nodes:
        attrs = G.nodes[node]
        raw_pos = attrs.get('pos', '')  # tolerate nodes that carry no 'pos' at all
        attrs['pos'] = [int(piece) for piece in raw_pos.split(',')] if raw_pos else []

    end_time = time.time()
    print(f"Graph loaded in {end_time - start_time:.2f} seconds. It has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
    return G

In [13]:
# Function to compute the partition with the Louvain method and build a dendrogram
def louvain_hierarchy(G, max_communities=50):
    """Compute successively coarser Louvain partitions of ``G``.

    The first level is ``community_louvain.best_partition(G)``. While the
    current level has more than ``max_communities`` communities, the graph
    induced by that level is partitioned again and the result is mapped back
    onto the original nodes, producing the next (coarser) level.

    Args:
        G: Graph accepted by ``community_louvain.best_partition``.
        max_communities: Coarsening stops once a level has at most this many
            communities. Defaults to 50.

    Returns:
        A list of dicts, finest level first. Each dict maps every node of
        ``G`` to its community id at that level. The list always contains at
        least the initial partition.
    """
    # Compute the initial partition
    partition = community_louvain.best_partition(G)
    dendro = [partition]

    # Work on a copy so the dict stored as level 0 is never rebound or mutated.
    current_partition = partition.copy()
    n_communities = len(set(current_partition.values()))

    while n_communities > max_communities:
        # Build the induced graph (one node per community) from the current level
        induced_graph = community_louvain.induced_graph(current_partition, G)
        # Compute the new partition on the induced graph
        new_partition = community_louvain.best_partition(induced_graph)

        # Map partition from the induced graph back to the original graph
        merged_partition = {
            node: new_partition[community]
            for node, community in current_partition.items()
        }
        n_merged = len(set(merged_partition.values()))

        # Louvain may be unable to merge anything further; without this guard
        # the loop would spin forever whenever the target is unreachable.
        if n_merged >= n_communities:
            break

        current_partition = merged_partition
        n_communities = n_merged
        dendro.append(current_partition.copy())

    return dendro

# Convert the hierarchical partitioning into a format suitable for scipy's dendrogram function
def convert_to_scipy_dendrogram_format(dendro):
    """Turn a list of nested partitions into a SciPy linkage matrix.

    The leaves of the resulting tree are the communities of the finest level
    (``dendro[0]``), numbered ``0..n-1`` in sorted order of their community
    id. Communities that fall into the same community at level ``k`` are
    merged at height ``k``. Because a linkage row joins exactly two clusters,
    a group of ``m`` siblings is joined by ``m - 1`` rows at the same height.
    If the coarsest level still has several communities, they are joined
    under a single root at height ``len(dendro)`` so the matrix describes one
    complete tree.

    Args:
        dendro: List of dicts (finest level first), each mapping node ->
            community id. Every level is expected to cover the same nodes,
            and each level is expected to be a coarsening of the previous
            one (all nodes of a community share the same parent community).
            Community ids must be sortable.

    Returns:
        A float ``numpy.ndarray`` of shape ``(n - 1, 4)`` with rows
        ``[cluster_a, cluster_b, height, leaf_count]``. The shape is
        ``(0, 4)`` when there are fewer than two leaf communities.

    Raises:
        KeyError: If a node of one level is missing from the next level.
    """
    if not dendro or not dendro[0]:
        return np.empty((0, 4), dtype=float)

    leaf_communities = sorted(set(dendro[0].values()))
    # community id at the current level -> (scipy cluster id, number of leaves)
    active = {community: (index, 1) for index, community in enumerate(leaf_communities)}
    next_cluster_id = len(leaf_communities)
    rows = []

    def merge_group(members, height):
        """Chain-merge ``members`` pairwise at ``height``; return (id, size)."""
        nonlocal next_cluster_id
        merged_id, merged_size = members[0]
        for other_id, other_size in members[1:]:
            merged_size += other_size
            rows.append([merged_id, other_id, height, merged_size])
            # SciPy numbers the cluster created by row i as n_leaves + i.
            merged_id = next_cluster_id
            next_cluster_id += 1
        return merged_id, merged_size

    for height in range(1, len(dendro)):
        finer, coarser = dendro[height - 1], dendro[height]

        # Parent community of each finer community, read off any member node.
        parent_of = {}
        for node, community in finer.items():
            parent_of.setdefault(community, coarser[node])

        # Group the active clusters by parent, in a deterministic order.
        groups = {}
        for community in sorted(active):
            groups.setdefault(parent_of[community], []).append(active[community])

        active = {
            parent: merge_group(members, height)
            for parent, members in groups.items()
        }

    # Join whatever is left under one root so the linkage is a complete tree.
    if len(active) > 1:
        merge_group([active[community] for community in sorted(active)], len(dendro))

    return np.array(rows, dtype=float).reshape(-1, 4)


In [10]:
# Load the GraphML file into G; the community-detection cell below takes G as its input.
G = load_graph("spotify_AugWeek1.graphml")

Loading graph from spotify_AugWeek1.graphml...
Graph loaded in 5.08 seconds. It has 5420 nodes and 114579 edges.


In [14]:
# Build the list of Louvain partitions for G; the plotting cell below reads it as `dendro`.
dendro = louvain_hierarchy(G)

KeyboardInterrupt: 

This dendrogram is illustrative only: the construction is deliberately simplified for demonstration purposes. It is not a hierarchical clustering in the traditional sense, with merge heights derived from distances between observations; instead, it shows which communities are merged at each successive application of the Louvain method.

In [15]:
# Build the linkage matrix from the Louvain levels, then draw it on an explicit Axes
linkage_matrix = convert_to_scipy_dendrogram_format(dendro)
fig, ax = plt.subplots()
dendrogram(linkage_matrix, ax=ax)
ax.set_title('Community Hierarchical Dendrogram')
plt.show()

NameError: name 'dendro' is not defined