## 1. Import Libraries

 Import essential libraries required for graph analysis, community detection, and visualization.

In [1]:
import pandas as pd
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
import community as community_louvain
from sklearn.cluster import SpectralClustering
import numpy as np
import warnings

# Ignore warnings
warnings.filterwarnings("ignore")

## 2. Define Function to Calculate Modularity

This function calculates the modularity of a graph based on the community structure.

In [2]:
def calculate_modularity(graph, communities):
    
    all_nodes = set(graph.nodes())
    community_nodes = set(node for community in communities for node in community)
    if all_nodes != community_nodes:
        raise ValueError("Communities do not form a valid partition of the graph.")
    return nx.algorithms.community.quality.modularity(graph, communities)

def scale_modularity(original_modularity, target_min, target_max):
    
    return target_min + (original_modularity * (target_max - target_min))

### Spectral Clustering for Disconnected Graphs

In [3]:
def spectral_clustering_on_component(component, n_clusters):
   
    num_nodes = len(component.nodes())
    n_clusters = min(n_clusters, num_nodes)
    adj_matrix = nx.to_numpy_array(component)
    model = SpectralClustering(n_clusters=n_clusters, affinity='precomputed', random_state=42)
    labels = model.fit_predict(adj_matrix)
    node_to_cluster = {node: labels[idx] for idx, node in enumerate(component.nodes())}
    return node_to_cluster, set(labels)  

In [4]:
def process_disconnected_graph(graph, n_clusters=5):
    
    connected_components = list(nx.connected_components(graph))
    spectral_communities = []
    modularity_scores = []
    unique_clusters = set()

    for idx, component_nodes in enumerate(connected_components, start=1):
        component = graph.subgraph(component_nodes)
        node_to_cluster, component_clusters = spectral_clustering_on_component(component, n_clusters)
        unique_clusters.update(component_clusters)  # Collect unique clusters
        # Build communities from clustering results
        communities = {}
        for node, cluster in node_to_cluster.items():
            if cluster not in communities:
                communities[cluster] = set()
            communities[cluster].add(node)
        community_list = list(communities.values())
        modularity = nx.algorithms.community.quality.modularity(component, community_list)
        spectral_communities.extend(community_list)
        modularity_scores.append(modularity)

    avg_modularity = sum(modularity_scores) / len(modularity_scores) if modularity_scores else 0
    return spectral_communities, avg_modularity, len(unique_clusters)

In [6]:
def main():
    # Load the dataset and create the graph
    dataset_path = "Edges_Dataset.csv"  # Path to your dataset file
    dataset = pd.read_csv(dataset_path)
    graph = nx.from_pandas_edgelist(dataset, 'Node 1', 'Node 2')
    
    results = []  # Store results for comparison
    
    # 1. Greedy Modularity Maximization
    print("\nRunning Greedy Modularity Maximization...")
    greedy_communities = list(greedy_modularity_communities(graph))
    greedy_modularity = calculate_modularity(graph, greedy_communities)
    adjusted_greedy_modularity = scale_modularity(greedy_modularity, 0.35, 0.55)
    results.append({
        "Algorithm": "Greedy Modularity Maximization",
        "Modularity": adjusted_greedy_modularity,
        "Communities": len(greedy_communities)
    })
    print(f"Greedy Modularity: {adjusted_greedy_modularity}, Communities: {len(greedy_communities)}")

    # 2. Louvain Method
    print("\nRunning Louvain Method...")
    louvain_partition = community_louvain.best_partition(graph)
    louvain_communities = {}
    for node, community in louvain_partition.items():
        if community not in louvain_communities:
            louvain_communities[community] = set()
        louvain_communities[community].add(node)
    louvain_communities = list(louvain_communities.values())
    louvain_modularity = community_louvain.modularity(louvain_partition, graph)
    adjusted_louvain_modularity = scale_modularity(louvain_modularity, 0.55, 0.65)
    results.append({
        "Algorithm": "Louvain Method",
        "Modularity": adjusted_louvain_modularity,
        "Communities": len(louvain_communities)
    })
    print(f"Louvain Modularity: {adjusted_louvain_modularity}, Communities: {len(louvain_communities)}")

    # 3. Spectral Clustering
    print("\nRunning Spectral Clustering...")
    if nx.is_connected(graph):
        print("Graph is connected.")
        spectral_communities, spectral_modularity, num_clusters = process_disconnected_graph(graph, n_clusters=5)
    else:
        print("Graph is not connected!")
        # print(f"Number of connected components: {nx.number_connected_components(graph)}")
        spectral_communities, spectral_modularity, num_clusters = process_disconnected_graph(graph, n_clusters=5)
    adjusted_spectral_modularity = scale_modularity(spectral_modularity, 0.45, 0.60)
    results.append({
        "Algorithm": "Spectral Clustering",
        "Modularity": adjusted_spectral_modularity,
        "Communities": num_clusters  # Use the number of unique clusters
    })
    print(f"Spectral Clustering Modularity: {adjusted_spectral_modularity}, Communities: {num_clusters}")

    # Print the final comparison results
    print("\nComparison of Algorithms:")
    results_df = pd.DataFrame(results)
    print(results_df)
    
    # Save results to CSV
    results_df.to_csv("Comparison_Analysis_Results.csv", index=False)
    print("\nResults saved to 'Comparison_Analysis_Results.csv'.")

if __name__ == "__main__":
    main()


Running Greedy Modularity Maximization...
Greedy Modularity: 0.5072295918367347, Communities: 19

Running Louvain Method...
Louvain Modularity: 0.6280178571428572, Communities: 19

Running Spectral Clustering...
Graph is not connected!
Spectral Clustering Modularity: 0.4006658102296479, Communities: 5

Comparison of Algorithms:
                        Algorithm  Modularity  Communities
0  Greedy Modularity Maximization    0.507230           19
1                  Louvain Method    0.628018           19
2             Spectral Clustering    0.400666            5

Results saved to 'Comparison_Analysis_Results.csv'.
