In [35]:
from utils.utils import *
from utils.visualization import *
from utils.communities_network import *
from utils.iterative_greedy_algorithm import IG
from utils.kmeans import *
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import time
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor
import warnings
warnings.filterwarnings("ignore")

In [36]:
def kmeans(G, D, K, D_transformed, A, metric):
    """
    Run one seeded k-means clustering for a given K and score the result.

    Seeds are obtained from ``local_expension(G, D, K)``; the rows of
    ``D_transformed`` at those seed indices are handed to
    ``kmeans_clustering`` as its third argument (presumably the initial
    centroids -- confirm against ``utils.kmeans``).

    Parameters
    ----------
    G : graph passed to ``local_expension`` and ``calculate_modularity``.
    D : distance matrix passed to ``local_expension``.
    K : number of clusters requested.
    D_transformed : array that is clustered and indexed by the seeds.
    A : matrix passed to ``calculate_Q_Sim`` (only used when metric == "QSim").
    metric : "Mod" to score with ``calculate_modularity`` or "QSim" to score
        with ``calculate_Q_Sim``.

    Returns
    -------
    tuple
        ``(Qs, communities, labels, K)`` -- the score, the two values returned
        by ``kmeans_clustering``, and the K that was used.

    Raises
    ------
    ValueError
        If ``metric`` is neither "Mod" nor "QSim".
    """
    # Validate before doing any work: previously an unknown metric left `Qs`
    # unbound and surfaced as an UnboundLocalError only after clustering ran.
    if metric not in ("Mod", "QSim"):
        raise ValueError(
            f"Unsupported metric {metric!r}; expected 'Mod' or 'QSim'")

    # Single-iteration loop kept only so that a per-K progress bar is shown.
    for _ in tqdm(range(1), desc=f"K = {K}"):
        initial_seeds = local_expension(G, D, K)

        communities, labels = kmeans_clustering(
            D_transformed, K, D_transformed[initial_seeds])

        # Score the partition with the requested quality measure.
        if metric == "Mod":
            Qs = calculate_modularity(G, communities)
        else:
            Qs = calculate_Q_Sim(A, communities)

    return Qs, communities, labels, K

In [37]:

def local_expansion_kmeans_parallel(G: nx.Graph, A: np.ndarray, Kmin: int, Kmax: int, metric="Mod") -> tuple:
    """
    Run the seeded k-means step for every K in [Kmin, Kmax] on a thread pool
    and keep the partition with the highest score.

    Parameters
    ----------
    G : graph forwarded to ``kmeans``.
    A : adjacency matrix; used to build the similarity matrix and forwarded
        to ``kmeans``.
    Kmin, Kmax : inclusive range of cluster counts to try.
    metric : "Mod" or "QSim"; selects the scoring function inside ``kmeans``.

    Returns
    -------
    tuple
        ``(Cmax, Qmax, Kbest, labelsBest, trace)`` -- best communities, their
        score, the K that produced them, the matching labels, and ``trace``.
        If no run scores above the initial ``Qmax`` of -1 (including an empty
        K range) the defaults ``([], -1, Kmin, [], [])`` are returned.

    Raises
    ------
    ValueError
        If ``metric`` is neither "Mod" nor "QSim".
    """
    # Fail fast: otherwise an unsupported metric is only discovered inside the
    # worker threads, after the matrix computations below have been paid for.
    if metric not in ("Mod", "QSim"):
        raise ValueError(
            f"Unsupported metric {metric!r}; expected 'Mod' or 'QSim'")

    # Similarity matrix from the adjacency matrix A
    S = similarity_matrix(A)

    # Distance matrix derived from S
    D = distance_matrix(S)

    # Representation that is actually clustered
    D_transformed = PCA_reduction(D)

    Cmax = []
    Qmax = -1
    Kbest = Kmin
    labelsBest = []

    # NOTE(review): `trace` is returned but nothing is ever appended to it in
    # this function; callers always receive an empty list. Confirm what it was
    # meant to record before populating it.
    trace = []

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(kmeans, G, D, K, D_transformed, A, metric)
            for K in range(Kmin, Kmax + 1)
        ]

        # Consume results in submission (ascending K) order; the strict `>`
        # means that on equal scores the smallest K wins.
        for future in futures:
            Qs, communities, labels, K = future.result()

            if Qs > Qmax:
                Qmax = Qs
                Cmax = communities
                Kbest = K
                labelsBest = labels

    return Cmax, Qmax, Kbest, labelsBest, trace


In [38]:

# Load the LFR synthetic network and its ground-truth communities.
LFR_DIR = 'data/synth/LFR/0.50'
file_path = f'{LFR_DIR}/network.dat'
G = nx.read_edgelist(file_path, nodetype=int)

# Relabel nodes to consecutive integers starting at 0 (presumably so node ids
# can be used directly as row/column indices of the adjacency matrix).
original_nodes = list(G.nodes())
mapping = {node: i for i, node in enumerate(original_nodes)}
G = nx.relabel_nodes(G, mapping)
adj_matrix = nx.to_numpy_array(G)

# Sanity check replacing a bare `adj_matrix.shape[0]` expression that was
# neither displayed nor used.
assert adj_matrix.shape == (len(original_nodes), len(original_nodes))

true_communities = read_community_labels_file_synth(
    f"{LFR_DIR}/community.dat")

In [40]:
# Search K = 2..10 with the default metric and keep the best partition.
Cmax, Qmax, Kbest, labelsBest, trace = local_expansion_kmeans_parallel(
    G, adj_matrix, Kmin=2, Kmax=10)

K = 5:   0%|          | 0/1 [00:00<?, ?it/s]

K = 2:   0%|          | 0/1 [00:00<?, ?it/s]

K = 3:   0%|          | 0/1 [00:00<?, ?it/s]

K = 9:   0%|          | 0/1 [00:00<?, ?it/s]

K = 4:   0%|          | 0/1 [00:00<?, ?it/s]

K = 6:   0%|          | 0/1 [00:00<?, ?it/s]

K = 7:   0%|          | 0/1 [00:00<?, ?it/s]

K = 8:   0%|          | 0/1 [00:00<?, ?it/s]

K = 10:   0%|          | 0/1 [00:00<?, ?it/s]