### Define a spatial node network to try the methods on

# Different Community Detection Methods Below

### Leiden methods

#### Leiden method (ensemble - highest modularity)

Note that no average or consensus is taken over the ensemble; only the single highest-modularity partition is kept.

In [None]:
import copy as cp
import networkx as nx
import igraph as ig
import leidenalg
from cdlib import NodeClustering, evaluation, ensemble

# Custom Leiden wrapper that mimics a CDlib algorithm
def leiden_resolution_weighted(graph_nx, resolution_parameter):
    """
    Run weighted Leiden community detection and wrap the result for CDlib.

    The NetworkX graph is converted to an undirected igraph graph that
    contains every node of ``graph_nx`` (nodes without edges included, so
    they are part of the returned clustering). Edge weights are read from
    the edge attribute 'bathyAve'; edges without it get weight 1.0.

    Parameters
    ----------
    graph_nx : networkx.Graph
        Graph to partition.
    resolution_parameter : float
        Resolution passed to ``leidenalg.RBConfigurationVertexPartition``.

    Returns
    -------
    cdlib.NodeClustering
        Communities expressed with the original NetworkX node labels.
    """
    # Fix a vertex order: igraph vertex i corresponds to nodes[i]. Building
    # the vertex set from the node list (not from the edge list) keeps
    # nodes that have no edges.
    nodes = list(graph_nx.nodes())
    node_to_idx = {node: i for i, node in enumerate(nodes)}

    edge_pairs = []
    edge_weights = []
    for u, v, d in graph_nx.edges(data=True):
        edge_pairs.append((node_to_idx[u], node_to_idx[v]))
        edge_weights.append(d.get("bathyAve", 1.0))

    g_ig = ig.Graph()
    g_ig.add_vertices(len(nodes))
    g_ig.add_edges(edge_pairs)

    # Run Leiden with resolution and weights (weights are in edge order).
    partition = leidenalg.find_partition(
        g_ig,
        leidenalg.RBConfigurationVertexPartition,
        resolution_parameter=resolution_parameter,
        weights=edge_weights,
    )

    # Translate igraph vertex indices back to the NetworkX node labels.
    communities = [{nodes[idx] for idx in comm} for comm in partition]

    return NodeClustering(
        communities=communities,
        graph=graph_nx,
        method_name="leiden_weighted",
        method_parameters={"resolution_parameter": resolution_parameter}
    )

# Define ensemble parameter to sweep (range handed to cdlib's grid search).
resolution = ensemble.Parameter(name="resolution_parameter", start=30, end=30.1, step=0.1)

# Run ensemble grid search with custom Leiden wrapper: the same method and
# parameter sweep are repeated ensembleLen times so that the unseeded Leiden
# runs produce an ensemble of partitions.
ensembleLen = 100
methods = [leiden_resolution_weighted] * ensembleLen
configurations = [[resolution] for _ in range(ensembleLen)]

# Start below any attainable score so the first result is always kept;
# starting at 0 would leave communitiesFinal undefined whenever every
# modularity score is <= 0.
highScore = float("-inf")
communitiesFinal = None
for communities, score in ensemble.pool_grid_filter(
    basins.G,
    methods=methods,
    configurations=configurations,
    quality_score=evaluation.erdos_renyi_modularity,
    aggregate=max
):
    print("Score: {0}, Communities: {1}".format(score, len(communities.communities)))
    # Keep communities from the highest scoring model
    if communitiesFinal is None or highScore < score.score:
        highScore = score.score
        communitiesFinal = cp.deepcopy(communities)

if communitiesFinal is None:
    raise RuntimeError("ensemble.pool_grid_filter returned no clusterings")

print("Score: {0}, Communities: {1}".format(highScore, len(communitiesFinal.communities)))

    
    

#basins.G

#LDcommunities
# NOTE(review): np and plt are not imported in the cells shown above this
# point -- confirm they are imported earlier in the notebook.

# Node positions in graph iteration order: row i of pos belongs to the
# i-th node yielded by basins.G.nodes().
nodeOrder = list(basins.G.nodes())
pos = np.array(
    [basins.G.nodes[node]['pos'] for node in nodeOrder], dtype=float
).reshape(-1, 2)

# Community index per node, aligned with the rows of pos. Nodes that are in
# no community stay NaN. Rows are looked up through rowOfNode, so the result
# does not depend on node labels being the integers 0..n-1.
rowOfNode = {node: row for row, node in enumerate(nodeOrder)}
basinID = np.full(len(pos), np.nan)
for comidx, members in enumerate(communitiesFinal.communities):
    for node in members:
        row = rowOfNode.get(node)
        if row is not None:
            # A node listed in several communities keeps the last index.
            basinID[row] = comidx

# Notebook sanity check: node count, label count, number of unlabeled nodes.
len(pos), len(basinID), np.sum(np.isnan(basinID))

# pos[:, 1] is drawn on the x-axis and pos[:, 0] on the y-axis.
plt.scatter(pos[:,1], pos[:,0], c=basinID, s=1, cmap='jet')



#### Leiden method (ensemble - consensus)

Note that neither averaging nor highest-modularity selection is applied when forming the consensus.

In [None]:
import networkx as nx
import igraph as ig
import leidenalg
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from cdlib import NodeClustering

def consensus_leiden(graph_nx, resolution_parameter=1.0, weight_attr="bathyAve", runs=20, distance_threshold=0.25):
    """
    Build consensus communities from repeated, seeded Leiden runs.

    Leiden is run ``runs`` times (seeds 0..runs-1) on a weighted igraph copy
    of ``graph_nx``. The fraction of runs in which two nodes share a
    community forms a co-association matrix; ``1 - co-association`` is then
    clustered with average-linkage agglomerative clustering cut at
    ``distance_threshold``.

    Parameters
    ----------
    graph_nx : networkx.Graph
        Graph to partition. Node labels must be mutually sortable.
    resolution_parameter : float
        Resolution passed to ``leidenalg.RBConfigurationVertexPartition``.
    weight_attr : str
        Edge attribute used as weight; edges without it get weight 1.0.
    runs : int
        Number of Leiden runs; must be at least 1.
    distance_threshold : float
        Linkage distance at or above which clusters are not merged.

    Returns
    -------
    cdlib.NodeClustering
        Consensus communities expressed with the original node labels.

    Raises
    ------
    ValueError
        If ``runs`` is smaller than 1.
    """
    if runs < 1:
        # Normalising by zero runs would fill the matrix with NaN.
        raise ValueError("runs must be at least 1")

    # Stable node ordering: igraph vertex i corresponds to nodes[i].
    nodes = sorted(graph_nx.nodes())
    n = len(nodes)
    node_to_idx = {node: i for i, node in enumerate(nodes)}

    method_parameters = {
        "resolution_parameter": resolution_parameter,
        "runs": runs,
        "distance_threshold": distance_threshold
    }

    if n < 2:
        # Nothing to cluster: scikit-learn's agglomerative clustering needs
        # at least two samples, so return the trivial partition directly.
        return NodeClustering(
            communities=[{node} for node in nodes],
            graph=graph_nx,
            method_name="consensus_leiden_fixed",
            method_parameters=method_parameters
        )

    # Build the weighted igraph graph with index-named vertices.
    edge_pairs = []
    edge_weights = []
    for u, v, d in graph_nx.edges(data=True):
        edge_pairs.append((node_to_idx[u], node_to_idx[v]))
        edge_weights.append(d.get(weight_attr, 1.0))
    g = ig.Graph()
    g.add_vertices(n)
    g.add_edges(edge_pairs)
    g.es["weight"] = edge_weights
    g.vs["name"] = list(range(n))  # Stable index-named nodes

    # Co-association counts: coassoc[u, v] = runs in which u and v co-occur.
    coassoc = np.zeros((n, n))
    for seed in range(runs):
        part = leidenalg.find_partition(
            g,
            leidenalg.RBConfigurationVertexPartition,
            resolution_parameter=resolution_parameter,
            weights=g.es["weight"],
            seed=seed
        )
        for community in part:
            members = np.asarray(community, dtype=int)
            # Increment the whole members x members block at once; members
            # are unique within a community so no update is lost.
            coassoc[np.ix_(members, members)] += 1

    # Normalize to a co-occurrence fraction, then convert to dissimilarity.
    coassoc /= runs
    distance = 1.0 - coassoc

    # Average-linkage clustering on the precomputed distances; the number of
    # clusters is determined by distance_threshold.
    model = AgglomerativeClustering(
        metric="precomputed",
        linkage="average",
        distance_threshold=distance_threshold,
        n_clusters=None
    )
    labels = model.fit_predict(distance)

    # Group original node labels by cluster label.
    consensus_communities = [set() for _ in range(labels.max() + 1)]
    for idx, label in enumerate(labels):
        consensus_communities[label].add(nodes[idx])

    return NodeClustering(
        communities=consensus_communities,
        graph=graph_nx,
        method_name="consensus_leiden_fixed",
        method_parameters=method_parameters
    )



# Consensus clustering of the basin graph.
communitiesFinal = consensus_leiden(basins.G,
                                          resolution_parameter=0.6,
                                          distance_threshold=0.3,
                                          runs=50)
print(f"Consensus communities found: {len(communitiesFinal.communities)}")


# NOTE(review): plt is not imported in the cells shown above this point --
# confirm it is imported earlier in the notebook.

# Node positions in graph iteration order: row i of pos belongs to the
# i-th node yielded by basins.G.nodes().
nodeOrder = list(basins.G.nodes())
pos = np.array(
    [basins.G.nodes[node]['pos'] for node in nodeOrder], dtype=float
).reshape(-1, 2)

# Community index per node, aligned with the rows of pos. Nodes that are in
# no community stay NaN. Rows are looked up through rowOfNode, so the result
# does not depend on node labels being the integers 0..n-1.
rowOfNode = {node: row for row, node in enumerate(nodeOrder)}
basinID = np.full(len(pos), np.nan)
for comidx, members in enumerate(communitiesFinal.communities):
    for node in members:
        row = rowOfNode.get(node)
        if row is not None:
            # A node listed in several communities keeps the last index.
            basinID[row] = comidx

# Notebook sanity check: node count, label count, number of unlabeled nodes.
len(pos), len(basinID), np.sum(np.isnan(basinID))

# pos[:, 1] is drawn on the x-axis and pos[:, 0] on the y-axis.
plt.scatter(pos[:,1], pos[:,0], c=basinID, s=1, cmap='jet')


## Infomap methods

#### Composite: Infomap + Girvan-Newman

In [None]:

class compositeAl():
    """
    Composite community detection: Infomap followed by Girvan-Newman.

    Infomap partitions the node graph; each resulting community is collapsed
    into a single node of a reduced graph, which is then split further (or
    left as is) with the Girvan-Newman algorithm.

    NOTE(review): ``run`` reads the module-level names
    ``minBasinLargerThanSmallMergers``, ``detectionMethod``, ``minBasinCnt``
    and ``mostCentralEdge``. None of them is defined in the visible part of
    the file -- confirm they exist before calling ``run``.
    """

    def __init__(self, basins):
        """
        Store the graph to partition.

        Parameters
        ----------
        basins : object
            Any object exposing the NetworkX graph as ``basins.G``.
        """
        self.G = basins.G

    def run(self):
        """
        Run the Infomap + Girvan-Newman pipeline on ``self.G``.

        Sets the following attributes:

        * ``LDcommunities`` / ``LDcommunitiesUnaltered`` -- deep copies of
          the Infomap communities (list of sets of nodes).
        * ``Gnew`` -- graph with one node per Infomap community and edges
          weighted ('bathyAve') by the summed weight of the original edges
          that cross between the two communities.
        * ``GNcommunities`` -- Girvan-Newman partition of ``Gnew``.
        * ``communitiesFinal`` -- list of sets of original nodes, one entry
          per Infomap community index; entry k holds the nodes of
          Girvan-Newman community k, entries beyond the number of
          Girvan-Newman communities are empty sets.

        Returns
        -------
        None
        """
        # Imports
        from collections import defaultdict
        import itertools
        import time
        from cdlib import algorithms

        def _lap(since):
            """Print the seconds elapsed since *since*; return a new timestamp."""
            now = time.time()
            print( "Time: {} seconds".format(now-since) )
            return now

        # Partition the instance's own graph (not a global ``basins``).
        infomapResult = algorithms.infomap(self.G)

        # One set of nodes per Infomap community.
        LDcommunities = [set(group) for group in infomapResult.communities]

        self.LDcommunities = cp.deepcopy(LDcommunities)
        self.LDcommunitiesUnaltered = cp.deepcopy(LDcommunities)

        # Provisional result, replaced by the composite communities below.
        self.communitiesFinal = self.LDcommunities

        ## Mapping from node to its Infomap community index
        node_to_comm = {}
        for idx, comm in enumerate(LDcommunities):
            for node in comm:
                node_to_comm[node] = idx

        # Construct new graph with one node per Infomap community.
        self.Gnew = nx.Graph()

        # Add *all* communities as nodes, even if disconnected
        self.Gnew.add_nodes_from(range(len(LDcommunities)))  # One node per community index

        # Track summed weights between communities
        edge_weights = defaultdict(float)

        # Communities that share at least one edge with another community.
        unisolatedCommunities = set()
        # NOTE(review): smallCommunities is computed below but never read in
        # this method, and despite its name it flags communities whose area
        # is ABOVE the threshold -- confirm the intent.
        smallCommunities = np.array([])
        if not minBasinLargerThanSmallMergers:
            # Iterate over all edges in the original graph
            for u, v, data in self.G.edges(data=True):
                cu = node_to_comm[u]
                cv = node_to_comm[v]
                if cu == cv:
                    continue

                # Undirected: sort community pair to avoid duplicates
                edge = tuple(sorted((cu, cv)))
                edge_weights[edge] += data.get('bathyAve', 1.0)

                # Both endpoints of a cross-community edge are connected
                # communities; recording only one side would count the other
                # as isolated.
                unisolatedCommunities.update((cu, cv))
        else:
            print("\n\n\n\nminBasinLargerThanSmallMergers1\n\n\n\n")

            # Area weight of every node, in the iteration order of the
            # attribute dictionary.
            area = nx.get_node_attributes(self.G, "areaWeightm2")
            areaList = np.array(list(area.values()))

            # Sum areas per Infomap community.
            # assumes node labels are integer positions into areaList --
            # TODO confirm
            sumCommunities = np.zeros(len(self.LDcommunities))
            for i in range(len(sumCommunities)):
                sumCommunities[i] = np.sum( areaList[ np.array( list(self.LDcommunities[i]) ) ] )

            mergeSettings = detectionMethod['mergerPackage']['mergeSmallBasins']
            if mergeSettings['thresholdMethod'] == "%":
                # Using % of spatial graph area
                sumCommunitiesPercentage = 100*sumCommunities/np.sum(sumCommunities)

                # Flag communities larger than the largest merge threshold
                smallCommunities = ( sumCommunitiesPercentage>np.max(mergeSettings['threshold']) )
            else:
                # Using absolute values of spatial graph area (i.e., m2)
                smallCommunities = ( sumCommunities>np.max(mergeSettings['threshold']) )

        # Communities that share no edge with any other community. Used for
        # determining the Girvan-Newman target community count.
        isolatedCommunitiesCnt = len(LDcommunities) - len(unisolatedCommunities)

        # Add weighted edges to Gnew
        for (cu, cv), edge_weight in edge_weights.items():
            self.Gnew.add_edge(cu, cv, bathyAve=edge_weight)

        # Apply Girvan-Newman algorithm to the simplified community graph
        communityCnt = isolatedCommunitiesCnt + minBasinCnt
        print("Louvain Communities ({0}), Target ({1}), Isolated Communities {2}".format(len(LDcommunities),communityCnt, isolatedCommunitiesCnt))

        timestamp1 = time.time()
        comp = nx.community.girvan_newman(self.Gnew, most_valuable_edge=mostCentralEdge)
        timestamp1 = _lap(timestamp1)
        limited = itertools.takewhile(lambda c: len(c) <= communityCnt, comp)
        timestamp1 = _lap(timestamp1)

        # Fall back to the connected components of Gnew (no edge removed)
        # when no Girvan-Newman level satisfies the target count; otherwise
        # keep the last level, i.e. the finest one within the target.
        GNcommunities = tuple(nx.connected_components(self.Gnew))
        for level in limited:
            GNcommunities = level
        timestamp1 = _lap(timestamp1)
        self.GNcommunities = GNcommunities

        # Map each Infomap community index to its Girvan-Newman community
        louvain_to_gn = {}
        for idx, comm in enumerate(GNcommunities):
            for c in comm:
                louvain_to_gn[c] = idx

        # Map each original node to a Girvan-Newman community via its
        # Infomap community.
        timestamp1 = _lap(timestamp1)
        commNodes = [set() for _ in range(len(LDcommunities))]
        for commL, commGN in louvain_to_gn.items():
            commNodes[commGN].update(LDcommunities[commL])

        # Redefine the node community structure using the composite
        # Infomap & Girvan-Newman communities
        self.communitiesFinal = commNodes

        timestamp1 = _lap(timestamp1)




