In [1]:

import pandas as pd
import networkx as nx
import igraph as ig
import leidenalg
#import matplotlib.pyplot as plt

def get_leiden_clusters(dfpath, rnaname, algo, rs, seed=None):
    """Cluster one adjacency matrix with the Leiden algorithm and save the result.

    Reads ``multicov_Gephi/{dfpath}`` as a square matrix (first CSV column is
    the row index and supplies the node names), builds a directed igraph graph
    with one weighted edge per strictly positive entry, runs
    ``leidenalg.find_partition`` and writes
    ``multicov_ld_nodes/{rnaname}_nodes_ld.csv`` with the columns ``Id``,
    ``Label`` (both the node name) and ``Color`` (1-based cluster number).
    Entries that are zero, negative or NaN produce no edge.

    Parameters
    ----------
    dfpath : str
        File name of the matrix CSV inside ``multicov_Gephi``.
    rnaname : str
        Prefix of the output file name.
    algo : str
        Name of a leidenalg partition class: 'ModularityVertexPartition',
        'RBConfigurationVertexPartition', 'RBERVertexPartition',
        'CPMVertexPartition', 'SurpriseVertexPartition' or
        'SignificanceVertexPartition'.
    rs : float
        Resolution parameter. Only forwarded to the partition classes that
        accept one (RBConfiguration, RBER, CPM); ignored for the others.
    seed : int, optional
        Forwarded to ``leidenalg.find_partition`` when given, to make the
        optimisation reproducible. Omitted from the call when ``None``.

    Raises
    ------
    KeyError
        If ``algo`` is not one of the supported names.
    ValueError
        If the matrix read from the CSV is not square.
    """
    # Optional keyword arguments each partition class accepts. Passing one that
    # a class does not declare (e.g. `resolution_parameter` to
    # ModularityVertexPartition, or `weights` to SignificanceVertexPartition)
    # makes its constructor raise TypeError, so the call is tailored per class.
    accepted_options = {
        'ModularityVertexPartition': ('weights',),
        'RBConfigurationVertexPartition': ('weights', 'resolution_parameter'),
        'RBERVertexPartition': ('weights', 'resolution_parameter'),
        'CPMVertexPartition': ('weights', 'resolution_parameter'),
        'SurpriseVertexPartition': ('weights',),
        'SignificanceVertexPartition': (),
    }
    if algo not in accepted_options:
        raise KeyError(
            f'Unknown algo {algo!r}; expected one of {sorted(accepted_options)}'
        )

    # Load the matrix; the first CSV column becomes the index (node names).
    df = pd.read_csv(f'multicov_Gephi/{dfpath}', index_col=0)
    n_rows, n_cols = df.shape
    if n_rows != n_cols:
        raise ValueError(
            f'{dfpath}: expected a square matrix, got shape {df.shape}'
        )

    # Build the edge list explicitly so every weight is paired with its own
    # edge, independent of the order in which igraph would enumerate the
    # entries of an adjacency matrix.
    # NOTE(review): the graph is directed, as in the original implementation;
    # for a symmetric matrix each pair therefore appears as two edges -
    # confirm this is intended.
    values = df.values
    sources, targets = (values > 0).nonzero()
    ig_graph = ig.Graph(
        n=n_rows,
        edges=list(zip(sources.tolist(), targets.tolist())),
        directed=True,
    )
    ig_graph.es['weight'] = values[sources, targets].tolist()

    # Assemble only the keyword arguments this partition class understands.
    options = {}
    if 'weights' in accepted_options[algo]:
        options['weights'] = 'weight'
    if 'resolution_parameter' in accepted_options[algo]:
        options['resolution_parameter'] = rs
    if seed is not None:
        options['seed'] = seed

    partition = leidenalg.find_partition(ig_graph,
                                         getattr(leidenalg, algo),
                                         **options)

    # Vertex i of the graph corresponds to row i of the matrix.
    node_names = list(df.index)

    # One output row per node, grouped by community; Color is 1-based.
    labels = []
    colors = []
    for cluster_number, community in enumerate(partition, start=1):
        for node_index in community:
            labels.append(node_names[node_index])
            colors.append(cluster_number)

    df_out = pd.DataFrame({'Id': labels, 'Label': labels, 'Color': colors})

    # Write the node table to CSV.
    df_out.to_csv(f'multicov_ld_nodes/{rnaname}_nodes_ld.csv', index=False)



In [2]:
import os

def get_filenames(directory, suffix):
    """Return the sorted names of the entries in `directory` ending with `suffix`.

    Only the bare entry names are returned (no directory part). The result is
    sorted because ``os.listdir`` yields entries in arbitrary order, which
    would otherwise make the processing order differ between runs/machines.

    Parameters
    ----------
    directory : str
        Path of the directory to list.
    suffix : str
        Required ending of the entry name, e.g. ``'_ld_matrix.csv'``.

    Returns
    -------
    list of str
        Matching entry names in ascending order; empty if none match.
    """
    return sorted(name for name in os.listdir(directory) if name.endswith(suffix))

# Folder that get_leiden_clusters reads its matrix CSVs from, and the
# file-name ending used to select those CSVs.
directory = 'multicov_Gephi'
suffix = '_ld_matrix.csv'
# Bare names (no directory part) of every entry in `directory` ending with `suffix`.
filenames = get_filenames(directory, suffix)


In [3]:
# Drop the trailing `suffix` from each file name; the remainder is later passed
# to get_leiden_clusters as `rnaname`. Same order and length as `filenames`.
rnanames = [f[:-len(suffix)] for f in filenames]

In [5]:
# Display how many matrix files were found (one name per file).
len(rnanames)

18642

In [6]:
# Cluster every matrix file with RBERVertexPartition at resolution 1.5,
# printing the running index every 200 files as a progress marker.
for n, f in enumerate(filenames):
    if not n % 200:
        print(n)
    r = rnanames[n]
    get_leiden_clusters(f, r, algo='RBERVertexPartition', rs=1.5)
    

0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400
14600
14800
15000
15200
15400
15600
15800
16000
16200
16400
16600
16800
17000
17200
17400
17600
17800
18000
18200
18400
18600
