In [2]:
import clusim.sim as csim
import leidenalg
from igraph import Graph
from clusim.clustering import Clustering, print_clustering
import pandas as pd
from infomap import Infomap
import numpy as np
import xnetwork as xn
import graph_tool as gt
import graph_tool.inference as gtInference
from graph_tool import Graph as gtGraph
from pathlib import Path 
from tqdm.auto import tqdm
from multiprocessing import Pool
from functools import partial
from sklearn.decomposition import PCA
from collections import defaultdict
from functools import partial

In [2]:
# Load the per-sample phenotype / demographic table (one row per record, with
# columns such as Barcode, Sex, Age, DX and GlCtx_CL_log) and preview it.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# not run elsewhere until it is pointed at a local copy of the file.
data_demo = pd.read_csv(
    filepath_or_buffer='/Users/fsfatemi/local_AD/KBASE_datashare/demo_final2024.merge_phenotype.506.csv',
)
data_demo.head()

Unnamed: 0,Last.Name,Collection_time,Timepoint.Label,AmyPos,Barcode,Batch,RIN.Value,Sex,Edu,APOE,APOEGrp,EnrollGrp,Age,DX,Visit.Label,GlCtx_CL,GlCtx_CL_log
0,BR0001,BL,0,0.0,42428646,7th,8.4,0,12,E3/3,0,MCI,64,3,0,1.1466,0.059412
1,BR0003,BL,0,0.0,42428655,5th,8.2,1,18,E3/3,0,MCI,81,3,0,1.1505,0.060887
2,BR0004,BL,0,1.0,42428635,10th,8.2,0,6,E3/4,1,MCI,71,3,0,1.7933,0.253653
3,BR0005,BL,0,1.0,42428621,8th,8.2,1,12,E3/4,1,MCI,77,3,0,1.7929,0.253556
4,BR0009,BL,0,0.0,42428626,3th,2.4,0,6,E3/3,0,MCI,67,3,0,1.1844,0.073498


In [None]:
# Reshape the phenotype of interest into a single-row frame: the one row is
# 'GlCtx_CL_log' and the columns are the Barcode values, so it can be indexed
# with the Barcode columns of another frame (see calculateCorrelations).
# Built as a non-mutating chain: calling set_index(..., inplace=True) on a
# column subset of data_demo is the pattern that triggers pandas'
# SettingWithCopyWarning, and the chained form yields the same frame.
data_GlCtx_CL_log = (
    data_demo[['Barcode', 'GlCtx_CL_log']]
    .set_index('Barcode')
    .T
)
data_GlCtx_CL_log.head()

In [3]:
# Read the residuals table, key it by Barcode and transpose it so that rows
# are the ENSG feature identifiers and columns are Barcodes (see preview).
# NOTE(review): the path is absolute and machine-specific.
data_deposition = (
    pd.read_csv('/Users/fsfatemi/local_AD/KBASE_datashare/residuals_cqn_cpm.13603_1140.csv')
    .set_index('Barcode')
    .T
)
data_deposition.head()

Barcode,42425466,42425485,42425503,42425507,42425517,42425526,42425536,42425546,42425554,42425569,...,42719148,42719156,42719178,42729014,42729035,42729099,42729103,42729190,42729209,42733238
ENSG00000000419.14,-0.025602,0.167509,0.013347,0.504928,0.025635,-0.174476,0.004188,-0.091964,0.114177,-0.287812,...,-0.029503,0.028182,-0.332341,0.054127,-0.041002,-0.134013,0.01759,0.015801,0.150996,0.02332
ENSG00000000457.14,-0.120398,-0.016713,-0.036949,0.135288,0.080433,-0.05173,-0.018096,-0.10271,0.121348,-0.091808,...,-0.173309,0.311596,-0.213821,-0.021312,-0.059251,-0.055336,0.052658,0.128468,0.072779,-0.148871
ENSG00000000460.17,0.045326,-0.15961,-0.325952,-0.074574,0.150633,-0.097574,0.082463,0.032035,0.168103,0.099374,...,0.169062,0.122287,0.053825,0.107441,-0.11771,-0.063597,-0.2178,0.088137,0.083201,0.022423
ENSG00000000938.13,0.035264,-0.176183,0.285365,0.263099,-0.082066,-0.130983,-0.046495,-0.031446,-0.109125,-0.094433,...,-0.200148,0.108318,0.113011,0.065125,0.13282,0.310166,0.09851,0.088131,0.265204,0.091941
ENSG00000000971.17,0.910862,-1.307548,0.250657,0.49994,-1.035806,1.059606,0.393797,-0.466189,-0.557025,0.469142,...,0.095053,-0.840107,1.236145,0.4812,1.027841,0.06312,-0.162505,1.234146,-0.063983,0.370594


In [71]:
def calculate_eigengene(values_df):
    """Return the first principal component of ``values_df`` as a one-row frame.

    A one-component PCA is fit on ``values_df.values`` with the rows of
    ``values_df`` as observations and its columns as features, so the single
    component holds one loading per column of ``values_df``.

    Parameters
    ----------
    values_df : pandas.DataFrame
        Numeric table for one module. In this notebook it is a row subset of
        ``data_deposition``.

    Returns
    -------
    pandas.DataFrame
        ``pca.components_`` (shape ``(1, values_df.shape[1])``) labelled with
        ``values_df.columns``.

    Notes
    -----
    NOTE(review): the sign of a principal axis is a solver convention, so the
    sign of any statistic derived from it should be interpreted with care.
    """
    pca = PCA(n_components=1)
    pca.fit(values_df.values)

    # Label the loadings with the input's own columns rather than the global
    # ``data_deposition``: the result is then correct for any input frame and
    # the function no longer depends on notebook-level state.
    return pd.DataFrame(pca.components_, columns=values_df.columns)

In [82]:
def calculateCorrelations(cg_names):
    """Correlate a module's eigengene with the GlCtx_CL_log phenotype row.

    Rows of the global ``data_deposition`` whose index label is in
    ``cg_names`` form the module; ``calculate_eigengene`` reduces them to one
    row, which is correlated (``Series.corr``, Pearson by default) with the
    first row of the global ``data_GlCtx_CL_log`` restricted to the same
    columns.

    Parameters
    ----------
    cg_names : iterable
        Index labels of ``data_deposition`` belonging to the module.

    Returns
    -------
    dict
        ``{"correlation": value}``.
    """
    in_module = data_deposition.index.isin(cg_names)
    module_eigengene = calculate_eigengene(data_deposition.loc[in_module])

    eigengene_row = module_eigengene.iloc[0]
    # Selecting by the eigengene's columns raises KeyError if any of them is
    # absent from the phenotype frame.
    phenotype_row = data_GlCtx_CL_log[module_eigengene.columns].iloc[0]

    return {"correlation": eigengene_row.corr(phenotype_row)}

In [8]:
def membership_to_clusters(membership_vector):
    """Group positions of ``membership_vector`` by the label stored there.

    Parameters
    ----------
    membership_vector : iterable
        Item ``i`` is the (hashable) cluster label of element ``i``.

    Returns
    -------
    list of set
        One set of positions per distinct label, ordered by the first
        appearance of each label. An empty input gives an empty list.
    """
    members_by_label = {}
    for position, label in enumerate(membership_vector):
        members_by_label.setdefault(label, set()).add(position)
    return list(members_by_label.values())

In [78]:
# Resolution values whose stored Leiden partitions are read from each network.
resolutions = [0.001,0.005,0.01,0.1,1.0,0.5,5.0,10.0,20.0,50,100]
# Not referenced by processnetwork.
markovTimes = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,5,10]

# (vertex-attribute name pattern, value written to the "method" field).
# The order here is the order in which entries are emitted per resolution.
_LEIDEN_VARIANTS = (
    ("Leiden_weighted_%f", "leiden_weighted"),
    ("Leiden_unweighted_%f", "leiden_unweighted"),
)


def _cluster_entries(membership_list, labels, res, network_name, method):
    """Build one result record per cluster of a single stored partition."""
    entries = []
    for c in membership_to_clusters(membership_list):
        cg_names = [labels[idx] for idx in c]
        entry = {"cluster": cg_names,
                 "cluster size": len(cg_names),
                 "resolution": res,
                 "network": network_name,
                 "method": method,
                 }
        entry.update(calculateCorrelations(cg_names))
        entries.append(entry)
    return entries


def processnetwork(networkFile):
    """Compute module/phenotype correlations for every stored Leiden cluster.

    The network is loaded with ``xn.load``. For each value in ``resolutions``
    and each variant in ``_LEIDEN_VARIANTS`` the membership stored in the
    corresponding vertex attribute is split into clusters, and each cluster is
    scored with ``calculateCorrelations``.

    Parameters
    ----------
    networkFile : pathlib.Path or str
        Network file to load; its stem is recorded in the "network" field.

    Returns
    -------
    list of dict
        One record per cluster with keys "cluster", "cluster size",
        "resolution", "network", "method" plus whatever
        ``calculateCorrelations`` returns. For each resolution the weighted
        records come before the unweighted ones.
    """
    g = xn.load(networkFile)
    # Path(...) so that a plain string path is accepted as well as a Path.
    network_name = Path(networkFile).stem
    # Read the vertex labels once instead of one attribute lookup per vertex.
    labels = g.vs['Label']

    entries = []
    for res in resolutions:
        for property_pattern, method in _LEIDEN_VARIANTS:
            membership_list = g.vs[property_pattern % res]
            entries.extend(
                _cluster_entries(membership_list, labels, res, network_name, method)
            )
    return entries

In [None]:


# Driver: score every network file and write all records to a single CSV.
if __name__ == "__main__":
    networksWithCommunitiesPath = Path("KNN_NetworksWithCommunities")
    # glob yields files in no guaranteed order; sorting keeps the row order of
    # the output CSV reproducible between runs and machines.
    networkFiles = sorted(networksWithCommunitiesPath.glob("*.xnet"))
    print(networkFiles)

    outputPath = Path("Correlation_results/module_correlations_leiden_all.csv")
    # Create the output directory before the long computation, so a missing
    # folder cannot make to_csv fail after all the work has been done.
    outputPath.parent.mkdir(parents=True, exist_ok=True)

    allEntries = []
    for networkFile in tqdm(networkFiles, desc="networks"):
        entries = processnetwork(networkFile)
        allEntries += entries

    df = pd.DataFrame(allEntries)
    df.to_csv(outputPath, index=False)

