In [None]:
import sys
import pandas as pd
import numpy as np
import igraph as ig
import matplotlib.pyplot as plt
import seaborn as sns
from os import listdir
from scipy.stats import zscore
import leidenalg as la


# Make modules located in ./utils importable, then load the project helper module.
sys.path.append("./utils")
import tchronet_utils


# Seed NumPy's global random generator (the Leiden calls in this notebook pass their own seed).
np.random.seed(10)

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Loading Network

In [None]:
# TODO: set this to the HDF5 file holding the network edge list (it is read with pd.read_hdf).
# Left as None on purpose: the bare `network_path =` was a syntax error that stopped the cell
# from even compiling; None keeps the notebook parseable until a real path is filled in.
network_path = None

In [None]:
# Read the stored edge table, then turn its index back into ordinary columns
# so each row can be consumed as a plain tuple when the graph is built.
e = pd.read_hdf(path_or_buf=network_path)
e_df = e.reset_index(drop=False)

In [None]:
# Build an undirected graph from the edge table rows; the extra tuple field is stored
# as the edge attribute 'corr' (used as the weight in the community detection below).
G = ig.Graph.TupleList(
    e_df.itertuples(index=False),
    directed=False,
    weights=None,
    edge_attrs='corr',
)

Create Cluster Tree

In [None]:
# Resolution grid 0.1, 0.2, ..., 2.1; rounding keeps the values (and column labels) free of
# floating-point noise such as 0.30000000000000004.
steps = [round(0.1 * i, 2) for i in range(1, 21 + 1)]

# Start from a clean slate on every run. The table is created on the first iteration,
# whatever its resolution is, instead of relying on the float comparison `x == 0.1`,
# which would fail (or silently reuse a stale table) as soon as the grid changes.
cluster_table = None
for x in steps:
    communities = la.find_partition(
        G,
        la.RBConfigurationVertexPartition,  # alternative: la.CPMVertexPartition
        resolution_parameter=x,
        weights='corr',
        seed=1234,
    )
    column = "Cluster_" + str(x)
    if cluster_table is None:
        # One row per vertex, in vertex-index order, matching communities.membership.
        data = {
            "Node": range(len(G.vs)),
            column: communities.membership,
        }
        cluster_table = pd.DataFrame(data)
    else:
        cluster_table[column] = communities.membership

# Only the first 18 resolutions (0.1 ... 1.8) are plotted; slicing `steps` guarantees that
# every plotted resolution has a matching column in cluster_table.
steps_plot = steps[:18]
tchronet_utils.plot_clusters_resolution(cluster_table, steps_plot)

Check modularity

In [None]:
# Resolutions 0.5, 0.6, ..., 2.5 (21 values). linspace + round is used instead of
# np.arange with a float step, which produces values such as 0.7999999999999999 and
# whose length is not guaranteed for non-integer steps.
resolution_values = np.round(np.linspace(0.5, 2.5, 21), 2)

modularity_scores = []
for x in resolution_values:
    communities = la.find_partition(
        G,
        la.RBConfigurationVertexPartition,
        resolution_parameter=x,
        weights='corr',
        seed=1234,
    )
    # Score the partition on the same weighted graph it was optimised on; without
    # `weights` the modularity would be computed as if every edge had weight 1.
    # The resolution is left at its default so that scores stay comparable across runs.
    modularity = G.modularity(communities.membership, weights='corr')
    modularity_scores.append(modularity)

In [None]:
# Horizontal bar chart: one bar per tested resolution, bar length = modularity.
# The resolutions are used as category labels, so they are formatted to one decimal;
# raw floats would otherwise show up as labels like 0.7999999999999999.
resolution_labels = [f"{r:.1f}" for r in resolution_values]
ax = sns.barplot(x=modularity_scores, y=resolution_labels, orient="h", color='black')

# Label the axes
ax.set_xlabel("Modularity")
ax.set_ylabel("Resolution")

plt.show()

Finding Communities

In [None]:
# Partition used by the rest of the notebook.
# Change resolution_parameter for a higher (e.g. 2) or lower (e.g. 0.3) resolution.
communities = la.find_partition(
    G,
    la.RBConfigurationVertexPartition,
    resolution_parameter=1.5,
    weights='corr',
    seed=1234,
)

In [None]:
# Print the size of every community that has more than 100 members.
for size in (len(members) for members in communities):
    if size > 100:
        print(size)

In [None]:
# Sizes of the communities with more than 100 members, in partition order.
values = [size for size in map(len, communities) if size > 100]

# Horizontal bars: one per retained community, labelled by its position in `values`.
ax = sns.barplot(x=values, y=list(range(len(values))), orient="h", color='black')

# Axis titles
ax.set_xlabel("Communities Size")
ax.set_ylabel("Communities")

# To export the figure, call plt.savefig(<output path>, format="png", dpi=300, bbox_inches="tight") here.

# Render the figure
plt.show()

Plot Trends

In [None]:
# Collect, for each community, the names of its member nodes.
# The helper is reached through the module because the notebook only does
# `import tchronet_utils`; the bare name raised NameError on a fresh kernel.
# NOTE(review): assumes get_communities_names is defined in tchronet_utils - confirm.
communities_list = tchronet_utils.get_communities_names(G, communities)

In [None]:
# TODO: set this to the tab-separated counts matrix (it is read with pd.read_csv, first column as index).
# Left as None on purpose: the bare `counts_path =` was a syntax error that stopped the cell
# from even compiling; None keeps the notebook parseable until a real path is filled in.
counts_path = None

In [None]:
# Counts matrix: tab-separated file, first column used as the row index.
data_matrix_df = pd.read_csv(counts_path, sep="\t", index_col=0)

# The helper is reached through the module because the notebook only does
# `import tchronet_utils`; the bare name raised NameError on a fresh kernel.
# NOTE(review): assumes plot_trends_zscore is defined in tchronet_utils - confirm.
tchronet_utils.plot_trends_zscore(
    communities_list,
    data_matrix_df,
    dim_y=2,
    dim_x=4,
    custom_ylim=(-2, 2),
)
# To export the figure, call plt.savefig(<output path>) here.

Plot Annotations per community

In [None]:
# TODO: set this to the HOMER annotation file passed to transform_annotation_homer.
# Left as None on purpose: the bare `homer_annotation_path =` was a syntax error that stopped
# the cell from even compiling; None keeps the notebook parseable until a real path is filled in.
homer_annotation_path = None

In [None]:
# Both helpers are reached through the module because the notebook only does
# `import tchronet_utils`; the bare names raised NameError on a fresh kernel.
# NOTE(review): assumes both functions are defined in tchronet_utils - confirm.
annotation = tchronet_utils.transform_annotation_homer(homer_annotation_path)
final_annotation = tchronet_utils.genomic_position_stackbar(communities_list, annotation)

# Fraction of each genomic region within every community (rows: community, columns: region).
# 'community_numebr' is spelled exactly as the column arrives in final_annotation;
# do not "fix" the spelling here without changing the code that creates the column.
region_fractions = (
    final_annotation
    .groupby('community_numebr')['GenomicRegion']
    .value_counts(normalize=True)
    .unstack('GenomicRegion')
)

# Create the axes explicitly and hand them to pandas. A bare plt.figure(figsize=...) followed
# by .plot.bar() left an empty 10x10 figure behind, because pandas opens its own figure when
# no `ax` is given, so the requested size was never applied to the actual chart.
fig, ax = plt.subplots(figsize=(10, 10))
region_fractions.plot.bar(stacked=True, ax=ax)
ax.legend(loc='upper right', bbox_to_anchor=(1.35, 0.75))

# To export the figure, call fig.savefig(<output path>, bbox_inches='tight', dpi=300) here.