In [1]:
import networkx as nx
import numpy as np
import scipy as sp
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities
from community import community_louvain
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
G0 = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)
print(f"number of nodes in original dataset: ", len(G0.nodes))

#removing the prefix in proteins
protein_info = pd.read_csv("Protein_info.txt", sep='\t')
map_dic = protein_info.set_index('#string_protein_id').to_dict()['preferred_name']
   
G = nx.relabel_nodes(G0, map_dic)

# remove essential proteins
essential_proteins = pd.read_csv("yeast essential proteins.csv", header=None)[1]
print()
print(essential_proteins)
G.remove_nodes_from(essential_proteins)
print(f"number of nodes after removing essential proteins: ", len(G.nodes))  

# delete those edges with a combined score of <= threshold_score (small confidence)
threshold_score = 500
for edge in G.edges: 
    weight = list(G.get_edge_data(edge[0],edge[1]).values())
    if(weight[0] <= threshold_score):
        G.remove_edge(edge[0],edge[1])

number of nodes in original dataset:  6394

0         YAL001C
1         YAL003W
2         YAL012W
3         YAL025C
4         YAL032C
          ...    
1308    YKL138C-A
1309    YNL138W-A
1310    YNL024C-A
1311    YHR199C-A
1312    YIL102C-A
Name: 1, Length: 1313, dtype: object
number of nodes after removing essential proteins:  6324


In [3]:
partLouvain = community_louvain.best_partition(G)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)
communities = {} #empty dictionary
for i in range(number_of_communities):
    communities[i] = [] #create an empty list for each community

for name, community in partLouvain.items():
    communities[community].append(name) #go through the computed partition and add each node to the appropriate list

# of partitions for Louvain modularity = 308


In [4]:
protein_interest = ['PDA1', 'PDB1', 'LAT1', 'LPD1', 'PKP1', 'PKP2', 'PTC5']
communities_interest = set()

for p in protein_interest:
    print(f"protein {p} in community {partLouvain[p]}")
    communities_interest.add(partLouvain[p])

protein PDA1 in community 9
protein PDB1 in community 9
protein LAT1 in community 9
protein LPD1 in community 9
protein PKP1 in community 6
protein PKP2 in community 6
protein PTC5 in community 6


In [5]:
G_enz = G.subgraph(communities[list(communities_interest)[1]]) #subgraph of community with phosphatase and kinase

In [8]:
from cdlib.algorithms import leiden
# Apply Leiden on the PDH regulators 100 times
p_reg = protein_interest[4:]
in_same_comm = 0
prev_run = set()
for i in range(0, 100):
    communities = leiden(G_enz)
    idx = 0
    for c in communities.communities:
        counter = 0
        for p in p_reg:
            if p in c:
                counter += 1
        if counter == 3:
            if prev_run == {}:
                prev_run = prev_run | set(c)
            else:
                prev_run = prev_run & set(c)
            in_same_comm += 1
        idx += 1

print("PDH regulators in same community {} out of 100 times.".format(in_same_comm))
print("Common proteins in all the times the 3 regulators were in the same community: {}".format(len(prev_run)))
# Conclusion: Regulators were in the same community around (50-95)/100 times. However, there were no common
# proteins found in all of these runs. Hence, no new information.

PDH regulators in same community 86 out of 100 times.
Common proteins in all the times the 3 regulators were in the same community: 0
