In [1]:
import networkx as nx
import numpy as np
import scipy as sp
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities
from community import community_louvain
import pandas as pd

In [20]:
# Input files, resolved relative to the notebook's working directory.
PPI_EDGELIST_PATH = "4932.protein.links.v11.5.txt"
ESSENTIAL_PROTEINS_PATH = "yeast essential proteins.csv"
# Number of leading characters stripped from every node name.
# NOTE(review): presumably the "4932." prefix that also appears in the file
# name -- confirm against the node IDs in the edge list.
NODE_PREFIX_LEN = 5

# Read the weighted edge list; lines starting with "#" are treated as comments.
G0 = nx.read_weighted_edgelist(PPI_EDGELIST_PATH, comments="#", nodetype=str)
print("number of nodes in original dataset: ", len(G0.nodes))

# Remove the prefix from the protein names: map every original node name to
# the same name without its first NODE_PREFIX_LEN characters.
map_dic = {node: node[NODE_PREFIX_LEN:] for node in G0.nodes()}

# relabel_nodes is called without copy=False, so G is a new graph and G0 is
# left untouched.
G = nx.relabel_nodes(G0, map_dic)

# Remove essential proteins. The CSV has no header row; column 1 holds the
# protein names.
essential_proteins = pd.read_csv(ESSENTIAL_PROTEINS_PATH, header=None)[1]
print()
print(essential_proteins)
# remove_nodes_from silently skips names that are not nodes of G, so the node
# count can drop by less than the number of entries in the list.
G.remove_nodes_from(essential_proteins)
print("number of nodes after removing essential proteins: ", len(G.nodes))

number of nodes in original dataset:  6394

0         YAL001C
1         YAL003W
2         YAL012W
3         YAL025C
4         YAL032C
          ...    
1308    YKL138C-A
1309    YNL138W-A
1310    YNL024C-A
1311    YHR199C-A
1312    YIL102C-A
Name: 1, Length: 1313, dtype: object
number of nodes after removing essential proteins:  5098


In [21]:
# The Louvain heuristic is randomized: without a fixed seed every run can
# return a different partition, so the number of communities and the community
# index of each protein would not be reproducible across kernel restarts.
LOUVAIN_SEED = 42

# partLouvain maps each node name to the integer label of its community.
partLouvain = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)

# of partitions for Louvain modularity = 41


In [23]:
# Build a dictionary called 'communities'. The keys are the community labels
# (0 .. number_of_communities - 1) and each value is the list of nodes that
# belong to that community.
communities = {label: [] for label in range(number_of_communities)}

# Go through the computed partition and add each node to the appropriate list.
# The loop variable is deliberately NOT named `community`: that name is bound
# by `from networkx.algorithms import community` at the top of the notebook,
# and reusing it here would overwrite the module for every later cell.
for name, community_id in partLouvain.items():
    communities[community_id].append(name)

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be.
# In your own investigations you can decide what is more useful.

# Now find out how big each community is. Iterating over items() yields the
# label and its member list directly, so the key list is not rebuilt on every
# iteration.
for community_id, members in communities.items():
    print('The size of community #', community_id, 'is ', len(members))
    

The size of community # 0 is  345
The size of community # 1 is  1027
The size of community # 2 is  1080
The size of community # 3 is  829
The size of community # 4 is  767
The size of community # 5 is  542
The size of community # 6 is  474
The size of community # 7 is  1
The size of community # 8 is  1
The size of community # 9 is  1
The size of community # 10 is  1
The size of community # 11 is  1
The size of community # 12 is  1
The size of community # 13 is  1
The size of community # 14 is  1
The size of community # 15 is  1
The size of community # 16 is  1
The size of community # 17 is  1
The size of community # 18 is  1
The size of community # 19 is  1
The size of community # 20 is  1
The size of community # 21 is  1
The size of community # 22 is  1
The size of community # 23 is  1
The size of community # 24 is  1
The size of community # 25 is  1
The size of community # 26 is  1
The size of community # 27 is  1
The size of community # 28 is  1
The size of community # 29 is  1
The 

In [28]:
# partLouvain is a dict where the keys are the node names (i.e. protein names)
# and the values are the index of the community that the protein is part of.

protein_interest = set(['YER178W', 'YBR221C', 'YNL071W', 'YOR090C', 'YFL018C', 'YIL042C', 'YGL059W'])

# Iterate in sorted order: the iteration order of a set of strings can change
# from one interpreter session to the next, which would reshuffle the output.
for p in sorted(protein_interest):
    if p in partLouvain:
        print(f"protein {p} in community {partLouvain[p]}")
    else:
        # A protein that is not a node of G has no community; report it
        # instead of aborting the whole loop with a KeyError.
        print(f"protein {p} is not in the partition (not a node of the graph)")


protein YIL042C in community 4
protein YGL059W in community 4
protein YNL071W in community 1
protein YER178W in community 1
protein YBR221C in community 1
protein YOR090C in community 4
protein YFL018C in community 1


In [30]:
# Proteins to report on -- extend the list below as needed.
protein_interest = set(['YER178W', 'YBR221C', 'YNL071W', 'YOR090C', 'YFL018C', 'YIL042C', 'YGL059W'])

# Map each protein of interest to its degree in G, stored as a float.
degree_dic = {protein: float(G.degree(protein)) for protein in protein_interest}

print("degree of each protein")
# Cell result: the same mapping, ordered by ascending degree.
dict(sorted(degree_dic.items(), key=lambda name_and_degree: name_and_degree[1]))

degree of each protein


{'YIL042C': 266.0,
 'YGL059W': 272.0,
 'YOR090C': 317.0,
 'YNL071W': 491.0,
 'YBR221C': 580.0,
 'YFL018C': 582.0,
 'YER178W': 596.0}