In [1]:
import networkx as nx
import numpy as np
import scipy as sp
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities
import pandas as pd
from tqdm import tqdm

# Load the protein-link edge list. Lines starting with "#" are skipped and
# node labels are kept as strings; the numeric column becomes the edge weight.
G0 = nx.read_weighted_edgelist(
    "4932.protein.links.v11.5.txt",
    comments="#",
    nodetype=str,
)
print("number of nodes in original dataset: ", G0.number_of_nodes())

# Strip the leading organism prefix from every protein ID so that node names
# become plain locus names (the form the essential-protein list uses).
# NOTE(review): the prefix is presumed to be "4932." (matches the input file
# name and the 5 characters the previous hard-coded slice removed) - confirm.
TAXON_PREFIX = "4932."

# Only strip when the prefix is really there; a blind node[5:] would silently
# mangle any ID that does not carry it.
map_dic = {
    node: node[len(TAXON_PREFIX):] if node.startswith(TAXON_PREFIX) else node
    for node in G0.nodes()
}

# relabel_nodes returns a relabelled copy by default, so G0 keeps the raw IDs.
G = nx.relabel_nodes(G0, map_dic)

# Drop essential proteins from the network.
# The CSV is read without a header row; column 1 supplies the protein names.
essential_proteins = pd.read_csv(
    "yeast essential proteins.csv",
    header=None,
)[1]
# Blank line first, then the Series itself (same text as two separate prints).
print("", essential_proteins, sep="\n")
# Names that are not nodes of G are skipped silently by remove_nodes_from.
G.remove_nodes_from(essential_proteins)
print("number of nodes after removing essential proteins: ", G.number_of_nodes())

# Discard low-confidence interactions: every edge whose score (stored as the
# "weight" attribute) is <= threshold_score is removed from G.
threshold_score = 500

# Collect first, remove afterwards. Deleting edges while iterating over
# G.edges only works by accident of how the view is implemented, and reading
# the score by key avoids relying on "weight" being the first edge attribute.
low_confidence_edges = [
    (u, v)
    for u, v, score in G.edges(data="weight")
    if score <= threshold_score
]
G.remove_edges_from(low_confidence_edges)
print("Number of edges after filtering over low score", G.number_of_edges())

number of nodes in original dataset:  6394

0         YAL001C
1         YAL003W
2         YAL012W
3         YAL025C
4         YAL032C
          ...    
1308    YKL138C-A
1309    YNL138W-A
1310    YNL024C-A
1311    YHR199C-A
1312    YIL102C-A
Name: 1, Length: 1313, dtype: object
number of nodes after removing essential proteins:  5098
Number of edges after filtering over low score 99189


In [3]:
# Partition the filtered network into communities by greedy modularity
# maximisation. No weight argument is passed, so the library default applies.
c = community.greedy_modularity_communities(G)

In [5]:
# How many communities the detection step produced; the bare last expression
# makes the notebook display the count.
n_communities = len(c)
n_communities

327

In [6]:
# Reverse lookup: protein name -> index of the community (position in `c`)
# that contains it.
protein_community = {
    protein: community_id
    for community_id, members in enumerate(c)
    for protein in members
}

In [9]:
# Print the community index assigned to each protein of interest.
# A protein that is no longer in the graph raises KeyError here.
protein_interest = [
    'YER178W',
    'YBR221C',
    'YNL071W',
    'YOR090C',
    'YFL018C',
    'YIL042C',
    'YGL059W',
]
for p in protein_interest:
    print(f"{p} {protein_community[p]}")

YER178W 1
YBR221C 1
YNL071W 1
YOR090C 0
YFL018C 1
YIL042C 0
YGL059W 0
