In [2]:
import networkx as nx
import numpy as np
import scipy as sp
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities
import pandas as pd
from tqdm import tqdm

# Load the protein-protein interaction network. read_weighted_edgelist stores
# the third column of every row as the float edge attribute "weight".
G0 = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt", comments="#", nodetype=str)
print("number of nodes in original dataset: ", len(G0.nodes))

# Remove the prefix from the protein names by dropping the first 5 characters
# of every node label.
# NOTE(review): presumably this is the "4932." prefix seen in the file name -
# confirm that every identifier in the edge list carries it.
map_dic = {node: node[5:] for node in G0.nodes()}

# relabel_nodes returns a relabelled copy by default, so G0 stays untouched.
G = nx.relabel_nodes(G0, map_dic)

# Remove essential proteins: column 1 of the header-less CSV holds their names.
essential_proteins = pd.read_csv("yeast essential proteins.csv", header=None)[1]
print()
print(essential_proteins)
# Names that are not present in the graph are silently ignored by networkx.
G.remove_nodes_from(essential_proteins)
print("number of nodes after removing essential proteins: ", len(G.nodes))

# Delete those edges with a combined score of <= threshold_score (small confidence).
threshold_score = 500
# Collect the edges first and remove them afterwards: mutating the graph while
# iterating over its edge view depends on networkx implementation details.
# The score is looked up by its "weight" key rather than by position.
low_confidence_edges = [
    (u, v)
    for u, v, score in G.edges(data="weight")
    if score <= threshold_score
]
G.remove_edges_from(low_confidence_edges)
print("Number of edges after filtering over low score", len(G.edges))

number of nodes in original dataset:  6394

0         YAL001C
1         YAL003W
2         YAL012W
3         YAL025C
4         YAL032C
          ...    
1308    YKL138C-A
1309    YNL138W-A
1310    YNL024C-A
1311    YHR199C-A
1312    YIL102C-A
Name: 1, Length: 1313, dtype: object
number of nodes after removing essential proteins:  5098
Number of edges after filtering over low score 99189


Proteins connected to all 4 proteins in the first community

In [3]:
# Members of the two protein communities examined below.
protein1 = ['YER178W', 'YBR221C', 'YNL071W', 'YFL018C']
protein2 = ['YOR090C', 'YIL042C', 'YGL059W']

# One neighbour set per member of the first community (G[p] iterates over the
# nodes adjacent to p in the filtered graph).
protein1_neighbours = [set(G[member]) for member in protein1]

# Proteins adjacent to every member of the first community.
set.intersection(*protein1_neighbours)

{'YAL044C',
 'YBL099W',
 'YBR218C',
 'YCR005C',
 'YCR079W',
 'YDL078C',
 'YDL080C',
 'YDR148C',
 'YDR430C',
 'YGR087C',
 'YGR193C',
 'YIL125W',
 'YJL045W',
 'YJL046W',
 'YJR121W',
 'YKL085W',
 'YKL148C',
 'YKR097W',
 'YLR044C',
 'YLR134W',
 'YLR304C',
 'YNL037C',
 'YNR001C',
 'YOL126C',
 'YOR065W',
 'YOR090C',
 'YOR136W',
 'YOR142W',
 'YOR187W',
 'YPL262W',
 'YPR001W'}

Proteins connected to all 3 proteins in the second community

In [4]:
# One neighbour set per member of the second community (G[p] iterates over the
# nodes adjacent to p in the filtered graph).
protein2_neighbours = [set(G[member]) for member in protein2]

# Proteins adjacent to every member of the second community.
set.intersection(*protein2_neighbours)

{'YBL056W', 'YBR221C', 'YCR079W', 'YER178W', 'YGR193C', 'YHR076W', 'YNL071W'}

Proteins connected to all 7 proteins (both communities combined)

In [5]:
# Intersect the neighbour sets of every protein from both lists: the result
# holds the proteins adjacent to all of them at once.
set.intersection(*(protein1_neighbours + protein2_neighbours))

{'YCR079W', 'YGR193C'}