In [1]:
import networkx as nx
import numpy as np
import scipy as sp
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities
from community import community_louvain
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import SpectralClustering
from sklearn import metrics
from tqdm import tqdm

In [39]:
G0 = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)
print(f"number of nodes in original dataset: ", len(G0.nodes))

#removing the prefix in proteins
protein_info = pd.read_csv("Protein_info.txt", sep='\t')
map_dic = protein_info.set_index('#string_protein_id').to_dict()['preferred_name']
   
G = nx.relabel_nodes(G0, map_dic)

# remove essential proteins
essential_proteins = pd.read_csv("yeast essential proteins.csv", header=None)[1]
print()
print(essential_proteins)
G.remove_nodes_from(essential_proteins)
print(f"number of nodes after removing essential proteins: ", len(G.nodes))  

# delete those edges with a combined score of <= threshold_score (small confidence)
threshold_score = 500
for edge in G.edges: 
    weight = list(G.get_edge_data(edge[0],edge[1]).values())
    if(weight[0] <= threshold_score):
        G.remove_edge(edge[0],edge[1])

number of nodes in original dataset:  6394

0         YAL001C
1         YAL003W
2         YAL012W
3         YAL025C
4         YAL032C
          ...    
1308    YKL138C-A
1309    YNL138W-A
1310    YNL024C-A
1311    YHR199C-A
1312    YIL102C-A
Name: 1, Length: 1313, dtype: object
number of nodes after removing essential proteins:  6324


# Original Community

In [40]:
partLouvain = community_louvain.best_partition(G)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)
communities = {} #empty dictionary
for i in range(number_of_communities):
    communities[i] = [] #create an empty list for each community

for name, community in partLouvain.items():
    communities[community].append(name) #go through the computed partition and add each node to the appropriate list
print(number_of_communities)
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

# of partitions for Louvain modularity = 309
309
The size of community # 0 is  1
The size of community # 1 is  1
The size of community # 2 is  1627
The size of community # 3 is  982
The size of community # 4 is  627
The size of community # 5 is  1
The size of community # 6 is  1055
The size of community # 7 is  376
The size of community # 8 is  121
The size of community # 9 is  373
The size of community # 10 is  1
The size of community # 11 is  20
The size of community # 12 is  1
The size of community # 13 is  1
The size of community # 14 is  337
The size of community # 15 is  507
The size of community # 16 is  1
The size of community # 17 is  1
The size of community # 18 is  1
The size of community # 19 is  1
The size of community # 20 is  1
The size of community # 21 is  1
The size of community # 22 is  1
The size of community # 23 is  1
The size of community # 24 is  1
The size of community # 25 is  1
The size of community # 26 is  1
The size of community # 27 is  1
The size of comm

# remove NFU1

In [21]:
# H = G.remove_node('NFU1')
H1 = G
H1.remove_node("NFU1")
H1


<networkx.classes.graph.Graph at 0x7f9a07c81940>

In [7]:
H
print(H)

None


In [22]:
partLouvain = community_louvain.best_partition(H1)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)
communities1 = {} #empty dictionary
for i in range(number_of_communities):
    communities1[i] = [] #create an empty list for each community

for name, community in partLouvain.items():
    communities1[community].append(name) #go through the computed partition and add each node to the appropriate list

# of partitions for Louvain modularity = 308


In [23]:
print('When knocking out NFU1, the sizes of the communities are now:')
for k in communities1:
    print('The size of community #', list(communities1.keys())[k], 'is ',len(communities1[k]))

When knocking out NFU1, the sizes of the communities are now:
The size of community # 0 is  1572
The size of community # 1 is  1
The size of community # 2 is  928
The size of community # 3 is  407
The size of community # 4 is  1
The size of community # 5 is  397
The size of community # 6 is  1
The size of community # 7 is  1404
The size of community # 8 is  360
The size of community # 9 is  249
The size of community # 10 is  547
The size of community # 11 is  1
The size of community # 12 is  160
The size of community # 13 is  1
The size of community # 14 is  1
The size of community # 15 is  1
The size of community # 16 is  1
The size of community # 17 is  1
The size of community # 18 is  1
The size of community # 19 is  1
The size of community # 20 is  1
The size of community # 21 is  1
The size of community # 22 is  1
The size of community # 23 is  1
The size of community # 24 is  1
The size of community # 25 is  1
The size of community # 26 is  1
The size of community # 27 is  1
The 

# knockout LIP5

In [24]:
H2 = G
H2.remove_node("LIP5")
H2

<networkx.classes.graph.Graph at 0x7f9a07c81940>

In [28]:

partLouvain2 = community_louvain.best_partition(H2)
number_of_communities2 = max(partLouvain2.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities2)
communities2 = {} #empty dictionary
for i in range(number_of_communities2):
    communities2[i] = [] #create an empty list for each community

for name, community in partLouvain2.items():
    communities2[community].append(name) #go through the computed partition and add each node to the appropriate list
    
print('When knocking out LIP5, the sizes of the communities are now:')
for k in communities2:
    print('The size of community #', list(communities2.keys())[k], 'is ',len(communities2[k]))

# of partitions for Louvain modularity = 308
When knocking out LIP5, the sizes of the communities are now:
The size of community # 0 is  1
The size of community # 1 is  1
The size of community # 2 is  909
The size of community # 3 is  402
The size of community # 4 is  1564
The size of community # 5 is  400
The size of community # 6 is  1
The size of community # 7 is  1460
The size of community # 8 is  335
The size of community # 9 is  511
The size of community # 10 is  1
The size of community # 11 is  179
The size of community # 12 is  1
The size of community # 13 is  263
The size of community # 14 is  1
The size of community # 15 is  1
The size of community # 16 is  1
The size of community # 17 is  1
The size of community # 18 is  1
The size of community # 19 is  1
The size of community # 20 is  1
The size of community # 21 is  1
The size of community # 22 is  1
The size of community # 23 is  1
The size of community # 24 is  1
The size of community # 25 is  1
The size of community # 2

# Knocking out LIP1

In [29]:
H3 = G
H3.remove_node("LIP1")
H3

<networkx.classes.graph.Graph at 0x7f9a07c81940>

In [30]:

partLouvain3 = community_louvain.best_partition(H3)
number_of_communities3 = max(partLouvain3.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities3)
communities3 = {} #empty dictionary
for i in range(number_of_communities3):
    communities3[i] = [] #create an empty list for each community

for name, community in partLouvain3.items():
    communities3[community].append(name) #go through the computed partition and add each node to the appropriate list
    
print('When knocking out LIP5, the sizes of the communities are now:')
for k in communities3:
    print('The size of community #', list(communities3.keys())[k], 'is ',len(communities3[k]))

# of partitions for Louvain modularity = 309
When knocking out LIP5, the sizes of the communities are now:
The size of community # 0 is  1
The size of community # 1 is  1437
The size of community # 2 is  909
The size of community # 3 is  406
The size of community # 4 is  405
The size of community # 5 is  415
The size of community # 6 is  91
The size of community # 7 is  1585
The size of community # 8 is  324
The size of community # 9 is  1
The size of community # 10 is  430
The size of community # 11 is  1
The size of community # 12 is  20
The size of community # 13 is  1
The size of community # 14 is  1
The size of community # 15 is  1
The size of community # 16 is  1
The size of community # 17 is  1
The size of community # 18 is  1
The size of community # 19 is  1
The size of community # 20 is  1
The size of community # 21 is  1
The size of community # 22 is  1
The size of community # 23 is  1
The size of community # 24 is  1
The size of community # 25 is  1
The size of community # 2

# Knocking out DLAT

In [34]:
H4 = G
H4.remove_node("DLAT")
H4

NetworkXError: The node DLAT is not in the graph.

In [35]:
partLouvain4 = community_louvain.best_partition(H4)
number_of_communities4 = max(partLouvain4.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities4)
communities4 = {} #empty dictionary
for i in range(number_of_communities4):
    communities4[i] = [] #create an empty list for each community

for name, community in partLouvain4.items():
    communities4[community].append(name) #go through the computed partition and add each node to the appropriate list
    
print('When knocking out LIP5, the sizes of the communities are now:')
for k in communities3:
    print('The size of community #', list(communities4.keys())[k], 'is ',len(communities4[k]))

# of partitions for Louvain modularity = 309
When knocking out LIP5, the sizes of the communities are now:
The size of community # 0 is  244
The size of community # 1 is  1
The size of community # 2 is  1
The size of community # 3 is  903
The size of community # 4 is  404
The size of community # 5 is  1506
The size of community # 6 is  395
The size of community # 7 is  203
The size of community # 8 is  1455
The size of community # 9 is  340
The size of community # 10 is  1
The size of community # 11 is  1
The size of community # 12 is  1
The size of community # 13 is  1
The size of community # 14 is  28
The size of community # 15 is  547
The size of community # 16 is  1
The size of community # 17 is  1
The size of community # 18 is  1
The size of community # 19 is  1
The size of community # 20 is  1
The size of community # 21 is  1
The size of community # 22 is  1
The size of community # 23 is  1
The size of community # 24 is  1
The size of community # 25 is  1
The size of community # 