In [44]:
# one needs to import those packages which are needed; best to be done at the beginning of the program.
import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import pandas as pd
import scipy as sp
import random as rn

# some basic settings for plotting figures
import matplotlib.pyplot as plt
%matplotlib inline 
font = {'family' : 'DejaVu Sans',
        'weight' : 'bold',
        'size'   : 32}

plt.rc('font', **font)
import community as community_louvain

In [45]:
G0 = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)

In [46]:
threshold_score = 400
for edge in G0.edges: 
    weight = list(G0.get_edge_data(edge[0],edge[1]).values())
    if(weight[0] <= threshold_score):
        G0.remove_edge(edge[0],edge[1])

In [47]:
#number of nodes of network
print('number of nodes of G0:',G0.number_of_nodes())

#number of edges of network
print('number of edges of G0:',G0.number_of_edges())

number of nodes of G0: 6394
number of edges of G0: 282074


In [48]:
print('Is the full G0 connected?',nx.connected.is_connected(G0))
print('How many connected subgraphs are there?',nx.connected.number_connected_components(G0))

Is the full G0 connected? False
How many connected subgraphs are there? 282


In [49]:
#get the largest component
largest_cc = max(nx.connected_components(G0),key=len)
G = G0.subgraph(largest_cc)
print('Type',type(largest_cc))
print('number of nodes of largest connected subgraph of G:',G.number_of_nodes())
print('number of edges of largest connected subgraph of G0:',G.number_of_edges())

Type <class 'set'>
number of nodes of largest connected subgraph of G: 6113
number of edges of largest connected subgraph of G0: 282074


In [50]:
ess=pd.read_csv("essential_pro.csv",header=None)

In [51]:
ess_pro=pd.Series.to_list(ess[1])

In [52]:
for i in range(len(ess_pro)):
    ess_pro[i]='4932.'+ess_pro[i]

In [53]:
#ess_pro

In [54]:
G0.remove_nodes_from(ess_pro)

In [55]:
#number of nodes of network
print('number of nodes of G0:',G0.number_of_nodes())

#number of edges of network
print('number of edges of G0:',G0.number_of_edges())

number of nodes of G0: 5098
number of edges of G0: 137012


In [56]:
nodes = nx.shortest_path(G0,'4932.YKL126W').keys()

In [57]:
G=G0.subgraph(nodes)

In [58]:
#number of nodes of network
print('number of nodes of G:',G.number_of_nodes())

#number of edges of network
print('number of edges of G:',G.number_of_edges())

number of nodes of G: 4827
number of edges of G: 137012


In [None]:
# constants
R = 50
N = 10

In [None]:
# time to define a parent class of network
class Network:

    def __init__(self, graph, homologue, centrality_method, partition_method="louvain"):
        self.graph = graph
        self.homologue = homologue
        self.partition_method = partition_method
        self.partitions = []
        self.homologue_communities = []
        # TODO: self.adjacent_communtieis = []
        self.centrality_method = centrality_method
        self.central_nodes = [] # { encoding : centrality }

    def get_partition(self, s):
        if self.partition_method == "louvain":
            return nx_comm.louvain_communities(self.graph, resolution=R, seed=s)
        # ...
    
    def find_partions_robust(self):
        for i in range(N):
            self.partition.append(self.get_partition(rn.seed(i)))
        # TODO: retrieve self.homologue_communities
 
    def central_nodes(self, i):
        if self.centrality_method == "degree":
            return nx.degree_centrality(self.homologue_communities[i])
        elif self.centrality_method == "betweenness":
            return nx.betweenness_centrality(self.homologue_communities[i])
        elif self.centrality_method == "eigenvector":
            return nx.eigenvector_centrality(self.homologue_communities[i])

    def get_central_nodes_robust(self):
        for i in range(N):
            self.central_nodes.append(self.central_nodes(i))
        # TODO: decide how to cross reference the results

In [59]:
partLouvain = community_louvain.best_partition(G)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)

# of partitions for Louvain modularity = 11


In [60]:
community_louvain.modularity(partLouvain, G)

0.5430213963298715

In [61]:
def community_collector(comm):
    number_of_communities = max(comm.values())+1
    communities = {} #empty dictionary
    for i in range(number_of_communities):
        communities[i] = [] #create an empty list for each community

    for name, community in comm.items():
        communities[community].append(name) 
    return communities

In [62]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities = community_collector(partLouvain)
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

The size of community # 0 is  151
The size of community # 1 is  814
The size of community # 2 is  280
The size of community # 3 is  671
The size of community # 4 is  299
The size of community # 5 is  414
The size of community # 6 is  1070
The size of community # 7 is  68
The size of community # 8 is  643
The size of community # 9 is  403
The size of community # 10 is  14


In [63]:
index_1=partLouvain['4932.YKL126W']

In [64]:
sub_1=G.subgraph(communities[index_1])

In [65]:
len(sub_1.nodes)

1070

In [66]:
def max_keys(dict):
    max_value=max(dict.values())
    return [k for k,v in dict.items() if v == max_value]

In [67]:
def max_key_value(dict):
   key=max_keys(dict)[0]
   return dict[key]

In [68]:
deg_cen=nx.degree_centrality(sub_1)
bet_cen = nx.betweenness_centrality(sub_1)
eig_cen = nx.eigenvector_centrality(sub_1)

In [69]:
central_dict={"Degree cen":(max_keys(deg_cen),max_key_value(deg_cen)),"Betweeness cen":(max_keys(bet_cen),max_key_value(bet_cen)),"Eigenvector cen":(max_keys(eig_cen),max_key_value(eig_cen))}

In [70]:
central_dict

{'Degree cen': (['4932.YHR030C'], 0.15715622076707203),
 'Betweeness cen': (['4932.YHR030C'], 0.024232868111066726),
 'Eigenvector cen': (['4932.YHR030C'], 0.13984431493055566)}

In [71]:
dendrogram = community_louvain.generate_dendrogram(G)
# for level in range(1,len(dendrogram) - 1) :
#      print("partition at level", level, "is", community_louvain.partition_at_level(dendrogram, level))  # NOQA

In [72]:

# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities = community_collector(community_louvain.partition_at_level(dendrogram, 0))
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

The size of community # 0 is  23
The size of community # 1 is  5
The size of community # 2 is  13
The size of community # 3 is  4
The size of community # 4 is  13
The size of community # 5 is  132
The size of community # 6 is  13
The size of community # 7 is  8
The size of community # 8 is  426
The size of community # 9 is  20
The size of community # 10 is  9
The size of community # 11 is  9
The size of community # 12 is  4
The size of community # 13 is  22
The size of community # 14 is  8
The size of community # 15 is  108
The size of community # 16 is  17
The size of community # 17 is  7
The size of community # 18 is  20
The size of community # 19 is  7
The size of community # 20 is  6
The size of community # 21 is  48
The size of community # 22 is  5
The size of community # 23 is  5
The size of community # 24 is  11
The size of community # 25 is  5
The size of community # 26 is  558
The size of community # 27 is  359
The size of community # 28 is  4
The size of community # 29 is  6


In [73]:
index_1=community_louvain.partition_at_level(dendrogram, 0)['4932.YKL126W']

In [74]:
len(communities[index_1])

558

In [75]:
aa=nx_comm.louvain_partitions(G, weight='weight', resolution=2)

In [76]:
aa

<generator object louvain_partitions at 0x000001BDA66A7AE0>

In [77]:
nxLouvain=nx_comm.louvain_communities(G, resolution=5)
len(nxLouvain)

63

In [78]:
idx='x'
for i in range(len(nxLouvain)):
    #print('Community', i, 'has', len(nxLouvain[i]), 'nodes.')
    if '4932.YKL126W' in nxLouvain[i]:
        idx=i

In [79]:
idx

37

In [80]:
len(nxLouvain[idx])

50

In [81]:
sub1=G.subgraph(nxLouvain[idx])

In [82]:
deg_cen=nx.degree_centrality(sub1)
bet_cen = nx.betweenness_centrality(sub1)
eig_cen = nx.eigenvector_centrality(sub1)

In [87]:
sorted(deg_cen)

TypeError: 'by' is an invalid keyword argument for sort()

In [83]:
central_dict={"Degree cen":(max_keys(deg_cen),max_key_value(deg_cen)),
"Betweeness cen":(max_keys(bet_cen),max_key_value(bet_cen)),"Eigenvector cen":(max_keys(eig_cen),max_key_value(eig_cen))}

In [84]:
central_dict

{'Degree cen': (['4932.YJR066W'], 0.5510204081632653),
 'Betweeness cen': (['4932.YGR086C'], 0.22915778778192306),
 'Eigenvector cen': (['4932.YJR066W'], 0.28304958112739714)}

In [91]:
sorted(deg_cen.items(), key=lambda item: item[1],reverse=True)[0:5]

[('4932.YJR066W', 0.5510204081632653),
 ('4932.YML121W', 0.44897959183673464),
 ('4932.YGR163W', 0.44897959183673464),
 ('4932.YHR205W', 0.42857142857142855),
 ('4932.YCR027C', 0.3877551020408163)]

In [92]:
sorted(bet_cen.items(), key=lambda item: item[1],reverse=True)[0:5]

[('4932.YGR086C', 0.22915778778192306),
 ('4932.YCR027C', 0.15342685832806954),
 ('4932.YDR032C', 0.15306122448979592),
 ('4932.YDR490C', 0.1289983992309011),
 ('4932.YJR066W', 0.1258085684848829)]

In [93]:
sorted(eig_cen.items(), key=lambda item: item[1],reverse=True)[0:5]

[('4932.YJR066W', 0.28304958112739714),
 ('4932.YML121W', 0.2702305501599234),
 ('4932.YGR163W', 0.2702305501599234),
 ('4932.YKR007W', 0.23357059299548372),
 ('4932.YEL062W', 0.22767153758638944)]