In [65]:
# one needs to import those packages which are needed; best to be done at the beginning of the program.
import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import pandas as pd
import scipy as sp
import random as rn

# some basic settings for plotting figures
import matplotlib.pyplot as plt
%matplotlib inline 
font = {'family' : 'DejaVu Sans',
        'weight' : 'bold',
        'size'   : 32}

plt.rc('font', **font)
import community as community_louvain

In [15]:
# read the network
G0 = nx.read_weighted_edgelist("4932.protein.links.v11.5.txt",comments="#",nodetype=str)

In [16]:
# remove the edges below our chosen threshold
threshold_score = 400
for edge in G0.edges: 
    weight = list(G0.get_edge_data(edge[0],edge[1]).values())
    if(weight[0] <= threshold_score):
        G0.remove_edge(edge[0],edge[1])

In [19]:
# some basic information:
print('number of nodes of G0:',G0.number_of_nodes())
print('number of edges of G0:',G0.number_of_edges())
print('Is the full G0 connected?',nx.connected.is_connected(G0))
print('How many connected subgraphs are there?',nx.connected.number_connected_components(G0))
# largest subcomponent
largest_cc = max(nx.connected_components(G0),key=len)
G = G0.subgraph(largest_cc)
print('number of nodes of largest connected subgraph of G0:',G.number_of_nodes())
print('number of edges of largest connected subgraph of G0:',G.number_of_edges())

number of nodes of G0: 6394
number of edges of G0: 282074
Is the full G0 connected? False
How many connected subgraphs are there? 282
number of nodes of largest connected subgraph of G0: 6113
number of edges of largest connected subgraph of G0: 282074


In [20]:
# remove essential nodes
ess=pd.read_csv("essential_pro.csv",header=None)
ess_pro=pd.Series.to_list(ess[1])
for i in range(len(ess_pro)):
    ess_pro[i]='4932.'+ess_pro[i]
G0.remove_nodes_from(ess_pro)

In [21]:
# some more basic information:
print('number of nodes of G0:',G0.number_of_nodes())
print('number of edges of G0:',G0.number_of_edges())

number of nodes of G0: 5098
number of edges of G0: 137012


In [22]:
# take the nodes most closely connected to our protein of interest
nodes = nx.shortest_path(G0,'4932.YKL126W').keys()
G=G0.subgraph(nodes)

In [23]:
# some basic information about this subgraph
print('number of nodes of G:',G.number_of_nodes())
print('number of edges of G:',G.number_of_edges())

number of nodes of G: 4827
number of edges of G: 137012


In [30]:
# time to define a parent class of network
class Network:
    R = 50
    N = 10

    def __init__(self, graph, homologue, partition_method, centrality_method):
        self.graph = graph
        self.homologue = homologue
        self.partition_method = partition_method
        self.partitions = []
        self.homologue_communities = []
        # TODO: self.adjacent_communtieis = []
        self.centrality_method = centrality_method
        self.central_nodes = [] # { encoding : centrality }

    def get_partition(self, s):
        if self.partition_method == "louvain":
            return nx_comm.louvain_communities(self.graph, resolution=Network.R, seed=s)
        # ...

    def find_partitions_robust(self):
        self.partitions = [self.get_partition(rn.seed(i)) for i in range(Network.N)]
        # TODO: retrieve self.homologue_communities
 
    def get_central_nodes(self, i):
        if self.centrality_method == "degree":
            return nx.degree_centrality(self.homologue_communities[i])
        elif self.centrality_method == "betweenness":
            return nx.betweenness_centrality(self.homologue_communities[i])
        elif self.centrality_method == "eigenvector":
            return nx.eigenvector_centrality(self.homologue_communities[i])

    def get_central_nodes_robust(self):
        self.central_nodes = [self.get_central_nodes(i) for i in range(Network.N)]
        # TODO: decide how to cross reference the results

In [28]:
# get some information about the partition of the strongly connected graph
partLouvain = community_louvain.best_partition(G)

In [29]:
# some information about the subgraph's partition
print('# of partitions for Louvain modularity =',max(partLouvain.values())+1)
print(community_louvain.modularity(partLouvain, G))

# of partitions for Louvain modularity = 9
0.5435814018014984


In [42]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels
# and the values will be a list of nodes in that community.
def community_collector(comm):
    number_of_communities = max(comm.values())+1
    communities = {} #empty dictionary
    for i in range(number_of_communities):
        communities[i] = [] #create an empty list for each community

    for name, community in comm.items():
        communities[community].append(name) 
    return communities

communities = community_collector(partLouvain)

In [43]:
# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX
# would be. In your own investigations you can decide what is more useful.

# Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

The size of community # 0 is  645
The size of community # 1 is  273
The size of community # 2 is  346
The size of community # 3 is  308
The size of community # 4 is  400
The size of community # 5 is  1135
The size of community # 6 is  891
The size of community # 7 is  809
The size of community # 8 is  20


In [44]:
index_1=partLouvain['4932.YKL126W']
sub_1=G.subgraph(communities[index_1])

In [45]:
print(len(sub_1.nodes))

1135


In [46]:
def max_keys(dict):
    max_value=max(dict.values())
    return [k for k,v in dict.items() if v == max_value]

def max_key_value(dict):
   key=max_keys(dict)[0]
   return dict[key]

In [47]:
deg_cen = nx.degree_centrality(sub_1)
bet_cen = nx.betweenness_centrality(sub_1)
eig_cen = nx.eigenvector_centrality(sub_1)

In [48]:
central_dict={
    "Degree cen"        :   (max_keys(deg_cen),max_key_value(deg_cen)),
    "Betweeness cen"    :   (max_keys(bet_cen),max_key_value(bet_cen)),
    "Eigenvector cen"   :   (max_keys(eig_cen),max_key_value(eig_cen))
}
print(central_dict)

{'Degree cen': (['4932.YHR030C'], 0.14285714285714285), 'Betweeness cen': (['4932.YHR030C'], 0.01960049310913199), 'Eigenvector cen': (['4932.YHR030C'], 0.13661182974703667)}


In [50]:
dendrogram = community_louvain.generate_dendrogram(G)

# Let's construct a dictionary object called 'd_communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

d_communities = community_collector(community_louvain.partition_at_level(dendrogram, 0))
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in d_communities:
    print('The size of community #', list(d_communities.keys())[k], 'is ',len(d_communities[k]))

ind = community_louvain.partition_at_level(dendrogram, 0)['4932.YKL126W']
len(communities[ind])

The size of community # 0 is  9
The size of community # 1 is  6
The size of community # 2 is  12
The size of community # 3 is  36
The size of community # 4 is  7
The size of community # 5 is  9
The size of community # 6 is  36
The size of community # 7 is  18
The size of community # 8 is  4
The size of community # 9 is  18
The size of community # 10 is  5
The size of community # 11 is  13
The size of community # 12 is  94
The size of community # 13 is  32
The size of community # 14 is  2
The size of community # 15 is  7
The size of community # 16 is  65
The size of community # 17 is  18
The size of community # 18 is  2
The size of community # 19 is  36
The size of community # 20 is  21
The size of community # 21 is  9
The size of community # 22 is  6
The size of community # 23 is  4
The size of community # 24 is  35
The size of community # 25 is  5
The size of community # 26 is  6
The size of community # 27 is  6
The size of community # 28 is  3
The size of community # 29 is  7
The siz

KeyError: 40

In [53]:
print(community_louvain.partition_at_level(dendrogram, 0)['4932.YKL126W'])

40


In [67]:
nxLouvain=nx_comm.louvain_communities(G, resolution=5)
print(len(nxLouvain))
idx='x'
for i in range(len(nxLouvain)):
    if '4932.YKL126W' in nxLouvain[i]: idx=i
print(idx)
print(len(nxLouvain[idx]))

AttributeError: module 'networkx.algorithms.community' has no attribute 'louvain_communities'

In [81]:
sub1=G.subgraph(nxLouvain[idx])
deg_cen=nx.degree_centrality(sub1)
bet_cen = nx.betweenness_centrality(sub1)
eig_cen = nx.eigenvector_centrality(sub1)
print(sorted(deg_cen))

In [58]:
central_dict={"Degree cen":(max_keys(deg_cen),max_key_value(deg_cen)),
"Betweeness cen":(max_keys(bet_cen),max_key_value(bet_cen)),"Eigenvector cen":(max_keys(eig_cen),max_key_value(eig_cen))}
print(central_dict)

{'Degree cen': (['4932.YHR030C'], 0.14285714285714285), 'Betweeness cen': (['4932.YHR030C'], 0.01960049310913199), 'Eigenvector cen': (['4932.YHR030C'], 0.13661182974703667)}


In [57]:
print(sorted(deg_cen.items(), key=lambda item: item[1],reverse=True)[0:5])
print()
print(sorted(bet_cen.items(), key=lambda item: item[1],reverse=True)[0:5])
print()
print(sorted(eig_cen.items(), key=lambda item: item[1],reverse=True)[0:5])

[('4932.YHR030C', 0.14285714285714285), ('4932.YDL192W', 0.13051146384479717), ('4932.YML001W', 0.12698412698412698), ('4932.YBL016W', 0.12257495590828923), ('4932.YNL098C', 0.12081128747795414)]

[('4932.YHR030C', 0.01960049310913199), ('4932.YML001W', 0.01686450288370687), ('4932.YBR164C', 0.016137941371008116), ('4932.YNL098C', 0.015564708363344846), ('4932.YDR388W', 0.015217349939506165)]

[('4932.YHR030C', 0.13661182974703667), ('4932.YDL192W', 0.12609436063021143), ('4932.YBL016W', 0.12280312041458243), ('4932.YLR113W', 0.12264986267517693), ('4932.YER031C', 0.1172542500101329)]
