In [16]:
# one needs to import those packages which are needed; best to be done at the beginning of the program.
import networkx as nx

import numpy as np
import scipy as sp

# some basic settings for plotting figures
import matplotlib.pyplot as plt
%matplotlib inline 
font = {'family' : 'DejaVu Sans',
        'weight' : 'bold',
        'size'   : 32}

plt.rc('font', **font)

import community as community_louvain

Read in the network containing all human proteins on Uniprot

In [17]:
G0 = nx.read_weighted_edgelist("9606.protein.links.v11.5.txt",comments="#",nodetype=str)

In [18]:
#number of nodes of network
print('number of nodes of G0:',G0.number_of_nodes())

#number of edges of network
print('number of edges of G0:',G0.number_of_edges())

number of nodes of G0: 19385
number of edges of G0: 5969249


In [19]:
#get the largest component
largest_cc = max(nx.connected_components(G0),key=len)
G = G0.subgraph(largest_cc)
print('Type',type(largest_cc))
print('number of nodes of largest connected subgraph of G:',G.number_of_nodes())
print('number of edges of largest connected subgraph of G0:',G.number_of_edges())

Type <class 'set'>
number of nodes of largest connected subgraph of G: 19385
number of edges of largest connected subgraph of G0: 5969249


In [20]:
print("degree of target node: ",G0.degree('9606.ENSP00000375892'))

degree of target node:  1889


In [21]:
# compute degree sequence
degS=[G.degree()[node] for node in list(G.nodes())]
degS.sort()
degS=np.array(degS)

# 1st Partition

In [22]:
partLouvain = community_louvain.best_partition(G)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)

# of partitions for Louvain modularity = 7


In [23]:
type(partLouvain)

dict

In [24]:
index_1=partLouvain['9606.ENSP00000375892']

In [25]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities = {} #empty dictionary
for i in range(number_of_communities):
    communities[i] = [] #create an empty list for each community

for name, community in partLouvain.items():
    communities[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

The size of community # 0 is  3409
The size of community # 1 is  3197
The size of community # 2 is  3408
The size of community # 3 is  3809
The size of community # 4 is  2181
The size of community # 5 is  3143
The size of community # 6 is  238


In [26]:
'9606.ENSP00000375892' in communities[index_1]

True

In [27]:
type(communities[index_1])

list

Extracting the subgraph containing AKT2

In [28]:
sub_1=G.subgraph(communities[index_1])

In [29]:
#number of nodes of network
print('number of nodes of sub_1:',sub_1.number_of_nodes())

#number of edges of network
print('number of edges of sub_1:',sub_1.number_of_edges())

number of nodes of sub_1: 3409
number of edges of sub_1: 457211


# 2nd Partition

In [30]:
partLouvain_2 = community_louvain.best_partition(sub_1)
number_of_communities_2 = max(partLouvain_2.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_2)

# of partitions for Louvain modularity = 8


In [31]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_2 = {} #empty dictionary
for i in range(number_of_communities_2):
    communities_2[i] = [] #create an empty list for each community

for name, community in partLouvain_2.items():
    communities_2[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_2:
    print('The size of community_2 #', list(communities_2.keys())[k], 'is ',len(communities_2[k]))

The size of community_2 # 0 is  121
The size of community_2 # 1 is  34
The size of community_2 # 2 is  487
The size of community_2 # 3 is  623
The size of community_2 # 4 is  785
The size of community_2 # 5 is  167
The size of community_2 # 6 is  502
The size of community_2 # 7 is  690


In [32]:
index_2=partLouvain_2['9606.ENSP00000375892']

Extracting the subgraph containing AKT2

In [33]:
sub_2=G.subgraph(communities_2[index_2])

In [34]:
#number of nodes of network
print('number of nodes of sub_2:',sub_2.number_of_nodes())

#number of edges of network
print('number of edges of sub_2:',sub_2.number_of_edges())

number of nodes of sub_2: 623
number of edges of sub_2: 35826


# 3rd Partition

In [35]:
partLouvain_3 = community_louvain.best_partition(sub_2)
number_of_communities_3 = max(partLouvain_3.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_3)

# of partitions for Louvain modularity = 6


In [36]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_3 = {} #empty dictionary
for i in range(number_of_communities_3):
    communities_3[i] = [] #create an empty list for each community

for name, community in partLouvain_3.items():
    communities_3[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_3:
    print('The size of community_3 #', list(communities_3.keys())[k], 'is ',len(communities_3[k]))

The size of community_3 # 0 is  156
The size of community_3 # 1 is  196
The size of community_3 # 2 is  11
The size of community_3 # 3 is  150
The size of community_3 # 4 is  65
The size of community_3 # 5 is  45


In [37]:
index_3=partLouvain_3['9606.ENSP00000375892']

In [38]:
index_3

4

Extracting the subgraph containing AKT2

In [39]:
sub_3=G.subgraph(communities_3[index_3])

In [40]:
#number of nodes of network
print('number of nodes of sub_3:',sub_3.number_of_nodes())

#number of edges of network
print('number of edges of sub_3:',sub_3.number_of_edges())

number of nodes of sub_3: 65
number of edges of sub_3: 1281


# 4th Partition

In [41]:
partLouvain_4 = community_louvain.best_partition(sub_3)
number_of_communities_4 = max(partLouvain_4.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_4)

# of partitions for Louvain modularity = 4


In [42]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_4 = {} #empty dictionary
for i in range(number_of_communities_4):
    communities_4[i] = [] #create an empty list for each community

for name, community in partLouvain_4.items():
    communities_4[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_4:
    print('The size of community_4 #', list(communities_4.keys())[k], 'is ',len(communities_4[k]))

The size of community_4 # 0 is  23
The size of community_4 # 1 is  13
The size of community_4 # 2 is  19
The size of community_4 # 3 is  10


In [43]:
index_4=partLouvain_4['9606.ENSP00000375892']

In [44]:
index_4

2

Extracting the subgraph containing AKT2

In [45]:
sub_4=G.subgraph(communities_4[index_4])

In [46]:
#number of nodes of network
print('number of nodes of sub_4:',sub_4.number_of_nodes())

#number of edges of network
print('number of edges of sub_4:',sub_4.number_of_edges())

number of nodes of sub_4: 19
number of edges of sub_4: 143


# 5th Partition

In [47]:
partLouvain_5 = community_louvain.best_partition(sub_4)
number_of_communities_5 = max(partLouvain_5.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_5)

# of partitions for Louvain modularity = 3


In [48]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_5 = {} #empty dictionary
for i in range(number_of_communities_5):
    communities_5[i] = [] #create an empty list for each community

for name, community in partLouvain_5.items():
    communities_5[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_5:
    print('The size of community_5 #', list(communities_5.keys())[k], 'is ',len(communities_5[k]))

The size of community_5 # 0 is  7
The size of community_5 # 1 is  4
The size of community_5 # 2 is  8


In [49]:
index_5=partLouvain_5['9606.ENSP00000375892']

The index of the community containing AKT2 is:

In [50]:
index_5

2

The community is reduced to a manageable size (11) so we can extract the proteins.

In [51]:
print('The interesting proteins related to APK-2 are:')
for i in communities_5[index_5]:
    pro=i.lstrip('9606.')
    print(pro)

The interesting proteins related to APK-2 are:
ENSP00000375892
ENSP00000262719
ENSP00000263826
ENSP00000354558
ENSP00000171887
ENSP00000225577
ENSP00000308413
ENSP00000468280


In [52]:
G_cluster = {}

for key in communities_5.keys():
    G_cluster[key] = G0.subgraph(communities_5[key])

Centrality of nodes in the community

In [53]:
def max_keys(dict):
    max_value=max(dict.values())
    return [k for k,v in dict.items() if v == max_value]

In [54]:
def max_key_value(dict):
   key=max_keys(dict)[0]
   return dict[key]

In [55]:
deg_cen=nx.degree_centrality(G_cluster[index_5])

In [56]:
deg_cen

{'9606.ENSP00000263826': 1.0,
 '9606.ENSP00000354558': 1.0,
 '9606.ENSP00000225577': 1.0,
 '9606.ENSP00000171887': 1.0,
 '9606.ENSP00000375892': 1.0,
 '9606.ENSP00000262719': 1.0,
 '9606.ENSP00000308413': 0.8571428571428571,
 '9606.ENSP00000468280': 0.8571428571428571}

In [57]:
bet_cen = nx.betweenness_centrality(G_cluster[index_5])

In [58]:
bet_cen

{'9606.ENSP00000263826': 0.007936507936507936,
 '9606.ENSP00000354558': 0.007936507936507936,
 '9606.ENSP00000225577': 0.007936507936507936,
 '9606.ENSP00000171887': 0.007936507936507936,
 '9606.ENSP00000375892': 0.007936507936507936,
 '9606.ENSP00000262719': 0.007936507936507936,
 '9606.ENSP00000308413': 0.0,
 '9606.ENSP00000468280': 0.0}

In [59]:
eig_cen = nx.eigenvector_centrality(G_cluster[index_5])

In [60]:
eig_cen

{'9606.ENSP00000263826': 0.363456285407753,
 '9606.ENSP00000354558': 0.363456285407753,
 '9606.ENSP00000225577': 0.363456285407753,
 '9606.ENSP00000171887': 0.363456285407753,
 '9606.ENSP00000375892': 0.363456285407753,
 '9606.ENSP00000262719': 0.363456285407753,
 '9606.ENSP00000308413': 0.3220226479501,
 '9606.ENSP00000468280': 0.3220226479501}

In [61]:
central_dict={"Degree cen":(max_keys(deg_cen),max_key_value(deg_cen)),"Betweeness cen":(max_keys(bet_cen),max_key_value(bet_cen)),"Eigenvector cen":(max_keys(eig_cen),max_key_value(eig_cen))}

In [62]:
central_dict

{'Degree cen': (['9606.ENSP00000263826',
   '9606.ENSP00000354558',
   '9606.ENSP00000225577',
   '9606.ENSP00000171887',
   '9606.ENSP00000375892',
   '9606.ENSP00000262719'],
  1.0),
 'Betweeness cen': (['9606.ENSP00000263826',
   '9606.ENSP00000354558',
   '9606.ENSP00000225577',
   '9606.ENSP00000171887',
   '9606.ENSP00000375892',
   '9606.ENSP00000262719'],
  0.007936507936507936),
 'Eigenvector cen': (['9606.ENSP00000263826',
   '9606.ENSP00000354558',
   '9606.ENSP00000225577',
   '9606.ENSP00000171887',
   '9606.ENSP00000375892',
   '9606.ENSP00000262719'],
  0.363456285407753)}

In [63]:
G_cluster

{0: <networkx.classes.graph.Graph at 0x7fe0442576a0>,
 1: <networkx.classes.graph.Graph at 0x7fe044257b50>,
 2: <networkx.classes.graph.Graph at 0x7fe044257280>}

find the communities which have links to the community of the target protein

In [64]:
#edges dict with community label as key and (0,1) for edge/no-edge as value
edges = {}
for i in range(number_of_communities_5-1):
    for j in range(i+1,number_of_communities_5):
        edges[str(i)+str(j)] = 0

for i in range(number_of_communities_5-1):
    for node in G_cluster[i].nodes():
        for neighbor in G0.neighbors(node):
            for j in range(i+1,number_of_communities_5):
                if neighbor in communities_5[j]:
                    edges[str(i)+str(j)] += 1

print(edges) # convention: 'ij' denotes the edge between node(=community) i and node(=community) j.

{'01': 25, '02': 44, '12': 25}


In [65]:
# Find the communities which have links to the community of the target protein
neighbor_community = []
for i in range(number_of_communities_5):
    if i < index_5:
        if edges[str(i)+str(index_5)] != 0:
            neighbor_community.append(i)
    if i > index_5:
        if edges[str(index_5)+str(i)] != 0:
            neighbor_community.append(i)
        
print(neighbor_community)

[0, 1]


Centrality of nodes in neighbouring community

In [66]:
com=G.subgraph(communities_5[1])
deg_cen_nei=nx.degree_centrality(com)


In [67]:
deg_cen_nei

{'9606.ENSP00000379842': 1.0,
 '9606.ENSP00000344220': 1.0,
 '9606.ENSP00000340608': 0.6666666666666666,
 '9606.ENSP00000429022': 0.6666666666666666}

In [68]:
bet_cen_nei=nx.betweenness_centrality(com)

In [69]:
bet_cen_nei

{'9606.ENSP00000379842': 0.16666666666666666,
 '9606.ENSP00000344220': 0.16666666666666666,
 '9606.ENSP00000340608': 0.0,
 '9606.ENSP00000429022': 0.0}

In [70]:
eig_cen_nei=nx.eigenvector_centrality(com)
eig_cen_nei

{'9606.ENSP00000379842': 0.5573453897277424,
 '9606.ENSP00000344220': 0.5573453897277424,
 '9606.ENSP00000340608': 0.43516217270028296,
 '9606.ENSP00000429022': 0.43516217270028296}

In [71]:
max(eig_cen_nei)

'9606.ENSP00000429022'