In [1]:
# one needs to import those packages which are needed; best to be done at the beginning of the program.
import networkx as nx

import numpy as np
import scipy as sp

# some basic settings for plotting figures
import matplotlib.pyplot as plt
%matplotlib inline 
font = {'family' : 'DejaVu Sans',
        'weight' : 'bold',
        'size'   : 32}

plt.rc('font', **font)
import community as community_louvain



Read in the network containing all human proteins on Uniprot

In [2]:
G0 = nx.read_weighted_edgelist("9606.protein.links.v11.5.txt",comments="#",nodetype=str)

In [3]:
#number of nodes of network
print('number of nodes of G0:',G0.number_of_nodes())

#number of edges of network
print('number of edges of G0:',G0.number_of_edges())

number of nodes of G0: 19385
number of edges of G0: 5969249


In [4]:
#get the largest component
largest_cc = max(nx.connected_components(G0),key=len)
G = G0.subgraph(largest_cc)
print('Type',type(largest_cc))
print('number of nodes of largest connected subgraph of G:',G.number_of_nodes())
print('number of edges of largest connected subgraph of G0:',G.number_of_edges())

Type <class 'set'>
number of nodes of largest connected subgraph of G: 19385
number of edges of largest connected subgraph of G0: 5969249


In [5]:
print("degree of target node: ",G0.degree('9606.ENSP00000375892'))

degree of target node:  1889


In [6]:
# compute degree sequence
degS=[G.degree()[node] for node in list(G.nodes())]
degS.sort()
degS=np.array(degS)

# 1st Partition

In [7]:
partLouvain = community_louvain.best_partition(G)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)

# of partitions for Louvain modularity = 7


In [8]:
type(partLouvain)

dict

In [9]:
index_1=partLouvain['9606.ENSP00000375892']

In [10]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities = {} #empty dictionary
for i in range(number_of_communities):
    communities[i] = [] #create an empty list for each community

for name, community in partLouvain.items():
    communities[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

The size of community # 0 is  3992
The size of community # 1 is  3031
The size of community # 2 is  2530
The size of community # 3 is  255
The size of community # 4 is  3894
The size of community # 5 is  2316
The size of community # 6 is  3367


In [11]:
'9606.ENSP00000375892' in communities[index_1]

True

In [12]:
type(communities[index_1])

list

Extracting the subgraph containing AKT2

In [13]:
sub_1=G.subgraph(communities[index_1])

In [14]:
#number of nodes of network
print('number of nodes of sub_1:',sub_1.number_of_nodes())

#number of edges of network
print('number of edges of sub_1:',sub_1.number_of_edges())

number of nodes of sub_1: 3894
number of edges of sub_1: 644093


# 2nd Partition

In [15]:
partLouvain_2 = community_louvain.best_partition(sub_1)
number_of_communities_2 = max(partLouvain_2.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_2)

# of partitions for Louvain modularity = 6


In [16]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_2 = {} #empty dictionary
for i in range(number_of_communities_2):
    communities_2[i] = [] #create an empty list for each community

for name, community in partLouvain_2.items():
    communities_2[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_2:
    print('The size of community_2 #', list(communities_2.keys())[k], 'is ',len(communities_2[k]))

The size of community_2 # 0 is  900
The size of community_2 # 1 is  1078
The size of community_2 # 2 is  356
The size of community_2 # 3 is  725
The size of community_2 # 4 is  297
The size of community_2 # 5 is  538


In [17]:
index_2=partLouvain_2['9606.ENSP00000375892']

Extracting the subgraph containing AKT2

In [18]:
sub_2=G.subgraph(communities_2[index_2])

In [19]:
#number of nodes of network
print('number of nodes of sub_2:',sub_2.number_of_nodes())

#number of edges of network
print('number of edges of sub_2:',sub_2.number_of_edges())

number of nodes of sub_2: 297
number of edges of sub_2: 17565


# 3rd Partition

In [20]:
partLouvain_3 = community_louvain.best_partition(sub_2)
number_of_communities_3 = max(partLouvain_3.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_3)

# of partitions for Louvain modularity = 6


In [21]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_3 = {} #empty dictionary
for i in range(number_of_communities_3):
    communities_3[i] = [] #create an empty list for each community

for name, community in partLouvain_3.items():
    communities_3[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_3:
    print('The size of community_3 #', list(communities_3.keys())[k], 'is ',len(communities_3[k]))

The size of community_3 # 0 is  16
The size of community_3 # 1 is  25
The size of community_3 # 2 is  89
The size of community_3 # 3 is  73
The size of community_3 # 4 is  61
The size of community_3 # 5 is  33


In [22]:
index_3=partLouvain_3['9606.ENSP00000375892']

In [23]:
index_3

4

Extracting the subgraph containing AKT2

In [24]:
sub_3=G.subgraph(communities_3[index_3])

In [25]:
#number of nodes of network
print('number of nodes of sub_3:',sub_3.number_of_nodes())

#number of edges of network
print('number of edges of sub_3:',sub_3.number_of_edges())

number of nodes of sub_3: 61
number of edges of sub_3: 1102


# 4th Partition

In [26]:
partLouvain_4 = community_louvain.best_partition(sub_3)
number_of_communities_4 = max(partLouvain_4.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_4)

# of partitions for Louvain modularity = 4


In [27]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_4 = {} #empty dictionary
for i in range(number_of_communities_4):
    communities_4[i] = [] #create an empty list for each community

for name, community in partLouvain_4.items():
    communities_4[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_4:
    print('The size of community_4 #', list(communities_4.keys())[k], 'is ',len(communities_4[k]))

The size of community_4 # 0 is  7
The size of community_4 # 1 is  23
The size of community_4 # 2 is  18
The size of community_4 # 3 is  13


In [28]:
index_4=partLouvain_4['9606.ENSP00000375892']

In [29]:
index_4

2

Extracting the subgraph containing AKT2

In [30]:
sub_4=G.subgraph(communities_4[index_4])

In [31]:
#number of nodes of network
print('number of nodes of sub_4:',sub_4.number_of_nodes())

#number of edges of network
print('number of edges of sub_4:',sub_4.number_of_edges())

number of nodes of sub_4: 18
number of edges of sub_4: 126


# 5th Partition

In [32]:
partLouvain_5 = community_louvain.best_partition(sub_4)
number_of_communities_5 = max(partLouvain_5.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_5)

# of partitions for Louvain modularity = 2


In [33]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_5 = {} #empty dictionary
for i in range(number_of_communities_5):
    communities_5[i] = [] #create an empty list for each community

for name, community in partLouvain_5.items():
    communities_5[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_5:
    print('The size of community_5 #', list(communities_5.keys())[k], 'is ',len(communities_5[k]))

The size of community_5 # 0 is  5
The size of community_5 # 1 is  13


In [34]:
index_5=partLouvain_5['9606.ENSP00000375892']

The index of the community containing AKT2 is:

In [35]:
index_5

0

The community is reduced to a manageable size (11) so we can extract the proteins.

In [36]:
print('The interesting proteins related to APK-2 are:')
for i in communities_5[index_5]:
    pro=i.lstrip('9606.')
    print(pro)

The interesting proteins related to APK-2 are:
ENSP00000367830
ENSP00000375892
ENSP00000303830
ENSP00000344220
ENSP00000263915


In [37]:
G_cluster = {}

for key in communities_5.keys():
    G_cluster[key] = G0.subgraph(communities_5[key])

Centrality of nodes in the community

In [57]:
def max_keys(dict):
    max_value=max(dict.values())
    return [k for k,v in dict.items() if v == max_value]

In [63]:
def max_key_value(dict):
   key=max_keys(dict)[0]
   return dict[key]

In [45]:
deg_cen=nx.degree_centrality(G_cluster[index_5])

In [46]:
deg_cen

{'9606.ENSP00000367830': 1.0,
 '9606.ENSP00000375892': 1.0,
 '9606.ENSP00000303830': 1.0,
 '9606.ENSP00000344220': 1.0,
 '9606.ENSP00000263915': 1.0}

In [47]:
bet_cen = nx.betweenness_centrality(G_cluster[index_5])

In [48]:
bet_cen

{'9606.ENSP00000367830': 0.0,
 '9606.ENSP00000375892': 0.0,
 '9606.ENSP00000303830': 0.0,
 '9606.ENSP00000344220': 0.0,
 '9606.ENSP00000263915': 0.0}

In [49]:
eig_cen = nx.eigenvector_centrality(G_cluster[index_5])

In [50]:
eig_cen

{'9606.ENSP00000367830': 0.447213595499958,
 '9606.ENSP00000375892': 0.447213595499958,
 '9606.ENSP00000303830': 0.447213595499958,
 '9606.ENSP00000344220': 0.447213595499958,
 '9606.ENSP00000263915': 0.447213595499958}

In [64]:
central_dict={"Degree cen":(max_keys(deg_cen),max_key_value(deg_cen)),"Betweeness cen":(max_keys(bet_cen),max_key_value(bet_cen)),"Eigenvector cen":(max_keys(eig_cen),max_key_value(eig_cen))}

In [65]:
central_dict

{'Degree cen': (['9606.ENSP00000367830',
   '9606.ENSP00000375892',
   '9606.ENSP00000303830',
   '9606.ENSP00000344220',
   '9606.ENSP00000263915'],
  1.0),
 'Betweeness cen': (['9606.ENSP00000367830',
   '9606.ENSP00000375892',
   '9606.ENSP00000303830',
   '9606.ENSP00000344220',
   '9606.ENSP00000263915'],
  0.0),
 'Eigenvector cen': (['9606.ENSP00000367830',
   '9606.ENSP00000375892',
   '9606.ENSP00000303830',
   '9606.ENSP00000344220',
   '9606.ENSP00000263915'],
  0.447213595499958)}

In [38]:
G_cluster

{0: <networkx.classes.graph.Graph at 0x1b6a7807a90>,
 1: <networkx.classes.graph.Graph at 0x1b6a7805960>}

find the communities which have links to the community of the target protein

In [39]:
#edges dict with community label as key and (0,1) for edge/no-edge as value
edges = {}
for i in range(number_of_communities_5-1):
    for j in range(i+1,number_of_communities_5):
        edges[str(i)+str(j)] = 0

for i in range(number_of_communities_5-1):
    for node in G_cluster[i].nodes():
        for neighbor in G0.neighbors(node):
            for j in range(i+1,number_of_communities_5):
                if neighbor in communities_5[j]:
                    edges[str(i)+str(j)] += 1

print(edges) # convention: 'ij' denotes the edge between node(=community) i and node(=community) j.

{'01': 52}


In [40]:
# Find the communities which have links to the community of the target protein
neighbor_community = []
for i in range(number_of_communities_5):
    if i < index_5:
        if edges[str(i)+str(index_5)] != 0:
            neighbor_community.append(i)
    if i > index_5:
        if edges[str(index_5)+str(i)] != 0:
            neighbor_community.append(i)
        
print(neighbor_community)

[1]


Centrality of nodes in neighbouring community

In [51]:
com=G.subgraph(communities_5[1])
deg_cen_nei=nx.degree_centrality(com)


In [52]:
deg_cen_nei

{'9606.ENSP00000378217': 0.75,
 '9606.ENSP00000262741': 0.8333333333333333,
 '9606.ENSP00000222254': 0.8333333333333333,
 '9606.ENSP00000480059': 0.5833333333333333,
 '9606.ENSP00000366563': 0.9166666666666666,
 '9606.ENSP00000352121': 1.0,
 '9606.ENSP00000439913': 0.5833333333333333,
 '9606.ENSP00000215912': 0.41666666666666663,
 '9606.ENSP00000289153': 1.0,
 '9606.ENSP00000361202': 0.9166666666666666,
 '9606.ENSP00000263967': 1.0,
 '9606.ENSP00000268035': 0.9166666666666666,
 '9606.ENSP00000304895': 0.9166666666666666}

In [53]:
bet_cen_nei=nx.betweenness_centrality(com)

In [54]:
bet_cen_nei

{'9606.ENSP00000378217': 0.0,
 '9606.ENSP00000262741': 0.01515151515151515,
 '9606.ENSP00000222254': 0.01515151515151515,
 '9606.ENSP00000480059': 0.0,
 '9606.ENSP00000366563': 0.01515151515151515,
 '9606.ENSP00000352121': 0.0404040404040404,
 '9606.ENSP00000439913': 0.0,
 '9606.ENSP00000215912': 0.0,
 '9606.ENSP00000289153': 0.0404040404040404,
 '9606.ENSP00000361202': 0.01515151515151515,
 '9606.ENSP00000263967': 0.0404040404040404,
 '9606.ENSP00000268035': 0.01515151515151515,
 '9606.ENSP00000304895': 0.01515151515151515}

In [55]:
eig_cen_nei=nx.eigenvector_centrality(com)
eig_cen_nei

{'9606.ENSP00000378217': 0.2650085582921057,
 '9606.ENSP00000262741': 0.2780732558041615,
 '9606.ENSP00000222254': 0.2780732558041615,
 '9606.ENSP00000480059': 0.2106812492294767,
 '9606.ENSP00000366563': 0.30250631490069146,
 '9606.ENSP00000352121': 0.3155710124127473,
 '9606.ENSP00000439913': 0.2106812492294767,
 '9606.ENSP00000215912': 0.14680715734015237,
 '9606.ENSP00000289153': 0.3155710124127473,
 '9606.ENSP00000361202': 0.30250631490069146,
 '9606.ENSP00000263967': 0.3155710124127473,
 '9606.ENSP00000268035': 0.30250631490069146,
 '9606.ENSP00000304895': 0.30250631490069146}

In [56]:
max(eig_cen_nei)

'9606.ENSP00000480059'