In [41]:
# one needs to import those packages which are needed; best to be done at the beginning of the program.
import networkx as nx

import numpy as np
import scipy as sp

# some basic settings for plotting figures
import matplotlib.pyplot as plt
%matplotlib inline 
font = {'family' : 'DejaVu Sans',
        'weight' : 'bold',
        'size'   : 32}

plt.rc('font', **font)
import community as community_louvain

Read in the network containing all human proteins on Uniprot

In [42]:
G0 = nx.read_weighted_edgelist("9606.protein.links.v11.5.txt",comments="#",nodetype=str)

In [43]:
#number of nodes of network
print('number of nodes of G0:',G0.number_of_nodes())

#number of edges of network
print('number of edges of G0:',G0.number_of_edges())

number of nodes of G0: 19385
number of edges of G0: 5969249


In [44]:
#get the largest component
largest_cc = max(nx.connected_components(G0),key=len)
G = G0.subgraph(largest_cc)
print('Type',type(largest_cc))
print('number of nodes of largest connected subgraph of G:',G.number_of_nodes())
print('number of edges of largest connected subgraph of G0:',G.number_of_edges())

Type <class 'set'>
number of nodes of largest connected subgraph of G: 19385
number of edges of largest connected subgraph of G0: 5969249


In [45]:
print("degree of target node: ",G0.degree('9606.ENSP00000375892'))

degree of target node:  1889


In [46]:
# compute degree sequence
degS=[G.degree()[node] for node in list(G.nodes())]
degS.sort()
degS=np.array(degS)

# 1st Partition

In [47]:
partLouvain = community_louvain.best_partition(G)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)

# of partitions for Louvain modularity = 7


In [48]:
type(partLouvain)

dict

In [49]:
index_1=partLouvain['9606.ENSP00000375892']

In [50]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities = {} #empty dictionary
for i in range(number_of_communities):
    communities[i] = [] #create an empty list for each community

for name, community in partLouvain.items():
    communities[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

The size of community # 0 is  2649
The size of community # 1 is  3299
The size of community # 2 is  3605
The size of community # 3 is  3644
The size of community # 4 is  2028
The size of community # 5 is  3219
The size of community # 6 is  941


In [51]:
'9606.ENSP00000375892' in communities[index_1]

True

In [52]:
type(communities[index_1])

list

Extracting the subgraph containing AKT2

In [53]:
sub_1=G.subgraph(communities[index_1])

In [54]:
#number of nodes of network
print('number of nodes of sub_1:',sub_1.number_of_nodes())

#number of edges of network
print('number of edges of sub_1:',sub_1.number_of_edges())

number of nodes of sub_1: 3644
number of edges of sub_1: 641161


# 2nd Partition

In [55]:
partLouvain_2 = community_louvain.best_partition(sub_1)
number_of_communities_2 = max(partLouvain_2.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_2)

# of partitions for Louvain modularity = 6


In [56]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_2 = {} #empty dictionary
for i in range(number_of_communities_2):
    communities_2[i] = [] #create an empty list for each community

for name, community in partLouvain_2.items():
    communities_2[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_2:
    print('The size of community_2 #', list(communities_2.keys())[k], 'is ',len(communities_2[k]))

The size of community_2 # 0 is  1027
The size of community_2 # 1 is  254
The size of community_2 # 2 is  335
The size of community_2 # 3 is  842
The size of community_2 # 4 is  657
The size of community_2 # 5 is  529


In [57]:
index_2=partLouvain_2['9606.ENSP00000375892']

Extracting the subgraph containing AKT2

In [58]:
sub_2=G.subgraph(communities_2[index_2])

In [59]:
#number of nodes of network
print('number of nodes of sub_2:',sub_2.number_of_nodes())

#number of edges of network
print('number of edges of sub_2:',sub_2.number_of_edges())

number of nodes of sub_2: 335
number of edges of sub_2: 20296


# 3rd Partition

In [60]:
partLouvain_3 = community_louvain.best_partition(sub_2)
number_of_communities_3 = max(partLouvain_3.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_3)

# of partitions for Louvain modularity = 4


In [61]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_3 = {} #empty dictionary
for i in range(number_of_communities_3):
    communities_3[i] = [] #create an empty list for each community

for name, community in partLouvain_3.items():
    communities_3[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_3:
    print('The size of community_3 #', list(communities_3.keys())[k], 'is ',len(communities_3[k]))

The size of community_3 # 0 is  74
The size of community_3 # 1 is  81
The size of community_3 # 2 is  78
The size of community_3 # 3 is  102


In [62]:
index_3=partLouvain_3['9606.ENSP00000375892']

In [63]:
index_3

3

Extracting the subgraph containing AKT2

In [64]:
sub_3=G.subgraph(communities_3[index_3])

In [65]:
#number of nodes of network
print('number of nodes of sub_3:',sub_3.number_of_nodes())

#number of edges of network
print('number of edges of sub_3:',sub_3.number_of_edges())

number of nodes of sub_3: 102
number of edges of sub_3: 2362


# 4th Partition

In [66]:
partLouvain_4 = community_louvain.best_partition(sub_3)
number_of_communities_4 = max(partLouvain_4.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_4)

# of partitions for Louvain modularity = 4


In [67]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_4 = {} #empty dictionary
for i in range(number_of_communities_4):
    communities_4[i] = [] #create an empty list for each community

for name, community in partLouvain_4.items():
    communities_4[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_4:
    print('The size of community_4 #', list(communities_4.keys())[k], 'is ',len(communities_4[k]))

The size of community_4 # 0 is  38
The size of community_4 # 1 is  18
The size of community_4 # 2 is  38
The size of community_4 # 3 is  8


In [68]:
index_4=partLouvain_4['9606.ENSP00000375892']

In [69]:
index_4

2

Extracting the subgraph containing AKT2

In [70]:
sub_4=G.subgraph(communities_4[index_4])

In [71]:
#number of nodes of network
print('number of nodes of sub_4:',sub_4.number_of_nodes())

#number of edges of network
print('number of edges of sub_4:',sub_4.number_of_edges())

number of nodes of sub_4: 38
number of edges of sub_4: 497


# 5th Partition

In [72]:
partLouvain_5 = community_louvain.best_partition(sub_4)
number_of_communities_5 = max(partLouvain_5.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_5)

# of partitions for Louvain modularity = 3


In [73]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_5 = {} #empty dictionary
for i in range(number_of_communities_5):
    communities_5[i] = [] #create an empty list for each community

for name, community in partLouvain_5.items():
    communities_5[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_5:
    print('The size of community_5 #', list(communities_5.keys())[k], 'is ',len(communities_5[k]))

The size of community_5 # 0 is  14
The size of community_5 # 1 is  14
The size of community_5 # 2 is  10


In [74]:
index_5=partLouvain_5['9606.ENSP00000375892']

The index of the community containing AKT2 is:

In [75]:
index_5

1

The community is reduced to a manageable size (11) so we can extract the proteins.

In [76]:
print('The interesting proteins related to APK-2 are:')
for i in communities_5[index_5]:
    pro=i.lstrip('9606.')
    print(pro)

The interesting proteins related to APK-2 are:
ENSP00000354558
ENSP00000225577
ENSP00000361021
ENSP00000451828
ENSP00000367830
ENSP00000339577
ENSP00000263826
ENSP00000251849
ENSP00000344220
ENSP00000345629
ENSP00000375711
ENSP00000340691
ENSP00000201979
ENSP00000375892


In [77]:
G_cluster = {}

for key in communities_5.keys():
    G_cluster[key] = G0.subgraph(communities_5[key])

In [78]:
G_cluster

{0: <networkx.classes.graph.Graph at 0x186ec4c6710>,
 1: <networkx.classes.graph.Graph at 0x186ea587880>,
 2: <networkx.classes.graph.Graph at 0x186e9abc1f0>}

find the communities which have links to the community of the target protein

In [79]:
#edges dict with community label as key and (0,1) for edge/no-edge as value
edges = {}
for i in range(number_of_communities_5-1):
    for j in range(i+1,number_of_communities_5):
        edges[str(i)+str(j)] = 0

for i in range(number_of_communities_5-1):
    for node in G_cluster[i].nodes():
        for neighbor in G0.neighbors(node):
            for j in range(i+1,number_of_communities_5):
                if neighbor in communities_5[j]:
                    edges[str(i)+str(j)] += 1

print(edges) # convention: 'ij' denotes the edge between node(=community) i and node(=community) j.

{'01': 142, '02': 89, '12': 78}


In [80]:
# Find the communities which have links to the community of the target protein
neighbor_community = []
for i in range(number_of_communities_5):
    if i < index_5:
        if edges[str(i)+str(index_5)] != 0:
            neighbor_community.append(i)
    if i > index_5:
        if edges[str(index_5)+str(i)] != 0:
            neighbor_community.append(i)
        
print(neighbor_community)

[0, 2]
