In [1]:
# one needs to import those packages which are needed; best to be done at the beginning of the program.
import networkx as nx

import numpy as np
import scipy as sp

# some basic settings for plotting figures
import matplotlib.pyplot as plt
%matplotlib inline 
font = {'family' : 'DejaVu Sans',
        'weight' : 'bold',
        'size'   : 32}

plt.rc('font', **font)
import community as community_louvain



In [2]:
G0 = nx.read_weighted_edgelist("9606.protein.links.v11.5.txt",comments="#",nodetype=str)

In [3]:
#number of nodes of network
print('number of nodes of G0:',G0.number_of_nodes())

#number of edges of network
print('number of edges of G0:',G0.number_of_edges())

number of nodes of G0: 19385
number of edges of G0: 5969249


In [4]:
#get the largest component
largest_cc = max(nx.connected_components(G0),key=len)
G = G0.subgraph(largest_cc)
print('Type',type(largest_cc))
print('number of nodes of largest connected subgraph of G:',G.number_of_nodes())
print('number of edges of largest connected subgraph of G0:',G.number_of_edges())

Type <class 'set'>
number of nodes of largest connected subgraph of G: 19385
number of edges of largest connected subgraph of G0: 5969249


In [5]:
print("degree of target node: ",G0.degree('9606.ENSP00000375892'))

degree of target node:  1889


In [6]:
# compute degree sequence
degS=[G.degree()[node] for node in list(G.nodes())]
degS.sort()
degS=np.array(degS)

In [7]:
partLouvain = community_louvain.best_partition(G)
number_of_communities = max(partLouvain.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities)

# of partitions for Louvain modularity = 7


In [8]:
type(partLouvain)

dict

In [9]:
index_1=partLouvain['9606.ENSP00000375892']

3

In [10]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities = {} #empty dictionary
for i in range(number_of_communities):
    communities[i] = [] #create an empty list for each community

for name, community in partLouvain.items():
    communities[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities:
    print('The size of community #', list(communities.keys())[k], 'is ',len(communities[k]))

The size of community # 0 is  2706
The size of community # 1 is  3271
The size of community # 2 is  3524
The size of community # 3 is  3604
The size of community # 4 is  2973
The size of community # 5 is  2967
The size of community # 6 is  340


In [16]:
'9606.ENSP00000375892' in communities[index_1]

True

In [17]:
type(communities[index_1])

list

In [18]:
sub_1=G.subgraph(communities[index_1])

In [20]:
#number of nodes of network
print('number of nodes of sub_1:',sub_1.number_of_nodes())

#number of edges of network
print('number of edges of sub_1:',sub_1.number_of_edges())

number of nodes of sub_1: 3604
number of edges of sub_1: 650722


In [23]:
partLouvain_2 = community_louvain.best_partition(sub_1)
number_of_communities_2 = max(partLouvain_2.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_2)

# of partitions for Louvain modularity = 6


In [24]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_2 = {} #empty dictionary
for i in range(number_of_communities_2):
    communities_2[i] = [] #create an empty list for each community

for name, community in partLouvain_2.items():
    communities_2[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_2:
    print('The size of community_2 #', list(communities_2.keys())[k], 'is ',len(communities_2[k]))

The size of community_2 # 0 is  311
The size of community_2 # 1 is  603
The size of community_2 # 2 is  328
The size of community_2 # 3 is  835
The size of community_2 # 4 is  497
The size of community_2 # 5 is  1030


In [25]:
index_2=partLouvain_2['9606.ENSP00000375892']

In [28]:
sub_2=G.subgraph(communities_2[index_2])

In [29]:
#number of nodes of network
print('number of nodes of sub_2:',sub_2.number_of_nodes())

#number of edges of network
print('number of edges of sub_2:',sub_2.number_of_edges())

number of nodes of sub_2: 328
number of edges of sub_2: 18637


In [30]:
partLouvain_3 = community_louvain.best_partition(sub_2)
number_of_communities_3 = max(partLouvain_3.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_3)

# of partitions for Louvain modularity = 4


In [31]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_3 = {} #empty dictionary
for i in range(number_of_communities_3):
    communities_3[i] = [] #create an empty list for each community

for name, community in partLouvain_3.items():
    communities_3[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_3:
    print('The size of community_3 #', list(communities_3.keys())[k], 'is ',len(communities_3[k]))

The size of community_3 # 0 is  34
The size of community_3 # 1 is  88
The size of community_3 # 2 is  60
The size of community_3 # 3 is  146


In [32]:
index_3=partLouvain_3['9606.ENSP00000375892']

In [33]:
index_3

3

In [34]:
sub_3=G.subgraph(communities_3[index_3])

In [35]:
#number of nodes of network
print('number of nodes of sub_3:',sub_3.number_of_nodes())

#number of edges of network
print('number of edges of sub_3:',sub_3.number_of_edges())

number of nodes of sub_3: 146
number of edges of sub_3: 5046


In [36]:
partLouvain_4 = community_louvain.best_partition(sub_3)
number_of_communities_4 = max(partLouvain_4.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_4)

# of partitions for Louvain modularity = 5


In [37]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_4 = {} #empty dictionary
for i in range(number_of_communities_4):
    communities_4[i] = [] #create an empty list for each community

for name, community in partLouvain_4.items():
    communities_4[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_4:
    print('The size of community_4 #', list(communities_4.keys())[k], 'is ',len(communities_4[k]))

The size of community_4 # 0 is  44
The size of community_4 # 1 is  52
The size of community_4 # 2 is  12
The size of community_4 # 3 is  18
The size of community_4 # 4 is  20


In [38]:
index_4=partLouvain_4['9606.ENSP00000375892']

In [39]:
index_4

1

In [41]:
sub_4=G.subgraph(communities_4[index_4])

In [42]:
#number of nodes of network
print('number of nodes of sub_4:',sub_4.number_of_nodes())

#number of edges of network
print('number of edges of sub_4:',sub_4.number_of_edges())

number of nodes of sub_4: 52
number of edges of sub_4: 797


In [43]:
partLouvain_5 = community_louvain.best_partition(sub_4)
number_of_communities_5 = max(partLouvain_5.values())+1 #We add one because the indexing starts at 0.
print('# of partitions for Louvain modularity =',number_of_communities_5)

# of partitions for Louvain modularity = 4


In [44]:
# Let's construct a dictionary object called 'communities'. The keys will be the community labels and the values 
# will be a list of nodes in that community. The more experienced python users among you will probably see an 
# easier/faster way to do this.

communities_5 = {} #empty dictionary
for i in range(number_of_communities_5):
    communities_5[i] = [] #create an empty list for each community

for name, community in partLouvain_5.items():
    communities_5[community].append(name) #go through the computed partition and add each node to the appropriate list
    

# The dictionary we have constructed is similar to what the output of the Louvain algorithm in NetworkX would be. 
# In your own investigations you can decide what is more useful.

#Now let's find out how big each community is. You could accomplish this in the following way:
for k in communities_5:
    print('The size of community_5 #', list(communities_5.keys())[k], 'is ',len(communities_5[k]))

The size of community_5 # 0 is  21
The size of community_5 # 1 is  3
The size of community_5 # 2 is  17
The size of community_5 # 3 is  11


In [45]:
index_5=partLouvain_5['9606.ENSP00000375892']

In [46]:
index_5

3

In [47]:
print('The interesting proteins related to APK-2 are:')
for i in communities_5[index_5]:
    pro=i.lstrip('9606.')
    print(pro)

The interesting proteins related to APK-2 are:
ENSP00000385824
ENSP00000324806
ENSP00000363377
ENSP00000354558
ENSP00000228872
ENSP00000263826
ENSP00000375892
ENSP00000381070
ENSP00000340691
ENSP00000280154
ENSP00000225577
