In [1]:
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict
import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import label_propagation_communities
import collections

In [2]:
# Locations of the GML files holding the pre and post networks.
pre_fname = '../data/RectumMicrobiome_PrePost/stool_network_pre.gml.txt.network'
post_fname = '../data/RectumMicrobiome_PrePost/stool_network_post.gml.txt.network'

# Load the pre network (nodes are labelled by their GML 'id') and show its summary.
pre_wG = nx.read_gml(pre_fname, label='id')
print("pre-network:", nx.info(pre_wG), sep="\n")

print('-----------------------')

# Same for the post network.
post_wG = nx.read_gml(post_fname, label='id')
print("post-network:", nx.info(post_wG), sep="\n")

pre-network:
Name: 
Type: Graph
Number of nodes: 357
Number of edges: 5605
Average degree:  31.4006
-----------------------
post-network:
Name: 
Type: Graph
Number of nodes: 256
Number of edges: 1789
Average degree:  13.9766


In [3]:
# Node-id -> OTUID lookup tables, one per network.
# Only nodes that actually carry an 'OTUID' attribute get an entry.
pre_OTUID = {
    node_id: attrs['OTUID']
    for node_id, attrs in pre_wG.nodes(data=True)
    if 'OTUID' in attrs
}

post_OTUID = {
    node_id: attrs['OTUID']
    for node_id, attrs in post_wG.nodes(data=True)
    if 'OTUID' in attrs
}

In [4]:
# Taxonomy lookup keyed by OTUID.
# format {OTUID_1 : {"kingdom" : "k_1", "phylum" : "p_1", ...}, OTUID_2 : {"kingdom" : "k_2", ...}, ...}
TAXONOMY_FIELDS = ("kingdom", "phylum", "class", "order",
                   "family", "genus", "specie", "lasttaxa")

OTU_info = defaultdict(dict)

# Walk every node of each graph instead of assuming the ids are exactly
# 0..356 / 0..255; this also avoids the `G.node` attribute, which newer
# NetworkX releases no longer provide.
# The pre network is processed first, so for an OTU present in both networks
# the post network's values are the ones that remain.
for graph in (pre_wG, post_wG):
    for node_id, attrs in graph.nodes(data=True):
        otu_entry = OTU_info[attrs['OTUID']]
        for field in TAXONOMY_FIELDS:
            otu_entry[field] = attrs[field]

In [5]:
# Community detection with greedy modularity maximisation on each network.
# Each result is a sequence of communities; each community is a collection of node ids.
pre_cluster = greedy_modularity_communities(pre_wG)
post_cluster = greedy_modularity_communities(post_wG)

# Number of communities found (pre, then post); the recorded run shows 8 and 9.
print(len(pre_cluster), len(post_cluster), sep="\n")

8
9


In [6]:
# The main data structure: one list of OTUIDs per (post cluster, pre cluster)
# pair whose members overlap enough.
# format : [[OTUs in group 0], [OTUs in group 1], ..., [OTUs in group k]]
MIN_SHARED_OTUS = 8  # a pair must share at least this many OTUs to form a group

# Translate every pre cluster to OTUIDs once, rather than once per post cluster.
pre_cluster_otuids = [[pre_OTUID[v] for v in members] for members in pre_cluster]

subgroup_clauset = []
for post_members in post_cluster:
    post_otuids = set([post_OTUID[v] for v in post_members])
    for pre_otuids in pre_cluster_otuids:
        common = list(post_otuids.intersection(pre_otuids))
        if len(common) >= MIN_SHARED_OTUS:
            subgroup_clauset.append(common)

<h1>Total count of each OTU in the pre and post samples (all OTUs, not only those in the networks)</h1>

In [8]:
import csv


def write_total_counts(counts_path, totals_path):
    """Sum the counts of every OTU row and write one total per OTU as CSV.

    The first row of ``counts_path`` is treated as a header and skipped. For
    each remaining row, the first column is copied as the OTU identifier and
    all other columns are parsed with ``int`` and added up.

    Parameters
    ----------
    counts_path : str
        CSV file to read (OTU id in column 0, counts in the other columns).
    totals_path : str
        CSV file to (over)write with the header ``OTU,Total_sample_counts``
        followed by one ``OTU,total`` row per input row.

    Raises
    ------
    OSError
        If either file cannot be opened.
    ValueError
        If a count cell is not an integer literal.
    """
    # `with` closes both files even if a row fails to parse; newline='' is
    # what the csv module asks for so line endings are not translated twice.
    with open(counts_path, 'r', newline='') as counts_file, \
            open(totals_path, 'w', newline='') as totals_file:
        reader = csv.reader(counts_file, delimiter=',')
        writer = csv.writer(totals_file, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['OTU', 'Total_sample_counts'])

        next(reader, None)  # skip the header row
        for row in reader:
            if not row:  # blank line in the input
                continue
            # Accumulate under a local name so the builtin `sum` is never
            # rebound at notebook scope.
            total = 0
            for cell in row[1:]:
                total += int(cell)
            writer.writerow([row[0], total])


# sum up the pre counts
write_total_counts("task3/shedd_Pre_counts.csv", "task3/total_count_pre.csv")

# sum up the post counts
write_total_counts("task3/shedd_Post_counts.csv", "task3/total_count_post.csv")

<h1>Order counts of each cluster in the pre network</h1>

In [8]:
# Per-cluster totals broken down by taxonomic order.
# format : {0 : {order_1 : count, order_2 : count, ...}, 1 : {order_1 : count, order_2 : count, ...}, ...}
pre_cluster_order_info = defaultdict(lambda: defaultdict(int))

# Total count of each OTU; OTUs missing from the file read as 0.
# format : {otuid_1 : count, otuid_2 : count, ...}
pre_network_otu_count = defaultdict(int)

# construct pre_network_otu_count
# `with` guarantees the file is closed even if a row fails to parse.
with open("task3/total_count_pre.csv", "r", newline='') as total_pre_csv:
    csv_pre_reader = csv.reader(total_pre_csv, delimiter=',')
    next(csv_pre_reader, None)  # skip the header row
    for row in csv_pre_reader:
        if not row:  # tolerate blank lines
            continue
        pre_network_otu_count[row[0]] = int(row[1])

# Add each member's total count to the bucket of its order within its cluster.
for cluster_num, otus in enumerate(pre_cluster):
    for otu in otus:
        otuid = pre_OTUID[otu]
        order = OTU_info[otuid]['order']
        count = pre_network_otu_count[otuid]
        pre_cluster_order_info[cluster_num][order] += count

In [9]:
# Print each pre cluster's per-order counts as "['order', count]," rows.
for cluster_idx, per_order in pre_cluster_order_info.items():
    report = ["Pre-Cluster #{}".format(cluster_idx)]
    report.extend("['{}', {}],".format(name, total) for name, total in per_order.items())
    report.append("\n-------------------\n")
    print("\n".join(report))

Pre-Cluster #0
['o__Cardiobacteriales', 464],
['o__Pseudomonadales', 1474],
['Unknown', 11],
['o__[Saprospirales]', 531],
['o__Vibrionales', 15699],
['o__Bacteroidales', 6575],
['o__Alteromonadales', 1083],
['o__Fusobacteriales', 74579],
['o__Campylobacterales', 2069],
['o__Enterobacteriales', 4387],
['o__Gemellales', 318],
['o__Burkholderiales', 1303],
['o__Pasteurellales', 20510],
['o__Clostridiales', 89048],
['o__Flavobacteriales', 481],
['o__Lactobacillales', 91],
['o__Oceanospirillales', 42],
['o__Erysipelotrichales', 1492],
['o__Bacillales', 687],
['o__Mycoplasmatales', 140],
['o__Actinomycetales', 4756],
['o__Rhodobacterales', 97],

-------------------

Pre-Cluster #1
['o__Cardiobacteriales', 308],
['o__Pseudomonadales', 5511],
['o__Vibrionales', 19840],
['o__[Saprospirales]', 177],
['o__Pseudanabaenales', 536],
['o__Alteromonadales', 1941],
['o__Rhizobiales', 107],
['o__Fusobacteriales', 8479],
['o__Sva0725', 34],
['o__Bacillales', 31],
['o__Pasteurellales', 1004],
['o__Clostri

<h1>Order counts of each cluster in the post network</h1>

In [10]:
# Per-cluster totals broken down by taxonomic order.
# format : {0 : {order_1 : count, order_2 : count, ...}, 1 : {order_1 : count, order_2 : count, ...}, ...}
post_cluster_order_info = defaultdict(lambda: defaultdict(int))

# Total count of each OTU; OTUs missing from the file read as 0.
# format : {otuid_1 : count, otuid_2 : count, ...}
post_network_otu_count = defaultdict(int)

# construct post_network_otu_count
# `with` guarantees the file is closed even if a row fails to parse.
with open("task3/total_count_post.csv", "r", newline='') as total_post_csv:
    csv_post_reader = csv.reader(total_post_csv, delimiter=',')
    next(csv_post_reader, None)  # skip the header row
    for row in csv_post_reader:
        if not row:  # tolerate blank lines
            continue
        post_network_otu_count[row[0]] = int(row[1])

# Add each member's total count to the bucket of its order within its cluster.
for cluster_num, otus in enumerate(post_cluster):
    for otu in otus:
        otuid = post_OTUID[otu]
        order = OTU_info[otuid]['order']
        count = post_network_otu_count[otuid]
        post_cluster_order_info[cluster_num][order] += count

In [11]:
# Print each post cluster's per-order counts as "['order', count]," rows.
for cluster_idx, per_order in post_cluster_order_info.items():
    report = ["Post-Cluster #{}".format(cluster_idx)]
    report.extend("['{}', {}],".format(name, total) for name, total in per_order.items())
    report.append("\n-------------------\n")
    print("\n".join(report))

Post-Cluster #0
['o__Burkholderiales', 324],
['o__Erysipelotrichales', 777],
['o__Fusobacteriales', 82140],
['o__Clostridiales', 96917],
['o__Enterobacteriales', 9681],
['o__Vibrionales', 33712],
['o__Bacteroidales', 5373],
['o__Actinomycetales', 828],

-------------------

Post-Cluster #1
['o__Gemellales', 32],
['o__Rhodobacterales', 465],
['o__Pseudomonadales', 4028],
['o__Pasteurellales', 19619],
['o__Clostridiales', 4050],
['o__Flavobacteriales', 13],
['o__Lactobacillales', 333],
['o__Oceanospirillales', 199],
['o__Sva0725', 18],
['o__[Saprospirales]', 54],
['o__Bacteroidales', 80],
['o__Alteromonadales', 31],
['o__Burkholderiales', 89],
['o__Rhizobiales', 31],
['o__Sphingomonadales', 171],
['o__Neisseriales', 126],
['o__Xanthomonadales', 64],
['o__Enterobacteriales', 384],
['o__Vibrionales', 1536],
['o__Bacillales', 601],
['o__Actinomycetales', 1189],

-------------------

Post-Cluster #2
['o__Gemellales', 566],
['o__Cardiobacteriales', 65],
['o__Pseudomonadales', 3916],
['o__Clos

In [30]:
# List every member of post cluster 7 as "OTUID : total count".
for node_id in post_cluster[7]:
    member_otuid = post_OTUID[node_id]
    print("{} : {}".format(member_otuid, post_network_otu_count[member_otuid]))

179845 : 30
1107027 : 27129
New.0.ReferenceOTU359 : 519


<h1> Order counts of each group in the pre network </h1>

In [12]:
# Pre-sample counts per taxonomic order for every shared group.
# Groups are keyed by their size (number of OTUs):
# format : {42 : {order_1 : count, order_2 : count, ...}, 9 : {order_1 : count, order_2 : count, ...}, ...}
# NOTE(review): two groups of equal size would share a key and have their counts merged.
pre_group_order_info = defaultdict(lambda: defaultdict(int))

for group_otuids in subgroup_clauset:
    group_size = len(group_otuids)
    for otuid in group_otuids:
        abundance = pre_network_otu_count[otuid]
        order_name = OTU_info[otuid]['order']
        pre_group_order_info[group_size][order_name] += abundance

In [13]:
# Print each group's pre-sample per-order counts; the number after '#' is the dictionary key.
for group_key, per_order in pre_group_order_info.items():
    report = ["Pre-group #{}".format(group_key)]
    report.extend("['{}', {}],".format(name, total) for name, total in per_order.items())
    report.append("\n-------------------\n")
    print("\n".join(report))

Pre-group #17
['o__Burkholderiales', 35],
['o__Pseudomonadales', 4494],
['o__Flavobacteriales', 103],
['o__Vibrionales', 632],
['o__Pseudanabaenales', 346],
['o__Rhodobacterales', 1780],
['o__Alteromonadales', 1738],

-------------------

Pre-group #21
['o__Gemellales', 121],
['o__Bacillales', 93],
['o__Pseudomonadales', 1768],
['o__Sphingomonadales', 178],
['o__Clostridiales', 154],
['o__Neisseriales', 119],
['o__Rhodobacterales', 534],
['o__Enterobacteriales', 81],
['o__Bacteroidales', 336],
['o__Actinomycetales', 1260],
['o__Alteromonadales', 55],

-------------------

Pre-group #22
['o__Burkholderiales', 6383],
['o__Rhizobiales', 77],
['o__Sphingomonadales', 1204],
['o__Caulobacterales', 175],
['o__Xanthomonadales', 867],
['o__Bacillales', 8453],
['o__Actinomycetales', 161],

-------------------

Pre-group #9
['o__[Chthoniobacterales]', 263],
['o__Pseudomonadales', 121],
['o__Clostridiales', 110],
['o__Oceanospirillales', 61],
['o__Vibrionales', 201],
['o__Bacteroidales', 23],
['o_

<h1> Order counts of each group in the post network </h1>

In [14]:
# Post-sample counts per taxonomic order for every shared group.
# Groups are keyed by their size (number of OTUs):
# format : {size_a : {order_1 : count, order_2 : count, ...}, size_b : {order_1 : count, ...}, ...}
# NOTE(review): two groups of equal size would share a key and have their counts merged.
post_group_order_info = defaultdict(lambda: defaultdict(int))

for group_otuids in subgroup_clauset:
    group_size = len(group_otuids)
    for otuid in group_otuids:
        abundance = post_network_otu_count[otuid]
        order_name = OTU_info[otuid]['order']
        post_group_order_info[group_size][order_name] += abundance

In [15]:
# Print each group's post-sample per-order counts; the number after '#' is the dictionary key.
for group_key, per_order in post_group_order_info.items():
    report = ["Post-group #{}".format(group_key)]
    report.extend("['{}', {}],".format(name, total) for name, total in per_order.items())
    report.append("\n-------------------\n")
    print("\n".join(report))

Post-group #17
['o__Burkholderiales', 88],
['o__Pseudomonadales', 3591],
['o__Flavobacteriales', 144],
['o__Vibrionales', 319],
['o__Pseudanabaenales', 68],
['o__Rhodobacterales', 2194],
['o__Alteromonadales', 1600],

-------------------

Post-group #21
['o__Gemellales', 32],
['o__Bacillales', 20],
['o__Pseudomonadales', 786],
['o__Sphingomonadales', 95],
['o__Clostridiales', 68],
['o__Neisseriales', 63],
['o__Rhodobacterales', 465],
['o__Enterobacteriales', 19],
['o__Bacteroidales', 80],
['o__Actinomycetales', 772],
['o__Alteromonadales', 31],

-------------------

Post-group #22
['o__Burkholderiales', 36670],
['o__Rhizobiales', 193],
['o__Sphingomonadales', 14743],
['o__Caulobacterales', 2510],
['o__Xanthomonadales', 11539],
['o__Bacillales', 5631],
['o__Actinomycetales', 1043],

-------------------

Post-group #9
['o__[Chthoniobacterales]', 398],
['o__Pseudomonadales', 163],
['o__Clostridiales', 75],
['o__Oceanospirillales', 45],
['o__Vibrionales', 1221],
['o__Bacteroidales', 22],
[

In [16]:
# Total count of all members of each pre cluster.
for cluster_num, members in enumerate(pre_cluster):
    print("Pre-cluster # {}".format(cluster_num))
    # Accumulate under a dedicated name: assigning to `sum` here would rebind
    # the builtin for every cell that runs afterwards.
    cluster_total = 0
    for node_id in members:
        cluster_total += pre_network_otu_count[pre_OTUID[node_id]]
    print("Total: {}".format(cluster_total))

Pre-cluster # 0
Total: 225837
Pre-cluster # 1
Total: 51804
Pre-cluster # 2
Total: 6540
Pre-cluster # 3
Total: 50760
Pre-cluster # 4
Total: 7592
Pre-cluster # 5
Total: 9693
Pre-cluster # 6
Total: 14632
Pre-cluster # 7
Total: 307


In [17]:
# Total count of all members of each post cluster.
for cluster_num, members in enumerate(post_cluster):
    print("Post-cluster # {}".format(cluster_num))
    # Accumulate under a dedicated name: assigning to `sum` here would rebind
    # the builtin for every cell that runs afterwards.
    cluster_total = 0
    for node_id in members:
        cluster_total += post_network_otu_count[post_OTUID[node_id]]
    print("Total: {}".format(cluster_total))

Post-cluster # 0
Total: 229752
Post-cluster # 1
Total: 33113
Post-cluster # 2
Total: 47508
Post-cluster # 3
Total: 41074
Post-cluster # 4
Total: 78166
Post-cluster # 5
Total: 26657
Post-cluster # 6
Total: 4167
Post-cluster # 7
Total: 27678
Post-cluster # 8
Total: 8980


In [18]:
# Total pre-sample count of each shared group (groups are labelled by their size).
for group_otuids in subgroup_clauset:
    print("Pre-group # {}".format(len(group_otuids)))
    # Accumulate under a dedicated name: assigning to `sum` here would rebind
    # the builtin for every cell that runs afterwards.
    group_total = 0
    for otuid in group_otuids:
        group_total += pre_network_otu_count[otuid]
    print("Total: {}".format(group_total))

Pre-group # 42
Total: 169263
Pre-group # 8
Total: 23611
Pre-group # 21
Total: 4699
Pre-group # 15
Total: 5991
Pre-group # 11
Total: 21514
Pre-group # 17
Total: 9128
Pre-group # 25
Total: 3770
Pre-group # 9
Total: 28429
Pre-group # 22
Total: 17320


In [19]:
# Total post-sample count of each shared group (groups are labelled by their size).
for group_otuids in subgroup_clauset:
    print("Post-group # {}".format(len(group_otuids)))
    # Accumulate under a dedicated name: assigning to `sum` here would rebind
    # the builtin for every cell that runs afterwards.
    group_total = 0
    for otuid in group_otuids:
        group_total += post_network_otu_count[otuid]
    print("Total: {}".format(group_total))

Post-group # 42
Total: 220762
Post-group # 8
Total: 27441
Post-group # 21
Total: 2431
Post-group # 15
Total: 2706
Post-group # 11
Total: 35040
Post-group # 17
Total: 8004
Post-group # 25
Total: 3966
Post-group # 9
Total: 27483
Post-group # 22
Total: 72329


In [20]:
# OTUID (as a string) -> short label of the probiotic it corresponds to.
probiotics = {
    '997439': 'R4B',
    '703741': 'R1L',
    '1110317': 'R9L',
    '604966': 'R6L',
    '692154': 'R5L',
    '559527': 'R3B',
    '586093': 'R8L',
    '679245': 'R2L',
    '949863': 'R7L',
    '1107027': 'S1L',
}

In [22]:
# For each pre cluster, print the label of every member that is a known probiotic.
for cluster_idx, members in enumerate(pre_cluster):
    print("Pre-cluster # {}".format(cluster_idx))
    for node_id in members:
        member_otuid = pre_OTUID[node_id]
        if member_otuid in probiotics:
            print(probiotics[member_otuid])

Pre-cluster # 0
Pre-cluster # 1
Pre-cluster # 2
Pre-cluster # 3
Pre-cluster # 4
R1L
Pre-cluster # 5
Pre-cluster # 6
S1L
Pre-cluster # 7


In [23]:
# For each post cluster, print the label of every member that is a known probiotic.
for cluster_idx, members in enumerate(post_cluster):
    print("Post-cluster # {}".format(cluster_idx))
    for node_id in members:
        member_otuid = post_OTUID[node_id]
        if member_otuid in probiotics:
            print(probiotics[member_otuid])

Post-cluster # 0
Post-cluster # 1
Post-cluster # 2
R3B
R1L
R6L
R5L
R2L
Post-cluster # 3
Post-cluster # 4
Post-cluster # 5
Post-cluster # 6
R4B
Post-cluster # 7
S1L
Post-cluster # 8


In [53]:
# Emit one HTML table row per probiotic: label, OTUID, total pre-sample count.
for otuid, label in probiotics.items():
    html_row = [
        '<tr>',
        "<th scope=\"row\">{}</th>".format(label),
        "<td>{}</td>".format(otuid),
        "<td>{}</td>".format(pre_network_otu_count[otuid]),
        "</tr>",
    ]
    print("\n".join(html_row))

<tr>
<th scope="row">R5L</th>
<td>692154</td>
<td>4</td>
</tr>
<tr>
<th scope="row">R7L</th>
<td>949863</td>
<td>0</td>
</tr>
<tr>
<th scope="row">R3B</th>
<td>559527</td>
<td>10</td>
</tr>
<tr>
<th scope="row">R8L</th>
<td>586093</td>
<td>2</td>
</tr>
<tr>
<th scope="row">S1L</th>
<td>1107027</td>
<td>14462</td>
</tr>
<tr>
<th scope="row">R2L</th>
<td>679245</td>
<td>1</td>
</tr>
<tr>
<th scope="row">R4B</th>
<td>997439</td>
<td>12</td>
</tr>
<tr>
<th scope="row">R9L</th>
<td>1110317</td>
<td>2</td>
</tr>
<tr>
<th scope="row">R1L</th>
<td>703741</td>
<td>48</td>
</tr>
<tr>
<th scope="row">R6L</th>
<td>604966</td>
<td>4</td>
</tr>


In [54]:
# Emit one HTML table row per probiotic: label, OTUID, total post-sample count.
for otuid, label in probiotics.items():
    html_row = [
        '<tr>',
        "<th scope=\"row\">{}</th>".format(label),
        "<td>{}</td>".format(otuid),
        "<td>{}</td>".format(post_network_otu_count[otuid]),
        "</tr>",
    ]
    print("\n".join(html_row))

<tr>
<th scope="row">R5L</th>
<td>692154</td>
<td>91</td>
</tr>
<tr>
<th scope="row">R7L</th>
<td>949863</td>
<td>11</td>
</tr>
<tr>
<th scope="row">R3B</th>
<td>559527</td>
<td>1563</td>
</tr>
<tr>
<th scope="row">R8L</th>
<td>586093</td>
<td>13</td>
</tr>
<tr>
<th scope="row">S1L</th>
<td>1107027</td>
<td>27129</td>
</tr>
<tr>
<th scope="row">R2L</th>
<td>679245</td>
<td>437</td>
</tr>
<tr>
<th scope="row">R4B</th>
<td>997439</td>
<td>62</td>
</tr>
<tr>
<th scope="row">R9L</th>
<td>1110317</td>
<td>8</td>
</tr>
<tr>
<th scope="row">R1L</th>
<td>703741</td>
<td>496</td>
</tr>
<tr>
<th scope="row">R6L</th>
<td>604966</td>
<td>50</td>
</tr>


In [56]:
# Number of members in pre cluster 5.
print("{}".format(len(pre_cluster[5])))

5


In [57]:
# For each shared group (labelled by its size), print the label of every
# member OTU that is a known probiotic.
for group_otuids in subgroup_clauset:
    print("Group: {}".format(len(group_otuids)))
    for otuid in group_otuids:
        if otuid in probiotics:
            print(probiotics[otuid])

Group: 42
Group: 8
Group: 21
Group: 15
Group: 11
Group: 17
Group: 25
Group: 9
Group: 22
