# Decomp community

In [1]:
# qiime imports
import qiime2
from qiime2 import Artifact, Metadata

# General Tool Imports
import numpy as np
import pandas as pd
import collections
from pickle import load, dump
from IPython.display import display
import warnings

# Plotting Imports
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
# from statannotations.Annotator import Annotator

import itertools
import scipy
import skbio

from skbio.stats import subsample_counts
from skbio import OrdinationResults
from statsmodels.sandbox.stats.multicomp import multipletests

%matplotlib inline

In [2]:
samples = Metadata.load('16S_sample_metadata.tsv').to_dataframe()

In [3]:
data = Artifact.load('table_beetle.qza').view(pd.DataFrame)

In [4]:
samples['reads'] = data.sum(axis=1)

In [5]:
samples

Unnamed: 0_level_0,BarcodeSequence,LinkerPrimerSequence,Description,reads
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SSNvC1,GAGAGTGT,GTGCCAGCMGCCGCGGTAA,Untended-carcass,30013.0
SSNvC2,GAGATCAG,GTGCCAGCMGCCGCGGTAA,Untended-carcass,24511.0
SSNvC3,GAGATCTC,GTGCCAGCMGCCGCGGTAA,Untended-carcass,22579.0
SSNvC4,GAGATGAC,GTGCCAGCMGCCGCGGTAA,Untended-carcass,21104.0
SSNvC5,GAGATGTG,GTGCCAGCMGCCGCGGTAA,Untended-carcass,22131.0
SSNvC6,GAGTACAG,GTGCCAGCMGCCGCGGTAA,Untended-carcass,15443.0
SSNvT1,GAGTACTC,GTGCCAGCMGCCGCGGTAA,Tended-carcass,8213.0
SSNvT3,GAGTAGAC,GTGCCAGCMGCCGCGGTAA,Tended-carcass,11887.0
SSNvT4,GAGTAGTG,GTGCCAGCMGCCGCGGTAA,Tended-carcass,10480.0
SSNvT5,GAGTCACT,GTGCCAGCMGCCGCGGTAA,Tended-carcass,13859.0


In [6]:
samples.groupby(['Description'], dropna=False).count()['LinkerPrimerSequence'].to_frame()

Unnamed: 0_level_0,LinkerPrimerSequence
Description,Unnamed: 1_level_1
Soil,1
Tended-carcass,6
Untended-carcass,6


In [7]:
# get seqs
seqs = Artifact.load('seqs_beetle.qza').view(qiime2.Metadata).to_dataframe()

In [8]:
seqs.shape

(507, 1)

In [9]:
taxa = Artifact.load('taxonomy_beetles.qza').view(pd.DataFrame)

In [10]:
taxa['asv'] = seqs['Sequence']

### Import Decomposers

In [11]:
pmi_decom = pd.read_csv('../ASVs_repseq.txt',sep ='\t',index_col=0)

In [12]:
pmi_decom.groupby('genus').count()['150_asv'].to_frame()

Unnamed: 0_level_0,150_asv
genus,Unnamed: 1_level_1
Acinetobacter,5
Bacteroides,9
Ignatzschineria,4
Oblitimonas,1
Peptoniphilus,5
Savagea,3
Vagococcus,5
Wohlfahrtiimonas,3


In [13]:
# pmi decomposers found in cow data
beetle_decomp = list(set(pmi_decom.index.tolist()).intersection(set(seqs.index.tolist())))

In [14]:
print("{} ASVs were found in beetle data".format(len(beetle_decomp)))

3 ASVs were found in beetle data


## Collapse reads data table on 150 bp ASVs
sum reads of asvs with same 150 bp sequence

In [15]:
# make ra table
ra = data.apply(lambda x: x / x.sum(), axis=1)

In [16]:
## average of each asv across all the active decomp samples
ra_beetle = ra[beetle_decomp].groupby(samples['Description']).mean()

In [17]:
## averages summed by genus
ra_beetle.T.groupby(pmi_decom['genus']).sum()

Description,Soil,Tended-carcass,Untended-carcass
genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Acinetobacter,0.000289,0.032962,0.00072
Vagococcus,0.000578,0.011615,0.013791


### how many samples are these decomposer asvs found in?  

In [18]:
# of samples each asv is found in
pd.concat([pmi_decom.loc[pmi_decom.index.isin(beetle_decomp)][['genus','species']],
           ra[beetle_decomp].groupby(samples['Description']).apply(lambda x: (x>0).sum()).T],axis=1)

Unnamed: 0,genus,species,Soil,Tended-carcass,Untended-carcass
1ba09da532a0f9985b3caf72a47b42e3,Acinetobacter,,1,6,6
cceb21819b328ad472f3e5fa20b3cbd9,Vagococcus,,0,2,0
92460066faddd83314cbc2348bf4fd29,Vagococcus,,1,6,6


In [19]:
## percent of samples each ASV is found in
pd.concat([pmi_decom.loc[pmi_decom.index.isin(beetle_decomp)][['genus','species']], (data[beetle_decomp].groupby(samples['Description']).apply(
    lambda x: (x>0).sum()).T)/(data[beetle_decomp].groupby(samples['Description']).count().T)*100], axis=1)

Unnamed: 0,genus,species,Soil,Tended-carcass,Untended-carcass
1ba09da532a0f9985b3caf72a47b42e3,Acinetobacter,,100.0,100.0,100.0
cceb21819b328ad472f3e5fa20b3cbd9,Vagococcus,,0.0,33.333333,0.0
92460066faddd83314cbc2348bf4fd29,Vagococcus,,100.0,100.0,100.0


### Look into taxonomy
The paper showed Wohlfahrtiimonas but it's not found here

In [46]:
pmi_decom

Unnamed: 0_level_0,taxonomy,150_asv,100_bp,kingdom,phylum,class,order,family,genus,species
#OTU ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
9acc238746a1f2aa7745a0b5720c4eac,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
6e8986f8088b452f964e7968bb8bca87,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
9e33b8985d44f2ed1b88ac7079f70793,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
d5902b1353d3aa0effaaa81cba1d1516,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
1ba09da532a0f9985b3caf72a47b42e3,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCG...,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
174b6959ecfedfee56c9daf6ffa45d2b,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCA...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCA...,Bacteria,Firmicutes,Bacilli,Bacillales,Planococcaceae,Savagea,uncultured bacterium
7567be5b5c8b7a16dbb1a84f6b46d965,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCA...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCA...,Bacteria,Firmicutes,Bacilli,Bacillales,Planococcaceae,Savagea,uncultured bacterium
8a8c381201ca5a905366fdf242c076db,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCA...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCA...,Bacteria,Firmicutes,Bacilli,Bacillales,Planococcaceae,Savagea,uncultured bacterium
837e0d796b199b8c9b462d97ad3c5599,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCG...,TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Pseudomonadaceae,Oblitimonas,Oblitimonas alkaliphila
b08c80963ce4a0df0518836d2da209ce,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,


In [47]:
taxa.loc[taxa.Taxon.str.contains('wohlf', case=False)].Taxon.values

array(['d__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlfahrtiimonas_chitiniclastica',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlfahrtiimonas_chitiniclastica',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae'],
      dtype=object)

In [48]:
data[taxa.loc[taxa.Taxon.str.contains('wohlf', case=False)].index.tolist()]

Unnamed: 0,d2fe79722e176a4eea6e323ee5dc2358,c89ceebdd4b1aeb4d8d980212adde615,fe27b7e78175e79608c59c062f2b2b77,42cdafcee3ea0f539039a27f1d78dcc8
SSNvC3,9.0,0.0,0.0,0.0
SSNvC2,21.0,0.0,0.0,0.0
SSNvC5,171.0,1.0,0.0,0.0
SSNvT5,4293.0,0.0,0.0,0.0
SSNvC4,25.0,2.0,0.0,0.0
SSNvSP,173.0,0.0,0.0,0.0
SSNvC1,72.0,4.0,1.0,0.0
SSNvT3,9.0,38.0,0.0,15.0
SSNvT6,0.0,52.0,17.0,5.0
SSNvT1,18.0,11.0,14.0,7.0


In [49]:
wohl = taxa[taxa.Taxon.str.contains('Wohl', case=False)].asv.values

In [50]:
pmi_wohl = pmi_decom.loc[pmi_decom['genus']=='Wohlfahrtiimonas']['150_asv'].values

In [51]:
from Bio import Align
aligner = Align.PairwiseAligner()
for i in range(len(wohl)):
    alignments = aligner.align(pmi_wohl[0], 
                               wohl[i])
    alignment = alignments[0]
    print(alignment)

target            0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGC-GGTTAC
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||--||||||
query             0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGG-TGGTTAC

target           59 TTAAGTT-AGATGTGAAAGCCCCGGGCTT-AACCTGGGAATTGCATTT-AATACTGGGTA
                 60 ||||||--||||||||||||||||||||--||||||||||||||||||-|-|||||||||
query            59 TTAAGT-CAGATGTGAAAGCCCCGGGCT-CAACCTGGGAATTGCATTTGA-TACTGGGTA

target          116 ACTAGAGTGTGGTAGAGAGTAGCGGAATTTCTGG 150
                120 |||||||||||||||||||||||||||||||||| 154
query           116 ACTAGAGTGTGGTAGAGAGTAGCGGAATTTCTGG 150

target            0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACT
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
query             0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACT

target           60 TAAGTTAGATGTGAAAGCCCC-GGGCTTAACCT-GGGAATTGCATTTAATACTGGGTAAC
         

In [52]:
from Bio import Align
aligner = Align.PairwiseAligner()
for i in range(len(wohl)):
    alignments = aligner.align(pmi_wohl[1], 
                               wohl[i])
    alignment = alignments[0]
    print(alignment)

target            0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGC-GGTTAC
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||--||||||
query             0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGG-TGGTTAC

target           59 TTAAGTT-AGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTT-AATACTGGGTAA
                 60 ||||||--|||||||||||||||||||||||||||||||||||||||-|-||||||||||
query            59 TTAAGT-CAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTGA-TACTGGGTAA

target          117 CTAGAGTGTGGTAGAGAGTAGCGGAATTTCTGG 150
                120 ||||||||||||||||||||||||||||||||| 153
query           117 CTAGAGTGTGGTAGAGAGTAGCGGAATTTCTGG 150

target            0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACT
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
query             0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACT

target           60 TAAGTTAGATGTGAAAGCCCC-GGGCTC-AACCT-GGGAATTGCATTTAATACTGGGTAA
            

In [53]:
from Bio import Align
aligner = Align.PairwiseAligner()
for i in range(len(wohl)):
    alignments = aligner.align(pmi_wohl[2], 
                               wohl[i])
    alignment = alignments[0]
    print(alignment)

target            0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGC-GGTTAC
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||--||||||
query             0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGG-TGGTTAC

target           59 TTAAGTT-AGATGTGAAAGCCCCGGGCTT-AACCTGAG-AATTGCATTT-AATACTGGGT
                 60 ||||||--||||||||||||||||||||--||||||-|-||||||||||-|-||||||||
query            59 TTAAGT-CAGATGTGAAAGCCCCGGGCT-CAACCTG-GGAATTGCATTTGA-TACTGGGT

target          115 AACTAGAGTGTGGTAGAGAGTAGCGGAATTTCTGG 150
                120 ||||||||||||||||||||||||||||||||||| 155
query           115 AACTAGAGTGTGGTAGAGAGTAGCGGAATTTCTGG 150

target            0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACT
                  0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
query             0 TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACT

target           60 TAAGTTAGATGTGAAAGCCCC-GGGCTTAACCT-GAGAATTGCATTTAATACTGGGTAAC
      

### RA of other Wohlfahrtiimonas asvs

In [54]:
## average of each asv across all the active decomp samples
ra_wohl = ra[taxa.loc[taxa.Taxon.str.contains('wohl', case=False)].index].groupby(samples['Description']).mean()

In [55]:
ra_wohl

Unnamed: 0_level_0,d2fe79722e176a4eea6e323ee5dc2358,c89ceebdd4b1aeb4d8d980212adde615,fe27b7e78175e79608c59c062f2b2b77,42cdafcee3ea0f539039a27f1d78dcc8
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Soil,0.01249,0.0,0.0,0.0
Tended-carcass,0.052385,0.002452,0.000944,0.00059
Untended-carcass,0.002235,4.6e-05,6e-06,0.0


In [56]:
ra_wohl.sum(axis=1).to_frame(name='Wohlfahrtiimonadaceae')

Unnamed: 0_level_0,Wohlfahrtiimonadaceae
Description,Unnamed: 1_level_1
Soil,0.01249
Tended-carcass,0.056371
Untended-carcass,0.002286


In [57]:
len(seqs['Sequence'][0])

150