# Decomp community

In [42]:
# qiime imports
import qiime2
from qiime2 import Artifact, Metadata

# General Tool Imports
import numpy as np
import pandas as pd
import collections
from pickle import load, dump
from IPython.display import display
import warnings

# Plotting Imports
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
# from statannotations.Annotator import Annotator

import itertools
import scipy
import skbio

from skbio.stats import subsample_counts
from skbio import OrdinationResults
from statsmodels.sandbox.stats.multicomp import multipletests

%matplotlib inline

In [43]:
# Read the sample metadata file with QIIME 2, then convert it to a
# pandas DataFrame.
sample_metadata = Metadata.load('1609_20230201-070309.txt')
samples = sample_metadata.to_dataframe()

In [44]:
# Keep every sample whose 'paper_subject' is not 'extended'; .copy() gives
# an independent frame so later column assignments do not act on a view.
not_extended = samples['paper_subject'].ne('extended')
samples = samples.loc[not_extended].copy()

In [45]:
# Build a new metadata column, 'whatisit', that labels each sample's group:
#   - paper_subject == 'mass'             -> '<mass>_kg'
#   - paper_subject == 'season'           -> the value of 'season'
#   - sample ID contains 'control'        -> '<paper_subject>_control'
# The control rule is applied last, so it overrides the two labels above.
is_mass = samples['paper_subject'] == 'mass'
is_season = samples['paper_subject'] == 'season'
is_control = samples.index.str.contains('control')

samples.loc[is_mass, 'whatisit'] = samples['mass'] + '_kg'
samples.loc[is_season, 'whatisit'] = samples['season']
samples.loc[is_control, 'whatisit'] = samples['paper_subject'] + '_control'

In [46]:
# Number of samples in each 'whatisit' group, displayed as a one-column table.
whatisit_counts = samples['whatisit'].value_counts()
whatisit_counts.to_frame()

Unnamed: 0,whatisit
1_kg,24
20_kg,24
40_kg,24
50_kg,24
mass_control,24
season_control,24
summer,12
winter,12


In [47]:
# Read the per-sample read table; the first CSV column becomes the index.
reads = pd.read_csv(
    'reads_per_sample_1609.csv',
    index_col=0,
)

In [48]:
# Keep only samples with more than 1000 reads (column '0' of `reads`).
# The comparison is strict: a sample with exactly 1000 reads is dropped,
# as is any sample that has no row in `reads`.
deep_enough = reads.index[reads['0'] > 1000]
samples = samples.loc[samples.index.isin(deep_enough)].copy()

In [49]:
# Samples per 'whatisit' group in the current (filtered) metadata table.
whatisit_counts = samples['whatisit'].value_counts()
whatisit_counts.to_frame()

Unnamed: 0,whatisit
40_kg,24
50_kg,23
season_control,23
20_kg,22
mass_control,20
1_kg,18
summer,10
winter,10


In [50]:
# Load the feature-table artifact and view it as a pandas DataFrame.
feature_table = Artifact.load('table_3876.qza')
data = feature_table.view(pd.DataFrame)

In [51]:
# Restrict the feature table to the rows whose ID is still in `samples`.
in_samples = data.index.isin(samples.index)
data = data.loc[in_samples].copy()

In [52]:
# Make the relative-abundance (ra) table: divide every count by its row
# total so that each row sums to 1. The vectorized `div` replaces a
# row-by-row `apply`, which called a Python lambda once per row.
# A row whose total is 0 comes out as NaN (0 / 0), as it did before.
ra = data.div(data.sum(axis=1), axis=0)

### Import Decomposers

In [53]:
# Load the tab-separated decomposer ASV table and index it by its fourth
# column (position 3), which holds the 100 bp ASV sequences.
pmi_decom = pd.read_csv(
    '../ASVs_repseq.txt',
    sep='\t',
    index_col=3,
)

In [54]:
# Indexing by the 100 bp sequence produced repeated index values; list
# every row whose index value already appeared earlier in the table.
repeated = pmi_decom.index.duplicated(keep='first')
pmi_decom.loc[repeated]

Unnamed: 0_level_0,#OTU ID,taxonomy,150_asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,8e87132c368c4f56dd114b1cab5f59a6,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,92460066faddd83314cbc2348bf4fd29,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,48113de4cb4849e5d543cbb0579c847e,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,3b97413ffc0ea1b441f7bd9daae2e3ee,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,07db6c743fdc4cdcc722d93f60cc7d7d,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,Ignatzschineria larvae


In [55]:
# Drop the repeated rows, keeping the first row seen for each sequence.
repeated = pmi_decom.index.duplicated(keep='first')
pmi_decom = pmi_decom.loc[~repeated]

In [56]:
# Re-run the duplicate check; an empty table means no repeats remain.
pmi_decom.loc[pmi_decom.index.duplicated(keep='first')]

Unnamed: 0_level_0,#OTU ID,taxonomy,150_asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [57]:
# (rows, columns) of the decomposer table after de-duplication; in the
# recorded run the duplicate check had listed only 5 repeated rows
pmi_decom.shape

(30, 10)

In [58]:
# PMI decomposer ASVs that are also present as columns of the swine feature
# table. sorted() pins the order: iterating a set of strings can yield a
# different order in every kernel session (string hashing is randomized),
# which would shuffle the rows/columns of every table built from this list.
swine_decomp = sorted(set(pmi_decom.index).intersection(data.columns))

In [59]:
# number of PMI decomposer ASVs that were found in the swine data
len(swine_decomp)

25

In [60]:
# Average the relative abundance of each decomposer ASV within every
# 'whatisit' group (rows are matched to their group by index label).
decomp_ra = ra.loc[:, swine_decomp]
ra_swine = decomp_ra.groupby(samples['whatisit']).mean()

In [61]:
# Put ASVs on the rows, then add up the group means of all ASVs that share
# a genus in the decomposer table.
asv_by_group = ra_swine.transpose()
asv_by_group.groupby(pmi_decom['genus']).sum()

whatisit,1_kg,20_kg,40_kg,50_kg,mass_control,season_control,summer,winter
genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acinetobacter,0.015907,0.026276,0.008949,0.022248,0.000674,0.000292,0.007467,0.002428
Bacteroides,0.000321,0.003879,0.000656,0.003435,0.00049,3e-06,0.000759,9.4e-05
Ignatzschineria,0.021305,0.040307,0.058887,0.022639,0.002338,2.7e-05,0.000545,0.0
Oblitimonas,0.001296,0.0056,0.004232,0.017226,0.000287,7.2e-05,0.001952,0.0
Peptoniphilus,0.000318,0.00413,0.002279,0.003193,0.00034,8e-06,2.9e-05,6.7e-05
Savagea,0.003928,0.014175,0.006093,0.009298,0.001408,0.0,0.0,0.000116
Vagococcus,0.002743,0.004073,0.003566,0.001447,2.2e-05,0.0,5.6e-05,0.000746
Wohlfahrtiimonas,0.004177,0.002612,0.001247,0.000444,4.8e-05,0.0,0.0,0.0


### How many samples are these decomposer ASVs found in?

In [62]:
# Number of samples in each 'whatisit' group in which each decomposer ASV
# was detected (count > 0), shown next to the ASV's genus and species.
decomp_taxa = pmi_decom.loc[pmi_decom.index.isin(swine_decomp), ['genus', 'species']]
detections = (
    data[swine_decomp]
    .groupby(samples['whatisit'])
    .apply(lambda grp: (grp > 0).sum())
    .T
)
pd.concat([decomp_taxa, detections], axis=1)

Unnamed: 0,genus,species,1_kg,20_kg,40_kg,50_kg,mass_control,season_control,summer,winter
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,14,19,16,17,16,20,9,7
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,8,13,4,4,3,3,5,0
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,14,16,14,13,7,3,1,0
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,11,15,11,11,12,14,1,4
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTCTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,7,15,13,12,9,0,0,0
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTTTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,2,6,5,7,3,0,0,1
TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGAAGTGAAAGCCCCGGGCTCAACCTGGGAATTG,Oblitimonas,Oblitimonas alkaliphila,8,15,11,14,9,2,4,0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,8,6,4,3,2,0,0,0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,9,12,8,10,2,0,3,1
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,Ignatzschineria,,13,20,18,18,14,8,2,0


In [63]:
# Percent of samples in each 'whatisit' group in which each decomposer ASV
# was detected: (samples with count > 0) / (samples with a value) * 100,
# shown next to the ASV's genus and species.
decomp_counts = data[swine_decomp]
group_labels = samples['whatisit']

n_detected = decomp_counts.groupby(group_labels).apply(lambda grp: (grp > 0).sum()).T
n_samples = decomp_counts.groupby(group_labels).count().T
decomp_taxa = pmi_decom.loc[pmi_decom.index.isin(swine_decomp), ['genus', 'species']]

pd.concat([decomp_taxa, n_detected / n_samples * 100], axis=1)

Unnamed: 0,genus,species,1_kg,20_kg,40_kg,50_kg,mass_control,season_control,summer,winter
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,77.777778,86.363636,66.666667,73.913043,80.0,86.956522,90.0,70.0
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,44.444444,59.090909,16.666667,17.391304,15.0,13.043478,50.0,0.0
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,77.777778,72.727273,58.333333,56.521739,35.0,13.043478,10.0,0.0
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,61.111111,68.181818,45.833333,47.826087,60.0,60.869565,10.0,40.0
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTCTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,38.888889,68.181818,54.166667,52.173913,45.0,0.0,0.0,0.0
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTTTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,11.111111,27.272727,20.833333,30.434783,15.0,0.0,0.0,10.0
TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGAAGTGAAAGCCCCGGGCTCAACCTGGGAATTG,Oblitimonas,Oblitimonas alkaliphila,44.444444,68.181818,45.833333,60.869565,45.0,8.695652,40.0,0.0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,44.444444,27.272727,16.666667,13.043478,10.0,0.0,0.0,0.0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,50.0,54.545455,33.333333,43.478261,10.0,0.0,30.0,10.0
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,Ignatzschineria,,72.222222,90.909091,75.0,78.26087,70.0,34.782609,20.0,0.0


### Look for other Wohlfahrtiimonadaceae ASVs

In [106]:
# Load the taxonomy artifact and view it as a pandas DataFrame.
taxonomy_artifact = Artifact.load('taxonomy_3876.qza')
taxa = taxonomy_artifact.view(pd.DataFrame)

In [107]:
# preview the first rows of the taxonomy table
taxa.head()

Unnamed: 0_level_0,Taxon,Confidence
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGTCCTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGT,d__Bacteria; p__Firmicutes; c__Bacilli; o__Bac...,0.996553648706803
TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTAGGAGTGAAAGCCCCGGGCTCAACCTGGGAATTG,d__Bacteria; p__Proteobacteria; c__Gammaproteo...,0.7122412211504756
TACGTAGGGTGCAAGCGTTGTCCGGAATTACTGGGCGTAAAGAGTTCGTAGGCGGTTTGTCGCGTCGTTTGTGAAAACCAGCAGCTCAACTGCTGGCTTG,d__Bacteria; p__Actinobacteriota; c__Actinobac...,0.9999996917913678
TACGTAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTTTTGTAAGACAGAGGTGAAATCCCCGGGCTCAACCTGGGAACTG,d__Bacteria; p__Proteobacteria; c__Gammaproteo...,0.9848561116318736
TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGGATTTTAAGTCAGCGGTGAAATTTTCAGGCTCAACCTGAACACTG,d__Bacteria; p__Bacteroidota; c__Bacteroidia; ...,0.9999705301400762


In [108]:
# Taxon strings of every feature whose taxonomy mentions 'Wohlf'
# (case-insensitive match).
has_wohlf = taxa['Taxon'].str.contains('Wohlf', case=False)
taxa.loc[has_wohlf, 'Taxon'].values

array(['d__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlfahrtiimonas_chitiniclastica',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Ignatzschineria; s__Ignatzschineria_sp.',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Ignatzschineria',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Ignatzschineria; s__swine_effluent',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Ignatzschineria',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlf

In [109]:
# Independent copy of all taxonomy rows whose Taxon string contains
# 'Wohlf' (case-insensitive).
has_wohlf = taxa['Taxon'].str.contains('Wohlf', case=False)
wohl = taxa.loc[has_wohlf].copy()

In [110]:
# Discard every row whose Taxon string contains a 'g__Ignat...' genus label
# (i.e. anything classified as g__Ignatzschineria).
is_ignat = wohl['Taxon'].str.contains('g__Ignat')
wohl = wohl.loc[~is_ignat]

In [111]:
# Taxon strings of the rows currently held in `wohl`
wohl.Taxon.values

array(['d__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlfahrtiimonas_chitiniclastica',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlfahrtiimonas_chitiniclastica',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlfahrtiimonas_chitiniclastica',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas; s__Wohlfahrtiimonas_chitiniclastica',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Wohlfahrtiimonas',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Ca

In [112]:
# All ASVs from `wohl` that occur as columns of the feature table.
# sorted() makes the list order reproducible; a plain list(set(...)) of
# strings can come out in a different order in every kernel session.
swine_wohl = sorted(set(data.columns).intersection(wohl.index))

In [113]:
# New ASVs: those in `swine_wohl` that are NOT already in the PMI
# decomposer table. sorted() makes the list order reproducible; a plain
# list(set(...)) of strings can come out in a different order in every
# kernel session, which would reorder the columns selected with this list.
other_wohl = sorted(set(swine_wohl).difference(pmi_decom.index))

In [114]:
# Sanity check: the new ASVs were chosen to exclude the decomposer table's
# index, so this lookup is expected to display an empty frame.
already_known = pmi_decom.index.isin(other_wohl)
pmi_decom[already_known]

Unnamed: 0_level_0,#OTU ID,taxonomy,150_asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [115]:
# total count of each new ASV, summed over all rows of `data`
data[other_wohl].sum()

TACGGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACTTAAGTTAGATGTGAAAGCCCCGGGCTTAACCTGGGAATT       9.0
TCCGGGGGGTGCCCGCGTTCCTCGGCCTTCCTGGGCGTCCCGGGCGCGTAGGTGGTTACTTAAGTCAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTG       2.0
TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAGGGCGCGTAGGCGGTTACTTAAGTTAGATGTGAAAGCCCCGGGCTTAACCTGGGAATTGC       2.0
TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTTACTTAAGTCAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTG    4184.0
TCCGGGGGGTGCCCGCGTTCCTCGGCCTTCCTGGGCGTCCCGGGCGCGTAGGCGGTTACTTAAGTTAGATGTGAAAGCCCCGGGCTTAACCTGGGAATTG       9.0
dtype: float64

In [116]:
# Mean relative abundance of each new ASV within every 'whatisit' group.
other_wohl_ra = ra.loc[:, other_wohl]
ra_wohl = other_wohl_ra.groupby(samples['whatisit']).mean()

In [117]:
# Add the per-ASV group means together to get one combined value per
# group, and show it as a single labelled column.
combined = ra_wohl.sum(axis=1)
combined.to_frame(name='Wohlfahrtiimonadaceae')

Unnamed: 0_level_0,Wohlfahrtiimonadaceae
whatisit,Unnamed: 1_level_1
1_kg,0.000478
20_kg,0.000895
40_kg,0.001002
50_kg,0.000841
mass_control,3.8e-05
season_control,0.0
summer,0.000226
winter,0.0
