# Decomp community

In [1]:
# qiime imports
import qiime2
from qiime2 import Artifact, Metadata

# General Tool Imports
import numpy as np
import pandas as pd
import collections
from pickle import load, dump
from IPython.display import display
import warnings

# Plotting Imports
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
# from statannotations.Annotator import Annotator

import itertools
import scipy
import skbio

from skbio.stats import subsample_counts
from skbio import OrdinationResults
from statsmodels.sandbox.stats.multicomp import multipletests

%matplotlib inline

In [2]:
# load the QIIME 2 sample metadata file and convert it to a pandas DataFrame
sample_metadata = Metadata.load('10141_20230201-070339.txt')
samples = sample_metadata.to_dataframe()

In [3]:
# read counts per sample; the first CSV column is used as the index
reads = pd.read_csv('reads_per_sample.csv', index_col=0)

In [4]:
# get samples with >1000 reads (the count column of `reads` is literally named '0')
MIN_READS = 1000
has_enough_reads = reads['0'] > MIN_READS
# .copy() makes `samples` an independent frame rather than a slice of the
# original metadata, so the `.loc` assignment to `sample_type` in the next cell
# does not raise SettingWithCopyWarning or depend on view-vs-copy behaviour.
samples = samples[samples.index.isin(reads[has_enough_reads].index)].copy()

In [5]:
# relabel soil samples as "soil_<soil_ctrl>" so that control soil and
# soil with a corpse become separate sample types
is_soil = samples['sample_type'] == 'soil'
samples.loc[is_soil, 'sample_type'] = 'soil_' + samples['soil_ctrl']

In [6]:
# number of samples per sample type (missing values get their own row)
sample_type_counts = samples['sample_type'].value_counts(dropna=False)
sample_type_counts.to_frame()

Unnamed: 0,sample_type
skin,232
soil_w_corpse,207
soil_control,115
abdominal swab,112
cecum,78
feces,21
control blank,15
control,3


In [7]:
# load the feature table artifact and view it as a DataFrame
# (later cells match its rows to sample IDs and its columns to ASV sequences)
feature_table = Artifact.load('table_1265.qza')
data = feature_table.view(pd.DataFrame)

In [8]:
# restrict the feature table to the samples kept by the >1000-read filter
in_filtered_samples = data.index.isin(samples.index)
data = data.loc[in_filtered_samples]

In [9]:
# make ra (relative abundance) table: divide every count by the total of its
# row so that each row sums to 1.
# DataFrame.div with axis=0 aligns the per-row totals on the index; it yields
# the same values as the former row-wise apply(lambda x: x / x.sum(), axis=1)
# without a Python-level call per row. Rows whose total is 0 still become NaN.
ra = data.div(data.sum(axis=1), axis=0)

### Import Decomposers

In [10]:
# load the decomposer ASV table, indexed by its 100 bp sequences
pmi_decom = pd.read_csv(
    '../ASVs_repseq.txt',
    sep='\t',
    index_col=3,  # fourth column of the file becomes the index
)

In [11]:
# using the 100 bp sequence as the index produced repeated index values;
# list every row whose index value already appeared earlier in the table
is_repeat = pmi_decom.index.duplicated()
pmi_decom.loc[is_repeat]

Unnamed: 0_level_0,#OTU ID,taxonomy,150_asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,8e87132c368c4f56dd114b1cab5f59a6,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,92460066faddd83314cbc2348bf4fd29,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,48113de4cb4849e5d543cbb0579c847e,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,3b97413ffc0ea1b441f7bd9daae2e3ee,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,07db6c743fdc4cdcc722d93f60cc7d7d,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,Ignatzschineria larvae


In [12]:
# drop the repeated rows, keeping the first occurrence of each 100 bp sequence
# NOTE: the dropped rows include the only Ignatzschineria entry that carried a
# species label ("Ignatzschineria larvae"); the retained row has no species.
is_repeat = pmi_decom.index.duplicated(keep='first')
pmi_decom = pmi_decom.loc[~is_repeat]

In [13]:
# sanity check: no repeated index values should be left (expect an empty frame)
still_repeated = pmi_decom.index.duplicated()
pmi_decom.loc[still_repeated]

Unnamed: 0_level_0,#OTU ID,taxonomy,150_asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [14]:
# only 5 rows were duplicates of an earlier 100 bp sequence and were removed;
# the shape shows how many unique decomposer ASVs remain
pmi_decom.shape

(30, 10)

In [15]:
# pmi decomposers found in mouse data = 8 asvs
# Index.intersection keeps a deterministic order, unlike the previous
# list(set(...) & set(...)), whose order changed between kernel restarts
# because string hashing is randomised per process.
mouse_decomp = pmi_decom.index.intersection(data.columns).tolist()

In [16]:
# mean relative abundance of each decomposer ASV within every sample type
decomp_ra = ra.loc[:, mouse_decomp]
ra_mouse = decomp_ra.groupby(samples['sample_type']).mean()

In [17]:
# transpose so ASVs are rows, then add up the mean RA of all ASVs that share a genus
ra_by_asv = ra_mouse.T
ra_by_asv.groupby(pmi_decom['genus']).sum()

sample_type,abdominal swab,cecum,control,control blank,feces,skin,soil_control,soil_w_corpse
genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acinetobacter,0.064253,0.004746298,0.608383,0.610728,0.001974,0.26707,0.127707,0.076809
Ignatzschineria,0.0,0.0,0.0,0.0,0.0,2e-06,0.0,0.0
Peptoniphilus,1e-06,5.110115e-07,0.000778,0.0,0.0,2.7e-05,3.2e-05,9e-06
Vagococcus,3.5e-05,2.59312e-07,0.001178,0.00219,0.0,0.000298,0.000154,5.1e-05


### How many samples are these decomposer ASVs found in?

In [18]:
# number of samples (per sample type) in which each decomposer ASV has a non-zero count
decomp_taxa = pmi_decom.loc[pmi_decom.index.isin(mouse_decomp), ['genus', 'species']]
n_samples_present = (
    data[mouse_decomp]
    .groupby(samples['sample_type'])
    .apply(lambda counts: (counts > 0).sum())
    .T
)
pd.concat([decomp_taxa, n_samples_present], axis=1)

Unnamed: 0,genus,species,abdominal swab,cecum,control,control blank,feces,skin,soil_control,soil_w_corpse
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,15,20,0,1,1,31,8,17
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,112,78,3,15,21,231,115,206
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,4,2,0,1,0,3,2,6
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,15,1,3,9,0,65,43,43
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,Ignatzschineria,,0,0,0,0,0,1,0,0
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTACTTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,Ignatzschineria,,0,0,0,0,0,2,0,0
TACGTAGGGGGCTAGCGTTGTCCGGAATCACTGGGCGTAAAGGGTTCGCAGGCGGAAATGCAAGTCAGATGTAAAAGGCAGTAGCTTAACTACTGTAAGC,Peptoniphilus,,2,1,1,0,0,11,6,6
TACGTAGGGGGCTAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTTCGCAGGCGGAAATACAAGTCAGGTGTAAAAGGCGGAGGCTTAACCTCCGTAAGC,Peptoniphilus,Peptoniphilus stercorisuis,0,0,0,0,0,1,0,0


In [19]:
## percent of samples (per sample type) in which each decomposer ASV has a non-zero count
decomp_taxa = pmi_decom.loc[pmi_decom.index.isin(mouse_decomp), ['genus', 'species']]
by_sample_type = data[mouse_decomp].groupby(samples['sample_type'])
n_present = by_sample_type.apply(lambda counts: (counts > 0).sum()).T
n_total = by_sample_type.count().T
pd.concat([decomp_taxa, n_present / n_total * 100], axis=1)

Unnamed: 0,genus,species,abdominal swab,cecum,control,control blank,feces,skin,soil_control,soil_w_corpse
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,13.392857,25.641026,0.0,6.666667,4.761905,13.362069,6.956522,8.21256
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,100.0,100.0,100.0,100.0,100.0,99.568966,100.0,99.516908
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,3.571429,2.564103,0.0,6.666667,0.0,1.293103,1.73913,2.898551
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,13.392857,1.282051,100.0,60.0,0.0,28.017241,37.391304,20.772947
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,Ignatzschineria,,0.0,0.0,0.0,0.0,0.0,0.431034,0.0,0.0
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTACTTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,Ignatzschineria,,0.0,0.0,0.0,0.0,0.0,0.862069,0.0,0.0
TACGTAGGGGGCTAGCGTTGTCCGGAATCACTGGGCGTAAAGGGTTCGCAGGCGGAAATGCAAGTCAGATGTAAAAGGCAGTAGCTTAACTACTGTAAGC,Peptoniphilus,,1.785714,1.282051,33.333333,0.0,0.0,4.741379,5.217391,2.898551
TACGTAGGGGGCTAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTTCGCAGGCGGAAATACAAGTCAGGTGTAAAAGGCGGAGGCTTAACCTCCGTAAGC,Peptoniphilus,Peptoniphilus stercorisuis,0.0,0.0,0.0,0.0,0.0,0.431034,0.0,0.0


### look for other Wohlfahrtiimonas ASVs

In [20]:
# load the taxonomy artifact and view it as a DataFrame
taxonomy_artifact = Artifact.load('taxonomy_10141.qza')
taxa = taxonomy_artifact.view(pd.DataFrame)

In [21]:
# taxonomy strings of every feature whose classification mentions "wohlf" (any case)
mentions_wohlf = taxa['Taxon'].str.contains('wohlf', case=False)
taxa.loc[mentions_wohlf, 'Taxon'].values

array(['d__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Ignatzschineria',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Ignatzschineria; s__Ignatzschineria_sp.',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae',
       'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Cardiobacteriales; f__Wohlfahrtiimonadaceae; g__Ignatzschineria'],
      dtype=object)

In [24]:
# keep the taxonomy rows whose classification mentions "Wohl" (case-insensitive)
mentions_wohl = taxa['Taxon'].str.contains('Wohl', case=False)
wohl = taxa.loc[mentions_wohl]

In [25]:
# drop every row classified down to g__Ignatzschineria
is_ignatzschineria = wohl['Taxon'].str.contains('g__Ignat')
wohl = wohl.loc[~is_ignatzschineria]

In [27]:
# features of the (filtered) table that are among the remaining `wohl` taxa;
# Index.intersection gives a deterministic order, unlike set intersection,
# whose order varies between kernel restarts
fly_wohl = data.columns.intersection(wohl.index).tolist()

In [31]:
# total count of each selected feature across the filtered samples
# (an empty Series means none of them occur in the table)
data.loc[:, fly_wohl].sum()

Series([], dtype: float64)

There are no non-Ignatzschineria Wohlfahrtiimonadaceae ASVs in the mouse dataset.