# Decomp community

In [1]:
# qiime imports
import qiime2
from qiime2 import Artifact, Metadata

# General Tool Imports
import numpy as np
import pandas as pd
import collections
from pickle import load, dump
from IPython.display import display
import warnings

# Plotting Imports
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
from statannotations.Annotator import Annotator

import itertools
import scipy
import skbio

from skbio.stats import subsample_counts
from skbio import OrdinationResults
from statsmodels.sandbox.stats.multicomp import multipletests

%matplotlib inline

In [2]:
# Read the sample metadata sheet and expose it as a pandas DataFrame.
samples = (
    Metadata
    .load('sample_information_from_prep_333.tsv')
    .to_dataframe()
)

In [3]:
# List the metadata fields available in the `samples` table.
samples.columns

Index(['dd_0', 'dd_5', 'dd_6', 'add_0c', 'add_4c', 'add_5c', 'add_6c',
       'empo_1', 'empo_2', 'empo_3', 'ph_ave', 'latitude', 'ph_read1',
       'taxon_id', 'elevation', 'env_biome', 'longitude', 'sample_no',
       'soil_type', 'experiment', 'gdna_plate', 'host_taxid', 'temp_c_max',
       'temp_c_min', 'description', 'dewpt_c_max', 'dewpt_c_min',
       'env_feature', 'env_package', 'rain_mm_max', 'rain_mm_min',
       'rain_mm_sum', 'sample_type', 'env_material', 'qiita_empo_1',
       'qiita_empo_2', 'qiita_empo_3', 'sample_group', 'soil_control',
       'dna_extracted', 'pool_name_16s', 'pool_name_18s', 'sequencecount',
       'soil_resample', 'host_body_site', 'original_order', 'rh_percent_max',
       'rh_percent_min', 'side_of_sample', 'temp_c_average', 'collection_date',
       'dewpt_c_average', 'host_subject_id', 'rain_mm_average',
       'sample_location', 'scientific_name', 'host_common_name',
       'ph_sample_mass_g', 'pre_post_rupture', 'soil_sample_site',
       'h

In [30]:
# Non-null `qiita_study_id` entries per sample_group (rows whose group is
# missing are kept as their own NaN group via dropna=False).
# The column is selected before aggregating so only that one column is
# counted, instead of counting every metadata column and discarding the rest.
samples.groupby('sample_group', dropna=False)['qiita_study_id'].count().to_frame()

Unnamed: 0_level_0,qiita_study_id
sample_group,Unnamed: 1_level_1
skin.limb,53
skin.torso,109
soil.control,129
soil.corpse.head,86
soil.corpse.limb,86
soil.corpse.torso,172


In [6]:
# Load the feature-table artifact and view it as a pandas DataFrame.
data = (
    Artifact
    .load('table_333.qza')
    .view(pd.DataFrame)
)

In [7]:
# make relative-abundance (ra) table: divide every value by the total of its
# row so that each row of `data` sums to 1.
# The vectorised `div` replaces the row-by-row `apply`; the values are the
# same, including NaN for any row whose total is 0.
ra = data.div(data.sum(axis=1), axis=0)

### Import Decomposers

In [14]:
# Tab-separated table of PMI decomposer ASVs with their taxonomy.
# The index column is selected by its header ('100_bp', the fifth column)
# rather than by position, so the load keeps working if columns are reordered
# and the choice of index is readable from the code.
pmi_decom = pd.read_csv('../ASVs_repseq.txt', sep='\t', index_col='100_bp')

In [15]:
# Preview the first rows of the decomposer table.
pmi_decom.head()

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,9acc238746a1f2aa7745a0b5720c4eac,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,694120,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,6e8986f8088b452f964e7968bb8bca87,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,372201,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTAATTAAGTCGGATGTGAAATCCCCGAGCTCAACTTGGGAATTG,9e33b8985d44f2ed1b88ac7079f70793,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,143184,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,d5902b1353d3aa0effaaa81cba1d1516,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,108334,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,1ba09da532a0f9985b3caf72a47b42e3,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,101710,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,


In [16]:
# (rows, columns) of the decomposer table as loaded, before any rows with a
# duplicated index are removed.
pmi_decom.shape

(35, 11)

In [17]:
# PMI decomposer ASVs (the 100 bp sequences indexing `pmi_decom`) that are
# also feature columns of this study's table.
# Filtering `data.columns` keeps table order, so the list - and every column
# order derived from it - is the same on each run; the order produced by
# list(set(...)) changes with Python's per-process string hashing.
# NOTE(review): assumes the feature-table columns are unique - confirm.
winter_decomp = data.columns[data.columns.isin(pmi_decom.index)].tolist()

In [18]:
# Number of decomposer ASVs shared with the feature table.
len(winter_decomp)

27

In [19]:
# Show every row whose index value already appeared on an earlier row.
pmi_decom.loc[pmi_decom.index.duplicated(keep='first')]

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,8e87132c368c4f56dd114b1cab5f59a6,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,90942,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,92460066faddd83314cbc2348bf4fd29,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,18517,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,48113de4cb4849e5d543cbb0579c847e,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,4319,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,3b97413ffc0ea1b441f7bd9daae2e3ee,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,346906,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,07db6c743fdc4cdcc722d93f60cc7d7d,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,1191958,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,Ignatzschineria larvae


In [20]:
# Trimming the sequences to 100 bp made some index entries collide.
# Keep the first row for each sequence and drop the later repeats.
pmi_decom = pmi_decom.loc[
    ~pmi_decom.index.duplicated(keep='first')
]


In [21]:
# Re-check for repeated index values; an empty frame means none are left.
pmi_decom.loc[pmi_decom.index.duplicated(keep='first')]

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [22]:
# (rows, columns) of the decomposer table after duplicated index entries
# were dropped.
pmi_decom.shape

(30, 11)

In [23]:
# Mean relative abundance of each shared decomposer ASV within every
# sample_group; rows of `ra` are matched to their group through the index
# of `samples`.
ra_winter = (
    ra.loc[:, winter_decomp]
    .groupby(samples['sample_group'])
    .mean()
)

In [24]:
# Display the per-group mean relative abundances (one column per ASV).
ra_winter

Unnamed: 0_level_0,TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGAAGTGAAAGCCCCGGGCTCAACCTGGGAATTG,TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACTTAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTG,TACGTAGGGGGCTAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTTCGCAGGCGGAAATATAAGTCAGGTGTAAAAGGCGGAGGCTCAACCTCCGTAAGC,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,TACGGAGGATCCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGAAAGGAAGTCAGTTGTGAAATTTTATGGCTCAACCATAAACTTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCCTGAAAGTCAGCTGTGAAAGTTAATAGCTCAACTATTAAATTG,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,TACGTAGGGGGCTAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTTCGCAGGCGGAAATACAAGTCAGGTGTAAAAGGCGGAGGCTTAACCTCCGTAAGC,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGGTAAGTCAGCTGTGAAAGTTTACGGCTCAACCGTGAAATTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACCTGAAAGTCAGCTGTGAAAGTTAGTAGCTCAACTACTAAATTG,...,TACGGAGGATCCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGGTAAGTCAGCTGTGAAAGTTTGCGGCTTAACCGTAAAATTG,TACGTAGGGGGCGAGCGTTGTCCGGAATTATTGGGCGTAAAGGGTTCGCAGGCGGAAATATAAGTCAGGTGTAAAAGGCGGAGGCTCAACCTCCGTAAGC,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTAATTAAGTCGGATGTGAAATCCCCGAGCTCAACTTGGGAATTG,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCCTGAAAGTCAGCTGTGAAAGTTAGTAGCTCAACTACTAAATTG,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTTTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACT
GATAAGTCAGCTGTGAAAGTTTACGGCTCAACCGTGAAATTG
sample_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
skin.limb,0.004826,0.000651,0.0046,0.001861,6.4e-05,5e-06,0.006698,0.013734,2e-06,2e-06,...,3.3e-05,0.005657,0.006769,0.0,0.000514,0.010097,1.9e-05,0.000325,0.014741,7.4e-05
skin.torso,0.003689,0.000432,0.007467,0.001109,0.000476,6e-05,0.011759,0.025102,4e-06,7e-06,...,9.3e-05,0.006268,0.006898,2.8e-05,0.001619,0.010444,4.2e-05,0.00356,0.03168,1.7e-05
soil.control,3e-06,0.0,1.1e-05,0.004264,3.2e-05,6e-06,4e-06,4.4e-05,8e-06,2e-06,...,2e-06,1.3e-05,0.004434,2.2e-05,2.6e-05,1.6e-05,1e-06,0.00401,4e-06,5e-06
soil.corpse.head,0.003977,5.3e-05,0.010034,0.001699,0.001377,0.000226,0.014411,0.03581,0.000281,7e-06,...,0.000203,0.005922,0.01816,0.0,0.000536,0.034971,0.000119,0.000664,0.002021,0.000716
soil.corpse.limb,0.004411,4e-05,0.000629,0.002865,0.000217,7e-06,0.00022,0.006954,5.4e-05,6e-06,...,5.4e-05,0.004425,0.022524,0.000981,6.1e-05,0.001639,8e-06,0.01592,0.000198,3.9e-05
soil.corpse.torso,0.002215,0.000163,0.003083,0.000948,0.000551,0.00042,0.001328,0.019096,0.000496,0.000265,...,0.000122,0.005873,0.040072,0.004811,0.000723,0.004558,0.000312,0.001005,0.010304,0.001366


In [25]:
# Collapse ASVs to genus level: transpose so ASVs are on the rows, group
# them by the genus annotation in `pmi_decom`, and sum the group means.
ra_winter.transpose().groupby(pmi_decom.loc[:, 'genus']).sum()

sample_group,skin.limb,skin.torso,soil.control,soil.corpse.head,soil.corpse.limb,soil.corpse.torso
genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Acinetobacter,0.008994,0.011698,0.012758,0.021957,0.04236,0.046843
Bacteroides,0.000243,0.001362,0.000151,0.007603,0.002057,0.014992
Ignatzschineria,0.012303,0.013598,2.5e-05,0.05148,0.002931,0.007684
Oblitimonas,0.004826,0.003689,3e-06,0.003977,0.004411,0.002215
Peptoniphilus,0.024636,0.039385,6.9e-05,0.051768,0.012008,0.028053
Savagea,0.017142,0.037463,5e-06,0.004051,0.001029,0.011555
Vagococcus,0.007212,0.013378,3e-05,0.014947,0.00028,0.002051
Wohlfahrtiimonas,0.001713,0.002027,1e-06,0.002118,0.00011,0.000788
