# Decomposer community

In [1]:
# qiime imports
import qiime2
from qiime2 import Artifact, Metadata

# General Tool Imports
import numpy as np
import pandas as pd
import collections
from pickle import load, dump
from IPython.display import display
import warnings

# Plotting Imports
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
from statannotations.Annotator import Annotator

import itertools
import scipy
import skbio

from skbio.stats import subsample_counts
from skbio import OrdinationResults
from statsmodels.sandbox.stats.multicomp import multipletests

%matplotlib inline

In [2]:
# Read the sample-information TSV with QIIME 2 and keep it as a pandas DataFrame.
samples = (
    Metadata
    .load('sample_information_from_prep_1107.tsv')
    .to_dataframe()
)

In [3]:
# Show the column labels of the sample metadata table.
samples.columns

Index(['add_0c', 'add_4c', 'add_5c', 'add_6c', 'empo_1', 'empo_2', 'empo_3',
       'latitude', 'taxon_id', 'elevation', 'env_biome', 'longitude',
       'sample_no', 'soil_type', 'experiment', 'host_taxid', 'temp_c_max',
       'temp_c_min', 'description', 'dewpt_c_max', 'dewpt_c_min',
       'env_feature', 'env_package', 'rain_mm_max', 'rain_mm_min',
       'rain_mm_sum', 'sample_type', 'env_material', 'qiita_empo_1',
       'qiita_empo_2', 'qiita_empo_3', 'sample_group', 'soil_control',
       'dna_extracted', 'soil_resample', 'host_body_site', 'rh_percent_max',
       'rh_percent_min', 'side_of_sample', 'skin_swab_type', 'temp_c_average',
       'collection_date', 'dewpt_c_average', 'host_subject_id',
       'rain_mm_average', 'sample_location', 'scientific_name',
       'host_common_name', 'pre_post_rupture', 'sample_type_site',
       'soil_sample_site', 'sample_packing_id', 'soil_type_control',
       'individual_humanid', 'rh_percent_average', 'sample_id_value_old',
       'col

In [4]:
# Non-null `qiita_study_id` entries per sample_group
# (dropna=False keeps a group for rows whose sample_group is missing).
(
    samples
    .groupby('sample_group', dropna=False)['qiita_study_id']
    .count()
    .to_frame()
)

Unnamed: 0_level_0,qiita_study_id
sample_group,Unnamed: 1_level_1
skin.head,76
skin.limb,222
skin.torso,356
soil.control,57
soil.corpse.head,42
soil.corpse.limb,40
soil.corpse.torso,204


In [5]:
# Load the feature-table artifact and view it as a pandas DataFrame.
data = (
    Artifact
    .load('table_1107.qza')
    .view(pd.DataFrame)
)

In [6]:
# Make the relative-abundance (ra) table: divide every row by its own total.
# Vectorised equivalent of a row-wise `apply(lambda x: x / x.sum(), axis=1)`;
# rows whose total is 0 come out as NaN, exactly as with the per-row division.
ra = data.div(data.sum(axis=1), axis=0)

### Import Decomposers

In [9]:
# Load the decomposer ASV table (tab-separated) and index it by the `100_bp`
# sequence column. Selecting the column by name instead of by position (4)
# keeps the load correct if columns are ever added or reordered in the file.
pmi_decom = pd.read_csv('../ASVs_repseq.txt', sep='\t', index_col='100_bp')

In [10]:
# Peek at the first rows of the decomposer ASV table.
pmi_decom.head()

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,9acc238746a1f2aa7745a0b5720c4eac,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,694120,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,6e8986f8088b452f964e7968bb8bca87,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,372201,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTAATTAAGTCGGATGTGAAATCCCCGAGCTCAACTTGGGAATTG,9e33b8985d44f2ed1b88ac7079f70793,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,143184,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,d5902b1353d3aa0effaaa81cba1d1516,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,108334,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTA...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,1ba09da532a0f9985b3caf72a47b42e3,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,101710,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Pseudomonadales,Moraxellaceae,Acinetobacter,


In [11]:
# (rows, columns) of the decomposer table as loaded.
pmi_decom.shape

(35, 11)

In [12]:
# pmi decomposers found in human data.
# Built as an ordered list (decomposer-table order, repeats removed) rather than
# from a raw set intersection: set iteration order over strings changes between
# kernel restarts, which made the column order of downstream tables unstable.
data_asvs = set(data.columns)
human_decomp = [asv for asv in pmi_decom.index.unique() if asv in data_asvs]

In [13]:
# Number of decomposer sequences that also appear as columns of the feature table.
len(human_decomp)

27

In [14]:
# Rows whose index value repeats an earlier row's index value.
pmi_decom.loc[pmi_decom.index.duplicated()]

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,8e87132c368c4f56dd114b1cab5f59a6,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,90942,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,92460066faddd83314cbc2348bf4fd29,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,18517,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,48113de4cb4849e5d543cbb0579c847e,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,4319,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,3b97413ffc0ea1b441f7bd9daae2e3ee,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,346906,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,07db6c743fdc4cdcc722d93f60cc7d7d,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,1191958,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,Ignatzschineria larvae


In [15]:
# Setting sequences to 100 bp created duplicate index values;
# drop the repeats, keeping the first row seen for each index value.
pmi_decom = pmi_decom.loc[~pmi_decom.index.duplicated(keep='first')]


In [16]:
# Re-run the duplicate check; an empty frame means every index value is now unique.
pmi_decom.loc[pmi_decom.index.duplicated()]

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [17]:
# (rows, columns) of the decomposer table at this point in the notebook.
pmi_decom.shape

(30, 11)

In [16]:
# Group samples: keep only the shared decomposer columns, then average the
# relative abundances of the rows within each sample_group.
ra_human = (
    ra.loc[:, human_decomp]
    .groupby(samples['sample_group'])
    .mean()
)

In [17]:
# Display the per-sample_group mean relative abundance of each shared decomposer sequence.
ra_human

Unnamed: 0_level_0,TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACTTAAGTTAGATGTGAAAGCCCCGGGCTTAACCTGGGAATTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGATAAGTCAGCTGTGAAAGTTTACGGCTCAACCGTGAAATTG,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACCTGAAAGTCAGCTGTGAAAGTTAGTAGCTCAACTACTAAATTG,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTACTTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTCTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,TACGGAGGATCCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGAAAGGAAGTCAGTTGTGAAATTTTATGGCTCAACCATAAACTTG,TACGTAGGGGGCTAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTTCGCAGGCGGAAATATAAGTCAGGTGTAAAAGGCGGAGGCTCAACCTCCGTAAGC,...,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTAATTAAGTCGGATGTGAAATCCCCGAGCTCAACTTGGGAATTG,TACGTAGGGGGCTAGCGTTGTCCGGAATCACTGGGCGTAAAGGGTTCGCAGGCGGAAATGCAAGTCAGATGTAAAAGGCAGTAGCTTAACTACTGTAAGC,TACGTAGGGGGCTAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTTCGCAGGCGGAAATACAAGTCAGGTGTAAAAGGCGGAGGCTTAACCTCCGTAAGC,TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGGAAAGTCAGCTGTGAAAGTTTGTGGCTCAACCACGAAATTG,TACGGGGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGTTACTTAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCCTGAAAGTCAGCTGTGAAAGTTAATAGCTCAACTATTAAATTG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGGTAAGTCAGCTGTGAAAGTTTACGGCTCAACCGTGAAATTG,TACGGAGGATCCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGGTAAGTCAGCTGTGAAAGTTTGCGGCTTAACCGTAAAATTG,TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTT
TTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG
sample_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
skin.head,0.01821,0.0001758945,0.005582,1.4e-05,0.013917,0.239145,0.002706,0.020534,0.000373,4e-06,...,0.0,4.58406e-06,0.001125,0.00039,0.001308,0.000519,0.003017,0.002308,0.000532,0.030697
skin.limb,0.047191,0.0003798398,0.014382,6.1e-05,0.020526,0.129083,0.003973,0.023472,0.001167,2.7e-05,...,8.719307e-06,5.342011e-06,0.002735,0.003151,0.001796,0.004636,0.002752,0.006468,0.000625,0.019293
skin.torso,0.04481,0.0008105521,0.013041,1.3e-05,0.023367,0.115974,0.005622,0.023959,0.000365,3.2e-05,...,1.627455e-05,1.685784e-05,0.000837,0.005829,0.000775,0.008149,0.001349,0.00582,0.000378,0.018163
soil.control,1.4e-05,8.65636e-07,0.001011,0.0,2.3e-05,0.015994,1.4e-05,4.6e-05,1.3e-05,2e-06,...,8.46966e-07,0.0,0.000108,0.000976,1.9e-05,5e-06,3e-05,3.6e-05,8e-06,0.000151
soil.corpse.head,0.001468,3.921604e-05,0.025597,5.1e-05,0.000312,0.018328,0.000111,0.00208,0.002167,4e-06,...,0.0,0.0,0.000749,0.013613,0.000306,0.0001,0.001029,0.000206,0.001581,0.036921
soil.corpse.limb,0.001905,1.023598e-05,0.036762,0.0,0.000762,0.018984,7.3e-05,0.00042,0.000226,0.0,...,0.0,0.0,0.000265,0.014174,0.000417,7.3e-05,0.000304,0.000645,6.7e-05,0.016757
soil.corpse.torso,0.002272,0.0001407231,0.028281,9.3e-05,0.003226,0.027567,0.000226,0.001876,0.00124,2e-06,...,3.772105e-05,2.180245e-07,0.001029,0.007741,0.000978,0.000502,0.002315,0.000971,0.00059,0.017445


In [18]:
# Put sequences on the rows, then add up the group means of all sequences that
# share a genus label in the decomposer table.
(
    ra_human
    .transpose()
    .groupby(pmi_decom['genus'])
    .sum()
)

sample_group,skin.head,skin.limb,skin.torso,soil.control,soil.corpse.head,soil.corpse.limb,soil.corpse.torso
genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acinetobacter,0.044058,0.052845,0.056054,0.002423,0.099872,0.078683,0.085313
Bacteroides,0.009752,0.016246,0.011988,0.000146,0.008682,0.003098,0.014213
Ignatzschineria,0.253063,0.149609,0.139341,0.016017,0.01864,0.019746,0.030793
Oblitimonas,0.066605,0.056453,0.068028,0.000253,0.024424,0.004198,0.023448
Peptoniphilus,0.001184,0.002791,0.000902,0.000111,0.000754,0.000267,0.001032
Savagea,0.086761,0.058346,0.055011,7.4e-05,0.002612,0.003666,0.004385
Vagococcus,0.013266,0.023549,0.023709,4.3e-05,0.001662,0.000706,0.003168
Wohlfahrtiimonas,0.01873,0.051826,0.052959,2e-05,0.001568,0.001977,0.002774
