# Decomp community

In [1]:
# qiime imports
import qiime2
from qiime2 import Artifact, Metadata

# General Tool Imports
import numpy as np
import pandas as pd
import collections
from pickle import load, dump
from IPython.display import display
import warnings

# Plotting Imports
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
from statannotations.Annotator import Annotator

import itertools
import scipy
import skbio

from skbio.stats import subsample_counts
from skbio import OrdinationResults
from statsmodels.sandbox.stats.multicomp import multipletests

%matplotlib inline

In [2]:
# Read the sample metadata file with QIIME 2, then convert it to a pandas DataFrame
sample_metadata = Metadata.load('10321_20180418-105405.txt')
samples = sample_metadata.to_dataframe()

In [3]:
# Non-null `fly` entries per family; dropna=False keeps a missing family as its own group
samples.groupby(['family'], dropna=False)[['fly']].count()

Unnamed: 0_level_0,fly
family,Unnamed: 1_level_1
Anthomyiidae,3
Calliphoridae,74
Muscidae,1


In [4]:
# Read counts table; the first CSV column becomes the index
reads = pd.read_csv(
    'reads_per_sample_10321.csv',
    index_col=0,
)

In [5]:
# Keep only samples whose value in column '0' of `reads` exceeds 1000
deep_enough = reads.loc[reads['0'] > 1000].index
samples = samples.loc[samples.index.isin(deep_enough)].copy()

In [6]:
# Same per-family tally of non-null `fly` entries, now on the read-depth-filtered samples
samples.groupby(['family'], dropna=False)[['fly']].count()

Unnamed: 0_level_0,fly
family,Unnamed: 1_level_1
Anthomyiidae,3
Calliphoridae,71
Muscidae,1


In [7]:
# Load the QIIME 2 artifact 'table_1170.qza' and view it as a pandas DataFrame
table_artifact = Artifact.load('table_1170.qza')
data = table_artifact.view(pd.DataFrame)

In [8]:
# Restrict the table to rows whose index is among the retained (>1000 reads) samples
keep_rows = data.index.isin(samples.index)
data = data.loc[keep_rows]

In [9]:
# Relative-abundance (ra) table: divide every row by its own row total.
# Vectorised `div` replaces the per-row Python lambda; a row summing to 0 still yields NaN.
ra = data.div(data.sum(axis=1), axis=0)

### Add sequence data to the PMI ASV table

In [10]:
# pmi_decom = pd.read_excel('../more_ASVs.xlsx', index_col=0)

In [11]:
# # add asvs to taxa df
# pmi_decom['asv'] = Artifact.load('../PMI-16S-nochlomito-rep-seqs.qza').view(qiime2.Metadata).to_dataframe()

In [12]:
# # trim it to 100 bp
# pmi_decom['100_bp'] = pmi_decom['asv'].str[:100]

In [13]:
# pmi_decom[
#     ["kingdom", "phylum", "class", "order", "family", "genus", "species"]
# ] = pmi_decom.taxonomy.str.split(";", expand=True)

# for col in ["kingdom", "phylum", "class", "order", "family", "genus", "species"]:
#     pmi_decom[col] = pmi_decom[col].str.split('__').str[1]


In [14]:
# One-off export (left commented out): writes the table to the same path
# that the 'Import Decomposers' cell reads.
# pmi_decom.to_csv('../ASVs_repseq.txt',sep ='\t')

### Import Decomposers

In [15]:
# Load the decomposer ASV table, indexed by the 100-bp trimmed sequence.
# The index column is selected by name (it was position 4) so the load does not
# silently pick a different column if the file's column order changes.
pmi_decom = pd.read_csv('../ASVs_repseq.txt', sep='\t', index_col='100_bp')

In [16]:
# Count of non-null '#OTU ID' values within each genus
pmi_decom.groupby('genus')[['#OTU ID']].count()

Unnamed: 0_level_0,#OTU ID
genus,Unnamed: 1_level_1
Acinetobacter,5
Bacteroides,9
Ignatzschineria,4
Oblitimonas,1
Peptoniphilus,5
Savagea,3
Vagococcus,5
Wohlfahrtiimonas,3


In [17]:
# Truncating to 100 bp made some index values repeat;
# show every repeat after its first occurrence
repeat_mask = pmi_decom.index.duplicated(keep='first')
pmi_decom.loc[repeat_mask]

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,8e87132c368c4f56dd114b1cab5f59a6,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,90942,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,92460066faddd83314cbc2348bf4fd29,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,18517,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,48113de4cb4849e5d543cbb0579c847e,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,4319,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAG...,Bacteria,Firmicutes,Bacilli,Lactobacillales,Enterococcaceae,Vagococcus,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,3b97413ffc0ea1b441f7bd9daae2e3ee,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,346906,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCGCGTAGGTGGTATCTTAAGTTGGGTGTGAAATCCCCGGGCTCAACCTGGGAATTG,07db6c743fdc4cdcc722d93f60cc7d7d,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,1191958,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGGGCG...,Bacteria,Proteobacteria,Gammaproteobacteria,Cardiobacteriales,Wohlfahrtiimonadaceae,Ignatzschineria,Ignatzschineria larvae


In [18]:
# Drop the repeated index entries, retaining the first row seen for each sequence.
# NOTE(review): annotations on the dropped rows (e.g. a species call) are discarded — confirm intended.
is_repeat = pmi_decom.index.duplicated(keep='first')
pmi_decom = pmi_decom.loc[~is_repeat]

In [19]:
# Sanity check: no repeated index values should remain, so this should be empty
pmi_decom.loc[pmi_decom.index.duplicated()]

Unnamed: 0_level_0,#OTU ID,taxonomy,SUM,asv,kingdom,phylum,class,order,family,genus,species
100_bp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [20]:
# 5 rows had a duplicated index and were dropped; show the remaining table dimensions (rows, columns)
pmi_decom.shape

(30, 11)

In [21]:
# pmi decomposers found in fly data - 17 asvs
# Filter the index rather than intersecting Python sets: iteration order of a set of
# strings can change between interpreter sessions, which made the order of this list
# (and of every table sliced with it) non-reproducible. Order now follows pmi_decom.
fly_decomp = pmi_decom.index[pmi_decom.index.isin(data.columns)].unique().tolist()

In [22]:
# Mean relative abundance of each decomposer ASV within each family of samples
family_of_sample = samples['family']
ra_fly = ra.loc[:, fly_decomp].groupby(family_of_sample).mean()

In [23]:
# Sum the per-ASV family means over ASVs sharing a genus (rows = genus, columns = family)
genus_of_asv = pmi_decom['genus']
ra_fly.transpose().groupby(genus_of_asv).sum()

family,Anthomyiidae,Calliphoridae,Muscidae
genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Acinetobacter,0.0,0.012526,0.001268
Bacteroides,0.0,0.000233,0.0
Ignatzschineria,0.001512,0.171637,0.655151
Oblitimonas,0.0,0.000919,0.0
Peptoniphilus,0.0,0.002237,0.0
Savagea,0.0,0.007692,0.0
Vagococcus,0.0,0.055812,0.0
Wohlfahrtiimonas,0.0,0.008347,0.010037


### How many samples are these decomposer ASVs found in?

In [26]:
# Number of samples in each family in which each ASV is present (count > 0),
# shown next to the ASV's genus/species annotation.
# A boolean mask summed per group replaces groupby().apply(lambda ...):
# same counts, computed by the built-in aggregation instead of a Python callback.
detected = data[fly_decomp] > 0
pd.concat([pmi_decom.loc[pmi_decom.index.isin(fly_decomp), ['genus', 'species']],
           detected.groupby(samples['family']).sum().T], axis=1)

Unnamed: 0,genus,species,Anthomyiidae,Calliphoridae,Muscidae
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,0,21,0
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,0,24,0
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTAATTAAGTCGGATGTGAAATCCCCGAGCTCAACTTGGGAATTG,Acinetobacter,,0,4,1
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,0,4,0
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,0,3,0
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTCTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,0,29,0
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTTTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,0,2,0
TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGAAGTGAAAGCCCCGGGCTCAACCTGGGAATTG,Oblitimonas,Oblitimonas alkaliphila,0,8,0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,0,50,0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,0,9,0


In [29]:
## percent of samples each ASV is found in
# Numerator: samples per family where the ASV count is > 0 (built-in groupby sum of a
# boolean mask instead of groupby().apply(lambda ...)).
# Denominator: non-null entries per family for the same ASV columns.
detected = data[fly_decomp] > 0
n_detected = detected.groupby(samples['family']).sum().T
n_samples = data[fly_decomp].groupby(samples['family']).count().T
pd.concat([pmi_decom.loc[pmi_decom.index.isin(fly_decomp), ['genus', 'species']],
           n_detected / n_samples * 100], axis=1)

Unnamed: 0,genus,species,Anthomyiidae,Calliphoridae,Muscidae
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,0.0,29.577465,0.0
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTGCGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,0.0,33.802817,0.0
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTAATTAAGTCGGATGTGAAATCCCCGAGCTCAACTTGGGAATTG,Acinetobacter,,0.0,5.633803,100.0
TACAGAGGGTGCGAGCGTTAATCGGATTTACTGGGCGTAAAGCGTACGTAGGCGGCTTTTTAAGTCGGATGTGAAATCCCTGAGCTTAACTTAGGAATTG,Acinetobacter,,0.0,5.633803,0.0
TACAGAGGGTGCAAGCGTTAATCGGATTTACTGGGCGTAAAGCGCGCGTAGGCGGCTAATTAAGTCAAATGTGAAATCCCCGAGCTTAACTTGGGAATTG,Acinetobacter,,0.0,4.225352,0.0
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTCTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,0.0,40.84507,0.0
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCACGCAGGCGGCCTTTTAAGTCTGATGTGAAATCCCACGGCTTAACCGTGGAAGGT,Savagea,uncultured bacterium,0.0,2.816901,0.0
TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGAAGTGAAAGCCCCGGGCTCAACCTGGGAATTG,Oblitimonas,Oblitimonas alkaliphila,0.0,11.267606,0.0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,0.0,70.422535,0.0
TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCTCGGCTCAACCGAGGAAGGT,Vagococcus,,0.0,12.676056,0.0
