In [1]:
from glob import glob
import pandas as pd
from scipy.stats import mannwhitneyu
from statsmodels.sandbox.stats.multicomp import multipletests
import numpy as np
import seaborn as sns

# Set seaborn's global plot style to 'whitegrid'.
sns.set_style('whitegrid')

def p_adjust(pvalues, method='fdr_bh'):
    """Return multiple-testing-corrected p-values as a float array.

    Parameters
    ----------
    pvalues : array-like
        Raw p-values; handed to ``multipletests`` unchanged.
    method : str, optional
        Correction method name forwarded to ``multipletests``
        (default ``'fdr_bh'``).

    Returns
    -------
    numpy.ndarray
        The second element of the ``multipletests`` result (the corrected
        p-values), converted to ``float`` dtype.
    """
    # multipletests returns a tuple; only its second entry is needed here.
    _, corrected, *_ = multipletests(pvalues, method=method)
    return np.array(corrected, dtype=float)

# Correlating Kraken abundances with continuous vaccine titers

##### Michael Shaffer
##### 7/21/2022
##### Merck ESC, Sys bio group

To find out whether any bacterial taxa are correlated with vaccine response, we will pick time points (2, 4, 6, 8 and 12 months) and correlate the bacterial abundances at those time points with the continuous vaccine response at one year.

## Read in data

In [2]:
# Build the sample metadata table: three per-sample CSVs, all indexed by
# SampleID, are joined column-wise and then cleaned in a single chain.
meta = (
    pd.concat(
        [
            pd.read_csv('../../data/metadata/stool/stool_metadata.csv', index_col='SampleID'),
            pd.read_csv('../../data/metadata/stool/stool_abx_usage.csv', index_col='SampleID'),
            pd.read_csv('../../data/metadata/stool/stool_titers_yr1.csv', index_col='SampleID'),
        ],
        axis=1,
    )
    # Samples with no VR_group value get an explicit label.
    .assign(VR_group=lambda frame: frame['VR_group'].fillna('Not Measured'))
    .sort_values(['BabyN', 'age_at_collection'])
    # Keep only samples that have a median_mmNorm value.
    .loc[lambda frame: frame['median_mmNorm'].notna()]
)
meta.head()

Unnamed: 0_level_0,SubmissionType,SampleNumber,SampleIDValidation,DiversigenCheckInSampleName,BoxLocation,SampleType,SampleSource,SequencingType,BabyN,BabyN_checked,...,median_mmNorm_PCV,median_mmNorm_DTAPHib,protectNorm_Dip,protectNorm_TET,protectNorm_PRP (Hib),protectNorm_PT,protectNorm_PRN,protectNorm_FHA,geommean_protectNorm,VR_group_v2
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
106_V2,Primary in Tube,69,,,"Box 8, F3",Stool,Human Infant,MetaG,106,,...,0.061955,0.052874,2.1,3.0,2.6,0.3125,0.3125,1.375,1.140388,NVR
106_V6,Primary in Tube,121,,,"Box 10, C1",Stool,Human Infant,MetaG,106,,...,0.061955,0.052874,2.1,3.0,2.6,0.3125,0.3125,1.375,1.140388,NVR
106_V7,Primary in Tube,158,,,"Box 11, C3",Stool,Human Infant,MetaG,106,,...,0.061955,0.052874,2.1,3.0,2.6,0.3125,0.3125,1.375,1.140388,NVR
106_S_1,Primary in Tube,162,,,"Box 11, D1",Stool,Human Infant,MetaG,106,,...,0.061955,0.052874,2.1,3.0,2.6,0.3125,0.3125,1.375,1.140388,NVR
106_A1,Primary in Tube,188,,,"Box 12, B3",Stool,Human Infant,MetaG,106,,...,0.061955,0.052874,2.1,3.0,2.6,0.3125,0.3125,1.375,1.140388,NVR


In [3]:
# Genus-level Kraken abundance table (first column becomes the row index),
# restricted to the sample columns whose metadata has gt_2.5 == True.
kraken_genus_abunds = pd.read_csv(
    '../../data/stool/kraken_taxa_level_abunds/kraken_genus_abunds.tsv',
    sep='\t',
    index_col=0,
).loc[:, meta.query("`gt_2.5` == True").index]
print(kraken_genus_abunds.shape)
display(kraken_genus_abunds.head())

(971, 529)


Unnamed: 0,106_V2,106_V6,106_V7,106_S_1,106_A1,107_V2,107_V3,107_V6,107_S1,107_V7,...,264_S2F,264_V9,264_V10,264_V11,264_V12,265_V2,265_V5,265_V6,265_V8,265_S1
d__Bacteria|g__Thermobaculum,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
d__Bacteria|p__Acidobacteria|c__Acidobacteriia|o__Acidobacteriales|f__Acidobacteriaceae|g__Acidobacterium,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,10.0,4.0,0.0,0.0,0.0,0.0,0.0
d__Bacteria|p__Acidobacteria|c__Acidobacteriia|o__Acidobacteriales|f__Acidobacteriaceae|g__Candidatus_Koribacter,0.0,0.0,17.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,2.0,5.0,0.0,2.0,0.0,4.0,0.0
d__Bacteria|p__Acidobacteria|c__Acidobacteriia|o__Acidobacteriales|f__Acidobacteriaceae|g__Granulicella,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,1.0,13.0,6.0,0.0,1.0,0.0,8.0,0.0
d__Bacteria|p__Acidobacteria|c__Acidobacteriia|o__Acidobacteriales|f__Acidobacteriaceae|g__Terriglobus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,14.0,3.0,0.0,2.0,0.0,3.0,0.0


In [4]:
# Family-level Kraken abundance table (first column becomes the row index),
# restricted to the sample columns whose metadata has gt_2.5 == True.
kraken_family_abunds = pd.read_csv(
    '../../data/stool/kraken_taxa_level_abunds/kraken_family_abunds.tsv',
    sep='\t',
    index_col=0,
).loc[:, meta.query("`gt_2.5` == True").index]
print(kraken_family_abunds.shape)
display(kraken_family_abunds.head())

(320, 529)


Unnamed: 0,106_V2,106_V6,106_V7,106_S_1,106_A1,107_V2,107_V3,107_V6,107_S1,107_V7,...,264_S2F,264_V9,264_V10,264_V11,264_V12,265_V2,265_V5,265_V6,265_V8,265_S1
d__Bacteria|p__Acidobacteria|c__Acidobacteriia|o__Acidobacteriales|f__Acidobacteriaceae,0.0,0.0,17.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,6.0,3.0,39.0,20.0,0.0,5.0,0.0,15.0,0.0
d__Bacteria|p__Acidobacteria|c__Solibacteres|o__Solibacterales|f__Solibacteraceae,0.0,0.0,14.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
d__Bacteria|p__Actinobacteria|c__Acidimicrobiia|o__Acidimicrobiales|f__Acidimicrobiaceae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
d__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Acidothermales|f__Acidothermaceae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
d__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae,0.0,163.0,6.0,5.0,283.0,0.0,9.0,87.0,82.0,18.0,...,1817.0,32.0,33.0,33.0,48.0,98.0,14.0,16.0,64.0,174.0


In [5]:
# Sample IDs present in both the metadata and the genus abundance table.
in_both = set(meta.index) & set(kraken_genus_abunds.columns)
# Select with a boolean mask rather than passing the set to .loc: set indexers
# are deprecated in pandas 1.5 and raise TypeError in pandas 2.x, and a set has
# no defined order. The mask keeps the rows in the metadata's existing order.
meta_matched = meta.loc[meta.index.isin(in_both)]
print(meta_matched.shape)

(529, 86)


  meta_matched = meta.loc[in_both]


In [6]:
# Per-visit slices of the matched metadata, selected on the VisitCode column.
meta_v5 = meta_matched.loc[meta_matched['VisitCode'] == 'V5']
meta_v6 = meta_matched.loc[meta_matched['VisitCode'] == 'V6']
meta_v7 = meta_matched.loc[meta_matched['VisitCode'] == 'V7']
meta_v9 = meta_matched.loc[meta_matched['VisitCode'] == 'V9']

# Same slices, limited to samples that have a median_mmNorm_PCV value.
meta_PCV = meta_matched.loc[meta_matched['median_mmNorm_PCV'].notna()]
meta_PCV_v5 = meta_PCV.loc[meta_PCV['VisitCode'] == 'V5']
meta_PCV_v6 = meta_PCV.loc[meta_PCV['VisitCode'] == 'V6']
meta_PCV_v7 = meta_PCV.loc[meta_PCV['VisitCode'] == 'V7']
meta_PCV_v9 = meta_PCV.loc[meta_PCV['VisitCode'] == 'V9']

## Genus level association with VRness

In [7]:
# Genus-level LVR vs NVR comparison at visit V5 (Mann-Whitney U per genus,
# followed by multiple-testing correction via p_adjust).
kraken_genus_abunds_v5 = kraken_genus_abunds[meta_v5.index]
# Sample IDs of each group; looked up once instead of once per genus.
lvr_samples = meta_v5.index[meta_v5['VR_group'] == 'LVR']
nvr_samples = meta_v5.index[meta_v5['VR_group'] == 'NVR']
kraken_genus_stats_v5_rows = list()
for genus, row in kraken_genus_abunds_v5.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test genera whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_genus_stats_v5_rows.append([genus, lvr_mean, nvr_mean, stat, p_value])
kraken_genus_stats_v5 = pd.DataFrame(
    kraken_genus_stats_v5_rows,
    columns=['genus', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_genus_stats_v5['p_adj'] = p_adjust(kraken_genus_stats_v5['p_value'])
kraken_genus_stats_v5.head()

Unnamed: 0,genus,LVR_mean,NVR_mean,statistic,p_value,p_adj
115,d__Bacteria|p__Proteobacteria|c__Gammaproteoba...,0.571429,288.166667,58.0,0.022971,0.957382
11,d__Bacteria|p__Actinobacteria|c__Coriobacterii...,16.428571,5.75,171.5,0.053512,0.957382
34,d__Bacteria|p__Firmicutes|c__Bacilli|o__Lactob...,6704.285714,2241.083333,183.0,0.062108,0.957382
94,d__Bacteria|p__Proteobacteria|c__Gammaproteoba...,40.142857,5.972222,180.5,0.067442,0.957382
109,d__Bacteria|p__Proteobacteria|c__Gammaproteoba...,18.285714,1.555556,171.0,0.078602,0.957382


In [8]:
# Genus-level LVR vs NVR comparison at visit V6 (Mann-Whitney U per genus,
# followed by multiple-testing correction via p_adjust).
kraken_genus_abunds_v6 = kraken_genus_abunds[meta_v6.index]
# Sample IDs of each group; looked up once instead of once per genus.
lvr_samples = meta_v6.index[meta_v6['VR_group'] == 'LVR']
nvr_samples = meta_v6.index[meta_v6['VR_group'] == 'NVR']
kraken_genus_stats_v6_rows = list()
for genus, row in kraken_genus_abunds_v6.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test genera whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_genus_stats_v6_rows.append([genus, lvr_mean, nvr_mean, stat, p_value])
kraken_genus_stats_v6 = pd.DataFrame(
    kraken_genus_stats_v6_rows,
    columns=['genus', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_genus_stats_v6['p_adj'] = p_adjust(kraken_genus_stats_v6['p_value'])
kraken_genus_stats_v6.head()

Unnamed: 0,genus,LVR_mean,NVR_mean,statistic,p_value,p_adj
2,d__Bacteria|p__Actinobacteria|c__Actinobacteri...,77.833333,1.484848,293.5,0.002944,0.223614
13,d__Bacteria|p__Actinobacteria|c__Coriobacterii...,127.833333,0.0,247.5,0.003636,0.223614
17,d__Bacteria|p__Actinobacteria|c__Coriobacterii...,11.0,13.909091,286.5,0.011924,0.376696
71,d__Bacteria|p__Proteobacteria|c__Alphaproteoba...,10.583333,2.818182,283.0,0.015719,0.376696
79,d__Bacteria|p__Proteobacteria|c__Deltaproteoba...,12.833333,0.151515,260.0,0.017698,0.376696


In [9]:
# Genus-level LVR vs NVR comparison at visit V7 (Mann-Whitney U per genus,
# followed by multiple-testing correction via p_adjust).
kraken_genus_abunds_v7 = kraken_genus_abunds[meta_v7.index]
# Sample IDs of each group; looked up once instead of once per genus.
lvr_samples = meta_v7.index[meta_v7['VR_group'] == 'LVR']
nvr_samples = meta_v7.index[meta_v7['VR_group'] == 'NVR']
kraken_genus_stats_v7_rows = list()
for genus, row in kraken_genus_abunds_v7.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test genera whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_genus_stats_v7_rows.append([genus, lvr_mean, nvr_mean, stat, p_value])
kraken_genus_stats_v7 = pd.DataFrame(
    kraken_genus_stats_v7_rows,
    columns=['genus', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_genus_stats_v7['p_adj'] = p_adjust(kraken_genus_stats_v7['p_value'])
kraken_genus_stats_v7.head()

Unnamed: 0,genus,LVR_mean,NVR_mean,statistic,p_value,p_adj
33,d__Bacteria|p__Bacteroidetes|c__Cytophagia|o__...,11.727273,2.0,323.0,0.001031,0.080917
46,d__Bacteria|p__Firmicutes|c__Bacilli|o__Lactob...,563.272727,0.923077,343.0,0.00125,0.080917
5,d__Bacteria|p__Actinobacteria|c__Actinobacteri...,10.818182,2.307692,343.0,0.001853,0.080917
64,d__Bacteria|p__Firmicutes|c__Clostridia|o__Clo...,19.636364,14.051282,321.5,0.009481,0.261978
49,d__Bacteria|p__Firmicutes|c__Clostridia|o__Clo...,279.636364,136.282051,324.0,0.010628,0.261978


In [10]:
# Genus-level LVR vs NVR comparison at visit V9 (Mann-Whitney U per genus,
# followed by multiple-testing correction via p_adjust).
kraken_genus_abunds_v9 = kraken_genus_abunds[meta_v9.index]
# Sample IDs of each group; looked up once instead of once per genus.
lvr_samples = meta_v9.index[meta_v9['VR_group'] == 'LVR']
nvr_samples = meta_v9.index[meta_v9['VR_group'] == 'NVR']
kraken_genus_stats_v9_rows = list()
for genus, row in kraken_genus_abunds_v9.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test genera whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_genus_stats_v9_rows.append([genus, lvr_mean, nvr_mean, stat, p_value])
kraken_genus_stats_v9 = pd.DataFrame(
    kraken_genus_stats_v9_rows,
    columns=['genus', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_genus_stats_v9['p_adj'] = p_adjust(kraken_genus_stats_v9['p_value'])
kraken_genus_stats_v9.head()

Unnamed: 0,genus,LVR_mean,NVR_mean,statistic,p_value,p_adj
112,d__Bacteria|p__Proteobacteria|c__Betaproteobac...,5.857143,26.457143,54.5,0.022194,0.669116
85,d__Bacteria|p__Firmicutes|c__Negativicutes|o__...,1624.857143,43.342857,189.5,0.023157,0.669116
142,d__Bacteria|p__Proteobacteria|c__Gammaproteoba...,10.0,44.828571,57.0,0.02817,0.669116
50,d__Bacteria|p__Firmicutes|c__Bacilli|o__Bacill...,37.285714,18.285714,186.5,0.031915,0.669116
21,d__Bacteria|p__Actinobacteria|c__Rubrobacteria...,3.857143,21.914286,60.0,0.035015,0.669116


Nothing is significant at the genus level after multiple-testing correction: no adjusted p-value falls below 0.05 at any of the four visits.

## Family level association with VRness

In [11]:
# Family-level LVR vs NVR comparison at visit V5 (Mann-Whitney U per family,
# followed by multiple-testing correction via p_adjust).
kraken_family_abunds_v5 = kraken_family_abunds[meta_v5.index]
# Sample IDs of each group; looked up once instead of once per family.
lvr_samples = meta_v5.index[meta_v5['VR_group'] == 'LVR']
nvr_samples = meta_v5.index[meta_v5['VR_group'] == 'NVR']
kraken_family_stats_v5_rows = list()
for family, row in kraken_family_abunds_v5.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test families whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_family_stats_v5_rows.append([family, lvr_mean, nvr_mean, stat, p_value])
kraken_family_stats_v5 = pd.DataFrame(
    kraken_family_stats_v5_rows,
    columns=['family', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_family_stats_v5['p_adj'] = p_adjust(kraken_family_stats_v5['p_value'])
kraken_family_stats_v5.head()

Unnamed: 0,family,LVR_mean,NVR_mean,statistic,p_value,p_adj
20,d__Bacteria|p__Bacteroidetes|c__Sphingobacteri...,13.714286,12.666667,184.5,0.053241,1.0
36,d__Bacteria|p__Firmicutes|c__Clostridia|o__Clo...,249.857143,93.444444,185.0,0.054091,1.0
26,d__Bacteria|p__Firmicutes|c__Bacilli|o__Lactob...,6734.857143,2256.166667,183.0,0.062108,1.0
71,d__Bacteria|p__Proteobacteria|c__Gammaproteoba...,41.714286,116.194444,174.0,0.117916,1.0
66,d__Bacteria|p__Proteobacteria|c__Gammaproteoba...,11.0,5.333333,166.5,0.121811,1.0


In [12]:
# Family-level LVR vs NVR comparison at visit V6 (Mann-Whitney U per family,
# followed by multiple-testing correction via p_adjust).
kraken_family_abunds_v6 = kraken_family_abunds[meta_v6.index]
# Sample IDs of each group; looked up once instead of once per family.
lvr_samples = meta_v6.index[meta_v6['VR_group'] == 'LVR']
nvr_samples = meta_v6.index[meta_v6['VR_group'] == 'NVR']
kraken_family_stats_v6_rows = list()
for family, row in kraken_family_abunds_v6.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test families whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_family_stats_v6_rows.append([family, lvr_mean, nvr_mean, stat, p_value])
kraken_family_stats_v6 = pd.DataFrame(
    kraken_family_stats_v6_rows,
    columns=['family', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_family_stats_v6['p_adj'] = p_adjust(kraken_family_stats_v6['p_value'])
kraken_family_stats_v6.head()

Unnamed: 0,family,LVR_mean,NVR_mean,statistic,p_value,p_adj
50,d__Bacteria|p__Proteobacteria|c__Alphaproteoba...,18.416667,4.30303,293.0,0.012372,0.895142
44,d__Bacteria|p__Firmicutes|c__Negativicutes|o__...,10795.916667,148.272727,270.0,0.052819,0.895142
7,d__Bacteria|p__Actinobacteria|c__Actinobacteri...,11.833333,2.424242,267.0,0.068371,0.895142
3,d__Bacteria|p__Actinobacteria|c__Actinobacteri...,10.666667,6.909091,268.0,0.071678,0.895142
42,d__Bacteria|p__Firmicutes|c__Clostridia|o__Clo...,219.416667,137.848485,264.5,0.089834,0.895142


In [13]:
# Family-level LVR vs NVR comparison at visit V7 (Mann-Whitney U per family,
# followed by multiple-testing correction via p_adjust).
kraken_family_abunds_v7 = kraken_family_abunds[meta_v7.index]
# Sample IDs of each group; looked up once instead of once per family.
lvr_samples = meta_v7.index[meta_v7['VR_group'] == 'LVR']
nvr_samples = meta_v7.index[meta_v7['VR_group'] == 'NVR']
kraken_family_stats_v7_rows = list()
for family, row in kraken_family_abunds_v7.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test families whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_family_stats_v7_rows.append([family, lvr_mean, nvr_mean, stat, p_value])
kraken_family_stats_v7 = pd.DataFrame(
    kraken_family_stats_v7_rows,
    columns=['family', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_family_stats_v7['p_adj'] = p_adjust(kraken_family_stats_v7['p_value'])
kraken_family_stats_v7.head()

Unnamed: 0,family,LVR_mean,NVR_mean,statistic,p_value,p_adj
44,d__Bacteria|p__Firmicutes|c__Clostridia|o__Clo...,3839.636364,720.487179,325.0,0.009975,0.315375
37,d__Bacteria|p__Firmicutes|c__Clostridia|o__Clo...,279.636364,136.282051,324.0,0.010628,0.315375
39,d__Bacteria|p__Firmicutes|c__Clostridia|o__Clo...,3690.454545,2043.564103,316.0,0.017996,0.315375
3,d__Bacteria|p__Actinobacteria|c__Actinobacteri...,13.363636,6.794872,314.0,0.018216,0.315375
25,d__Bacteria|p__Bacteroidetes|c__Sphingobacteri...,28.272727,13.974359,313.5,0.020849,0.315375


In [14]:
# Family-level LVR vs NVR comparison at visit V9 (Mann-Whitney U per family,
# followed by multiple-testing correction via p_adjust).
kraken_family_abunds_v9 = kraken_family_abunds[meta_v9.index]
# Sample IDs of each group; looked up once instead of once per family.
lvr_samples = meta_v9.index[meta_v9['VR_group'] == 'LVR']
nvr_samples = meta_v9.index[meta_v9['VR_group'] == 'NVR']
kraken_family_stats_v9_rows = list()
for family, row in kraken_family_abunds_v9.iterrows():
    lvr_abunds = row[lvr_samples]
    nvr_abunds = row[nvr_samples]
    lvr_mean = lvr_abunds.mean()
    nvr_mean = nvr_abunds.mean()
    # Only test families whose mean abundance exceeds 10 in at least one group.
    if lvr_mean > 10 or nvr_mean > 10:
        # State the alternative explicitly: older SciPy releases defaulted to a
        # deprecated half-sized p-value when `alternative` was omitted.
        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds, alternative='two-sided')
        kraken_family_stats_v9_rows.append([family, lvr_mean, nvr_mean, stat, p_value])
kraken_family_stats_v9 = pd.DataFrame(
    kraken_family_stats_v9_rows,
    columns=['family', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value'],
).sort_values('p_value')
kraken_family_stats_v9['p_adj'] = p_adjust(kraken_family_stats_v9['p_value'])
kraken_family_stats_v9.head()

Unnamed: 0,family,LVR_mean,NVR_mean,statistic,p_value,p_adj
79,d__Bacteria|p__Proteobacteria|c__Betaproteobac...,20.0,89.057143,39.5,0.005348,0.394218
75,d__Bacteria|p__Proteobacteria|c__Alphaproteoba...,5.857143,19.828571,51.0,0.016463,0.394218
109,d__Bacteria|p__Proteobacteria|c__Gammaproteoba...,9.285714,42.857143,52.0,0.018101,0.394218
34,d__Bacteria|p__Bacteroidetes|o__Bacteroidetes_...,4.857143,31.371429,57.5,0.028867,0.394218
84,d__Bacteria|p__Proteobacteria|c__Betaproteobac...,11.571429,31.971429,57.5,0.029385,0.394218


None of the individual genera within the most significant family show compelling results.