# Statistics


**IDEAS**
- SMETANA
    - Find likelyhood that a community member from family X is dependent on a community member in family Y.
    - Find likelyhood that a community member from family X is dependent on a compound A.

- SteadierCOM
    - No statistics?

In [2]:
import math

import pandas as pd
from molmass import Formula
import reframed

In [3]:
import sys
sys.path.append("../functions/")
import general_functions as general_func

import colors_MAGs
import scipy.stats as stats

In [4]:
import numpy as np

### Load universal model

In [5]:
model_uni = reframed.load_cbmodel("/Users/idunmariaburgos/universal_model_extension/output/universe_bacteria.xml")

### Load data 

In [6]:
all_mags_paper = general_func.read_allmags_data()

In [7]:
SC1_C = pd.read_csv("../output/steadiercom_sample_0.1.3/results/results_99_SC1_C.tsv",sep="\t")
SC2_C = pd.read_csv("../output/steadiercom_sample_0.1.3/results/results_99_SC2_C.tsv",sep="\t")
SC1_X = pd.read_csv("../output/steadiercom_sample_0.1.3/results/results_99_SC1_X.tsv",sep="\t")

steadier_sample = pd.concat([SC1_C,SC2_C,SC1_X])
steadier_sample = steadier_sample[(steadier_sample.frequency>0.1) & (steadier_sample.rate>1e-6)]

In [8]:
steadier_sample_cross = steadier_sample[(steadier_sample.donor!="environment") & (steadier_sample.receiver!="environment") ].copy()

### Prepare to process data

In [9]:
chebi_lut, chebi_interesting, chebi_colors_ser = colors_MAGs.chebi_rxn_color_func(rxn_based=False)

**Change names of family for readability and better grouping**

In [10]:
all_mags_paper_reduced = all_mags_paper.copy()

all_mags_paper_reduced[all_mags_paper_reduced["new_coverage"]>10][["Source","Substrate","Family","new_coverage"]].sort_values(["Source","Substrate"])

all_mags_paper_reduced["Family"] = all_mags_paper.apply(lambda row: "f_"+row.Family,axis=1)

total_members_family = all_mags_paper_reduced.groupby("Family").count()["Source"].to_dict()
all_mags_paper_reduced["Family"] = all_mags_paper_reduced.apply(lambda row: row.Family if total_members_family[row.Family]>1 else "Other",axis=1)


In [11]:
def mag2family(all_mags_paper):  
    
    family_groups = all_mags_paper.groupby("Family").groups
    mag2family_dict = {mag:family for family,mags in family_groups.items() for mag in mags}
    
    return family_groups,mag2family_dict

**Create dictionaries to translate components into larger groups - MAG-> family, compound-> super_class**

In [12]:
MAGs_steady_com = set(list(steadier_sample_cross.donor.values)+list(steadier_sample_cross.receiver.values))

MAG2sour_sub_id = pd.read_csv("../output/MAG2community_id.tsv",sep="\t",header=None)
MAG2sour_sub_id.columns=["MAG","community_id"]

met2superclass_dict = pd.read_csv("../output/met_chebi_class.tsv",sep="\t",index_col=0)["self defined super class"].to_dict()

family_groups,mag2family_dict = mag2family(all_mags_paper_reduced)

In [13]:
def family_donor(row):
    if row.donor=="environment":
        return "environment"
    else:
        return mag2family_dict[row.donor]


def family_receiver(row):
    if row.receiver=="environment":
        return "environment"
    else:
        return mag2family_dict[row.receiver]


In [14]:
def met2metname(met):
    met_name = model_uni.metabolites[met].name
    return met_name

### Process data

**All compounds**

In [15]:
steadier_sample.loc[:,"family_donor"] = steadier_sample.apply(family_donor,axis=1).copy()
steadier_sample.loc[:,"family_receiver"] = steadier_sample.apply(family_receiver,axis=1).copy()
steadier_sample = steadier_sample[steadier_sample.compound.isin(met2superclass_dict.keys())].copy()
steadier_sample.loc[:,"super_class"] = steadier_sample.apply(lambda x: met2superclass_dict[x.compound],axis=1)

compounds = steadier_sample["compound"].map(met2metname)
steadier_sample.drop("compound",axis=1,inplace=True)
steadier_sample.loc[:,"compound"] = compounds


**Compounds cross-fed**

In [16]:
steadier_sample_cross.loc[:,"family_donor"] = steadier_sample_cross.apply(family_donor,axis=1).copy()
steadier_sample_cross.loc[:,"family_receiver"] = steadier_sample_cross.apply(family_receiver,axis=1).copy()
steadier_sample_cross = steadier_sample_cross[steadier_sample_cross.compound.isin(met2superclass_dict.keys())].copy()
steadier_sample_cross.loc[:,"super_class"] = steadier_sample_cross.apply(lambda x: met2superclass_dict[x.compound],axis=1)

compounds = steadier_sample_cross["compound"].map(met2metname)
steadier_sample_cross.drop("compound",axis=1,inplace=True)
steadier_sample_cross.loc[:,"compound"] = compounds

# FILTERS OUT UNINTERESTING COMPOUNDS
steadier_sample_cross = steadier_sample_cross[steadier_sample_cross.super_class.isin(chebi_lut.keys())].copy()

steadier_sample_cross["mass_rate*frequency"]=steadier_sample_cross["mass_rate"]*steadier_sample_cross["frequency"]

In [17]:
steadier_sample_cross

Unnamed: 0,donor,receiver,mass_rate,rate,frequency,community,medium,family_donor,family_receiver,super_class,compound,mass_rate*frequency
13,CH15-bin.0,CH15-bin.7,4.921516e-03,0.027318,0.27,CD_P,SC1_C,f_Treponemataceae,f_Sphaerochaetaceae,simple sugars,D-Glucose,1.328809e-03
33,CH13-bin.11,CH13-bin.4,2.822338e-03,0.008245,0.11,CD_A,SC1_C,f_Desulfovibrionaceae,f_Bacteroidaceae,oligosaccharides,Cellobiose,3.104572e-04
36,CH13-bin.14,CH13-bin.25,2.705991e-03,0.015020,0.20,CD_A,SC1_C,f_Lachnospiraceae,f_Sphaerochaetaceae,simple sugars,D-Glucose,5.411983e-04
39,CH15-bin.7,CH15-bin.8,2.435377e-03,0.013518,0.12,CD_P,SC1_C,f_Sphaerochaetaceae,f_Desulfovibrionaceae,simple sugars,D-Glucose,2.922452e-04
41,CH15-bin.23,CH15-bin.8,2.385105e-03,0.013239,0.17,CD_P,SC1_C,Other,f_Desulfovibrionaceae,simple sugars,D-Glucose,4.054678e-04
...,...,...,...,...,...,...,...,...,...,...,...,...
313,CH9-bin.0,CH9-bin.4,6.053229e-07,0.000018,0.97,CM_X,SC1_X,Other,f_Lactobacillaceae,gases,Hydrogen sulfide,5.871632e-07
315,CH14-bin.1,CH14-bin.4,5.164134e-07,0.000256,0.16,CD_X,SC1_X,f_Lachnospiraceae,f_Desulfovibrionaceae,gases,Hydrogen,8.262614e-08
316,CH3-bin.2,CH3-bin.0,4.612004e-07,0.000004,0.26,M_X,SC1_X,f_Clostridiaceae,Other,nucleotides and derivatives,Thymine C5H6N2O2,1.199121e-07
319,CH9-bin.2,CH9-bin.5,3.689349e-07,0.000011,0.24,CM_X,SC1_X,f_Clostridiaceae,f_Lactobacillaceae,gases,Hydrogen sulfide,8.854438e-08


### Overview of groups

In [18]:
pd.Series({family:len(mags) for family,mags in family_groups.items()})

Other                    18
f_Bacteroidaceae          4
f_CAG-74                  2
f_Clostridiaceae          3
f_Desulfobulbaceae        2
f_Desulfovibrionaceae    12
f_Dysgonomonadaceae       4
f_Fibrobacteraceae        2
f_Lachnospiraceae         8
f_Lactobacillaceae        2
f_Lentimicrobiaceae       2
f_Sphaerochaetaceae       6
f_Treponemataceae         4
f_Verruco-01              3
dtype: int64

In [19]:
all_mags_paper_reduced[all_mags_paper_reduced["new_coverage"]>10][["Source","Substrate","Family","Genus","new_coverage"]].sort_values(["Source","Substrate"])

Unnamed: 0_level_0,Source,Substrate,Family,Genus,new_coverage
MAG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CH13-bin.12,Compost_Digestate,Avicel,f_Fibrobacteraceae,Fibro-01,36.526676
CH13-bin.11,Compost_Digestate,Avicel,f_Desulfovibrionaceae,Desulfovibrio,10.15579
CH13-bin.4,Compost_Digestate,Avicel,f_Bacteroidaceae,Bacteroides,10.04397
CH15-bin.0,Compost_Digestate,PASC,f_Treponemataceae,DUOS01,23.91904
CH15-bin.23,Compost_Digestate,PASC,Other,DTFZ01,10.319866
CH14-bin.1,Compost_Digestate,Xylan,f_Lachnospiraceae,Lacrimispora,54.707294
CH14-bin.2,Compost_Digestate,Xylan,f_Lachnospiraceae,Robinsoniella,18.831114
CH8-bin.22,Cow_Manure,Avicel,f_Fibrobacteraceae,Fibro-01,42.725525
CH7-bin.23,Cow_Manure,PASC,f_Desulfovibrionaceae,Halodesulfovibrio,38.075882
CH9-bin.0,Cow_Manure,Xylan,Other,Bifidobacterium,70.451904


### Functions for statistics

In [20]:
def find_non_dependent(row,metric):
    return len(family_groups[row.name[0]]) - row[metric] #row.flux_mg


def statistics_adjustments(statistics_df):
    
    statistics_df = statistics_df.sort_values(by="p_value").copy()
    statistics_df["i"] = statistics_df["p_value"].rank(method="max")
    statistics_df["p_value_benjamini_h"] = statistics_df.apply(lambda row: min(row.p_value*statistics_df.shape[0]/row.i,1),axis=1)
    statistics_df.sort_index(inplace=True)
    return statistics_df


def statistics_function(steadier_sample_cross,dependent_variable,independent_variable,metric="flux_mg",metric_thresh=1e-6,pvalue_thresh=0.1):

    # Get average of each family according to each possible value of the independent variable
    # dependent_variable,dependent_variable.split("_")[1] here it decides if it is in the receiver or in the donor (dependent_variable.split("_")[1]) and groups by the [family_receiver,receiver,compound] and takes the mean of this
    steadiercom_crossfeeding_donor = steadier_sample_cross.loc[:,[dependent_variable,dependent_variable.split("_")[1],independent_variable,metric]].groupby([dependent_variable,dependent_variable.split("_")[1],independent_variable]).mean().copy()
    dependent = steadiercom_crossfeeding_donor[steadiercom_crossfeeding_donor[metric]>metric_thresh].reset_index().groupby([dependent_variable,independent_variable]).count().copy()
    not_dependent = dependent.apply(find_non_dependent,metric=metric,axis=1)

    # Add data for the missing values
    all_categories =set(not_dependent.index.get_level_values(1))

    for family in dependent.index.get_level_values(0):
        for category in all_categories-set(not_dependent.xs(family).index):
            not_dependent[(family,category)]=len(family_groups[family]) 
            
    concat_df = pd.concat({"dependent":dependent[metric],"not_dependent":not_dependent},axis=1).fillna(0)


    statistics = {}
    for independent_var in set(concat_df.index.get_level_values(1)):
        # Get the sub_df for the super class
        concat_df_sub = concat_df.xs(independent_var,level=1).copy()

        statistics[independent_var] = {}

        # For each row (each family)
        for i,row in concat_df_sub.iterrows():
            
            # Get the data for all other family
            other = concat_df_sub.loc[concat_df_sub.index[concat_df_sub.index!=i],:]
            
            data = pd.DataFrame({i:row,"other":other.sum()}).transpose().to_numpy()
            statistics[independent_var][(i,"data")]= data
            
            
            # Get odds ratio
            odds_ratio_num = data[0][0]
            odds_ratio_den = data[0][0] + data[0][1]
            other_num = data[1][0]
            other_den = data[1][0] + data[1][1]
            
            if odds_ratio_den == 0 or other_den == 0 or other_num==0:
                odds_ratio = math.inf
            else:
                odds_ratio = (odds_ratio_num / odds_ratio_den) / (other_num / other_den)
            statistics[independent_var][(i, "odds_ratio")] = odds_ratio
            
            
            # Calculate the Barnard exact statistical 
            p_value = stats.barnard_exact(pd.DataFrame({i:row,"other":other.sum()}).transpose().to_numpy(),alternative="greater")
            statistics[independent_var][(i,"p_value")]= p_value.pvalue
            

    statistics_df = pd.DataFrame(statistics)
    

    category_values = statistics_df.xs('p_value', level=1)
    

    values = {}
    for family,independent_var in category_values[category_values[category_values<pvalue_thresh].notnull()].stack().index:
        values[family,independent_var]= {"p_value":statistics_df.loc[(family,"p_value"),independent_var],"odds_ratio":statistics_df.loc[(family,"odds_ratio"),independent_var],"data":statistics_df.loc[(family,"data"),independent_var],"# interaction":concat_df.loc[(family,independent_var),"dependent"],"# no interaction":concat_df.loc[(family,independent_var),"not_dependent"]}

    significant = pd.DataFrame(values).transpose()
    if significant.empty==False:
        significant.index.names = (dependent_variable,independent_variable)

    return significant

## Likelyhood that family X is receiving compound A

In [21]:
statistics_df = statistics_function(steadier_sample_cross,"family_receiver","compound",metric="rate",metric_thresh=1e-6,pvalue_thresh=1)
statistics_df = statistics_adjustments(statistics_df)
statistics_df.shape

(153, 7)

In [22]:
statistics_df[(statistics_df.p_value_benjamini_h<0.05) & (statistics_df["odds_ratio"]>4.95)]

Unnamed: 0_level_0,Unnamed: 1_level_0,p_value,odds_ratio,data,# interaction,# no interaction,i,p_value_benjamini_h
family_receiver,compound,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
f_Lactobacillaceae,L Ornithine C5H13N2O2,0.000107,inf,"[[2.0, 0.0], [0.0, 70.0]]",2.0,0,1.0,0.016431
f_Sphaerochaetaceae,D-Glucose,0.000536,5.5,"[[5.0, 1.0], [10.0, 56.0]]",5.0,1,2.0,0.041018


In [23]:
statistics_df = statistics_function(steadier_sample_cross,"family_receiver","super_class",metric="rate",metric_thresh=1e-6,pvalue_thresh=1)
statistics_df = statistics_adjustments(statistics_df)
statistics_df.shape

(48, 7)

In [24]:
statistics_df[(statistics_df.p_value_benjamini_h<0.1) & (statistics_df["odds_ratio"]>4.95)]

Unnamed: 0_level_0,Unnamed: 1_level_0,p_value,odds_ratio,data,# interaction,# no interaction,i,p_value_benjamini_h
family_receiver,super_class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


## Likelyhood that family X is donating a compound A to another family

In [25]:
statistics_df = statistics_function(steadier_sample_cross,"family_donor","compound",metric="rate",metric_thresh=1e-6,pvalue_thresh=1)
statistics_df = statistics_adjustments(statistics_df)
statistics_df.shape

(134, 7)

In [26]:
statistics_df[(statistics_df.p_value_benjamini_h<0.05) & (statistics_df["odds_ratio"]>4.95)]

Unnamed: 0_level_0,Unnamed: 1_level_0,p_value,odds_ratio,data,# interaction,# no interaction,i,p_value_benjamini_h
family_donor,compound,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
f_Lachnospiraceae,Guanine,0.000949,6.666667,"[[5.0, 3.0], [6.0, 58.0]]",5.0,3,5.0,0.025434
f_Lactobacillaceae,L Ornithine C5H13N2O2,0.000107,inf,"[[2.0, 0.0], [0.0, 70.0]]",2.0,0,1.0,0.014391
f_Sphaerochaetaceae,D-Glucose,0.000536,5.5,"[[5.0, 1.0], [10.0, 56.0]]",5.0,1,2.0,0.035924


In [27]:
statistics_df = statistics_function(steadier_sample_cross,"family_donor","super_class",metric="rate",metric_thresh=1e-6,pvalue_thresh=1)
statistics_df = statistics_adjustments(statistics_df)
statistics_df.shape

(54, 7)

In [28]:
statistics_df[(statistics_df.p_value_benjamini_h<0.1) & (statistics_df["odds_ratio"]>4.95)]

Unnamed: 0_level_0,Unnamed: 1_level_0,p_value,odds_ratio,data,# interaction,# no interaction,i,p_value_benjamini_h
family_donor,super_class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


## Likelyhood that family X is receiving from family Y

In [29]:
statistics_df = statistics_function(steadier_sample_cross,"family_receiver","family_donor",metric="rate",metric_thresh=1e-6,pvalue_thresh=1)
statistics_df = statistics_adjustments(statistics_df)
statistics_df.shape

(89, 7)

In [30]:
statistics_df[(statistics_df.p_value_benjamini_h<0.05) & (statistics_df["odds_ratio"]>4.95)]

Unnamed: 0_level_0,Unnamed: 1_level_0,p_value,odds_ratio,data,# interaction,# no interaction,i,p_value_benjamini_h
family_receiver,family_donor,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


### Who donates the most?

In [31]:
total_members = steadier_sample_cross.groupby("family_donor").nunique()["donor"]

### All communities - Relative abundance (above 10 %)

In [32]:
def abundance_statistic(abundance_communities,receiver_or_donor="receiver"):
    
    
    table_abundance_rec_don_dict = {}
    table_not_abundant_rec_don_dict = {}

    for compound in abundance_communities[abundance_communities.receiver_abundance_10].compound.unique():
        if receiver_or_donor=="receiver":
            abund_rec_don = len(abundance_communities[(abundance_communities.receiver_abundance_10) & (abundance_communities.compound==compound)].receiver.unique())
            not_abund_rec_don = len(abundance_communities[(~abundance_communities.receiver_abundance_10) & (abundance_communities.compound==compound)].receiver.unique())
        elif receiver_or_donor=="donor":
            abund_rec_don = len(abundance_communities[(abundance_communities.donor_abundance_10) & (abundance_communities.compound==compound)].donor.unique())
            not_abund_rec_don = len(abundance_communities[(~abundance_communities.donor_abundance_10) & (abundance_communities.compound==compound)].donor.unique())
        
        
        abund_not_rec_don = len(high_abundance) - abund_rec_don
        not_abund_not_rec_don = len(low_abunance) - not_abund_rec_don


        table = [[abund_rec_don,not_abund_rec_don],[abund_not_rec_don,not_abund_not_rec_don]]
        table_non_abundant = [[not_abund_rec_don,abund_rec_don],[not_abund_not_rec_don,abund_not_rec_don]]

        try:
            odds_ratio = (abund_rec_don/(abund_rec_don+abund_not_rec_don))/(not_abund_rec_don/(not_abund_rec_don+not_abund_not_rec_don))
        except:
            odds_ratio = math.inf


        p_value = stats.barnard_exact(table,alternative="greater")

        table_abundance_rec_don_dict[compound] = {"table":table,"p_value":p_value.pvalue,"odds_ratio":odds_ratio}


        p_value_not = stats.barnard_exact(table_non_abundant,alternative="greater")

        table_not_abundant_rec_don_dict[compound]= {"table":table,"p_value":p_value_not.pvalue,"odds_ratio":odds_ratio}
        
    return table_abundance_rec_don_dict, table_not_abundant_rec_don_dict
        


In [33]:
def abundance_statistic_super_class(abundance_communities,receiver_or_donor="receiver"):
    
    
    table_abundance_rec_don_dict = {}
    table_not_abundant_rec_don_dict = {}

    for super_class in abundance_communities[abundance_communities.receiver_abundance_10].super_class.unique():
        if receiver_or_donor=="receiver":
            abund_rec_don = len(abundance_communities[(abundance_communities.receiver_abundance_10) & (abundance_communities.super_class==super_class)].receiver.unique())
            not_abund_rec_don = len(abundance_communities[(~abundance_communities.receiver_abundance_10) & (abundance_communities.super_class==super_class)].receiver.unique())
        elif receiver_or_donor=="donor":
            abund_rec_don = len(abundance_communities[(abundance_communities.donor_abundance_10) & (abundance_communities.super_class==super_class)].donor.unique())
            not_abund_rec_don = len(abundance_communities[(~abundance_communities.donor_abundance_10) & (abundance_communities.super_class==super_class)].donor.unique())
        
        
        abund_not_rec_don = len(high_abundance) - abund_rec_don
        not_abund_not_rec_don = len(low_abunance) - not_abund_rec_don


        table = [[abund_rec_don,not_abund_rec_don],[abund_not_rec_don,not_abund_not_rec_don]]
        table_non_abundant = [[not_abund_rec_don,abund_rec_don],[not_abund_not_rec_don,abund_not_rec_don]]

        try:
            odds_ratio = (abund_rec_don/(abund_rec_don+abund_not_rec_don))/(not_abund_rec_don/(not_abund_rec_don+not_abund_not_rec_don))
        except:
            odds_ratio = math.inf


        p_value = stats.barnard_exact(table,alternative="greater")

        table_abundance_rec_don_dict[super_class] = {"table":table,"p_value":p_value.pvalue,"odds_ratio":odds_ratio}


        p_value_not = stats.barnard_exact(table_non_abundant,alternative="greater")

        table_not_abundant_rec_don_dict[super_class]= {"table":table,"p_value":p_value_not.pvalue,"odds_ratio":odds_ratio}
        
    return table_abundance_rec_don_dict, table_not_abundant_rec_don_dict
        


**Assign abundant/not-abundant classifier**

In [34]:
abundance_communities = steadier_sample_cross.copy()
abundance_communities["donor_abundance_10"] = abundance_communities["donor"].map(lambda x: all_mags_paper.loc[x,"new_coverage"]>10)
abundance_communities["receiver_abundance_10"] = abundance_communities["receiver"].map(lambda x: all_mags_paper.loc[x,"new_coverage"]>10)

**Overview of abundant members**

In [35]:
high_abundance = set(list(abundance_communities[abundance_communities.donor_abundance_10].donor.unique()) + list(abundance_communities[abundance_communities.receiver_abundance_10].receiver.unique()))
low_abunance = set(list(abundance_communities[~abundance_communities.donor_abundance_10].donor.unique()) + list(abundance_communities[~abundance_communities.receiver_abundance_10].receiver.unique()))

#### Statistics for receiving

In [36]:
table_abundance_rec_dict, table_not_abundant_rec_dict = abundance_statistic(abundance_communities,receiver_or_donor="receiver")

In [37]:
abund_rec_df = pd.DataFrame(table_abundance_rec_dict).T
abund_rec_df = statistics_adjustments(abund_rec_df)

abund_rec_df[abund_rec_df.p_value_benjamini_h<0.05].sort_values("p_value")

Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h


In [38]:
not_abund_rec_df = pd.DataFrame(table_not_abundant_rec_dict).T
not_abund_rec_df = statistics_adjustments(not_abund_rec_df)
not_abund_rec_df["odds_ratio"] = not_abund_rec_df["odds_ratio"].map(lambda x: 1/x)
not_abund_rec_df[not_abund_rec_df.p_value_benjamini_h<0.05].sort_values("p_value")

Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h
L-Lysine,"[[3, 46], [11, 12]]",0.000251,3.701149,1.0,0.006031
Guanine,"[[7, 53], [7, 5]]",0.000531,1.827586,2.0,0.006373


In [41]:
table_abundance_rec_dict, table_not_abundant_rec_dict = abundance_statistic_super_class(abundance_communities,receiver_or_donor="receiver")

In [42]:
abund_rec_df = pd.DataFrame(table_abundance_rec_dict).T
abund_rec_df = statistics_adjustments(abund_rec_df)

abund_rec_df[abund_rec_df.p_value_benjamini_h<0.05].sort_values("p_value")

Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h


In [43]:
not_abund_rec_df = pd.DataFrame(table_not_abundant_rec_dict).T
not_abund_rec_df = statistics_adjustments(not_abund_rec_df)
not_abund_rec_df["odds_ratio"] = not_abund_rec_df["odds_ratio"].map(lambda x: 1/x)
not_abund_rec_df[not_abund_rec_df.p_value_benjamini_h<0.05].sort_values("p_value")

Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h
amino acids and derivatives,"[[8, 53], [6, 5]]",0.002818,1.599138,1.0,0.022548
nucleotides and derivatives,"[[10, 56], [4, 2]]",0.004512,1.351724,2.0,0.018049


#### Statistics for donating

In [44]:
table_abundant_donation_dict, table_not_abundant_donation_dict = abundance_statistic(abundance_communities,receiver_or_donor="donor")

In [46]:
abundance_don_df = pd.DataFrame(table_abundant_donation_dict).T
abundance_don_df = statistics_adjustments(abundance_don_df)
abundance_don_df[abundance_don_df.p_value_benjamini_h<0.05].sort_values("p_value")


Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h
Uracil,"[[7, 4], [7, 54]]",0.000372,7.25,1.0,0.008935
Thymine C5H6N2O2,"[[6, 3], [8, 55]]",0.000469,8.285714,2.0,0.005626
Acetaldehyde,"[[11, 18], [3, 40]]",0.00264,2.531746,3.0,0.021119
Guanine,"[[6, 5], [8, 53]]",0.002818,4.971429,5.0,0.013529
L-Lysine,"[[6, 5], [8, 53]]",0.002818,4.971429,5.0,0.013529


In [47]:
not_abundant_don_df = pd.DataFrame(table_not_abundant_donation_dict).T
not_abundant_don_df = statistics_adjustments(not_abundant_don_df)


not_abundant_don_df["odds_ratio"] = not_abundant_don_df["odds_ratio"].map(lambda x: math.inf if x==0 else 1/x)
not_abundant_don_df[not_abundant_don_df.p_value_benjamini_h<0.05].sort_values("p_value")

Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h


In [48]:
table_abundant_donation_dict, table_not_abundant_donation_dict = abundance_statistic_super_class(abundance_communities,receiver_or_donor="donor")

In [49]:
abundance_don_df = pd.DataFrame(table_abundant_donation_dict).T
abundance_don_df = statistics_adjustments(abundance_don_df)
abundance_don_df[abundance_don_df.p_value_benjamini_h<0.05].sort_values("p_value")


Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h
nucleotides and derivatives,"[[10, 15], [4, 43]]",0.002643,2.761905,1.0,0.021142
alcohols and aldehydes,"[[12, 23], [2, 35]]",0.002818,2.161491,2.0,0.011274
amino acids and derivatives,"[[10, 20], [4, 38]]",0.014672,2.071429,3.0,0.039124


In [50]:
not_abundant_don_df = pd.DataFrame(table_not_abundant_donation_dict).T
not_abundant_don_df = statistics_adjustments(not_abundant_don_df)


not_abundant_don_df["odds_ratio"] = not_abundant_don_df["odds_ratio"].map(lambda x: math.inf if x==0 else 1/x)
not_abundant_don_df[not_abundant_don_df.p_value_benjamini_h<0.05].sort_values("p_value")

Unnamed: 0,table,p_value,odds_ratio,i,p_value_benjamini_h


## Mass flow

#### Xylose

**Receiver of Xylose**

In [51]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Xylose")].value_counts("family_receiver")

family_receiver
Other                    2
f_Desulfovibrionaceae    2
f_Lachnospiraceae        2
f_Lactobacillaceae       2
f_Clostridiaceae         1
dtype: int64

In [52]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Xylose")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_receiver
Other                    0.049099
f_Clostridiaceae         0.009986
f_Lachnospiraceae        0.003343
f_Lactobacillaceae       0.002548
f_Desulfovibrionaceae    0.000704
Name: mass_rate*frequency, dtype: float64

**Donor of Xylose**

In [53]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Xylose")].value_counts("family_donor")

family_donor
Other                4
f_Lachnospiraceae    3
f_Clostridiaceae     2
dtype: int64

In [54]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Xylose")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_donor
Other                0.053750
f_Clostridiaceae     0.010168
f_Lachnospiraceae    0.001763
Name: mass_rate*frequency, dtype: float64

In [55]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Xylose")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_receiver      
CD_X       f_Lachnospiraceae        0.001059
           f_Desulfovibrionaceae    0.000704
CM_X       Other                    0.048918
           f_Lactobacillaceae       0.002548
           f_Lachnospiraceae        0.002284
M_X        f_Clostridiaceae         0.009986
           Other                    0.000181
Name: mass_rate*frequency, dtype: float64

In [56]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Xylose")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_donor     
CD_X       f_Lachnospiraceae    0.001763
CM_X       Other                0.053750
M_X        f_Clostridiaceae     0.010168
Name: mass_rate*frequency, dtype: float64

#### Acetaldehyde

**Receiver of Acetaldehyde**

In [57]:
steadier_sample_cross[(steadier_sample_cross.compound=="Acetaldehyde")].value_counts("family_receiver")

family_receiver
Other                    32
f_Desulfovibrionaceae    25
f_Sphaerochaetaceae      23
f_Lachnospiraceae        10
f_Treponemataceae        10
f_CAG-74                  8
f_Desulfobulbaceae        8
f_Verruco-01              6
f_Lactobacillaceae        4
f_Clostridiaceae          2
f_Dysgonomonadaceae       2
dtype: int64

In [58]:
steadier_sample_cross[(steadier_sample_cross.compound=="Acetaldehyde")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_receiver
Other                    0.009356
f_Desulfovibrionaceae    0.006491
f_Sphaerochaetaceae      0.005201
f_Lactobacillaceae       0.003951
f_Lachnospiraceae        0.003795
f_Verruco-01             0.002947
f_Treponemataceae        0.001093
f_Desulfobulbaceae       0.000695
f_Clostridiaceae         0.000348
f_CAG-74                 0.000263
f_Dysgonomonadaceae      0.000002
Name: mass_rate*frequency, dtype: float64

**Donor of Acetaldehyde**

In [59]:
steadier_sample_cross[(steadier_sample_cross.compound=="Acetaldehyde")].value_counts("family_donor")

family_donor
Other                    40
f_Lachnospiraceae        21
f_Verruco-01             13
f_Desulfovibrionaceae    11
f_Dysgonomonadaceae      10
f_Treponemataceae         9
f_Clostridiaceae          8
f_Fibrobacteraceae        8
f_Lentimicrobiaceae       6
f_Sphaerochaetaceae       4
dtype: int64

In [60]:
steadier_sample_cross[(steadier_sample_cross.compound=="Acetaldehyde")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_donor
Other                    0.011941
f_Lachnospiraceae        0.006076
f_Desulfovibrionaceae    0.004056
f_Treponemataceae        0.003374
f_Clostridiaceae         0.003094
f_Verruco-01             0.002553
f_Fibrobacteraceae       0.002356
f_Dysgonomonadaceae      0.000406
f_Lentimicrobiaceae      0.000223
f_Sphaerochaetaceae      0.000065
Name: mass_rate*frequency, dtype: float64

In [61]:
steadier_sample_cross[(steadier_sample_cross.compound=="Acetaldehyde")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_receiver      
CD_A       f_Sphaerochaetaceae      0.000995
           f_Desulfovibrionaceae    0.000768
           f_Lachnospiraceae        0.000514
           f_Treponemataceae        0.000288
           Other                    0.000014
CD_P       Other                    0.002212
           f_Desulfovibrionaceae    0.001372
           f_Sphaerochaetaceae      0.000887
           f_Lachnospiraceae        0.000178
           f_Desulfobulbaceae       0.000164
           f_Dysgonomonadaceae      0.000002
CD_X       f_Lachnospiraceae        0.001546
           f_Desulfovibrionaceae    0.000571
CM_A       f_Verruco-01             0.002947
           f_Treponemataceae        0.000805
           f_Sphaerochaetaceae      0.000570
           f_CAG-74                 0.000156
           Other                    0.000036
CM_P       f_Sphaerochaetaceae      0.002748
           f_Desulfovibrionaceae    0.001479
           f_Desulfobulbaceae       0.000531
           Other      

In [62]:
steadier_sample_cross[(steadier_sample_cross.compound=="Acetaldehyde")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_donor         
CD_A       f_Fibrobacteraceae       2.355715e-03
           f_Lachnospiraceae        2.229468e-04
           f_Desulfovibrionaceae    8.351066e-07
CD_P       f_Treponemataceae        3.373540e-03
           Other                    8.050396e-04
           f_Dysgonomonadaceae      3.848677e-04
           f_Lentimicrobiaceae      2.234827e-04
           f_Lachnospiraceae        2.783310e-05
CD_X       f_Lachnospiraceae        2.116405e-03
CM_A       Other                    2.943162e-03
           f_Verruco-01             1.535072e-03
           f_Dysgonomonadaceae      2.159235e-05
           f_Sphaerochaetaceae      1.520082e-05
CM_P       f_Desulfovibrionaceae    3.942029e-03
           f_Verruco-01             1.017542e-03
           Other                    1.018360e-04
           f_Sphaerochaetaceae      4.957513e-05
CM_X       Other                    8.091292e-03
           f_Clostridiaceae         2.562788e-03
M_P        f_Lachnospiraceae        

#### Glucose

**Receiver of Glucose**

In [63]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Glucose")].value_counts("family_receiver")

family_receiver
f_Desulfovibrionaceae    10
Other                     7
f_Sphaerochaetaceae       7
f_Verruco-01              4
f_Lentimicrobiaceae       3
f_Desulfobulbaceae        1
dtype: int64

In [64]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Glucose")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_receiver
f_Desulfovibrionaceae    0.006913
f_Sphaerochaetaceae      0.005009
f_Verruco-01             0.001583
Other                    0.001222
f_Lentimicrobiaceae      0.001204
f_Desulfobulbaceae       0.000130
Name: mass_rate*frequency, dtype: float64

**Donor of Glucose**

In [65]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Glucose")].value_counts("family_donor")

family_donor
f_Sphaerochaetaceae      13
Other                     6
f_Treponemataceae         5
f_Verruco-01              4
f_Lachnospiraceae         3
f_Desulfovibrionaceae     1
dtype: int64

In [66]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Glucose")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_donor
f_Sphaerochaetaceae      0.006929
f_Treponemataceae        0.003833
Other                    0.002237
f_Verruco-01             0.002197
f_Lachnospiraceae        0.000681
f_Desulfovibrionaceae    0.000185
Name: mass_rate*frequency, dtype: float64

In [67]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Glucose")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_receiver      
CD_A       f_Sphaerochaetaceae      0.000541
           f_Desulfovibrionaceae    0.000250
CD_P       f_Desulfovibrionaceae    0.002872
           f_Sphaerochaetaceae      0.001329
           f_Lentimicrobiaceae      0.001204
CM_A       f_Verruco-01             0.001583
           f_Sphaerochaetaceae      0.001527
CM_P       f_Desulfovibrionaceae    0.003791
           f_Sphaerochaetaceae      0.001612
           Other                    0.001183
           f_Desulfobulbaceae       0.000130
M_P        Other                    0.000039
Name: mass_rate*frequency, dtype: float64

In [68]:
steadier_sample_cross[(steadier_sample_cross.compound=="D-Glucose")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_donor         
CD_A       f_Lachnospiraceae        0.000642
           f_Sphaerochaetaceae      0.000103
           f_Treponemataceae        0.000047
CD_P       f_Treponemataceae        0.003787
           Other                    0.000924
           f_Sphaerochaetaceae      0.000695
CM_A       f_Verruco-01             0.001396
           Other                    0.001225
           f_Sphaerochaetaceae      0.000304
           f_Desulfovibrionaceae    0.000185
CM_P       f_Sphaerochaetaceae      0.005827
           f_Verruco-01             0.000801
           Other                    0.000088
M_P        f_Lachnospiraceae        0.000039
Name: mass_rate*frequency, dtype: float64

#### CO2

**Receiver of CO2**

In [69]:
steadier_sample_cross[(steadier_sample_cross.compound=="CO2 CO2")].value_counts("family_receiver")

family_receiver
f_Desulfovibrionaceae    67
Other                    37
f_Lachnospiraceae        18
f_Bacteroidaceae         13
f_Desulfobulbaceae       13
f_Sphaerochaetaceae      11
f_Treponemataceae         8
f_Lactobacillaceae        4
dtype: int64

In [70]:
steadier_sample_cross[(steadier_sample_cross.compound=="CO2 CO2")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_receiver
f_Lachnospiraceae        0.002639
Other                    0.002105
f_Desulfovibrionaceae    0.001892
f_Bacteroidaceae         0.000901
f_Treponemataceae        0.000363
f_Lactobacillaceae       0.000047
f_Sphaerochaetaceae      0.000037
f_Desulfobulbaceae       0.000002
Name: mass_rate*frequency, dtype: float64

**Donor of CO2**

In [71]:
steadier_sample_cross[(steadier_sample_cross.compound=="CO2 CO2")].value_counts("family_donor")

family_donor
Other                    38
f_Desulfovibrionaceae    26
f_Sphaerochaetaceae      20
f_Dysgonomonadaceae      14
f_Lachnospiraceae        14
f_Treponemataceae        11
f_Bacteroidaceae         10
f_Lentimicrobiaceae       8
f_Verruco-01              8
f_CAG-74                  6
f_Fibrobacteraceae        6
f_Desulfobulbaceae        5
f_Lactobacillaceae        3
f_Clostridiaceae          2
dtype: int64

In [72]:
steadier_sample_cross[(steadier_sample_cross.compound=="CO2 CO2")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_donor
f_Lachnospiraceae        0.002695
Other                    0.001130
f_Desulfovibrionaceae    0.001127
f_Sphaerochaetaceae      0.000946
f_Treponemataceae        0.000924
f_Fibrobacteraceae       0.000391
f_Bacteroidaceae         0.000313
f_Lentimicrobiaceae      0.000227
f_Dysgonomonadaceae      0.000125
f_Desulfobulbaceae       0.000045
f_Verruco-01             0.000043
f_CAG-74                 0.000013
f_Lactobacillaceae       0.000005
f_Clostridiaceae         0.000001
Name: mass_rate*frequency, dtype: float64

In [73]:
steadier_sample_cross[(steadier_sample_cross.compound=="CO2 CO2")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_receiver      
CD_A       Other                    0.000491
           f_Treponemataceae        0.000363
           f_Lachnospiraceae        0.000060
           f_Desulfovibrionaceae    0.000010
CD_P       Other                    0.001575
           f_Bacteroidaceae         0.000901
           f_Desulfovibrionaceae    0.000211
CD_X       f_Lachnospiraceae        0.001614
CM_A       f_Desulfovibrionaceae    0.000069
           f_Sphaerochaetaceae      0.000037
CM_P       f_Desulfovibrionaceae    0.000474
           Other                    0.000039
           f_Desulfobulbaceae       0.000002
CM_X       f_Lachnospiraceae        0.000157
           f_Lactobacillaceae       0.000047
M_P        f_Desulfovibrionaceae    0.001128
           f_Lachnospiraceae        0.000808
Name: mass_rate*frequency, dtype: float64

In [74]:
steadier_sample_cross[(steadier_sample_cross.compound=="CO2 CO2")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_donor         
CD_A       f_Fibrobacteraceae       0.000374
           f_Desulfovibrionaceae    0.000166
           f_Bacteroidaceae         0.000143
           f_Sphaerochaetaceae      0.000139
           f_Lachnospiraceae        0.000088
           f_Treponemataceae        0.000013
CD_P       f_Treponemataceae        0.000908
           f_Sphaerochaetaceae      0.000644
           f_Desulfovibrionaceae    0.000290
           Other                    0.000286
           f_Lentimicrobiaceae      0.000204
           f_Lachnospiraceae        0.000155
           f_Dysgonomonadaceae      0.000113
           f_Bacteroidaceae         0.000045
           f_Desulfobulbaceae       0.000044
CD_X       f_Lachnospiraceae        0.001585
           f_Desulfovibrionaceae    0.000029
CM_A       f_Verruco-01             0.000038
           Other                    0.000031
           f_Fibrobacteraceae       0.000017
           f_Desulfovibrionaceae    0.000006
           f_Sphaeroch

#### H2S

**Receiver of H2S**

In [75]:
steadier_sample_cross[(steadier_sample_cross.compound=="Hydrogen sulfide")].value_counts("family_receiver")

family_receiver
Other                    48
f_Lachnospiraceae        24
f_Fibrobacteraceae       20
f_Desulfobulbaceae       14
f_Desulfovibrionaceae    14
f_Treponemataceae        14
f_Verruco-01             11
f_Lactobacillaceae        6
f_Clostridiaceae          3
dtype: int64

In [76]:
steadier_sample_cross[(steadier_sample_cross.compound=="Hydrogen sulfide")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_receiver
f_Fibrobacteraceae       4.533399e-05
f_Clostridiaceae         4.377482e-05
f_Lachnospiraceae        1.560263e-05
Other                    1.264441e-05
f_Lactobacillaceae       4.144504e-06
f_Verruco-01             2.742478e-06
f_Treponemataceae        1.266628e-06
f_Desulfovibrionaceae    6.271221e-07
f_Desulfobulbaceae       4.730234e-07
Name: mass_rate*frequency, dtype: float64

**Donor of H2S**

In [77]:
steadier_sample_cross[(steadier_sample_cross.compound=="Hydrogen sulfide")].value_counts("family_donor")

family_donor
f_Desulfovibrionaceae    37
Other                    26
f_Sphaerochaetaceae      21
f_Bacteroidaceae         15
f_Lachnospiraceae        14
f_Dysgonomonadaceae       8
f_Treponemataceae         8
f_Verruco-01              8
f_Lentimicrobiaceae       6
f_CAG-74                  4
f_Clostridiaceae          4
f_Desulfobulbaceae        3
dtype: int64

In [78]:
steadier_sample_cross[(steadier_sample_cross.compound=="Hydrogen sulfide")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_donor
f_Clostridiaceae         3.685406e-05
f_Lachnospiraceae        2.115000e-05
f_Desulfovibrionaceae    1.658480e-05
Other                    1.535789e-05
f_Sphaerochaetaceae      1.220223e-05
f_Bacteroidaceae         1.074676e-05
f_Verruco-01             1.004403e-05
f_Treponemataceae        1.168954e-06
f_Dysgonomonadaceae      1.096081e-06
f_CAG-74                 8.487231e-07
f_Lentimicrobiaceae      4.724953e-07
f_Desulfobulbaceae       8.356908e-08
Name: mass_rate*frequency, dtype: float64

In [79]:
steadier_sample_cross[(steadier_sample_cross.compound=="Hydrogen sulfide")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_receiver      
CD_A       f_Fibrobacteraceae       1.988441e-05
           f_Lachnospiraceae        1.317535e-06
           Other                    1.039135e-06
           f_Treponemataceae        6.453562e-07
           f_Desulfovibrionaceae    2.194237e-07
CD_P       Other                    1.836857e-06
           f_Lachnospiraceae        8.414832e-07
           f_Desulfobulbaceae       4.738554e-08
CD_X       f_Lachnospiraceae        1.045974e-05
           f_Desulfovibrionaceae    1.969027e-07
CM_A       f_Fibrobacteraceae       2.544958e-05
           Other                    7.579815e-07
           f_Treponemataceae        6.212718e-07
           f_Desulfovibrionaceae    2.107956e-07
CM_P       f_Verruco-01             2.742478e-06
           Other                    9.474161e-07
           f_Desulfobulbaceae       4.256378e-07
CM_X       Other                    8.063021e-06
           f_Clostridiaceae         7.337907e-06
           f_Lactobacillaceae       

In [80]:
steadier_sample_cross[(steadier_sample_cross.compound=="Hydrogen sulfide")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_donor         
CD_A       f_Bacteroidaceae         8.553483e-06
           f_Sphaerochaetaceae      6.338141e-06
           f_Desulfovibrionaceae    6.063553e-06
           f_Lachnospiraceae        1.311470e-06
           Other                    4.435276e-07
           f_Treponemataceae        3.956885e-07
CD_P       f_Desulfovibrionaceae    5.644191e-07
           f_Bacteroidaceae         5.579267e-07
           f_Treponemataceae        5.343662e-07
           f_Lentimicrobiaceae      4.116396e-07
           f_Sphaerochaetaceae      3.040332e-07
           Other                    1.798150e-07
           f_Dysgonomonadaceae      1.051439e-07
           f_Desulfobulbaceae       3.682744e-08
           f_Lachnospiraceae        3.155476e-08
CD_X       f_Lachnospiraceae        9.478362e-06
           f_Desulfovibrionaceae    1.178285e-06
CM_A       f_Verruco-01             1.004403e-05
           Other                    5.285856e-06
           f_Sphaerochaetaceae      

#### Uracil

**Receiver of Uracil**

In [81]:
steadier_sample_cross[(steadier_sample_cross.compound=="Uracil")].value_counts("family_receiver")

family_receiver
Other                    23
f_Desulfovibrionaceae    16
f_Lachnospiraceae         9
f_Bacteroidaceae          7
f_Dysgonomonadaceae       6
f_Treponemataceae         6
f_Sphaerochaetaceae       5
f_Fibrobacteraceae        4
f_Desulfobulbaceae        3
f_Lentimicrobiaceae       3
f_CAG-74                  2
f_Clostridiaceae          2
f_Lactobacillaceae        2
dtype: int64

In [82]:
steadier_sample_cross[(steadier_sample_cross.compound=="Uracil")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_receiver
Other                    0.000363
f_Fibrobacteraceae       0.000300
f_Lachnospiraceae        0.000298
f_Desulfovibrionaceae    0.000132
f_Bacteroidaceae         0.000085
f_Clostridiaceae         0.000080
f_Sphaerochaetaceae      0.000067
f_Treponemataceae        0.000033
f_Dysgonomonadaceae      0.000023
f_Desulfobulbaceae       0.000019
f_Lentimicrobiaceae      0.000016
f_Lactobacillaceae       0.000012
f_CAG-74                 0.000010
Name: mass_rate*frequency, dtype: float64

**Donor of Uracil**

In [83]:
steadier_sample_cross[(steadier_sample_cross.compound=="Uracil")].value_counts("family_donor")

family_donor
f_Desulfovibrionaceae    27
Other                    20
f_Sphaerochaetaceae      18
f_Treponemataceae        14
f_Lachnospiraceae         7
f_Clostridiaceae          2
dtype: int64

In [84]:
steadier_sample_cross[(steadier_sample_cross.compound=="Uracil")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_donor
f_Desulfovibrionaceae    0.000445
f_Sphaerochaetaceae      0.000435
f_Lachnospiraceae        0.000261
f_Treponemataceae        0.000191
f_Clostridiaceae         0.000078
Other                    0.000028
Name: mass_rate*frequency, dtype: float64

In [85]:
steadier_sample_cross[(steadier_sample_cross.compound=="Uracil")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_receiver      
CD_A       f_Fibrobacteraceae       0.000135
           f_Lachnospiraceae        0.000036
           f_Treponemataceae        0.000022
           f_Bacteroidaceae         0.000022
           f_Desulfovibrionaceae    0.000019
           Other                    0.000015
CD_P       Other                    0.000068
           f_Desulfovibrionaceae    0.000048
           f_Bacteroidaceae         0.000030
           f_Sphaerochaetaceae      0.000020
           f_Dysgonomonadaceae      0.000017
           f_Lachnospiraceae        0.000014
           f_Lentimicrobiaceae      0.000012
           f_Desulfobulbaceae       0.000006
CD_X       f_Lachnospiraceae        0.000081
           f_Desulfovibrionaceae    0.000012
CM_A       f_Fibrobacteraceae       0.000165
           Other                    0.000035
           f_Desulfovibrionaceae    0.000022
           f_Treponemataceae        0.000011
           f_Bacteroidaceae         0.000007
           f_CAG-74   

In [86]:
steadier_sample_cross[(steadier_sample_cross.compound=="Uracil")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_donor         
CD_A       f_Sphaerochaetaceae      0.000194
           f_Desulfovibrionaceae    0.000054
CD_P       f_Treponemataceae        0.000191
           Other                    0.000023
CD_X       f_Lachnospiraceae        0.000093
CM_A       f_Sphaerochaetaceae      0.000240
           Other                    0.000005
CM_P       f_Desulfovibrionaceae    0.000160
CM_X       f_Lachnospiraceae        0.000168
M_P        f_Desulfovibrionaceae    0.000231
M_X        f_Clostridiaceae         0.000078
Name: mass_rate*frequency, dtype: float64

#### Guanine

**Receiver of Guanine**

In [87]:
steadier_sample_cross[(steadier_sample_cross.compound=="Guanine")].value_counts("family_receiver")

family_receiver
Other                    25
f_Desulfovibrionaceae    15
f_Lachnospiraceae         7
f_Sphaerochaetaceae       7
f_Dysgonomonadaceae       6
f_Treponemataceae         5
f_Bacteroidaceae          4
f_Lentimicrobiaceae       3
f_Verruco-01              3
f_CAG-74                  2
f_Clostridiaceae          2
f_Fibrobacteraceae        2
f_Lactobacillaceae        2
f_Desulfobulbaceae        1
dtype: int64

In [88]:
steadier_sample_cross[(steadier_sample_cross.compound=="Guanine")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_receiver
Other                    0.000659
f_Desulfovibrionaceae    0.000238
f_Lachnospiraceae        0.000122
f_Sphaerochaetaceae      0.000088
f_Clostridiaceae         0.000086
f_Bacteroidaceae         0.000062
f_Verruco-01             0.000035
f_Fibrobacteraceae       0.000034
f_Treponemataceae        0.000032
f_Lactobacillaceae       0.000027
f_Dysgonomonadaceae      0.000026
f_CAG-74                 0.000019
f_Lentimicrobiaceae      0.000016
f_Desulfobulbaceae       0.000013
Name: mass_rate*frequency, dtype: float64

**Donor of Guanine**

In [89]:
steadier_sample_cross[(steadier_sample_cross.compound=="Guanine")].value_counts("family_donor")

family_donor
f_Lachnospiraceae        24
f_Desulfovibrionaceae    23
f_Bacteroidaceae         12
f_Treponemataceae        12
f_Sphaerochaetaceae      11
f_Clostridiaceae          2
dtype: int64

In [90]:
steadier_sample_cross[(steadier_sample_cross.compound=="Guanine")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


family_donor
f_Lachnospiraceae        0.000692
f_Desulfovibrionaceae    0.000314
f_Treponemataceae        0.000224
f_Sphaerochaetaceae      0.000110
f_Clostridiaceae         0.000078
f_Bacteroidaceae         0.000040
Name: mass_rate*frequency, dtype: float64

In [91]:
steadier_sample_cross[(steadier_sample_cross.compound=="Guanine")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_receiver      
CD_A       f_Fibrobacteraceae       0.000034
           f_Bacteroidaceae         0.000031
           f_Sphaerochaetaceae      0.000025
           f_Treponemataceae        0.000022
           f_Lachnospiraceae        0.000021
           Other                    0.000016
           f_Desulfovibrionaceae    0.000005
CD_P       Other                    0.000158
           f_Desulfovibrionaceae    0.000053
           f_Dysgonomonadaceae      0.000018
           f_Sphaerochaetaceae      0.000016
           f_Lentimicrobiaceae      0.000011
           f_Lachnospiraceae        0.000008
CD_X       f_Lachnospiraceae        0.000080
           f_Desulfovibrionaceae    0.000012
CM_A       Other                    0.000045
           f_Verruco-01             0.000020
           f_Desulfovibrionaceae    0.000019
           f_Treponemataceae        0.000010
           f_CAG-74                 0.000007
           f_Bacteroidaceae         0.000005
           f_Dysgonomo

In [92]:
steadier_sample_cross[(steadier_sample_cross.compound=="Guanine")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


community  family_donor         
CD_A       f_Desulfovibrionaceae    1.076909e-04
           f_Lachnospiraceae        4.626516e-05
CD_P       f_Treponemataceae        2.239884e-04
           f_Bacteroidaceae         3.971050e-05
           f_Lachnospiraceae        4.633073e-07
CD_X       f_Lachnospiraceae        9.215064e-05
CM_A       f_Sphaerochaetaceae      1.098016e-04
CM_P       f_Desulfovibrionaceae    2.061102e-04
CM_X       f_Lachnospiraceae        3.325918e-04
M_P        f_Lachnospiraceae        2.206873e-04
M_X        f_Clostridiaceae         7.753084e-05
Name: mass_rate*frequency, dtype: float64

#### L-Lysine

**Receiver of L-Lysine**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lysine")].value_counts("family_receiver")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lysine")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


**Donor of L-Lysine**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lysine")].value_counts("family_donor")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lysine")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lysine")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lysine")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


#### L-Lactate

**Receiver of L-Lactate**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lactate")].value_counts("family_receiver")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lactate")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


**Donor of L-Lactate**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lactate")].value_counts("family_donor")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lactate")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lactate")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Lactate")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


#### L-Malate

**Receiver of L-Malate**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Malate")].value_counts("family_receiver")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Malate")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


**Donor of L-Malate**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Malate")].value_counts("family_donor")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Malate")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Malate")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="L-Malate")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


#### Thymine

**Receiver of L-Malate**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="Thymine C5H6N2O2")].value_counts("family_receiver")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="Thymine C5H6N2O2")].groupby(["family_receiver"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


**Donor of L-Malate**

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="Thymine C5H6N2O2")].value_counts("family_donor")

In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="Thymine C5H6N2O2")].groupby(["family_donor"]).sum()["mass_rate*frequency"].sort_values(ascending=False)


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="Thymine C5H6N2O2")].groupby(["community","family_receiver"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


In [None]:
steadier_sample_cross[(steadier_sample_cross.compound=="Thymine C5H6N2O2")].groupby(["community","family_donor"]).sum().sort_values(["community","mass_rate*frequency"],ascending=[True,False])["mass_rate*frequency"]


In [None]:
steadier_sample_cross[steadier_sample_cross.family_receiver=="f_Desulfovibrionaceae"].groupby(["super_class","compound"]).sum()["mass_rate*frequency"].sort_values()

In [None]:
steadier_sample_cross[steadier_sample_cross.family_donor=="f_Desulfovibrionaceae"].groupby(["super_class","compound"]).sum()["mass_rate*frequency"].sort_values()