In [12]:
import argparse
import uuid
import os
import shutil
import pandas as pd
from qiime2 import Visualization
import seaborn as sns; sns.set_theme(color_codes=True)

def save_long_wide(df, filename):
    """
    Write a dataframe to two tab-separated files.

    ``<filename>_wide.tsv`` receives the frame as given and
    ``<filename>_long.tsv`` receives its transpose.
    """
    layouts = {"wide": df, "long": df.transpose()}
    for suffix, table in layouts.items():
        table.to_csv(f"{filename}_{suffix}.tsv", sep="\t")
    return

def export_qzv(qzv_in, argument):
    """
    Export a visualization to a scratch folder and load its result tables.

    The visualization is exported into a freshly named folder inside the
    current working directory, three TSV files are read from it
    (``ancom.tsv``, ``data.tsv``, ``percent-abundances.tsv``) and the folder
    is removed again, even when reading fails.

    Parameters
    ----------
    qzv_in : path handed to ``Visualization.load``.
    argument : label used for the placeholder row that is prepended to the
        ANCOM and data tables, and as the new name of the "Group" row of the
        percent-abundances table, so the three tables share that row label.

    Returns
    -------
    tuple ``(df_ancom, df_data, df_percent_abundances)``; ``df_ancom`` has its
    "W" column removed.
    """
    # Pick a folder name that does not clash with anything in the cwd.
    tmpdir = str(uuid.uuid4())
    while tmpdir in os.listdir():
        tmpdir = str(uuid.uuid4())

    qzv = Visualization.load(qzv_in)
    try:
        qzv.export_data(tmpdir)

        # ANCOM table
        df_ancom = pd.read_csv(f"{tmpdir}/ancom.tsv", sep="\t", index_col=0)

        # Data table
        df_data = pd.read_csv(f"{tmpdir}/data.tsv", sep="\t", index_col=0)

        # Percent abundances
        df_percent_abundances = pd.read_csv(f"{tmpdir}/percent-abundances.tsv", sep="\t", index_col=0)
    finally:
        # Always clean up the scratch folder, also when a read above fails.
        if os.path.isdir(tmpdir):
            shutil.rmtree(tmpdir)

    # Placeholder row so the `argument` row has no NAs in the W/clr columns.
    data_placeholder = pd.DataFrame.from_dict({argument: ["-", "-"]}, orient="index", columns=["W", "clr"])
    df_data = pd.concat([data_placeholder, df_data], axis=0)

    # "W" is dropped here; the data table still carries a "W" column.
    df_ancom = df_ancom.drop(["W"], axis=1)
    ancom_placeholder = pd.DataFrame.from_dict({argument: ["-"]}, orient="index", columns=["Reject null hypothesis"])
    df_ancom = pd.concat([ancom_placeholder, df_ancom], axis=0)

    # rename() returns a new frame; the result has to be kept, otherwise the
    # "Group" row keeps its old label and does not line up with the
    # placeholder rows added above.
    df_percent_abundances = df_percent_abundances.rename({"Group": argument}, axis=0)

    return df_ancom, df_data, df_percent_abundances

def get_significative_taxa(df):
    """
    Return the index labels of rows whose "Reject null hypothesis" is True.

    Prints a message and returns None when no row qualifies; otherwise
    returns the labels as a list.
    """
    rejected = df["Reject null hypothesis"] == True
    hits = list(df[rejected].index)

    if hits:
        return hits

    print("No significative data found.")
    return None
    
def digest_name(string):
    """
    Expand rank prefixes (d__, p__, c__, o__, f__, g__, s__) into readable
    labels and split the result on ";" into a list.
    """
    rank_labels = (
        ("d__", "domain: "),
        ("p__", "phylum: "),
        ("c__", "class: "),
        ("o__", "order: "),
        ("f__", "family: "),
        ("g__", "genus: "),
        ("s__", "species: "),
    )
    labelled = string
    for prefix, label in rank_labels:
        labelled = labelled.replace(prefix, label)
    return labelled.split(";")


In [13]:
# Inputs for this run
qzv_in = "Assets/ancom_sample-origin_clean_full_6.qzv"
metadata = "Assets/metadata.tsv"
metadata_column = "sample-origin"
rel_freq_in = "Assets/relative_numbers_lvl_6_clean_long.tsv"

# generate the path where the relative abundances will be
# rel_abundances_path = f"../../09-qiime2_collapse_numbers/{args.mode}/{args.state}/lvl_{args.level}/{args.state}/relative_numbers_lvl_{args.level}_{args.state}_long.tsv"

# Load the three tables exported from the visualization
df_ancom, df_data, df_percent_abundances = export_qzv(qzv_in, metadata_column)

# First output: the three tables side by side, written as wide and long TSV
df_out_1 = pd.concat([df_ancom, df_data, df_percent_abundances], axis=1)
save_long_wide(df_out_1, "_")

# Labels of the rows flagged as significant (None when there are none)
significative_taxa = get_significative_taxa(df_out_1)

# Relative abundances table
rel_abs_df = pd.read_csv(rel_freq_in, sep="\t", header=0, index_col=0)

# Metadata column of interest, placed as an extra first row above the
# relative abundances and joined to the ANCOM and data tables
column_df = pd.read_csv(metadata, sep="\t", header=0, index_col=0)[metadata_column]
df_out_2 = pd.concat(
    [
        df_ancom,
        df_data,
        pd.concat([column_df.to_frame().transpose(), rel_abs_df], axis=0),
    ],
    axis=1,
)


# if there are any significative taxa
# generate the heatmap with dendrogram plot
if significative_taxa is not None:

    # Relative abundances of the significant taxa only
    sig_tax_abundances = rel_abs_df.loc[significative_taxa, :]

    # Shorten the row labels: keep the last rank of each name, and append the
    # previous rank when the last one contains "uncultured".
    rownames = sig_tax_abundances.index
    newnames = [digest_name(item) for item in rownames]
    newnames = [f"{item[-1]}; {item[-2]}" if "uncultured" in item[-1] and len(item) > 2 else item[-1] for item in newnames]

    namedict = dict(zip(rownames, newnames))

    figure_df = sig_tax_abundances.rename(index=namedict)

    # Associate one color with every metadata value. The first five keep the
    # fixed colors; further values get generated colors so that no sample is
    # left without a color when there are more than five groups.
    groups = column_df.unique()
    palette = ["green", "red", "blue", "purple", "grey"]
    if len(groups) > len(palette):
        palette = palette + list(sns.color_palette("husl", len(groups) - len(palette)))
    color_codes = dict(zip(groups, palette))
    col_colors = column_df.map(color_codes)

    # Heatmap 1: samples on x (clustered), taxa on y (original order)
    figure = sns.clustermap(figure_df,
                  col_colors=col_colors,
                  row_cluster=False,
                  dendrogram_ratio=(0, .15),
                  cbar_pos=(0.9, 0.1, .05, .25),
                  cmap="Greens",
                  figsize=(15,10),
                  )

    figure.savefig("hmap_xsamples_ytaxa.png")

    # Heatmap 2: same data transposed, taxa on x, samples on y (clustered)
    reverse_figure_df = figure_df.transpose()

    figure = sns.clustermap(reverse_figure_df,
                            row_colors=col_colors,
                            col_cluster=False,
                            dendrogram_ratio=(0.15, 0),
                            cbar_pos=(0.9, 0.1, .05, .20),
                            cmap="Greens",
                            figsize=(10,15),
                           )

    figure.savefig("hmap_xtaxa_ysamples.png")
        
    


In [115]:
df_out_2

Unnamed: 0,Reject null hypothesis,W,clr,C-1-Cistus,C-2-Cistus,C-3-Cistus,T120-1-Cistus,T120-2-Cistus,T120-3-Cistus,C-11-AND-PL-31-Cistus,...,T120-11-AND-PL-214-Cistus,T120-12-AND-PL-217-Cistus,T120-16-AND-2-PL-229-Cistus,T120-19-AND-PL-238-Cistus,T120-21-BAD-PL-245-Cistus,T120-25-BAD-PL-257-Cistus,T120-28-BAD-PL-266-Cistus,T120-4-VALD-PL-192-Cistus,T120-6-VALD-PL-198-Cistus,T120-8-VALD-3-PL-205-Cistus
sample-origin,-,-,-,Seed,Seed,Seed,Seed,Seed,Seed,Grown plant,...,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant
d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Erwiniaceae;g__Pantoea,True,511,4.38851,90.1648,96.3598,96.403,99.4365,98.6171,99.64,0.272116,...,0.131832,0.0320577,0.050939,0,0,0.0267389,0,0.205687,0.014708,0.117192
d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__uncultured,True,509,-2.18613,0,0,0,0,0,0,5.1394,...,10.9383,19.2186,14.4836,2.79516,12.716,14.9972,12.2257,5.49699,31.0781,5.09786
d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Variovorax,True,499,-1.53386,0,0,0,0,0,0,1.87144,...,1.20909,2.30815,4.66941,21.7036,6.2482,3.47605,3.43344,3.72294,7.8335,1.77741
d__Bacteria;p__Chloroflexi;c__Chloroflexia;o__Chloroflexales;f__Roseiflexaceae;g__uncultured,True,498,-1.60078,0,0,0,0,0,0,9.30842,...,5.74033,2.56061,10.3576,4.18271,8.0154,15.7158,7.5666,4.11375,10.981,12.2505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
d__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerocolumna,False,2,-0.165928,0,0,0,0,0,0,0,...,0.610192,5.52194,0,0,0,0.401083,0,0.0977014,0.641271,4.55096
d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Hyphomicrobiaceae;g__Hyphomicrobium,False,2,-0.355231,0,0,0,0,0,0,2.03317,...,0.576293,1.11401,0.560329,0.1564,0.550677,0.628363,0.512843,0,0.0941315,0.550803
d__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Bacillaceae;g__Bacillus,False,2,-0.323884,0,0,0,0,0,0,0.305489,...,2.50104,0.34462,0.288654,1.18303,0.0503887,0.217253,0.391151,0,0.10884,0
d__Bacteria;p__Firmicutes;c__Negativicutes;o__Veillonellales-Selenomonadales;f__Sporomusaceae;g__Pelosinus,False,1,-0.399741,0,0,0,0,0,0,0,...,1.79668,8.10258,0.0611268,0,0,0.324209,0,1.65578,1.19135,17.9773


In [126]:
level = 7
state = "clean"
mode = "full"

# Metadata row first, then every significant taxon
relevant_rows = [metadata_column] + significative_taxa

print(relevant_rows)

df_out_3 = df_out_2.loc[relevant_rows, :]

# Three extra columns (Mode, Level, State): "-" for the first row and the
# run settings repeated for every remaining row.
level_row = pd.DataFrame(
    [["-"] + [setting] * (df_out_3.shape[0] - 1) for setting in (mode, level, state)],
    index=["Mode", "Level", "State"],
    columns=df_out_3.index,
).transpose()

df_out_3 = pd.concat([level_row, df_out_3], axis=1)

['sample-origin', 'd__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Erwiniaceae;g__Pantoea', 'd__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__uncultured', 'd__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Variovorax', 'd__Bacteria;p__Chloroflexi;c__Chloroflexia;o__Chloroflexales;f__Roseiflexaceae;g__uncultured', 'd__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Rhizobiaceae;g__Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium', 'd__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__Bradyrhizobium', 'd__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Chitinophagales;f__Chitinophagaceae;g__Niastella']


In [131]:
df_out_3

Unnamed: 0,Mode,Level,State,Reject null hypothesis,W,clr,C-1-Cistus,C-2-Cistus,C-3-Cistus,T120-1-Cistus,...,T120-11-AND-PL-214-Cistus,T120-12-AND-PL-217-Cistus,T120-16-AND-2-PL-229-Cistus,T120-19-AND-PL-238-Cistus,T120-21-BAD-PL-245-Cistus,T120-25-BAD-PL-257-Cistus,T120-28-BAD-PL-266-Cistus,T120-4-VALD-PL-192-Cistus,T120-6-VALD-PL-198-Cistus,T120-8-VALD-3-PL-205-Cistus
sample-origin,-,-,-,-,-,-,Seed,Seed,Seed,Seed,...,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant,Grown plant
d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Erwiniaceae;g__Pantoea,full,7,clean,True,511,4.38851,90.1648,96.3598,96.403,99.4365,...,0.131832,0.0320577,0.050939,0,0,0.0267389,0,0.205687,0.014708,0.117192
d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__uncultured,full,7,clean,True,509,-2.18613,0,0,0,0,...,10.9383,19.2186,14.4836,2.79516,12.716,14.9972,12.2257,5.49699,31.0781,5.09786
d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Variovorax,full,7,clean,True,499,-1.53386,0,0,0,0,...,1.20909,2.30815,4.66941,21.7036,6.2482,3.47605,3.43344,3.72294,7.8335,1.77741
d__Bacteria;p__Chloroflexi;c__Chloroflexia;o__Chloroflexales;f__Roseiflexaceae;g__uncultured,full,7,clean,True,498,-1.60078,0,0,0,0,...,5.74033,2.56061,10.3576,4.18271,8.0154,15.7158,7.5666,4.11375,10.981,12.2505
d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Rhizobiaceae;g__Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium,full,7,clean,True,497,-1.36127,0.0613112,0,0,0,...,2.71197,6.23522,1.41271,4.48749,3.09891,4.69601,1.66891,2.8282,5.80968,2.38681
d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__Rhizobiales;f__Xanthobacteraceae;g__Bradyrhizobium,full,7,clean,True,488,-0.910693,0,0,0,0,...,4.32031,1.8914,2.76429,1.21912,1.24172,2.64046,3.36825,1.84604,1.66789,2.79308
d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Chitinophagales;f__Chitinophagaceae;g__Niastella,full,7,clean,True,485,-1.08893,0,0,0,0,...,2.8551,0.773392,6.81903,1.6803,2.77498,3.85374,6.115,2.43739,1.04427,1.7696


In [146]:
# Take the row at position 1 of df_out_3 and turn it into a one-row frame.
# NOTE(review): the saved output of the following cell lists the column names
# (Mode, Level, State, ...) under integer headers, not the values of this
# row, so that output was presumably produced by different code - confirm.
headers = pd.DataFrame(df_out_3.iloc[1]).transpose()

In [147]:
headers

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,Mode,Level,State,Reject null hypothesis,W,clr,C-1-Cistus,C-2-Cistus,C-3-Cistus,T120-1-Cistus,...,T120-11-AND-PL-214-Cistus,T120-12-AND-PL-217-Cistus,T120-16-AND-2-PL-229-Cistus,T120-19-AND-PL-238-Cistus,T120-21-BAD-PL-245-Cistus,T120-25-BAD-PL-257-Cistus,T120-28-BAD-PL-266-Cistus,T120-4-VALD-PL-192-Cistus,T120-6-VALD-PL-198-Cistus,T120-8-VALD-3-PL-205-Cistus


# Abs_count, Rel_count, Prevalence, Cleaning, Repeat

In [None]:
from qiime2 import Artifact
import pandas as pd

import sys
import os

In [None]:
def save_long_wide(df, filename):
    """
    Save a dataframe twice as tab-separated text: as given
    (``<filename>_wide.tsv``) and transposed (``<filename>_long.tsv``).
    """
    for suffix, table in (("wide", df), ("long", df.transpose())):
        table.to_csv(f"{filename}_{suffix}.tsv", sep="\t")
    return

def relative_abundances(df):
    """
    Express every value as a percentage of its row total.

    The input frame is left untouched and a new frame with the same index
    and columns is returned, so the counts stay available to the caller.
    Rows whose total is 0 yield NaN.
    """
    row_totals = df.sum(axis=1)

    # value * 100 / row total, computed for the whole table at once
    return df.mul(100).div(row_totals, axis=0)
    
def normalize_dataframe(dataframe, criteria=0):
    """
    Turn the dataframe into an absence-presence matrix.

    Every value greater than or equal to ``criteria`` becomes 1, everything
    else (including NaN) becomes 0. The frame is modified in place and also
    returned.
    """
    # Whole-table comparison; no per-cell loop and no per-cell printing.
    presence = (dataframe >= criteria).astype(int)
    dataframe.iloc[:, :] = presence.to_numpy()

    return dataframe

def create_category_dict(metadata):
    """
    Collect, for every metadata column, the distinct values it holds.

    Returns a tuple ``(valid_categories, category_names_list)``:
    ``valid_categories`` maps a column name to the list of its distinct
    values, in order of first appearance, and only contains columns with
    more than one distinct value; ``category_names_list`` lists all column
    names of ``metadata``.
    """
    valid_categories = dict()
    category_names_list = list(metadata.columns)

    for category_name in category_names_list:

        # dict.fromkeys removes repeats like a set would, but keeps the order
        # of first appearance, so the result is the same on every run
        groups = list(dict.fromkeys(metadata[category_name]))

        # if more than 1 different value, add the column to the dict
        if len(groups) > 1:
            valid_categories[category_name] = groups

    return valid_categories, category_names_list

def prevalences(df, metadata):
    """
    Calculate the prevalence of every column of ``df`` within each metadata
    group and write one pair of TSV files per metadata category.

    ``df`` and ``metadata`` are joined side by side on their index. For every
    category returned by ``create_category_dict`` and every value of that
    category, the matching rows are turned into a presence matrix
    (value >= 1) and the percentage of rows where each column is present is
    computed. The per-value rows are stacked into one table that is written
    with ``save_long_wide`` to ``prevalence_<category>_wide.tsv`` and
    ``prevalence_<category>_long.tsv``; the category is part of the name so
    that one category does not overwrite the output of another.

    Returns None.
    """

    df_prev = pd.concat([df, metadata], axis=1)
    category_dict, category_names_list = create_category_dict(metadata)

    for category, values in category_dict.items():

        prevalence_per_value = []

        for value in values:

            # Rows of this group, without the metadata columns
            sub_df = df_prev[df_prev[category] == value].drop(category_names_list, axis=1)

            # Normalize (0: absence, 1: presence)
            norm_df = normalize_dataframe(sub_df, criteria=1)

            # Percentage of rows in the group in which each column is present
            prevalence = norm_df.sum(axis=0) * 100 / norm_df.shape[0]

            prevalence_per_value.append(prevalence.rename(value).to_frame().transpose())

        prevalence_df = pd.concat(prevalence_per_value)

        save_long_wide(prevalence_df, f"prevalence_{category}")
    

def clean_dataframe(df):
    """
    Return the dataframe without the columns whose label ends with ";__".
    """
    unresolved = df.columns.str.endswith(";__")
    return df.loc[:, ~unresolved]


def artifact_from_df(df_in, filename):
    """
    Import a dataframe as a "FeatureTable[Frequency]" artifact and save it
    as ``<filename>.qza``.
    """
    Artifact.import_data("FeatureTable[Frequency]", df_in).save(f"{filename}.qza")
    return

In [None]:
# Path of the .qza artifact that the next cell loads.
qza_in = "Assets/collapsed_raw_full_table_lvl_5.qza"

In [None]:
# Load the artifact and view its contents as a pandas DataFrame.
df = Artifact.load(qza_in).view(pd.DataFrame)

In [None]:
# Write the table as loaded to absolute_numbers_wide.tsv and
# absolute_numbers_long.tsv (the latter transposed).
save_long_wide(df,"absolute_numbers")

In [None]:
# Work on a copy so `df` keeps the values it was loaded with; the cells
# further down (prevalence, cleaning) reuse `df`.
rel_df = relative_abundances(df.copy())
save_long_wide(rel_df,"relative_numbers")

In [None]:
metadata_file = "Assets/metadata.tsv"

# Metadata table: tab separated, first row as header, first column as index
metadata = pd.read_csv(metadata_file, sep="\t", header=0, index_col=0)
prevalences(df, metadata)

In [None]:
# Drop the columns whose label ends with ";__" ...
clean = clean_dataframe(df)

# ... and save what is left as table_clean.qza
artifact_from_df(clean, "table_clean")

# Prevalence

In [None]:
import sys
import os

import pandas as pd
from qiime2 import Artifact

In [None]:
def normalize_dataframe(dataframe, criteria=0):
    """
    Turn the dataframe into an absence-presence matrix.

    Every value greater than or equal to ``criteria`` becomes 1, everything
    else (including NaN) becomes 0. The frame is modified in place and also
    returned.
    """
    # One comparison over the whole table instead of a cell-by-cell loop
    presence = (dataframe >= criteria).astype(int)
    dataframe.iloc[:, :] = presence.to_numpy()

    return dataframe

def create_category_dict(metadata):
    """
    Collect, for every metadata column, the distinct values it holds.

    Returns a tuple ``(valid_categories, category_names_list)``:
    ``valid_categories`` maps a column name to the list of its distinct
    values, in order of first appearance, and only contains columns with
    more than one distinct value; ``category_names_list`` lists all column
    names of ``metadata``.
    """
    valid_categories = dict()
    category_names_list = list(metadata.columns)

    for category_name in category_names_list:

        # dict.fromkeys removes repeats like a set would, but keeps the order
        # of first appearance, so the result is the same on every run
        groups = list(dict.fromkeys(metadata[category_name]))

        # if more than 1 different value, add the column to the dict
        if len(groups) > 1:
            valid_categories[category_name] = groups

    return valid_categories, category_names_list

In [None]:
# Inputs of the prevalence section.
qza_in = "Assets/collapsed_raw_full_table_lvl_5.qza"
metadata_file = "Assets/metadata.tsv"
# `lvl` only ends up in the names of the output files written further down.
# NOTE(review): the input file name says "lvl_5" while lvl is 6 - confirm
# which one is intended.
lvl = 6

In [None]:
# Create the output folder; only "already exists" is tolerated, any other
# failure (e.g. missing permissions) is reported instead of being hidden.
try:
    os.mkdir("prevalence")
except FileExistsError:
    pass

# Feature table as a dataframe
qza = Artifact.load(qza_in)
counts = qza.view(pd.DataFrame)

# Metadata: tab separated, first row as header, first column as index
metadata = pd.read_csv(
    metadata_file,
    sep='\t',
    header=0,
    index_col=0
    )

# Metadata columns and feature columns side by side, joined on the index
full_df = pd.concat([metadata, counts], axis=1)

In [None]:
counts

In [None]:
metadata

In [None]:
full_df

In [None]:
# valid_categories: metadata columns with more than one distinct value,
# mapped to those values; category_names_list: all metadata column names.
valid_categories, category_names_list = create_category_dict(metadata)

In [None]:
valid_categories

In [None]:
category_names_list

In [None]:
for category, values in valid_categories.items():

    prevalence_per_value = []

    for value in values:
        # Rows of this group, without the metadata columns
        sub_df = full_df[full_df[category] == value].drop(category_names_list, axis=1)

        # Normalize (0: absence, 1: presence)
        norm_df = normalize_dataframe(sub_df, criteria=1)

        # Extra row: percentage of rows of the group in which each column is
        # present. The row count is taken before the extra row is added.
        n_samples = norm_df.shape[0]
        norm_df.loc["Prevalence"] = norm_df.sum(axis=0) * 100 / n_samples
        norm_df = norm_df.rename({"Prevalence": value}, axis=0)

        prevalence_per_value.append(norm_df.loc[value].to_frame().transpose())

    # One table per category, with one row per value of that category
    prevalence_df = pd.concat(prevalence_per_value)

    # The table holds every value of the category, so the file name carries
    # only the level and the category (not whichever value the inner loop
    # happened to finish on).
    prevalence_df.to_csv(f"prevalence_lvl_{lvl}_{category}_wide.tsv", sep="\t")
    prevalence_df.transpose().to_csv(f"prevalence_lvl_{lvl}_{category}_long.tsv", sep="\t")
    
    
        

In [None]:
norm_df

In [None]:
prevalence_df

# RELATIVE COUNTS

In [None]:
import shutil
import os
import sys

import pandas as pd
from qiime2 import Artifact

In [None]:
# Artifact loaded in the next cell.
filename = "Assets/collapsed_raw_full_table_lvl_5.qza"
# Folder that later cells delete files from and read metadata.tsv from.
# NOTE(review): the folder is called "lvl6" while the file name says
# "lvl_5" - confirm which level is intended.
outdir = "lvl6"

In [None]:
# Open visualization
qza_artifact = Artifact.load(filename)
df = qza_artifact.view(pd.DataFrame)

In [None]:
df

In [None]:
# Helper column: sum of every row across all columns; the next code cell
# uses it as the denominator.
df["Total"] = df.sum(axis=1)

In [None]:
df

In [None]:
# Express every value as a percentage of its row's "Total" and leave the
# helper column out of the result, so it is not written to the file.
row_totals = df["Total"]
df = df.drop("Total", axis=1).mul(100).div(row_totals, axis=0)
df.to_csv("relativ_freq.tsv", sep="\t")

In [None]:
df

In [None]:
# NOTE(review): df.sum() without an axis sums down each column, so the
# result is labelled by column names while the assignment lines up on the
# row labels; this is presumably a leftover - confirm.
df["Total"] = df.sum()
# Delete unwanted dirs & files
# Hardcoded but it's always the same set
# NOTE(review): nothing in the visible cells creates `outdir`; it
# presumably comes from an export step that is not shown - confirm.
dirs_to_del = ["css", "js", "q2templateassets"]

for folder in dirs_to_del:
    shutil.rmtree(f"{outdir}/{folder}")

files_to_del = ["index.html"]
for file in files_to_del:
    os.remove(f"{outdir}/{file}")

In [None]:
# Read metadata.tsv from the output folder (first row as header, first
# column as index).
df = pd.read_csv(f"{outdir}/metadata.tsv", sep="\t", header=0, index_col=0)

# Remove the row labelled "#q2:types", then write the table and its transpose.
# NOTE(review): `table_name` is not assigned in any visible cell; on a fresh
# kernel these two lines would raise NameError - confirm where it comes from.
df = df.drop("#q2:types")
df.to_csv(f"{table_name}.tsv", sep="\t")
df.transpose().to_csv(f"{table_name}_long.tsv", sep="\t")

# ANCOM

In [None]:
import sys
import os
import shutil

import pandas as pd
from qiime2 import Visualization
import seaborn as sns; sns.set_theme(color_codes=True)

In [None]:
# ANCOM qzv file
qzv_in = "Assets/ancom_sample-origin_full.qzv"
tmpdir = "tmp"
rel_feat_table = "Assets/relativ_freq.tsv"

In [None]:
 def export_qzv(qzv_in, tmpdir):
    """
    Export a visualization into ``tmpdir`` and load its result tables.

    Three TSV files are read from the exported folder (``ancom.tsv``,
    ``data.tsv``, ``percent-abundances.tsv``). ``tmpdir`` is removed
    afterwards, also when the export or one of the reads fails.

    Parameters
    ----------
    qzv_in : path handed to ``Visualization.load``.
    tmpdir : folder to export into; it is deleted before returning.

    Returns
    -------
    tuple ``(df_ancom, df_data, df_percent_abundances)``; ``df_ancom`` has
    its "W" column removed and ``df_data`` has an extra "Group" row filled
    with "-".
    """
    qzv = Visualization.load(qzv_in)
    try:
        qzv.export_data(tmpdir)

        # ancom table
        df_ancom = pd.read_csv(f"{tmpdir}/ancom.tsv", sep="\t", index_col=0)

        # Data table
        df_data = pd.read_csv(f"{tmpdir}/data.tsv", sep="\t", index_col=0)

        # Percent abundances
        df_percent_abundances = pd.read_csv(f"{tmpdir}/percent-abundances.tsv", sep = "\t", index_col=0)
    finally:
        # Do not leave the exported folder behind when something above fails
        if os.path.isdir(tmpdir):
            shutil.rmtree(tmpdir)

    # Add extra row to avoid NAs; one "-" per column, whatever their number
    df_data.loc["Group"] = ["-"] * df_data.shape[1]

    # remove the "W" column from the ancom table
    df_ancom = df_ancom.drop(["W"], axis=1)

    return df_ancom, df_data, df_percent_abundances

In [None]:
def get_significative_taxa(df):
    """
    Return the index labels of the rows whose "Reject null hypothesis" is
    True, or None (after printing a message) when there are none.

    The frame must also contain the columns "clr" and "W".
    """
    rejected = df["Reject null hypothesis"] == True
    significative_df = df.loc[rejected, ["Reject null hypothesis", "clr", "W"]]
    significative_taxa = list(significative_df.index)

    if significative_taxa:
        return significative_taxa

    print("No significative data found.")
    return None

In [None]:
# Read the table at rel_feat_table (tab separated, first row as header,
# first column as index) and display it.
relfq_df = pd.read_csv(rel_feat_table, sep="\t", header=0, index_col=0)
relfq_df

In [None]:
# Transpose the table and keep only the rows listed in significative_taxa.
# NOTE(review): no visible cell of this section assigns `significative_taxa`;
# it presumably still holds a value from an earlier cell - confirm.
relqf_df = relfq_df.transpose().loc[significative_taxa]

In [None]:
# Placeholder "Metadata group" row: one "a" per column, however many
# columns the table has.
relqf_df.loc["Metadata group"] = ["a"] * relqf_df.shape[1]
relqf_df

In [None]:
# Put the two tables side by side, aligned on their index.
# NOTE(review): `significative_df` is not assigned at notebook level in any
# visible cell, so this line would raise NameError on a fresh kernel - confirm.
final_df = pd.concat([significative_df,relqf_df], axis=1)

In [None]:
final_df