# ANCOM

In [168]:
import sys
import os
import shutil

import pandas as pd
from qiime2 import Visualization

In [126]:
qzv_in = "ancom_treatment_full_3.qzv"
outdir = "tmp"

qzv = Visualization.load(qzv_in)
qzv.export_data(outdir)

In [147]:
# import ancom table, add a "group" column to avoid NaNs 
df_ancom = pd.read_csv(f"{outdir}/ancom.tsv", sep="\t", index_col=0)
df_ancom.loc["Group"] = 2 * ["-"]
df_ancom.drop(["W"], axis=1, inplace=True)

# import data table, add a "group" column to avoid NaNs 
df_data = pd.read_csv(f"{outdir}/data.tsv", sep="\t", index_col=0)
df_data.loc["Group"] = 2 * ["-"]

# import percent abundances
df_percent_abundances = pd.read_csv(f"{outdir}/percent-abundances.tsv", sep = "\t", index_col=0)

# mix the three of them
df_ancom_data = pd.concat([df_ancom, df_data], axis=1)

df = pd.concat([df_ancom_data, df_percent_abundances], axis=1)
# df.to_csv(f".tsv", sep="\t")

Unnamed: 0,Reject null hypothesis,clr,W,0.0,25.0,50.0,75.0,100.0,0.0.1,25.0.1,50.0.1,75.0.1,100.0.1
d__Archaea;p__Crenarchaeota;c__Nitrososphaeria,False,1.32785,0,1.0,1.0,7.0,50.0,331.0,1.0,1.0,1.0,1.0,203.0
d__Bacteria;p__Patescibacteria;c__Berkelbacteria,False,-0.325253,0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,16.0
d__Bacteria;p__Patescibacteria;__,False,0.47602,0,1.0,1.0,1.0,1.0,72.0,1.0,1.0,1.0,1.0,1.0
d__Bacteria;p__Nitrospirota;c__Nitrospiria,False,-0.912242,0,1.0,1.0,1.0,1.0,11.0,1.0,1.0,1.0,4.0,117.0
d__Bacteria;p__Myxococcota;c__bacteriap25,False,-0.152795,0,1.0,1.0,1.0,1.0,10.0,1.0,1.0,1.0,1.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
d__Bacteria;p__Chloroflexi;__,False,1.01522,0,1.0,1.0,16.0,127.0,160.0,1.0,1.0,1.0,1.0,248.0
d__Bacteria;p__Bdellovibrionota;c__Oligoflexia,False,-0.295433,0,1.0,30.0,107.0,250.0,907.0,1.0,1.0,222.0,469.0,1127.0
d__Bacteria;p__Bdellovibrionota;c__Bdellovibrionia,False,-0.530309,0,1.0,32.0,102.0,210.0,567.0,1.0,48.0,174.0,365.0,851.0
d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae,False,-0.169534,0,1.0,22.0,116.0,300.0,357.0,1.0,1.0,176.0,552.0,761.0


In [169]:
shutil.rmtree("tmp")

In [171]:
# Get differentially expressed taxa
significative_df = df[df["Reject null hypothesis"] == True].loc[:,["Reject null hypothesis", "clr", "W"]]
significative_taxa = list(significative_df.index)

In [172]:
significative_df

Unnamed: 0,Reject null hypothesis,clr,W


In [165]:
type(significative_taxa)

list

# Prevalence

In [4]:
import sys
import os

import pandas as pd
from qiime2 import Artifact

In [5]:
def normalize_dataframe(dataframe, criteria=0):
    """
    Change the dataframe to an absence-presence matrix
    based on a criteria (by now, a number)
    """
    
    row_number, col_number = dataframe.shape
    
    for row in range(0, row_number):
        for col in range(0, col_number):
            if dataframe.iloc[row, col] >= criteria:
                dataframe.iloc[row, col] = 1
            else:
                dataframe.iloc[row, col] = 0
                
    return dataframe

def create_category_dict(metadata):
    """
    Create, from the metadata dataframe, a dict with
    key: category; val: values in that category
    if only one category, it wont be taken into account
    """
    valid_categories = dict()
    category_names_list = list(metadata.columns)

    # get all different possibilities for each metadata column
    for col_index in range(metadata.shape[1]):
        
        # list from a set to avoid repeating
        groups = (list(set(metadata[category_names_list[col_index]])))
        
        # if more than 1 different category, add it to the dict
        if len(groups) > 1:
            category_name = category_names_list[col_index]
            valid_categories[category_name] = [item for item in groups]

    return valid_categories, category_names_list

In [6]:
qza_in = "collapsed_full_table_raw_lvl_6.qza"
metadata_file = "metadata.tsv"
lvl = 6

In [13]:
try:
    os.mkdir(f"prevalence")
except:
    pass
qza = Artifact.load(qza_in)
counts = qza.view(pd.DataFrame)
metadata = pd.read_csv(
    metadata_file,
    sep='\t',
    header=0,
    index_col=0
    )
full_df = pd.concat([metadata, counts], axis=1)

In [11]:
counts

Unnamed: 0,d__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobacterium,d__Archaea;p__Euryarchaeota;c__Thermococci;o__Methanofastidiosales;f__Methanofastidiosaceae;g__Candidatus_Methanofastidiosum,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanomicrobiaceae;g__Methanoculleus,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanospirillaceae;g__Methanospirillum,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosaetaceae;g__Methanosaeta,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosarcinaceae;g__Methanosarcina,d__Bacteria;__;__;__;__;__,d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae_(Subgroup_1);g__Acidipila,d__Bacteria;p__Actinobacteriota;__;__;__;__,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;__;__;__,...,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Sphaerochaeta,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Treponema,d__Bacteria;p__Thermotogota;c__Thermotogae;o__Petrotogales;f__Petrotogaceae;g__AUTHM297,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Chlamydiaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;__,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Candidatus_Protochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Neochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__Candidatus_Rhabdochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured
F10C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,827.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F20T4-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,556.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F31C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,367.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F46T2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,235.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F71C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,653.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F83C3-Lupinus,0.0,0.0,0.0,0.0,0.0,8.0,594.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F90T2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,341.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F92C2-Lupinus,4.0,0.0,0.0,0.0,0.0,46.0,631.0,0.0,247.0,0.0,...,0.0,0.0,0.0,47.0,0.0,0.0,0.0,0.0,0.0,0.0
F94T3-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,789.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G04C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,580.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
metadata

Unnamed: 0_level_0,species,town,line
sample-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
F10C2-Lupinus,Lupinus,Zafron,Control
F20T4-Lupinus,Lupinus,Zafron,Temprana
F31C2-Lupinus,Lupinus,Zafron,Control
F46T2-Lupinus,Lupinus,Zafron,Temprana
F71C2-Lupinus,Lupinus,Zafron,Control
F83C3-Lupinus,Lupinus,Zafron,Control
F90T2-Lupinus,Lupinus,Zafron,Temprana
F92C2-Lupinus,Lupinus,Zafron,Control
F94T3-Lupinus,Lupinus,Zafron,Temprana
G04C2-Lupinus,Lupinus,La Garranchosa,Control


In [14]:
full_df

Unnamed: 0_level_0,species,town,line,d__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobacterium,d__Archaea;p__Euryarchaeota;c__Thermococci;o__Methanofastidiosales;f__Methanofastidiosaceae;g__Candidatus_Methanofastidiosum,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanomicrobiaceae;g__Methanoculleus,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanospirillaceae;g__Methanospirillum,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosaetaceae;g__Methanosaeta,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosarcinaceae;g__Methanosarcina,d__Bacteria;__;__;__;__;__,...,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Sphaerochaeta,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Treponema,d__Bacteria;p__Thermotogota;c__Thermotogae;o__Petrotogales;f__Petrotogaceae;g__AUTHM297,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Chlamydiaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;__,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Candidatus_Protochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Neochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__Candidatus_Rhabdochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured
sample-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F10C2-Lupinus,Lupinus,Zafron,Control,0.0,0.0,0.0,0.0,0.0,0.0,827.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F20T4-Lupinus,Lupinus,Zafron,Temprana,0.0,0.0,0.0,0.0,0.0,0.0,556.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F31C2-Lupinus,Lupinus,Zafron,Control,0.0,0.0,0.0,0.0,0.0,0.0,367.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F46T2-Lupinus,Lupinus,Zafron,Temprana,0.0,0.0,0.0,0.0,0.0,0.0,235.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F71C2-Lupinus,Lupinus,Zafron,Control,0.0,0.0,0.0,0.0,0.0,0.0,653.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F83C3-Lupinus,Lupinus,Zafron,Control,0.0,0.0,0.0,0.0,0.0,8.0,594.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F90T2-Lupinus,Lupinus,Zafron,Temprana,0.0,0.0,0.0,0.0,0.0,0.0,341.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F92C2-Lupinus,Lupinus,Zafron,Control,4.0,0.0,0.0,0.0,0.0,46.0,631.0,...,0.0,0.0,0.0,47.0,0.0,0.0,0.0,0.0,0.0,0.0
F94T3-Lupinus,Lupinus,Zafron,Temprana,0.0,0.0,0.0,0.0,0.0,0.0,789.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G04C2-Lupinus,Lupinus,La Garranchosa,Control,0.0,0.0,0.0,0.0,0.0,0.0,580.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
valid_categories, category_names_list = create_category_dict(metadata)

In [16]:
valid_categories

{'town': ['Zarapicos', 'Zafron', 'La Garranchosa', 'Rivera de la Lanchita'],
 'line': ['Temprana', 'Control']}

In [17]:
category_names_list

['species', 'town', 'line']

In [94]:
for category, values in valid_categories.items():
    
    prevalence_per_value = []
    
    # print(category)
    for value in values:
        # print(value)
        # Drop metadata columns
        sub_df = full_df[full_df[category] == value].drop(category_names_list, axis=1)
        # Normalize (0: absence, 1: presence)
        norm_df = normalize_dataframe(sub_df, criteria=1)
        norm_df.loc["Prevalence"] = norm_df.sum(axis=0)
        
        row_number, col_number = norm_df.shape
        
        # data
        
        for column in range(0, col_number):
            # Get the relative abundance of each taxon on each group
            norm_df.iloc[row_number-1, column] = norm_df.iloc[row_number-1, column]*100/(row_number-1)
        norm_df.rename({"Prevalence":value}, axis=0, inplace=True)
        
        prevalence_per_value.append(norm_df.loc[value].to_frame().transpose())        
        
    prevalence_df = pd.concat(prevalence_per_value)
    
    prevalence_df.to_csv(f"prevalence_lvl_{lvl}_{category}_{value}_wide.tsv", sep="\t")
    prevalence_df.transpose().to_csv(f"prevalence_lvl_{lvl}_{category}_{value}_long.tsv", sep="\t")
    
    
        

In [90]:
norm_df

Unnamed: 0_level_0,d__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobacterium,d__Archaea;p__Euryarchaeota;c__Thermococci;o__Methanofastidiosales;f__Methanofastidiosaceae;g__Candidatus_Methanofastidiosum,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanomicrobiaceae;g__Methanoculleus,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanospirillaceae;g__Methanospirillum,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosaetaceae;g__Methanosaeta,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosarcinaceae;g__Methanosarcina,d__Bacteria;__;__;__;__;__,d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae_(Subgroup_1);g__Acidipila,d__Bacteria;p__Actinobacteriota;__;__;__;__,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;__;__;__,...,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Sphaerochaeta,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Treponema,d__Bacteria;p__Thermotogota;c__Thermotogae;o__Petrotogales;f__Petrotogaceae;g__AUTHM297,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Chlamydiaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;__,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Candidatus_Protochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Neochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__Candidatus_Rhabdochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured
sample-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F10C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F31C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F71C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F83C3-Lupinus,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F92C2-Lupinus,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
G04C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G15C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G21C1-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G60C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G65C1-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [93]:
prevalence_df

Unnamed: 0,d__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobacterium,d__Archaea;p__Euryarchaeota;c__Thermococci;o__Methanofastidiosales;f__Methanofastidiosaceae;g__Candidatus_Methanofastidiosum,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanomicrobiaceae;g__Methanoculleus,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanospirillaceae;g__Methanospirillum,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosaetaceae;g__Methanosaeta,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosarcinaceae;g__Methanosarcina,d__Bacteria;__;__;__;__;__,d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae_(Subgroup_1);g__Acidipila,d__Bacteria;p__Actinobacteriota;__;__;__;__,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;__;__;__,...,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Sphaerochaeta,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Treponema,d__Bacteria;p__Thermotogota;c__Thermotogae;o__Petrotogales;f__Petrotogaceae;g__AUTHM297,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Chlamydiaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;__,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Candidatus_Protochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Neochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__Candidatus_Rhabdochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured
Temprana,5.555556,5.555556,5.555556,5.555556,5.555556,16.666667,100.0,5.555556,11.111111,5.555556,...,5.555556,0.0,5.555556,5.555556,0.0,5.555556,11.111111,5.555556,11.111111,5.555556
Control,5.555556,0.0,0.0,0.0,0.0,11.111111,100.0,0.0,11.111111,11.111111,...,0.0,5.555556,0.0,5.555556,5.555556,0.0,5.555556,0.0,5.555556,0.0


# RELATIVE COUNTS

In [4]:
import shutil
import os
import sys

import pandas as pd
from qiime2 import Artifact

In [5]:
filename = "collapsed_full_table_clean_lvl6.qza"
outdir = "lvl6"

In [7]:
# Open visualization
qza_artifact = Artifact.load(filename)
df = qza_artifact.view(pd.DataFrame)

In [8]:
df

Unnamed: 0,d__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobacterium,d__Archaea;p__Euryarchaeota;c__Thermococci;o__Methanofastidiosales;f__Methanofastidiosaceae;g__Candidatus_Methanofastidiosum,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanomicrobiaceae;g__Methanoculleus,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanospirillaceae;g__Methanospirillum,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosaetaceae;g__Methanosaeta,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosarcinaceae;g__Methanosarcina,d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae_(Subgroup_1);g__Acidipila,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__Actinomarinales;f__uncultured;g__uncultured,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__IMCC26256;f__IMCC26256;g__IMCC26256,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__Microtrichales;f__Iamiaceae;g__Iamia,...,d__Bacteria;p__Spirochaetota;c__Leptospirae;o__Leptospirales;f__Leptospiraceae;g__Turneriella,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Sphaerochaeta,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Treponema,d__Bacteria;p__Thermotogota;c__Thermotogae;o__Petrotogales;f__Petrotogaceae;g__AUTHM297,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Chlamydiaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Candidatus_Protochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Neochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__Candidatus_Rhabdochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured
F10C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F20T4-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F31C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,453.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F46T2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F71C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F83C3-Lupinus,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F90T2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F92C2-Lupinus,4.0,0.0,0.0,0.0,0.0,46.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,47.0,0.0,0.0,0.0,0.0,0.0
F94T3-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G04C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,900.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
df["Total"] = df.sum(axis=1)

In [12]:
df

Unnamed: 0,d__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobacterium,d__Archaea;p__Euryarchaeota;c__Thermococci;o__Methanofastidiosales;f__Methanofastidiosaceae;g__Candidatus_Methanofastidiosum,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanomicrobiaceae;g__Methanoculleus,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanospirillaceae;g__Methanospirillum,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosaetaceae;g__Methanosaeta,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosarcinaceae;g__Methanosarcina,d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae_(Subgroup_1);g__Acidipila,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__Actinomarinales;f__uncultured;g__uncultured,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__IMCC26256;f__IMCC26256;g__IMCC26256,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__Microtrichales;f__Iamiaceae;g__Iamia,...,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Sphaerochaeta,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Treponema,d__Bacteria;p__Thermotogota;c__Thermotogae;o__Petrotogales;f__Petrotogaceae;g__AUTHM297,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Chlamydiaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Candidatus_Protochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Neochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__Candidatus_Rhabdochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured,Total
F10C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38570.0
F20T4-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48646.0
F31C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,453.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38135.0
F46T2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34314.0
F71C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51821.0
F83C3-Lupinus,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,32207.0
F90T2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37497.0
F92C2-Lupinus,4.0,0.0,0.0,0.0,0.0,46.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,47.0,0.0,0.0,0.0,0.0,0.0,28581.0
F94T3-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28322.0
G04C2-Lupinus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,900.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37367.0


In [14]:
rownum, colnum = df.shape
for row in range(rownum-1):
    for col in range(colnum-1):
        df.iloc[row, col] = df.iloc[row, col] * 100 / df.iloc[row, col-1]

  after removing the cwd from sys.path.


In [15]:
df

Unnamed: 0,d__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobacterium,d__Archaea;p__Euryarchaeota;c__Thermococci;o__Methanofastidiosales;f__Methanofastidiosaceae;g__Candidatus_Methanofastidiosum,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanomicrobiaceae;g__Methanoculleus,d__Archaea;p__Halobacterota;c__Methanomicrobia;o__Methanomicrobiales;f__Methanospirillaceae;g__Methanospirillum,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosaetaceae;g__Methanosaeta,d__Archaea;p__Halobacterota;c__Methanosarcinia;o__Methanosarciniales;f__Methanosarcinaceae;g__Methanosarcina,d__Bacteria;p__Acidobacteriota;c__Acidobacteriae;o__Acidobacteriales;f__Acidobacteriaceae_(Subgroup_1);g__Acidipila,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__Actinomarinales;f__uncultured;g__uncultured,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__IMCC26256;f__IMCC26256;g__IMCC26256,d__Bacteria;p__Actinobacteriota;c__Acidimicrobiia;o__Microtrichales;f__Iamiaceae;g__Iamia,...,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Sphaerochaeta,d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Spirochaetales;f__Spirochaetaceae;g__Treponema,d__Bacteria;p__Thermotogota;c__Thermotogae;o__Petrotogales;f__Petrotogaceae;g__AUTHM297,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Chlamydiaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Candidatus_Protochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Parachlamydiaceae;g__Neochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__Candidatus_Rhabdochlamydia,d__Bacteria;p__Verrucomicrobiota;c__Chlamydiae;o__Chlamydiales;f__Simkaniaceae;g__uncultured,d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Pedosphaerales;f__Pedosphaeraceae;g__uncultured,Total
F10C2-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,38570.0
F20T4-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,48646.0
F31C2-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,38135.0
F46T2-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,34314.0
F71C2-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,51821.0
F83C3-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,32207.0
F90T2-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,37497.0
F92C2-Lupinus,0.013995,0.0,,,,,,,,,...,,,,,,,,,,28581.0
F94T3-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,28322.0
G04C2-Lupinus,0.0,,,,,,,,,,...,,,,,,,,,,37367.0


In [4]:
df["Total"] = df.sum()
# Delete unwanted dirs & files
# Hardcoded but its always the same so
dirs_to_del = ["css", "js", "q2templateassets"]

for folder in dirs_to_del:
    shutil.rmtree(f"{outdir}/{folder}")

files_to_del = ["index.html"]
for file in files_to_del:
    os.remove(f"{outdir}/{file}")

In [8]:
df = pd.read_csv(f"{outdir}/metadata.tsv", sep="\t", header=0, index_col=0)

df = df.drop("#q2:types")
df.to_csv(f"{table_name}.tsv", sep="\t")
df.transpose().to_csv(f"{table_name}_long.tsv", sep="\t")