# Relative abundance

In [1]:
#Attach the packages used throughout this notebook
library(phyloseq)
library(microbiome)
#Restore the objects stored in the RData file
#(presumably including the phyloseq object `pseq` used in the next cell)
load(file = "phyloseq.RData")

Loading required package: ggplot2


microbiome R package (microbiome.github.com)
    


 Copyright (C) 2011-2022 Leo Lahti, 
    Sudarshan Shetty et al. <microbiome.github.io>



Attaching package: ‘microbiome’


The following object is masked from ‘package:ggplot2’:

    alpha


The following object is masked from ‘package:base’:

    transform




In [2]:
#Relative abundance ####
#Transform the abundance table so that each sample's values are
#proportions (a compositional / relative abundance table)
pseq_relabund <- microbiome::transform(pseq, transform = "compositional")
#Print a summary and the per-sample totals, each of which should equal 1
microbiome::summarize_phyloseq(pseq_relabund)
microbiome::readcount(pseq_relabund)

Compositional = YES2

1] Min. number of reads = 12] Max. number of reads = 13] Total number of reads = 364] Average number of reads = 15] Median number of reads = 17] Sparsity = 0.9389890674680956] Any OTU sum to 1 or less? YES8] Number of singletons = 25479] Percent of OTUs that are singletons 
        (i.e. exactly one read detected across all samples)010] Number of sample variables are: 4sample.namesitemediareplicate2



In [3]:
#Rationale for filtering
#With total abundance values it is useful to keep 0 values, singletons,
#and doubletons because some alpha diversity metrics require them
#With relative abundance values it is instead useful to remove low
#relative abundance data so rare ASVs do not overly affect certain analyses

#First drop every ASV whose relative abundance sums to 0 across samples
#This can occur if samples were removed earlier and the ASV was not
#present in any of the remaining samples
pseq_relabund <- phyloseq::filter_taxa(
    physeq = pseq_relabund,
    flist = function(asv_relabund) sum(asv_relabund) > 0,
    prune = TRUE)
#Print a summary and the per-sample totals, each should amount to around 1
microbiome::summarize_phyloseq(pseq_relabund)
microbiome::readcount(pseq_relabund)

Compositional = YES2

1] Min. number of reads = 12] Max. number of reads = 13] Total number of reads = 364] Average number of reads = 15] Median number of reads = 17] Sparsity = 0.9389890674680956] Any OTU sum to 1 or less? YES8] Number of singletons = 25479] Percent of OTUs that are singletons 
        (i.e. exactly one read detected across all samples)010] Number of sample variables are: 4sample.namesitemediareplicate2



In [4]:
#The total relative abundance of every sample still equals 1
#This is expected since no samples were removed and
#the filter only removes ASVs with no relative abundance in any sample

In [5]:
#Rare ASVs are less useful in relative abundance data than in abundance
#data, so they are removed next
#There are many ways to do this
#A common approach, recommended by the phyloseq developer, is to keep only
#ASVs whose mean relative abundance (across samples) is greater than 1e-5
pseq_relabund <- phyloseq::filter_taxa(
    physeq = pseq_relabund,
    flist = function(asv_relabund) mean(asv_relabund) > 1e-5,
    prune = TRUE)

#Print a summary and the per-sample totals, each should amount to around 1
microbiome::summarize_phyloseq(pseq_relabund)
microbiome::readcount(pseq_relabund)

Compositional = YES2

1] Min. number of reads = 0.9786221771124082] Max. number of reads = 13] Total number of reads = 35.89260725158684] Average number of reads = 0.9970168680996325] Median number of reads = 0.9998040393647047] Sparsity = 0.9315828797096816] Any OTU sum to 1 or less? YES8] Number of singletons = 20789] Percent of OTUs that are singletons 
        (i.e. exactly one read detected across all samples)010] Number of sample variables are: 4sample.namesitemediareplicate2



In [6]:
#Total relative abundance has decreased by a very small amount
#This is what we are looking for
#If too much is being removed (>0.05) you will need to filter more gently
#For example, you could try 1e-6 instead of 1e-5

In [8]:
#Load in the ASV count vector (the file is expected to provide `num_asvs_vec`)
load("num_asvs_vec.RData")
#Add the number of ASVs remaining in the relative abundance object
#ntaxa() counts taxa whether the OTU table stores them as rows or columns
#nrow(otu_table()) would count samples instead when taxa are stored as columns
num_asvs_vec["relabund"] <- phyloseq::ntaxa(pseq_relabund)
num_asvs_vec
#Check how many ASVs were lost compared to the "abundance" entry
paste0("Lost ASVs equal: ", num_asvs_vec["abundance"] - num_asvs_vec["relabund"])
#Save the updated vector as a new file
save(num_asvs_vec, file = "num_asvs_vec.v2.RData")
#Remove object from environment
rm(num_asvs_vec)

In [None]:
#We have lost a good number of ASVs but these only equate to a very small
#amount of relabund.
#This is fine as we generally use relative abundance when looking at the
#larger picture.
#We can use a rarefied abundance table to look at the closer picture

#We are happy with our relative abundance table
#Therefore we can save it for further use and then remove it
save(list = "pseq_relabund", file = "phyloseq_relabund.RData")
rm(list = "pseq_relabund")
#List the objects that remain in the environment
ls()