vignettes/microshades-GP.Rmd

---
title: "Global Patterns Data"
author: "Erin Dahl"
date: '`r format(Sys.Date(), "%B %e, %Y")`'
output: rmarkdown::html_document
vignette: >
  %\VignetteIndexEntry{microshades-GP}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

# Global Patterns Data Vignette

This vignette explores the Global Patterns microbiome data available from phyloseq, which includes water samples, land samples, and human samples.

Learn more about the phyloseq package [here](https://bioconductor.org/packages/release/bioc/html/phyloseq.html).

Additionally, the package [speedyseq](https://github.com/mikemc/speedyseq) is necessary to use the function `prep_mdf()`. The package speedyseq provides faster versions of phyloseq’s plotting and taxonomic merging functions. Alternatively, the phyloseq object can be melted and transformed by using phyloseq functions `tax_glom()` and/or `transform_sample_counts()`, and melted by using `psmelt()`.

## Load the required packages

```{r message = FALSE, warning=FALSE}
library(microshades)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(cowplot)
library(patchwork)
library(forcats)
library(tidyverse)

# The dataset Global Patterns is a phyloseq object available from the Phyloseq package
data(GlobalPatterns)

```

## Use the microshades functions

```{r}

# Agglomerate and normalize the phyloseq object, and melt to a data frame
mdf_prep <- prep_mdf(GlobalPatterns, remove_na = TRUE)

# There is an alternative to using this function if you do not have speedyseq:
# 
# mdf_prep <- GlobalPatterns %>%
#         tax_glom("Genus") %>%
#         phyloseq::transform_sample_counts(function(x) { x/sum(x) }) %>%
#         psmelt() %>%
#         filter(Abundance > 0)
#
# both options will produce the same results, however the microshades prep_mdf() uses the   
# speedyseq package to increase the speed of tax_glom() and psmelt(), which may be preferable  
# when working with large datasets

  
# Generate a color object for the specified data
color_objs_GP <- create_color_dfs(mdf_prep,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

# Extract
mdf_GP <- color_objs_GP$mdf
cdf_GP <- color_objs_GP$cdf
```

## Plot 

Use `mdf_GP` as the object to plot and use `cdf_GP` to assign the correct color assignments. 

```{r fig.width= 9, fig.height= 4}
# Plot
plot <- plot_microshades(mdf_GP, cdf_GP)

plot_1 <- plot + scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.key.size = unit(0.2, "cm"), text=element_text(size=10)) +
  theme(axis.text.x = element_text(size= 6)) 

plot_1 

```

The `plot_microshades` returns a ggplot object, that allows for additional specifications for the plot to be declared. For example, this allows users to facet samples and more descriptive elements.


```{r fig.width= 9, fig.height= 4}

plot_2 <- plot + scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.key.size = unit(0.2, "cm"), text=element_text(size=10)) +
  theme(axis.text.x = element_text(size= 6)) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2) +
  theme (strip.text.x = element_text(size = 6))

plot_2
```


### Plot with custom legend

To Ensure that all elements of the custom legend are visible, adjust the `legend_key_size` and `legend_text_size`. Additionaly, the `fig.height` and `fig.width` may need to be declared.

```{r fig.width= 9, fig.height= 5}

GP_legend <-custom_legend(mdf_GP, cdf_GP)

plot_diff <- plot + scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none")  +
  theme(axis.text.x = element_text(size= 6)) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2) +
  theme(axis.text.x = element_text(size= 6)) + 
  theme(plot.margin = margin(6,20,6,6))

plot_grid(plot_diff, GP_legend,  rel_widths = c(1, .25))
```

## Plot with extended Proteobacteria colors

```{r fig.width= 9, fig.height= 7}

new_groups <- extend_group(mdf_GP, cdf_GP, "Phylum", "Genus", "Proteobacteria", existing_palette = "micro_cvd_orange", new_palette = "micro_orange", n_add = 5)

GP_legend_new <-custom_legend(new_groups$mdf, new_groups$cdf)

plot_diff <- plot_microshades(new_groups$mdf, new_groups$cdf) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none")  +
  theme(axis.text.x = element_text(size= 6)) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2) +
  theme(axis.text.x = element_text(size= 6)) + 
  theme(plot.margin = margin(6,20,6,6))

plot_grid(plot_diff, GP_legend_new,  rel_widths = c(1, .25))

```


Re-examine data with smaller groups
```{r}
# Subset Global Patterns to smaller groups in the dataset
ps_water <- subset_samples(GlobalPatterns, SampleType %in% c("Freshwater", "Freshwater (creek)", "Ocean"))

ps_land <- subset_samples(GlobalPatterns, SampleType %in% c("Soil", "Sediment (estuary)"))

ps_human <- subset_samples(GlobalPatterns, SampleType %in% c("Skin", "Feces", "Tongue"))

# Agglomerate and normalize the phyloseq objects, and melt to a data frame
mdf_water <- prep_mdf(ps_water)
mdf_land <- prep_mdf(ps_land)
mdf_human <- prep_mdf(ps_human)

# Generate a color object for the specified data
color_objs_water <- create_color_dfs(mdf_water,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

color_objs_water <- reorder_samples_by(color_objs_water$mdf, color_objs_water$cdf)

color_objs_land <- create_color_dfs(mdf_land,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

color_objs_land <- reorder_samples_by(color_objs_land$mdf, color_objs_land$cdf)

color_objs_human <- create_color_dfs(mdf_human,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

color_objs_human <- reorder_samples_by(color_objs_human$mdf, color_objs_human$cdf)

# Extract
mdf_water <- color_objs_water$mdf
cdf_water <- color_objs_water$cdf

mdf_land <- color_objs_land$mdf
cdf_land <- color_objs_land$cdf

mdf_human <- color_objs_human$mdf
cdf_human <- color_objs_human$cdf
```

#### Water Samples 
```{r fig.width= 9, fig.height= 5}
water_legend <-custom_legend(mdf_water, cdf_water)

water_plot <- plot_microshades(mdf_water, cdf_water) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size= 8)) +
  facet_wrap(~SampleType, scales = "free_x") +
  theme (strip.text.x = element_text(size = 8)) 


plot_grid(water_plot, water_legend,  rel_widths = c(1, .25))

```

##### Plot contributions

```{r fig.width = 14, fig.height=6}

freshwater_contribution <- plot_contributions(mdf_water, cdf_water, "SampleType", "Freshwater")
creek_contribution <- plot_contributions(mdf_water, cdf_water, "SampleType", "Freshwater (creek)")
ocean_contribution <- plot_contributions(mdf_water, cdf_water, "SampleType", "Ocean")

freshwater_contribution$box + 
  creek_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  ocean_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

freshwater_contribution$mean + 
  creek_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  ocean_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

freshwater_contribution$median + 
  creek_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) +
  ocean_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())
```


#### Land samples

```{r fig.width= 9, fig.height= 5}

land_legend <-custom_legend(mdf_land, cdf_land)

land_plot <- plot_microshades(mdf_land, cdf_land) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size= 8)) +
  facet_wrap(~SampleType, scales = "free_x") +
  theme (strip.text.x = element_text(size = 8)) 


plot_grid(land_plot, land_legend,  rel_widths = c(1, .25))

```

##### Plot contributions

```{r fig.width = 14, fig.height=6}

sediment_contribution <- plot_contributions(mdf_land, cdf_land, "SampleType", "Sediment (estuary)")
soil_contribution <- plot_contributions(mdf_land, cdf_land, "SampleType", "Soil")


sediment_contribution$box + 
  soil_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) 

sediment_contribution$mean + 
  soil_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) 

sediment_contribution$median + 
  soil_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) 
  
```

#### Human Samples
```{r fig.width= 9, fig.height= 5}

human_legend <-custom_legend(mdf_human, cdf_human)

human_plot <- plot_microshades(mdf_human, cdf_human) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size= 8)) +
  facet_wrap(~SampleType, scales = "free_x") +
  theme (strip.text.x = element_text(size = 8)) 


plot_grid(human_plot, human_legend,  rel_widths = c(1, .25))

```

#### Plot contributions

```{r fig.width = 14, fig.height=6}

feces_contribution <- plot_contributions(mdf_human, cdf_human, "SampleType", "Feces")
skin_contribution <- plot_contributions(mdf_human, cdf_human, "SampleType", "Skin")
tongue_contribution <- plot_contributions(mdf_human, cdf_human, "SampleType", "Tongue")

feces_contribution$box + 
  skin_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  tongue_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

feces_contribution$mean + 
  skin_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  tongue_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

feces_contribution$median + 
  skin_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) +
  tongue_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())
```