## R code for figures in paper

Anneliek ter Horst, March 2023


- Transforming data
- Statistical tests (permanova)
- PCoA plots
- Boxplots for the total number of vOTUs found and for soil moisture



In [None]:
# Load R libraries
library(vegan)
library(ape)
library(ggplot2)
library(RColorBrewer)
library(dplyr)
library(tidyr)
library(devtools)
library(ggConvexHull)
library(patchwork)

In [None]:
# Create color palettes before making plots.
# Each palette is a plain character vector of colours.

# Colors for bulk rhizo, 3 timepoints
col.palette.3 <- c('#8c510a', '#d8b365', '#75C43C')

# Colors for bulk rhizo, 4 timepoints
# NOTE(review): the first two entries are the same colour, so two groups would
# be indistinguishable -- confirm this is intentional.
col.palette.4 <- c('#fe7f2d', '#fe7f2d', '#fcca46', '#619b8a')

# Colors for bulk-rhizo, 2 timepoints
col.palette.2 <- c('#fe7f2d', '#619b8a')

# Colors for the locations: light blue, dark blue, orange (plus grey)
# An earlier palette, c('#e6ab02', '#d95f02', '#7570b3', 'grey'), used to be
# assigned here and was immediately overwritten; only the effective one is kept.
col.palette.plot <- c('#219ebc', '#023047', '#fb8500', 'grey')

# treatment
col.pal.treat <- c('#72b01d', '#B53471')

# time
col.pal.time <- c('#bfc0c0', "#ef8354", "#2d3142", 'black')

In [None]:
## Load the input files
# NOTE(review): the table is used exactly as read (no transpose is applied
# anywhere in this notebook). decostand()/vegdist() below work on rows, so the
# rows of coverage_table.csv are presumably samples -- confirm.

# Open the coverage table (first column becomes the row names)
otu <- read.table("./data/coverage_table.csv",
                  sep = ",", header = TRUE, row.names = 1)

# Sub select based on header names (in this case only rhizosphere samples)
# otu <- otu %>% dplyr::select(contains("R",ignore.case = TRUE))

# categorical metadata (first column becomes the row names)
cat_meta <- read.csv("./data/metadata.csv", sep = ',', header = TRUE, row.names = 1)


In [None]:
# Transform the coverage table, compute Bray-Curtis dissimilarities and run the
# NMDS and PCoA ordinations. Produces otu.xform, otu.dist, otu.nmds,
# cat_meta.ordered, pcoa, eigval and the merged `axes` data frame.

# transform the data (Hellinger)
otu.xform <- decostand(otu, method = "hellinger")

# create dissimilarity matrix for the vOTU table with bray method
# (kept as a full square matrix; converted back with as.dist() where needed)
otu.dist <- as.matrix(vegdist(otu.xform, method = 'bray'))

# nmds
# metaMDS() uses random starts: fix the seed so the stress value is
# reproducible, and pass the dissimilarities explicitly as a 'dist' object.
set.seed(1)
otu.nmds <- metaMDS(as.dist(otu.dist))
otu.nmds$stress

# Rows are not matched by name automatically, so reorder the metadata to the
# sample order of the ordination.
sample.idx <- match(row.names(otu.nmds$points), row.names(cat_meta))
if (anyNA(sample.idx)) {
  # unmatched samples would otherwise silently become all-NA metadata rows
  warning(sum(is.na(sample.idx)),
          " sample(s) of the ordination have no matching row in the metadata",
          call. = FALSE)
}
cat_meta.ordered <- cat_meta[sample.idx, ]

# perform pcoa with ape package pcoa
pcoa <- pcoa(as.dist(otu.dist))

# make a dataframe named axes, put pcoa values in there
axes <- as.data.frame(pcoa$vectors)

# Give df extra column with the rownames in it
axes$SampleID <- rownames(axes)

# put the sample names in the metadata as a column too (merge key)
cat_meta.ordered$SampleID <- rownames(cat_meta.ordered)

cat_meta$SampleID <- rownames(cat_meta)

# percentage of variation per pcoa axis (relative eigenvalues * 100)
eigval <- round(pcoa$values$Relative_eig * 100, digits = 2)

# merge metadata and pcoa coordinates on the sample name
axes <- merge(cat_meta.ordered, axes, by.x = "SampleID", by.y = "SampleID")


In [None]:
# Put the eigenvalue percentages in a data frame (PC index, Eigval) so they
# are easy to get to.
# Guarded so that re-running this cell does not wrap the already-built data
# frame a second time.
if (!is.data.frame(eigval)) {
  eigval <- data.frame(PC = seq_along(eigval), Eigval = eigval)
}
# head(eigval) # see top eigenvalues
eigval[[1, 2]] # see first axes percentage
eigval[[2, 2]] # second axes
eigval[[3, 2]] # third axes
eigval[[4, 2]] # fourth axes


In [None]:
# Permanova test (for plot, treatment, timepoint combined, with interactions)
# by = "terms" is spelled out so the sequential per-term table is produced
# regardless of the installed vegan's default (newer releases default to a
# single omnibus test).
# NOTE(review): if Timepoint is stored as a number it enters the model as a
# continuous covariate -- confirm that is intended.
set.seed(1)  # permutation test: fix the seed for reproducible p-values
pmanova2 <- adonis2(as.dist(otu.dist) ~ Plot * Treat * Timepoint,
                    data = cat_meta.ordered, by = "terms")
pmanova2

## Plots


In [None]:
# All points plot: PCoA axes 1 and 2, colour/fill by bulk_rhizo_t,
# shape by Timepoint
colpal <- col.palette.3

# set plot
# ggplot() has no width/height arguments (they are silently ignored), so the
# figure size has to be set on the output device instead, e.g. in ggsave().
p <- ggplot(axes, aes(Axis.1, Axis.2))

# set colour and fill of the points by bulk_rhizo_t, shape by timepoint,
# set size and see-throughness
#p + geom_point(aes(shape=as.character(Timepoint)), size= 4,colour = "black", stroke=0.5, alpha=0.9) +
p +
  geom_point(aes(color = bulk_rhizo_t, shape = as.character(Timepoint),
                 fill = bulk_rhizo_t),
             size = 4, alpha = 0.8) +

  #scale_shape_manual(values = c(15, 16)) +
  # set text for the axis labels (percentage of variation per axis)
  xlab(paste0("PCo1 (", eigval$Eigval[1], " %)")) +
  ylab(paste0("PCo2 (", eigval$Eigval[2], " %)")) +
  scale_color_manual(name = 'soil', values = colpal) +

  # set shapes of the points (21-24 are the fillable shapes)
  scale_shape_manual(name = "Timepoint", values = c(21, 22, 23, 24)) +

  # same title as the colour scale so both are drawn as one merged legend
  scale_fill_manual(name = 'soil', values = colpal) +
  theme_bw() +

  # set text size for whole graph. set the background color (white with no lines)
  theme(text = element_text(size = 16),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "left")

In [None]:
# PCoA axes 1 and 2 coloured by treatment, one facet per plot
# NOTE(review): no filtering to rhizosphere soils happens in this cell; all
# rows of `axes` are drawn.
colpal <- col.pal.treat

# Data for the plot, grouped by treatment. The hull polygons themselves are
# computed by geom_convexhull() below, not here.
hulls <- axes %>%
  group_by(Treat)

# set plot
# ggplot() has no width/height arguments (they are silently ignored), so the
# figure size has to be set on the output device instead, e.g. in ggsave().
p <- ggplot(hulls, aes(Axis.1, Axis.2))

# set colour and shape of the points by treatment, set size and see-throughness
p +
  geom_point(aes(colour = as.character(Treat), shape = as.character(Treat)),
             size = 3, alpha = 0.9, stroke = 1) +

  # set text for the axis labels (percentage of variation per axis)
  xlab(paste0("PCo1 (", eigval$Eigval[1], " %)")) +
  ylab(paste0("PCo2 (", eigval$Eigval[2], " %)")) +

  # make one sub figure per plot
  facet_grid(. ~ Plot, scales = "free") +

  # put a convex hull around the pcoa points of each treatment
  geom_convexhull(aes(colour = as.character(Treat)), alpha = 0.1) +

  # set the colors of the points with the treatment palette
  scale_color_manual(name = 'soil', values = colpal) +

  # set shapes of the points
  scale_shape_manual(name = "Plot", values = c(16, 17, 15, 18)) +

  # legend title placement (the legend itself is hidden by the theme below,
  # so the scale names above are never displayed)
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
  theme_bw() +

  # set text size for whole graph. set the background color (white with no lines)
  theme(text = element_text(size = 16),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "none")

In [None]:
# Capscale analysis for time: constrained ordination on Timepoint after
# partialling out Plot and Treat, then a plot of the first two CAP axes.
colpal <- col.pal.time

# Capscale analysis
# Conditioning variables are joined with '+', the form vegan documents for
# Condition(); written with a comma only the first variable appears to be
# partialled out.
cap.clt <- capscale(as.dist(otu.dist) ~ Timepoint + Condition(Plot + Treat),
                    data = cat_meta.ordered, add = TRUE, na.action = na.omit)

# site scores on the first four axes
cap.axes <- as.data.frame(scores(cap.clt, choices = c(1, 2, 3, 4))$sites)

# join scores and metadata on their row names (by = 0)
cap.clt.axes <- merge(cap.axes, cat_meta.ordered, by = 0, all = TRUE)

# Plot capscale results
# ggplot() has no width/height arguments (they are silently ignored), so the
# figure size has to be set on the output device instead, e.g. in ggsave().
p <- ggplot(cap.clt.axes, aes(CAP1, CAP2))
d <- p +
  geom_point(aes(colour = as.character(Timepoint)), size = 4, alpha = 0.8) +
  scale_color_manual(name = 'time', values = colpal) +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
  theme_bw() +

  # set text size for whole graph. set the background color (white with no lines)
  theme(text = element_text(size = 16),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "left")
d

## Boxplots 
- Total number of vOTUs found
- Soil moisture

In [None]:
# Load the two tables used by the boxplots below.

# Open df with number of vOTUs
# NOTE(review): this file is read from the working directory while the other
# inputs live under ./data/ -- confirm the path.
total_otu <- read.table("./total_otu.csv",
                        sep = ",", header = TRUE)

# Open df with soil moisture (same metadata file as above, but here the first
# column is kept as a regular column instead of row names)
meta <- read.table("./data/metadata.csv",
                   sep = ",", header = TRUE)

In [None]:
# Boxplot of soil moisture (%) per timepoint, coloured by bulk_rhizo_t
# NOTE(review): no filtering to bulk soils happens in this cell; all rows of
# `meta` are drawn.
moisture.p <- meta %>%
  ggplot(aes(Timepoint, soil_moisture_perc, color = bulk_rhizo_t)) +
  # every observation is drawn by the point layer below, so the boxplot's own
  # outlier markers are switched off to avoid plotting them twice
  geom_boxplot(size = 1, outlier.shape = NA) +
  ylab("Soil moisture % ") +
  xlab("") +
  scale_color_manual(name = 'soil', values = col.palette.3) +
  theme_bw() +
  # jittered raw points, dodged the same way as the boxes so each point sits
  # on its own box; fixed seed keeps the jitter reproducible
  geom_point(position = position_jitterdodge(seed = 1)) +
  theme(text = element_text(size = 18),
        panel.border = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        legend.position = "none")
moisture.p


In [None]:
# Boxplot of the vOTU counts (column `sum`) for each bulk_rhizo level
total_otu.p <- total_otu %>%
  ggplot(aes(bulk_rhizo, sum, fill = bulk_rhizo)) +
  geom_boxplot(size = 1, outlier.shape = NA, color = 'black', alpha = 0.8) +
  ylab("Estimated viral species richness") +
  xlab("") +
  # Box colours are assigned through the fill scale (in level order) instead of
  # a hard-coded per-box vector. The former scale_color_manual() call referred
  # to an undefined `col.palette` object and no colour aesthetic is mapped, so
  # it has been dropped.
  scale_fill_manual(values = c('#8c510a', '#75C43C')) +
  theme_bw() +
  theme(text = element_text(size = 18),
        panel.border = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank(),
        legend.position = "none")

total_otu.p