## Notebook for the figures made with R in 'paper name'
Anneliek ter Horst

- Statistical tests
- PCOA plot
- Upset Plot
- Bar plot 
- Genome figures

In [None]:
# Libraries needed
library(UpSetR)
library(tidyverse)
library(vegan)
library(ggplot2)
library(ape)
library(ggrepel)
library(gggenes)
library(RColorBrewer)
library(scales)

In [None]:
# PCoA plot: load the data and build the dissimilarity matrix and ordinations

# Load the coverage table; the first CSV column becomes the row names
otu <- read.table("normalized_coverage_table.csv",
                  sep = ",", header = TRUE, row.names = 1)

# Load the metadata; the first CSV column becomes the row names
cat_meta <- read.table("metadata_trees.csv",
                       sep = ",", header = TRUE, row.names = 1)

# Transpose the table (its rows are matched to the metadata row names below)
otu <- t(otu)

# Remove rows that are zero in every column.
# The check runs over all columns: testing otu[, -1] left the first column out,
# so a row whose only non-zero value was in that column would have been dropped.
# drop = FALSE keeps a matrix even if a single row remains.
nonzero_row <- apply(otu, 1, function(x) !all(x == 0))
otu <- otu[nonzero_row, , drop = FALSE]

# Hellinger-transform the data
otu.xform <- decostand(otu, method = "hellinger")

# Bray-Curtis dissimilarities, stored as a full square matrix
otu.dist <- as.matrix(vegdist(otu.xform, method = "bray"))

# NMDS on the dissimilarities; print the stress value
otu.nmds <- metaMDS(otu.dist)
otu.nmds$stress

# PCoA with the ape package
pcoa <- pcoa(as.dist(otu.dist))

# Reorder the metadata rows to follow the sample order of the NMDS points
sample_order <- match(row.names(otu.nmds$points), row.names(cat_meta))
cat_meta.ordered <- cat_meta[sample_order, ]

In [None]:
# Assemble the data frame used for the PCoA plot.
# `pcoa` and `cat_meta.ordered` come from the previous cell. An earlier version
# of this cell recomputed the PCoA and rebuilt `axes` a second time with
# identical results; that duplicate work has been removed.

# Relative eigenvalue of each PCoA axis, as a percentage rounded to 2 digits
eigval <- round(pcoa$values$Relative_eig * 100, digits = 2)

# Put the eigenvalues in their own data frame, one row per axis
eigval <- data.frame(PC = seq_along(eigval), Eigval = eigval)

# Sample coordinates on the PCoA axes, with the row names copied to a column
axes <- as.data.frame(pcoa$vectors)
axes$SampleID <- rownames(axes)

# Same key column for the metadata
cat_meta.ordered$SampleID <- rownames(cat_meta.ordered)

# Join metadata and PCoA coordinates on the sample names
axes <- merge(cat_meta.ordered, axes, by = "SampleID")

In [None]:
# PERMANOVA on viral distribution vs tree family (sci_fam).
# adonis2() is used because adonis() is deprecated in recent vegan releases.
# The result is printed below.
pmanova2 <- adonis2(as.dist(otu.dist) ~ sci_fam, data = cat_meta.ordered)
pmanova2

In [None]:
# Create the PCoA plot and write it to PCOA_colored_tree_fam.pdf
pdf("PCOA_colored_tree_fam.pdf")
p <- ggplot(axes, aes(Axis.1, Axis.2))
a <- p +
  # Points coloured by tree family
  geom_point(aes(colour = sci_fam), size = 4.5, alpha = 0.8) +
  # Italic sample labels. Constants are set outside aes(): with `size = 17`
  # inside aes() the value was mapped through the size scale instead of setting
  # the text size, and it added a stray "17" key to the legend.
  # NOTE(review): 3.5 is meant to approximate the size the mapped constant was
  # drawn at (midpoint of the default size range) - confirm and adjust to taste.
  geom_text_repel(aes(label = sci_name_short),
                  fontface = "italic", size = 3.5) +
  # Axis titles carry the percentage from the eigenvalue table
  xlab(paste0("PCo1 (", eigval$Eigval[1], " %)")) +
  ylab(paste0("PCo2 (", eigval$Eigval[2], " %)")) +
  # One manual colour per sci_fam level (three colours are supplied)
  scale_color_manual(values = c("#003f5c", "#bc5090", "#ffa600")) +
  # Dashed reference lines through the origin
  geom_vline(xintercept = 0, linetype = 2) +
  geom_hline(yintercept = 0, linetype = 2) +
  theme_bw() +
  theme(text = element_text(size = 18),
        legend.position = "bottom") +
  guides(colour = guide_legend(title.position = "top", title.hjust = 0.5))
print(a)
dev.off()

In [None]:
# Upset plot
# Load data and format

# Metadata; the HTS_ID column is renamed to SampleID
map <- read.csv("metadata_trees.csv", header = TRUE)
map <- map %>%
  rename("SampleID" = "HTS_ID")

# Load the coverage table; the first CSV column becomes the row names
otu <- read.table("normalized_coverage_table.csv",
                  sep = ",", header = TRUE, row.names = 1)

# Clean the coverage table.
# If a `contig` column is present, use it for the row names and drop it.
# The guard matters because read.table() above already consumed the first CSV
# column as row names: assigning a missing `otu$contig` would reset the row
# names to 1..n, and a blind `otu[, -1]` would then discard a data column.
if ("contig" %in% colnames(otu)) {
  row.names(otu) <- otu$contig
  otu$contig <- NULL
}

# Keep only the columns whose names are sample IDs listed in the metadata
otu <- otu[, colnames(otu) %in% map$SampleID, drop = FALSE]

In [None]:
# Create the UpSet plot: one set of OTU IDs per tree family, written to
# UpSetplot.pdf.
# NOTE(review): `site.occurrence` is not created in the cells shown here; it
# must already exist with `SampleID` and `OTU_ID` columns - confirm.

# Sample IDs that make up each family
cupressaceae_samples <- c("RW3", "RW4", "Cyp4", "Cyp3", "Cedar2")
fagaceae_samples <- c("Oak9", "Oak8", "Oak7", "Oak11", "Oak10", "Oak12")
pinaceae_samples <- c("Pine6", "Pine5", "Pine4", "Fir3", "Fir4")

# OTU IDs of the site.occurrence rows that belong to the given samples
otus_in_samples <- function(sample_ids) {
  filter(site.occurrence, SampleID %in% sample_ids)$OTU_ID
}

family_sets <- list(
  Cupressaceae = otus_in_samples(cupressaceae_samples),
  Fagaceae = otus_in_samples(fagaceae_samples),
  Pinaceae = otus_in_samples(pinaceae_samples)
)

pdf("UpSetplot.pdf")
upset(fromList(family_sets),
      text.scale = 2, nsets = 16, nintersects = 36, order.by = "freq")
dev.off()

In [None]:
# Genome figures
# Read the file with the genomes (one row per gene)
genome <- read.table("contigs_1kb_3prot.csv",
                     sep = ",", header = TRUE)

# Add gene direction: 1 when strand is "forward", -1 otherwise
genome$direction <- ifelse(genome$strand == "forward", 1, -1)

# Plot the genomes as arrows, one y position per molecule
p <- ggplot2::ggplot(
  genome,
  ggplot2::aes(xmin = start, xmax = end, y = molecule,
               fill = gene, forward = direction)
) +
  geom_gene_arrow()

# Use a custom fill colour scheme.
# NOTE(review): `mycolors` is not defined in the cells shown here; it must
# exist before this line runs - confirm.
p2 <- p +
  scale_fill_manual(values = mycolors) +
  theme(text = element_blank(),
        axis.text = element_text(size = 10))

# Facet so that each molecule gets its own panel.
# NOTE(review): theme_genes() is added after the theme() tweaks above and
# presumably resets them; if the tweaks are wanted, apply them after
# theme_genes() - confirm against the rendered figure.
p3 <- p2 +
  ggplot2::facet_wrap(~ molecule, scales = "free", ncol = 1) +
  theme_genes()

# Show the figure in the notebook
p3

# To file. print() is explicit so the plot is drawn on the PDF device even when
# this code is run with source(), where a bare `p3` is not auto-printed.
pdf("genome_fig_3_prots.pdf")
print(p3)
dev.off()