diff --git a/DESCRIPTION b/DESCRIPTION index 8be0013..d1e2208 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,6 +15,8 @@ Config/testthat/edition: 3 RoxygenNote: 7.2.1 Imports: dplyr, + ggplot2, + ggrepel, magrittr, metacoder, purrr, diff --git a/NAMESPACE b/NAMESPACE index 5760bdb..e9aa359 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,9 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(make_compare_mds) +export(plot_compare_heatmap) +export(plot_compare_mds) export(read_compare_csv) export(read_gather) export(read_taxonomy_annotate) diff --git a/R/plot_compare.R b/R/plot_compare.R new file mode 100644 index 0000000..7385581 --- /dev/null +++ b/R/plot_compare.R @@ -0,0 +1,96 @@ +#' Internal function to check if the sample name is a column or a rowname and move it to a rowname if it is a column. +#' +#' @param compare_df +#' +#' @return A tibble. +#' +#' @examples +#' check_compare_df_sample_col_and_move_to_rowname(compare_df) +check_compare_df_sample_col_and_move_to_rowname <- function(compare_df){ + # check if compare tibble was read in with sample names as a column + if("sample" %in% colnames(compare_df)){ + # move sample column to rownames + compare_df <- compare_df %>% + tibble::column_to_rownames("sample") + } + return(compare_df) +} + + +#' Perform an MDS analysis from a sourmash compare output and produce a tidy data.frame +#' +#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare. +#' +#' @return A data.frame. +#' @export +#' +#' @examples +#' make_compare_mds(compare_df) +make_compare_mds <- function(compare_df){ + # check if compare tibble was read in with sample names as a column + compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df) + + compare_mds <- compare_df %>% + as.matrix() %>% + dist() %>% + cmdscale() %>% + as.data.frame() %>% + dplyr::rename(MDS1 = V1, MDS2 = V2) %>% + tibble::rownames_to_column("sample") + + return(compare_mds) +} + +#' Plot an MDS data.frame produced from the output of sourmash compare +#' +#' @param compare_mds A data.frame produced using make_compare_mds() +#' @param label Boolean controlling whether sample labels are added to the plot. Default TRUE plots labels. +#' +#' @return A ggplot2 plot. +#' @export +#' +#' @examples +#' plot_compare_mds(compare_mds) +plot_compare_mds <- function(compare_mds, label = TRUE){ + mds_plt <- ggplot2::ggplot(compare_mds, ggplot2::aes(x = MDS1, y = MDS2, label = sample)) + + ggplot2::geom_point() + + ggplot2::theme_classic() + + if(label == TRUE){ + mds_plt <- mds_plt + + ggrepel::geom_label_repel() + } + + return(mds_plt) +} + +#' Plot a heatmap from a tibble of sourmash compare results. +#' +#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare. +#' @param ... Arguments passed to base::heatmap(). +#' +#' @return A plot of sourmash compare results. +#' @export +#' +#' @examples +#' plot_compare_heatmap() +plot_compare_heatmap <- function(compare_df, seed = 42, ...){ + # set seed so the same visualization is produced each time the code is run + set.seed(seed = seed) + + # check if compare tibble was read in with sample names as a column + compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df) + + compare_mat <- as.matrix(compare_df) + + # use hclust() and dist() to calculate clusters + hc_rows <- hclust(dist(compare_mat), method = "single") # method single matches sourmash plot + hc_cols <- hclust(dist(t(compare_mat)), method = "single") + + # plot the heatmap + heatmap(compare_mat, + Colv = as.dendrogram(hc_cols), + Rowv = as.dendrogram(hc_rows), + scale='none', ...) +} + diff --git a/man/check_compare_df_sample_col_and_move_to_rowname.Rd b/man/check_compare_df_sample_col_and_move_to_rowname.Rd new file mode 100644 index 0000000..f4185b4 --- /dev/null +++ b/man/check_compare_df_sample_col_and_move_to_rowname.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_compare.R +\name{check_compare_df_sample_col_and_move_to_rowname} +\alias{check_compare_df_sample_col_and_move_to_rowname} +\title{Internal function to check if the sample name is a column or a rowname and move it to a rowname if it is a column.} +\usage{ +check_compare_df_sample_col_and_move_to_rowname(compare_df) +} +\arguments{ +\item{compare_df}{} +} +\value{ +A tibble. +} +\description{ +Internal function to check if the sample name is a column or a rowname and move it to a rowname if it is a column. +} +\examples{ +check_compare_df_sample_col_and_move_to_rowname(compare_df) +} diff --git a/man/make_compare_mds.Rd b/man/make_compare_mds.Rd new file mode 100644 index 0000000..a445444 --- /dev/null +++ b/man/make_compare_mds.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_compare.R +\name{make_compare_mds} +\alias{make_compare_mds} +\title{Perform an MDS analysis from a sourmash compare output and produce a tidy data.frame} +\usage{ +make_compare_mds(compare_df) +} +\arguments{ +\item{compare_df}{A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare.} +} +\value{ +A data.frame. +} +\description{ +Perform an MDS analysis from a sourmash compare output and produce a tidy data.frame +} +\examples{ +make_compare_mds(compare_df) +} diff --git a/man/plot_compare_heatmap.Rd b/man/plot_compare_heatmap.Rd new file mode 100644 index 0000000..2201fff --- /dev/null +++ b/man/plot_compare_heatmap.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_compare.R +\name{plot_compare_heatmap} +\alias{plot_compare_heatmap} +\title{Plot a heatmap from a tibble of sourmash compare results.} +\usage{ +plot_compare_heatmap(compare_df, seed = 42, ...) +} +\arguments{ +\item{compare_df}{A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare.} + +\item{...}{Arguments passed to base::heatmap().} +} +\value{ +A plot of sourmash compare results. +} +\description{ +Plot a heatmap from a tibble of sourmash compare results. +} +\examples{ +plot_compare_heatmap() +} diff --git a/man/plot_compare_mds.Rd b/man/plot_compare_mds.Rd new file mode 100644 index 0000000..ffa7e00 --- /dev/null +++ b/man/plot_compare_mds.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_compare.R +\name{plot_compare_mds} +\alias{plot_compare_mds} +\title{Plot an MDS data.frame produced from the output of sourmash compare} +\usage{ +plot_compare_mds(compare_mds, label = TRUE) +} +\arguments{ +\item{compare_mds}{A data.frame produced using make_compare_mds()} + +\item{label}{Boolean controlling whether sample labels are added to the plot. Default TRUE plots labels.} +} +\value{ +A ggplot2 plot. +} +\description{ +Plot an MDS data.frame produced from the output of sourmash compare +} +\examples{ +plot_compare_mds(compare_mds) +} diff --git a/tests/testthat/test-plot_compare.R b/tests/testthat/test-plot_compare.R new file mode 100644 index 0000000..f03a910 --- /dev/null +++ b/tests/testthat/test-plot_compare.R @@ -0,0 +1,26 @@ +test_that("make_compare_mds outputs a data frame of the correct shape", { + mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) + expect_equal(ncol(mds), 3) + expect_equal(nrow(mds), 6) +}) + +test_that("make_compare_mds calculates the correct position for one sample", { + mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) + expect_equal(mds[1, 2], -0.37079668) + expect_equal(mds[1, 3], -0.26357615) +}) + +test_that("plot_compare_mds produces an object of class ggplot with correct data", { + mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) + plt <- plot_compare_mds(mds) + expect_equal(class(plt)[2], "ggplot") + expect_equal(plt$data, mds) +}) + +test_that("plot_compare_mds label boolean behaves correctly by adding second layer", { + mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) + plt_true <- plot_compare_mds(mds) + expect_equal(length(plt_true$layers), 2) + plt_false <- plot_compare_mds(mds, label = FALSE) + expect_equal(length(plt_false$layers), 1) +})