Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions to plot the output of sourmash compare #9

Merged
merged 13 commits into from
Oct 13, 2022
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Config/testthat/edition: 3
RoxygenNote: 7.2.1
Imports:
dplyr,
ggplot2,
ggrepel,
magrittr,
metacoder,
purrr,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(make_compare_mds)
export(plot_compare_heatmap)
export(plot_compare_mds)
export(read_compare_csv)
export(read_gather)
export(read_taxonomy_annotate)
Expand Down
96 changes: 96 additions & 0 deletions R/plot_compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#' Internal function to check if the sample name is a column or a rowname and move it to a rowname if it is a column.
#'
#' @param compare_df
#'
#' @return A tibble.
#'
#' @examples
#' check_compare_df_sample_col_and_move_to_rowname(compare_df)
check_compare_df_sample_col_and_move_to_rowname <- function(compare_df){
# check if compare tibble was read in with sample names as a column
if("sample" %in% colnames(compare_df)){
# move sample column to rownames
compare_df <- compare_df %>%
tibble::column_to_rownames("sample")
}
return(compare_df)
}


#' Perform an MDS analysis from a sourmash compare output and produce a tidy data.frame
#'
#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare.
#'
#' @return A data.frame.
#' @export
#'
#' @examples
#' make_compare_mds(compare_df)
make_compare_mds <- function(compare_df){
# check if compare tibble was read in with sample names as a column
compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df)

compare_mds <- compare_df %>%
as.matrix() %>%
dist() %>%
cmdscale() %>%
as.data.frame() %>%
dplyr::rename(MDS1 = V1, MDS2 = V2) %>%
tibble::rownames_to_column("sample")

return(compare_mds)
}

#' Plot an MDS data.frame produced from the output of sourmash compare
#'
#' @param compare_mds A data.frame produced using make_compare_mds()
#' @param label Boolean controlling whether sample labels are added to the plot. Default TRUE plots labels.
#'
#' @return A ggplot2 plot.
#' @export
#'
#' @examples
#' plot_compare_mds(compare_mds)
plot_compare_mds <- function(compare_mds, label = TRUE){
mds_plt <- ggplot2::ggplot(compare_mds, ggplot2::aes(x = MDS1, y = MDS2, label = sample)) +
ggplot2::geom_point() +
ggplot2::theme_classic()

if(label == TRUE){
mds_plt <- mds_plt +
ggrepel::geom_label_repel()
}

return(mds_plt)
}

#' Plot a heatmap from a tibble of sourmash compare results.
#'
#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare.
#' @param ... Arguments passed to base::heatmap().
#'
#' @return A plot of sourmash compare results.
#' @export
#'
#' @examples
#' plot_compare_heatmap()
plot_compare_heatmap <- function(compare_df, seed = 42, ...){
# set seed so the same visualization is produced each time the code is run
set.seed(seed = seed)

# check if compare tibble was read in with sample names as a column
compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df)

compare_mat <- as.matrix(compare_df)

# use hclust() and dist() to calculate clusters
hc_rows <- hclust(dist(compare_mat), method = "single") # method single matches sourmash plot
hc_cols <- hclust(dist(t(compare_mat)), method = "single")

# plot the heatmap
heatmap(compare_mat,
Colv = as.dendrogram(hc_cols),
Rowv = as.dendrogram(hc_rows),
scale='none', ...)
}

20 changes: 20 additions & 0 deletions man/check_compare_df_sample_col_and_move_to_rowname.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/make_compare_mds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/plot_compare_heatmap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/plot_compare_mds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/testthat/test-plot_compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
test_that("make_compare_mds outputs a data frame of the correct shape", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
expect_equal(ncol(mds), 3)
expect_equal(nrow(mds), 6)
})

test_that("make_compare_mds calculates the correct position for one sample", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
expect_equal(mds[1, 2], -0.37079668)
expect_equal(mds[1, 3], -0.26357615)
})

test_that("plot_compare_mds produces an object of class ggplot with correct data", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
plt <- plot_compare_mds(mds)
expect_equal(class(plt)[2], "ggplot")
expect_equal(plt$data, mds)
})

test_that("plot_compare_mds label boolean behaves correctly by adding second layer", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
plt_true <- plot_compare_mds(mds)
expect_equal(length(plt_true$layers), 2)
plt_false <- plot_compare_mds(mds, label = FALSE)
expect_equal(length(plt_false$layers), 1)
})