Skip to content

Commit

Permalink
Merge pull request #9 from Arcadia-Science/ter/plot_compare
Browse files Browse the repository at this point in the history
Add functions to plot the output of `sourmash compare`
  • Loading branch information
taylorreiter committed Oct 13, 2022
2 parents d9d318b + 15fb403 commit 488dff5
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 0 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Config/testthat/edition: 3
RoxygenNote: 7.2.1
Imports:
dplyr,
ggplot2,
ggrepel,
magrittr,
metacoder,
purrr,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(make_compare_mds)
export(plot_compare_heatmap)
export(plot_compare_mds)
export(read_compare_csv)
export(read_gather)
export(read_taxonomy_annotate)
Expand Down
96 changes: 96 additions & 0 deletions R/plot_compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#' Internal function to check if the sample name is a column or a rowname and move it to a rowname if it is a column.
#'
#' @param compare_df
#'
#' @return A tibble.
#'
#' @examples
#' check_compare_df_sample_col_and_move_to_rowname(compare_df)
check_compare_df_sample_col_and_move_to_rowname <- function(compare_df){
# check if compare tibble was read in with sample names as a column
if("sample" %in% colnames(compare_df)){
# move sample column to rownames
compare_df <- compare_df %>%
tibble::column_to_rownames("sample")
}
return(compare_df)
}


#' Perform an MDS analysis from a sourmash compare output and produce a tidy data.frame
#'
#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare.
#'
#' @return A data.frame.
#' @export
#'
#' @examples
#' make_compare_mds(compare_df)
make_compare_mds <- function(compare_df){
# check if compare tibble was read in with sample names as a column
compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df)

compare_mds <- compare_df %>%
as.matrix() %>%
dist() %>%
cmdscale() %>%
as.data.frame() %>%
dplyr::rename(MDS1 = V1, MDS2 = V2) %>%
tibble::rownames_to_column("sample")

return(compare_mds)
}

#' Plot an MDS data.frame produced from the output of sourmash compare
#'
#' @param compare_mds A data.frame produced using make_compare_mds()
#' @param label Boolean controlling whether sample labels are added to the plot. Default TRUE plots labels.
#'
#' @return A ggplot2 plot.
#' @export
#'
#' @examples
#' plot_compare_mds(compare_mds)
plot_compare_mds <- function(compare_mds, label = TRUE){
mds_plt <- ggplot2::ggplot(compare_mds, ggplot2::aes(x = MDS1, y = MDS2, label = sample)) +
ggplot2::geom_point() +
ggplot2::theme_classic()

if(label == TRUE){
mds_plt <- mds_plt +
ggrepel::geom_label_repel()
}

return(mds_plt)
}

#' Plot a heatmap from a tibble of sourmash compare results.
#'
#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare.
#' @param ... Arguments passed to base::heatmap().
#'
#' @return A plot of sourmash compare results.
#' @export
#'
#' @examples
#' plot_compare_heatmap()
plot_compare_heatmap <- function(compare_df, seed = 42, ...){
# set seed so the same visualization is produced each time the code is run
set.seed(seed = seed)

# check if compare tibble was read in with sample names as a column
compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df)

compare_mat <- as.matrix(compare_df)

# use hclust() and dist() to calculate clusters
hc_rows <- hclust(dist(compare_mat), method = "single") # method single matches sourmash plot
hc_cols <- hclust(dist(t(compare_mat)), method = "single")

# plot the heatmap
heatmap(compare_mat,
Colv = as.dendrogram(hc_cols),
Rowv = as.dendrogram(hc_rows),
scale='none', ...)
}

20 changes: 20 additions & 0 deletions man/check_compare_df_sample_col_and_move_to_rowname.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/make_compare_mds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/plot_compare_heatmap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/plot_compare_mds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/testthat/test-plot_compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
test_that("make_compare_mds outputs a data frame of the correct shape", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
expect_equal(ncol(mds), 3)
expect_equal(nrow(mds), 6)
})

test_that("make_compare_mds calculates the correct position for one sample", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
expect_equal(mds[1, 2], -0.37079668)
expect_equal(mds[1, 3], -0.26357615)
})

test_that("plot_compare_mds produces an object of class ggplot with correct data", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
plt <- plot_compare_mds(mds)
expect_equal(class(plt)[2], "ggplot")
expect_equal(plt$data, mds)
})

test_that("plot_compare_mds label boolean behaves correctly by adding second layer", {
mds <- make_compare_mds(read_compare_csv("comp_k31.csv"))
plt_true <- plot_compare_mds(mds)
expect_equal(length(plt_true$layers), 2)
plt_false <- plot_compare_mds(mds, label = FALSE)
expect_equal(length(plt_false$layers), 1)
})

0 comments on commit 488dff5

Please sign in to comment.