-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from Arcadia-Science/ter/plot_compare
Add functions to plot the output of `sourmash compare`
- Loading branch information
Showing
8 changed files
with
211 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#' Internal function to check if the sample name is a column or a rowname and move it to a rowname if it is a column. | ||
#' | ||
#' @param compare_df | ||
#' | ||
#' @return A tibble. | ||
#' | ||
#' @examples | ||
#' check_compare_df_sample_col_and_move_to_rowname(compare_df) | ||
check_compare_df_sample_col_and_move_to_rowname <- function(compare_df){ | ||
# check if compare tibble was read in with sample names as a column | ||
if("sample" %in% colnames(compare_df)){ | ||
# move sample column to rownames | ||
compare_df <- compare_df %>% | ||
tibble::column_to_rownames("sample") | ||
} | ||
return(compare_df) | ||
} | ||
|
||
|
||
#' Perform an MDS analysis from a sourmash compare output and produce a tidy data.frame | ||
#' | ||
#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare. | ||
#' | ||
#' @return A data.frame. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' make_compare_mds(compare_df) | ||
make_compare_mds <- function(compare_df){ | ||
# check if compare tibble was read in with sample names as a column | ||
compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df) | ||
|
||
compare_mds <- compare_df %>% | ||
as.matrix() %>% | ||
dist() %>% | ||
cmdscale() %>% | ||
as.data.frame() %>% | ||
dplyr::rename(MDS1 = V1, MDS2 = V2) %>% | ||
tibble::rownames_to_column("sample") | ||
|
||
return(compare_mds) | ||
} | ||
|
||
#' Plot an MDS data.frame produced from the output of sourmash compare | ||
#' | ||
#' @param compare_mds A data.frame produced using make_compare_mds() | ||
#' @param label Boolean controlling whether sample labels are added to the plot. Default TRUE plots labels. | ||
#' | ||
#' @return A ggplot2 plot. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' plot_compare_mds(compare_mds) | ||
plot_compare_mds <- function(compare_mds, label = TRUE){ | ||
mds_plt <- ggplot2::ggplot(compare_mds, ggplot2::aes(x = MDS1, y = MDS2, label = sample)) + | ||
ggplot2::geom_point() + | ||
ggplot2::theme_classic() | ||
|
||
if(label == TRUE){ | ||
mds_plt <- mds_plt + | ||
ggrepel::geom_label_repel() | ||
} | ||
|
||
return(mds_plt) | ||
} | ||
|
||
#' Plot a heatmap from a tibble of sourmash compare results. | ||
#' | ||
#' @param compare_df A tibble or data.frame produced by read_compare_csv() representing a (dis)similarity matrix output by sourmash compare. | ||
#' @param ... Arguments passed to base::heatmap(). | ||
#' | ||
#' @return A plot of sourmash compare results. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' plot_compare_heatmap() | ||
plot_compare_heatmap <- function(compare_df, seed = 42, ...){ | ||
# set seed so the same visualization is produced each time the code is run | ||
set.seed(seed = seed) | ||
|
||
# check if compare tibble was read in with sample names as a column | ||
compare_df <- check_compare_df_sample_col_and_move_to_rowname(compare_df) | ||
|
||
compare_mat <- as.matrix(compare_df) | ||
|
||
# use hclust() and dist() to calculate clusters | ||
hc_rows <- hclust(dist(compare_mat), method = "single") # method single matches sourmash plot | ||
hc_cols <- hclust(dist(t(compare_mat)), method = "single") | ||
|
||
# plot the heatmap | ||
heatmap(compare_mat, | ||
Colv = as.dendrogram(hc_cols), | ||
Rowv = as.dendrogram(hc_rows), | ||
scale='none', ...) | ||
} | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
test_that("make_compare_mds outputs a data frame of the correct shape", { | ||
mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) | ||
expect_equal(ncol(mds), 3) | ||
expect_equal(nrow(mds), 6) | ||
}) | ||
|
||
test_that("make_compare_mds calculates the correct position for one sample", { | ||
mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) | ||
expect_equal(mds[1, 2], -0.37079668) | ||
expect_equal(mds[1, 3], -0.26357615) | ||
}) | ||
|
||
test_that("plot_compare_mds produces an object of class ggplot with correct data", { | ||
mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) | ||
plt <- plot_compare_mds(mds) | ||
expect_equal(class(plt)[2], "ggplot") | ||
expect_equal(plt$data, mds) | ||
}) | ||
|
||
test_that("plot_compare_mds label boolean behaves correctly by adding second layer", { | ||
mds <- make_compare_mds(read_compare_csv("comp_k31.csv")) | ||
plt_true <- plot_compare_mds(mds) | ||
expect_equal(length(plt_true$layers), 2) | ||
plt_false <- plot_compare_mds(mds, label = FALSE) | ||
expect_equal(length(plt_false$layers), 1) | ||
}) |