diff --git a/.Rbuildignore b/.Rbuildignore index 05ee2c7..54208b3 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,5 @@ ^\.github$ ^check_long_long_int\.R$ ^draft_vignettes\.Rmd$ +^.*\.log$ +^other_stuff$ diff --git a/.gitignore b/.gitignore index b4c1e70..077e745 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ Meta /Meta/ inst/doc *ICIKendallTau_run.log +/other_stuff/ +/large_test/ diff --git a/DESCRIPTION b/DESCRIPTION index 93ec860..100fde8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ICIKendallTau Title: Calculates information-content-informed Kendall-tau -Version: 0.3.22 +Version: 1.0.2 Date: 2024-01-26 Authors@R: c(person(given = c("Robert", "M"), family = "Flight", role = c("aut", "cre"), email = "rflight79@gmail.com", comment = @@ -16,9 +16,9 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 LinkingTo: Rcpp -Imports: Rcpp, purrr, utils +Imports: Rcpp, purrr, utils, stringr Suggests: furrr, future, testthat (>= 3.0.0), microbenchmark, - rmarkdown, knitr, dplyr + rmarkdown, knitr, dplyr, logger URL: https://moseleybioinformaticslab.github.io/ICIKendallTau https://github.com/moseleybioinformaticslab/ICIKendallTau Config/testthat/edition: 3 diff --git a/NEWS.md b/NEWS.md index f412138..6881600 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# ICIKendallTau 1.0.0 + +- Calculates correlation between columns of the matrix, **not** the rows. + # ICIKendallTau 0.3.20 - `kt_fast` now uses the same data.frame format for output as `ici_kendalltau`, but returns a matrix by default. The data.frame is useful when large amounts of comparisons are run. diff --git a/R/kendalltau.R b/R/kendalltau.R index 9d096ea..a493b50 100644 --- a/R/kendalltau.R +++ b/R/kendalltau.R @@ -3,7 +3,7 @@ #' Given a data-matrix, computes the information-content-informed (ICI) Kendall-tau-b between #' all samples. 
#' -#' @param data_matrix samples are rows, features are columns +#' @param data_matrix samples are columns, features are rows #' @param global_na what values should be treated as missing (NA)? #' @param zero_value what is the actual zero value? #' @param perspective how to treat missing data in denominator and ties, see details @@ -25,9 +25,7 @@ ici_kendalltau_ref = function(data_matrix, diag_good = TRUE, progress = FALSE){ - # assume row-wise (because that is what the description states), so need to transpose - # because `cor` actually does things columnwise. - data_matrix = t(data_matrix) + exclude_loc = matrix(FALSE, nrow = nrow(data_matrix), ncol = ncol(data_matrix)) # Actual NA and Inf values are special cases, so we do @@ -102,7 +100,7 @@ missing_either = function(in_x, in_y){ #' Calculates the completeness between any two samples using "or", is an #' entry missing in either X "or" Y. #' -#' @param data_matrix samples are rows, features are columns +#' @param data_matrix samples are columns, features are rows #' @param global_na globally, what should be treated as NA? #' @param include_only is there certain comparisons to do? #' @param return_matrix should the matrix or data.frame be returned? @@ -115,7 +113,6 @@ pairwise_completeness = function(data_matrix, include_only = NULL, return_matrix = TRUE){ - data_matrix = t(data_matrix) if (is.null(colnames(data_matrix))) { stop("rownames of data_matrix cannot be NULL!") @@ -248,7 +245,7 @@ pairwise_completeness = function(data_matrix, #' Given a data-matrix, computes the information-theoretic Kendall-tau-b between #' all samples. #' -#' @param data_matrix samples are rows, features are columns +#' @param data_matrix samples are columns, features are rows #' @param global_na globally, what should be treated as NA? #' @param perspective how to treat missing data in denominator and ties, see details #' @param scale_max should everything be scaled compared to the maximum correlation? 
@@ -285,7 +282,7 @@ pairwise_completeness = function(data_matrix, #' #' matrix_1 = cbind(s1, s2) #' -#' r_1 = ici_kendalltau(t(matrix_1)) +#' r_1 = ici_kendalltau(matrix_1) #' r_1$cor #' #' # s1 s2 @@ -301,7 +298,7 @@ pairwise_completeness = function(data_matrix, #' s4[sample(100, 50)] = NA #' #' matrix_2 = cbind(s3, s4) -#' r_2 = ici_kendalltau(t(matrix_2)) +#' r_2 = ici_kendalltau(matrix_2) #' r_2$cor #' # s3 s4 #' # s3 1.0000000 0.9944616 @@ -309,8 +306,8 @@ pairwise_completeness = function(data_matrix, #' #' # using include_only #' set.seed(1234) -#' x = matrix(rnorm(5000), nrow = 100, ncol = 50) -#' rownames(x) = paste0("s", seq(1, nrow(x))) +#' x = t(matrix(rnorm(5000), nrow = 100, ncol = 50)) +#' colnames(x) = paste0("s", seq(1, ncol(x))) #' #' # only calculate correlations of other columns with "s1" #' include_s1 = "s1" @@ -342,11 +339,14 @@ ici_kendalltau = function(data_matrix, do_log_memory = get("memory", envir = icikt_logger) # assume row-wise (because that is what the description states), so need to transpose - data_matrix = t(data_matrix) exclude_loc = matrix(FALSE, nrow = nrow(data_matrix), ncol = ncol(data_matrix)) + if (is.data.frame(data_matrix)) { + data_matrix = as.matrix(data_matrix) + } + if (is.null(colnames(data_matrix))) { - stop("rownames of data_matrix cannot be NULL!") + stop("colnames of data_matrix cannot be NULL!") } log_message("Processing missing values ...\n") diff --git a/R/reshaping.R b/R/reshaping.R index a35dada..ccb0390 100644 --- a/R/reshaping.R +++ b/R/reshaping.R @@ -1,8 +1,10 @@ +utils::globalVariables(c("stack", "ind", "values")) + #' convert matrix to data.frame #' #' Given a square correlation matrix, converts it to a long data.frame, with three columns. 
#' -#' @param cor_matrix the correlation matrix +#' @param in_matrix the correlation matrix #' #' @details The data.frame contains three columns: #' * s1: the first entry of comparison @@ -32,6 +34,7 @@ cor_matrix_2_long_df = function(in_matrix) #' Given a long data.frame, converts it to a possibly square correlation matrix #' #' @param long_df the long data.frame +#' @param is_square should it be a square matrix? #' #' #' @export diff --git a/README.Rmd b/README.Rmd index 0034985..a2757ab 100644 --- a/README.Rmd +++ b/README.Rmd @@ -80,7 +80,7 @@ s2 = s1 + 10 matrix_1 = cbind(s1, s2) -r_1 = ici_kendalltau(t(matrix_1)) +r_1 = ici_kendalltau(matrix_1) r_1$cor ``` @@ -95,7 +95,7 @@ s4 = s2 s4[sample(100, 50)] = NA matrix_2 = cbind(s3, s4) -r_2 = ici_kendalltau(t(matrix_2)) +r_2 = ici_kendalltau(matrix_2) r_2$cor ``` @@ -150,7 +150,7 @@ If you have {future} and the {furrr} packages installed, then it is also possibl #| eval: false library(furrr) future::plan(multicore, workers = 4) -r_3 = ici_kendalltau(t(matrix_2)) +r_3 = ici_kendalltau(matrix_2) ``` ## Many Many Comparisons @@ -163,7 +163,7 @@ To keep the `data.frame` output, add the argument `return_matrix=FALSE` to the c ```{r} #| label: matrix -r_4 = ici_kendalltau(t(matrix_2), return_matrix = FALSE) +r_4 = ici_kendalltau(matrix_2, return_matrix = FALSE) r_4 ``` diff --git a/README.md b/README.md index f3551a5..56fb0b6 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ s2 = s1 + 10 matrix_1 = cbind(s1, s2) -r_1 = ici_kendalltau(t(matrix_1)) +r_1 = ici_kendalltau(matrix_1) r_1$cor #> s1 s2 #> s1 1 1 @@ -101,7 +101,7 @@ s4 = s2 s4[sample(100, 50)] = NA matrix_2 = cbind(s3, s4) -r_2 = ici_kendalltau(t(matrix_2)) +r_2 = ici_kendalltau(matrix_2) r_2$cor #> s3 s4 #> s3 1.0000000 0.9944616 @@ -136,10 +136,10 @@ microbenchmark( times = 5 ) #> Unit: microseconds -#> expr min lq mean median uq max -#> cor(x, y, method = "kendall") 13379.183 13401.532 13493.0316 13401.829 13595.070 13687.544 -#> ici_kt(x, y, "global") 
271.586 279.007 369.9088 314.779 357.086 627.086 -#> ici_kt(x2, y2, "global") 15983.902 16484.666 16952.9788 16667.885 17124.932 18503.509 +#> expr min lq mean median uq max +#> cor(x, y, method = "kendall") 13568.01 13818.243 14017.885 14090.436 14174.800 14437.933 +#> ici_kt(x, y, "global") 261.84 272.733 304.271 287.571 335.669 363.542 +#> ici_kt(x2, y2, "global") 14631.30 15614.509 16419.494 16340.620 17646.365 17864.673 #> neval #> 5 #> 5 @@ -177,7 +177,7 @@ resources for any multiprocessing engine registered with {future}. ``` r library(furrr) future::plan(multicore, workers = 4) -r_3 = ici_kendalltau(t(matrix_2)) +r_3 = ici_kendalltau(matrix_2) ``` ## Many Many Comparisons @@ -193,7 +193,7 @@ are converted to a matrix form if requested (the default).s To keep the of `ici_kendalltau`. ``` r -r_4 = ici_kendalltau(t(matrix_2), return_matrix = FALSE) +r_4 = ici_kendalltau(matrix_2, return_matrix = FALSE) r_4 #> $cor #> s1 s2 core raw pvalue taumax cor @@ -202,7 +202,7 @@ r_4 #> 3 s4 s4 0 1.0000000 0 1.000000 1.0000000 #> #> $run_time -#> [1] 0.02034521 +#> [1] 0.0196352 ``` ## Code of Conduct diff --git a/docs/404.html b/docs/404.html index 397fa0b..5c1ff30 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ ICIKendallTau - 0.3.21 + 1.0.1