diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 4267891..b7a0265 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -507,14 +507,19 @@ get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL } #' Extract CoS at different coverages -#' -#' @description `get_cos` get the colocalization confidence sets (CoS) with different coverage. If Genotype...provided...check purity -#' +#' +#' @description `get_cos` extracts colocalization confidence sets (CoS) at different coverage levels +#' from ColocBoost results. When genotype data (X) or correlation matrix (Xcorr) is provided, it +#' can also calculate and filter CoS based on purity statistics, ensuring that variants within +#' each CoS are sufficiently correlated. +#' #' @param cb_output Output object from `colocboost` analysis #' @param coverage A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95). #' @param X Genotype matrix of values of the p variables. Used to compute correlations if Xcorr is not provided. #' @param Xcorr Correlation matrix of correlations between variables. Alternative to X. #' @param n_purity The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics. +#' @param min_abs_corr The minimum absolute correlation value of variants in a CoS to be considered pass (\dQuote{purity}) statistics. +#' @param median_abs_corr The median absolute correlation value of variants in a CoS to be considered pass (\dQuote{purity}) statistics. #' When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled. #' #' @return A list of indices of variables in each CoS. diff --git a/man/get_cos.Rd b/man/get_cos.Rd index 3b277b6..9dbcee0 100644 --- a/man/get_cos.Rd +++ b/man/get_cos.Rd @@ -23,14 +23,21 @@ get_cos( \item{Xcorr}{Correlation matrix of correlations between variables. Alternative to X.} -\item{n_purity}{The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics. +\item{n_purity}{The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics.} + +\item{min_abs_corr}{The minimum absolute correlation value of variants in a CoS to be considered pass (\dQuote{purity}) statistics.} + +\item{median_abs_corr}{The median absolute correlation value of variants in a CoS to be considered pass (\dQuote{purity}) statistics. When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled.} } \value{ A list of indices of variables in each CoS. } \description{ -\code{get_cos} get the colocalization confidence sets (CoS) with different coverage. If Genotype...provided...check purity +\code{get_cos} extracts colocalization confidence sets (CoS) at different coverage levels +from ColocBoost results. When genotype data (X) or correlation matrix (Xcorr) is provided, it +can also calculate and filter CoS based on purity statistics, ensuring that variants within +each CoS are sufficiently correlated. } \examples{ # colocboost example diff --git a/tests/testthat/test_colocboost.R b/tests/testthat/test_colocboost.R index e7e7c25..7e2afab 100644 --- a/tests/testthat/test_colocboost.R +++ b/tests/testthat/test_colocboost.R @@ -49,11 +49,19 @@ test_that("colocboost runs with individual data", { X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters - result <- colocboost( - X = X_list, - Y = Y_list, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + X = X_list, + Y = Y_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("smallest number of variables", warnings)) || + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object @@ -101,11 +109,19 @@ test_that("colocboost runs with summary statistics", { } # Run colocboost with summary statistics - result <- colocboost( - sumstat = sumstat_list, - LD = list(LD), - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + sumstat = sumstat_list, + LD = list(LD), + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("smallest number of variables", warnings)) || + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object @@ -124,12 +140,20 @@ test_that("colocboost handles focal outcome correctly", { X_list <- list(test_data$X, test_data$X) # Run colocboost with focal_outcome_idx = 1 - result <- colocboost( - X = X_list, - Y = Y_list, - focal_outcome_idx = 1, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + X = X_list, + Y = Y_list, + focal_outcome_idx = 1, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("smallest number of variables", warnings)) || + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object @@ -148,11 +172,19 @@ test_that("get_cos_summary returns expected structure", { X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters - result <- colocboost( - X = X_list, - Y = Y_list, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + X = X_list, + Y = Y_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("smallest number of variables", warnings)) || + any(grepl("did not coverage", warnings)) ) # Get summary @@ -182,11 +214,19 @@ test_that("colocboost_plot runs without error", { X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters - result <- colocboost( - X = X_list, - Y = Y_list, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + X = X_list, + Y = Y_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("smallest number of variables", warnings)) || + any(grepl("did not coverage", warnings)) ) # Basic plotting should not throw an error @@ -202,11 +242,19 @@ test_that("get_robust_colocalization maintains colocboost structure", { X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters - result <- colocboost( - X = X_list, - Y = Y_list, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + X = X_list, + Y = Y_list, + M = 10, + output_level = 2 + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("smallest number of variables", warnings)) || + any(grepl("did not coverage", warnings)) ) # Run get_strong_colocalization @@ -234,5 +282,13 @@ test_that("colocboost handles missing/invalid inputs appropriately", { Y_list <- list(test_data$Y[,1], test_data$Y[,2]) X_list <- list(X_bad, X_bad) - expect_error(colocboost(X = X_list, Y = Y_list), "Please provide the sample index of X and Y, since they do not have the same samples!") + expect_error( + suppressWarnings( + colocboost( + X = X_list, + Y = Y_list + ) + ), + "Please provide the sample index of X and Y, since they do not have the same samples!" + ) }) \ No newline at end of file diff --git a/tests/testthat/test_corner_cases.R b/tests/testthat/test_corner_cases.R index dcd9d68..78756cf 100644 --- a/tests/testthat/test_corner_cases.R +++ b/tests/testthat/test_corner_cases.R @@ -94,10 +94,12 @@ test_that("colocboost handles missing values in Y", { # Run colocboost - should handle NAs automatically expect_error( - result <- colocboost( - X = X_list, - Y = Y_list, - M = 5 # Small number of iterations for testing + suppressWarnings( + result <- colocboost( + X = X_list, + Y = Y_list, + M = 5 # Small number of iterations for testing + ) ), NA ) @@ -114,9 +116,11 @@ test_that("colocboost handles different sample sizes", { # Run colocboost - should error without sample indices expect_error( - colocboost( - X = test_data$X, - Y = test_data$Y + suppressWarnings( + colocboost( + X = test_data$X, + Y = test_data$Y + ) ) ) @@ -144,13 +148,17 @@ test_that("colocboost handles different variant sets", { test_data <- generate_edge_case_data("different_variants") # Run colocboost - expect_warning( + warnings <- capture_warnings({ result <- colocboost( X = test_data$X, Y = test_data$Y, dict_YX = dict_YX, M = 5 # Small number of iterations for testing ) + }) + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object diff --git a/tests/testthat/test_model.R b/tests/testthat/test_model.R index 23041ed..8a8848f 100644 --- a/tests/testthat/test_model.R +++ b/tests/testthat/test_model.R @@ -116,12 +116,18 @@ test_that("colocboost_workhorse performs boosting iterations", { # If the workhorse function is exported if (exists("colocboost_workhorse")) { - expect_error({ + warnings <- capture_warnings({ result <- colocboost_workhorse( cb_obj$cb_data, M = 5 # Small number for testing ) - }, NA) + }) + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("did not coverage", warnings)) + ) + # Then test the result + expect_s3_class(result, "colocboost") } else { skip("colocboost_workhorse not directly accessible") } diff --git a/tests/testthat/test_sumstats.R b/tests/testthat/test_sumstats.R index 30e13ad..deb3aaa 100644 --- a/tests/testthat/test_sumstats.R +++ b/tests/testthat/test_sumstats.R @@ -102,11 +102,18 @@ test_sumstat_data <- generate_sumstat_test_data() # Test 1: Basic summary statistics input test_that("colocboost runs with basic summary statistics format", { # Run colocboost with sumstat and single LD matrix - result <- colocboost( - sumstat = test_sumstat_data$sumstat, - LD = test_sumstat_data$LD, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + sumstat = test_sumstat_data$sumstat, + LD = test_sumstat_data$LD, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object @@ -124,13 +131,20 @@ test_that("colocboost runs with basic summary statistics format", { # Test 2: HyPrColoc compatible format test_that("colocboost runs with HyPrColoc compatible format", { # Run colocboost with effect matrices - result <- colocboost( - effect_est = test_sumstat_data$effect_est, - effect_se = test_sumstat_data$effect_se, - effect_n = test_sumstat_data$effect_n, - LD = test_sumstat_data$LD, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + effect_est = test_sumstat_data$effect_est, + effect_se = test_sumstat_data$effect_se, + effect_n = test_sumstat_data$effect_n, + LD = test_sumstat_data$LD, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object @@ -147,11 +161,18 @@ test_that("colocboost runs with matched LD matrices", { LD_list <- list(test_sumstat_data$LD, test_sumstat_data$LD) # Run colocboost with summary statistics and multiple LD matrices - result <- colocboost( - sumstat = test_sumstat_data$sumstat, - LD = LD_list, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + sumstat = test_sumstat_data$sumstat, + LD = LD_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object @@ -172,12 +193,19 @@ test_that("colocboost runs with dictionary-mapped LD matrices", { dict_sumstatLD <- matrix(c(1:2, 1:2), ncol = 2) # Run colocboost with dictionary mapping - result <- colocboost( - sumstat = test_sumstat_data$sumstat, - LD = LD_list, - dict_sumstatLD = dict_sumstatLD, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + sumstat = test_sumstat_data$sumstat, + LD = LD_list, + dict_sumstatLD = dict_sumstatLD, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("did not coverage", warnings)) ) # Test that we get a colocboost object @@ -191,10 +219,17 @@ test_that("colocboost runs with dictionary-mapped LD matrices", { # Test 5: Missing LD matrix (LD-free approach) test_that("colocboost runs with missing LD matrix", { # Run colocboost without LD matrix (should default to diagonal) - result <- colocboost( - sumstat = test_sumstat_data$sumstat, - M = 1, # Only one iteration expected in LD-free case - output_level = 2 # More detailed output for testing + warnings <- capture_warnings({ + result <- colocboost( + sumstat = test_sumstat_data$sumstat, + M = 1, # Only one iteration expected in LD-free case + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("The smallest number of variables across outcomes is 20 < 100", warnings)) ) # Test that we get a colocboost object @@ -229,14 +264,19 @@ test_that("colocboost handles missing sample size correctly", { }) # Should run but give a warning about missing sample size - expect_warning( + warnings <- capture_warnings({ result <- colocboost( sumstat = sumstat_no_n, LD = test_sumstat_data$LD, M = 10, output_level = 2 - ), - "Providing the sample size" + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("Providing the sample size", warnings)) || + any(grepl("did not coverage", warnings)) ) # Still should get a valid result diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 2a75356..e3bd354 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -175,14 +175,6 @@ test_that("get_cormat calculates correlation matrix correctly", { expect_equal(result, expected, tolerance = 1e-6) }) -# Test for check_jk_jkeach function -test_that("check_jk_jkeach identifies equivalent variants based on LD", { - skip("Internal function being tested through main function") - - # This function is harder to test directly as it relies on internal colocboost objects - # We'll test it indirectly through colocboost main function -}) - # Test for get_between_purity function test_that("get_between_purity calculates correlation between sets", { # Create test data with known correlation structure @@ -213,12 +205,6 @@ test_that("get_between_purity calculates correlation between sets", { expect_lt(result2[1], 0.5) }) -# Test for get_max_profile function -test_that("get_max_profile updates check_null_max correctly", { - skip("Internal function being tested through main function") - - # This function modifies internal CB object - hard to test directly -}) # Test for get_merge_ordered_with_indices function test_that("get_merge_ordered_with_indices merges vectors", { diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd index 4fb26e3..fdfa020 100644 --- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd +++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd @@ -129,7 +129,7 @@ Example: for sQTL, `list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = - **`pip_cutoff_to_skip_sumstat`**: A vector of cutoff values for skipping analysis based on PIP values for each sumstat. Default is 0. - **`qc_method`**: Quality control method to use. Options are "rss_qc", "dentist", or "slalom" (default: "rss_qc"). - **`impute`**: Logical; if TRUE, performs imputation for outliers identified in the analysis (default: TRUE). -- **`impute_opts`**: A list of imputation options including rcond, R2_threshold, and minimum_ld (default: list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5)). +- **`impute_opts`**: A list of imputation options including rcond, R2_threshold, and minimum_ld (default: `list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5)`). ```{r, colocboost-analysis} diff --git a/vignettes/Interpret_ColocBoost_Output.Rmd b/vignettes/Interpret_ColocBoost_Output.Rmd index 67aab7e..085cec7 100644 --- a/vignettes/Interpret_ColocBoost_Output.Rmd +++ b/vignettes/Interpret_ColocBoost_Output.Rmd @@ -145,6 +145,11 @@ res$data_info$outcome_info **`cos_details`** provides a detailed information for colocalization events identified by `colocboost`. This section will provide a detailed explanation of the components in `cos_details`. +```{r cos-details} +names(res$cos_details) +``` + + #### 3.3.1. Colocalized variants for each CoS (**`cos`**) - **`cos`**: A list with a detailed information of colocalized variants for each CoS. @@ -201,7 +206,9 @@ res$cos_details$cos_top_variables ### 3.4. Model information (**`model_info`**) - **`model_coveraged`**: if the model is converged. +- **`outcome_model_coveraged`**: if the trait-specific model is converged. - **`n_updates`**: number of boosting rounds +- **`outcome_n_updates`**: number of boosting rounds for each trait. - **`jk_update`**: indices of the variants being updated in the model at each boosting round. ```{r jk_update} @@ -254,6 +261,7 @@ Y <- Ind_5traits$Y[1:3] X1 <- Heterogeneous_Effect$X[,1:3000] Y1 <- Heterogeneous_Effect$Y[,1,drop=F] res <- colocboost(X = c(X, list(X1)), Y = c(Y, list(Y1)), output_level = 2) +names(res$ucos_details) ``` #### 3.5.1. Trait-specific (uncolocalized) confidence sets (**`ucos`**) diff --git a/vignettes/Summary_Statistics_Colocalization.Rmd b/vignettes/Summary_Statistics_Colocalization.Rmd index 7824dd2..304b6bf 100644 --- a/vignettes/Summary_Statistics_Colocalization.Rmd +++ b/vignettes/Summary_Statistics_Colocalization.Rmd @@ -7,7 +7,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r, include = FALSE}s +```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>"