Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R/colocboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#' @param effect_se Matrix of standard errors associated with the beta values
#' @param effect_n A scalar or a vector of sample sizes for estimating regression coefficients. Highly recommended!
#'
#' @param M The maximum number of gradient boosting rounds. If the number of outcomes are large, it will be automatically increased to a larger number.
#' @param M The maximum number of gradient boosting rounds for each outcome (default is 500).
#' @param stop_thresh The stop criterion for overall profile loglikelihood function.
#' @param tau The smooth parameter for proximity adaptive smoothing weights for the best update jk-star.
#' @param learning_rate_init The minimum learning rate for updating in each iteration.
Expand Down Expand Up @@ -140,7 +140,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
effect_se = NULL, # same as HyPrColoc, sebeta hat matrix with rowname of variable names
effect_n = NULL,
###### - Model Parameters
M = NULL, # maximum iteration time
M = 500, # maximum iteration for each outcome
stop_thresh = 1e-06, # stop criterion for profile_log and objective functions
tau = 0.01, # kernal_tau parameter for proximity smoothing weight
learning_rate_init = 0.01, # minimum step size for updating in each boosting round
Expand Down
6 changes: 3 additions & 3 deletions R/colocboost_inference.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ get_cormat <- function(X, intercepte = TRUE) {
return(cr)
}

#' @title Perform modularity-based hierarchical clustering
#' @title Perform modularity-based hierarchical clustering for a correlation matrix
#' @description
#' This function performs a modularity-based hierarchical clustering approach to identify clusters from a correlation matrix (LD matrix).
#' This function performs a modularity-based hierarchical clustering approach to identify clusters from a correlation matrix.
#'
#' @param cormat A correlation matrix.
#' @param min_cluster_corr The small correlation for the weights distributions across different iterations to be decided having only one cluster. Default is 0.8.
Expand Down Expand Up @@ -495,7 +495,7 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) {
if (delta <= 0) {
warning(paste(
"Warning message: potential sumstat & LD mismatch may happens for outcome", outcome_idx,
". Using logLR = CoS(profile) - max(profile). Please check our website."
". Using logLR = CoS(profile) - max(profile). Please check our website https://statfungen.github.io/colocboost/articles/."
))
}
cos_profile
Expand Down
9 changes: 8 additions & 1 deletion R/colocboost_init.R
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ colocboost_init_para <- function(cb_data, cb_model, tau = 0.01,
}
# - initial coloc_thresh
coloc_thresh <- (1 - coloc_thresh) * max(sapply(1:length(cb_model), function(i) max(cb_model[[i]]$change_loglike)))
# - initial num_updates_outcome and coveraged outcome
num_updates_outcome <- lapply(1:L, function(i) 1)
coveraged_outcome <- lapply(1:L, function(i) TRUE)
names(num_updates_outcome) <- names(coveraged_outcome) <- names(cb_model)

cb_model_para <- list(
"L" = L,
"P" = P,
Expand All @@ -289,7 +294,9 @@ colocboost_init_para <- function(cb_data, cb_model, tau = 0.01,
"variables" = cb_data$variable.names,
"focal_outcome_idx" = focal_outcome_idx,
"coveraged" = TRUE,
"num_updates" = 1
"num_updates" = 1,
"coveraged_outcome" = coveraged_outcome,
"num_updates_outcome" = num_updates_outcome
)
class(cb_model_para) <- "colocboost"

Expand Down
7 changes: 6 additions & 1 deletion R/colocboost_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,9 @@ get_model_info <- function(cb_obj, outcome_names = NULL) {

profile_loglik <- cb_obj$cb_model_para$profile_loglike
n_updates <- cb_obj$cb_model_para$num_updates
n_updates_outcome <- cb_obj$cb_model_para$num_updates_outcome
model_coveraged <- cb_obj$cb_model_para$coveraged
model_coveraged_outcome <- cb_obj$cb_model_para$coveraged_outcome
jk_update <- cb_obj$cb_model_para$real_update_jk
if (!is.null(jk_update)){
rownames(jk_update) <- paste0("jk_star_", 1:nrow(jk_update))
Expand All @@ -987,14 +989,17 @@ get_model_info <- function(cb_obj, outcome_names = NULL) {
outcome_coupled_best_update_obj <- lapply(cb_obj$cb_model, function(cb) cb$obj_single)
outcome_profile_loglik <- lapply(cb_obj$cb_model, function(cb) cb$profile_loglike_each)
names(outcome_proximity_obj) <- names(outcome_coupled_best_update_obj) <-
names(outcome_profile_loglik) <- outcome_names
names(outcome_profile_loglik) <- names(n_updates_outcome) <-
names(model_coveraged_outcome) <- outcome_names
ll <- list(
"model_coveraged" = model_coveraged,
"n_updates" = n_updates,
"profile_loglik" = profile_loglik,
"outcome_profile_loglik" = outcome_profile_loglik,
"outcome_proximity_obj" = outcome_proximity_obj,
"outcome_coupled_best_update_obj" = outcome_coupled_best_update_obj,
"outcome_model_coveraged" = model_coveraged_outcome,
"outcome_n_updates" = n_updates_outcome,
"jk_star" = jk_update
)
return(ll)
Expand Down
3 changes: 2 additions & 1 deletion R/colocboost_update.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ boost_KL_delta <- function(z, ld_feature, adj_dep,
}


boost_check_stop <- function(cb_model, cb_model_para, pos_stop,
boost_check_stop <- function(cb_model, cb_model_para, pos_stop, stop_no_coverage,
multi_test_max = 1) {
# - check the iteration for the stop outcome (pos_stop has the same jk with original data)
iter_each <- sapply(pos_stop, function(i) {
Expand All @@ -189,6 +189,7 @@ boost_check_stop <- function(cb_model, cb_model_para, pos_stop,
# --- stop all pos_stop outcomes
cb_model_para$update_y[pos_stop] <- 0
cb_model_para$true_stop <- pos_stop
cb_model_para$no_coverage_stop <- which(stop_no_coverage==TRUE)
cb_model_para$need_more <- NULL
} else {
# keep boosting for outcome with <= 10 iterations
Expand Down
61 changes: 38 additions & 23 deletions R/colocboost_workhorse.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#'
#' @noRd
colocboost_workhorse <- function(cb_data,
M = NULL,
M = 500,
tau = 0.01,
prioritize_jkstar = TRUE,
learning_rate_init = 0.01,
Expand Down Expand Up @@ -70,11 +70,13 @@ colocboost_workhorse <- function(cb_data,
coloc_thresh = coloc_thresh
)

M_single_outcome <- 200
if (is.null(M)) {
M <- cb_model_para$L * M_single_outcome
if (M == 1){
cb_model_para$M <- 1
} else {
M_single_outcome <- M
M <- cb_model_para$L * M
cb_model_para$M <- M
}
cb_model_para$M <- M
# - if multi_test value > multi_test cutoff for some outcomes, we will not update them.
if (!is.null(cb_model_para$true_stop)) {
if (sum(cb_model_para$update_y == 1) == 0) {
Expand Down Expand Up @@ -133,20 +135,25 @@ colocboost_workhorse <- function(cb_data,
pos_rtr_stop <- which(sum_cor == 0)
if (length(pos_rtr_stop) != 0) {
cb_model_para$update_y[pos.update[pos_rtr_stop]] <- 0
if (!is.null(focal_outcome_idx)){
message_focal_text <- if (focal_outcome_idx %in% pos.update[pos_rtr_stop]) "including focal outcome" else NULL
} else {message_focal_text <- NULL}
message(paste(
"Gradient boosting for outcome", paste(pos.update[pos_rtr_stop], collapse = ", "),
"Gradient boosting for outcome", paste(pos.update[pos_rtr_stop], collapse = ", "), message_focal_text,
"stop since rtr < 0 or max(correlation) > 1 after", m, "iterations!",
"Results for this locus are not stable, please check if mismatch between sumstat and LD!",
"See details in tutorial website."
"See details in tutorial website https://statfungen.github.io/colocboost/articles/."
))
}

# - stop by others - need to check iterations
stop <- rep(NA, cb_model_para$L)
stop <- stop_no_coverage <- rep(NA, cb_model_para$L)
for (i in pos.update) {
if (i %in% pos.update[pos_rtr_stop]) {
stop[i] <- TRUE
stop[i] <- stop_no_coverage[i] <- TRUE
cb_model_para$coveraged_outcome[i] <- FALSE
} else {
stop_no_coverage[i] <- FALSE
M_i <- length(cb_model[[i]]$profile_loglike_each)
stop1 <- abs(cb_model[[i]]$profile_loglike_each[M_i] - cb_model[[i]]$profile_loglike_each[M_i - 1]) /
cb_model[[i]]$profile_loglike_each[M_i - 1] < cb_model[[i]]$stop_thresh
Expand All @@ -162,7 +169,15 @@ colocboost_workhorse <- function(cb_data,
stop3 <- (M_i > M_single_outcome)
# -- to ensure if some outcomes do not update previously
if (length(cb_model[[i]]$profile_loglike_each) >= 2) {
stop[i] <- (stop1 | stop2 | stop3) # (stop2 | stop3) # (stop1 | stop2 | stop3)
stop[i] <- (stop1 | stop2 | stop3)
if (stop3){
stop_no_coverage[i] <- TRUE
cb_model_para$coveraged_outcome[i] <- FALSE
warning(paste("ColocBoost gradient boosting for outcome", i, "did not coverage in",
M_single_outcome, "iterations! Please check consistency between summary statistics",
"and LD matrix. See details in tutorial website https://statfungen.github.io/colocboost/articles/."))

}
} else {
stop[i] <- FALSE
}
Expand All @@ -172,15 +187,15 @@ colocboost_workhorse <- function(cb_data,
if (all(length(stop) == 1 & stop)) {
cb_model_para$update_y <- 0
if (cb_model_para$L == 1) {
message(paste("Gradient boosting for outcome 1 converge after", m, "iterations!"))
if (!stop_no_coverage) message(paste("Gradient boosting for outcome 1 converged after", m, "iterations!"))
}
} else {
if (all(!stop[pos.update])) {
# -- all Y do not stop, need more iterations.
cb_model_para$update_y <- cb_model_para$update_y
} else {
pos_stop <- which(stop) # which outcome reach the stop criterion
ttmp <- boost_check_stop(cb_model, cb_model_para, pos_stop,
ttmp <- boost_check_stop(cb_model, cb_model_para, pos_stop, stop_no_coverage,
multi_test_max = multi_test_max
)
cb_model_para <- ttmp$cb_model_para
Expand All @@ -190,29 +205,26 @@ colocboost_workhorse <- function(cb_data,
####### ---------------------------------------------
# calculate objective function of Y for the last iteration.
cb_model <- boost_obj_last(cb_data, cb_model, cb_model_para)
tt_stop <- setdiff(cb_model_para$true_stop, cb_model_para$no_coverage_stop)
if (!is.null(focal_outcome_idx)) {
if (focal_outcome_idx %in% cb_model_para$true_stop) {
if (focal_outcome_idx %in% tt_stop) {
message(paste(
"Gradient boosting for focal outcome", focal_outcome_idx,
"converged after", m, "iterations!"
))
if (length(setdiff(cb_model_para$true_stop, focal_outcome_idx)) != 0) {
if (length(setdiff(tt_stop, focal_outcome_idx)) != 0) {
message(paste(
"Gradient boosting for outcome", paste(setdiff(cb_model_para$true_stop, focal_outcome_idx), collapse = ", "),
"Gradient boosting for outcome", paste(setdiff(tt_stop, focal_outcome_idx), collapse = ", "),
"converged after", m, "iterations!"
))
}
} else {
message(paste(
"Gradient boosting for outcome", paste(cb_model_para$true_stop, collapse = ", "),
"converged after", m, "iterations!"
))
if (length(tt_stop) != 0)
message(paste("Gradient boosting for outcome", paste(tt_stop, collapse = ", "), "converged after", m, "iterations!"))
}
} else {
message(paste(
"Gradient boosting for outcome", paste(cb_model_para$true_stop, collapse = ", "),
"converged after", m, "iterations!"
))
if (length(tt_stop) != 0)
message(paste("Gradient boosting for outcome", paste(tt_stop, collapse = ", "), "converged after", m, "iterations!"))
}
}
}
Expand Down Expand Up @@ -284,5 +296,8 @@ cb_model_para_update <- function(cb_model, cb_model_para) {
# - additional calculating profile_loglikelihood
tmp <- sum(sapply(1:length(cb_model), function(i) tail(cb_model[[i]]$profile_loglike_each, n = 1)))
cb_model_para$profile_loglike <- c(cb_model_para$profile_loglike, tmp)
num_updates_outcomes <- sapply(1:length(cb_model), function(i) length(cb_model[[i]]$profile_loglike_each))
names(num_updates_outcomes) <- names(cb_model)
cb_model_para$num_updates_outcome <- num_updates_outcomes
return(cb_model_para)
}
4 changes: 2 additions & 2 deletions man/colocboost.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/get_hierarchical_clusters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ test_that("colocboost_plot handles filtering options", {
# Test with plot_focal_only
expect_error(suppressWarnings(colocboost_plot(cb_res, plot_focal_only = TRUE)), NA)

# Test with plot_focal_cos_outocme_only
expect_error(suppressWarnings(colocboost_plot(cb_res, plot_focal_cos_outocme_only = TRUE)), NA)
# Test with plot_focal_cos_outcome_only
expect_error(suppressWarnings(colocboost_plot(cb_res, plot_focal_cos_outcome_only = TRUE)), NA)
})

# Test colocboost_plot with visual customization options
Expand Down
8 changes: 4 additions & 4 deletions vignettes/ColocBoost_Wrapper_Pipeline.Rmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
title: "ColocBoost Wrapper Pipeline"
title: "Bioinformatics Pipeline for ColocBoost"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{ColocBoost Wrapper Pipeline}
%\VignetteIndexEntry{Bioinformatics Pipeline for ColocBoost}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
Expand All @@ -15,7 +15,7 @@ knitr::opts_chunk$set(
```


This vignette demonstrates how to use the ColocBoost wrapper pipeline to perform colocalization analysis with `colocboost`.
This vignette demonstrates how to use the bioinformatics pipeline for ColocBoost to perform colocalization analysis with `colocboost`.

- See more details about functions in the package `pecotmr` with [link](https://github.com/StatFunGen/pecotmr/tree/main) and
`colocboost_pipeline` with [link](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R).
Expand Down Expand Up @@ -123,7 +123,7 @@ In this section, we perform the colocalization analysis using the `colocboost_an
- **`region_data`**: The output of the `load_multitask_regional_data` function.
- **`focal_trait`**: Name of the trait if performing disease-prioritized ColocBoost.
- **`event_filters`**: A list of patterns for filtering events based on context names.
Example: for sQTL, list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = "clu_(\\d+_[+-?]):PR:", exclude_pattern = "clu_(\\d+_[+-?]):IN:").
Example: for sQTL, `list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = "clu_(\\d+_[+-?]):PR:", exclude_pattern = "clu_(\\d+_[+-?]):IN:")`.
- **`maf_cutoff`**: A scalar to remove variants with maf < maf_cutoff, default is 0.005.
- **`pip_cutoff_to_skip_ind`**: A vector of cutoff values for skipping analysis based on PIP values for each context. Default is 0.
- **`pip_cutoff_to_skip_sumstat`**: A vector of cutoff values for skipping analysis based on PIP values for each sumstat. Default is 0.
Expand Down
6 changes: 3 additions & 3 deletions vignettes/Summary_Statistics_Colocalization.Rmd
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
---
title: "Summary Statistics Colocalization"
title: "Summary Statistics Data Colocalization"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Summary Statistics Colocalization}
%\VignetteIndexEntry{Summary Statistics Data Colocalization}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
```{r, include = FALSE}s
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
Expand Down