From 6bf86840f2c582353a38be3175ff021e8e2aa0ae Mon Sep 17 00:00:00 2001
From: Gao Wang <wangow@gmail.com>
Date: Thu, 7 May 2026 06:28:33 -0400
Subject: [PATCH 1/4] Add sldsc wrapper

---
 DESCRIPTION              |   2 +-
 R/sldsc_wrapper.R        | 326 +++++++++++++++++++++++++++++++++++++++
 tests/testthat/test_LD.R |  58 +++++++
 3 files changed, 385 insertions(+), 1 deletion(-)
 create mode 100644 R/sldsc_wrapper.R

diff --git a/DESCRIPTION b/DESCRIPTION
index ca1a7691..035eb535 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -68,4 +68,4 @@ LinkingTo:
     RcppArmadillo
 NeedsCompilation: yes
 VignetteBuilder: knitr
-RoxygenNote: 7.3.3
+Config/roxygen2/version: 8.0.0
diff --git a/R/sldsc_wrapper.R b/R/sldsc_wrapper.R
new file mode 100644
index 00000000..d33ff165
--- /dev/null
+++ b/R/sldsc_wrapper.R
@@ -0,0 +1,326 @@
+# Stratified LD Score Regression (S-LDSC) post-processing wrappers around polyfun.
+#
+# This file provides the post-processing layer for the xqtl-protocol sLDSC pipeline:
+# read polyfun outputs per trait, compute Gazal-style standardized tau* and
+# differential per-SNP heritability (EnrichStat), and run DerSimonian-Laird
+# random-effects meta-analysis across traits.
+#
+# Reference panel convention: all LD-derived quantities (baseline LD scores,
+# target LD scores, regression weights, allele frequencies) must come from the
+# same reference panel. Do not mix files from different panels (e.g. 1000G vs ADSP).
+#
+# MAF convention: by default we restrict to MAF > 5% per the sLDSC recommendation.
+# Pass maf_cutoff = 0 to opt out (not recommended).
+#
+# Cross-type comparison: tau* (Gazal et al. 2017 standardization) is the
+# cross-type comparable statistic. Use tau* to rank or meta-analyze annotations
+# that mix binary and continuous types. E (proportion-based enrichment) is
+# scale-dependent for continuous annotations and is only comparable within type.
+
+#' @title Read S-LDSC outputs from polyfun for one trait/run
+#'
+#' @description Reads the regression outputs produced by `polyfun/ldsc.py` for a
+#'   single polyfun run (one trait, one annotation set) and returns them as a
+#'   tidy list ready for downstream standardization. Hides the underlying file
+#'   formats; downstream code consumes only modeling quantities.
+#'
+#' @param prefix Character. Path prefix to the polyfun outputs for one trait/run.
+#'   The function appends `.results`, `.log`, and `.part_delete` to this prefix.
+#'   Example: `"/path/to/cwd/CAD_META.filtered.sumstats.gz"`.
+#'
+#' @return A named list with components:
+#'   \describe{
+#'     \item{categories}{Character vector of annotation names, in regression order
+#'       (target annotations first, baseline last by convention).}
+#'     \item{tau}{Numeric named vector of regression coefficients \eqn{\tau_C}.}
+#'     \item{tau_se}{Numeric named vector of standard errors of \eqn{\tau_C}.}
+#'     \item{enrichment}{Numeric named vector of \eqn{E_C = \pi^{h^2}_C / \pi^M_C}.}
+#'     \item{enrichment_se}{Numeric named vector of standard errors of \eqn{E_C}.}
+#'     \item{enrichment_p}{Numeric named vector of p-values for the differential
+#'       per-SNP heritability test (the EnrichStat p-value reported by polyfun).}
+#'     \item{prop_h2}{Numeric named vector \eqn{\pi^{h^2}_C}.}
+#'     \item{prop_snps}{Numeric named vector \eqn{\pi^M_C}.}
+#'     \item{h2g}{Scalar total trait heritability \eqn{h^2_g}.}
+#'     \item{tau_blocks}{Numeric matrix of per-block jackknife \eqn{\tau} values,
+#'       dimensions (n_blocks x n_annotations), columns named by category.}
+#'     \item{n_blocks}{Integer number of jackknife blocks (typically 200).}
+#'   }
+#'
+#' @examples
+#' \dontrun{
+#' run <- read_sldsc_trait("/output/CAD_META.filtered.sumstats.gz")
+#' run$tau["my_target_annotation"]
+#' }
+#'
+#' @export
+read_sldsc_trait <- function(prefix) {
+  stop("Not yet implemented (skeleton).")
+}
+
+
+#' @title Compute per-annotation standard deviation, MAF-restricted
+#'
+#' @description Computes the standard deviation of each annotation column in the
+#'   target annotation files, restricted to SNPs above a MAF cutoff. The MAF
+#'   restriction is required for the standardization to be internally consistent
+#'   with polyfun's regression, which operates on MAF > cutoff SNPs by default.
+#'
+#' @param target_anno_dir Character. Directory containing target annotation files
+#'   (one per chromosome) in polyfun's `.annot.gz` format.
+#' @param frqfile_dir Character or NULL. Directory containing PLINK `.frq` files
+#'   for the reference panel. Required when `maf_cutoff > 0`; the function
+#'   errors if missing.
+#' @param plink_name Character. Filename prefix of the `.frq` files
+#'   (e.g. `"ADSP_chr"`). Files are expected at `{plink_name}{chr}.frq`.
+#' @param maf_cutoff Numeric, default `0.05`. Only SNPs with `MAF > maf_cutoff`
+#'   contribute to sd. Set to `0` to disable filtering (must match the
+#'   `--not-M-5-50` regression mode for internal consistency).
+#' @param annot_cols Character or integer vector, default NULL. Annotation columns
+#'   to compute sd for. If NULL, all annotation columns (i.e. all columns past
+#'   the standard fixed columns CHR/SNP/BP/CM/A1/A2/MAF) are used.
+#'
+#' @return Named numeric vector of \eqn{sd_C} values, one per annotation.
+#'
+#' @examples
+#' \dontrun{
+#' sd_annot <- compute_sldsc_annot_sd(
+#'   target_anno_dir = "/output/ldscores/AC_DeJager_eQTL",
+#'   frqfile_dir     = "/ref/ADSP/frq",
+#'   plink_name      = "ADSP_chr",
+#'   maf_cutoff      = 0.05
+#' )
+#' }
+#'
+#' @export
+compute_sldsc_annot_sd <- function(target_anno_dir, frqfile_dir = NULL,
+                                   plink_name = "ADSP_chr",
+                                   maf_cutoff = 0.05, annot_cols = NULL) {
+  stop("Not yet implemented (skeleton).")
+}
+
+
+#' @title Reference-panel SNP count at a given MAF cutoff
+#'
+#' @description Returns the number of SNPs in the reference panel above the MAF
+#'   cutoff, matching the regression's M_5_50 (when `maf_cutoff > 0`) or M
+#'   (when `maf_cutoff == 0`). Used as \eqn{M_{ref}} in the standardization.
+#'
+#' @param target_anno_dir Character. Directory containing per-chromosome
+#'   `.l2.M_5_50` (when MAF-filtered) or `.l2.M` (when not) files produced by
+#'   polyfun's `compute_ldscores.py` for the target annotation.
+#' @param maf_cutoff Numeric, default `0.05`.
+#'
+#' @return Scalar integer: total number of SNPs in the reference panel above the cutoff.
+#'
+#' @export
+compute_sldsc_M_ref <- function(target_anno_dir, maf_cutoff = 0.05) {
+  stop("Not yet implemented (skeleton).")
+}
+
+
+#' @title Detect whether each annotation is binary or continuous
+#'
+#' @description Inspects each annotation column in the target annotation files
+#'   and returns whether its values lie in \{0, 1\} (binary) or take other values
+#'   (continuous). Used to select the appropriate within-type headline statistic.
+#'   For cross-type comparison, always use \eqn{\tau^*_C} regardless of the flag.
+#'
+#' @param target_anno_dir Character. Directory containing the target `.annot.gz`
+#'   files (one per chromosome).
+#' @param annot_cols Character or integer vector, default NULL.
+#'
+#' @return Named logical vector: TRUE for binary, FALSE for continuous.
+#'
+#' @export
+is_binary_sldsc_annot <- function(target_anno_dir, annot_cols = NULL) {
+  stop("Not yet implemented (skeleton).")
+}
+
+
+#' @title Standardize tau and compute per-block tau* and EnrichStat for one polyfun run
+#'
+#' @description Given the polyfun read result plus the annotation sd's and
+#'   reference SNP count, computes the Gazal-style standardized tau (\eqn{\tau^*})
+#'   and the differential per-SNP heritability statistic (EnrichStat) for the
+#'   target annotations, including their per-block jackknife values for use in
+#'   cross-trait meta-analysis.
+#'
+#' @details The standardization is
+#'   \deqn{\tau^*_C = \tau_C \cdot sd_C \cdot M_{ref} / h^2_g}
+#'   applied both to the point estimate and to each of the n_blocks jackknife
+#'   blocks. The EnrichStat is
+#'   \deqn{\frac{h^2_g}{M_{ref}}\left[\frac{\pi^{h^2}_C}{\pi^M_C}
+#'         - \frac{1-\pi^{h^2}_C}{1-\pi^M_C}\right]}
+#'   computed from the per-block tau values, with jackknife SE from the per-block
+#'   variance \eqn{\sqrt{\frac{(B-1)^2}{B} Var_b}}.
+#'
+#'   Only target annotations are returned; baseline annotations are dropped after
+#'   serving their role of conditioning the regression.
+#'
+#' @param trait_data List. Output of \code{\link{read_sldsc_trait}} for one polyfun run.
+#' @param sd_annot Named numeric vector. Output of
+#'   \code{\link{compute_sldsc_annot_sd}}, restricted to target annotations.
+#' @param M_ref Scalar. Output of \code{\link{compute_sldsc_M_ref}}.
+#' @param target_categories Character vector or NULL. Annotation names to keep.
+#'   If NULL, all categories with names matching `names(sd_annot)` are kept.
+#'
+#' @return A list with components:
+#'   \describe{
+#'     \item{summary}{Data frame with one row per target annotation and columns:
+#'       `target`, `tau`, `tau_se`, `tau_star`, `tau_star_se`, `enrichment`,
+#'       `enrichment_se`, `enrichment_p`, `enrichstat`, `enrichstat_se`.}
+#'     \item{tau_star_blocks}{Matrix (n_blocks x n_target) of per-block \eqn{\tau^*_C}.}
+#'     \item{enrichstat_blocks}{Matrix (n_blocks x n_target) of per-block EnrichStat.}
+#'     \item{h2g}{Scalar trait heritability.}
+#'     \item{n_blocks}{Integer.}
+#'   }
+#'
+#' @export
+standardize_sldsc_trait <- function(trait_data, sd_annot, M_ref,
+                                    target_categories = NULL) {
+  stop("Not yet implemented (skeleton).")
+}
+
+
+#' @title Random-effects meta-analysis of S-LDSC quantities across traits
+#'
+#' @description DerSimonian-Laird random-effects meta-analysis of one S-LDSC
+#'   quantity across multiple traits, for one annotation. Used as the entry
+#'   point both for the default pipeline meta and for user-driven re-meta over
+#'   subsets of traits.
+#'
+#' @details Implements
+#'   \deqn{\hat\theta_{meta} = \sum_i w_i \hat\theta_i / \sum_i w_i,
+#'         \quad SE_{meta} = 1/\sqrt{\sum_i w_i},
+#'         \quad w_i = 1/(SE_i^2 + \hat\sigma^2)}
+#'   via `rmeta::meta.summaries(..., method = "random")`. The two-sided p-value
+#'   is computed from the meta z-score under the standard normal.
+#'
+#'   `quantity = "tau_star"` and `"enrichstat"` use the jackknife SE from
+#'   per-block delete values (computed inside
+#'   \code{\link{standardize_sldsc_trait}}). `quantity = "enrichment"` uses
+#'   the SE reported directly by the regression engine.
+#'
+#' @param per_trait_estimates Named list of standardized per-trait results from
+#'   \code{\link{standardize_sldsc_trait}}. Pass a subset to re-meta on a
+#'   user-chosen group of traits.
+#' @param category Character. Annotation name to meta-analyze.
+#' @param quantity Character, one of `"tau_star"`, `"enrichment"`, or
+#'   `"enrichstat"`.
+#'
+#' @return A list with components: `mean`, `se`, `p`, `n_traits`,
+#'   `traits_used`, and `tau2` (the DerSimonian-Laird between-trait variance).
+#'
+#' @examples
+#' \dontrun{
+#' meta_all <- meta_sldsc_random(per_trait_results, "my_anno", "tau_star")
+#'
+#' # subset to a custom trait group
+#' subset <- per_trait_results[c("CAD_META", "AD_GWAX", "PD_meta")]
+#' meta_neuro <- meta_sldsc_random(subset, "my_anno", "tau_star")
+#' }
+#'
+#' @export
+meta_sldsc_random <- function(per_trait_estimates, category,
+                              quantity = c("tau_star", "enrichment", "enrichstat")) {
+  stop("Not yet implemented (skeleton).")
+}
+
+
+#' @title End-to-end S-LDSC post-processing across traits, single + joint in one pass
+#'
+#' @description Top-level convenience wrapper. Reads polyfun outputs (single-tau
+#'   runs and the joint-tau run) for each trait, standardizes both modes, and
+#'   runs the default random-effects meta-analysis across all traits supplied.
+#'   Single and joint analyses are produced in one pass; no `joint_tau` flag.
+#'
+#' @details For a set of \eqn{N} target annotations and one trait, the caller is
+#'   expected to have already produced \eqn{N+1} polyfun runs: \eqn{N} single-tau
+#'   runs (each fitting one target + baseline) and 1 joint-tau run (fitting all
+#'   \eqn{N} targets + baseline together). The pipeline reads all of these, drops
+#'   baseline categories, standardizes both modes, and assembles consolidated
+#'   per-trait and meta data frames.
+#'
+#'   Reports baseline annotation count and names via `message()` once at the
+#'   start of the run for transparency.
+#'
+#'   Cross-type comparison: the `meta$tau_star` frame is the apple-to-apple table
+#'   for ranking annotations regardless of binary/continuous type. The `is_binary`
+#'   column lets callers filter further when within-type comparison is desired.
+#'
+#' @param trait_single_prefixes Named list. For each trait (name = trait id),
+#'   a character vector of length \eqn{N} giving the polyfun output prefixes for
+#'   the \eqn{N} single-tau runs (one per target annotation). Order must match
+#'   `target_categories`.
+#' @param trait_joint_prefix Named character. For each trait (name = trait id,
+#'   matching `trait_single_prefixes`), the polyfun output prefix for the joint-tau
+#'   run that fits all \eqn{N} targets together.
+#' @param target_anno_dir Character. Directory of the target annotation files
+#'   (used for `sd_C` and binary detection). When all \eqn{N} targets share
+#'   one directory (multi-column annot files), pass that. When each target has
+#'   its own directory, pass a named character vector (names = target categories).
+#' @param frqfile_dir Character or NULL. Directory of `.frq` files for the panel.
+#' @param plink_name Character. Filename prefix of `.frq` files (default `"ADSP_chr"`).
+#' @param maf_cutoff Numeric, default `0.05`.
+#' @param target_categories Character vector or NULL. Target annotation names to
+#'   report. If NULL, auto-detected from the joint-run results as all categories
+#'   not in baseline.
+#'
+#' @return A list with components:
+#'   \describe{
+#'     \item{per_trait}{Named list of per-trait results. For each trait, a list with:
+#'       `summary` (wide data frame of single + joint estimates side-by-side per
+#'       target annotation, with `is_binary` flag), `tau_star_blocks_single`,
+#'       `tau_star_blocks_joint`, `enrichstat_blocks_single`,
+#'       `enrichstat_blocks_joint` (all matrices), and `h2g`.}
+#'     \item{meta}{List of three data frames, each with one row per target annotation:
+#'       \itemize{
+#'         \item `tau_star`: columns `target`, `is_binary`, `single_mean`,
+#'           `single_se`, `single_p`, `joint_mean`, `joint_se`, `joint_p`, `n_traits`.
+#'         \item `enrichment`: same shape.
+#'         \item `enrichstat`: same shape.
+#'       }
+#'       The `tau_star` frame is the cross-type comparable headline.}
+#'     \item{params}{List echoing `maf_cutoff`, `M_ref`, `target_categories`,
+#'       `n_baseline`, `baseline_categories`, and `trait_names` for reproducibility.}
+#'   }
+#'
+#' @examples
+#' \dontrun{
+#' res <- sldsc_postprocessing_pipeline(
+#'   trait_single_prefixes = list(
+#'     CAD_META = c("/output/CAD_META.single_anno1", "/output/CAD_META.single_anno2"),
+#'     AD_GWAX  = c("/output/AD_GWAX.single_anno1",  "/output/AD_GWAX.single_anno2")
+#'   ),
+#'   trait_joint_prefix = c(
+#'     CAD_META = "/output/CAD_META.joint",
+#'     AD_GWAX  = "/output/AD_GWAX.joint"
+#'   ),
+#'   target_anno_dir = "/output/ldscores/my_targets",
+#'   frqfile_dir     = "/ref/ADSP/frq",
+#'   plink_name      = "ADSP_chr",
+#'   maf_cutoff      = 0.05,
+#'   target_categories = c("anno1", "anno2")
+#' )
+#' res$meta$tau_star      # cross-type comparable headline
+#' res$meta$enrichment    # within-binary headline
+#'
+#' # later, re-meta on a custom subset of traits
+#' meta_neuro <- meta_sldsc_random(
+#'   res$per_trait[c("AD_GWAX", "PD_meta")], "anno1", "tau_star"
+#' )
+#' }
+#'
+#' @seealso \code{\link{read_sldsc_trait}}, \code{\link{standardize_sldsc_trait}},
+#'   \code{\link{meta_sldsc_random}}
+#'
+#' @export
+sldsc_postprocessing_pipeline <- function(trait_single_prefixes,
+                                          trait_joint_prefix,
+                                          target_anno_dir,
+                                          frqfile_dir = NULL,
+                                          plink_name = "ADSP_chr",
+                                          maf_cutoff = 0.05,
+                                          target_categories = NULL) {
+  stop("Not yet implemented (skeleton).")
+}
diff --git a/tests/testthat/test_LD.R b/tests/testthat/test_LD.R
index ea6a8d7b..058c6460 100644
--- a/tests/testthat/test_LD.R
+++ b/tests/testthat/test_LD.R
@@ -823,3 +823,61 @@ test_that("can_merge checks chromosome and size", {
   b3 <- data.frame(chrom = "2", size = 100)
   expect_false(pecotmr:::can_merge(b1, b3, max_size = 500))
 })
+
+# ---- check_ld ----
+test_that("check_ld detects positive definite matrix", {
+  R <- diag(5)
+  result <- check_ld(R, method = "check")
+  expect_true(result$is_pd)
+  expect_true(result$is_psd)
+  expect_equal(result$n_negative, 0)
+  expect_equal(result$min_eigenvalue, 1)
+  expect_equal(result$method_applied, "none")
+})
+
+test_that("check_ld detects non-PSD matrix", {
+  R <- matrix(0.9, 3, 3)
+  diag(R) <- 1
+  R[1, 3] <- R[3, 1] <- -0.5
+  result <- check_ld(R, method = "check")
+  expect_false(result$is_psd)
+  expect_true(result$n_negative > 0)
+  expect_true(result$min_eigenvalue < 0)
+  expect_equal(result$method_applied, "none")
+  expect_equal(result$R, R)
+})
+
+test_that("check_ld eigenfix repairs non-PSD matrix", {
+  R <- matrix(0.9, 3, 3)
+  diag(R) <- 1
+  R[1, 3] <- R[3, 1] <- -0.5
+  result <- check_ld(R, method = "eigenfix")
+  expect_equal(result$method_applied, "eigenfix")
+  result2 <- check_ld(result$R, method = "check")
+  expect_true(result2$is_psd)
+  expect_equal(diag(result$R), rep(1, 3))
+  expect_true(isSymmetric(result$R))
+})
+
+test_that("check_ld shrink repairs non-PD matrix", {
+  R <- matrix(0.9, 3, 3)
+  diag(R) <- 1
+  R[1, 3] <- R[3, 1] <- -0.5
+  result <- check_ld(R, method = "shrink", shrinkage = 0.1)
+  expect_equal(result$method_applied, "shrink")
+  result2 <- check_ld(result$R, method = "check")
+  expect_true(result2$is_pd)
+})
+
+test_that("check_ld eigenfix is no-op on PSD matrix", {
+  R <- diag(5)
+  result <- check_ld(R, method = "eigenfix")
+  expect_equal(result$method_applied, "none")
+  expect_equal(result$R, R)
+})
+
+test_that("check_ld condition_number is correct", {
+  R <- diag(c(1, 0.5, 0.1))
+  result <- check_ld(R, method = "check")
+  expect_equal(result$condition_number, 10)
+})

From 4d0b7afbb59a2e5162a566b1462ee9409f401429 Mon Sep 17 00:00:00 2001
From: Gao Wang <wangow@gmail.com>
Date: Thu, 7 May 2026 07:25:03 -0400
Subject: [PATCH 2/4] documentation update

---
 R/sldsc_wrapper.R | 175 +++++++++++++++++++++++++---------------------
 1 file changed, 96 insertions(+), 79 deletions(-)

diff --git a/R/sldsc_wrapper.R b/R/sldsc_wrapper.R
index d33ff165..62d96943 100644
--- a/R/sldsc_wrapper.R
+++ b/R/sldsc_wrapper.R
@@ -1,8 +1,8 @@
 # Stratified LD Score Regression (S-LDSC) post-processing wrappers around polyfun.
 #
 # This file provides the post-processing layer for the xqtl-protocol sLDSC pipeline:
-# read polyfun outputs per trait, compute Gazal-style standardized tau* and
-# differential per-SNP heritability (EnrichStat), and run DerSimonian-Laird
+# read polyfun outputs per trait, compute Gazal-style standardized tau* and the
+# differential per-SNP heritability statistic (EnrichStat), and run DerSimonian-Laird
 # random-effects meta-analysis across traits.
 #
 # Reference panel convention: all LD-derived quantities (baseline LD scores,
@@ -30,8 +30,8 @@
 #'
 #' @return A named list with components:
 #'   \describe{
-#'     \item{categories}{Character vector of annotation names, in regression order
-#'       (target annotations first, baseline last by convention).}
+#'     \item{categories}{Character vector of annotation names in regression order
+#'       (target annotations first, baseline last by polyfun convention).}
 #'     \item{tau}{Numeric named vector of regression coefficients \eqn{\tau_C}.}
 #'     \item{tau_se}{Numeric named vector of standard errors of \eqn{\tau_C}.}
 #'     \item{enrichment}{Numeric named vector of \eqn{E_C = \pi^{h^2}_C / \pi^M_C}.}
@@ -40,7 +40,7 @@
 #'       per-SNP heritability test (the EnrichStat p-value reported by polyfun).}
 #'     \item{prop_h2}{Numeric named vector \eqn{\pi^{h^2}_C}.}
 #'     \item{prop_snps}{Numeric named vector \eqn{\pi^M_C}.}
-#'     \item{h2g}{Scalar total trait heritability \eqn{h^2_g}.}
+#'     \item{h2g}{Scalar total trait heritability \eqn{h^2_g}, parsed from the log.}
 #'     \item{tau_blocks}{Numeric matrix of per-block jackknife \eqn{\tau} values,
 #'       dimensions (n_blocks x n_annotations), columns named by category.}
 #'     \item{n_blocks}{Integer number of jackknife blocks (typically 200).}
@@ -65,6 +65,12 @@ read_sldsc_trait <- function(prefix) {
 #'   restriction is required for the standardization to be internally consistent
 #'   with polyfun's regression, which operates on MAF > cutoff SNPs by default.
 #'
+#' @details Implementation walks per-chromosome `.annot.gz` files, joins them
+#'   with PLINK `.frq` files when `maf_cutoff > 0`, and pools across chromosomes
+#'   with the standard pooled-variance formula
+#'   \deqn{sd_C = \sqrt{\sum_{\text{chr}} (n_{\text{chr}} - 1)\,\mathrm{Var}(C_{\text{chr}}) \,/\, \sum_{\text{chr}} (n_{\text{chr}} - 1)}}
+#'   over the MAF-restricted SNPs only.
+#'
 #' @param target_anno_dir Character. Directory containing target annotation files
 #'   (one per chromosome) in polyfun's `.annot.gz` format.
 #' @param frqfile_dir Character or NULL. Directory containing PLINK `.frq` files
@@ -73,24 +79,14 @@ read_sldsc_trait <- function(prefix) {
 #' @param plink_name Character. Filename prefix of the `.frq` files
 #'   (e.g. `"ADSP_chr"`). Files are expected at `{plink_name}{chr}.frq`.
 #' @param maf_cutoff Numeric, default `0.05`. Only SNPs with `MAF > maf_cutoff`
-#'   contribute to sd. Set to `0` to disable filtering (must match the
-#'   `--not-M-5-50` regression mode for internal consistency).
+#'   contribute to sd. Set to `0` to disable (must match the regression's
+#'   `--not-M-5-50` mode).
 #' @param annot_cols Character or integer vector, default NULL. Annotation columns
 #'   to compute sd for. If NULL, all annotation columns (i.e. all columns past
 #'   the standard fixed columns CHR/SNP/BP/CM/A1/A2/MAF) are used.
 #'
 #' @return Named numeric vector of \eqn{sd_C} values, one per annotation.
 #'
-#' @examples
-#' \dontrun{
-#' sd_annot <- compute_sldsc_annot_sd(
-#'   target_anno_dir = "/output/ldscores/AC_DeJager_eQTL",
-#'   frqfile_dir     = "/ref/ADSP/frq",
-#'   plink_name      = "ADSP_chr",
-#'   maf_cutoff      = 0.05
-#' )
-#' }
-#'
 #' @export
 compute_sldsc_annot_sd <- function(target_anno_dir, frqfile_dir = NULL,
                                    plink_name = "ADSP_chr",
@@ -102,12 +98,14 @@ compute_sldsc_annot_sd <- function(target_anno_dir, frqfile_dir = NULL,
 #' @title Reference-panel SNP count at a given MAF cutoff
 #'
 #' @description Returns the number of SNPs in the reference panel above the MAF
-#'   cutoff, matching the regression's M_5_50 (when `maf_cutoff > 0`) or M
-#'   (when `maf_cutoff == 0`). Used as \eqn{M_{ref}} in the standardization.
+#'   cutoff, used as \eqn{M_{ref}} in the standardization. When
+#'   `maf_cutoff > 0` the function sums `.l2.M_5_50` files (matching polyfun's
+#'   regression which uses M_5_50 with `--frqfile-chr`); when `maf_cutoff == 0`
+#'   it sums `.l2.M` files (matching the `--not-M-5-50` regression mode).
 #'
 #' @param target_anno_dir Character. Directory containing per-chromosome
 #'   `.l2.M_5_50` (when MAF-filtered) or `.l2.M` (when not) files produced by
-#'   polyfun's `compute_ldscores.py` for the target annotation.
+#'   polyfun's `compute_ldscores.py`.
 #' @param maf_cutoff Numeric, default `0.05`.
 #'
 #' @return Scalar integer: total number of SNPs in the reference panel above the cutoff.
@@ -137,47 +135,57 @@ is_binary_sldsc_annot <- function(target_anno_dir, annot_cols = NULL) {
 }
 
 
-#' @title Standardize tau and compute per-block tau* and EnrichStat for one polyfun run
+#' @title Standardize tau and compute EnrichStat for one polyfun run
 #'
 #' @description Given the polyfun read result plus the annotation sd's and
 #'   reference SNP count, computes the Gazal-style standardized tau (\eqn{\tau^*})
 #'   and the differential per-SNP heritability statistic (EnrichStat) for the
-#'   target annotations, including their per-block jackknife values for use in
-#'   cross-trait meta-analysis.
+#'   target annotations.
 #'
-#' @details The standardization is
+#' @details Standardization:
 #'   \deqn{\tau^*_C = \tau_C \cdot sd_C \cdot M_{ref} / h^2_g}
 #'   applied both to the point estimate and to each of the n_blocks jackknife
-#'   blocks. The EnrichStat is
-#'   \deqn{\frac{h^2_g}{M_{ref}}\left[\frac{\pi^{h^2}_C}{\pi^M_C}
-#'         - \frac{1-\pi^{h^2}_C}{1-\pi^M_C}\right]}
-#'   computed from the per-block tau values, with jackknife SE from the per-block
-#'   variance \eqn{\sqrt{\frac{(B-1)^2}{B} Var_b}}.
+#'   blocks (the per-block \eqn{\tau^*_C} values are returned for use as inputs
+#'   to cross-trait random-effects meta).
+#'
+#'   EnrichStat point estimate:
+#'   \deqn{\text{EnrichStat}_C = \frac{h^2_g}{M_{ref}}\!\left[\frac{\pi^{h^2}_C}{\pi^M_C} - \frac{1-\pi^{h^2}_C}{1-\pi^M_C}\right]}
+#'   computed once from polyfun's reported \eqn{\pi^{h^2}_C} and \eqn{\pi^M_C}.
+#'   Its standard error is recovered from polyfun's `Enrichment_p` via
+#'   \deqn{|Z_C| = \Phi^{-1}(1 - p_C/2), \qquad SE_{\text{EnrichStat}_C} = |\text{EnrichStat}_C| / |Z_C|.}
 #'
 #'   Only target annotations are returned; baseline annotations are dropped after
-#'   serving their role of conditioning the regression.
+#'   serving their role of conditioning the regression. Following the original
+#'   pipeline scope, EnrichStat / E are computed only when the call corresponds
+#'   to a single-target run; in joint-mode runs only \eqn{\tau^*_C} is produced
+#'   per target.
 #'
 #' @param trait_data List. Output of \code{\link{read_sldsc_trait}} for one polyfun run.
 #' @param sd_annot Named numeric vector. Output of
 #'   \code{\link{compute_sldsc_annot_sd}}, restricted to target annotations.
 #' @param M_ref Scalar. Output of \code{\link{compute_sldsc_M_ref}}.
 #' @param target_categories Character vector or NULL. Annotation names to keep.
-#'   If NULL, all categories with names matching `names(sd_annot)` are kept.
+#'   If NULL, all categories matching `names(sd_annot)` are kept.
+#' @param mode Character, one of `"single"` or `"joint"`. Controls which
+#'   quantities are produced (E and EnrichStat are produced only for `"single"`;
+#'   \eqn{\tau^*_C} is produced for both).
 #'
 #' @return A list with components:
 #'   \describe{
-#'     \item{summary}{Data frame with one row per target annotation and columns:
-#'       `target`, `tau`, `tau_se`, `tau_star`, `tau_star_se`, `enrichment`,
-#'       `enrichment_se`, `enrichment_p`, `enrichstat`, `enrichstat_se`.}
+#'     \item{summary}{Data frame with one row per target annotation. Columns:
+#'       `target`, `tau`, `tau_se`, `tau_star`, `tau_star_se`. For `mode="single"`
+#'       additionally `enrichment`, `enrichment_se`, `enrichment_p`,
+#'       `enrichstat`, `enrichstat_se` (the back-solved SE).}
 #'     \item{tau_star_blocks}{Matrix (n_blocks x n_target) of per-block \eqn{\tau^*_C}.}
-#'     \item{enrichstat_blocks}{Matrix (n_blocks x n_target) of per-block EnrichStat.}
 #'     \item{h2g}{Scalar trait heritability.}
 #'     \item{n_blocks}{Integer.}
+#'     \item{mode}{Echo of the input mode.}
 #'   }
 #'
 #' @export
 standardize_sldsc_trait <- function(trait_data, sd_annot, M_ref,
-                                    target_categories = NULL) {
+                                    target_categories = NULL,
+                                    mode = c("single", "joint")) {
   stop("Not yet implemented (skeleton).")
 }
 
@@ -194,22 +202,30 @@ standardize_sldsc_trait <- function(trait_data, sd_annot, M_ref,
 #'         \quad SE_{meta} = 1/\sqrt{\sum_i w_i},
 #'         \quad w_i = 1/(SE_i^2 + \hat\sigma^2)}
 #'   via `rmeta::meta.summaries(..., method = "random")`. The two-sided p-value
-#'   is computed from the meta z-score under the standard normal.
-#'
-#'   `quantity = "tau_star"` and `"enrichstat"` use the jackknife SE from
-#'   per-block delete values (computed inside
-#'   \code{\link{standardize_sldsc_trait}}). `quantity = "enrichment"` uses
-#'   the SE reported directly by the regression engine.
+#'   is computed from the meta z-score under the standard normal:
+#'   \eqn{p = 2\Phi(-|Z_{meta}|)}.
+#'
+#'   Per-trait \eqn{SE_i} sources:
+#'   - `quantity = "tau_star"`: jackknife SE from the per-block \eqn{\tau^*}
+#'     values (computed in \code{\link{standardize_sldsc_trait}}).
+#'   - `quantity = "enrichment"`: polyfun-reported `Enrichment_std_error`.
+#'   - `quantity = "enrichstat"`: back-solved SE from polyfun's `Enrichment_p`,
+#'     \eqn{SE = |\text{EnrichStat}|/|\Phi^{-1}(1-p/2)|}.
+#'
+#'   For binary-annotation enrichment reporting, callers typically combine two
+#'   meta runs: effect size and SE from `quantity="enrichment"` (interpretable
+#'   on the original enrichment-fold scale), p-value from `quantity="enrichstat"`
+#'   (the appropriate hypothesis test). The top-level
+#'   \code{\link{sldsc_postprocessing_pipeline}} performs this combination.
 #'
 #' @param per_trait_estimates Named list of standardized per-trait results from
 #'   \code{\link{standardize_sldsc_trait}}. Pass a subset to re-meta on a
 #'   user-chosen group of traits.
 #' @param category Character. Annotation name to meta-analyze.
-#' @param quantity Character, one of `"tau_star"`, `"enrichment"`, or
-#'   `"enrichstat"`.
+#' @param quantity Character, one of `"tau_star"`, `"enrichment"`, or `"enrichstat"`.
 #'
-#' @return A list with components: `mean`, `se`, `p`, `n_traits`,
-#'   `traits_used`, and `tau2` (the DerSimonian-Laird between-trait variance).
+#' @return A list with components: `mean`, `se`, `p`, `n_traits`, `traits_used`,
+#'   and `tau2` (the DerSimonian-Laird between-trait variance).
 #'
 #' @examples
 #' \dontrun{
@@ -229,56 +245,57 @@ meta_sldsc_random <- function(per_trait_estimates, category,
 
 #' @title End-to-end S-LDSC post-processing across traits, single + joint in one pass
 #'
-#' @description Top-level convenience wrapper. Reads polyfun outputs (single-tau
-#'   runs and the joint-tau run) for each trait, standardizes both modes, and
-#'   runs the default random-effects meta-analysis across all traits supplied.
-#'   Single and joint analyses are produced in one pass; no `joint_tau` flag.
+#' @description Top-level convenience wrapper. Reads polyfun outputs (one
+#'   single-target run per target plus one joint run per trait), standardizes
+#'   both modes, and runs the default random-effects meta-analysis across all
+#'   traits supplied. Single-target and joint-target analyses are produced in
+#'   one pass.
 #'
-#' @details For a set of \eqn{N} target annotations and one trait, the caller is
-#'   expected to have already produced \eqn{N+1} polyfun runs: \eqn{N} single-tau
-#'   runs (each fitting one target + baseline) and 1 joint-tau run (fitting all
-#'   \eqn{N} targets + baseline together). The pipeline reads all of these, drops
+#' @details For \eqn{N} target annotations and one trait, the caller is expected
+#'   to have already produced \eqn{N+1} polyfun runs: \eqn{N} single-target runs
+#'   (each fitting one target + baseline) and 1 joint run (fitting all \eqn{N}
+#'   targets + baseline together). The pipeline reads all of these, drops
 #'   baseline categories, standardizes both modes, and assembles consolidated
 #'   per-trait and meta data frames.
 #'
 #'   Reports baseline annotation count and names via `message()` once at the
-#'   start of the run for transparency.
+#'   start of the run.
+#'
+#'   Cross-type comparison: the `meta$tau_star` frame is the apple-to-apple
+#'   table for ranking annotations regardless of binary/continuous type. The
+#'   `is_binary` column lets callers filter when within-type comparison is desired.
 #'
-#'   Cross-type comparison: the `meta$tau_star` frame is the apple-to-apple table
-#'   for ranking annotations regardless of binary/continuous type. The `is_binary`
-#'   column lets callers filter further when within-type comparison is desired.
+#'   Two-channel enrichment meta (binary annotations only): the `meta$enrichment`
+#'   frame's `single_mean` and `single_se` come from the meta on \eqn{E_C};
+#'   the `single_p` comes from the meta on EnrichStat (the appropriate
+#'   hypothesis test).
 #'
 #' @param trait_single_prefixes Named list. For each trait (name = trait id),
-#'   a character vector of length \eqn{N} giving the polyfun output prefixes for
-#'   the \eqn{N} single-tau runs (one per target annotation). Order must match
-#'   `target_categories`.
-#' @param trait_joint_prefix Named character. For each trait (name = trait id,
-#'   matching `trait_single_prefixes`), the polyfun output prefix for the joint-tau
-#'   run that fits all \eqn{N} targets together.
-#' @param target_anno_dir Character. Directory of the target annotation files
-#'   (used for `sd_C` and binary detection). When all \eqn{N} targets share
-#'   one directory (multi-column annot files), pass that. When each target has
-#'   its own directory, pass a named character vector (names = target categories).
-#' @param frqfile_dir Character or NULL. Directory of `.frq` files for the panel.
-#' @param plink_name Character. Filename prefix of `.frq` files (default `"ADSP_chr"`).
+#'   a character vector of length \eqn{N} giving the polyfun output prefixes
+#'   for the \eqn{N} single-target runs. Order must match `target_categories`.
+#' @param trait_joint_prefix Named character. For each trait (name matching
+#'   `trait_single_prefixes`), the polyfun output prefix for the joint run.
+#' @param target_anno_dir Character. Directory of target annotation files used
+#'   for `sd_C` and binary detection. Typically the joint-mode directory, since
+#'   it carries all target annotation columns.
+#' @param frqfile_dir Character or NULL.
+#' @param plink_name Character. Default `"ADSP_chr"`.
 #' @param maf_cutoff Numeric, default `0.05`.
-#' @param target_categories Character vector or NULL. Target annotation names to
-#'   report. If NULL, auto-detected from the joint-run results as all categories
-#'   not in baseline.
+#' @param target_categories Character vector or NULL. Auto-detected from the
+#'   joint-run results as all categories not in baseline if NULL.
 #'
 #' @return A list with components:
 #'   \describe{
 #'     \item{per_trait}{Named list of per-trait results. For each trait, a list with:
 #'       `summary` (wide data frame of single + joint estimates side-by-side per
-#'       target annotation, with `is_binary` flag), `tau_star_blocks_single`,
-#'       `tau_star_blocks_joint`, `enrichstat_blocks_single`,
-#'       `enrichstat_blocks_joint` (all matrices), and `h2g`.}
+#'       target annotation, with `is_binary` flag), `tau_star_blocks_single`
+#'       (matrix), `tau_star_blocks_joint` (matrix), and `h2g`.}
 #'     \item{meta}{List of three data frames, each with one row per target annotation:
 #'       \itemize{
 #'         \item `tau_star`: columns `target`, `is_binary`, `single_mean`,
 #'           `single_se`, `single_p`, `joint_mean`, `joint_se`, `joint_p`, `n_traits`.
-#'         \item `enrichment`: same shape.
-#'         \item `enrichstat`: same shape.
+#'         \item `enrichment`: same shape; effect/SE from E meta, p from EnrichStat meta.
+#'         \item `enrichstat`: same shape; pure EnrichStat meta.
 #'       }
 #'       The `tau_star` frame is the cross-type comparable headline.}
 #'     \item{params}{List echoing `maf_cutoff`, `M_ref`, `target_categories`,
@@ -303,7 +320,7 @@ meta_sldsc_random <- function(per_trait_estimates, category,
 #'   target_categories = c("anno1", "anno2")
 #' )
 #' res$meta$tau_star      # cross-type comparable headline
-#' res$meta$enrichment    # within-binary headline
+#' res$meta$enrichment    # within-binary headline (effect/SE from E, p from EnrichStat)
 #'
 #' # later, re-meta on a custom subset of traits
 #' meta_neuro <- meta_sldsc_random(

From eaff5433d1e6bbc2c68f415024b7372772ef2933 Mon Sep 17 00:00:00 2001
From: Gao Wang <wangow@gmail.com>
Date: Thu, 7 May 2026 08:41:05 -0400
Subject: [PATCH 3/4] Implement core functions

---
 R/sldsc_wrapper.R | 812 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 601 insertions(+), 211 deletions(-)

diff --git a/R/sldsc_wrapper.R b/R/sldsc_wrapper.R
index 62d96943..b8e17c80 100644
--- a/R/sldsc_wrapper.R
+++ b/R/sldsc_wrapper.R
@@ -17,6 +17,22 @@
 # that mix binary and continuous types. E (proportion-based enrichment) is
 # scale-dependent for continuous annotations and is only comparable within type.
 
+# ---- internal helpers ----
+
+.sldsc_std_cols <- c("CHR", "SNP", "BP", "CM", "A1", "A2", "MAF")
+
+.sldsc_chrom_from_filename <- function(f) {
+  bn <- basename(f)
+  m  <- regmatches(bn, regexec("\\.([0-9]+)\\.annot\\.gz$", bn))[[1]]
+  if (length(m) >= 2) as.integer(m[2]) else NA_integer_
+}
+
+.sldsc_detect_annot_cols <- function(file_path) {
+  sample <- data.table::fread(file_path, nrows = 5L)
+  setdiff(names(sample), .sldsc_std_cols)
+}
+
+
 #' @title Read S-LDSC outputs from polyfun for one trait/run
 #'
 #' @description Reads the regression outputs produced by `polyfun/ldsc.py` for a
@@ -28,23 +44,7 @@
 #'   The function appends `.results`, `.log`, and `.part_delete` to this prefix.
 #'   Example: `"/path/to/cwd/CAD_META.filtered.sumstats.gz"`.
 #'
-#' @return A named list with components:
-#'   \describe{
-#'     \item{categories}{Character vector of annotation names in regression order
-#'       (target annotations first, baseline last by polyfun convention).}
-#'     \item{tau}{Numeric named vector of regression coefficients \eqn{\tau_C}.}
-#'     \item{tau_se}{Numeric named vector of standard errors of \eqn{\tau_C}.}
-#'     \item{enrichment}{Numeric named vector of \eqn{E_C = \pi^{h^2}_C / \pi^M_C}.}
-#'     \item{enrichment_se}{Numeric named vector of standard errors of \eqn{E_C}.}
-#'     \item{enrichment_p}{Numeric named vector of p-values for the differential
-#'       per-SNP heritability test (the EnrichStat p-value reported by polyfun).}
-#'     \item{prop_h2}{Numeric named vector \eqn{\pi^{h^2}_C}.}
-#'     \item{prop_snps}{Numeric named vector \eqn{\pi^M_C}.}
-#'     \item{h2g}{Scalar total trait heritability \eqn{h^2_g}, parsed from the log.}
-#'     \item{tau_blocks}{Numeric matrix of per-block jackknife \eqn{\tau} values,
-#'       dimensions (n_blocks x n_annotations), columns named by category.}
-#'     \item{n_blocks}{Integer number of jackknife blocks (typically 200).}
-#'   }
+#' @return A named list. See `sldsc_postprocessing_pipeline` for components.
 #'
 #' @examples
 #' \dontrun{
@@ -52,24 +52,58 @@
 #' run$tau["my_target_annotation"]
 #' }
 #'
+#' @importFrom data.table fread
+#' @importFrom stats setNames var
 #' @export
 read_sldsc_trait <- function(prefix) {
-  stop("Not yet implemented (skeleton).")
+  results_file <- paste0(prefix, ".results")
+  log_file     <- paste0(prefix, ".log")
+  delete_file  <- paste0(prefix, ".part_delete")
+
+  for (f in c(results_file, log_file, delete_file)) {
+    if (!file.exists(f)) stop("read_sldsc_trait: missing file: ", f)
+  }
+
+  results <- data.table::fread(results_file)
+  cats <- as.character(results$Category)
+
+  log_lines <- readLines(log_file, warn = FALSE)
+  h2_line <- grep("Total Observed scale h2:", log_lines, value = TRUE)
+  if (length(h2_line) == 0L)
+    stop("read_sldsc_trait: could not find 'Total Observed scale h2:' in ", log_file)
+  h2g <- suppressWarnings(as.numeric(gsub(".*h2: (-?[0-9.eE+-]+).*", "\\1", h2_line[1])))
+  if (is.na(h2g))
+    stop("read_sldsc_trait: failed to parse h2g numeric from log line: ", h2_line[1])
+
+  delete_values <- as.matrix(data.table::fread(delete_file))
+  if (ncol(delete_values) != length(cats)) {
+    stop("read_sldsc_trait: .part_delete has ", ncol(delete_values),
+         " columns but .results has ", length(cats), " categories.")
+  }
+  colnames(delete_values) <- cats
+
+  list(
+    categories     = cats,
+    tau            = stats::setNames(as.numeric(results$Coefficient),                 cats),
+    tau_se         = stats::setNames(as.numeric(results[["Coefficient_std_error"]]),  cats),
+    enrichment     = stats::setNames(as.numeric(results$Enrichment),                  cats),
+    enrichment_se  = stats::setNames(as.numeric(results[["Enrichment_std_error"]]),   cats),
+    enrichment_p   = stats::setNames(as.numeric(results[["Enrichment_p"]]),           cats),
+    prop_h2        = stats::setNames(as.numeric(results[["Prop._h2"]]),               cats),
+    prop_snps      = stats::setNames(as.numeric(results[["Prop._SNPs"]]),             cats),
+    h2g            = h2g,
+    tau_blocks     = delete_values,
+    n_blocks       = nrow(delete_values)
+  )
 }
 
 
 #' @title Compute per-annotation standard deviation, MAF-restricted
 #'
 #' @description Computes the standard deviation of each annotation column in the
-#'   target annotation files, restricted to SNPs above a MAF cutoff. The MAF
-#'   restriction is required for the standardization to be internally consistent
-#'   with polyfun's regression, which operates on MAF > cutoff SNPs by default.
-#'
-#' @details Implementation walks per-chromosome `.annot.gz` files, joins them
-#'   with PLINK `.frq` files when `maf_cutoff > 0`, and pools across chromosomes
-#'   with the standard pooled-variance formula
-#'   \deqn{sd_C = \sqrt{\sum_{\text{chr}} (n_{\text{chr}} - 1)\,\mathrm{Var}(C_{\text{chr}}) \,/\, \sum_{\text{chr}} (n_{\text{chr}} - 1)}}
-#'   over the MAF-restricted SNPs only.
+#'   target annotation files, restricted to SNPs above a MAF cutoff via PLINK
+#'   `.frq` files. Required for internal consistency with polyfun's regression,
+#'   which operates on MAF > cutoff SNPs by default.
 #'
 #' @param target_anno_dir Character. Directory containing target annotation files
 #'   (one per chromosome) in polyfun's `.annot.gz` format.
@@ -78,50 +112,139 @@ read_sldsc_trait <- function(prefix) {
 #'   errors if missing.
 #' @param plink_name Character. Filename prefix of the `.frq` files
 #'   (e.g. `"ADSP_chr"`). Files are expected at `{plink_name}{chr}.frq`.
-#' @param maf_cutoff Numeric, default `0.05`. Only SNPs with `MAF > maf_cutoff`
-#'   contribute to sd. Set to `0` to disable (must match the regression's
-#'   `--not-M-5-50` mode).
+#' @param maf_cutoff Numeric, default `0.05`.
 #' @param annot_cols Character or integer vector, default NULL. Annotation columns
-#'   to compute sd for. If NULL, all annotation columns (i.e. all columns past
-#'   the standard fixed columns CHR/SNP/BP/CM/A1/A2/MAF) are used.
+#'   to compute sd for. If NULL, all annotation columns are used.
 #'
 #' @return Named numeric vector of \eqn{sd_C} values, one per annotation.
 #'
+#' @importFrom data.table fread
+#' @importFrom stats setNames var
 #' @export
 compute_sldsc_annot_sd <- function(target_anno_dir, frqfile_dir = NULL,
                                    plink_name = "ADSP_chr",
                                    maf_cutoff = 0.05, annot_cols = NULL) {
-  stop("Not yet implemented (skeleton).")
+  if (maf_cutoff > 0 && (is.null(frqfile_dir) || !dir.exists(frqfile_dir))) {
+    stop("compute_sldsc_annot_sd: maf_cutoff = ", maf_cutoff,
+         " requires frqfile_dir, but '", frqfile_dir, "' is not a directory.")
+  }
+  if (!dir.exists(target_anno_dir)) {
+    stop("compute_sldsc_annot_sd: target_anno_dir does not exist: ", target_anno_dir)
+  }
+
+  anno_files <- list.files(target_anno_dir, pattern = "\\.annot\\.gz$", full.names = TRUE)
+  if (length(anno_files) == 0L)
+    stop("compute_sldsc_annot_sd: no .annot.gz files in: ", target_anno_dir)
+
+  detected <- .sldsc_detect_annot_cols(anno_files[1])
+  if (is.null(annot_cols)) {
+    cols_use <- detected
+  } else if (is.numeric(annot_cols)) {
+    cols_use <- detected[annot_cols]
+  } else {
+    cols_use <- annot_cols
+  }
+  if (length(cols_use) == 0L)
+    stop("compute_sldsc_annot_sd: no annotation columns to process.")
+
+  num <- stats::setNames(numeric(length(cols_use)), cols_use)
+  den <- 0
+
+  for (anno_file in anno_files) {
+    dat <- data.table::fread(anno_file)
+    if (maf_cutoff > 0) {
+      chrom <- .sldsc_chrom_from_filename(anno_file)
+      if (is.na(chrom))
+        stop("compute_sldsc_annot_sd: could not parse chromosome from: ", anno_file)
+      frq_file <- file.path(frqfile_dir, paste0(plink_name, chrom, ".frq"))
+      if (!file.exists(frq_file))
+        stop("compute_sldsc_annot_sd: .frq file not found: ", frq_file)
+      frq <- data.table::fread(frq_file, select = c("SNP", "MAF"))
+      dat <- merge(dat, frq, by = "SNP", all.x = FALSE, all.y = FALSE)
+      dat <- dat[!is.na(dat$MAF) & dat$MAF > maf_cutoff, ]
+    }
+    if (nrow(dat) <= 1L) next
+    n_minus_1 <- nrow(dat) - 1L
+    for (col in cols_use) {
+      vals <- as.numeric(dat[[col]])
+      v <- stats::var(vals, na.rm = TRUE)
+      if (!is.na(v)) num[col] <- num[col] + n_minus_1 * v
+    }
+    den <- den + n_minus_1
+  }
+
+  if (den <= 0)
+    stop("compute_sldsc_annot_sd: zero degrees of freedom after MAF filtering.")
+  sqrt(num / den)
 }
 
 
 #' @title Reference-panel SNP count at a given MAF cutoff
 #'
 #' @description Returns the number of SNPs in the reference panel above the MAF
-#'   cutoff, used as \eqn{M_{ref}} in the standardization. When
-#'   `maf_cutoff > 0` the function sums `.l2.M_5_50` files (matching polyfun's
-#'   regression which uses M_5_50 with `--frqfile-chr`); when `maf_cutoff == 0`
-#'   it sums `.l2.M` files (matching the `--not-M-5-50` regression mode).
-#'
-#' @param target_anno_dir Character. Directory containing per-chromosome
-#'   `.l2.M_5_50` (when MAF-filtered) or `.l2.M` (when not) files produced by
-#'   polyfun's `compute_ldscores.py`.
+#'   cutoff. When `maf_cutoff > 0`, counts MAF > cutoff entries across all
+#'   `.frq` files. When `maf_cutoff == 0`, counts rows of `.l2.ldscore`
+#'   files in `target_anno_dir`.
+#'
+#' @param target_anno_dir Character or NULL. Directory of `.l2.ldscore` files
+#'   produced by polyfun's `compute_ldscores.py`. Required when
+#'   `maf_cutoff == 0`.
+#' @param frqfile_dir Character or NULL. Directory of PLINK `.frq` files.
+#'   Required when `maf_cutoff > 0`.
+#' @param plink_name Character. Filename prefix of `.frq` files.
 #' @param maf_cutoff Numeric, default `0.05`.
 #'
-#' @return Scalar integer: total number of SNPs in the reference panel above the cutoff.
+#' @return Scalar integer.
 #'
+#' @importFrom data.table fread
 #' @export
-compute_sldsc_M_ref <- function(target_anno_dir, maf_cutoff = 0.05) {
-  stop("Not yet implemented (skeleton).")
+compute_sldsc_M_ref <- function(target_anno_dir = NULL, frqfile_dir = NULL,
+                                plink_name = "ADSP_chr", maf_cutoff = 0.05) {
+  if (maf_cutoff > 0) {
+    if (is.null(frqfile_dir) || !dir.exists(frqfile_dir))
+      stop("compute_sldsc_M_ref: maf_cutoff = ", maf_cutoff,
+           " requires frqfile_dir.")
+    pat <- paste0("^", gsub("([.])", "\\\\\\1", plink_name), "[0-9]+\\.frq$")
+    frq_files <- list.files(frqfile_dir, pattern = pat, full.names = TRUE)
+    if (length(frq_files) == 0L)
+      frq_files <- list.files(frqfile_dir, pattern = "\\.frq$", full.names = TRUE)
+    if (length(frq_files) == 0L)
+      stop("compute_sldsc_M_ref: no .frq files found in: ", frqfile_dir)
+
+    total <- 0L
+    for (f in frq_files) {
+      frq <- data.table::fread(f, select = "MAF")
+      total <- total + sum(!is.na(frq$MAF) & frq$MAF > maf_cutoff)
+    }
+    return(as.integer(total))
+  }
+
+  if (is.null(target_anno_dir) || !dir.exists(target_anno_dir))
+    stop("compute_sldsc_M_ref: maf_cutoff = 0 requires target_anno_dir.")
+  files <- list.files(target_anno_dir,
+                      pattern = "\\.l2\\.ldscore\\.(gz|parquet)$",
+                      full.names = TRUE)
+  if (length(files) == 0L)
+    stop("compute_sldsc_M_ref: no .l2.ldscore files in: ", target_anno_dir)
+
+  total <- 0L
+  for (f in files) {
+    if (endsWith(f, ".parquet")) {
+      if (!requireNamespace("arrow", quietly = TRUE))
+        stop("compute_sldsc_M_ref: install 'arrow' to read .parquet files.")
+      total <- total + nrow(arrow::read_parquet(f))
+    } else {
+      total <- total + nrow(data.table::fread(f))
+    }
+  }
+  as.integer(total)
 }
 
 
 #' @title Detect whether each annotation is binary or continuous
 #'
-#' @description Inspects each annotation column in the target annotation files
-#'   and returns whether its values lie in \{0, 1\} (binary) or take other values
-#'   (continuous). Used to select the appropriate within-type headline statistic.
-#'   For cross-type comparison, always use \eqn{\tau^*_C} regardless of the flag.
+#' @description Inspects each annotation column and returns whether its values
+#'   lie in \{0, 1\} (binary) or take other values (continuous).
 #'
 #' @param target_anno_dir Character. Directory containing the target `.annot.gz`
 #'   files (one per chromosome).
@@ -129,207 +252,269 @@ compute_sldsc_M_ref <- function(target_anno_dir, maf_cutoff = 0.05) {
 #'
 #' @return Named logical vector: TRUE for binary, FALSE for continuous.
 #'
+#' @importFrom data.table fread
+#' @importFrom stats setNames
 #' @export
 is_binary_sldsc_annot <- function(target_anno_dir, annot_cols = NULL) {
-  stop("Not yet implemented (skeleton).")
+  anno_files <- list.files(target_anno_dir, pattern = "\\.annot\\.gz$", full.names = TRUE)
+  if (length(anno_files) == 0L)
+    stop("is_binary_sldsc_annot: no .annot.gz files in: ", target_anno_dir)
+
+  detected <- .sldsc_detect_annot_cols(anno_files[1])
+  if (is.null(annot_cols)) {
+    cols_use <- detected
+  } else if (is.numeric(annot_cols)) {
+    cols_use <- detected[annot_cols]
+  } else {
+    cols_use <- annot_cols
+  }
+
+  is_binary <- stats::setNames(rep(TRUE, length(cols_use)), cols_use)
+
+  for (f in anno_files) {
+    dat <- data.table::fread(f, select = cols_use)
+    for (col in cols_use) {
+      if (!is_binary[[col]]) next
+      vals <- unique(stats::na.omit(as.numeric(dat[[col]])))
+      if (any(!(vals %in% c(0, 1)))) is_binary[[col]] <- FALSE
+    }
+    if (!any(is_binary)) break
+  }
+
+  is_binary
 }
 
 
 #' @title Standardize tau and compute EnrichStat for one polyfun run
 #'
-#' @description Given the polyfun read result plus the annotation sd's and
-#'   reference SNP count, computes the Gazal-style standardized tau (\eqn{\tau^*})
-#'   and the differential per-SNP heritability statistic (EnrichStat) for the
-#'   target annotations.
-#'
-#' @details Standardization:
-#'   \deqn{\tau^*_C = \tau_C \cdot sd_C \cdot M_{ref} / h^2_g}
-#'   applied both to the point estimate and to each of the n_blocks jackknife
-#'   blocks (the per-block \eqn{\tau^*_C} values are returned for use as inputs
-#'   to cross-trait random-effects meta).
+#' @description Applies the Gazal standardization
+#'   \eqn{\tau^*_C = \tau_C \cdot sd_C \cdot M_{ref} / h^2_g} to the point and
+#'   to each jackknife block. For `mode = "single"`, additionally computes
+#'   EnrichStat and back-solves its standard error from polyfun's reported
+#'   `Enrichment_p` using \eqn{|Z| = \Phi^{-1}(1 - p/2)}.
 #'
-#'   EnrichStat point estimate:
-#'   \deqn{\text{EnrichStat}_C = \frac{h^2_g}{M_{ref}}\!\left[\frac{\pi^{h^2}_C}{\pi^M_C} - \frac{1-\pi^{h^2}_C}{1-\pi^M_C}\right]}
-#'   computed once from polyfun's reported \eqn{\pi^{h^2}_C} and \eqn{\pi^M_C}.
-#'   Its standard error is recovered from polyfun's `Enrichment_p` via
-#'   \deqn{|Z_C| = \Phi^{-1}(1 - p_C/2), \qquad SE_{\text{EnrichStat}_C} = |\text{EnrichStat}_C| / |Z_C|.}
+#' @param trait_data List from \code{\link{read_sldsc_trait}}.
+#' @param sd_annot Named numeric vector from \code{\link{compute_sldsc_annot_sd}}.
+#' @param M_ref Scalar from \code{\link{compute_sldsc_M_ref}}.
+#' @param target_categories Character vector or NULL. If NULL, intersects
+#'   `trait_data$categories` with `names(sd_annot)`.
+#' @param mode Character: `"single"` or `"joint"`.
 #'
-#'   Only target annotations are returned; baseline annotations are dropped after
-#'   serving their role of conditioning the regression. Following the original
-#'   pipeline scope, EnrichStat / E are computed only when the call corresponds
-#'   to a single-target run; in joint-mode runs only \eqn{\tau^*_C} is produced
-#'   per target.
-#'
-#' @param trait_data List. Output of \code{\link{read_sldsc_trait}} for one polyfun run.
-#' @param sd_annot Named numeric vector. Output of
-#'   \code{\link{compute_sldsc_annot_sd}}, restricted to target annotations.
-#' @param M_ref Scalar. Output of \code{\link{compute_sldsc_M_ref}}.
-#' @param target_categories Character vector or NULL. Annotation names to keep.
-#'   If NULL, all categories matching `names(sd_annot)` are kept.
-#' @param mode Character, one of `"single"` or `"joint"`. Controls which
-#'   quantities are produced (E and EnrichStat are produced only for `"single"`;
-#'   \eqn{\tau^*_C} is produced for both).
-#'
-#' @return A list with components:
-#'   \describe{
-#'     \item{summary}{Data frame with one row per target annotation. Columns:
-#'       `target`, `tau`, `tau_se`, `tau_star`, `tau_star_se`. For `mode="single"`
-#'       additionally `enrichment`, `enrichment_se`, `enrichment_p`,
-#'       `enrichstat`, `enrichstat_se` (the back-solved SE).}
-#'     \item{tau_star_blocks}{Matrix (n_blocks x n_target) of per-block \eqn{\tau^*_C}.}
-#'     \item{h2g}{Scalar trait heritability.}
-#'     \item{n_blocks}{Integer.}
-#'     \item{mode}{Echo of the input mode.}
-#'   }
+#' @return A list with `summary` (data frame), `tau_star_blocks` (matrix),
+#'   `h2g`, `n_blocks`, `mode`.
 #'
+#' @importFrom stats qnorm var
 #' @export
 standardize_sldsc_trait <- function(trait_data, sd_annot, M_ref,
                                     target_categories = NULL,
                                     mode = c("single", "joint")) {
-  stop("Not yet implemented (skeleton).")
+  mode <- match.arg(mode)
+  if (is.null(target_categories))
+    target_categories <- intersect(trait_data$categories, names(sd_annot))
+  if (length(target_categories) == 0L)
+    stop("standardize_sldsc_trait: no target categories.")
+
+  target_idx <- match(target_categories, trait_data$categories)
+  if (any(is.na(target_idx)))
+    stop("standardize_sldsc_trait: missing categories: ",
+         paste(target_categories[is.na(target_idx)], collapse = ", "))
+
+  h2g <- trait_data$h2g
+  sd_target <- as.numeric(sd_annot[target_categories])
+  if (any(is.na(sd_target) | sd_target == 0))
+    warning("standardize_sldsc_trait: zero/NA sd for some targets; tau* will be NA/0.")
+
+  coef       <- sd_target * M_ref / h2g
+  tau        <- as.numeric(trait_data$tau[target_categories])
+  tau_se     <- as.numeric(trait_data$tau_se[target_categories])
+  tau_star   <- tau * coef
+
+  blocks_target   <- trait_data$tau_blocks[, target_idx, drop = FALSE]
+  tau_star_blocks <- sweep(blocks_target, 2L, coef, FUN = "*")
+
+  B <- trait_data$n_blocks
+  jk_var <- apply(tau_star_blocks, 2L, function(x) stats::var(x, na.rm = TRUE))
+  tau_star_se <- sqrt((B - 1)^2 / B * jk_var)
+
+  summary_df <- data.frame(
+    target      = target_categories,
+    tau         = tau,
+    tau_se      = tau_se,
+    tau_star    = tau_star,
+    tau_star_se = tau_star_se,
+    stringsAsFactors = FALSE
+  )
+
+  if (mode == "single") {
+    enrich    <- as.numeric(trait_data$enrichment[target_categories])
+    enrich_se <- as.numeric(trait_data$enrichment_se[target_categories])
+    enrich_p  <- as.numeric(trait_data$enrichment_p[target_categories])
+    p_h2      <- as.numeric(trait_data$prop_h2[target_categories])
+    p_M       <- as.numeric(trait_data$prop_snps[target_categories])
+
+    diff_ratio  <- (p_h2 / p_M) - (1 - p_h2) / (1 - p_M)
+    enrichstat  <- (h2g / M_ref) * diff_ratio
+
+    abs_z <- stats::qnorm(1 - enrich_p / 2)
+    enrichstat_se <- abs(enrichstat) / abs_z
+    enrichstat_se[!is.finite(abs_z) | abs_z <= 0] <- NA_real_
+
+    summary_df$enrichment    <- enrich
+    summary_df$enrichment_se <- enrich_se
+    summary_df$enrichment_p  <- enrich_p
+    summary_df$enrichstat    <- enrichstat
+    summary_df$enrichstat_se <- enrichstat_se
+  }
+
+  list(
+    summary         = summary_df,
+    tau_star_blocks = tau_star_blocks,
+    h2g             = h2g,
+    n_blocks        = B,
+    mode            = mode
+  )
 }
 
 
 #' @title Random-effects meta-analysis of S-LDSC quantities across traits
 #'
 #' @description DerSimonian-Laird random-effects meta-analysis of one S-LDSC
-#'   quantity across multiple traits, for one annotation. Used as the entry
-#'   point both for the default pipeline meta and for user-driven re-meta over
-#'   subsets of traits.
+#'   quantity for one annotation across multiple traits.
 #'
-#' @details Implements
-#'   \deqn{\hat\theta_{meta} = \sum_i w_i \hat\theta_i / \sum_i w_i,
-#'         \quad SE_{meta} = 1/\sqrt{\sum_i w_i},
-#'         \quad w_i = 1/(SE_i^2 + \hat\sigma^2)}
-#'   via `rmeta::meta.summaries(..., method = "random")`. The two-sided p-value
-#'   is computed from the meta z-score under the standard normal:
-#'   \eqn{p = 2\Phi(-|Z_{meta}|)}.
-#'
-#'   Per-trait \eqn{SE_i} sources:
-#'   - `quantity = "tau_star"`: jackknife SE from the per-block \eqn{\tau^*}
-#'     values (computed in \code{\link{standardize_sldsc_trait}}).
+#' @details Per-trait \eqn{SE_i} sources:
+#'   - `quantity = "tau_star"`: jackknife SE from per-block \eqn{\tau^*}.
 #'   - `quantity = "enrichment"`: polyfun-reported `Enrichment_std_error`.
-#'   - `quantity = "enrichstat"`: back-solved SE from polyfun's `Enrichment_p`,
-#'     \eqn{SE = |\text{EnrichStat}|/|\Phi^{-1}(1-p/2)|}.
-#'
-#'   For binary-annotation enrichment reporting, callers typically combine two
-#'   meta runs: effect size and SE from `quantity="enrichment"` (interpretable
-#'   on the original enrichment-fold scale), p-value from `quantity="enrichstat"`
-#'   (the appropriate hypothesis test). The top-level
-#'   \code{\link{sldsc_postprocessing_pipeline}} performs this combination.
+#'   - `quantity = "enrichstat"`: back-solved SE from polyfun's `Enrichment_p`.
 #'
-#' @param per_trait_estimates Named list of standardized per-trait results from
-#'   \code{\link{standardize_sldsc_trait}}. Pass a subset to re-meta on a
-#'   user-chosen group of traits.
+#' @param per_trait_estimates Named list of per-trait results (each with a
+#'   `summary` data frame).
 #' @param category Character. Annotation name to meta-analyze.
-#' @param quantity Character, one of `"tau_star"`, `"enrichment"`, or `"enrichstat"`.
+#' @param quantity Character: `"tau_star"`, `"enrichment"`, or `"enrichstat"`.
 #'
-#' @return A list with components: `mean`, `se`, `p`, `n_traits`, `traits_used`,
-#'   and `tau2` (the DerSimonian-Laird between-trait variance).
-#'
-#' @examples
-#' \dontrun{
-#' meta_all <- meta_sldsc_random(per_trait_results, "my_anno", "tau_star")
-#'
-#' # subset to a custom trait group
-#' subset <- per_trait_results[c("CAD_META", "AD_GWAX", "PD_meta")]
-#' meta_neuro <- meta_sldsc_random(subset, "my_anno", "tau_star")
-#' }
+#' @return List with `mean`, `se`, `p`, `n_traits`, `traits_used`, `tau2`.
 #'
+#' @importFrom stats pnorm
 #' @export
 meta_sldsc_random <- function(per_trait_estimates, category,
                               quantity = c("tau_star", "enrichment", "enrichstat")) {
-  stop("Not yet implemented (skeleton).")
+  quantity <- match.arg(quantity)
+  col_pairs <- list(
+    tau_star   = c("tau_star",   "tau_star_se"),
+    enrichment = c("enrichment", "enrichment_se"),
+    enrichstat = c("enrichstat", "enrichstat_se")
+  )
+  cols <- col_pairs[[quantity]]
+  trait_names <- names(per_trait_estimates)
+  if (is.null(trait_names))
+    trait_names <- as.character(seq_along(per_trait_estimates))
+
+  means <- numeric(0); ses <- numeric(0); used <- character(0)
+  for (i in seq_along(per_trait_estimates)) {
+    pt <- per_trait_estimates[[i]]
+    if (is.null(pt) || is.null(pt$summary)) next
+    df <- pt$summary
+    row <- df[df$target == category, , drop = FALSE]
+    if (nrow(row) == 0L) next
+    if (!all(cols %in% names(row))) next
+    m <- as.numeric(row[[cols[1]]])[1]
+    s <- as.numeric(row[[cols[2]]])[1]
+    if (is.na(m) || is.na(s) || !is.finite(s) || s <= 0) next
+    means <- c(means, m); ses <- c(ses, s); used <- c(used, trait_names[i])
+  }
+
+  if (length(means) < 2L) {
+    return(list(mean = NA_real_, se = NA_real_, p = NA_real_,
+                n_traits = length(means), traits_used = used,
+                tau2 = NA_real_))
+  }
+  if (!requireNamespace("rmeta", quietly = TRUE))
+    stop("meta_sldsc_random: install the 'rmeta' package.")
+
+  meta <- rmeta::meta.summaries(means, ses, method = "random")
+  z    <- meta$summary / meta$se.summary
+  p    <- 2 * stats::pnorm(-abs(z))
+  list(
+    mean        = as.numeric(meta$summary),
+    se          = as.numeric(meta$se.summary),
+    p           = as.numeric(p),
+    n_traits    = length(means),
+    traits_used = used,
+    tau2        = as.numeric(meta$tau2)
+  )
+}
+
+
+# Internal helper: assemble a wide per-trait summary frame with single + joint
+# columns side by side.
+.sldsc_assemble_trait_summary <- function(single_df, joint_df, target_categories,
+                                          is_binary_vec) {
+  rows <- if (!is.null(single_df)) single_df$target else
+          if (!is.null(joint_df))  joint_df$target  else target_categories
+  out <- data.frame(target = rows,
+                    is_binary = unname(is_binary_vec[rows]),
+                    stringsAsFactors = FALSE)
+
+  add_cols <- function(out, src, suffix) {
+    cols_to_add <- c("tau", "tau_se", "tau_star", "tau_star_se",
+                     "enrichment", "enrichment_se", "enrichment_p",
+                     "enrichstat", "enrichstat_se")
+    for (c in cols_to_add) {
+      newcol <- paste0(c, "_", suffix)
+      if (!is.null(src) && c %in% names(src)) {
+        out[[newcol]] <- src[[c]][match(out$target, src$target)]
+      } else {
+        out[[newcol]] <- NA_real_
+      }
+    }
+    out
+  }
+  out <- add_cols(out, single_df, "single")
+  out <- add_cols(out, joint_df,  "joint")
+  out
+}
+
+
+# Internal helper: build a per-trait list view that meta_sldsc_random can read.
+# Each list element has a $summary frame with the requested mode's columns
+# renamed to the canonical names (tau_star, tau_star_se, enrichment, ...).
+.sldsc_view_for_meta <- function(per_trait, suffix) {
+  lapply(per_trait, function(pt) {
+    if (is.null(pt$summary)) return(NULL)
+    df <- pt$summary
+    cols_have <- c("tau_star", "tau_star_se", "enrichment", "enrichment_se",
+                   "enrichment_p", "enrichstat", "enrichstat_se")
+    src_cols <- paste0(cols_have, "_", suffix)
+    avail    <- src_cols %in% names(df)
+    if (!any(avail)) return(NULL)
+    new_df <- data.frame(target = df$target, stringsAsFactors = FALSE)
+    for (k in seq_along(cols_have)) {
+      if (avail[k]) new_df[[cols_have[k]]] <- df[[src_cols[k]]]
+    }
+    list(summary = new_df)
+  })
 }
 
 
 #' @title End-to-end S-LDSC post-processing across traits, single + joint in one pass
 #'
-#' @description Top-level convenience wrapper. Reads polyfun outputs (one
-#'   single-target run per target plus one joint run per trait), standardizes
-#'   both modes, and runs the default random-effects meta-analysis across all
-#'   traits supplied. Single-target and joint-target analyses are produced in
-#'   one pass.
-#'
-#' @details For \eqn{N} target annotations and one trait, the caller is expected
-#'   to have already produced \eqn{N+1} polyfun runs: \eqn{N} single-target runs
-#'   (each fitting one target + baseline) and 1 joint run (fitting all \eqn{N}
-#'   targets + baseline together). The pipeline reads all of these, drops
-#'   baseline categories, standardizes both modes, and assembles consolidated
-#'   per-trait and meta data frames.
-#'
-#'   Reports baseline annotation count and names via `message()` once at the
-#'   start of the run.
-#'
-#'   Cross-type comparison: the `meta$tau_star` frame is the apple-to-apple
-#'   table for ranking annotations regardless of binary/continuous type. The
-#'   `is_binary` column lets callers filter when within-type comparison is desired.
-#'
-#'   Two-channel enrichment meta (binary annotations only): the `meta$enrichment`
-#'   frame's `single_mean` and `single_se` come from the meta on \eqn{E_C};
-#'   the `single_p` comes from the meta on EnrichStat (the appropriate
-#'   hypothesis test).
-#'
-#' @param trait_single_prefixes Named list. For each trait (name = trait id),
-#'   a character vector of length \eqn{N} giving the polyfun output prefixes
-#'   for the \eqn{N} single-target runs. Order must match `target_categories`.
-#' @param trait_joint_prefix Named character. For each trait (name matching
-#'   `trait_single_prefixes`), the polyfun output prefix for the joint run.
-#' @param target_anno_dir Character. Directory of target annotation files used
-#'   for `sd_C` and binary detection. Typically the joint-mode directory, since
-#'   it carries all target annotation columns.
+#' @description Top-level orchestration. Reads polyfun outputs (one single-target
+#'   run per target plus, when available, one joint run per trait), standardizes
+#'   both modes, and runs the default random-effects meta across all traits.
+#'
+#' @param trait_single_prefixes Named list. For each trait, a character vector
+#'   of length \eqn{N} giving the polyfun output prefixes for the \eqn{N}
+#'   single-target runs (order must match `target_categories`).
+#' @param trait_joint_prefix Named character. For each trait, the polyfun output
+#'   prefix for the joint run. Pass `NA` (or `""`) for a trait without a joint run.
+#' @param target_anno_dir Character. Directory of target `.annot.gz` files used
+#'   for `sd_C` and binary detection (typically the joint-mode dir).
 #' @param frqfile_dir Character or NULL.
 #' @param plink_name Character. Default `"ADSP_chr"`.
 #' @param maf_cutoff Numeric, default `0.05`.
 #' @param target_categories Character vector or NULL. Auto-detected from the
-#'   joint-run results as all categories not in baseline if NULL.
+#'   first available run if NULL.
 #'
-#' @return A list with components:
-#'   \describe{
-#'     \item{per_trait}{Named list of per-trait results. For each trait, a list with:
-#'       `summary` (wide data frame of single + joint estimates side-by-side per
-#'       target annotation, with `is_binary` flag), `tau_star_blocks_single`
-#'       (matrix), `tau_star_blocks_joint` (matrix), and `h2g`.}
-#'     \item{meta}{List of three data frames, each with one row per target annotation:
-#'       \itemize{
-#'         \item `tau_star`: columns `target`, `is_binary`, `single_mean`,
-#'           `single_se`, `single_p`, `joint_mean`, `joint_se`, `joint_p`, `n_traits`.
-#'         \item `enrichment`: same shape; effect/SE from E meta, p from EnrichStat meta.
-#'         \item `enrichstat`: same shape; pure EnrichStat meta.
-#'       }
-#'       The `tau_star` frame is the cross-type comparable headline.}
-#'     \item{params}{List echoing `maf_cutoff`, `M_ref`, `target_categories`,
-#'       `n_baseline`, `baseline_categories`, and `trait_names` for reproducibility.}
-#'   }
-#'
-#' @examples
-#' \dontrun{
-#' res <- sldsc_postprocessing_pipeline(
-#'   trait_single_prefixes = list(
-#'     CAD_META = c("/output/CAD_META.single_anno1", "/output/CAD_META.single_anno2"),
-#'     AD_GWAX  = c("/output/AD_GWAX.single_anno1",  "/output/AD_GWAX.single_anno2")
-#'   ),
-#'   trait_joint_prefix = c(
-#'     CAD_META = "/output/CAD_META.joint",
-#'     AD_GWAX  = "/output/AD_GWAX.joint"
-#'   ),
-#'   target_anno_dir = "/output/ldscores/my_targets",
-#'   frqfile_dir     = "/ref/ADSP/frq",
-#'   plink_name      = "ADSP_chr",
-#'   maf_cutoff      = 0.05,
-#'   target_categories = c("anno1", "anno2")
-#' )
-#' res$meta$tau_star      # cross-type comparable headline
-#' res$meta$enrichment    # within-binary headline (effect/SE from E, p from EnrichStat)
-#'
-#' # later, re-meta on a custom subset of traits
-#' meta_neuro <- meta_sldsc_random(
-#'   res$per_trait[c("AD_GWAX", "PD_meta")], "anno1", "tau_star"
-#' )
-#' }
-#'
-#' @seealso \code{\link{read_sldsc_trait}}, \code{\link{standardize_sldsc_trait}},
-#'   \code{\link{meta_sldsc_random}}
+#' @return List with `per_trait`, `meta` (three frames), `params`.
 #'
 #' @export
 sldsc_postprocessing_pipeline <- function(trait_single_prefixes,
@@ -339,5 +524,210 @@ sldsc_postprocessing_pipeline <- function(trait_single_prefixes,
                                           plink_name = "ADSP_chr",
                                           maf_cutoff = 0.05,
                                           target_categories = NULL) {
-  stop("Not yet implemented (skeleton).")
+  trait_names <- names(trait_single_prefixes)
+  if (is.null(trait_names))
+    stop("sldsc_postprocessing_pipeline: trait_single_prefixes must be a named list.")
+
+  message("[sldsc] Computing M_ref...")
+  M_ref <- compute_sldsc_M_ref(target_anno_dir = target_anno_dir,
+                               frqfile_dir = frqfile_dir,
+                               plink_name = plink_name,
+                               maf_cutoff = maf_cutoff)
+  message(sprintf("[sldsc]   M_ref = %d (MAF cutoff %g)", M_ref, maf_cutoff))
+
+  message("[sldsc] Computing per-annotation sd...")
+  sd_annot_full <- compute_sldsc_annot_sd(target_anno_dir = target_anno_dir,
+                                          frqfile_dir = frqfile_dir,
+                                          plink_name = plink_name,
+                                          maf_cutoff = maf_cutoff)
+  message(sprintf("[sldsc]   sd computed for %d annotation columns",
+                  length(sd_annot_full)))
+
+  message("[sldsc] Detecting binary vs continuous annotations...")
+  is_binary_full <- is_binary_sldsc_annot(target_anno_dir = target_anno_dir)
+
+  # Auto-detect target categories from a representative run.
+  if (is.null(target_categories)) {
+    pivot_run <- NULL
+    if (!is.null(trait_joint_prefix) && length(trait_joint_prefix) > 0) {
+      jp <- trait_joint_prefix[[1]]
+      if (is.character(jp) && length(jp) == 1L && !is.na(jp) && nzchar(jp)) {
+        pivot_run <- tryCatch(read_sldsc_trait(jp), error = function(e) NULL)
+      }
+    }
+    if (is.null(pivot_run) &&
+        length(trait_single_prefixes) > 0L &&
+        length(trait_single_prefixes[[1]]) > 0L) {
+      pivot_run <- tryCatch(read_sldsc_trait(trait_single_prefixes[[1]][1]),
+                            error = function(e) NULL)
+    }
+    if (is.null(pivot_run))
+      stop("sldsc_postprocessing_pipeline: cannot auto-detect target_categories.")
+    target_categories <- intersect(pivot_run$categories, names(sd_annot_full))
+    message(sprintf("[sldsc] Auto-detected %d target categories", length(target_categories)))
+  }
+
+  baseline_categories <- character(0)
+  if (!is.null(trait_joint_prefix) && length(trait_joint_prefix) > 0L) {
+    jp <- trait_joint_prefix[[1]]
+    if (is.character(jp) && length(jp) == 1L && !is.na(jp) && nzchar(jp)) {
+      pivot <- tryCatch(read_sldsc_trait(jp), error = function(e) NULL)
+      if (!is.null(pivot))
+        baseline_categories <- setdiff(pivot$categories, target_categories)
+    }
+  }
+  if (length(baseline_categories) > 0L) {
+    msg_head <- paste(utils::head(baseline_categories, 5), collapse = ", ")
+    msg_tail <- if (length(baseline_categories) > 5) ", ..." else ""
+    message(sprintf("[sldsc] Detected %d baseline annotations: %s%s",
+                    length(baseline_categories), msg_head, msg_tail))
+  } else {
+    message("[sldsc] No baseline annotations detected (joint-run prefix missing or unreadable).")
+  }
+
+  sd_annot <- sd_annot_full[target_categories]
+  is_binary <- if (length(is_binary_full) > 0L) is_binary_full[target_categories] else
+               stats::setNames(rep(FALSE, length(target_categories)), target_categories)
+
+  message(sprintf("[sldsc] Standardizing %d traits...", length(trait_names)))
+  per_trait <- list()
+
+  for (trait in trait_names) {
+    # ---- single-mode ----
+    single_summaries <- list()
+    single_blocks    <- list()
+    single_h2gs      <- numeric(0)
+    sing_prefs <- trait_single_prefixes[[trait]]
+    for (i in seq_along(target_categories)) {
+      cat_name <- target_categories[i]
+      if (i > length(sing_prefs)) break
+      pref <- sing_prefs[i]
+      run  <- tryCatch(read_sldsc_trait(pref), error = function(e) {
+        warning(sprintf("[sldsc] Failed to read single %s for %s: %s",
+                        cat_name, trait, e$message)); NULL
+      })
+      if (is.null(run)) next
+      std <- tryCatch(
+        standardize_sldsc_trait(run, sd_annot[cat_name], M_ref,
+                                target_categories = cat_name, mode = "single"),
+        error = function(e) {
+          warning(sprintf("[sldsc] Failed to standardize single %s for %s: %s",
+                          cat_name, trait, e$message)); NULL
+        })
+      if (is.null(std)) next
+      single_summaries[[cat_name]] <- std$summary
+      single_blocks[[cat_name]]    <- std$tau_star_blocks
+      single_h2gs                  <- c(single_h2gs, std$h2g)
+    }
+    single_df <- if (length(single_summaries) > 0L)
+                   do.call(rbind, single_summaries) else NULL
+    if (!is.null(single_df)) rownames(single_df) <- NULL
+    blocks_single <- if (length(single_blocks) > 0L) do.call(cbind, single_blocks) else NULL
+
+    # ---- joint-mode ----
+    joint_df       <- NULL
+    blocks_joint   <- NULL
+    joint_h2g      <- NA_real_
+    n_blocks_trait <- NA_integer_
+    if (!is.null(trait_joint_prefix) && trait %in% names(trait_joint_prefix)) {
+      jp <- trait_joint_prefix[[trait]]
+      if (is.character(jp) && length(jp) == 1L && !is.na(jp) && nzchar(jp)) {
+        run <- tryCatch(read_sldsc_trait(jp), error = function(e) {
+          warning(sprintf("[sldsc] Failed to read joint for %s: %s",
+                          trait, e$message)); NULL
+        })
+        if (!is.null(run)) {
+          std <- tryCatch(
+            standardize_sldsc_trait(run, sd_annot, M_ref,
+                                    target_categories = target_categories,
+                                    mode = "joint"),
+            error = function(e) {
+              warning(sprintf("[sldsc] Failed to standardize joint for %s: %s",
+                              trait, e$message)); NULL
+            })
+          if (!is.null(std)) {
+            joint_df       <- std$summary
+            blocks_joint   <- std$tau_star_blocks
+            joint_h2g      <- std$h2g
+            n_blocks_trait <- std$n_blocks
+          }
+        }
+      }
+    }
+
+    summary_wide <- .sldsc_assemble_trait_summary(single_df, joint_df,
+                                                  target_categories, is_binary)
+    per_trait[[trait]] <- list(
+      summary                = summary_wide,
+      tau_star_blocks_single = blocks_single,
+      tau_star_blocks_joint  = blocks_joint,
+      h2g                    = if (!is.na(joint_h2g)) joint_h2g
+                               else if (length(single_h2gs) > 0L) median(single_h2gs)
+                               else NA_real_,
+      n_blocks               = n_blocks_trait
+    )
+  }
+
+  message("[sldsc] Running random-effects meta across traits...")
+  pt_view_single <- .sldsc_view_for_meta(per_trait, "single")
+  pt_view_joint  <- .sldsc_view_for_meta(per_trait, "joint")
+
+  build_table <- function(quantity, view, label) {
+    rows <- list()
+    for (cat in target_categories) {
+      m <- meta_sldsc_random(view, cat, quantity)
+      rows[[cat]] <- data.frame(
+        target    = cat,
+        is_binary = unname(is_binary[cat]),
+        mean      = m$mean,
+        se        = m$se,
+        p         = m$p,
+        n_traits  = m$n_traits,
+        stringsAsFactors = FALSE
+      )
+    }
+    df <- do.call(rbind, rows)
+    rownames(df) <- NULL
+    nm_old <- c("mean", "se", "p")
+    nm_new <- paste0(label, "_", nm_old)
+    names(df)[names(df) %in% nm_old] <- nm_new
+    df
+  }
+
+  meta_tau_star_single <- build_table("tau_star",   pt_view_single, "single")
+  meta_tau_star_joint  <- build_table("tau_star",   pt_view_joint,  "joint")
+  meta_E_single        <- build_table("enrichment", pt_view_single, "single")
+  meta_ES_single       <- build_table("enrichstat", pt_view_single, "single")
+
+  # Combine tau_star single + joint into one wide frame.
+  meta_tau_star <- meta_tau_star_single
+  ord <- match(meta_tau_star$target, meta_tau_star_joint$target)
+  meta_tau_star$joint_mean <- meta_tau_star_joint$joint_mean[ord]
+  meta_tau_star$joint_se   <- meta_tau_star_joint$joint_se[ord]
+  meta_tau_star$joint_p    <- meta_tau_star_joint$joint_p[ord]
+
+  # Two-channel enrichment meta: effect/SE from E meta, p from EnrichStat meta.
+  meta_enrichment <- meta_E_single
+  meta_enrichment$single_p <- meta_ES_single$single_p[match(meta_enrichment$target,
+                                                             meta_ES_single$target)]
+
+  # Pure EnrichStat meta (separate frame).
+  meta_enrichstat <- meta_ES_single
+
+  list(
+    per_trait = per_trait,
+    meta = list(
+      tau_star   = meta_tau_star,
+      enrichment = meta_enrichment,
+      enrichstat = meta_enrichstat
+    ),
+    params = list(
+      maf_cutoff          = maf_cutoff,
+      M_ref               = M_ref,
+      target_categories   = target_categories,
+      n_baseline          = length(baseline_categories),
+      baseline_categories = baseline_categories,
+      trait_names         = trait_names
+    )
+  )
 }

From 4f0f71b7c203a251afd006f61ac8c822aaf3bc5b Mon Sep 17 00:00:00 2001
From: Gao Wang <wangow@gmail.com>
Date: Thu, 7 May 2026 09:08:21 -0400
Subject: [PATCH 4/4] fix test

---
 tests/testthat/test_LD.R | 694 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 669 insertions(+), 25 deletions(-)

diff --git a/tests/testthat/test_LD.R b/tests/testthat/test_LD.R
index 058c6460..203e7c36 100644
--- a/tests/testthat/test_LD.R
+++ b/tests/testthat/test_LD.R
@@ -230,7 +230,7 @@ test_that("partition_LD_matrix validates block structure properly", {
     }
 
     # Expect an error for invalid block structure
-    expect_error(partition_LD_matrix(invalid_ld_data), "Matrix does not have the expected block structure")
+    expect_error(partition_LD_matrix(invalid_ld_data), "Matrix lacks expected block structure")
   }
 
   file.remove(LD_meta_file_path)
@@ -824,14 +824,27 @@ test_that("can_merge checks chromosome and size", {
   expect_false(pecotmr:::can_merge(b1, b3, max_size = 500))
 })
 
-# ---- check_ld ----
-test_that("check_ld detects positive definite matrix", {
+# ===========================================================================
+# check_ld (regularize_ld)
+# ===========================================================================
+
+test_that("check_ld reports PD for identity matrix", {
   R <- diag(5)
-  result <- check_ld(R, method = "check")
+  result <- check_ld(R)
+  expect_true(result$is_pd)
+  expect_true(result$is_psd)
+  expect_equal(result$method_applied, "none")
+  expect_equal(result$R, R)
+  expect_equal(result$condition_number, 1)
+})
+
+test_that("check_ld reports PD for well-conditioned correlation matrix", {
+  R <- matrix(0.3, 4, 4)
+  diag(R) <- 1
+  result <- check_ld(R)
   expect_true(result$is_pd)
   expect_true(result$is_psd)
   expect_equal(result$n_negative, 0)
-  expect_equal(result$min_eigenvalue, 1)
   expect_equal(result$method_applied, "none")
 })
 
@@ -839,45 +852,676 @@ test_that("check_ld detects non-PSD matrix", {
   R <- matrix(0.9, 3, 3)
   diag(R) <- 1
   R[1, 3] <- R[3, 1] <- -0.5
-  result <- check_ld(R, method = "check")
+  result <- check_ld(R)
   expect_false(result$is_psd)
   expect_true(result$n_negative > 0)
   expect_true(result$min_eigenvalue < 0)
   expect_equal(result$method_applied, "none")
-  expect_equal(result$R, R)
 })
 
-test_that("check_ld eigenfix repairs non-PSD matrix", {
+test_that("check_ld shrink method modifies non-PD matrix", {
   R <- matrix(0.9, 3, 3)
   diag(R) <- 1
   R[1, 3] <- R[3, 1] <- -0.5
-  result <- check_ld(R, method = "eigenfix")
-  expect_equal(result$method_applied, "eigenfix")
-  result2 <- check_ld(result$R, method = "check")
-  expect_true(result2$is_psd)
-  expect_equal(diag(result$R), rep(1, 3))
-  expect_true(isSymmetric(result$R))
+  result <- check_ld(R, method = "shrink")
+  expect_equal(result$method_applied, "shrink")
+  expect_false(identical(result$R, R))
+  # With strong enough shrinkage, result should be PD
+  result2 <- check_ld(R, method = "shrink", shrinkage = 0.5)
+  eig <- eigen(result2$R, symmetric = TRUE)
+  expect_true(all(eig$values > 0))
 })
 
-test_that("check_ld shrink repairs non-PD matrix", {
+test_that("check_ld eigenfix method improves non-PD matrix", {
   R <- matrix(0.9, 3, 3)
   diag(R) <- 1
   R[1, 3] <- R[3, 1] <- -0.5
-  result <- check_ld(R, method = "shrink", shrinkage = 0.1)
-  expect_equal(result$method_applied, "shrink")
-  result2 <- check_ld(result$R, method = "check")
-  expect_true(result2$is_pd)
+  original_min_eval <- min(eigen(R, symmetric = TRUE)$values)
+  result <- check_ld(R, method = "eigenfix")
+  expect_equal(result$method_applied, "eigenfix")
+  # Eigenfix should improve the minimum eigenvalue
+  fixed_min_eval <- min(eigen(result$R, symmetric = TRUE)$values)
+  expect_true(fixed_min_eval > original_min_eval)
+  # Unit diagonal preserved
+  expect_equal(diag(result$R), rep(1, 3))
+  # Symmetry preserved
+  expect_equal(result$R, t(result$R))
 })
 
-test_that("check_ld eigenfix is no-op on PSD matrix", {
-  R <- diag(5)
+test_that("check_ld shrink does nothing when matrix is already PD", {
+  R <- diag(3)
+  result <- check_ld(R, method = "shrink")
+  expect_equal(result$method_applied, "none")
+  expect_equal(result$R, R)
+})
+
+test_that("check_ld eigenfix does nothing when matrix is already PD", {
+  R <- diag(3)
   result <- check_ld(R, method = "eigenfix")
   expect_equal(result$method_applied, "none")
   expect_equal(result$R, R)
 })
 
-test_that("check_ld condition_number is correct", {
-  R <- diag(c(1, 0.5, 0.1))
-  result <- check_ld(R, method = "check")
-  expect_equal(result$condition_number, 10)
+# ===========================================================================
+# extract_block_matrices: out-of-range blocks
+# ===========================================================================
+
+test_that("extract_block_matrices warns and skips out-of-range blocks", {
+  mat <- diag(4)
+  vnames <- paste0("v", 1:4)
+  rownames(mat) <- colnames(mat) <- vnames
+  block_metadata <- data.frame(
+    block_id = c(1, 2),
+    start_idx = c(1, 10),
+    end_idx = c(2, 12),
+    chrom = c("1", "1"),
+    block_start = c(100, 500),
+    block_end = c(200, 600),
+    size = c(2, 3),
+    stringsAsFactors = FALSE
+  )
+  expect_warning(
+    result <- pecotmr:::extract_block_matrices(mat, block_metadata, vnames),
+    "outside the range"
+  )
+  valid_blocks <- result$ld_matrices[!sapply(result$ld_matrices, is.null)]
+  expect_equal(length(valid_blocks), 1)
+  expect_equal(nrow(valid_blocks[[1]]), 2)
+})
+
+# ===========================================================================
+# resolve_ld_source: type detection with real fixtures
+# ===========================================================================
+
+geno_test_data_dir <- test_path("test_data")
+geno_region_all <- "chr21:17513228-17592874"
+
+test_that("resolve_ld_source detects PLINK2 from metadata", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_p2_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- pecotmr:::resolve_ld_source(meta_file)
+  expect_equal(result$type, "plink2")
+  expect_equal(result$meta_path, meta_file)
+})
+
+test_that("resolve_ld_source detects VCF from metadata", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_vcf_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants.vcf.gz", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- pecotmr:::resolve_ld_source(meta_file)
+  expect_equal(result$type, "vcf")
+})
+
+test_that("resolve_ld_source detects GDS from metadata", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_gds_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants.gds", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- pecotmr:::resolve_ld_source(meta_file)
+  expect_equal(result$type, "gds")
+})
+
+test_that("resolve_ld_source detects precomputed from metadata", {
+  # Existing LD block metadata with non-zero start/end
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_pre_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("chr1", "1000", "1200",
+            "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim",
+            sep = "\t"), "\n", file = meta_file, append = TRUE)
+  result <- pecotmr:::resolve_ld_source(meta_file)
+  expect_equal(result$type, "precomputed")
+})
+
+test_that("resolve_ld_source errors on missing file", {
+  expect_error(pecotmr:::resolve_ld_source("/nonexistent/file.tsv"), "not found")
+})
+
+test_that("resolve_ld_source errors on 0:0 sentinel with non-genotype path", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_bad_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "nonexistent_prefix", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  expect_error(pecotmr:::resolve_ld_source(meta_file), "0:0 sentinel")
+})
+
+# ===========================================================================
+# resolve_genotype_path_for_region
+# ===========================================================================
+
+test_that("resolve_genotype_path_for_region resolves correct chromosome path", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_path_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- pecotmr:::resolve_genotype_path_for_region(meta_file, geno_region_all)
+  expect_equal(result, file.path(geno_test_data_dir, "test_variants"))
+})
+
+test_that("resolve_genotype_path_for_region errors on missing chromosome", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_nochr_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("1", "0", "0", "test_variants", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  expect_error(
+    pecotmr:::resolve_genotype_path_for_region(meta_file, geno_region_all),
+    "No entry for chromosome"
+  )
+})
+
+# ===========================================================================
+# load_LD_from_genotype with real fixtures
+# ===========================================================================
+
+test_that("load_LD_from_genotype returns LD matrix with .afreq", {
+  skip_if_not_installed("pgenlibr")
+  plink_prefix <- file.path(geno_test_data_dir, "test_variants")
+  result <- pecotmr:::load_LD_from_genotype(plink_prefix, geno_region_all)
+  expect_true(is.list(result))
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 349L)
+  expect_true(isSymmetric(result$LD_matrix))
+  expect_false(result$is_genotype)
+  # ref_panel should have allele_freq from .afreq file
+  expect_true("allele_freq" %in% names(result$ref_panel))
+  expect_true(all(result$ref_panel$allele_freq > 0))
+  expect_true(all(result$ref_panel$allele_freq < 1))
+  # block_metadata
+  expect_true(is.data.frame(result$block_metadata))
+  expect_equal(nrow(result$block_metadata), 1L)
+})
+
+test_that("load_LD_from_genotype returns genotype matrix when requested", {
+  skip_if_not_installed("pgenlibr")
+  plink_prefix <- file.path(geno_test_data_dir, "test_variants")
+  result <- pecotmr:::load_LD_from_genotype(plink_prefix, geno_region_all,
+                                             return_genotype = TRUE)
+  expect_true(result$is_genotype)
+  expect_equal(nrow(result$LD_matrix), 100L)  # samples
+  expect_equal(ncol(result$LD_matrix), 349L)  # variants
+})
+
+test_that("load_LD_from_genotype computes variance with n_sample", {
+  skip_if_not_installed("pgenlibr")
+  plink_prefix <- file.path(geno_test_data_dir, "test_variants")
+  result <- pecotmr:::load_LD_from_genotype(plink_prefix, geno_region_all,
+                                             n_sample = 100L)
+  expect_true("variance" %in% names(result$ref_panel))
+  expect_true("n_nomiss" %in% names(result$ref_panel))
+  expect_equal(result$ref_panel$n_nomiss[1], 100L)
+  expect_true(all(result$ref_panel$variance > 0))
+})
+
+test_that("load_LD_from_genotype falls back to computed AF without .afreq", {
+  skip_if_not_installed("VariantAnnotation")
+  vcf_path <- file.path(geno_test_data_dir, "test_variants.vcf.gz")
+  result <- suppressWarnings(
+    pecotmr:::load_LD_from_genotype(vcf_path, geno_region_all)
+  )
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 349L)
+  expect_true(isSymmetric(result$LD_matrix))
+  # Allele frequencies computed from genotypes
+  expect_true("allele_freq" %in% names(result$ref_panel))
+  expect_true(all(result$ref_panel$allele_freq > 0))
+  expect_true(all(result$ref_panel$allele_freq < 1))
+})
+
+test_that("load_LD_from_genotype works with GDS files", {
+  skip_if_not_installed("SNPRelate")
+  skip_if_not_installed("gdsfmt")
+  gds_path <- file.path(geno_test_data_dir, "test_variants.gds")
+  result <- pecotmr:::load_LD_from_genotype(gds_path, geno_region_all)
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 349L)
+  expect_true(isSymmetric(result$LD_matrix))
+})
+
+test_that("load_LD_from_genotype .afreq and computed AF are consistent", {
+  skip_if_not_installed("pgenlibr")
+  skip_if_not_installed("SNPRelate")
+  skip_if_not_installed("gdsfmt")
+  plink_prefix <- file.path(geno_test_data_dir, "test_variants")
+  gds_path <- file.path(geno_test_data_dir, "test_variants.gds")
+  res_afreq <- pecotmr:::load_LD_from_genotype(plink_prefix, geno_region_all)
+  res_computed <- pecotmr:::load_LD_from_genotype(gds_path, geno_region_all)
+  # Allele frequencies should be close (same data, different source)
+  expect_true(max(abs(res_afreq$ref_panel$allele_freq - res_computed$ref_panel$allele_freq)) < 0.01)
+})
+
+# ===========================================================================
+# load_LD_matrix with real genotype fixtures via metadata
+# ===========================================================================
+
+test_that("load_LD_matrix dispatches to PLINK2 genotype source", {
+  skip_if_not_installed("pgenlibr")
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_ldmat_p2_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- load_LD_matrix(meta_file, geno_region_all)
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 349L)
+  expect_false(result$is_genotype)
+})
+
+test_that("load_LD_matrix dispatches to VCF genotype source", {
+  skip_if_not_installed("VariantAnnotation")
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_ldmat_vcf_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants.vcf.gz", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- suppressWarnings(load_LD_matrix(meta_file, geno_region_all))
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 349L)
+})
+
+test_that("load_LD_matrix dispatches to GDS genotype source", {
+  skip_if_not_installed("SNPRelate")
+  skip_if_not_installed("gdsfmt")
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_ldmat_gds_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants.gds", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- load_LD_matrix(meta_file, geno_region_all)
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 349L)
+})
+
+test_that("load_LD_matrix return_genotype='auto' returns X for genotype source", {
+  skip_if_not_installed("pgenlibr")
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_ldmat_auto_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("21", "0", "0", "test_variants", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- load_LD_matrix(meta_file, geno_region_all, return_genotype = "auto")
+  expect_true(result$is_genotype)
+  expect_equal(nrow(result$LD_matrix), 100L)  # samples
+  expect_equal(ncol(result$LD_matrix), 349L)  # variants
+})
+
+test_that("load_LD_matrix return_genotype=TRUE errors for precomputed", {
+  meta_file <- gsub("//", "/", tempfile(pattern = "ld_meta_file", tmpdir = tempdir(), fileext = ".tsv"))
+  on.exit(unlink(meta_file), add = TRUE)
+  meta_df <- data.frame(
+    chrom = "chr1", start = 1000, end = 1200,
+    path = paste0(
+      "./test_data/LD_block_1.chr1_1000_1200.float16.txt.xz,",
+      "./test_data/LD_block_1.chr1_1000_1200.float16.bim"
+    )
+  )
+  write_delim(meta_df, meta_file, delim = "\t")
+  region <- data.frame(chrom = "chr1", start = 1000, end = 1190)
+  expect_error(
+    load_LD_matrix(meta_file, region, return_genotype = TRUE),
+    "genotype files"
+  )
+})
+
+# ===========================================================================
+# resolve_ld_source: PLINK1 detection
+# ===========================================================================
+
+test_that("resolve_ld_source detects PLINK1 from metadata", {
+  skip_if_not_installed("snpStats")
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_resolve_p1_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("22", "0", "0", "protocol_example.genotype", sep = "\t"), "\n",
+      file = meta_file, append = TRUE)
+  result <- pecotmr:::resolve_ld_source(meta_file)
+  expect_equal(result$type, "plink1")
+})
+
+# ===========================================================================
+# load_LD_matrix: precomputed blocks via real .cor.xz fixtures
+# ===========================================================================
+
+test_that("load_LD_matrix loads single precomputed block", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_precomp_single_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("1", "1000", "1200",
+            "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim",
+            sep = "\t"), "\n", file = meta_file, append = TRUE)
+  result <- load_LD_matrix(meta_file, "chr1:1000-1190")
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 5L)
+  expect_true(isSymmetric(result$LD_matrix))
+  expect_equal(length(result$LD_variants), 5L)
+  expect_true(all(grepl("^chr1:", result$LD_variants)))
+  expect_false(result$is_genotype)
+  # block_metadata should have one block
+  expect_equal(nrow(result$block_metadata), 1L)
+  # ref_panel should have variant info
+  expect_true("chrom" %in% names(result$ref_panel))
+  expect_true("pos" %in% names(result$ref_panel))
+})
+
+test_that("load_LD_matrix loads multiple precomputed blocks", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_precomp_multi_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  lines <- c(
+    paste("chrom", "start", "end", "path", sep = "\t"),
+    paste("1", "1000", "1200",
+          "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim",
+          sep = "\t"),
+    paste("1", "1200", "1400",
+          "LD_block_2.chr1_1200_1400.float16.txt.xz,LD_block_2.chr1_1200_1400.float16.bim",
+          sep = "\t"),
+    paste("1", "1400", "1600",
+          "LD_block_3.chr1_1400_1600.float16.txt.xz,LD_block_3.chr1_1400_1600.float16.bim",
+          sep = "\t")
+  )
+  writeLines(lines, meta_file)
+  result <- load_LD_matrix(meta_file, "chr1:1000-1500")
+  expect_true(is.matrix(result$LD_matrix))
+  # Should span blocks 1-3: 5 + 5 + 5 = 15 unique variants (no overlap in variant IDs)
+  expect_true(nrow(result$LD_matrix) >= 10)
+  expect_true(isSymmetric(result$LD_matrix))
+  expect_true(nrow(result$block_metadata) >= 2)
+})
+
+test_that("load_LD_matrix with n_sample for precomputed blocks with freq data", {
+  # The 9-column bim format includes allele_freq, variance, n_nomiss;
+  # the 6-column bim does not. With 6-col bim and no allele_freq,
+  # n_sample cannot compute variance — ref_panel has base columns only.
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_precomp_nsamp_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("1", "1000", "1200",
+            "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim",
+            sep = "\t"), "\n", file = meta_file, append = TRUE)
+  result <- load_LD_matrix(meta_file, "chr1:1000-1190", n_sample = 500L)
+  # ref_panel should always have basic variant info
+  expect_true(all(c("chrom", "pos", "A2", "A1", "variant_id") %in% names(result$ref_panel)))
+  # 6-col bim lacks allele_freq so variance computation is skipped
+  expect_false("variance" %in% names(result$ref_panel))
+})
+
+# ===========================================================================
+# process_LD_matrix: real .cor.xz fixtures
+# ===========================================================================
+
+test_that("process_LD_matrix reads .cor.xz with explicit bim path", {
+  ld_file <- file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.txt.xz")
+  bim_file <- file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.bim")
+  result <- pecotmr:::process_LD_matrix(ld_file, bim_file)
+  expect_true(is.list(result))
+  expect_true(is.matrix(result$LD_matrix))
+  expect_equal(nrow(result$LD_matrix), 5L)
+  expect_equal(ncol(result$LD_matrix), 5L)
+  expect_true(isSymmetric(result$LD_matrix))
+  # Diagonal should be 1
+  expect_true(all(abs(diag(result$LD_matrix) - 1) < 1e-4))
+  # Variant names should be chr:pos:A2:A1 format
+  expect_true(all(grepl("^chr1:", rownames(result$LD_matrix))))
+  # LD_variants data frame
+  expect_true(is.data.frame(result$LD_variants))
+  expect_true("variants" %in% names(result$LD_variants))
+  expect_equal(nrow(result$LD_variants), 5L)
+})
+
+test_that("process_LD_matrix reads different blocks consistently", {
+  bim1 <- file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.bim")
+  bim2 <- file.path(geno_test_data_dir, "LD_block_2.chr1_1200_1400.float16.bim")
+  ld1 <- file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.txt.xz")
+  ld2 <- file.path(geno_test_data_dir, "LD_block_2.chr1_1200_1400.float16.txt.xz")
+  r1 <- pecotmr:::process_LD_matrix(ld1, bim1)
+  r2 <- pecotmr:::process_LD_matrix(ld2, bim2)
+  # Different blocks should have different variant positions
+  expect_false(any(rownames(r1$LD_matrix) %in% rownames(r2$LD_matrix)))
+})
+
+test_that("process_LD_matrix reads 9-column bim with allele_freq/variance/n_nomiss", {
+  ld_file <- file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.txt.xz")
+  bim_file <- file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.9col.bim")
+  result <- pecotmr:::process_LD_matrix(ld_file, bim_file)
+  expect_equal(nrow(result$LD_matrix), 5L)
+  expect_true(isSymmetric(result$LD_matrix))
+  # 9-column bim should include extra columns
+  expect_true("allele_freq" %in% names(result$LD_variants))
+  expect_true("variance" %in% names(result$LD_variants))
+  expect_true("n_nomiss" %in% names(result$LD_variants))
+  expect_equal(result$LD_variants$allele_freq, c(0.3, 0.4, 0.2, 0.5, 0.15))
+  expect_equal(result$LD_variants$n_nomiss, rep(500, 5))
+})
+
+test_that("load_LD_matrix propagates allele_freq/variance/n_nomiss from 9-col bim", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_precomp_9col_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file)
+  cat(paste("1", "1000", "1200",
+            "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.9col.bim",
+            sep = "\t"), "\n", file = meta_file, append = TRUE)
+  result <- load_LD_matrix(meta_file, "chr1:1000-1190")
+  # ref_panel should carry the extra columns from the 9-col bim
+  expect_true("allele_freq" %in% names(result$ref_panel))
+  expect_true("variance" %in% names(result$ref_panel))
+  expect_true("n_nomiss" %in% names(result$ref_panel))
+  expect_equal(result$ref_panel$allele_freq, c(0.3, 0.4, 0.2, 0.5, 0.15))
+  expect_equal(result$ref_panel$n_nomiss, rep(500, 5))
+})
+
+# ===========================================================================
+# get_regional_ld_meta: real .cor.xz fixtures
+# ===========================================================================
+
+test_that("get_regional_ld_meta returns correct file paths for single block", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_regional_single_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  lines <- c(
+    paste("chrom", "start", "end", "path", sep = "\t"),
+    paste("1", "1000", "1200",
+          "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim",
+          sep = "\t"),
+    paste("1", "1200", "1400",
+          "LD_block_2.chr1_1200_1400.float16.txt.xz,LD_block_2.chr1_1200_1400.float16.bim",
+          sep = "\t")
+  )
+  writeLines(lines, meta_file)
+  result <- pecotmr:::get_regional_ld_meta(meta_file, "chr1:1050-1150")
+  expect_true(is.list(result))
+  expect_true(length(result$intersections$LD_file_paths) >= 1)
+  # All returned paths should exist
+  expect_true(all(file.exists(result$intersections$LD_file_paths)))
+  expect_true(all(file.exists(result$intersections$bim_file_paths)))
+})
+
+test_that("get_regional_ld_meta spans multiple blocks for wide region", {
+  meta_file <- file.path(geno_test_data_dir, "ld_meta_regional_multi_tmp.tsv")
+  on.exit(unlink(meta_file), add = TRUE)
+  lines <- c(
+    paste("chrom", "start", "end", "path", sep = "\t"),
+    paste("1", "1000", "1200",
+          "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim",
+          sep = "\t"),
+    paste("1", "1200", "1400",
+          "LD_block_2.chr1_1200_1400.float16.txt.xz,LD_block_2.chr1_1200_1400.float16.bim",
+          sep = "\t"),
+    paste("1", "1400", "1600",
+          "LD_block_3.chr1_1400_1600.float16.txt.xz,LD_block_3.chr1_1400_1600.float16.bim",
+          sep = "\t")
+  )
+  writeLines(lines, meta_file)
+  result <- pecotmr:::get_regional_ld_meta(meta_file, "chr1:1000-1500")
+  expect_true(length(result$intersections$LD_file_paths) >= 2)
+})
+
+# ===========================================================================
+# drop_collinear_columns: strategy variants
+# ===========================================================================
+
+test_that("drop_collinear_columns variance strategy removes lowest-variance column", {
+  set.seed(42)
+  X <- matrix(rnorm(100 * 4), 100, 4)
+  colnames(X) <- c("a", "b", "c", "d")
+  # Make column "c" have near-zero variance
+  X[, "c"] <- X[1, "c"]
+  result <- pecotmr:::drop_collinear_columns(
+    X, c("b", "c", "d"), strategy = "variance"
+  )
+  expect_false("c" %in% colnames(result))
+  expect_equal(ncol(result), 3L)
+})
+
+test_that("drop_collinear_columns response_correlation strategy works", {
+  set.seed(42)
+  X <- matrix(rnorm(100 * 3), 100, 3)
+  colnames(X) <- c("a", "b", "c")
+  y <- X[, "a"] + rnorm(100, sd = 0.1)  # y correlates strongly with "a"
+  result <- pecotmr:::drop_collinear_columns(
+    X, c("a", "b", "c"), strategy = "response_correlation", response = y
+  )
+  # Should keep "a" (highest |cor| with response) and remove one of b/c
+  expect_true("a" %in% colnames(result))
+  expect_equal(ncol(result), 2L)
+})
+
+test_that("drop_collinear_columns response_correlation errors without response", {
+  X <- matrix(1:12, 4, 3)
+  colnames(X) <- c("a", "b", "c")
+  expect_error(
+    pecotmr:::drop_collinear_columns(X, c("a", "b"), strategy = "response_correlation"),
+    "response must be supplied"
+  )
+})
+
+test_that("drop_collinear_columns with single problematic column removes it", {
+  X <- matrix(rnorm(40), 10, 4)
+  colnames(X) <- c("a", "b", "c", "d")
+  result <- pecotmr:::drop_collinear_columns(X, "b", strategy = "correlation")
+  expect_false("b" %in% colnames(result))
+  expect_equal(ncol(result), 3L)
+})
+
+# ===========================================================================
+# enforce_design_full_rank: additional strategies and fallback paths
+# ===========================================================================
+
+test_that("enforce_design_full_rank variance strategy produces full rank", {
+  set.seed(42)
+  X <- matrix(rnorm(100 * 4), 100, 4)
+  X[, 4] <- X[, 1] + X[, 2]  # rank deficient
+  colnames(X) <- c("a", "b", "c", "d")
+  C <- matrix(rnorm(100), 100, 1)
+  result <- enforce_design_full_rank(X, C, strategy = "variance")
+  full_design <- cbind(1, result, C)
+  expect_equal(qr(full_design)$rank, ncol(full_design))
+  expect_true(ncol(result) < ncol(X))
+})
+
+test_that("enforce_design_full_rank response_correlation strategy works", {
+  set.seed(42)
+  X <- matrix(rnorm(100 * 4), 100, 4)
+  X[, 4] <- X[, 1] + X[, 2]
+  colnames(X) <- c("a", "b", "c", "d")
+  C <- matrix(rnorm(100), 100, 1)
+  y <- X[, "a"] + rnorm(100, sd = 0.1)
+  result <- enforce_design_full_rank(X, C, strategy = "response_correlation", response = y)
+  full_design <- cbind(1, result, C)
+  expect_equal(qr(full_design)$rank, ncol(full_design))
+})
+
+test_that("enforce_design_full_rank returns unchanged X when already full rank", {
+  set.seed(42)
+  X <- matrix(rnorm(100 * 3), 100, 3)
+  colnames(X) <- c("a", "b", "c")
+  C <- matrix(rnorm(100), 100, 1)
+  result <- enforce_design_full_rank(X, C, strategy = "correlation")
+  expect_equal(ncol(result), ncol(X))
+})
+
+test_that("enforce_design_full_rank fallback to correlation pruning works", {
+  set.seed(42)
+  n <- 50
+  p <- 10
+  X <- matrix(rnorm(n * 3), n, 3)
+  # Create highly collinear columns that are hard for iterative removal
+  X <- cbind(X, X[, 1] + rnorm(n, sd = 1e-10),
+                 X[, 2] + rnorm(n, sd = 1e-10),
+                 X[, 3] + rnorm(n, sd = 1e-10),
+                 X[, 1] + X[, 2] + rnorm(n, sd = 1e-10))
+  colnames(X) <- paste0("v", seq_len(ncol(X)))
+  C <- matrix(rnorm(n), n, 1)
+  result <- enforce_design_full_rank(X, C, strategy = "correlation",
+                                      max_iterations = 2L)
+  full_design <- cbind(1, result, C)
+  expect_equal(qr(full_design)$rank, ncol(full_design))
+})
+
+# ===========================================================================
+# ld_clump_by_score: edge cases
+# ===========================================================================
+
+test_that("ld_clump_by_score errors on empty matrix", {
+  skip_if_not_installed("bigsnpr")
+  skip_if_not_installed("bigstatsr")
+  X <- matrix(numeric(0), nrow = 10, ncol = 0)
+  expect_error(ld_clump_by_score(X, score = numeric(0), chr = integer(0), pos = integer(0)),
+               "at least one column")
+})
+
+test_that("ld_clump_by_score returns 1L for single variant", {
+  skip_if_not_installed("bigsnpr")
+  skip_if_not_installed("bigstatsr")
+  X <- matrix(c(0, 1, 2, 1, 0), ncol = 1)
+  result <- ld_clump_by_score(X, score = 1.0, chr = 1L, pos = 100L)
+  expect_equal(result, 1L)
+})
+
+test_that("ld_clump_by_score errors on mismatched score length", {
+  skip_if_not_installed("bigsnpr")
+  skip_if_not_installed("bigstatsr")
+  X <- matrix(rnorm(20), 5, 4)
+  expect_error(ld_clump_by_score(X, score = c(1, 2), chr = rep(1L, 4), pos = 1:4),
+               "length\\(score\\)")
+})
+
+test_that("ld_clump_by_score errors on mismatched chr/pos length", {
+  skip_if_not_installed("bigsnpr")
+  skip_if_not_installed("bigstatsr")
+  X <- matrix(rnorm(20), 5, 4)
+  expect_error(ld_clump_by_score(X, score = runif(4), chr = rep(1L, 2), pos = 1:4),
+               "chr and pos")
+})
+
+# ===========================================================================
+# ld_prune_by_correlation: verbose paths
+# ===========================================================================
+
+test_that("ld_prune_by_correlation verbose reports pruning", {
+  # Create matrix with correlated columns
+  set.seed(42)
+  base <- rnorm(100)
+  X <- cbind(base, base + rnorm(100, sd = 0.1), rnorm(100), rnorm(100), rnorm(100))
+  colnames(X) <- paste0("v", 1:5)
+  expect_message(
+    ld_prune_by_correlation(X, cor_thres = 0.5, verbose = TRUE),
+    "pruned"
+  )
+})
+
+test_that("ld_prune_by_correlation verbose reports no pruning", {
+  # Create a small matrix with no correlated columns
+  set.seed(42)
+  X <- matrix(rnorm(500), 100, 5)
+  colnames(X) <- paste0("v", 1:5)
+  expect_message(
+    ld_prune_by_correlation(X, cor_thres = 0.999, verbose = TRUE),
+    "no columns pruned"
+  )
 })