From ef7b83cf7763ec3f2aa694deec510f74ebb9db18 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Sun, 8 Jun 2025 14:36:17 -0600 Subject: [PATCH 01/12] Added support for weighted sampling without replacement in C++ with R wrappers and benchmark code --- NAMESPACE | 1 + R/cpp11.R | 4 ++ R/model.R | 18 +++++ include/stochtree/discrete_sampler.h | 65 +++++++++++++++++++ man/sample_without_replacement.Rd | 31 +++++++++ src/cpp11.cpp | 8 +++ src/sampler.cpp | 38 ++++++++++- src/stochtree_types.h | 1 + tools/debug/sampling_without_replacement.R | 7 ++ .../sampling_without_replacement_comparison.R | 23 +++++++ ...pling_without_replacement_microbenchmark.R | 13 ++++ 11 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 include/stochtree/discrete_sampler.h create mode 100644 man/sample_without_replacement.Rd create mode 100644 tools/debug/sampling_without_replacement.R create mode 100644 tools/perf/sampling_without_replacement_comparison.R create mode 100644 tools/perf/sampling_without_replacement_microbenchmark.R diff --git a/NAMESPACE b/NAMESPACE index a78a5123..bad6d23c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,7 @@ export(rootResetRandomEffectsModel) export(rootResetRandomEffectsTracker) export(sampleGlobalErrorVarianceOneIteration) export(sampleLeafVarianceOneIteration) +export(sample_without_replacement) export(saveBARTModelToJson) export(saveBARTModelToJsonFile) export(saveBARTModelToJsonString) diff --git a/R/cpp11.R b/R/cpp11.R index 2260cad6..c6f147ba 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -640,6 +640,10 @@ forest_tracker_cpp <- function(data, feature_types, num_trees, n) { .Call(`_stochtree_forest_tracker_cpp`, data, feature_types, num_trees, n) } +sample_without_replacement_integer_cpp <- function(population_vector, sampling_probs, sample_size) { + .Call(`_stochtree_sample_without_replacement_integer_cpp`, population_vector, sampling_probs, sample_size) +} + init_json_cpp <- function() { .Call(`_stochtree_init_json_cpp`) } diff --git a/R/model.R b/R/model.R index dfa88fc9..9519591c 100644 --- a/R/model.R +++ b/R/model.R @@ -271,3 +271,21 @@ createForestModel <- function(forest_dataset, forest_model_config, global_model_ ))) } + +#' Draw `sample_size` samples from `population_vector` without replacement, weighted by `sampling_probabilities` +#' +#' @param population_vector Vector from which to draw samples. +#' @param sampling_probabilities Vector of probabilities of drawing each element of `population_vector`. +#' @param sample_size Number of samples to draw from `population_vector`. Must be less than or equal to `length(population_vector)` +#' +#' @returns Vector of size `sample_size` +#' @export +#' +#' @examples +#' a <- c(4,3,2,5,1,9,7) +#' p <- c(0.7,0.2,0.05,0.02,0.01,0.01,0.01) +#' num_samples <- 5 +#' sample_without_replacement(a, p, num_samples) +sample_without_replacement <- function(population_vector, sampling_probabilities, sample_size) { + return(sample_without_replacement_integer_cpp(population_vector, sampling_probabilities, sample_size)) +} diff --git a/include/stochtree/discrete_sampler.h b/include/stochtree/discrete_sampler.h new file mode 100644 index 00000000..c2a55882 --- /dev/null +++ b/include/stochtree/discrete_sampler.h @@ -0,0 +1,65 @@ +/*! + * Copyright (c) 2024 stochtree authors. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. 
+ */
+#ifndef STOCHTREE_DISCRETE_SAMPLER_H_
+#define STOCHTREE_DISCRETE_SAMPLER_H_
+#include <algorithm>
+#include <cstring>
+#include <numeric>
+#include <random>
+#include <vector>
+
+namespace StochTree {
+
+/*! \brief Sample without replacement according to a set of probability weights.
+ * This template function is a C++ variant of numpy's implementation:
+ * https://github.com/numpy/numpy/blob/031f44252d613f4524ad181e3eb2ae2791e22187/numpy/random/_generator.pyx#L925
+ */
+template <typename container_type, typename prob_type>
+void sample_without_replacement(container_type* output, prob_type* p, container_type* a, int population_size, int sample_size, std::mt19937& gen) {
+  // Copy the probability weights so that weights of already-sampled elements can be zeroed out
+  std::vector<prob_type> p_copy(population_size);
+  std::memcpy(p_copy.data(), p, sizeof(prob_type) * population_size);
+  std::vector<int> indices(sample_size);
+  std::uniform_real_distribution<> unif(0.0, 1.0);
+  std::vector<double> unif_samples(sample_size);
+  std::vector<prob_type> cdf(population_size);
+
+  int fulfilled_sample_count = 0;
+  int remaining_sample_count = sample_size - fulfilled_sample_count;
+  while (fulfilled_sample_count < sample_size) {
+    if (fulfilled_sample_count > 0) {
+      for (int i = 0; i < fulfilled_sample_count; i++) p_copy[indices[i]] = 0.0;
+    }
+    std::generate(unif_samples.begin(), unif_samples.begin() + remaining_sample_count, [&gen, &unif](){
+      return unif(gen);
+    });
+    // Build and renormalize the CDF of the remaining (not-yet-sampled) weights
+    std::partial_sum(p_copy.cbegin(), p_copy.cend(), cdf.begin());
+    for (int i = 0; i < cdf.size(); i++) {
+      cdf[i] = cdf[i] / cdf[cdf.size()-1];
+    }
+    // Invert each uniform draw through the CDF, retaining only the unique indices found
+    std::vector<int> matches(remaining_sample_count);
+    for (int i = 0; i < remaining_sample_count; i++) {
+      auto match = std::upper_bound(cdf.cbegin(), cdf.cend(), unif_samples[i]);
+      if (match != cdf.cend()) {
+        matches[i] = std::distance(cdf.cbegin(), match);
+      } else {
+        matches[i] = std::distance(cdf.cbegin(), cdf.cend());
+      }
+    }
+    std::sort(matches.begin(), matches.end());
+    auto last_unique = std::unique(matches.begin(), matches.end());
+    matches.erase(last_unique, matches.end());
+    for (int i = 0; i < matches.size(); i++) {
+      indices[fulfilled_sample_count + i] = matches[i];
+    }
+    fulfilled_sample_count += matches.size();
+    remaining_sample_count -= matches.size();
+  }
+  for (int i = 0; i < sample_size; i++) {
+    output[i] = a[indices[i]];
+  }
+}
+
+} // namespace StochTree
+
+#endif // STOCHTREE_DISCRETE_SAMPLER_H_
diff --git a/man/sample_without_replacement.Rd b/man/sample_without_replacement.Rd
new file mode 100644
index 00000000..36041e7d
--- /dev/null
+++ b/man/sample_without_replacement.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/model.R
+\name{sample_without_replacement}
+\alias{sample_without_replacement}
+\title{Draw \code{sample_size} samples from \code{population_vector} without replacement, weighted by \code{sampling_probabilities}}
+\usage{
+sample_without_replacement(
+  population_vector,
+  sampling_probabilities,
+  sample_size
+)
+}
+\arguments{
+\item{population_vector}{Vector from which to draw samples.}
+
+\item{sampling_probabilities}{Vector of probabilities of drawing each element of \code{population_vector}.}
+
+\item{sample_size}{Number of samples to draw from \code{population_vector}.
Must be less than or equal to \code{length(population_vector)}} +} +\value{ +Vector of size \code{sample_size} +} +\description{ +Draw \code{sample_size} samples from \code{population_vector} without replacement, weighted by \code{sampling_probabilities} +} +\examples{ +a <- c(4,3,2,5,1,9,7) +p <- c(0.7,0.2,0.05,0.02,0.01,0.01,0.01) +num_samples <- 5 +sample_without_replacement(a, p, num_samples) +} diff --git a/src/cpp11.cpp b/src/cpp11.cpp index d32eba1f..362656fa 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -1186,6 +1186,13 @@ extern "C" SEXP _stochtree_forest_tracker_cpp(SEXP data, SEXP feature_types, SEX return cpp11::as_sexp(forest_tracker_cpp(cpp11::as_cpp>>(data), cpp11::as_cpp>(feature_types), cpp11::as_cpp>(num_trees), cpp11::as_cpp>(n))); END_CPP11 } +// sampler.cpp +cpp11::writable::integers sample_without_replacement_integer_cpp(cpp11::integers population_vector, cpp11::doubles sampling_probs, int sample_size); +extern "C" SEXP _stochtree_sample_without_replacement_integer_cpp(SEXP population_vector, SEXP sampling_probs, SEXP sample_size) { + BEGIN_CPP11 + return cpp11::as_sexp(sample_without_replacement_integer_cpp(cpp11::as_cpp>(population_vector), cpp11::as_cpp>(sampling_probs), cpp11::as_cpp>(sample_size))); + END_CPP11 +} // serialization.cpp cpp11::external_pointer init_json_cpp(); extern "C" SEXP _stochtree_init_json_cpp() { @@ -1673,6 +1680,7 @@ static const R_CallMethodDef CallEntries[] = { {"_stochtree_sample_mcmc_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_mcmc_one_iteration_cpp, 17}, {"_stochtree_sample_sigma2_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_sigma2_one_iteration_cpp, 5}, {"_stochtree_sample_tau_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_tau_one_iteration_cpp, 4}, + {"_stochtree_sample_without_replacement_integer_cpp", (DL_FUNC) &_stochtree_sample_without_replacement_integer_cpp, 3}, {"_stochtree_set_leaf_value_active_forest_cpp", (DL_FUNC) &_stochtree_set_leaf_value_active_forest_cpp, 2}, {"_stochtree_set_leaf_value_forest_container_cpp", (DL_FUNC) &_stochtree_set_leaf_value_forest_container_cpp, 2}, {"_stochtree_set_leaf_vector_active_forest_cpp", (DL_FUNC) &_stochtree_set_leaf_vector_active_forest_cpp, 2}, diff --git a/src/sampler.cpp b/src/sampler.cpp index 514ae006..30ff2540 100644 --- a/src/sampler.cpp +++ b/src/sampler.cpp @@ -1,6 +1,7 @@ #include #include "stochtree_types.h" #include +#include #include #include #include @@ -281,4 +282,39 @@ cpp11::external_pointer forest_tracker_cpp(cpp11::exte // Release management of the pointer to R session return cpp11::external_pointer(tracker_ptr_.release()); -} \ No newline at end of file +} + +[[cpp11::register]] +cpp11::writable::integers sample_without_replacement_integer_cpp( + cpp11::integers population_vector, + cpp11::doubles sampling_probs, + int sample_size +) { + // Unpack pointer to population vector + int population_size = population_vector.size(); + int* population_vector_ptr = INTEGER(PROTECT(population_vector)); + + // Unpack pointer to sampling probabilities + double* sampling_probs_ptr = REAL(PROTECT(sampling_probs)); + + // Create output vector + cpp11::writable::integers output(sample_size); + + // Unpack pointer to output vector + int* output_ptr = INTEGER(PROTECT(output)); + + // Create C++ RNG + std::random_device rd; + std::mt19937 gen(rd()); + + // Run the sampler + StochTree::sample_without_replacement( + output_ptr, sampling_probs_ptr, population_vector_ptr, population_size, sample_size, gen + ); + + // Unprotect raw pointers + UNPROTECT(3); + + // Return 
result + return(output); +} diff --git a/src/stochtree_types.h b/src/stochtree_types.h index 33a8fb61..d3d6327c 100644 --- a/src/stochtree_types.h +++ b/src/stochtree_types.h @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/tools/debug/sampling_without_replacement.R b/tools/debug/sampling_without_replacement.R new file mode 100644 index 00000000..9f32081e --- /dev/null +++ b/tools/debug/sampling_without_replacement.R @@ -0,0 +1,7 @@ +library(stochtree) + +# Run sampler +a <- c(4,3,2,5,1,9,7) +p <- c(0.7,0.2,0.05,0.02,0.01,0.01,0.01) +num_samples <- 5 +sample_without_replacement(as.integer(a), p, num_samples) diff --git a/tools/perf/sampling_without_replacement_comparison.R b/tools/perf/sampling_without_replacement_comparison.R new file mode 100644 index 00000000..1dc9c60a --- /dev/null +++ b/tools/perf/sampling_without_replacement_comparison.R @@ -0,0 +1,23 @@ +library(stochtree) + +a <- c(4,3,2,5,1,9,7) +p <- c(0.7,0.2,0.05,0.02,0.01,0.01,0.01) +num_mc <- 100000 +num_samples <- 5 +results_stochtree <- matrix(NA_integer_, nrow = num_mc, ncol = num_samples) +results_base_R <- matrix(NA_integer_, nrow = num_mc, ncol = num_samples) + +for (i in 1:num_mc) { + results_stochtree[i,] <- sample_without_replacement(as.integer(a), p, num_samples) + results_base_R[i,] <- sample(a, num_samples, replace = F, prob = p) +} + + +count_elems_stochtree <- rep(NA_integer_, length(a)) +count_elems_base_R <- rep(NA_integer_, length(a)) +for (i in 1:length(a)) { + count_elems_stochtree[i] <- sum(rowSums(results_stochtree == a[i])) + count_elems_base_R[i] <- sum(rowSums(results_base_R == a[i])) +} +count_elems_stochtree +count_elems_base_R diff --git a/tools/perf/sampling_without_replacement_microbenchmark.R b/tools/perf/sampling_without_replacement_microbenchmark.R new file mode 100644 index 00000000..254ed05a --- /dev/null +++ b/tools/perf/sampling_without_replacement_microbenchmark.R @@ -0,0 +1,13 @@ +library(microbenchmark) +library(stochtree) + +# Run microbenchmark +num_elements <- 10000 +a <- 1:num_elements +p <- runif(num_elements) +p <- p / sum(p) +num_samples <- 500 +(bench_results <- microbenchmark( + sample(a, num_samples, replace = F, prob = p), + sample_without_replacement(as.integer(a), p, num_samples) +)) From d82432e383220659ff9bb001ce6f30caa512c35b Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Sun, 8 Jun 2025 14:54:00 -0600 Subject: [PATCH 02/12] Fixed R sampling without replacement example --- R/model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/model.R b/R/model.R index 9519591c..f1ea8e54 100644 --- a/R/model.R +++ b/R/model.R @@ -282,7 +282,7 @@ createForestModel <- function(forest_dataset, forest_model_config, global_model_ #' @export #' #' @examples -#' a <- c(4,3,2,5,1,9,7) +#' a <- as.integer(c(4,3,2,5,1,9,7)) #' p <- c(0.7,0.2,0.05,0.02,0.01,0.01,0.01) #' num_samples <- 5 #' sample_without_replacement(a, p, num_samples) From 1fe4101d1d0759f3e216a46406057bdb68b550c1 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Sun, 8 Jun 2025 15:27:33 -0600 Subject: [PATCH 03/12] Updated docs --- man/sample_without_replacement.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/sample_without_replacement.Rd b/man/sample_without_replacement.Rd index 36041e7d..8f23d8e1 100644 --- a/man/sample_without_replacement.Rd +++ b/man/sample_without_replacement.Rd @@ -24,7 +24,7 @@ Vector of size \code{sample_size} Draw \code{sample_size} samples from \code{population_vector} without replacement, weighted by 
\code{sampling_probabilities} } \examples{ -a <- c(4,3,2,5,1,9,7) +a <- as.integer(c(4,3,2,5,1,9,7)) p <- c(0.7,0.2,0.05,0.02,0.01,0.01,0.01) num_samples <- 5 sample_without_replacement(a, p, num_samples) From 4cbcc5a4169ad02dbb17c66a6f97c11f1a1250e8 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Mon, 9 Jun 2025 16:05:09 -0500 Subject: [PATCH 04/12] Added support for GFR feature subsampling in the BART R API --- R/bart.R | 24 +++++-- R/config.R | 42 +++++++++++- R/cpp11.R | 4 +- R/model.R | 3 +- include/stochtree/tree_sampler.h | 47 ++++++++++---- man/ForestModelConfig.Rd | 40 +++++++++++- man/bart.Rd | 3 + man/createForestModelConfig.Rd | 5 +- src/cpp11.cpp | 8 +-- src/sampler.cpp | 10 +-- tools/debug/gfr_feature_subsample_debug.R | 36 +++++++++++ .../gfr_feature_subsample_microbenchmark.R | 64 +++++++++++++++++++ 12 files changed, 252 insertions(+), 34 deletions(-) create mode 100644 tools/debug/gfr_feature_subsample_debug.R create mode 100644 tools/perf/gfr_feature_subsample_microbenchmark.R diff --git a/R/bart.R b/R/bart.R index 224432ed..6576c83e 100644 --- a/R/bart.R +++ b/R/bart.R @@ -59,6 +59,7 @@ #' - `keep_vars` Vector of variable names or column indices denoting variables that should be included in the forest. Default: `NULL`. #' - `drop_vars` Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: `NULL`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. #' - `probit_outcome_model` Whether or not the outcome should be modeled as explicitly binary via a probit link. If `TRUE`, `y` must only contain the values `0` and `1`. Default: `FALSE`. +#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. #' #' @param variance_forest_params (Optional) A list of variance forest model parameters, each of which has a default value processed internally, so this argument list is optional. #' @@ -73,6 +74,7 @@ #' - `var_forest_prior_scale` Scale parameter in the `IG(var_forest_prior_shape, var_forest_prior_scale)` conditional error variance model (which is only sampled if `num_trees > 0`). Calibrated internally as `num_trees / leaf_prior_calibration_param^2` if not set. #' - `keep_vars` Vector of variable names or column indices denoting variables that should be included in the forest. Default: `NULL`. #' - `drop_vars` Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: `NULL`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. +#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. #' #' @return List of sampling outputs and a wrapper around the sampled forests (which can be used for in-memory prediction on new data, or serialized to JSON on disk). 
#' @export @@ -98,6 +100,7 @@ #' X_train <- X[train_inds,] #' y_test <- y[test_inds] #' y_train <- y[train_inds] +#' #' bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, #' num_gfr = 10, num_burnin = 0, num_mcmc = 10) bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train = NULL, @@ -127,7 +130,8 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train sample_sigma2_leaf = TRUE, sigma2_leaf_init = NULL, sigma2_leaf_shape = 3, sigma2_leaf_scale = NULL, keep_vars = NULL, drop_vars = NULL, - probit_outcome_model = FALSE + probit_outcome_model = FALSE, + num_features_subsample = NULL ) mean_forest_params_updated <- preprocessParams( mean_forest_params_default, mean_forest_params @@ -141,7 +145,8 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train var_forest_leaf_init = NULL, var_forest_prior_shape = NULL, var_forest_prior_scale = NULL, - keep_vars = NULL, drop_vars = NULL + keep_vars = NULL, drop_vars = NULL, + num_features_subsample = NULL ) variance_forest_params_updated <- preprocessParams( variance_forest_params_default, variance_forest_params @@ -176,6 +181,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train keep_vars_mean <- mean_forest_params_updated$keep_vars drop_vars_mean <- mean_forest_params_updated$drop_vars probit_outcome_model <- mean_forest_params_updated$probit_outcome_model + num_features_subsample_mean <- mean_forest_params_updated$num_features_subsample # 3. Variance forest parameters num_trees_variance <- variance_forest_params_updated$num_trees @@ -189,6 +195,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train b_forest <- variance_forest_params_updated$var_forest_prior_scale keep_vars_variance <- variance_forest_params_updated$keep_vars drop_vars_variance <- variance_forest_params_updated$drop_vars + num_features_subsample_variance <- variance_forest_params_updated$num_features_subsample # Check if there are enough GFR samples to seed num_chains samplers if (num_gfr > 0) { @@ -373,6 +380,15 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train variable_weights_variance <- variable_weights_variance[original_var_indices]*variable_weights_adj variable_weights_variance[!(original_var_indices %in% variable_subset_variance)] <- 0 } + + # Set num_features_subsample to default, ncol(X_train), if not already set + if (is.null(num_features_subsample_mean)) { + num_features_subsample_mean <- ncol(X_train) + } + if (is.null(num_features_subsample_variance)) { + num_features_subsample_variance <- ncol(X_train) + } + # Convert all input data to matrices if not already converted if ((is.null(dim(leaf_basis_train))) && (!is.null(leaf_basis_train))) { @@ -633,7 +649,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train num_observations=nrow(X_train), variable_weights=variable_weights_mean, leaf_dimension=leaf_dimension, alpha=alpha_mean, beta=beta_mean, min_samples_leaf=min_samples_leaf_mean, max_depth=max_depth_mean, leaf_model_type=leaf_model_mean_forest, leaf_model_scale=current_leaf_scale, - cutpoint_grid_size=cutpoint_grid_size) + cutpoint_grid_size=cutpoint_grid_size, num_features_subsample=num_features_subsample_mean) forest_model_mean <- createForestModel(forest_dataset_train, forest_model_config_mean, global_model_config) } if (include_variance_forest) { @@ -641,7 +657,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train 
num_observations=nrow(X_train), variable_weights=variable_weights_variance, leaf_dimension=1, alpha=alpha_variance, beta=beta_variance, min_samples_leaf=min_samples_leaf_variance, max_depth=max_depth_variance, leaf_model_type=leaf_model_variance_forest, - cutpoint_grid_size=cutpoint_grid_size) + cutpoint_grid_size=cutpoint_grid_size, num_features_subsample=num_features_subsample_variance) forest_model_variance <- createForestModel(forest_dataset_train, forest_model_config_variance, global_model_config) } diff --git a/R/config.R b/R/config.R index a5982cac..53c259ea 100644 --- a/R/config.R +++ b/R/config.R @@ -61,6 +61,9 @@ ForestModelConfig <- R6::R6Class( #' @field cutpoint_grid_size Number of unique cutpoints to consider cutpoint_grid_size = NULL, + + #' @field num_features_subsample Number of features to subsample for the GFR algorithm + num_features_subsample = NULL, #' Create a new ForestModelConfig object. #' @@ -80,13 +83,14 @@ ForestModelConfig <- R6::R6Class( #' @param variance_forest_shape Shape parameter for IG leaf models (applicable when `leaf_model_type = 3`). Default: `1`. #' @param variance_forest_scale Scale parameter for IG leaf models (applicable when `leaf_model_type = 3`). Default: `1`. #' @param cutpoint_grid_size Number of unique cutpoints to consider (default: `100`) + #' @param num_features_subsample Number of features to subsample for the GFR algorithm #' #' @return A new ForestModelConfig object. initialize = function(feature_types = NULL, sweep_update_indices = NULL, num_trees = NULL, num_features = NULL, num_observations = NULL, variable_weights = NULL, leaf_dimension = 1, alpha = 0.95, beta = 2.0, min_samples_leaf = 5, max_depth = -1, leaf_model_type = 1, leaf_model_scale = NULL, variance_forest_shape = 1.0, - variance_forest_scale = 1.0, cutpoint_grid_size = 100) { + variance_forest_scale = 1.0, cutpoint_grid_size = 100, num_features_subsample = NULL) { if (is.null(feature_types)) { if (is.null(num_features)) { stop("Neither of `num_features` nor `feature_types` (a vector from which `num_features` can be inferred) was provided. 
Please provide at least one of these inputs when creating a ForestModelConfig object.") @@ -132,6 +136,16 @@ ForestModelConfig <- R6::R6Class( self$variance_forest_shape <- variance_forest_shape self$variance_forest_scale <- variance_forest_scale self$cutpoint_grid_size <- cutpoint_grid_size + if (is.null(num_features_subsample)) { + num_features_subsample <- num_features + } + if (num_features_subsample > num_features) { + stop("`num_features_subsample` cannot be larger than `num_features`") + } + if (num_features_subsample <= 0) { + stop("`num_features_subsample` must at least 1") + } + self$num_features_subsample <- num_features_subsample if (!(as.integer(leaf_model_type) == leaf_model_type)) { stop("`leaf_model_type` must be an integer between 0 and 3") @@ -255,6 +269,19 @@ ForestModelConfig <- R6::R6Class( self$cutpoint_grid_size <- cutpoint_grid_size }, + #' @description + #' Update number of features to subsample for the GFR algorithm + #' @param num_features_subsample Number of features to subsample for the GFR algorithm + update_num_features_subsample = function(num_features_subsample) { + if (num_features_subsample > self$num_features) { + stop("`num_features_subsample` cannot be larger than `num_features`") + } + if (num_features_subsample <= 0) { + stop("`num_features_subsample` must at least 1") + } + self$num_features_subsample <- num_features_subsample + }, + #' @description #' Query feature types for this ForestModelConfig object #' @returns Vector of integer-coded feature types (integers where 0 = numeric, 1 = ordered categorical, 2 = unordered categorical) @@ -358,6 +385,13 @@ ForestModelConfig <- R6::R6Class( #' @returns Number of unique cutpoints to consider get_cutpoint_grid_size = function() { return(self$cutpoint_grid_size) + }, + + #' @description + #' Query number of features to subsample for the GFR algorithm + #' @returns Number of features to subsample for the GFR algorithm + get_num_features_subsample = function() { + return(self$num_features_subsample) } ) ) @@ -424,6 +458,7 @@ GlobalModelConfig <- R6::R6Class( #' @param variance_forest_shape Shape parameter for IG leaf models (applicable when `leaf_model_type = 3`). Default: `1`. #' @param variance_forest_scale Scale parameter for IG leaf models (applicable when `leaf_model_type = 3`). Default: `1`. 
#' @param cutpoint_grid_size Number of unique cutpoints to consider (default: `100`) +#' @param num_features_subsample Number of features to subsample for the GFR algorithm #' @return ForestModelConfig object #' @export #' @@ -433,12 +468,13 @@ createForestModelConfig <- function(feature_types = NULL, sweep_update_indices = num_observations = NULL, variable_weights = NULL, leaf_dimension = 1, alpha = 0.95, beta = 2.0, min_samples_leaf = 5, max_depth = -1, leaf_model_type = 1, leaf_model_scale = NULL, variance_forest_shape = 1.0, - variance_forest_scale = 1.0, cutpoint_grid_size = 100){ + variance_forest_scale = 1.0, cutpoint_grid_size = 100, + num_features_subsample = NULL){ return(invisible(( ForestModelConfig$new(feature_types, sweep_update_indices, num_trees, num_features, num_observations, variable_weights, leaf_dimension, alpha, beta, min_samples_leaf, max_depth, leaf_model_type, leaf_model_scale, variance_forest_shape, - variance_forest_scale, cutpoint_grid_size) + variance_forest_scale, cutpoint_grid_size, num_features_subsample) ))) } diff --git a/R/cpp11.R b/R/cpp11.R index c6f147ba..943206f1 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -580,8 +580,8 @@ compute_leaf_indices_cpp <- function(forest_container, covariates, forest_nums) .Call(`_stochtree_compute_leaf_indices_cpp`, forest_container, covariates, forest_nums) } -sample_gfr_one_iteration_cpp <- function(data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest) { - invisible(.Call(`_stochtree_sample_gfr_one_iteration_cpp`, data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest)) +sample_gfr_one_iteration_cpp <- function(data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_features_subsample) { + invisible(.Call(`_stochtree_sample_gfr_one_iteration_cpp`, data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_features_subsample)) } sample_mcmc_one_iteration_cpp <- function(data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest) { diff --git a/R/model.R b/R/model.R index f1ea8e54..955037b0 100644 --- a/R/model.R +++ b/R/model.R @@ -86,6 +86,7 @@ ForestModel <- R6::R6Class( b_forest <- forest_model_config$variance_forest_scale global_scale <- global_model_config$global_error_variance cutpoint_grid_size <- forest_model_config$cutpoint_grid_size + num_features_subsample <- forest_model_config$num_features_subsample # Default to empty integer vector if sweep_update_indices is NULL if (is.null(sweep_update_indices)) { @@ -113,7 +114,7 @@ ForestModel <- R6::R6Class( forest_dataset$data_ptr, residual$data_ptr, forest_samples$forest_container_ptr, active_forest$forest_ptr, self$tracker_ptr, self$tree_prior_ptr, rng$rng_ptr, sweep_update_indices, feature_types, 
cutpoint_grid_size, leaf_model_scale, - variable_weights, a_forest, b_forest, global_scale, leaf_model_int, keep_forest + variable_weights, a_forest, b_forest, global_scale, leaf_model_int, keep_forest, num_features_subsample ) } else { sample_mcmc_one_iteration_cpp( diff --git a/include/stochtree/tree_sampler.h b/include/stochtree/tree_sampler.h index 5c234d23..750364d7 100644 --- a/include/stochtree/tree_sampler.h +++ b/include/stochtree/tree_sampler.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -469,10 +471,10 @@ static inline void AdjustStateAfterTreeSampling(ForestTracker& tracker, LeafMode template static inline void EvaluateAllPossibleSplits( - ForestDataset& dataset, ForestTracker& tracker, ColumnVector& residual, TreePrior& tree_prior, LeafModel& leaf_model, double global_variance, int tree_num, int split_node_id, + ForestDataset& dataset, ForestTracker& tracker, ColumnVector& residual, TreePrior& tree_prior, std::mt19937& gen, LeafModel& leaf_model, double global_variance, int tree_num, int split_node_id, std::vector& log_cutpoint_evaluations, std::vector& cutpoint_features, std::vector& cutpoint_values, std::vector& cutpoint_feature_types, data_size_t& valid_cutpoint_count, CutpointGridContainer& cutpoint_grid_container, data_size_t node_begin, data_size_t node_end, std::vector& variable_weights, - std::vector& feature_types, LeafSuffStatConstructorArgs&... leaf_suff_stat_args + std::vector& feature_types, std::vector& feature_subset, LeafSuffStatConstructorArgs&... leaf_suff_stat_args ) { // Initialize sufficient statistics LeafSuffStat node_suff_stat = LeafSuffStat(leaf_suff_stat_args...); @@ -510,7 +512,7 @@ static inline void EvaluateAllPossibleSplits( double split_log_ml; for (int j = 0; j < covariates.cols(); j++) { - if (std::abs(variable_weights.at(j)) > kEpsilon) { + if ((std::abs(variable_weights.at(j)) > kEpsilon) && (feature_subset[j])) { // Enumerate cutpoint strides cutpoint_grid_container.CalculateStrides(covariates, outcome, tracker.GetSortedNodeSampleTracker(), split_node_id, node_begin, node_end, j, feature_types); @@ -571,14 +573,15 @@ static inline void EvaluateCutpoints(Tree* tree, ForestTracker& tracker, LeafMod std::mt19937& gen, int tree_num, double global_variance, int cutpoint_grid_size, int node_id, data_size_t node_begin, data_size_t node_end, std::vector& log_cutpoint_evaluations, std::vector& cutpoint_features, std::vector& cutpoint_values, std::vector& cutpoint_feature_types, data_size_t& valid_cutpoint_count, std::vector& variable_weights, - std::vector& feature_types, CutpointGridContainer& cutpoint_grid_container, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + std::vector& feature_types, CutpointGridContainer& cutpoint_grid_container, + std::vector& feature_subset, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Evaluate all possible cutpoints according to the leaf node model, // recording their log-likelihood and other split information in a series of vectors. // The last element of these vectors concerns the "no-split" option. 
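  // Note: candidate splits are only enumerated for features with a nonzero variable weight
  // that are also flagged in feature_subset, the per-tree random subsample drawn in
  // GFRSampleTreeOneIter when num_features_subsample is smaller than the number of features.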
EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, leaf_model, global_variance, tree_num, node_id, log_cutpoint_evaluations, + dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, tree_num, node_id, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, - node_begin, node_end, variable_weights, feature_types, leaf_suff_stat_args... + node_begin, node_end, variable_weights, feature_types, feature_subset, leaf_suff_stat_args... ); // Compute an adjustment to reflect the no split prior probability and the number of cutpoints @@ -599,7 +602,7 @@ static inline void SampleSplitRule(Tree* tree, ForestTracker& tracker, LeafModel TreePrior& tree_prior, std::mt19937& gen, int tree_num, double global_variance, int cutpoint_grid_size, std::unordered_map>& node_index_map, std::deque& split_queue, int node_id, data_size_t node_begin, data_size_t node_end, std::vector& variable_weights, - std::vector& feature_types, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + std::vector& feature_types, std::vector feature_subset, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Leaf depth int leaf_depth = tree->GetDepth(node_id); @@ -619,7 +622,7 @@ static inline void SampleSplitRule(Tree* tree, ForestTracker& tracker, LeafModel tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, global_variance, cutpoint_grid_size, node_id, node_begin, node_end, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, cutpoint_feature_types, valid_cutpoint_count, variable_weights, feature_types, - cutpoint_grid_container, leaf_suff_stat_args... + cutpoint_grid_container, feature_subset, leaf_suff_stat_args... ); // TODO: maybe add some checks here? @@ -702,12 +705,32 @@ template & variable_weights, int tree_num, double global_variance, std::vector& feature_types, int cutpoint_grid_size, - LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + int num_features_subsample, LeafSuffStatConstructorArgs&... 
leaf_suff_stat_args) { int root_id = Tree::kRoot; int curr_node_id; data_size_t curr_node_begin; data_size_t curr_node_end; data_size_t n = dataset.GetCovariates().rows(); + int p = dataset.GetCovariates().cols(); + + // Subsample features (if requested) + std::vector feature_subset(p, true); + if (num_features_subsample < p) { + std::vector feature_indices(p); + std::iota(feature_indices.begin(), feature_indices.end(), 0); + std::vector features_selected(num_features_subsample); + sample_without_replacement( + features_selected.data(), variable_weights.data(), feature_indices.data(), + p, num_features_subsample, gen + ); + for (int i = 0; i < p; i++) { + feature_subset.at(i) = false; + } + for (const auto& feat : features_selected) { + feature_subset.at(feat) = true; + } + } + // Mapping from node id to start and end points of sorted indices std::unordered_map> node_index_map; node_index_map.insert({root_id, std::make_pair(0, n)}); @@ -728,7 +751,7 @@ static inline void GFRSampleTreeOneIter(Tree* tree, ForestTracker& tracker, Fore SampleSplitRule( tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, global_variance, cutpoint_grid_size, node_index_map, split_queue, curr_node_id, curr_node_begin, curr_node_end, variable_weights, feature_types, - leaf_suff_stat_args...); + feature_subset, leaf_suff_stat_args...); } } @@ -765,7 +788,7 @@ template & variable_weights, std::vector& sweep_update_indices, double global_variance, std::vector& feature_types, int cutpoint_grid_size, - bool keep_forest, bool pre_initialized, bool backfitting, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + bool keep_forest, bool pre_initialized, bool backfitting, int num_features_subsample, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Run the GFR algorithm for each tree int num_trees = forests.NumTrees(); for (const int& i : sweep_update_indices) { @@ -785,7 +808,7 @@ static inline void GFRSampleOneIter(TreeEnsemble& active_forest, ForestTracker& GFRSampleTreeOneIter( tree, tracker, forests, leaf_model, dataset, residual, tree_prior, gen, variable_weights, i, global_variance, feature_types, cutpoint_grid_size, - leaf_suff_stat_args... + num_features_subsample, leaf_suff_stat_args... ); // Sample leaf parameters for tree i diff --git a/man/ForestModelConfig.Rd b/man/ForestModelConfig.Rd index 5fe5c755..a20bf36a 100644 --- a/man/ForestModelConfig.Rd +++ b/man/ForestModelConfig.Rd @@ -34,6 +34,8 @@ Shape parameter for IG leaf models Scale parameter for IG leaf models Number of unique cutpoints to consider + +Number of features to subsample for the GFR algorithm } \description{ The "low-level" stochtree interface enables a high degreee of sampler @@ -76,7 +78,9 @@ forest model they wish to run. 
\item{\code{variance_forest_scale}}{Scale parameter for IG leaf models (applicable when \code{leaf_model_type = 3})} -\item{\code{cutpoint_grid_size}}{Number of unique cutpoints to consider +\item{\code{cutpoint_grid_size}}{Number of unique cutpoints to consider} + +\item{\code{num_features_subsample}}{Number of features to subsample for the GFR algorithm Create a new ForestModelConfig object.} } \if{html}{\out{}} @@ -96,6 +100,7 @@ Create a new ForestModelConfig object.} \item \href{#method-ForestModelConfig-update_variance_forest_shape}{\code{ForestModelConfig$update_variance_forest_shape()}} \item \href{#method-ForestModelConfig-update_variance_forest_scale}{\code{ForestModelConfig$update_variance_forest_scale()}} \item \href{#method-ForestModelConfig-update_cutpoint_grid_size}{\code{ForestModelConfig$update_cutpoint_grid_size()}} +\item \href{#method-ForestModelConfig-update_num_features_subsample}{\code{ForestModelConfig$update_num_features_subsample()}} \item \href{#method-ForestModelConfig-get_feature_types}{\code{ForestModelConfig$get_feature_types()}} \item \href{#method-ForestModelConfig-get_sweep_indices}{\code{ForestModelConfig$get_sweep_indices()}} \item \href{#method-ForestModelConfig-get_variable_weights}{\code{ForestModelConfig$get_variable_weights()}} @@ -111,6 +116,7 @@ Create a new ForestModelConfig object.} \item \href{#method-ForestModelConfig-get_variance_forest_shape}{\code{ForestModelConfig$get_variance_forest_shape()}} \item \href{#method-ForestModelConfig-get_variance_forest_scale}{\code{ForestModelConfig$get_variance_forest_scale()}} \item \href{#method-ForestModelConfig-get_cutpoint_grid_size}{\code{ForestModelConfig$get_cutpoint_grid_size()}} +\item \href{#method-ForestModelConfig-get_num_features_subsample}{\code{ForestModelConfig$get_num_features_subsample()}} } } \if{html}{\out{
}} @@ -134,7 +140,8 @@ Create a new ForestModelConfig object.} leaf_model_scale = NULL, variance_forest_shape = 1, variance_forest_scale = 1, - cutpoint_grid_size = 100 + cutpoint_grid_size = 100, + num_features_subsample = NULL )}\if{html}{\out{}} } @@ -172,6 +179,8 @@ Create a new ForestModelConfig object.} \item{\code{variance_forest_scale}}{Scale parameter for IG leaf models (applicable when \code{leaf_model_type = 3}). Default: \code{1}.} \item{\code{cutpoint_grid_size}}{Number of unique cutpoints to consider (default: \code{100})} + +\item{\code{num_features_subsample}}{Number of features to subsample for the GFR algorithm} } \if{html}{\out{}} } @@ -367,6 +376,23 @@ Update number of unique cutpoints to consider } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ForestModelConfig-update_num_features_subsample}{}}} +\subsection{Method \code{update_num_features_subsample()}}{ +Update number of features to subsample for the GFR algorithm +\subsection{Usage}{ +\if{html}{\out{
<div class="r">}}\preformatted{ForestModelConfig$update_num_features_subsample(num_features_subsample)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
<div class="arguments">}}
\describe{
\item{\code{num_features_subsample}}{Number of features to subsample for the GFR algorithm}
}
\if{html}{\out{</div>
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ForestModelConfig-get_feature_types}{}}} \subsection{Method \code{get_feature_types()}}{ @@ -515,5 +541,15 @@ Query number of unique cutpoints to consider for this ForestModelConfig object \if{html}{\out{
<div class="r">}}\preformatted{ForestModelConfig$get_cutpoint_grid_size()}\if{html}{\out{</div>
}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ForestModelConfig-get_num_features_subsample}{}}} +\subsection{Method \code{get_num_features_subsample()}}{ +Query number of features to subsample for the GFR algorithm +\subsection{Usage}{ +\if{html}{\out{
<div class="r">}}\preformatted{ForestModelConfig$get_num_features_subsample()}\if{html}{\out{</div>
}} +} + } } diff --git a/man/bart.Rd b/man/bart.Rd index 7a6b8006..0ea4d966 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -99,6 +99,7 @@ that were not in the training set.} \item \code{keep_vars} Vector of variable names or column indices denoting variables that should be included in the forest. Default: \code{NULL}. \item \code{drop_vars} Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: \code{NULL}. If both \code{drop_vars} and \code{keep_vars} are set, \code{drop_vars} will be ignored. \item \code{probit_outcome_model} Whether or not the outcome should be modeled as explicitly binary via a probit link. If \code{TRUE}, \code{y} must only contain the values \code{0} and \code{1}. Default: \code{FALSE}. +\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. }} \item{variance_forest_params}{(Optional) A list of variance forest model parameters, each of which has a default value processed internally, so this argument list is optional. @@ -114,6 +115,7 @@ that were not in the training set.} \item \code{var_forest_prior_scale} Scale parameter in the \code{IG(var_forest_prior_shape, var_forest_prior_scale)} conditional error variance model (which is only sampled if \code{num_trees > 0}). Calibrated internally as \code{num_trees / leaf_prior_calibration_param^2} if not set. \item \code{keep_vars} Vector of variable names or column indices denoting variables that should be included in the forest. Default: \code{NULL}. \item \code{drop_vars} Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: \code{NULL}. If both \code{drop_vars} and \code{keep_vars} are set, \code{drop_vars} will be ignored. +\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. }} } \value{ @@ -143,6 +145,7 @@ X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] + bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, num_gfr = 10, num_burnin = 0, num_mcmc = 10) } diff --git a/man/createForestModelConfig.Rd b/man/createForestModelConfig.Rd index 235552aa..b46be0e5 100644 --- a/man/createForestModelConfig.Rd +++ b/man/createForestModelConfig.Rd @@ -20,7 +20,8 @@ createForestModelConfig( leaf_model_scale = NULL, variance_forest_shape = 1, variance_forest_scale = 1, - cutpoint_grid_size = 100 + cutpoint_grid_size = 100, + num_features_subsample = NULL ) } \arguments{ @@ -55,6 +56,8 @@ createForestModelConfig( \item{variance_forest_scale}{Scale parameter for IG leaf models (applicable when \code{leaf_model_type = 3}). 
Default: \code{1}.} \item{cutpoint_grid_size}{Number of unique cutpoints to consider (default: \code{100})} + +\item{num_features_subsample}{Number of features to subsample for the GFR algorithm} } \value{ ForestModelConfig object diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 362656fa..aea80bc6 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -1076,10 +1076,10 @@ extern "C" SEXP _stochtree_compute_leaf_indices_cpp(SEXP forest_container, SEXP END_CPP11 } // sampler.cpp -void sample_gfr_one_iteration_cpp(cpp11::external_pointer data, cpp11::external_pointer residual, cpp11::external_pointer forest_samples, cpp11::external_pointer active_forest, cpp11::external_pointer tracker, cpp11::external_pointer split_prior, cpp11::external_pointer rng, cpp11::integers sweep_indices, cpp11::integers feature_types, int cutpoint_grid_size, cpp11::doubles_matrix<> leaf_model_scale_input, cpp11::doubles variable_weights, double a_forest, double b_forest, double global_variance, int leaf_model_int, bool keep_forest); -extern "C" SEXP _stochtree_sample_gfr_one_iteration_cpp(SEXP data, SEXP residual, SEXP forest_samples, SEXP active_forest, SEXP tracker, SEXP split_prior, SEXP rng, SEXP sweep_indices, SEXP feature_types, SEXP cutpoint_grid_size, SEXP leaf_model_scale_input, SEXP variable_weights, SEXP a_forest, SEXP b_forest, SEXP global_variance, SEXP leaf_model_int, SEXP keep_forest) { +void sample_gfr_one_iteration_cpp(cpp11::external_pointer data, cpp11::external_pointer residual, cpp11::external_pointer forest_samples, cpp11::external_pointer active_forest, cpp11::external_pointer tracker, cpp11::external_pointer split_prior, cpp11::external_pointer rng, cpp11::integers sweep_indices, cpp11::integers feature_types, int cutpoint_grid_size, cpp11::doubles_matrix<> leaf_model_scale_input, cpp11::doubles variable_weights, double a_forest, double b_forest, double global_variance, int leaf_model_int, bool keep_forest, int num_features_subsample); +extern "C" SEXP _stochtree_sample_gfr_one_iteration_cpp(SEXP data, SEXP residual, SEXP forest_samples, SEXP active_forest, SEXP tracker, SEXP split_prior, SEXP rng, SEXP sweep_indices, SEXP feature_types, SEXP cutpoint_grid_size, SEXP leaf_model_scale_input, SEXP variable_weights, SEXP a_forest, SEXP b_forest, SEXP global_variance, SEXP leaf_model_int, SEXP keep_forest, SEXP num_features_subsample) { BEGIN_CPP11 - sample_gfr_one_iteration_cpp(cpp11::as_cpp>>(data), cpp11::as_cpp>>(residual), cpp11::as_cpp>>(forest_samples), cpp11::as_cpp>>(active_forest), cpp11::as_cpp>>(tracker), cpp11::as_cpp>>(split_prior), cpp11::as_cpp>>(rng), cpp11::as_cpp>(sweep_indices), cpp11::as_cpp>(feature_types), cpp11::as_cpp>(cutpoint_grid_size), cpp11::as_cpp>>(leaf_model_scale_input), cpp11::as_cpp>(variable_weights), cpp11::as_cpp>(a_forest), cpp11::as_cpp>(b_forest), cpp11::as_cpp>(global_variance), cpp11::as_cpp>(leaf_model_int), cpp11::as_cpp>(keep_forest)); + sample_gfr_one_iteration_cpp(cpp11::as_cpp>>(data), cpp11::as_cpp>>(residual), cpp11::as_cpp>>(forest_samples), cpp11::as_cpp>>(active_forest), cpp11::as_cpp>>(tracker), cpp11::as_cpp>>(split_prior), cpp11::as_cpp>>(rng), cpp11::as_cpp>(sweep_indices), cpp11::as_cpp>(feature_types), cpp11::as_cpp>(cutpoint_grid_size), cpp11::as_cpp>>(leaf_model_scale_input), cpp11::as_cpp>(variable_weights), cpp11::as_cpp>(a_forest), cpp11::as_cpp>(b_forest), cpp11::as_cpp>(global_variance), cpp11::as_cpp>(leaf_model_int), cpp11::as_cpp>(keep_forest), cpp11::as_cpp>(num_features_subsample)); return R_NilValue; END_CPP11 } @@ -1676,7 
+1676,7 @@ static const R_CallMethodDef CallEntries[] = { {"_stochtree_rng_cpp", (DL_FUNC) &_stochtree_rng_cpp, 1}, {"_stochtree_root_reset_active_forest_cpp", (DL_FUNC) &_stochtree_root_reset_active_forest_cpp, 1}, {"_stochtree_root_reset_rfx_tracker_cpp", (DL_FUNC) &_stochtree_root_reset_rfx_tracker_cpp, 4}, - {"_stochtree_sample_gfr_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_gfr_one_iteration_cpp, 17}, + {"_stochtree_sample_gfr_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_gfr_one_iteration_cpp, 18}, {"_stochtree_sample_mcmc_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_mcmc_one_iteration_cpp, 17}, {"_stochtree_sample_sigma2_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_sigma2_one_iteration_cpp, 5}, {"_stochtree_sample_tau_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_tau_one_iteration_cpp, 4}, diff --git a/src/sampler.cpp b/src/sampler.cpp index 30ff2540..1a5a5bb5 100644 --- a/src/sampler.cpp +++ b/src/sampler.cpp @@ -26,7 +26,7 @@ void sample_gfr_one_iteration_cpp(cpp11::external_pointer(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample); } else if (model_type == StochTree::ModelType::kUnivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample); } else if (model_type == StochTree::ModelType::kMultivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_basis); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_basis); } else if (model_type == StochTree::ModelType::kLogLinearVariance) { - StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false, num_features_subsample); } } diff --git a/tools/debug/gfr_feature_subsample_debug.R b/tools/debug/gfr_feature_subsample_debug.R new file mode 100644 
index 00000000..e724c6f5 --- /dev/null +++ b/tools/debug/gfr_feature_subsample_debug.R @@ -0,0 +1,36 @@ +# Load libraries +library(stochtree) + +# Generate the data +n <- 500 +p <- 20 +snr <- 2 +X <- matrix(runif(n*p), ncol = p) +f_XW <- sin(4*pi*X[,1]) + cos(4*pi*X[,2]) + sin(4*pi*X[,3]) + cos(4*pi*X[,4]) +noise_sd <- sd(f_XW) / snr +y <- f_XW + rnorm(n, 0, 1)*noise_sd + +# Split data into test and train sets +test_set_pct <- 0.2 +n_test <- round(test_set_pct*n) +n_train <- n - n_test +test_inds <- sort(sample(1:n, n_test, replace = FALSE)) +train_inds <- (1:n)[!((1:n) %in% test_inds)] +X_test <- as.data.frame(X[test_inds,]) +X_train <- as.data.frame(X[train_inds,]) +y_test <- y[test_inds] +y_train <- y[train_inds] + +# Sampler settings +num_gfr <- 100 +num_burnin <- 0 +num_mcmc <- 20 + +# Run the sampler +general_params <- list(random_seed = 1234, standardize = F, sample_sigma2_global = T) +mean_params <- list(num_trees = 100, num_features_subsample = 5) +bart_model <- stochtree::bart( + X_train = X, y_train = y, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, mean_forest_params = mean_params +) \ No newline at end of file diff --git a/tools/perf/gfr_feature_subsample_microbenchmark.R b/tools/perf/gfr_feature_subsample_microbenchmark.R new file mode 100644 index 00000000..fa289a28 --- /dev/null +++ b/tools/perf/gfr_feature_subsample_microbenchmark.R @@ -0,0 +1,64 @@ +# Load libraries +library(stochtree) +library(microbenchmark) + +# Generate the data +n <- 5000 +p <- 100 +snr <- 2 +X <- matrix(runif(n*p), ncol = p) +f_XW <- sin(4*pi*X[,1]) + cos(4*pi*X[,2]) + sin(4*pi*X[,3]) + cos(4*pi*X[,4]) +noise_sd <- sd(f_XW) / snr +y <- f_XW + rnorm(n, 0, 1)*noise_sd + +# Split data into test and train sets +test_set_pct <- 0.2 +n_test <- round(test_set_pct*n) +n_train <- n - n_test +test_inds <- sort(sample(1:n, n_test, replace = FALSE)) +train_inds <- (1:n)[!((1:n) %in% test_inds)] +X_test <- as.data.frame(X[test_inds,]) +X_train <- as.data.frame(X[train_inds,]) +y_test <- y[test_inds] +y_train <- y[train_inds] + +# Sampler settings +num_gfr <- 100 +num_burnin <- 0 +num_mcmc <- 0 +general_params <- list(sample_sigma2_global = T) +mean_params_a <- list(num_trees = 100, num_features_subsample = 5) +mean_params_b <- list(num_trees = 100) + +# Benchmark sampler with and without feature subsampling +microbenchmark::microbenchmark( + stochtree::bart( + X_train = X, y_train = y, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, mean_forest_params = mean_params_a + ), + stochtree::bart( + X_train = X, y_train = y, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, mean_forest_params = mean_params_b + ), + times = 5 +) + +Rprof() +stochtree::bart( + X_train = X, y_train = y, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, mean_forest_params = mean_params_a +) +Rprof(NULL) +summaryRprof() + +Rprof() +stochtree::bart( + X_train = X, y_train = y, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, mean_forest_params = mean_params_b +) +Rprof(NULL) +summaryRprof() From d09729364446f977d3ed0b8994a775098663f273 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Mon, 9 Jun 2025 18:33:58 -0500 Subject: [PATCH 05/12] Updated Python BART interface --- R/bart.R | 13 ++-- R/config.R | 2 +- .../supervised_learning_feature_subsets.py | 54 ++++++++++++++++ 
src/py_stochtree.cpp | 10 +-- stochtree/bart.py | 18 +++++- stochtree/config.py | 62 ++++++++++++++++++- stochtree/sampler.py | 1 + vignettes/BayesianSupervisedLearning.Rmd | 10 ++- 8 files changed, 148 insertions(+), 22 deletions(-) create mode 100644 demo/debug/supervised_learning_feature_subsets.py diff --git a/R/bart.R b/R/bart.R index 6576c83e..1b28ee83 100644 --- a/R/bart.R +++ b/R/bart.R @@ -44,6 +44,7 @@ #' - `keep_every` How many iterations of the burned-in MCMC sampler should be run before forests and parameters are retained. Default `1`. Setting `keep_every <- k` for some `k > 1` will "thin" the MCMC samples by retaining every `k`-th sample, rather than simply every sample. This can reduce the autocorrelation of the MCMC samples. #' - `num_chains` How many independent MCMC chains should be sampled. If `num_mcmc = 0`, this is ignored. If `num_gfr = 0`, then each chain is run from root for `num_mcmc * keep_every + num_burnin` iterations, with `num_mcmc` samples retained. If `num_gfr > 0`, each MCMC chain will be initialized from a separate GFR ensemble, with the requirement that `num_gfr >= num_chains`. Default: `1`. #' - `verbose` Whether or not to print progress during the sampling loops. Default: `FALSE`. +#' - `probit_outcome_model` Whether or not the outcome should be modeled as explicitly binary via a probit link. If `TRUE`, `y` must only contain the values `0` and `1`. Default: `FALSE`. #' #' @param mean_forest_params (Optional) A list of mean forest model parameters, each of which has a default value processed internally, so this argument list is optional. #' @@ -58,8 +59,7 @@ #' - `sigma2_leaf_scale` Scale parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Calibrated internally as `0.5/num_trees` if not set here. #' - `keep_vars` Vector of variable names or column indices denoting variables that should be included in the forest. Default: `NULL`. #' - `drop_vars` Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: `NULL`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. -#' - `probit_outcome_model` Whether or not the outcome should be modeled as explicitly binary via a probit link. If `TRUE`, `y` must only contain the values `0` and `1`. Default: `FALSE`. -#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. +#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. #' #' @param variance_forest_params (Optional) A list of variance forest model parameters, each of which has a default value processed internally, so this argument list is optional. #' @@ -74,7 +74,7 @@ #' - `var_forest_prior_scale` Scale parameter in the `IG(var_forest_prior_shape, var_forest_prior_scale)` conditional error variance model (which is only sampled if `num_trees > 0`). Calibrated internally as `num_trees / leaf_prior_calibration_param^2` if not set. #' - `keep_vars` Vector of variable names or column indices denoting variables that should be included in the forest. Default: `NULL`. #' - `drop_vars` Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: `NULL`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. 
-#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. +#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. #' #' @return List of sampling outputs and a wrapper around the sampled forests (which can be used for in-memory prediction on new data, or serialized to JSON on disk). #' @export @@ -117,7 +117,8 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train sigma2_global_shape = 0, sigma2_global_scale = 0, variable_weights = NULL, random_seed = -1, keep_burnin = FALSE, keep_gfr = FALSE, keep_every = 1, - num_chains = 1, verbose = FALSE + num_chains = 1, verbose = FALSE, + probit_outcome_model = FALSE ) general_params_updated <- preprocessParams( general_params_default, general_params @@ -130,7 +131,6 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train sample_sigma2_leaf = TRUE, sigma2_leaf_init = NULL, sigma2_leaf_shape = 3, sigma2_leaf_scale = NULL, keep_vars = NULL, drop_vars = NULL, - probit_outcome_model = FALSE, num_features_subsample = NULL ) mean_forest_params_updated <- preprocessParams( @@ -167,6 +167,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train keep_every <- general_params_updated$keep_every num_chains <- general_params_updated$num_chains verbose <- general_params_updated$verbose + probit_outcome_model <- general_params_updated$probit_outcome_model # 2. Mean forest parameters num_trees_mean <- mean_forest_params_updated$num_trees @@ -180,7 +181,6 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train b_leaf <- mean_forest_params_updated$sigma2_leaf_scale keep_vars_mean <- mean_forest_params_updated$keep_vars drop_vars_mean <- mean_forest_params_updated$drop_vars - probit_outcome_model <- mean_forest_params_updated$probit_outcome_model num_features_subsample_mean <- mean_forest_params_updated$num_features_subsample # 3. 
Variance forest parameters @@ -388,7 +388,6 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train if (is.null(num_features_subsample_variance)) { num_features_subsample_variance <- ncol(X_train) } - # Convert all input data to matrices if not already converted if ((is.null(dim(leaf_basis_train))) && (!is.null(leaf_basis_train))) { diff --git a/R/config.R b/R/config.R index 53c259ea..0e2a0b40 100644 --- a/R/config.R +++ b/R/config.R @@ -143,7 +143,7 @@ ForestModelConfig <- R6::R6Class( stop("`num_features_subsample` cannot be larger than `num_features`") } if (num_features_subsample <= 0) { - stop("`num_features_subsample` must at least 1") + stop("`num_features_subsample` must be at least 1") } self$num_features_subsample <- num_features_subsample diff --git a/demo/debug/supervised_learning_feature_subsets.py b/demo/debug/supervised_learning_feature_subsets.py new file mode 100644 index 00000000..e3d80bc5 --- /dev/null +++ b/demo/debug/supervised_learning_feature_subsets.py @@ -0,0 +1,54 @@ +# Supervised Learning Demo Script + +# Load necessary libraries +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +from stochtree import BARTModel +from sklearn.model_selection import train_test_split + +# Generate sample data +# RNG +random_seed = 1234 +rng = np.random.default_rng(random_seed) + +# Generate covariates and basis +n = 1000 +p_X = 20 +X = rng.uniform(0, 1, (n, p_X)) + +# Define the outcome mean function +def outcome_mean(X): + return np.where( + (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5, + np.where( + (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5, + np.where( + (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5, + 7.5 + ) + ) + ) + +# Generate outcome +epsilon = rng.normal(0, 1, n) +y = outcome_mean(X) + epsilon + +# Test-train split +sample_inds = np.arange(n) +train_inds, test_inds = train_test_split(sample_inds, test_size=0.2) +X_train = X[train_inds,:] +X_test = X[test_inds,:] +y_train = y[train_inds] +y_test = y[test_inds] + +# Run XBART with the full feature set +bart_model_a = BARTModel() +forest_config_a = {"num_trees": 100} +bart_model_a.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_a) + +# Run XBART with each tree considering random subsets of 5 features +bart_model_b = BARTModel() +forest_config_b = {"num_trees": 100, "num_features_subsample": 5} +bart_model_b.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_b) diff --git a/src/py_stochtree.cpp b/src/py_stochtree.cpp index 916bffa9..5ff25e72 100644 --- a/src/py_stochtree.cpp +++ b/src/py_stochtree.cpp @@ -1028,7 +1028,7 @@ class ForestSamplerCpp { void SampleOneIteration(ForestContainerCpp& forest_samples, ForestCpp& forest, ForestDatasetCpp& dataset, ResidualCpp& residual, RngCpp& rng, py::array_t feature_types, py::array_t sweep_update_indices, int cutpoint_grid_size, py::array_t leaf_model_scale_input, py::array_t variable_weights, double a_forest, double b_forest, double global_variance, - int leaf_model_int, bool keep_forest = true, bool gfr = true) { + int leaf_model_int, int num_features_subsample, bool keep_forest = true, bool gfr = true) { // Refactoring completely out of the Python interface. // Intention to refactor out of the C++ interface in the future. 
bool pre_initialized = true; @@ -1090,13 +1090,13 @@ class ForestSamplerCpp { std::mt19937* rng_ptr = rng.GetRng(); if (gfr) { if (model_type == StochTree::ModelType::kConstantLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample); } else if (model_type == StochTree::ModelType::kUnivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample); } else if (model_type == StochTree::ModelType::kMultivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_basis); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_basis); } else if (model_type == StochTree::ModelType::kLogLinearVariance) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false, num_features_subsample); } } else { if (model_type == StochTree::ModelType::kConstantLeafGaussian) { diff --git a/stochtree/bart.py b/stochtree/bart.py index 0539129a..1b51491e 100644 --- a/stochtree/bart.py +++ b/stochtree/bart.py @@ -131,6 +131,7 @@ def sample( * `keep_gfr` (`bool`): Whether or not "warm-start" / grow-from-root samples should be included in predictions. Defaults to `False`. Ignored if `num_mcmc == 0`. 
* `keep_every` (`int`): How many iterations of the burned-in MCMC sampler should be run before forests and parameters are retained. Defaults to `1`. Setting `keep_every = k` for some `k > 1` will "thin" the MCMC samples by retaining every `k`-th sample, rather than simply every sample. This can reduce the autocorrelation of the MCMC samples. * `num_chains` (`int`): How many independent MCMC chains should be sampled. If `num_mcmc = 0`, this is ignored. If `num_gfr = 0`, then each chain is run from root for `num_mcmc * keep_every + num_burnin` iterations, with `num_mcmc` samples retained. If `num_gfr > 0`, each MCMC chain will be initialized from a separate GFR ensemble, with the requirement that `num_gfr >= num_chains`. Defaults to `1`. + * `probit_outcome_model` (`bool`): Whether or not the outcome should be modeled as explicitly binary via a probit link. If `True`, `y` must only contain the values `0` and `1`. Default: `False`. mean_forest_params : dict, optional Dictionary of mean forest model parameters, each of which has a default value processed internally, so this argument is optional. @@ -146,7 +147,7 @@ def sample( * `sigma2_leaf_scale` (`float`): Scale parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Calibrated internally as `0.5/num_trees` if not set here. * `keep_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be included in the mean forest. Defaults to `None`. * `drop_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be excluded from the mean forest. Defaults to `None`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. - * `probit_outcome_model` (`bool`): Whether or not the outcome should be modeled as explicitly binary via a probit link. If `True`, `y` must only contain the values `0` and `1`. Default: `False`. + * `num_features_subsample` (`int`): How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. variance_forest_params : dict, optional Dictionary of variance forest model parameters, each of which has a default value processed internally, so this argument is optional. @@ -162,6 +163,7 @@ def sample( * `var_forest_prior_scale` (`float`): Scale parameter in the [optional] `IG(var_forest_prior_shape, var_forest_prior_scale)` conditional error variance forest (which is only sampled if `num_trees > 0`). Calibrated internally as `num_trees / leaf_prior_calibration_param^2` if not set here. * `keep_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be included in the variance forest. Defaults to `None`. * `drop_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be excluded from the variance forest. Defaults to `None`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. + * `num_features_subsample` (`int`): How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. previous_model_json : str, optional JSON string containing a previous BART model. This can be used to "continue" a sampler interactively after inspecting the samples or to run parallel chains "warm-started" from existing forest samples. Defaults to `None`. 
@@ -206,6 +208,7 @@ def sample( "sigma2_leaf_scale": None, "keep_vars": None, "drop_vars": None, + "num_features_subsample": None, } mean_forest_params_updated = _preprocess_params( mean_forest_params_default, mean_forest_params @@ -224,6 +227,7 @@ def sample( "var_forest_prior_scale": None, "keep_vars": None, "drop_vars": None, + "num_features_subsample": None, } variance_forest_params_updated = _preprocess_params( variance_forest_params_default, variance_forest_params @@ -257,6 +261,7 @@ def sample( b_leaf = mean_forest_params_updated["sigma2_leaf_scale"] keep_vars_mean = mean_forest_params_updated["keep_vars"] drop_vars_mean = mean_forest_params_updated["drop_vars"] + num_features_subsample_mean = mean_forest_params_updated["num_features_subsample"] # 3. Variance forest parameters num_trees_variance = variance_forest_params_updated["num_trees"] @@ -272,6 +277,7 @@ def sample( b_forest = variance_forest_params_updated["var_forest_prior_scale"] keep_vars_variance = variance_forest_params_updated["keep_vars"] drop_vars_variance = variance_forest_params_updated["drop_vars"] + num_features_subsample_variance = variance_forest_params_updated["num_features_subsample"] # Override keep_gfr if there are no MCMC samples if num_mcmc == 0: @@ -714,6 +720,12 @@ def sample( [variable_subset_variance.count(i) == 0 for i in original_var_indices] ] = 0 + # Set num_features_subsample to default, ncol(X_train), if not already set + if num_features_subsample_mean is None: + num_features_subsample_mean = X_train.shape[1] + if num_features_subsample_variance is None: + num_features_subsample_variance = X_train.shape[1] + # Preliminary runtime checks for probit link if not self.include_mean_forest: self.probit_outcome_model = False @@ -1048,7 +1060,8 @@ def sample( max_depth=max_depth_mean, leaf_model_type=leaf_model_mean_forest, leaf_model_scale=current_leaf_scale, - cutpoint_grid_size=cutpoint_grid_size, + cutpoint_grid_size=cutpoint_grid_size, + num_features_subsample=num_features_subsample_mean ) forest_sampler_mean = ForestSampler( forest_dataset_train, @@ -1071,6 +1084,7 @@ def sample( cutpoint_grid_size=cutpoint_grid_size, variance_forest_shape=a_forest, variance_forest_scale=b_forest, + num_features_subsample=num_features_subsample_variance ) forest_sampler_variance = ForestSampler( forest_dataset_train, diff --git a/stochtree/config.py b/stochtree/config.py index 7651e088..72cae512 100644 --- a/stochtree/config.py +++ b/stochtree/config.py @@ -52,7 +52,9 @@ class ForestModelConfig: variance_forest_scale : int, optional Scale parameter for IG leaf models (applicable when `leaf_model_type = 3`). Default: `1`. cutpoint_grid_size : int, optional - Number of unique cutpoints to consider (default: `100`) + Number of unique cutpoints to consider (default: `100`). + num_features_subsample : int, optional + Number of features to subsample for the GFR algorithm (default: `None`). 
""" def __init__( @@ -73,6 +75,7 @@ def __init__( variance_forest_shape=1.0, variance_forest_scale=1.0, cutpoint_grid_size=100, + num_features_subsample=None, ) -> None: # Preprocess inputs and run some error checks if feature_types is None: @@ -150,6 +153,29 @@ def __init__( ) self.sweep_update_indices = sweep_update_indices + if sweep_update_indices is not None: + sweep_update_indices = _standardize_array_to_np(sweep_update_indices) + if np.min(sweep_update_indices) < 0: + raise ValueError( + "sweep_update_indices must be a list / np.array of indices >= 0 and < num_trees", + ) + if np.max(sweep_update_indices) >= num_trees: + raise ValueError( + "sweep_update_indices must be a list / np.array of indices >= 0 and < num_trees", + ) + + if num_features_subsample is None: + num_features_subsample = num_features + if num_features_subsample > num_features: + raise ValueError( + "`num_features_subsample` cannot be larger than `num_features`", + ) + if num_features_subsample <= 0: + raise ValueError( + "`num_features_subsample` must be at least 1", + ) + self.num_features_subsample = num_features_subsample + # Set internal config values self.num_trees = num_trees self.num_features = num_features @@ -372,6 +398,29 @@ def update_cutpoint_grid_size(self, cutpoint_grid_size: int) -> None: """ self.cutpoint_grid_size = cutpoint_grid_size + def update_num_features_subsample(self, num_features_subsample: int) -> None: + """ + Update number of features to subsample for the GFR algorithm. + + Parameters + ---------- + num_features_subsample : int + Number of features to subsample for the GFR algorithm. + + Returns + ------- + self + """ + if num_features_subsample > self.num_features: + raise ValueError( + "`num_features_subsample` cannot be larger than `num_features`", + ) + if num_features_subsample <= 0: + raise ValueError( + "`num_features_subsample` must be at least 1", + ) + self.num_features_subsample = num_features_subsample + def get_feature_types(self) -> np.ndarray: """ Query feature types (integer-coded so that 0 = numeric, 1 = ordered categorical, 2 = unordered categorical) @@ -537,6 +586,17 @@ def get_cutpoint_grid_size(self) -> int: """ return self.cutpoint_grid_size + def get_num_features_subsample(self) -> int: + """ + Query number of features to subsample for the GFR algorithm + + Returns + ------- + num_features_subsample : int + Number of features to subsample for the GFR algorithm + """ + return self.num_features_subsample + class GlobalModelConfig: """ diff --git a/stochtree/sampler.py b/stochtree/sampler.py index 3586c24a..be55286a 100644 --- a/stochtree/sampler.py +++ b/stochtree/sampler.py @@ -170,6 +170,7 @@ def sample_one_iteration( forest_config.get_variance_forest_scale(), global_config.get_global_error_variance(), forest_config.get_leaf_model_type(), + forest_config.get_num_features_subsample(), keep_forest, gfr, ) diff --git a/vignettes/BayesianSupervisedLearning.Rmd b/vignettes/BayesianSupervisedLearning.Rmd index 250f74c1..4270fed4 100644 --- a/vignettes/BayesianSupervisedLearning.Rmd +++ b/vignettes/BayesianSupervisedLearning.Rmd @@ -374,9 +374,8 @@ num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc -general_params <- list(sample_sigma2_global = F) -mean_forest_params <- list(sample_sigma2_leaf = F, num_trees = 100, - probit_outcome_model = T) +general_params <- list(sample_sigma2_global = F, probit_outcome_model = T) +mean_forest_params <- list(sample_sigma2_leaf = F, num_trees = 100) bart_model_warmstart <- stochtree::bart( 
X_train = X_train, y_train = y_train, X_test = X_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, @@ -447,9 +446,8 @@ num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc -general_params <- list(sample_sigma2_global = F) -mean_forest_params <- list(sample_sigma2_leaf = F, num_trees = 100, - probit_outcome_model = T) +general_params <- list(sample_sigma2_global = F, probit_outcome_model = T) +mean_forest_params <- list(sample_sigma2_leaf = F, num_trees = 100) bart_model_root <- stochtree::bart( X_train = X_train, y_train = y_train, X_test = X_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, From ff61ed81fb857776bc1ab3d5cb24015cb3c230df Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 10 Jun 2025 13:56:39 -0500 Subject: [PATCH 06/12] Updated examples and docs --- demo/debug/supervised_learning_feature_subsets.py | 9 ++++++++- man/bart.Rd | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/demo/debug/supervised_learning_feature_subsets.py b/demo/debug/supervised_learning_feature_subsets.py index e3d80bc5..9ddbb447 100644 --- a/demo/debug/supervised_learning_feature_subsets.py +++ b/demo/debug/supervised_learning_feature_subsets.py @@ -7,6 +7,7 @@ import matplotlib.pyplot as plt from stochtree import BARTModel from sklearn.model_selection import train_test_split +import timeit # Generate sample data # RNG @@ -15,7 +16,7 @@ # Generate covariates and basis n = 1000 -p_X = 20 +p_X = 100 X = rng.uniform(0, 1, (n, p_X)) # Define the outcome mean function @@ -44,11 +45,17 @@ def outcome_mean(X): y_test = y[test_inds] # Run XBART with the full feature set +s = """\ bart_model_a = BARTModel() forest_config_a = {"num_trees": 100} bart_model_a.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_a) +""" +print(timeit.timeit(stmt=s, number=5, globals=globals())) # Run XBART with each tree considering random subsets of 5 features +s = """\ bart_model_b = BARTModel() forest_config_b = {"num_trees": 100, "num_features_subsample": 5} bart_model_b.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_b) +""" +print(timeit.timeit(stmt=s, number=5, globals=globals())) diff --git a/man/bart.Rd b/man/bart.Rd index 0ea4d966..5712271e 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -83,6 +83,7 @@ that were not in the training set.} \item \code{keep_every} How many iterations of the burned-in MCMC sampler should be run before forests and parameters are retained. Default \code{1}. Setting \code{keep_every <- k} for some \code{k > 1} will "thin" the MCMC samples by retaining every \code{k}-th sample, rather than simply every sample. This can reduce the autocorrelation of the MCMC samples. \item \code{num_chains} How many independent MCMC chains should be sampled. If \code{num_mcmc = 0}, this is ignored. If \code{num_gfr = 0}, then each chain is run from root for \code{num_mcmc * keep_every + num_burnin} iterations, with \code{num_mcmc} samples retained. If \code{num_gfr > 0}, each MCMC chain will be initialized from a separate GFR ensemble, with the requirement that \code{num_gfr >= num_chains}. Default: \code{1}. \item \code{verbose} Whether or not to print progress during the sampling loops. Default: \code{FALSE}. +\item \code{probit_outcome_model} Whether or not the outcome should be modeled as explicitly binary via a probit link. 
If \code{TRUE}, \code{y} must only contain the values \code{0} and \code{1}. Default: \code{FALSE}. }} \item{mean_forest_params}{(Optional) A list of mean forest model parameters, each of which has a default value processed internally, so this argument list is optional. @@ -98,8 +99,7 @@ that were not in the training set.} \item \code{sigma2_leaf_scale} Scale parameter in the \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)} leaf node parameter variance model. Calibrated internally as \code{0.5/num_trees} if not set here. \item \code{keep_vars} Vector of variable names or column indices denoting variables that should be included in the forest. Default: \code{NULL}. \item \code{drop_vars} Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: \code{NULL}. If both \code{drop_vars} and \code{keep_vars} are set, \code{drop_vars} will be ignored. -\item \code{probit_outcome_model} Whether or not the outcome should be modeled as explicitly binary via a probit link. If \code{TRUE}, \code{y} must only contain the values \code{0} and \code{1}. Default: \code{FALSE}. -\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. +\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. }} \item{variance_forest_params}{(Optional) A list of variance forest model parameters, each of which has a default value processed internally, so this argument list is optional. @@ -115,7 +115,7 @@ that were not in the training set.} \item \code{var_forest_prior_scale} Scale parameter in the \code{IG(var_forest_prior_shape, var_forest_prior_scale)} conditional error variance model (which is only sampled if \code{num_trees > 0}). Calibrated internally as \code{num_trees / leaf_prior_calibration_param^2} if not set. \item \code{keep_vars} Vector of variable names or column indices denoting variables that should be included in the forest. Default: \code{NULL}. \item \code{drop_vars} Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: \code{NULL}. If both \code{drop_vars} and \code{keep_vars} are set, \code{drop_vars} will be ignored. -\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features passed in the training dataset. +\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. }} } \value{ From 98ebce15b231379777837040bee06fbdbfcacf90 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 10 Jun 2025 14:02:12 -0500 Subject: [PATCH 07/12] Updated BCF interface to allow for feature subsampling --- R/bcf.R | 34 +++++++++++++++++++++++++++------- man/bcf.Rd | 3 +++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/R/bcf.R b/R/bcf.R index b9842c5d..11696f79 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -62,6 +62,7 @@ #' - `sigma2_leaf_scale` Scale parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Calibrated internally as `0.5/num_trees` if not set here. #' - `keep_vars` Vector of variable names or column indices denoting variables that should be included in the forest. Default: `NULL`. 
#' - `drop_vars` Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: `NULL`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. +#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. #' #' @param treatment_effect_forest_params (Optional) A list of treatment effect forest model parameters, each of which has a default value processed internally, so this argument list is optional. #' @@ -78,6 +79,7 @@ #' - `delta_max` Maximum plausible conditional distributional treatment effect (i.e. P(Y(1) = 1 | X) - P(Y(0) = 1 | X)) when the outcome is binary. Only used when the outcome is specified as a probit model in `general_params`. Must be > 0 and < 1. Default: `0.9`. Ignored if `sigma2_leaf_init` is set directly, as this parameter is used to calibrate `sigma2_leaf_init`. #' - `keep_vars` Vector of variable names or column indices denoting variables that should be included in the forest. Default: `NULL`. #' - `drop_vars` Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: `NULL`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. +#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. #' #' @param variance_forest_params (Optional) A list of variance forest model parameters, each of which has a default value processed internally, so this argument list is optional. #' @@ -92,6 +94,7 @@ #' - `var_forest_prior_scale` Scale parameter in the `IG(var_forest_prior_shape, var_forest_prior_scale)` conditional error variance model (which is only sampled if `num_trees > 0`). Calibrated internally as `num_trees / 1.5^2` if not set. #' - `keep_vars` Vector of variable names or column indices denoting variables that should be included in the forest. Default: `NULL`. #' - `drop_vars` Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: `NULL`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. +#' - `num_features_subsample` How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. #' #' @return List of sampling outputs and a wrapper around the sampled forests (which can be used for in-memory prediction on new data, or serialized to JSON on disk). 
#' @export @@ -171,7 +174,8 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id min_samples_leaf = 5, max_depth = 10, sample_sigma2_leaf = TRUE, sigma2_leaf_init = NULL, sigma2_leaf_shape = 3, sigma2_leaf_scale = NULL, - keep_vars = NULL, drop_vars = NULL + keep_vars = NULL, drop_vars = NULL, + num_features_subsample = NULL ) prognostic_forest_params_updated <- preprocessParams( prognostic_forest_params_default, prognostic_forest_params @@ -183,8 +187,8 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id min_samples_leaf = 5, max_depth = 5, sample_sigma2_leaf = FALSE, sigma2_leaf_init = NULL, sigma2_leaf_shape = 3, sigma2_leaf_scale = NULL, - keep_vars = NULL, drop_vars = NULL, - delta_max = 0.9 + keep_vars = NULL, drop_vars = NULL, delta_max = 0.9, + num_features_subsample = NULL ) treatment_effect_forest_params_updated <- preprocessParams( treatment_effect_forest_params_default, treatment_effect_forest_params @@ -198,7 +202,8 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id variance_forest_init = NULL, var_forest_prior_shape = NULL, var_forest_prior_scale = NULL, - keep_vars = NULL, drop_vars = NULL + keep_vars = NULL, drop_vars = NULL, + num_features_subsample = NULL ) variance_forest_params_updated <- preprocessParams( variance_forest_params_default, variance_forest_params @@ -238,6 +243,7 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id b_leaf_mu <- prognostic_forest_params_updated$sigma2_leaf_scale keep_vars_mu <- prognostic_forest_params_updated$keep_vars drop_vars_mu <- prognostic_forest_params_updated$drop_vars + num_features_subsample_mu <- prognostic_forest_params_updated$num_features_subsample # 3. Tau forest parameters num_trees_tau <- treatment_effect_forest_params_updated$num_trees @@ -252,6 +258,7 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id keep_vars_tau <- treatment_effect_forest_params_updated$keep_vars drop_vars_tau <- treatment_effect_forest_params_updated$drop_vars delta_max <- treatment_effect_forest_params_updated$delta_max + num_features_subsample_tau <- treatment_effect_forest_params_updated$num_features_subsample # 4. 
Variance forest parameters num_trees_variance <- variance_forest_params_updated$num_trees @@ -265,6 +272,7 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id b_forest <- variance_forest_params_updated$var_forest_prior_scale keep_vars_variance <- variance_forest_params_updated$keep_vars drop_vars_variance <- variance_forest_params_updated$drop_vars + num_features_subsample_variance <- variance_forest_params_updated$num_features_subsample # Check if there are enough GFR samples to seed num_chains samplers if (num_gfr > 0) { @@ -477,6 +485,17 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id X_test_raw <- X_test if (!is.null(X_test)) X_test <- preprocessPredictionData(X_test, X_train_metadata) + # Set num_features_subsample to default, ncol(X_train), if not already set + if (is.null(num_features_subsample_mu)) { + num_features_subsample_mu <- ncol(X_train) + } + if (is.null(num_features_subsample_tau)) { + num_features_subsample_tau <- ncol(X_train) + } + if (is.null(num_features_subsample_variance)) { + num_features_subsample_variance <- ncol(X_train) + } + # Convert all input data to matrices if not already converted if ((is.null(dim(Z_train))) && (!is.null(Z_train))) { Z_train <- as.matrix(as.numeric(Z_train)) @@ -899,12 +918,12 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id num_observations=nrow(X_train), variable_weights=variable_weights_mu, leaf_dimension=leaf_dimension_mu_forest, alpha=alpha_mu, beta=beta_mu, min_samples_leaf=min_samples_leaf_mu, max_depth=max_depth_mu, leaf_model_type=leaf_model_mu_forest, leaf_model_scale=current_leaf_scale_mu, - cutpoint_grid_size=cutpoint_grid_size) + cutpoint_grid_size=cutpoint_grid_size, num_features_subsample = num_features_subsample_mu) forest_model_config_tau <- createForestModelConfig(feature_types=feature_types, num_trees=num_trees_tau, num_features=ncol(X_train), num_observations=nrow(X_train), variable_weights=variable_weights_tau, leaf_dimension=leaf_dimension_tau_forest, alpha=alpha_tau, beta=beta_tau, min_samples_leaf=min_samples_leaf_tau, max_depth=max_depth_tau, leaf_model_type=leaf_model_tau_forest, leaf_model_scale=current_leaf_scale_tau, - cutpoint_grid_size=cutpoint_grid_size) + cutpoint_grid_size=cutpoint_grid_size, num_features_subsample = num_features_subsample_tau) forest_model_mu <- createForestModel(forest_dataset_train, forest_model_config_mu, global_model_config) forest_model_tau <- createForestModel(forest_dataset_train, forest_model_config_tau, global_model_config) if (include_variance_forest) { @@ -912,7 +931,8 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id num_observations=nrow(X_train), variable_weights=variable_weights_variance, leaf_dimension=leaf_dimension_variance_forest, alpha=alpha_variance, beta=beta_variance, min_samples_leaf=min_samples_leaf_variance, max_depth=max_depth_variance, - leaf_model_type=leaf_model_variance_forest, cutpoint_grid_size=cutpoint_grid_size) + leaf_model_type=leaf_model_variance_forest, cutpoint_grid_size=cutpoint_grid_size, + num_features_subsample=num_features_subsample_variance) forest_model_variance <- createForestModel(forest_dataset_train, forest_model_config_variance, global_model_config) } diff --git a/man/bcf.Rd b/man/bcf.Rd index a7811b6a..14698735 100644 --- a/man/bcf.Rd +++ b/man/bcf.Rd @@ -107,6 +107,7 @@ that were not in the training set.} \item \code{sigma2_leaf_scale} Scale parameter in the \code{IG(sigma2_leaf_shape, 
sigma2_leaf_scale)} leaf node parameter variance model. Calibrated internally as \code{0.5/num_trees} if not set here. \item \code{keep_vars} Vector of variable names or column indices denoting variables that should be included in the forest. Default: \code{NULL}. \item \code{drop_vars} Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: \code{NULL}. If both \code{drop_vars} and \code{keep_vars} are set, \code{drop_vars} will be ignored. +\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. }} \item{treatment_effect_forest_params}{(Optional) A list of treatment effect forest model parameters, each of which has a default value processed internally, so this argument list is optional. @@ -124,6 +125,7 @@ that were not in the training set.} \item \code{delta_max} Maximum plausible conditional distributional treatment effect (i.e. P(Y(1) = 1 | X) - P(Y(0) = 1 | X)) when the outcome is binary. Only used when the outcome is specified as a probit model in \code{general_params}. Must be > 0 and < 1. Default: \code{0.9}. Ignored if \code{sigma2_leaf_init} is set directly, as this parameter is used to calibrate \code{sigma2_leaf_init}. \item \code{keep_vars} Vector of variable names or column indices denoting variables that should be included in the forest. Default: \code{NULL}. \item \code{drop_vars} Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: \code{NULL}. If both \code{drop_vars} and \code{keep_vars} are set, \code{drop_vars} will be ignored. +\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. }} \item{variance_forest_params}{(Optional) A list of variance forest model parameters, each of which has a default value processed internally, so this argument list is optional. @@ -139,6 +141,7 @@ that were not in the training set.} \item \code{var_forest_prior_scale} Scale parameter in the \code{IG(var_forest_prior_shape, var_forest_prior_scale)} conditional error variance model (which is only sampled if \code{num_trees > 0}). Calibrated internally as \code{num_trees / 1.5^2} if not set. \item \code{keep_vars} Vector of variable names or column indices denoting variables that should be included in the forest. Default: \code{NULL}. \item \code{drop_vars} Vector of variable names or column indices denoting variables that should be excluded from the forest. Default: \code{NULL}. If both \code{drop_vars} and \code{keep_vars} are set, \code{drop_vars} will be ignored. +\item \code{num_features_subsample} How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. }} } \value{ From cf03f4dd765533ef2b0aefc4d18f3b1eef677465 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 10 Jun 2025 14:11:17 -0500 Subject: [PATCH 08/12] Updated Python BCF interface --- stochtree/bcf.py | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/stochtree/bcf.py b/stochtree/bcf.py index e97e1a88..c4b67232 100644 --- a/stochtree/bcf.py +++ b/stochtree/bcf.py @@ -168,6 +168,7 @@ def sample( * `sigma2_leaf_scale` (`float`): Scale parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. 
Calibrated internally as `0.5/num_trees` if not set here. * `keep_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be included in the prognostic (`mu(X)`) forest. Defaults to `None`. * `drop_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be excluded from the prognostic (`mu(X)`) forest. Defaults to `None`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. + * `num_features_subsample` (`int`): How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. treatment_effect_forest_params : dict, optional Dictionary of treatment effect forest model parameters, each of which has a default value processed internally, so this argument is optional. @@ -184,6 +185,7 @@ def sample( * `delta_max` (`float`): Maximum plausible conditional distributional treatment effect (i.e. P(Y(1) = 1 | X) - P(Y(0) = 1 | X)) when the outcome is binary. Only used when the outcome is specified as a probit model in `general_params`. Must be > 0 and < 1. Defaults to `0.9`. Ignored if `sigma2_leaf_init` is set directly, as this parameter is used to calibrate `sigma2_leaf_init`. * `keep_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be included in the treatment effect (`tau(X)`) forest. Defaults to `None`. * `drop_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be excluded from the treatment effect (`tau(X)`) forest. Defaults to `None`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. + * `num_features_subsample` (`int`): How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. variance_forest_params : dict, optional Dictionary of variance forest model parameters, each of which has a default value processed internally, so this argument is optional. @@ -199,6 +201,7 @@ def sample( * `var_forest_prior_scale` (`float`): Scale parameter in the [optional] `IG(var_forest_prior_shape, var_forest_prior_scale)` conditional error variance forest (which is only sampled if `num_trees > 0`). Calibrated internally as `num_trees / 1.5^2` if not set here. * `keep_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be included in the variance forest. Defaults to `None`. * `drop_vars` (`list` or `np.array`): Vector of variable names or column indices denoting variables that should be excluded from the variance forest. Defaults to `None`. If both `drop_vars` and `keep_vars` are set, `drop_vars` will be ignored. + * `num_features_subsample` (`int`): How many features to subsample when growing each tree for the GFR algorithm. Defaults to the number of features in the training dataset. 
Returns ------- @@ -242,6 +245,7 @@ def sample( "sigma2_leaf_scale": None, "keep_vars": None, "drop_vars": None, + "num_features_subsample": None, } prognostic_forest_params_updated = _preprocess_params( prognostic_forest_params_default, prognostic_forest_params @@ -261,6 +265,7 @@ def sample( "delta_max": 0.9, "keep_vars": None, "drop_vars": None, + "num_features_subsample": None, } treatment_effect_forest_params_updated = _preprocess_params( treatment_effect_forest_params_default, treatment_effect_forest_params @@ -279,6 +284,7 @@ def sample( "var_forest_prior_scale": None, "keep_vars": None, "drop_vars": None, + "num_features_subsample": None, } variance_forest_params_updated = _preprocess_params( variance_forest_params_default, variance_forest_params @@ -316,6 +322,7 @@ def sample( b_leaf_mu = prognostic_forest_params_updated["sigma2_leaf_scale"] keep_vars_mu = prognostic_forest_params_updated["keep_vars"] drop_vars_mu = prognostic_forest_params_updated["drop_vars"] + num_features_subsample_mu = prognostic_forest_params_updated["num_features_subsample"] # 3. Tau forest parameters num_trees_tau = treatment_effect_forest_params_updated["num_trees"] @@ -334,6 +341,7 @@ def sample( delta_max = treatment_effect_forest_params_updated["delta_max"] keep_vars_tau = treatment_effect_forest_params_updated["keep_vars"] drop_vars_tau = treatment_effect_forest_params_updated["drop_vars"] + num_features_subsample_tau = treatment_effect_forest_params_updated["num_features_subsample"] # 4. Variance forest parameters num_trees_variance = variance_forest_params_updated["num_trees"] @@ -349,6 +357,7 @@ def sample( b_forest = variance_forest_params_updated["var_forest_prior_scale"] keep_vars_variance = variance_forest_params_updated["keep_vars"] drop_vars_variance = variance_forest_params_updated["drop_vars"] + num_features_subsample_variance = variance_forest_params_updated["num_features_subsample"] # Override keep_gfr if there are no MCMC samples if num_mcmc == 0: @@ -744,6 +753,19 @@ def sample( if not isinstance(keep_gfr, bool): raise ValueError("keep_gfr must be a bool") + # Covariate preprocessing + self._covariate_preprocessor = CovariatePreprocessor() + self._covariate_preprocessor.fit(X_train) + X_train_processed = self._covariate_preprocessor.transform(X_train) + if X_test is not None: + X_test_processed = self._covariate_preprocessor.transform(X_test) + feature_types = np.asarray( + self._covariate_preprocessor._processed_feature_types + ) + original_var_indices = ( + self._covariate_preprocessor.fetch_original_feature_indices() + ) + # Standardize the keep variable lists to numeric indices if keep_vars_mu is not None: if isinstance(keep_vars_mu, list): @@ -1052,18 +1074,13 @@ def sample( else: variable_subset_variance = [i for i in range(X_train.shape[1])] - # Covariate preprocessing - self._covariate_preprocessor = CovariatePreprocessor() - self._covariate_preprocessor.fit(X_train) - X_train_processed = self._covariate_preprocessor.transform(X_train) - if X_test is not None: - X_test_processed = self._covariate_preprocessor.transform(X_test) - feature_types = np.asarray( - self._covariate_preprocessor._processed_feature_types - ) - original_var_indices = ( - self._covariate_preprocessor.fetch_original_feature_indices() - ) + # Set num_features_subsample to default, ncol(X_train), if not already set + if num_features_subsample_mu is None: + num_features_subsample_mu = X_train.shape[1] + if num_features_subsample_tau is None: + num_features_subsample_tau = X_train.shape[1] + if 
num_features_subsample_variance is None: + num_features_subsample_variance = X_train.shape[1] # Determine whether a test set is provided self.has_test = X_test is not None @@ -1519,6 +1536,7 @@ def sample( leaf_model_type=leaf_model_mu, leaf_model_scale=current_leaf_scale_mu, cutpoint_grid_size=cutpoint_grid_size, + num_features_subsample=num_features_subsample_mu, ) forest_sampler_mu = ForestSampler( forest_dataset_train, @@ -1539,6 +1557,7 @@ def sample( leaf_model_type=leaf_model_tau, leaf_model_scale=current_leaf_scale_tau, cutpoint_grid_size=cutpoint_grid_size, + num_features_subsample=num_features_subsample_tau, ) forest_sampler_tau = ForestSampler( forest_dataset_train, @@ -1561,6 +1580,7 @@ def sample( cutpoint_grid_size=cutpoint_grid_size, variance_forest_shape=a_forest, variance_forest_scale=b_forest, + num_features_subsample=num_features_subsample_variance, ) forest_sampler_variance = ForestSampler( forest_dataset_train, global_model_config, forest_model_config_variance From 3a576e11a9c35e2247249c3527e0cee6bbfbb553 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 10 Jun 2025 14:59:09 -0500 Subject: [PATCH 09/12] Updated feature subsampling benchmark script --- .../gfr_feature_subsample_microbenchmark.R | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/perf/gfr_feature_subsample_microbenchmark.R b/tools/perf/gfr_feature_subsample_microbenchmark.R index fa289a28..7327ca8a 100644 --- a/tools/perf/gfr_feature_subsample_microbenchmark.R +++ b/tools/perf/gfr_feature_subsample_microbenchmark.R @@ -3,7 +3,7 @@ library(stochtree) library(microbenchmark) # Generate the data -n <- 5000 +n <- 1000 p <- 100 snr <- 2 X <- matrix(runif(n*p), ncol = p) @@ -46,8 +46,8 @@ microbenchmark::microbenchmark( ) Rprof() -stochtree::bart( - X_train = X, y_train = y, +model_subsampling <- stochtree::bart( + X_train = X_train, y_train = y_train, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_params_a ) @@ -55,10 +55,20 @@ Rprof(NULL) summaryRprof() Rprof() -stochtree::bart( - X_train = X, y_train = y, +model_no_subsampling <- stochtree::bart( + X_train = X_train, y_train = y_train, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_params_b ) Rprof(NULL) summaryRprof() + +# Compare out of sample RMSE of the two models +y_hat_test_subsampling <- rowMeans(predict(model_subsampling, X = X_test)$y_hat) +rmse_subsampling <- ( + sqrt(mean((y_hat_test_subsampling - y_test)^2)) +) +y_hat_test_no_subsampling <- rowMeans(predict(model_no_subsampling, X = X_test)$y_hat) +rmse_no_subsampling <- ( + sqrt(mean((y_hat_test_no_subsampling - y_test)^2)) +) From 21f2897d75cbc2a5067afb819ad728a077f8d434 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 10 Jun 2025 15:43:09 -0500 Subject: [PATCH 10/12] Updated benchmarking scripts --- .../supervised_learning_feature_subsets.py | 45 ++++++---- ...bcf_gfr_feature_subsample_microbenchmark.R | 89 +++++++++++++++++++ 2 files changed, 116 insertions(+), 18 deletions(-) create mode 100644 tools/perf/bcf_gfr_feature_subsample_microbenchmark.R diff --git a/demo/debug/supervised_learning_feature_subsets.py b/demo/debug/supervised_learning_feature_subsets.py index 9ddbb447..85e26dd1 100644 --- a/demo/debug/supervised_learning_feature_subsets.py +++ b/demo/debug/supervised_learning_feature_subsets.py @@ -2,9 +2,6 @@ # Load necessary libraries import numpy as np -import pandas as pd -import 
seaborn as sns -import matplotlib.pyplot as plt from stochtree import BARTModel from sklearn.model_selection import train_test_split import timeit @@ -16,25 +13,21 @@ # Generate covariates and basis n = 1000 -p_X = 100 -X = rng.uniform(0, 1, (n, p_X)) +p = 100 +X = rng.uniform(0, 1, (n, p)) # Define the outcome mean function def outcome_mean(X): - return np.where( - (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5, - np.where( - (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5, - np.where( - (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5, - 7.5 - ) - ) + return ( + np.sin(4*np.pi*X[:,0]) + np.cos(4*np.pi*X[:,1]) + np.sin(4*np.pi*X[:,2]) + np.cos(4*np.pi*X[:,3]) ) # Generate outcome -epsilon = rng.normal(0, 1, n) -y = outcome_mean(X) + epsilon +snr = 2 +f_X = outcome_mean(X) +noise_sd = np.std(f_X) / snr +epsilon = rng.normal(0, 1, n) * noise_sd +y = f_X + epsilon # Test-train split sample_inds = np.arange(n) @@ -50,7 +43,8 @@ def outcome_mean(X): forest_config_a = {"num_trees": 100} bart_model_a.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_a) """ -print(timeit.timeit(stmt=s, number=5, globals=globals())) +timing_no_subsampling = timeit.timeit(stmt=s, number=5, globals=globals()) +print(f"Average runtime, without feature subsampling (p = {p:d}): {timing_no_subsampling:.2f}") # Run XBART with each tree considering random subsets of 5 features s = """\ @@ -58,4 +52,19 @@ def outcome_mean(X): forest_config_b = {"num_trees": 100, "num_features_subsample": 5} bart_model_b.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_b) """ -print(timeit.timeit(stmt=s, number=5, globals=globals())) +timing_subsampling = timeit.timeit(stmt=s, number=5, globals=globals()) +print(f"Average runtime, subsampling 5 out of {p:d} features: {timing_subsampling:.2f}") + +# Compare RMSEs of each model +bart_model_a = BARTModel() +forest_config_a = {"num_trees": 100} +bart_model_a.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_a) +bart_model_b = BARTModel() +forest_config_b = {"num_trees": 100, "num_features_subsample": 5} +bart_model_b.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_b) +y_hat_test_a = np.squeeze(bart_model_a.y_hat_test).mean(axis = 1) +rmse_no_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_a,2))) +print(f"Test set RMSE, no subsampling (p = {p:d}): {rmse_no_subsampling:.2f}") +y_hat_test_b = np.squeeze(bart_model_b.y_hat_test).mean(axis = 1) +rmse_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_b,2))) +print(f"Test set RMSE, subsampling 5 out of {p:d} features: {rmse_subsampling:.2f}") diff --git a/tools/perf/bcf_gfr_feature_subsample_microbenchmark.R b/tools/perf/bcf_gfr_feature_subsample_microbenchmark.R new file mode 100644 index 00000000..21c239b9 --- /dev/null +++ b/tools/perf/bcf_gfr_feature_subsample_microbenchmark.R @@ -0,0 +1,89 @@ +# Load libraries +library(stochtree) +library(microbenchmark) + +# Generate the data +n <- 1000 +p <- 100 +snr <- 2 +X <- matrix(rnorm(n*p), ncol = p) +mu_x <- 1 + 2*X[,1] - 4*(X[,2] < 0) + 4*(X[,2] >= 0) + 3*(abs(X[,3]) - sqrt(2/pi)) +tau_x <- 1 + 2*X[,4] +u <- runif(n) +pi_x <- ((mu_x-1)/4) + 4*(u-0.5) +Z <- pi_x + rnorm(n,0,1) +E_XZ <- mu_x + Z*tau_x +noise_sd <- sd(E_XZ) / snr +y <- E_XZ + rnorm(n, 0, 1)*noise_sd + +# Split data into test and train sets +test_set_pct <- 0.2 +n_test <- 
round(test_set_pct*n) +n_train <- n - n_test +test_inds <- sort(sample(1:n, n_test, replace = FALSE)) +train_inds <- (1:n)[!((1:n) %in% test_inds)] +X_test <- X[test_inds,] +X_train <- X[train_inds,] +Z_test <- Z[test_inds] +Z_train <- Z[train_inds] +pi_x_test <- pi_x[test_inds] +pi_x_train <- pi_x[train_inds] +y_test <- y[test_inds] +y_train <- y[train_inds] + +# Sampler settings +num_gfr <- 100 +num_burnin <- 0 +num_mcmc <- 0 +general_params <- list(sample_sigma2_global = T) +prog_params_a <- list(num_trees = 100, num_features_subsample = 5) +trt_params_a <- list(num_trees = 100, num_features_subsample = 5) +prog_params_b <- list(num_trees = 50) +trt_params_b <- list(num_trees = 50) + +# Benchmark sampler with and without feature subsampling +microbenchmark::microbenchmark( + stochtree::bcf( + X_train = X, Z_train = Z, propensity_train = pi_x, y_train = y, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, prognostic_forest_params = prog_params_a, + treatment_effect_forest_params = trt_params_a + ), + stochtree::bcf( + X_train = X, Z_train = Z, propensity_train = pi_x, y_train = y, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, prognostic_forest_params = prog_params_b, + treatment_effect_forest_params = trt_params_b + ), + times = 5 +) + +Rprof() +model_subsampling <- stochtree::bcf( + X_train = X_train, Z_train = Z_train, propensity_train = pi_x_train, y_train = y_train, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, prognostic_forest_params = prog_params_a, + treatment_effect_forest_params = trt_params_a +) +Rprof(NULL) +summaryRprof() + +Rprof() +model_no_subsampling <- stochtree::bcf( + X_train = X_train, Z_train = Z_train, propensity_train = pi_x_train, y_train = y_train, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + general_params = general_params, prognostic_forest_params = prog_params_b, + treatment_effect_forest_params = trt_params_b +) +Rprof(NULL) +summaryRprof() + +# Compare out of sample RMSE of the two models +y_hat_test_subsampling <- rowMeans(predict(model_subsampling, X = X_test, Z = Z_test, propensity = pi_x_test)$y_hat) +rmse_subsampling <- ( + sqrt(mean((y_hat_test_subsampling - y_test)^2)) +) +y_hat_test_no_subsampling <- rowMeans(predict(model_no_subsampling, X = X_test, Z = Z_test, propensity = pi_x_test)$y_hat) +rmse_no_subsampling <- ( + sqrt(mean((y_hat_test_no_subsampling - y_test)^2)) +) From e9953002bd6b4709ee2cbb3d9e26495edccd1e81 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 10 Jun 2025 16:02:55 -0500 Subject: [PATCH 11/12] Added python demo and benchmark script --- .../debug/causal_inference_feature_subsets.py | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 demo/debug/causal_inference_feature_subsets.py diff --git a/demo/debug/causal_inference_feature_subsets.py b/demo/debug/causal_inference_feature_subsets.py new file mode 100644 index 00000000..00cab8b8 --- /dev/null +++ b/demo/debug/causal_inference_feature_subsets.py @@ -0,0 +1,76 @@ +# Supervised Learning Demo Script + +# Load necessary libraries +import numpy as np +from stochtree import BCFModel +from sklearn.model_selection import train_test_split +import timeit + +# Generate sample data +# RNG +random_seed = 1234 +rng = np.random.default_rng(random_seed) + +# Generate covariates and basis +n = 1000 +p = 100 +X = rng.normal(0, 1, (n, p)) + +# Generate outcome +snr = 2 +mu_x = 1 + 
2*X[:,0] + np.where(X[:,1] < 0, -4, 4) + 3*(np.abs(X[:,2]) - np.sqrt(2/np.pi)) +tau_x = 1 + 2*X[:,3] +u = rng.uniform(0, 1, n) +pi_x = ((mu_x-1.)/4.) + 4*(u-0.5) +Z = pi_x + rng.normal(0, 1, n) +E_XZ = mu_x + Z*tau_x +noise_sd = np.std(E_XZ) / snr +y = E_XZ + rng.normal(0, 1, n)*noise_sd + +# Test-train split +sample_inds = np.arange(n) +train_inds, test_inds = train_test_split(sample_inds, test_size=0.2) +X_train = X[train_inds,:] +X_test = X[test_inds,:] +pi_x_train = pi_x[train_inds] +pi_x_test = pi_x[test_inds] +Z_train = Z[train_inds] +Z_test = Z[test_inds] +y_train = y[train_inds] +y_test = y[test_inds] + +# Run XBART with the full feature set +s = """\ +bcf_model_a = BCFModel() +prog_forest_config_a = {"num_trees": 100} +trt_forest_config_a = {"num_trees": 50} +bcf_model_a.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a) +""" +timing_no_subsampling = timeit.timeit(stmt=s, number=5, globals=globals()) +print(f"Average runtime, without feature subsampling (p = {p:d}): {timing_no_subsampling:.2f}") + +# Run XBART with each tree considering random subsets of 5 features +s = """\ +bcf_model_b = BCFModel() +prog_forest_config_b = {"num_trees": 100, "num_features_subsample": 5} +trt_forest_config_b = {"num_trees": 50, "num_features_subsample": 5} +bcf_model_b.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b) +""" +timing_subsampling = timeit.timeit(stmt=s, number=5, globals=globals()) +print(f"Average runtime, subsampling 5 out of {p:d} features: {timing_subsampling:.2f}") + +# Compare RMSEs of each model +bcf_model_a = BCFModel() +prog_forest_config_a = {"num_trees": 100} +trt_forest_config_a = {"num_trees": 50} +bcf_model_a.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a) +bcf_model_b = BCFModel() +prog_forest_config_b = {"num_trees": 100, "num_features_subsample": 5} +trt_forest_config_b = {"num_trees": 50, "num_features_subsample": 5} +bcf_model_b.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b) +y_hat_test_a = np.squeeze(bcf_model_a.y_hat_test).mean(axis = 1) +rmse_no_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_a,2))) +print(f"Test set RMSE, no subsampling (p = {p:d}): {rmse_no_subsampling:.2f}") +y_hat_test_b = np.squeeze(bcf_model_b.y_hat_test).mean(axis = 1) +rmse_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_b,2))) +print(f"Test set RMSE, subsampling 5 out of {p:d} features: {rmse_subsampling:.2f}") From ee95a2f23a9091fef1998d1cd463bb627de72d87 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 10 Jun 2025 16:18:00 -0500 Subject: [PATCH 12/12] Updated C++ unit tests and standalone program --- debug/api_debug.cpp | 9 +++++---- test/cpp/test_model.cpp | 28 ++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git 
a/debug/api_debug.cpp b/debug/api_debug.cpp index f31e8739..8958bd93 100644 --- a/debug/api_debug.cpp +++ b/debug/api_debug.cpp @@ -668,6 +668,7 @@ void RunDebug(int dgp_num = 0, const ModelType model_type = kConstantLeafGaussia // Prepare the samplers LeafModelVariant leaf_model = leafModelFactory(model_type, leaf_scale, leaf_scale_matrix, a_forest, b_forest); + int num_features_subsample = x_cols; // Initialize vector of sweep update indices std::vector sweep_indices(num_trees); @@ -687,13 +688,13 @@ void RunDebug(int dgp_num = 0, const ModelType model_type = kConstantLeafGaussia // Sample tree ensemble if (model_type == ModelType::kConstantLeafGaussian) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample); } else if (model_type == ModelType::kUnivariateRegressionLeafGaussian) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample); } else if (model_type == ModelType::kMultivariateRegressionLeafGaussian) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, omega_cols); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample, omega_cols); } else if (model_type == ModelType::kLogLinearVariance) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, false); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, false, num_features_subsample); } if (rfx_included) { diff --git a/test/cpp/test_model.cpp b/test/cpp/test_model.cpp index 0e729bef..dbe85b7e 100644 --- a/test/cpp/test_model.cpp +++ b/test/cpp/test_model.cpp @@ -15,6 +15,9 @@ TEST(LeafConstantModel, FullEnumeration) { test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); + std::vector feature_subset(test_dataset.x_cols, true); + std::random_device rd; + std::mt19937 gen(rd()); // Construct datasets using data_size_t = StochTree::data_size_t; @@ -51,8 +54,8 @@ TEST(LeafConstantModel, FullEnumeration) { // Evaluate all possible cutpoints StochTree::EvaluateAllPossibleSplits( - dataset, 
tracker, residual, tree_prior, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types + dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, + cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset ); // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered @@ -74,6 +77,9 @@ TEST(LeafConstantModel, CutpointThinning) { test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); + std::vector feature_subset(test_dataset.x_cols, true); + std::random_device rd; + std::mt19937 gen(rd()); // Construct datasets using data_size_t = StochTree::data_size_t; @@ -110,8 +116,8 @@ TEST(LeafConstantModel, CutpointThinning) { // Evaluate all possible cutpoints StochTree::EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types + dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, + cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset ); // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered @@ -132,6 +138,9 @@ TEST(LeafUnivariateRegressionModel, FullEnumeration) { test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); + std::vector feature_subset(test_dataset.x_cols, true); + std::random_device rd; + std::mt19937 gen(rd()); // Construct datasets using data_size_t = StochTree::data_size_t; @@ -169,8 +178,8 @@ TEST(LeafUnivariateRegressionModel, FullEnumeration) { // Evaluate all possible cutpoints StochTree::EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types + dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, + cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset ); // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered @@ -192,6 +201,9 @@ TEST(LeafUnivariateRegressionModel, CutpointThinning) { test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); + std::vector feature_subset(test_dataset.x_cols, true); + std::random_device rd; + std::mt19937 gen(rd()); // Construct datasets using data_size_t = StochTree::data_size_t; @@ -229,8 +241,8 @@ 
TEST(LeafUnivariateRegressionModel, CutpointThinning) { // Evaluate all possible cutpoints StochTree::EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types + dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, + cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset );
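// ---------------------------------------------------------------------------
// Aside (not part of the patch above): a minimal sketch of how a per-tree
// `feature_subset` mask like the one passed to EvaluateAllPossibleSplits in
// these tests might be constructed from `num_features_subsample`. The helper
// name `DrawFeatureSubset` is hypothetical, and the uniform-weight shuffle
// below is only one way to draw the subset; the sampler itself could instead
// rely on the weighted sample_without_replacement utility added earlier in
// this series.
// ---------------------------------------------------------------------------
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

std::vector<bool> DrawFeatureSubset(int num_features, int num_features_subsample, std::mt19937& gen) {
  // When we subsample at least as many features as exist, every feature stays eligible
  if (num_features_subsample >= num_features) return std::vector<bool>(num_features, true);
  // Otherwise, shuffle the feature indices and mark the first num_features_subsample as eligible
  std::vector<int> indices(num_features);
  std::iota(indices.begin(), indices.end(), 0);
  std::shuffle(indices.begin(), indices.end(), gen);
  std::vector<bool> feature_subset(num_features, false);
  for (int i = 0; i < num_features_subsample; i++) feature_subset[indices[i]] = true;
  return feature_subset;
}
// Usage sketch in the test context above: auto feature_subset = DrawFeatureSubset(test_dataset.x_cols, 5, gen);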