# FAST-based feature filtering demo

This notebook demonstrates internal FAST-based feature filtering for factual explanations.

The core idea:
- Run an internal FAST pass on the same batch to obtain per-instance feature weights.
- Aggregate those weights and keep only the top-k most important features for the batch.
- Run the full factual explanation on this reduced feature set, reducing compute cost.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from calibrated_explanations import WrapCalibratedExplainer
from calibrated_explanations.api.config import ExplainerBuilder


In [20]:
# Generate a simple classification dataset with many features
X, y = make_classification(
    n_samples=2000,
    n_features=2000,
    n_informative=5,
    n_redundant=5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=100, random_state=42)
X_train_proper, X_cal, y_train_proper, y_cal = train_test_split(X_train, y_train, test_size=200, random_state=42)

learner = RandomForestClassifier(n_estimators=50, random_state=42)
learner.fit(X_train_proper, y_train_proper)

0,1,2
,n_estimators,50
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [21]:
# Build an ExplainerConfig with internal feature filtering enabled
builder = ExplainerBuilder(learner)
config = (
    builder
    .task("classification")
    .perf_parallel(True, backend="threads", workers=4, granularity="feature")
    .perf_feature_filter(True, per_instance_top_k=5)
    .build_config()
)

# Construct the wrapper from the config
wrapper = WrapCalibratedExplainer._from_config(config)

# Calibrate the explainer (perf cache/parallel + feature filter are wired internally)
wrapper.calibrate(X_cal, y_cal)
explainer = wrapper.explainer
explainer

CalibratedExplainer(mode=classification, learner=RandomForestClassifier(n_estimators=50, random_state=42))

In [22]:
# Run factual explanations with internal FAST-based feature filtering enabled
explanations = wrapper.explain_factual(X_test)


  batch = plugin.explain_batch(x, request)


ConfigurationError: Explanation plugin execution failed for mode 'factual': Unable to allocate 15.3 MiB for an array with shape (1000, 2000) and data type float64

In [None]:
for e in explanations[:5]:
    e.plot(uncertainty=True)

In [None]:
# Inspect how many features remain after filtering
num_features = explainer.num_features
ignored = set(explanations.features_to_ignore)
kept = [f for f in range(num_features) if f not in ignored]
num_features, len(kept), kept

(200,
 59,
 [4,
  5,
  12,
  14,
  15,
  18,
  20,
  24,
  33,
  36,
  37,
  40,
  41,
  45,
  51,
  54,
  57,
  62,
  63,
  67,
  71,
  72,
  81,
  83,
  84,
  86,
  92,
  97,
  100,
  105,
  107,
  108,
  112,
  113,
  116,
  119,
  126,
  131,
  132,
  133,
  139,
  140,
  144,
  146,
  148,
  154,
  157,
  160,
  162,
  168,
  175,
  176,
  178,
  182,
  183,
  185,
  193,
  196,
  198])

The length of `kept` should be less than or equal to `per_instance_top_k` passed to `perf_feature_filter`.

You can tweak `per_instance_top_k` and rerun the notebook to see how the effective feature budget changes.

In [None]:
# Construct the wrapper from the config
wrapper_all = WrapCalibratedExplainer(learner)

# Calibrate the explainer (perf cache/parallel + feature filter are wired internally)
wrapper_all.calibrate(X_cal, y_cal)
explainer = wrapper_all.explainer


In [None]:
explanations_all = wrapper_all.explain_factual(X_test)
explanations_all.features_to_ignore

array([], dtype=int32)