From 9b659b4ca59760fdbe2ec3d8b9af58d9543c7b31 Mon Sep 17 00:00:00 2001 From: Newcoderorigin Date: Fri, 3 Oct 2025 20:40:22 -0500 Subject: [PATCH 1/3] Ensure calibration artifacts write to existing directories --- toptek/core/model.py | 250 ++++++++++++++++++++++++++++++++++++++++-- toptek/gui/widgets.py | 109 +++++++++++++++++- toptek/main.py | 20 +++- 3 files changed, 363 insertions(+), 16 deletions(-) diff --git a/toptek/core/model.py b/toptek/core/model.py index a728551..ded2ae2 100644 --- a/toptek/core/model.py +++ b/toptek/core/model.py @@ -1,9 +1,23 @@ -"""Simple machine-learning helpers for classification models.""" +"""Simple machine-learning helpers for classification models. + +This module offers light-weight wrappers around scikit-learn estimators to +standardise the training and calibration workflows used by the Toptek GUI and +CLI tools. + +Example +------- +>>> import numpy as np +>>> from pathlib import Path +>>> X = np.random.randn(200, 6) +>>> y = (X[:, 0] > 0).astype(int) +>>> result = train_classifier(X, y, models_dir=Path("models")) +>>> _ = calibrate_classifier(result.model_path, (X[:40], y[:40])) +""" from __future__ import annotations import pickle -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Dict, Tuple @@ -13,15 +27,39 @@ from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score, roc_auc_score from sklearn.model_selection import train_test_split +from sklearn.impute import SimpleImputer +from sklearn.pipeline import Pipeline @dataclass class TrainResult: - """Container for training outcomes.""" + """Container for training outcomes. + + Attributes + ---------- + model_path: + Filesystem location of the persisted estimator pipeline. + metrics: + Dictionary containing evaluation metrics computed on the validation + split. + threshold: + Decision threshold used when deriving discrete class predictions from + probabilities. 
+ preprocessing: + Summary statistics describing how the feature matrix was sanitised. + retained_columns: + Tuple of retained column indices relative to the original feature + matrix. ``None`` when no trimming occurred. + original_feature_count: + Column count observed before preprocessing. + """ model_path: Path metrics: Dict[str, float] threshold: float + preprocessing: Dict[str, int] = field(default_factory=dict) + retained_columns: tuple[int, ...] | None = None + original_feature_count: int | None = None def train_classifier( @@ -32,19 +70,109 @@ def train_classifier( models_dir: Path, threshold: float = 0.65, ) -> TrainResult: - """Train a basic classifier and persist it to ``models_dir``.""" + """Train a basic classifier and persist it to ``models_dir``. + + Parameters + ---------- + X: + Feature matrix to train on. The routine casts the payload to ``float`` and + sanitises non-finite entries prior to fitting. + y: + Target labels associated with ``X``. The labels must contain at least two + distinct classes. + model_type: + Name of the estimator to fit (``"logistic"`` or ``"gbm"``). + models_dir: + Directory where the fitted pipeline should be persisted. + threshold: + Probability threshold for translating predictions into class labels when + deriving simple metrics. + + Returns + ------- + TrainResult + Metadata about the persisted model, including preprocessing telemetry. + + Raises + ------ + ValueError + If the feature matrix is not two-dimensional, lacks usable rows or columns + after cleaning, contains invalid target values, or the target labels collapse + into a single class. + + Example + ------- + >>> import numpy as np + >>> from pathlib import Path + >>> X = np.random.rand(120, 4) + >>> y = (X[:, 0] > 0.5).astype(int) + >>> train_classifier(X, y, models_dir=Path("models")) + TrainResult(...) 
+ """ + + if X.ndim != 2: + raise ValueError("Feature matrix must be 2-dimensional") + + X = np.asarray(X, dtype=float) + y = np.asarray(y).ravel() + + original_feature_count = int(X.shape[1]) + + if not np.isfinite(y).all(): + raise ValueError( + "Target labels contain NaN or inf values; clean the labels before training" + ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42) + non_finite_mask = ~np.isfinite(X) + imputed_cells = int(non_finite_mask.sum()) + if imputed_cells: + X = X.copy() + X[non_finite_mask] = np.nan + + row_all_nan = np.isnan(X).all(axis=1) + dropped_rows = int(row_all_nan.sum()) + if dropped_rows: + X = X[~row_all_nan] + y = y[~row_all_nan] + + if X.size == 0: + raise ValueError("No valid feature rows remain after removing all-NaN rows") + + col_all_nan = np.isnan(X).all(axis=0) + dropped_columns = int(col_all_nan.sum()) + retained_columns: tuple[int, ...] | None = None + if dropped_columns: + valid_column_mask = ~col_all_nan + X = X[:, valid_column_mask] + if X.shape[1] == 0: + raise ValueError("All feature columns were empty after cleaning; cannot train") + retained_columns = tuple(int(idx) for idx in np.flatnonzero(valid_column_mask)) + else: + retained_columns = None + + if np.unique(y).size < 2: + raise ValueError("Training requires at least two target classes") + + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, shuffle=True, random_state=42 + ) if model_type == "logistic": - model = LogisticRegression(max_iter=1000) + classifier = LogisticRegression(max_iter=1000) elif model_type == "gbm": - model = GradientBoostingClassifier() + classifier = GradientBoostingClassifier() else: raise ValueError("Unknown model type") - model.fit(X_train, y_train) - proba = model.predict_proba(X_test)[:, 1] + pipeline = Pipeline( + [ + ("imputer", SimpleImputer(strategy="median")), + ("classifier", classifier), + ] + ) + + pipeline.fit(X_train, y_train) + proba = 
pipeline.predict_proba(X_test)[:, 1] preds = (proba >= threshold).astype(int) metrics = { "accuracy": float(accuracy_score(y_test, preds)), @@ -53,8 +181,19 @@ def train_classifier( models_dir.mkdir(parents=True, exist_ok=True) model_path = models_dir / f"{model_type}_model.pkl" with model_path.open("wb") as handle: - pickle.dump(model, handle) - return TrainResult(model_path=model_path, metrics=metrics, threshold=threshold) + pickle.dump(pipeline, handle) + return TrainResult( + model_path=model_path, + metrics=metrics, + threshold=threshold, + preprocessing={ + "imputed_cells": imputed_cells, + "dropped_rows": dropped_rows, + "dropped_columns": dropped_columns, + }, + retained_columns=retained_columns, + original_feature_count=original_feature_count, + ) def load_model(model_path: Path): @@ -70,6 +209,8 @@ def calibrate_classifier( *, method: str = "sigmoid", output_path: Path | None = None, + feature_mask: tuple[int, ...] | np.ndarray | None = None, + original_feature_count: int | None = None, ) -> Path: """Calibrate a pre-trained classifier using hold-out data. @@ -84,17 +225,104 @@ def calibrate_classifier( output_path: Optional override for where the calibrated model should be persisted. + feature_mask: + Optional tuple of retained column indices from the original feature matrix. + When provided, the calibration features will be subset to these columns to + match the training-time dimensionality; indices must be unique and ascending. + original_feature_count: + Number of columns in the original training feature matrix. Used to + determine whether the calibration payload still contains the untouched + feature space or has already been trimmed. + Returns ------- Path Filesystem path to the calibrated model artefact. + + Raises + ------ + ValueError + If the calibration payload is malformed, references invalid feature + indices, or lacks class diversity. 
+ + Example + ------- + >>> from pathlib import Path + >>> import numpy as np + >>> model_path = Path("models/logistic_model.pkl") + >>> X_cal = np.random.rand(50, 4) + >>> y_cal = (X_cal[:, 0] > 0.5).astype(int) + >>> calibrate_classifier(model_path, (X_cal, y_cal)) + PosixPath('models/logistic_model_calibrated.pkl') """ X_cal, y_cal = calibration_data + X_cal = np.asarray(X_cal, dtype=float) + y_cal = np.asarray(y_cal).ravel() + + if X_cal.ndim != 2: + raise ValueError("Calibration feature matrix must be 2-dimensional") + + if not np.isfinite(y_cal).all(): + raise ValueError( + "Calibration labels contain NaN or inf values; clean the labels before calibrating" + ) + + if feature_mask is not None: + indices = np.asarray(feature_mask, dtype=int) + if indices.ndim != 1: + raise ValueError("Feature mask must be a 1-D sequence of column indices") + if indices.size == 0: + raise ValueError("Feature mask is empty; cannot realign calibration features") + if (indices < 0).any(): + raise ValueError("Feature mask cannot include negative column indices") + if not np.all(np.diff(indices) >= 0): + raise ValueError("Feature mask must be sorted in ascending order") + if np.unique(indices).size != indices.size: + raise ValueError("Feature mask contains duplicate column indices") + + max_index = int(indices.max()) + if original_feature_count is None or original_feature_count == X_cal.shape[1]: + if max_index >= X_cal.shape[1]: + raise ValueError( + "Feature mask references columns beyond the calibration matrix bounds" + ) + X_cal = X_cal[:, indices] + elif X_cal.shape[1] == indices.size: + # Calibration payload already trimmed to the retained columns. We assume the + # supplied feature order already matches the mask order since we no longer + # have the dropped columns to cross-check against. 
+ pass + else: + raise ValueError( + "Calibration payload has unexpected dimensionality relative to the training mask" + ) + + non_finite = ~np.isfinite(X_cal) + if non_finite.any(): + X_cal = X_cal.copy() + X_cal[non_finite] = np.nan + + row_all_nan = np.isnan(X_cal).all(axis=1) + if row_all_nan.any(): + X_cal = X_cal[~row_all_nan] + y_cal = y_cal[~row_all_nan] + if X_cal.size == 0: + raise ValueError("No valid calibration rows remain after cleaning") + + if np.unique(y_cal).size < 2: + raise ValueError("Calibration requires at least two target classes") + pipeline = load_model(model_path) + expected_features = getattr(pipeline, "n_features_in_", None) + if expected_features is not None and X_cal.shape[1] != expected_features: + raise ValueError( + "Calibration feature matrix shape does not match the fitted model" + ) calibrator = CalibratedClassifierCV(estimator=pipeline, method=method, cv="prefit") calibrator.fit(X_cal, y_cal) target_path = output_path or model_path.with_name(f"{model_path.stem}_calibrated.pkl") + target_path.parent.mkdir(parents=True, exist_ok=True) with target_path.open("wb") as handle: pickle.dump(calibrator, handle) return target_path diff --git a/toptek/gui/widgets.py b/toptek/gui/widgets.py index bb509bf..3659a3d 100644 --- a/toptek/gui/widgets.py +++ b/toptek/gui/widgets.py @@ -227,17 +227,83 @@ def _train_model(self) -> None: feat_map = features.compute_features(df) X = np.column_stack(list(feat_map.values())) y = (np.diff(df["close"], prepend=df["close"].iloc[0]) > 0).astype(int) - result = model.train_classifier(X, y, model_type=self.model_type.get(), models_dir=self.paths.models) + + unique_labels = np.unique(y) + if unique_labels.size < 2: + self.status.config( + text="Training aborted: target labels lack class diversity.", foreground="#b91c1c" + ) + messagebox.showwarning( + "Training warning", + ( + "Training requires at least two classes after cleaning the dataset.\n" + "Collect more data or adjust preprocessing to obtain both up 
and down samples." + ), + ) + return + + try: + result = model.train_classifier( + X, y, model_type=self.model_type.get(), models_dir=self.paths.models + ) + except ValueError as exc: + self.status.config( + text="Training failed due to invalid feature matrix. Review warnings.", + foreground="#b91c1c", + ) + messagebox.showwarning( + "Training warning", + ( + "The classifier could not be trained with the current dataset.\n\n" + f"Details: {exc}" + ), + ) + self.log_event(f"Model training failed: {exc}", level="error") + return + preprocessing = result.preprocessing or {} + prep_notes = [] + if preprocessing.get("imputed_cells"): + prep_notes.append( + f"imputed {preprocessing['imputed_cells']} feature values" + ) + if preprocessing.get("dropped_rows"): + prep_notes.append( + f"dropped {preprocessing['dropped_rows']} all-NaN rows" + ) + if preprocessing.get("dropped_columns"): + prep_notes.append( + f"removed {preprocessing['dropped_columns']} empty columns" + ) + if result.retained_columns is not None and result.original_feature_count is not None: + prep_notes.append( + f"retained {len(result.retained_columns)} of {result.original_feature_count} feature columns" + ) + if prep_notes: + self.log_event( + "Preprocessing summary: " + ", ".join(prep_notes), + level="info", + ) calibrate_report = "skipped" + calibration_detail: str | None = None calibration_failed = False if self.calibrate_var.get() and len(X) > 60: cal_size = max(60, int(len(X) * 0.2)) X_cal = X[-cal_size:] y_cal = y[-cal_size:] + calibrate_kwargs = {} + if result.retained_columns is not None: + calibrate_kwargs["feature_mask"] = result.retained_columns + if result.original_feature_count is not None: + calibrate_kwargs["original_feature_count"] = result.original_feature_count try: - calibrated_path = model.calibrate_classifier(result.model_path, (X_cal, y_cal)) + calibrated_path = model.calibrate_classifier( + result.model_path, + (X_cal, y_cal), + **calibrate_kwargs, + ) except (ValueError, 
RuntimeError) as exc: calibrate_report = f"calibration failed: {exc}" + calibration_detail = calibrate_report calibration_failed = True self.log_event( f"Calibration failed for {result.model_path.name}: {exc}", @@ -257,16 +323,22 @@ def _train_model(self) -> None: ) else: calibrate_report = f"calibrated → {calibrated_path.name}" + calibration_detail = calibrate_report self.log_event( f"Calibration completed for {result.model_path.name} → {calibrated_path.name}", level="info", ) + calibrate_value = calibrate_report if not calibration_failed else "skipped" self.output.delete("1.0", tk.END) payload = { "model": self.model_type.get(), "metrics": result.metrics, "threshold": result.threshold, - "calibration": calibrate_report, + "preprocessing": preprocessing, + "retained_columns": list(result.retained_columns) if result.retained_columns is not None else None, + "original_feature_count": result.original_feature_count, + "calibration": calibrate_value, + "calibration_detail": calibration_detail, } self.output.insert(tk.END, json_dumps(payload)) self.update_section("training", payload) @@ -350,6 +422,7 @@ class TradeTab(BaseTab): def __init__(self, master: ttk.Notebook, configs: Dict[str, Dict[str, object]], paths: utils.AppPaths) -> None: self.guard_status = tk.StringVar(value="Topstep Guard: pending review") + self.guard_label: ttk.Label | None = None super().__init__(master, configs, paths) self._build() @@ -366,7 +439,8 @@ def _build(self) -> None: justify=tk.LEFT, ).pack(anchor=tk.W) - ttk.Label(intro, textvariable=self.guard_status, foreground="#1d4ed8").pack(anchor=tk.W, pady=(8, 0)) + self.guard_label = ttk.Label(intro, textvariable=self.guard_status, foreground="#1d4ed8") + self.guard_label.pack(anchor=tk.W, pady=(8, 0)) ttk.Button(self, text="Refresh Topstep guard", command=self._show_risk).pack(pady=(6, 0)) self.output = tk.Text(self, height=12) @@ -378,6 +452,13 @@ def _build(self) -> None: ) def _show_risk(self) -> None: + """Refresh the Topstep guard summary 
and surface a contextual dialog. + + The check recalculates a sample position size using the configured + Topstep profile, updates the guard label colouring, and displays either + an informational or warning dialog depending on whether the guard + remains in ``OK`` or moves into ``DEFENSIVE_MODE``. + """ profile = risk.RiskProfile( max_position_size=self.configs["risk"].get("max_position_size", 1), max_daily_loss=self.configs["risk"].get("max_daily_loss", 1000), @@ -389,6 +470,9 @@ def _show_risk(self) -> None: sample_size = risk.position_size(50000, profile, atr=3.5, tick_value=12.5, risk_per_trade=0.01) guard = "OK" if sample_size > 0 else "DEFENSIVE_MODE" self.guard_status.set(f"Topstep Guard: {guard}") + if self.guard_label is not None: + colour = "#15803d" if guard == "OK" else "#b91c1c" + self.guard_label.configure(foreground=colour) payload = { "profile": profile.__dict__, "suggested_contracts": sample_size, @@ -397,11 +481,28 @@ def _show_risk(self) -> None: "losses": profile.cooldown_losses, "minutes": profile.cooldown_minutes, }, + "topstep_guard": guard, "next_steps": "If guard shows DEFENSIVE_MODE, stand down and review journal before trading.", } self.output.delete("1.0", tk.END) self.output.insert(tk.END, json_dumps(payload)) + guard_message = ( + "Topstep guard assessment completed.\n\n" + f"Suggested contracts: {sample_size}.\n" + f"Daily loss cap: ${profile.max_daily_loss}.\n" + "Cooldown policy: " + f"{profile.cooldown_losses} losses → wait {profile.cooldown_minutes} minutes." + ) + + if guard == "OK": + messagebox.showinfo("Topstep Guard", guard_message) + else: + warning_message = ( + f"{guard_message}\n\nDEFENSIVE_MODE active. Stand down and review your journal before trading." 
+ ) + messagebox.showwarning("Topstep Guard", warning_message) + __all__ = [ "LoginTab", diff --git a/toptek/main.py b/toptek/main.py index 53fb60d..fe0c683 100644 --- a/toptek/main.py +++ b/toptek/main.py @@ -43,7 +43,25 @@ def run_cli(args: argparse.Namespace, configs: Dict[str, Dict[str, object]], pat y = (np.diff(df["close"], prepend=df["close"].iloc[0]) > 0).astype(int) if args.cli == "train": - result = model.train_classifier(X, y, model_type=args.model, models_dir=paths.models) + try: + result = model.train_classifier(X, y, model_type=args.model, models_dir=paths.models) + except ValueError as exc: + logger.error("Training failed: %s", exc) + return + preprocess = result.preprocessing or {} + if preprocess: + logger.info( + "Preprocessing summary: imputed=%s dropped_rows=%s dropped_columns=%s", + preprocess.get("imputed_cells", 0), + preprocess.get("dropped_rows", 0), + preprocess.get("dropped_columns", 0), + ) + if result.retained_columns is not None and result.original_feature_count is not None: + logger.info( + "Retained %s of %s feature columns after cleaning", + len(result.retained_columns), + result.original_feature_count, + ) logger.info("Training complete: metrics=%s threshold=%.2f", result.metrics, result.threshold) elif args.cli == "backtest": returns = np.log(df["close"]).diff().fillna(0).to_numpy() From 29234b8695aca80f1b07bd25fa223299e8782940 Mon Sep 17 00:00:00 2001 From: Newcoderorigin Date: Fri, 3 Oct 2025 20:51:32 -0500 Subject: [PATCH 2/3] Resolve training and calibration regressions --- toptek/core/model.py | 13 ++++++++++--- toptek/gui/widgets.py | 15 +++++++-------- toptek/main.py | 30 +++++++++++++++++------------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/toptek/core/model.py b/toptek/core/model.py index ded2ae2..143db50 100644 --- a/toptek/core/model.py +++ b/toptek/core/model.py @@ -150,11 +150,16 @@ def train_classifier( else: retained_columns = None - if np.unique(y).size < 2: + classes, class_counts = 
np.unique(y, return_counts=True) + if classes.size < 2: raise ValueError("Training requires at least two target classes") + if class_counts.min() < 2: + raise ValueError( + "Each class needs at least two samples to support a validation split" + ) X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, shuffle=True, random_state=42 + X, y, test_size=0.2, shuffle=True, random_state=42, stratify=y ) if model_type == "logistic": @@ -176,7 +181,9 @@ def train_classifier( preds = (proba >= threshold).astype(int) metrics = { "accuracy": float(accuracy_score(y_test, preds)), - "roc_auc": float(roc_auc_score(y_test, proba)), + "roc_auc": float(roc_auc_score(y_test, proba)) + if np.unique(y_test).size > 1 + else float("nan"), } models_dir.mkdir(parents=True, exist_ok=True) model_path = models_dir / f"{model_type}_model.pkl" diff --git a/toptek/gui/widgets.py b/toptek/gui/widgets.py index 3659a3d..b92ea08 100644 --- a/toptek/gui/widgets.py +++ b/toptek/gui/widgets.py @@ -284,7 +284,6 @@ def _train_model(self) -> None: level="info", ) calibrate_report = "skipped" - calibration_detail: str | None = None calibration_failed = False if self.calibrate_var.get() and len(X) > 60: cal_size = max(60, int(len(X) * 0.2)) @@ -293,8 +292,8 @@ def _train_model(self) -> None: calibrate_kwargs = {} if result.retained_columns is not None: calibrate_kwargs["feature_mask"] = result.retained_columns - if result.original_feature_count is not None: - calibrate_kwargs["original_feature_count"] = result.original_feature_count + if result.original_feature_count is not None: + calibrate_kwargs["original_feature_count"] = result.original_feature_count try: calibrated_path = model.calibrate_classifier( result.model_path, @@ -303,7 +302,6 @@ def _train_model(self) -> None: ) except (ValueError, RuntimeError) as exc: calibrate_report = f"calibration failed: {exc}" - calibration_detail = calibrate_report calibration_failed = True self.log_event( f"Calibration failed for 
{result.model_path.name}: {exc}", @@ -323,12 +321,11 @@ def _train_model(self) -> None: ) else: calibrate_report = f"calibrated → {calibrated_path.name}" - calibration_detail = calibrate_report self.log_event( f"Calibration completed for {result.model_path.name} → {calibrated_path.name}", level="info", ) - calibrate_value = calibrate_report if not calibration_failed else "skipped" + calibrate_value = "skipped" if calibration_failed else calibrate_report self.output.delete("1.0", tk.END) payload = { "model": self.model_type.get(), @@ -338,12 +335,14 @@ def _train_model(self) -> None: "retained_columns": list(result.retained_columns) if result.retained_columns is not None else None, "original_feature_count": result.original_feature_count, "calibration": calibrate_value, - "calibration_detail": calibration_detail, } self.output.insert(tk.END, json_dumps(payload)) self.update_section("training", payload) if not calibration_failed: - self.status.config(text="Model artefact refreshed. Continue to Backtest ▶", foreground="") + self.status.config( + text="Model artefact refreshed. 
Continue to Backtest ▶", + foreground="#15803d", + ) class BacktestTab(BaseTab): diff --git a/toptek/main.py b/toptek/main.py index fe0c683..4d93114 100644 --- a/toptek/main.py +++ b/toptek/main.py @@ -49,19 +49,23 @@ def run_cli(args: argparse.Namespace, configs: Dict[str, Dict[str, object]], pat logger.error("Training failed: %s", exc) return preprocess = result.preprocessing or {} - if preprocess: - logger.info( - "Preprocessing summary: imputed=%s dropped_rows=%s dropped_columns=%s", - preprocess.get("imputed_cells", 0), - preprocess.get("dropped_rows", 0), - preprocess.get("dropped_columns", 0), - ) - if result.retained_columns is not None and result.original_feature_count is not None: - logger.info( - "Retained %s of %s feature columns after cleaning", - len(result.retained_columns), - result.original_feature_count, - ) + logger.info( + "Preprocessing summary: imputed=%s dropped_rows=%s dropped_columns=%s", + preprocess.get("imputed_cells", 0), + preprocess.get("dropped_rows", 0), + preprocess.get("dropped_columns", 0), + ) + if result.original_feature_count is not None: + if result.retained_columns is not None: + logger.info( + "Retained %s of %s feature columns after cleaning", + len(result.retained_columns), + result.original_feature_count, + ) + else: + logger.info( + "All %s original feature columns retained", result.original_feature_count + ) logger.info("Training complete: metrics=%s threshold=%.2f", result.metrics, result.threshold) elif args.cli == "backtest": returns = np.log(df["close"]).diff().fillna(0).to_numpy() From cf83e8412ca92c24ee2526792e49e7a6412f1b48 Mon Sep 17 00:00:00 2001 From: Newcoderorigin Date: Fri, 3 Oct 2025 21:09:57 -0500 Subject: [PATCH 3/3] chore: annotate calibration kwargs --- toptek/gui/widgets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toptek/gui/widgets.py b/toptek/gui/widgets.py index b92ea08..8e242c8 100644 --- a/toptek/gui/widgets.py +++ b/toptek/gui/widgets.py @@ -289,7 +289,7 @@ def 
_train_model(self) -> None: cal_size = max(60, int(len(X) * 0.2)) X_cal = X[-cal_size:] y_cal = y[-cal_size:] - calibrate_kwargs = {} + calibrate_kwargs: dict[str, object] = {} if result.retained_columns is not None: calibrate_kwargs["feature_mask"] = result.retained_columns if result.original_feature_count is not None: