From 9b659b4ca59760fdbe2ec3d8b9af58d9543c7b31 Mon Sep 17 00:00:00 2001
From: Newcoderorigin <tmbfc1@outlook.com>
Date: Fri, 3 Oct 2025 20:40:22 -0500
Subject: [PATCH 1/3] Ensure calibration artifacts write to existing
 directories

---
 toptek/core/model.py  | 250 ++++++++++++++++++++++++++++++++++++++++--
 toptek/gui/widgets.py | 109 +++++++++++++++++-
 toptek/main.py        |  20 +++-
 3 files changed, 363 insertions(+), 16 deletions(-)

diff --git a/toptek/core/model.py b/toptek/core/model.py
index a728551..ded2ae2 100644
--- a/toptek/core/model.py
+++ b/toptek/core/model.py
@@ -1,9 +1,23 @@
-"""Simple machine-learning helpers for classification models."""
+"""Simple machine-learning helpers for classification models.
+
+This module offers light-weight wrappers around scikit-learn estimators to
+standardise the training and calibration workflows used by the Toptek GUI and
+CLI tools.
+
+Example
+-------
+>>> import numpy as np
+>>> from pathlib import Path
+>>> X = np.random.randn(200, 6)
+>>> y = (X[:, 0] > 0).astype(int)
+>>> result = train_classifier(X, y, models_dir=Path("models"))
+>>> _ = calibrate_classifier(result.model_path, (X[:40], y[:40]))
+"""
 
 from __future__ import annotations
 
 import pickle
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Dict, Tuple
 
@@ -13,15 +27,39 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score, roc_auc_score
 from sklearn.model_selection import train_test_split
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
 
 
 @dataclass
 class TrainResult:
-    """Container for training outcomes."""
+    """Container for training outcomes.
+
+    Attributes
+    ----------
+    model_path:
+        Filesystem location of the persisted estimator pipeline.
+    metrics:
+        Dictionary containing evaluation metrics computed on the validation
+        split.
+    threshold:
+        Decision threshold used when deriving discrete class predictions from
+        probabilities.
+    preprocessing:
+        Summary statistics describing how the feature matrix was sanitised.
+    retained_columns:
+        Tuple of retained column indices relative to the original feature
+        matrix. ``None`` when no trimming occurred.
+    original_feature_count:
+        Column count observed before preprocessing.
+    """
 
     model_path: Path
     metrics: Dict[str, float]
     threshold: float
+    preprocessing: Dict[str, int] = field(default_factory=dict)
+    retained_columns: tuple[int, ...] | None = None
+    original_feature_count: int | None = None
 
 
 def train_classifier(
@@ -32,19 +70,109 @@ def train_classifier(
     models_dir: Path,
     threshold: float = 0.65,
 ) -> TrainResult:
-    """Train a basic classifier and persist it to ``models_dir``."""
+    """Train a basic classifier and persist it to ``models_dir``.
+
+    Parameters
+    ----------
+    X:
+        Feature matrix to train on. The routine casts the payload to ``float`` and
+        sanitises non-finite entries prior to fitting.
+    y:
+        Target labels associated with ``X``. The labels must contain at least two
+        distinct classes.
+    model_type:
+        Name of the estimator to fit (``"logistic"`` or ``"gbm"``).
+    models_dir:
+        Directory where the fitted pipeline should be persisted.
+    threshold:
+        Probability threshold for translating predictions into class labels when
+        deriving simple metrics.
+
+    Returns
+    -------
+    TrainResult
+        Metadata about the persisted model, including preprocessing telemetry.
+
+    Raises
+    ------
+    ValueError
+        If the feature matrix is not two-dimensional or lacks usable rows or
+        columns after cleaning, if the target labels contain non-finite values or
+        collapse into a single class, or if ``model_type`` is unknown.
+
+    Example
+    -------
+    >>> import numpy as np
+    >>> from pathlib import Path
+    >>> X = np.random.rand(120, 4)
+    >>> y = (X[:, 0] > 0.5).astype(int)
+    >>> train_classifier(X, y, models_dir=Path("models"))
+    TrainResult(...)
+    """
+
+    if X.ndim != 2:
+        raise ValueError("Feature matrix must be 2-dimensional")
+
+    X = np.asarray(X, dtype=float)
+    y = np.asarray(y).ravel()
+
+    original_feature_count = int(X.shape[1])
+
+    if not np.isfinite(y).all():
+        raise ValueError(
+            "Target labels contain NaN or inf values; clean the labels before training"
+        )
 
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
+    non_finite_mask = ~np.isfinite(X)
+    imputed_cells = int(non_finite_mask.sum())
+    if imputed_cells:
+        X = X.copy()
+        X[non_finite_mask] = np.nan
+
+    row_all_nan = np.isnan(X).all(axis=1)
+    dropped_rows = int(row_all_nan.sum())
+    if dropped_rows:
+        X = X[~row_all_nan]
+        y = y[~row_all_nan]
+
+    if X.size == 0:
+        raise ValueError("No valid feature rows remain after removing all-NaN rows")
+
+    col_all_nan = np.isnan(X).all(axis=0)
+    dropped_columns = int(col_all_nan.sum())
+    retained_columns: tuple[int, ...] | None = None
+    if dropped_columns:
+        valid_column_mask = ~col_all_nan
+        X = X[:, valid_column_mask]
+        if X.shape[1] == 0:
+            raise ValueError("All feature columns were empty after cleaning; cannot train")
+        retained_columns = tuple(int(idx) for idx in np.flatnonzero(valid_column_mask))
+    else:
+        retained_columns = None
+
+    if np.unique(y).size < 2:
+        raise ValueError("Training requires at least two target classes")
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, shuffle=True, random_state=42
+    )
 
     if model_type == "logistic":
-        model = LogisticRegression(max_iter=1000)
+        classifier = LogisticRegression(max_iter=1000)
     elif model_type == "gbm":
-        model = GradientBoostingClassifier()
+        classifier = GradientBoostingClassifier()
     else:
         raise ValueError("Unknown model type")
 
-    model.fit(X_train, y_train)
-    proba = model.predict_proba(X_test)[:, 1]
+    pipeline = Pipeline(
+        [
+            ("imputer", SimpleImputer(strategy="median")),
+            ("classifier", classifier),
+        ]
+    )
+
+    pipeline.fit(X_train, y_train)
+    proba = pipeline.predict_proba(X_test)[:, 1]
     preds = (proba >= threshold).astype(int)
     metrics = {
         "accuracy": float(accuracy_score(y_test, preds)),
@@ -53,8 +181,19 @@ def train_classifier(
     models_dir.mkdir(parents=True, exist_ok=True)
     model_path = models_dir / f"{model_type}_model.pkl"
     with model_path.open("wb") as handle:
-        pickle.dump(model, handle)
-    return TrainResult(model_path=model_path, metrics=metrics, threshold=threshold)
+        pickle.dump(pipeline, handle)
+    return TrainResult(
+        model_path=model_path,
+        metrics=metrics,
+        threshold=threshold,
+        preprocessing={
+            "imputed_cells": imputed_cells,
+            "dropped_rows": dropped_rows,
+            "dropped_columns": dropped_columns,
+        },
+        retained_columns=retained_columns,
+        original_feature_count=original_feature_count,
+    )
 
 
 def load_model(model_path: Path):
@@ -70,6 +209,8 @@ def calibrate_classifier(
     *,
     method: str = "sigmoid",
     output_path: Path | None = None,
+    feature_mask: tuple[int, ...] | np.ndarray | None = None,
+    original_feature_count: int | None = None,
 ) -> Path:
     """Calibrate a pre-trained classifier using hold-out data.
 
@@ -84,17 +225,104 @@ def calibrate_classifier(
     output_path:
         Optional override for where the calibrated model should be persisted.
 
+    feature_mask:
+        Optional sequence of retained column indices from the original feature
+        matrix, unique and sorted in ascending order. When provided, the
+        calibration features are subset to match the training-time dimensionality.
+    original_feature_count:
+        Number of columns in the original training feature matrix. Used to
+        determine whether the calibration payload still contains the untouched
+        feature space or has already been trimmed.
+
     Returns
     -------
     Path
         Filesystem path to the calibrated model artefact.
+
+    Raises
+    ------
+    ValueError
+        If the calibration payload is malformed, references invalid feature
+        indices, or lacks class diversity.
+
+    Example
+    -------
+    >>> from pathlib import Path
+    >>> import numpy as np
+    >>> model_path = Path("models/logistic_model.pkl")
+    >>> X_cal = np.random.rand(50, 4)
+    >>> y_cal = (X_cal[:, 0] > 0.5).astype(int)
+    >>> calibrate_classifier(model_path, (X_cal, y_cal))
+    PosixPath('models/logistic_model_calibrated.pkl')
     """
 
     X_cal, y_cal = calibration_data
+    X_cal = np.asarray(X_cal, dtype=float)
+    y_cal = np.asarray(y_cal).ravel()
+
+    if X_cal.ndim != 2:
+        raise ValueError("Calibration feature matrix must be 2-dimensional")
+
+    if not np.isfinite(y_cal).all():
+        raise ValueError(
+            "Calibration labels contain NaN or inf values; clean the labels before calibrating"
+        )
+
+    if feature_mask is not None:
+        indices = np.asarray(feature_mask, dtype=int)
+        if indices.ndim != 1:
+            raise ValueError("Feature mask must be a 1-D sequence of column indices")
+        if indices.size == 0:
+            raise ValueError("Feature mask is empty; cannot realign calibration features")
+        if (indices < 0).any():
+            raise ValueError("Feature mask cannot include negative column indices")
+        if not np.all(np.diff(indices) >= 0):
+            raise ValueError("Feature mask must be sorted in ascending order")
+        if np.unique(indices).size != indices.size:
+            raise ValueError("Feature mask contains duplicate column indices")
+
+        max_index = int(indices.max())
+        if original_feature_count is None or original_feature_count == X_cal.shape[1]:
+            if max_index >= X_cal.shape[1]:
+                raise ValueError(
+                    "Feature mask references columns beyond the calibration matrix bounds"
+                )
+            X_cal = X_cal[:, indices]
+        elif X_cal.shape[1] == indices.size:
+            # Calibration payload already trimmed to the retained columns. We assume the
+            # supplied feature order already matches the mask order since we no longer
+            # have the dropped columns to cross-check against.
+            pass
+        else:
+            raise ValueError(
+                "Calibration payload has unexpected dimensionality relative to the training mask"
+            )
+
+    non_finite = ~np.isfinite(X_cal)
+    if non_finite.any():
+        X_cal = X_cal.copy()
+        X_cal[non_finite] = np.nan
+
+    row_all_nan = np.isnan(X_cal).all(axis=1)
+    if row_all_nan.any():
+        X_cal = X_cal[~row_all_nan]
+        y_cal = y_cal[~row_all_nan]
+    if X_cal.size == 0:
+        raise ValueError("No valid calibration rows remain after cleaning")
+
+    if np.unique(y_cal).size < 2:
+        raise ValueError("Calibration requires at least two target classes")
+
     pipeline = load_model(model_path)
+    expected_features = getattr(pipeline, "n_features_in_", None)
+    if expected_features is not None and X_cal.shape[1] != expected_features:
+        raise ValueError(
+            "Calibration feature matrix shape does not match the fitted model"
+        )
     calibrator = CalibratedClassifierCV(estimator=pipeline, method=method, cv="prefit")
     calibrator.fit(X_cal, y_cal)
     target_path = output_path or model_path.with_name(f"{model_path.stem}_calibrated.pkl")
+    target_path.parent.mkdir(parents=True, exist_ok=True)
     with target_path.open("wb") as handle:
         pickle.dump(calibrator, handle)
     return target_path
diff --git a/toptek/gui/widgets.py b/toptek/gui/widgets.py
index bb509bf..3659a3d 100644
--- a/toptek/gui/widgets.py
+++ b/toptek/gui/widgets.py
@@ -227,17 +227,83 @@ def _train_model(self) -> None:
         feat_map = features.compute_features(df)
         X = np.column_stack(list(feat_map.values()))
         y = (np.diff(df["close"], prepend=df["close"].iloc[0]) > 0).astype(int)
-        result = model.train_classifier(X, y, model_type=self.model_type.get(), models_dir=self.paths.models)
+
+        unique_labels = np.unique(y)
+        if unique_labels.size < 2:
+            self.status.config(
+                text="Training aborted: target labels lack class diversity.", foreground="#b91c1c"
+            )
+            messagebox.showwarning(
+                "Training warning",
+                (
+                    "Training requires at least two classes after cleaning the dataset.\n"
+                    "Collect more data or adjust preprocessing to obtain both up and down samples."
+                ),
+            )
+            return
+
+        try:
+            result = model.train_classifier(
+                X, y, model_type=self.model_type.get(), models_dir=self.paths.models
+            )
+        except ValueError as exc:
+            self.status.config(
+                text="Training failed due to invalid feature matrix. Review warnings.",
+                foreground="#b91c1c",
+            )
+            messagebox.showwarning(
+                "Training warning",
+                (
+                    "The classifier could not be trained with the current dataset.\n\n"
+                    f"Details: {exc}"
+                ),
+            )
+            self.log_event(f"Model training failed: {exc}", level="error")
+            return
+        preprocessing = result.preprocessing or {}
+        prep_notes = []
+        if preprocessing.get("imputed_cells"):
+            prep_notes.append(
+                f"imputed {preprocessing['imputed_cells']} feature values"
+            )
+        if preprocessing.get("dropped_rows"):
+            prep_notes.append(
+                f"dropped {preprocessing['dropped_rows']} all-NaN rows"
+            )
+        if preprocessing.get("dropped_columns"):
+            prep_notes.append(
+                f"removed {preprocessing['dropped_columns']} empty columns"
+            )
+        if result.retained_columns is not None and result.original_feature_count is not None:
+            prep_notes.append(
+                f"retained {len(result.retained_columns)} of {result.original_feature_count} feature columns"
+            )
+        if prep_notes:
+            self.log_event(
+                "Preprocessing summary: " + ", ".join(prep_notes),
+                level="info",
+            )
         calibrate_report = "skipped"
+        calibration_detail: str | None = None
         calibration_failed = False
         if self.calibrate_var.get() and len(X) > 60:
             cal_size = max(60, int(len(X) * 0.2))
             X_cal = X[-cal_size:]
             y_cal = y[-cal_size:]
+            calibrate_kwargs = {}
+            if result.retained_columns is not None:
+                calibrate_kwargs["feature_mask"] = result.retained_columns
+                if result.original_feature_count is not None:
+                    calibrate_kwargs["original_feature_count"] = result.original_feature_count
             try:
-                calibrated_path = model.calibrate_classifier(result.model_path, (X_cal, y_cal))
+                calibrated_path = model.calibrate_classifier(
+                    result.model_path,
+                    (X_cal, y_cal),
+                    **calibrate_kwargs,
+                )
             except (ValueError, RuntimeError) as exc:
                 calibrate_report = f"calibration failed: {exc}"
+                calibration_detail = calibrate_report
                 calibration_failed = True
                 self.log_event(
                     f"Calibration failed for {result.model_path.name}: {exc}",
@@ -257,16 +323,22 @@ def _train_model(self) -> None:
                 )
             else:
                 calibrate_report = f"calibrated → {calibrated_path.name}"
+                calibration_detail = calibrate_report
                 self.log_event(
                     f"Calibration completed for {result.model_path.name} → {calibrated_path.name}",
                     level="info",
                 )
+        calibrate_value = calibrate_report if not calibration_failed else "skipped"
         self.output.delete("1.0", tk.END)
         payload = {
             "model": self.model_type.get(),
             "metrics": result.metrics,
             "threshold": result.threshold,
-            "calibration": calibrate_report,
+            "preprocessing": preprocessing,
+            "retained_columns": list(result.retained_columns) if result.retained_columns is not None else None,
+            "original_feature_count": result.original_feature_count,
+            "calibration": calibrate_value,
+            "calibration_detail": calibration_detail,
         }
         self.output.insert(tk.END, json_dumps(payload))
         self.update_section("training", payload)
@@ -350,6 +422,7 @@ class TradeTab(BaseTab):
 
     def __init__(self, master: ttk.Notebook, configs: Dict[str, Dict[str, object]], paths: utils.AppPaths) -> None:
         self.guard_status = tk.StringVar(value="Topstep Guard: pending review")
+        self.guard_label: ttk.Label | None = None
         super().__init__(master, configs, paths)
         self._build()
 
@@ -366,7 +439,8 @@ def _build(self) -> None:
             justify=tk.LEFT,
         ).pack(anchor=tk.W)
 
-        ttk.Label(intro, textvariable=self.guard_status, foreground="#1d4ed8").pack(anchor=tk.W, pady=(8, 0))
+        self.guard_label = ttk.Label(intro, textvariable=self.guard_status, foreground="#1d4ed8")
+        self.guard_label.pack(anchor=tk.W, pady=(8, 0))
 
         ttk.Button(self, text="Refresh Topstep guard", command=self._show_risk).pack(pady=(6, 0))
         self.output = tk.Text(self, height=12)
@@ -378,6 +452,13 @@ def _build(self) -> None:
         )
 
     def _show_risk(self) -> None:
+        """Refresh the Topstep guard summary and surface a contextual dialog.
+
+        The check recalculates a sample position size using the configured
+        Topstep profile, updates the guard label colouring, and displays either
+        an informational or warning dialog depending on whether the guard
+        remains in ``OK`` or moves into ``DEFENSIVE_MODE``.
+        """
         profile = risk.RiskProfile(
             max_position_size=self.configs["risk"].get("max_position_size", 1),
             max_daily_loss=self.configs["risk"].get("max_daily_loss", 1000),
@@ -389,6 +470,9 @@ def _show_risk(self) -> None:
         sample_size = risk.position_size(50000, profile, atr=3.5, tick_value=12.5, risk_per_trade=0.01)
         guard = "OK" if sample_size > 0 else "DEFENSIVE_MODE"
         self.guard_status.set(f"Topstep Guard: {guard}")
+        if self.guard_label is not None:
+            colour = "#15803d" if guard == "OK" else "#b91c1c"
+            self.guard_label.configure(foreground=colour)
         payload = {
             "profile": profile.__dict__,
             "suggested_contracts": sample_size,
@@ -397,11 +481,28 @@ def _show_risk(self) -> None:
                 "losses": profile.cooldown_losses,
                 "minutes": profile.cooldown_minutes,
             },
+            "topstep_guard": guard,
             "next_steps": "If guard shows DEFENSIVE_MODE, stand down and review journal before trading.",
         }
         self.output.delete("1.0", tk.END)
         self.output.insert(tk.END, json_dumps(payload))
 
+        guard_message = (
+            "Topstep guard assessment completed.\n\n"
+            f"Suggested contracts: {sample_size}.\n"
+            f"Daily loss cap: ${profile.max_daily_loss}.\n"
+            "Cooldown policy: "
+            f"{profile.cooldown_losses} losses → wait {profile.cooldown_minutes} minutes."
+        )
+
+        if guard == "OK":
+            messagebox.showinfo("Topstep Guard", guard_message)
+        else:
+            warning_message = (
+                f"{guard_message}\n\nDEFENSIVE_MODE active. Stand down and review your journal before trading."
+            )
+            messagebox.showwarning("Topstep Guard", warning_message)
+
 
 __all__ = [
     "LoginTab",
diff --git a/toptek/main.py b/toptek/main.py
index 53fb60d..fe0c683 100644
--- a/toptek/main.py
+++ b/toptek/main.py
@@ -43,7 +43,25 @@ def run_cli(args: argparse.Namespace, configs: Dict[str, Dict[str, object]], pat
     y = (np.diff(df["close"], prepend=df["close"].iloc[0]) > 0).astype(int)
 
     if args.cli == "train":
-        result = model.train_classifier(X, y, model_type=args.model, models_dir=paths.models)
+        try:
+            result = model.train_classifier(X, y, model_type=args.model, models_dir=paths.models)
+        except ValueError as exc:
+            logger.error("Training failed: %s", exc)
+            return
+        preprocess = result.preprocessing or {}
+        if preprocess:
+            logger.info(
+                "Preprocessing summary: imputed=%s dropped_rows=%s dropped_columns=%s",
+                preprocess.get("imputed_cells", 0),
+                preprocess.get("dropped_rows", 0),
+                preprocess.get("dropped_columns", 0),
+            )
+        if result.retained_columns is not None and result.original_feature_count is not None:
+            logger.info(
+                "Retained %s of %s feature columns after cleaning",
+                len(result.retained_columns),
+                result.original_feature_count,
+            )
         logger.info("Training complete: metrics=%s threshold=%.2f", result.metrics, result.threshold)
     elif args.cli == "backtest":
         returns = np.log(df["close"]).diff().fillna(0).to_numpy()

From 29234b8695aca80f1b07bd25fa223299e8782940 Mon Sep 17 00:00:00 2001
From: Newcoderorigin <tmbfc1@outlook.com>
Date: Fri, 3 Oct 2025 20:51:32 -0500
Subject: [PATCH 2/3] Resolve training and calibration regressions

---
 toptek/core/model.py  | 13 ++++++++++---
 toptek/gui/widgets.py | 15 +++++++--------
 toptek/main.py        | 30 +++++++++++++++++-------------
 3 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/toptek/core/model.py b/toptek/core/model.py
index ded2ae2..143db50 100644
--- a/toptek/core/model.py
+++ b/toptek/core/model.py
@@ -150,11 +150,16 @@ def train_classifier(
     else:
         retained_columns = None
 
-    if np.unique(y).size < 2:
+    classes, class_counts = np.unique(y, return_counts=True)
+    if classes.size < 2:
         raise ValueError("Training requires at least two target classes")
+    if class_counts.min() < 2:
+        raise ValueError(
+            "Each class needs at least two samples to support a validation split"
+        )
 
     X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, shuffle=True, random_state=42
+        X, y, test_size=0.2, shuffle=True, random_state=42, stratify=y
     )
 
     if model_type == "logistic":
@@ -176,7 +181,9 @@ def train_classifier(
     preds = (proba >= threshold).astype(int)
     metrics = {
         "accuracy": float(accuracy_score(y_test, preds)),
-        "roc_auc": float(roc_auc_score(y_test, proba)),
+        "roc_auc": float(roc_auc_score(y_test, proba))
+        if np.unique(y_test).size > 1
+        else float("nan"),
     }
     models_dir.mkdir(parents=True, exist_ok=True)
     model_path = models_dir / f"{model_type}_model.pkl"
diff --git a/toptek/gui/widgets.py b/toptek/gui/widgets.py
index 3659a3d..b92ea08 100644
--- a/toptek/gui/widgets.py
+++ b/toptek/gui/widgets.py
@@ -284,7 +284,6 @@ def _train_model(self) -> None:
                 level="info",
             )
         calibrate_report = "skipped"
-        calibration_detail: str | None = None
         calibration_failed = False
         if self.calibrate_var.get() and len(X) > 60:
             cal_size = max(60, int(len(X) * 0.2))
@@ -293,8 +292,8 @@ def _train_model(self) -> None:
             calibrate_kwargs = {}
             if result.retained_columns is not None:
                 calibrate_kwargs["feature_mask"] = result.retained_columns
-                if result.original_feature_count is not None:
-                    calibrate_kwargs["original_feature_count"] = result.original_feature_count
+            if result.original_feature_count is not None:
+                calibrate_kwargs["original_feature_count"] = result.original_feature_count
             try:
                 calibrated_path = model.calibrate_classifier(
                     result.model_path,
@@ -303,7 +302,6 @@ def _train_model(self) -> None:
                 )
             except (ValueError, RuntimeError) as exc:
                 calibrate_report = f"calibration failed: {exc}"
-                calibration_detail = calibrate_report
                 calibration_failed = True
                 self.log_event(
                     f"Calibration failed for {result.model_path.name}: {exc}",
@@ -323,12 +321,11 @@ def _train_model(self) -> None:
                 )
             else:
                 calibrate_report = f"calibrated → {calibrated_path.name}"
-                calibration_detail = calibrate_report
                 self.log_event(
                     f"Calibration completed for {result.model_path.name} → {calibrated_path.name}",
                     level="info",
                 )
-        calibrate_value = calibrate_report if not calibration_failed else "skipped"
+        calibrate_value = "skipped" if calibration_failed else calibrate_report
         self.output.delete("1.0", tk.END)
         payload = {
             "model": self.model_type.get(),
@@ -338,12 +335,14 @@ def _train_model(self) -> None:
             "retained_columns": list(result.retained_columns) if result.retained_columns is not None else None,
             "original_feature_count": result.original_feature_count,
             "calibration": calibrate_value,
-            "calibration_detail": calibration_detail,
         }
         self.output.insert(tk.END, json_dumps(payload))
         self.update_section("training", payload)
         if not calibration_failed:
-            self.status.config(text="Model artefact refreshed. Continue to Backtest ▶", foreground="")
+            self.status.config(
+                text="Model artefact refreshed. Continue to Backtest ▶",
+                foreground="#15803d",
+            )
 
 
 class BacktestTab(BaseTab):
diff --git a/toptek/main.py b/toptek/main.py
index fe0c683..4d93114 100644
--- a/toptek/main.py
+++ b/toptek/main.py
@@ -49,19 +49,23 @@ def run_cli(args: argparse.Namespace, configs: Dict[str, Dict[str, object]], pat
             logger.error("Training failed: %s", exc)
             return
         preprocess = result.preprocessing or {}
-        if preprocess:
-            logger.info(
-                "Preprocessing summary: imputed=%s dropped_rows=%s dropped_columns=%s",
-                preprocess.get("imputed_cells", 0),
-                preprocess.get("dropped_rows", 0),
-                preprocess.get("dropped_columns", 0),
-            )
-        if result.retained_columns is not None and result.original_feature_count is not None:
-            logger.info(
-                "Retained %s of %s feature columns after cleaning",
-                len(result.retained_columns),
-                result.original_feature_count,
-            )
+        logger.info(
+            "Preprocessing summary: imputed=%s dropped_rows=%s dropped_columns=%s",
+            preprocess.get("imputed_cells", 0),
+            preprocess.get("dropped_rows", 0),
+            preprocess.get("dropped_columns", 0),
+        )
+        if result.original_feature_count is not None:
+            if result.retained_columns is not None:
+                logger.info(
+                    "Retained %s of %s feature columns after cleaning",
+                    len(result.retained_columns),
+                    result.original_feature_count,
+                )
+            else:
+                logger.info(
+                    "All %s original feature columns retained", result.original_feature_count
+                )
         logger.info("Training complete: metrics=%s threshold=%.2f", result.metrics, result.threshold)
     elif args.cli == "backtest":
         returns = np.log(df["close"]).diff().fillna(0).to_numpy()

From cf83e8412ca92c24ee2526792e49e7a6412f1b48 Mon Sep 17 00:00:00 2001
From: Newcoderorigin <tmbfc1@outlook.com>
Date: Fri, 3 Oct 2025 21:09:57 -0500
Subject: [PATCH 3/3] chore: annotate calibration kwargs

---
 toptek/gui/widgets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/toptek/gui/widgets.py b/toptek/gui/widgets.py
index b92ea08..8e242c8 100644
--- a/toptek/gui/widgets.py
+++ b/toptek/gui/widgets.py
@@ -289,7 +289,7 @@ def _train_model(self) -> None:
             cal_size = max(60, int(len(X) * 0.2))
             X_cal = X[-cal_size:]
             y_cal = y[-cal_size:]
-            calibrate_kwargs = {}
+            calibrate_kwargs: dict[str, object] = {}
             if result.retained_columns is not None:
                 calibrate_kwargs["feature_mask"] = result.retained_columns
             if result.original_feature_count is not None: