In [1]:
import sys
import scanpy as sc

sys.path.append("../..")
from scripts.runner_models import cv_baselines_5fold

In [2]:
# Load data: each raw-count .h5ad path is declared right next to the read that uses it.
IN_PATH_neu = "../../data/raw_count/GSE169569_raw_counts.h5ad"
adata_neu = sc.read_h5ad(IN_PATH_neu)

IN_PATH_cov = "../../data/raw_count/GSE228841_raw_counts.h5ad"
adata_cov = sc.read_h5ad(IN_PATH_cov)

In [3]:
import numpy as np

# Extract data matrices and metadata from adata_neu.
# Layers first: the raw counts and the log2(1+CPM) layer.
X_counts = adata_neu.layers["counts"]  # raw counts
norm_layer = adata_neu.layers["log2_1p_CPM_original"]

# Gene identifiers and per-sample batch labels, both materialised as string arrays.
gene_names, batches = (
    np.array(values, dtype=str)
    for values in (adata_neu.var_names, adata_neu.obs["BioProject"])
)

In [4]:
def pick_score(summary_df, model, metric="NB_ll_thin", direction="maximize"):
    """
    Return the score of `model` for `metric` from a summary table.

    Parameters
    ----------
    summary_df : pandas.DataFrame
        Table with a "model" column and a column named `metric`.
    model : str
        Value to match exactly against the "model" column.
    metric : str
        Name of the column holding the score.
    direction : str
        "maximize" picks the largest value when several rows match; any other
        value picks the smallest.

    Returns
    -------
    float
        The metric of the single matching row, or of the best matching row
        (according to `direction`) when several rows match.

    Raises
    ------
    IndexError
        If no row of `summary_df` matches `model`.
    KeyError
        If the "model" or `metric` column is missing.
    """
    rows = summary_df[summary_df["model"] == model]

    # Fail with an actionable message instead of the opaque
    # "single positional indexer is out-of-bounds" raised by .iloc[0] on an empty frame.
    if rows.empty:
        available = list(summary_df["model"].unique())
        raise IndexError(
            f"No rows for model {model!r} in summary_df; available models: {available}"
        )

    if len(rows) > 1:
        # Several configurations for the same model: keep the best one.
        # sort_values places NaN last, so a NaN is only picked if every row is NaN.
        rows = rows.sort_values(metric, ascending=(direction != "maximize")).iloc[:1]

    return float(rows.iloc[0][metric])


In [7]:
import optuna
from sklearn.model_selection import ParameterGrid

# ----- Model-specific search space -----
def _baseline_search_space(trial, model_name: str) -> dict:
    """
    Return a dict of hyperparameters for the given baseline model using Optuna's suggest_* API.
    """
    m = model_name.upper()
    if m == "KNN":
        return {
            "n_neighbors": trial.suggest_int("n_neighbors", 3, 50, step=1),
            "weights": trial.suggest_categorical("weights", ["uniform", "distance"]),
        }
    elif m == "MAGIC":
        return {
            # include None in the search space
            "n_pca": trial.suggest_categorical("n_pca", [None, 30, 50, 100]),
            "t":     trial.suggest_int("t", 2, 5),                  # diffusion steps
            "knn":   trial.suggest_categorical("knn", [5, 10, 15, 30]),
        }
    elif m in ("MEAN", "MEDIAN"):
        # no hyperparameters to tune for these baselines
        return {}
    else:
        raise ValueError(f"Unknown baseline: {model_name}")

def _to_grid_one_point(params: dict) -> dict:
    """
    Convert a single-configuration dict into a ParameterGrid-compatible dict (values must be lists).
    """
    return {k: [v] for k, v in params.items()}

def tune_baseline_with_optuna(
    X_counts,
    model_name: str,
    n_trials: int,
    metric: str,                # name of a column of the CV summary, e.g. "NB_ll_thin"
    direction: str,             # "maximize" or "minimize"
    k: int, n_hvg: int, R: int,
    mask_frac: float, thinning_p: float, random_state: int,
    # HVG / batch settings
    hvg_mode: str,
    gene_names=None, batches=None, norm_layer=None,
    batch_key="batch", seurat_layer_name="log2_1p_CPM_original",
    labels=None,
    save_dir=None,
):
    """
    Search (or simply score) the hyperparameters of one baseline model.

    Every evaluation is one call to cv_baselines_5fold with a grid holding a
    single configuration. The objective is the mean of the `metric` column over
    the summary rows whose "model" equals the upper-cased `model_name`.

    Behaviour
    ---------
    - MEAN/MEDIAN, or any model when ``n_trials <= 1``: one evaluation with an
      empty grid, with `save_dir` forwarded to cv_baselines_5fold.
    - Otherwise: an Optuna study with a TPE sampler seeded by `random_state`
      runs `n_trials` evaluations. `save_dir` is not used on this path (each
      evaluation passes ``save_dir=None``). Each trial stores the summary
      records under the "summary" user attribute.

    Returns
    -------
    (dict, float)
        ``({}, score)`` for the single-evaluation path, otherwise
        ``(study.best_params, study.best_value)``.

    Raises
    ------
    KeyError
        If `metric` is not a column of the summary returned by cv_baselines_5fold.
    """
    baseline = model_name.upper()

    # Arguments that are identical for every cross-validation call.
    cv_kwargs = dict(
        k=k, n_hvg=n_hvg, R=R,
        mask_frac=mask_frac, thinning_p=thinning_p, random_state=random_state,
        hvg_mode=hvg_mode,
        gene_names=gene_names, batches=batches, norm_layer=norm_layer,
        batch_key=batch_key, seurat_layer_name=seurat_layer_name,
        labels=labels,
    )

    def _cv_score(grid: dict, out_dir):
        """Run the CV for one grid and return (summary, mean metric of this baseline)."""
        summary, _ = cv_baselines_5fold(
            X_counts,
            model_grids={baseline: grid},
            save_dir=out_dir,
            **cv_kwargs,
        )
        if metric not in summary.columns:
            raise KeyError(f"Metric '{metric}' not in summary columns: {list(summary.columns)}")
        own_rows = summary["model"] == baseline
        return summary, float(summary.loc[own_rows, metric].mean())

    # Nothing to search: a single evaluation with an empty grid.
    if baseline in ("MEAN", "MEDIAN") or n_trials <= 1:
        _, score = _cv_score({}, save_dir)
        return {}, score

    def objective(trial: optuna.Trial):
        sampled = _baseline_search_space(trial, baseline)
        # save_dir=None: no CSVs are written while searching.
        summary, score = _cv_score(_to_grid_one_point(sampled), None)
        # Keep the full summary on the trial for later inspection.
        trial.set_user_attr("summary", summary.to_dict("records"))
        return score

    sampler = optuna.samplers.TPESampler(seed=random_state)
    study = optuna.create_study(direction=direction, sampler=sampler)
    study.optimize(objective, n_trials=n_trials, gc_after_trial=True)

    return study.best_params, float(study.best_value)


In [None]:
# Settings shared by every tuning run AND by the final CV. They are defined once
# so the search and the final evaluation cannot silently drift apart.
CV_SETTINGS = dict(
    k=5, n_hvg=2000, R=3,
    mask_frac=0.10, thinning_p=0.10, random_state=123,
    hvg_mode="seurat_v3",
    gene_names=gene_names, batches=batches, norm_layer=norm_layer,
    batch_key="BioProject", seurat_layer_name="log2_1p_CPM_original",
    labels=None,  # pass labels if you want Silhouette to be computed
)
TUNING_OBJECTIVE = dict(metric="NB_ll_thin", direction="maximize")
# Trial budget per baseline; MEAN/MEDIAN have nothing to tune, so one evaluation each.
N_TRIALS = {"KNN": 40, "MAGIC": 40, "MEAN": 1, "MEDIAN": 1}

# NOTE(review): in the recorded output of this cell, KNN trials 0-3
# (n_neighbors 36/29/50/21, both weight modes) all report the identical objective
# value -1.1681311690940077. That suggests NB_ll_thin may not depend on the sampled
# hyperparameters -- verify inside cv_baselines_5fold before trusting best_knn.

# 1) Tune or evaluate each baseline
# Results are stored one by one, so a failure in a later (long) run does not
# discard the baselines that already finished.
tuned = {}
for baseline_name, trial_count in N_TRIALS.items():
    tuned[baseline_name] = tune_baseline_with_optuna(
        X_counts, model_name=baseline_name,
        n_trials=trial_count,
        save_dir=None,
        **TUNING_OBJECTIVE,
        **CV_SETTINGS,
    )

best_knn,    val_knn    = tuned["KNN"]
best_magic,  val_magic  = tuned["MAGIC"]
best_mean,   val_mean   = tuned["MEAN"]
best_median, val_median = tuned["MEDIAN"]

print("Best KNN:", best_knn, "val=", val_knn)
print("Best MAGIC:", best_magic, "val=", val_magic)
print("MEAN val=", val_mean, "MEDIAN val=", val_median)

# 2) Final 5-fold CV with the best settings (remember: ParameterGrid needs lists)
final_grid = {
    "MEAN":   {},
    "MEDIAN": {},
    "KNN":    _to_grid_one_point(best_knn),
    "MAGIC":  _to_grid_one_point(best_magic),
}

summary_df, details_df = cv_baselines_5fold(
    X_counts,
    model_grids=final_grid,
    save_dir="results/baselines_all_best",
    **CV_SETTINGS,
)


[I 2025-09-17 20:25:24,891] A new study created in memory with name: no-name-159b7145-53e3-4e9a-a045-7e2af4cebe13


2025-09-17 20:25:24 | [cv_baselines_5fold] start k=5 n_hvg=2000 mode=seurat_v3
2025-09-17 20:25:24 | X_counts: dense  (447, 51777) dtype=int64
2025-09-17 20:25:54 | X_counts: zeros=66.23%, approx_int=True
2025-09-17 20:25:54 | [time] [BASE] fold 1/5 start


  return fn(*args_all, **kw)


2025-09-17 20:27:20 | [BASE] fold 1 | HVG=2000
2025-09-17 20:27:20 | [BASE] fold 1 model=KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:27:20 | [build_baseline] KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:27:20 | [KNN] fit n_neighbors=36 weights=uniform
2025-09-17 20:27:20 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 20:27:20 | [KNN] predict_mean
2025-09-17 20:27:20 | [time] [KNN] impute start
2025-09-17 20:27:20 | [time] [KNN] impute end in 0.01s
2025-09-17 20:27:20 | [KNN] predict_mean
2025-09-17 20:27:20 | [time] [KNN] impute start
2025-09-17 20:27:21 | [time] [KNN] impute end in 0.55s
2025-09-17 20:27:21 | [KNN] predict_mean
2025-09-17 20:27:21 | [time] [KNN] impute start
2025-09-17 20:27:21 | [time] [KNN] impute end in 0.55s
2025-09-17 20:27:21 | [KNN] predict_mean
2025-09-17 20:27:21 | [time] [KNN] impute start
2025-09-17 20:27:22 | [time] [KNN] impute end in 0.54s
2025-09-17 20:27:22 | [KNN] predict_mean
2025-09-17 20:27:22 | [t

  return fn(*args_all, **kw)


2025-09-17 20:28:55 | [BASE] fold 2 | HVG=2000
2025-09-17 20:28:55 | [BASE] fold 2 model=KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:28:55 | [build_baseline] KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:28:55 | [KNN] fit n_neighbors=36 weights=uniform
2025-09-17 20:28:55 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 20:28:55 | [KNN] predict_mean
2025-09-17 20:28:55 | [time] [KNN] impute start
2025-09-17 20:28:55 | [time] [KNN] impute end in 0.01s
2025-09-17 20:28:55 | [KNN] predict_mean
2025-09-17 20:28:55 | [time] [KNN] impute start
2025-09-17 20:28:56 | [time] [KNN] impute end in 0.56s
2025-09-17 20:28:56 | [KNN] predict_mean
2025-09-17 20:28:56 | [time] [KNN] impute start
2025-09-17 20:28:56 | [time] [KNN] impute end in 0.55s
2025-09-17 20:28:56 | [KNN] predict_mean
2025-09-17 20:28:56 | [time] [KNN] impute start
2025-09-17 20:28:57 | [time] [KNN] impute end in 0.55s
2025-09-17 20:28:57 | [KNN] predict_mean
2025-09-17 20:28:57 | [t

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 20:30:23 | [BASE] fold 3 | HVG=2000
2025-09-17 20:30:23 | [BASE] fold 3 model=KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:30:23 | [build_baseline] KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:30:23 | [KNN] fit n_neighbors=36 weights=uniform
2025-09-17 20:30:23 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:30:23 | [KNN] predict_mean
2025-09-17 20:30:23 | [time] [KNN] impute start
2025-09-17 20:30:23 | [time] [KNN] impute end in 0.01s
2025-09-17 20:30:24 | [KNN] predict_mean
2025-09-17 20:30:24 | [time] [KNN] impute start
2025-09-17 20:30:24 | [time] [KNN] impute end in 0.55s
2025-09-17 20:30:24 | [KNN] predict_mean
2025-09-17 20:30:24 | [time] [KNN] impute start
2025-09-17 20:30:25 | [time] [KNN] impute end in 0.54s
2025-09-17 20:30:25 | [KNN] predict_mean
2025-09-17 20:30:25 | [time] [KNN] impute start
2025-09-17 20:30:25 | [time] [KNN] impute end in 0.54s
2025-09-17 20:30:25 | [KNN] predict_mean
2025-09-17 20:30:25 | [t

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 20:32:00 | [BASE] fold 4 | HVG=2000
2025-09-17 20:32:01 | [BASE] fold 4 model=KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:32:01 | [build_baseline] KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:32:01 | [KNN] fit n_neighbors=36 weights=uniform
2025-09-17 20:32:01 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:32:01 | [KNN] predict_mean
2025-09-17 20:32:01 | [time] [KNN] impute start
2025-09-17 20:32:01 | [time] [KNN] impute end in 0.01s
2025-09-17 20:32:01 | [KNN] predict_mean
2025-09-17 20:32:01 | [time] [KNN] impute start
2025-09-17 20:32:01 | [time] [KNN] impute end in 0.55s
2025-09-17 20:32:01 | [KNN] predict_mean
2025-09-17 20:32:01 | [time] [KNN] impute start
2025-09-17 20:32:02 | [time] [KNN] impute end in 0.54s
2025-09-17 20:32:02 | [KNN] predict_mean
2025-09-17 20:32:02 | [time] [KNN] impute start
2025-09-17 20:32:02 | [time] [KNN] impute end in 0.54s
2025-09-17 20:32:02 | [KNN] predict_mean
2025-09-17 20:32:02 | [t

  return fn(*args_all, **kw)


2025-09-17 20:33:29 | [BASE] fold 5 | HVG=2000
2025-09-17 20:33:29 | [BASE] fold 5 model=KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:33:29 | [build_baseline] KNN params={'n_neighbors': 36, 'weights': 'uniform'}
2025-09-17 20:33:29 | [KNN] fit n_neighbors=36 weights=uniform
2025-09-17 20:33:29 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:33:29 | [KNN] predict_mean
2025-09-17 20:33:29 | [time] [KNN] impute start
2025-09-17 20:33:29 | [time] [KNN] impute end in 0.01s
2025-09-17 20:33:29 | [KNN] predict_mean
2025-09-17 20:33:29 | [time] [KNN] impute start
2025-09-17 20:33:30 | [time] [KNN] impute end in 0.55s
2025-09-17 20:33:30 | [KNN] predict_mean
2025-09-17 20:33:30 | [time] [KNN] impute start
2025-09-17 20:33:31 | [time] [KNN] impute end in 0.54s
2025-09-17 20:33:31 | [KNN] predict_mean
2025-09-17 20:33:31 | [time] [KNN] impute start
2025-09-17 20:33:31 | [time] [KNN] impute end in 0.54s
2025-09-17 20:33:31 | [KNN] predict_mean
2025-09-17 20:33:31 | [t

[I 2025-09-17 20:33:31,763] Trial 0 finished with value: -1.1681311690940077 and parameters: {'n_neighbors': 36, 'weights': 'uniform'}. Best is trial 0 with value: -1.1681311690940077.


2025-09-17 20:33:31 | [cv_baselines_5fold] start k=5 n_hvg=2000 mode=seurat_v3
2025-09-17 20:33:31 | X_counts: dense  (447, 51777) dtype=int64
2025-09-17 20:34:04 | X_counts: zeros=66.23%, approx_int=True
2025-09-17 20:34:04 | [time] [BASE] fold 1/5 start


  return fn(*args_all, **kw)


2025-09-17 20:35:41 | [BASE] fold 1 | HVG=2000
2025-09-17 20:35:41 | [BASE] fold 1 model=KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:35:41 | [build_baseline] KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:35:41 | [KNN] fit n_neighbors=29 weights=uniform
2025-09-17 20:35:41 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 20:35:41 | [KNN] predict_mean
2025-09-17 20:35:41 | [time] [KNN] impute start
2025-09-17 20:35:41 | [time] [KNN] impute end in 0.01s
2025-09-17 20:35:41 | [KNN] predict_mean
2025-09-17 20:35:41 | [time] [KNN] impute start
2025-09-17 20:35:42 | [time] [KNN] impute end in 0.55s
2025-09-17 20:35:42 | [KNN] predict_mean
2025-09-17 20:35:42 | [time] [KNN] impute start
2025-09-17 20:35:42 | [time] [KNN] impute end in 0.55s
2025-09-17 20:35:42 | [KNN] predict_mean
2025-09-17 20:35:42 | [time] [KNN] impute start
2025-09-17 20:35:43 | [time] [KNN] impute end in 0.54s
2025-09-17 20:35:43 | [KNN] predict_mean
2025-09-17 20:35:43 | [t

  return fn(*args_all, **kw)


2025-09-17 20:37:29 | [BASE] fold 2 | HVG=2000
2025-09-17 20:37:29 | [BASE] fold 2 model=KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:37:29 | [build_baseline] KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:37:29 | [KNN] fit n_neighbors=29 weights=uniform
2025-09-17 20:37:29 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 20:37:29 | [KNN] predict_mean
2025-09-17 20:37:29 | [time] [KNN] impute start
2025-09-17 20:37:29 | [time] [KNN] impute end in 0.01s
2025-09-17 20:37:29 | [KNN] predict_mean
2025-09-17 20:37:29 | [time] [KNN] impute start
2025-09-17 20:37:29 | [time] [KNN] impute end in 0.56s
2025-09-17 20:37:29 | [KNN] predict_mean
2025-09-17 20:37:29 | [time] [KNN] impute start
2025-09-17 20:37:30 | [time] [KNN] impute end in 0.55s
2025-09-17 20:37:30 | [KNN] predict_mean
2025-09-17 20:37:30 | [time] [KNN] impute start


  + r * np.log(p) + x * np.log1p(-p))


2025-09-17 20:37:31 | [time] [KNN] impute end in 0.54s
2025-09-17 20:37:31 | [KNN] predict_mean
2025-09-17 20:37:31 | [time] [KNN] impute start
2025-09-17 20:37:31 | [time] [KNN] impute end in 0.01s
2025-09-17 20:37:31 | [KNN] predict_mean
2025-09-17 20:37:31 | [time] [KNN] impute start
2025-09-17 20:37:31 | [time] [KNN] impute end in 0.01s
2025-09-17 20:37:31 | [KNN] predict_mean
2025-09-17 20:37:31 | [time] [KNN] impute start
2025-09-17 20:37:31 | [time] [KNN] impute end in 0.00s
2025-09-17 20:37:31 | [time] [BASE] fold 2/5 end in 107.59s
2025-09-17 20:37:31 | [time] [BASE] fold 3/5 start


  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 20:39:13 | [BASE] fold 3 | HVG=2000
2025-09-17 20:39:13 | [BASE] fold 3 model=KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:39:13 | [build_baseline] KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:39:13 | [KNN] fit n_neighbors=29 weights=uniform
2025-09-17 20:39:13 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:39:13 | [KNN] predict_mean
2025-09-17 20:39:13 | [time] [KNN] impute start
2025-09-17 20:39:13 | [time] [KNN] impute end in 0.01s
2025-09-17 20:39:13 | [KNN] predict_mean
2025-09-17 20:39:13 | [time] [KNN] impute start
2025-09-17 20:39:14 | [time] [KNN] impute end in 0.55s
2025-09-17 20:39:14 | [KNN] predict_mean
2025-09-17 20:39:14 | [time] [KNN] impute start
2025-09-17 20:39:14 | [time] [KNN] impute end in 0.54s
2025-09-17 20:39:14 | [KNN] predict_mean
2025-09-17 20:39:14 | [time] [KNN] impute start
2025-09-17 20:39:15 | [time] [KNN] impute end in 0.54s
2025-09-17 20:39:15 | [KNN] predict_mean
2025-09-17 20:39:15 | [t

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 20:41:05 | [BASE] fold 4 | HVG=2000
2025-09-17 20:41:05 | [BASE] fold 4 model=KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:41:05 | [build_baseline] KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:41:05 | [KNN] fit n_neighbors=29 weights=uniform
2025-09-17 20:41:05 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:41:05 | [KNN] predict_mean
2025-09-17 20:41:05 | [time] [KNN] impute start
2025-09-17 20:41:05 | [time] [KNN] impute end in 0.01s
2025-09-17 20:41:05 | [KNN] predict_mean
2025-09-17 20:41:05 | [time] [KNN] impute start
2025-09-17 20:41:06 | [time] [KNN] impute end in 0.55s
2025-09-17 20:41:06 | [KNN] predict_mean
2025-09-17 20:41:06 | [time] [KNN] impute start
2025-09-17 20:41:06 | [time] [KNN] impute end in 0.54s
2025-09-17 20:41:06 | [KNN] predict_mean
2025-09-17 20:41:06 | [time] [KNN] impute start
2025-09-17 20:41:07 | [time] [KNN] impute end in 0.55s
2025-09-17 20:41:07 | [KNN] predict_mean
2025-09-17 20:41:07 | [t

  return fn(*args_all, **kw)


2025-09-17 20:42:43 | [BASE] fold 5 | HVG=2000
2025-09-17 20:42:43 | [BASE] fold 5 model=KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:42:43 | [build_baseline] KNN params={'n_neighbors': 29, 'weights': 'uniform'}
2025-09-17 20:42:43 | [KNN] fit n_neighbors=29 weights=uniform
2025-09-17 20:42:43 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:42:43 | [KNN] predict_mean
2025-09-17 20:42:43 | [time] [KNN] impute start
2025-09-17 20:42:43 | [time] [KNN] impute end in 0.01s
2025-09-17 20:42:43 | [KNN] predict_mean
2025-09-17 20:42:43 | [time] [KNN] impute start
2025-09-17 20:42:44 | [time] [KNN] impute end in 0.55s
2025-09-17 20:42:44 | [KNN] predict_mean
2025-09-17 20:42:44 | [time] [KNN] impute start
2025-09-17 20:42:44 | [time] [KNN] impute end in 0.55s
2025-09-17 20:42:44 | [KNN] predict_mean
2025-09-17 20:42:44 | [time] [KNN] impute start
2025-09-17 20:42:45 | [time] [KNN] impute end in 0.54s
2025-09-17 20:42:45 | [KNN] predict_mean
2025-09-17 20:42:45 | [t

[I 2025-09-17 20:42:45,580] Trial 1 finished with value: -1.1681311690940077 and parameters: {'n_neighbors': 29, 'weights': 'uniform'}. Best is trial 0 with value: -1.1681311690940077.


2025-09-17 20:42:45 | [cv_baselines_5fold] start k=5 n_hvg=2000 mode=seurat_v3
2025-09-17 20:42:45 | X_counts: dense  (447, 51777) dtype=int64
2025-09-17 20:43:24 | X_counts: zeros=66.23%, approx_int=True
2025-09-17 20:43:24 | [time] [BASE] fold 1/5 start


  return fn(*args_all, **kw)


2025-09-17 20:44:56 | [BASE] fold 1 | HVG=2000
2025-09-17 20:44:56 | [BASE] fold 1 model=KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:44:56 | [build_baseline] KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:44:56 | [KNN] fit n_neighbors=50 weights=uniform
2025-09-17 20:44:56 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 20:44:56 | [KNN] predict_mean
2025-09-17 20:44:56 | [time] [KNN] impute start
2025-09-17 20:44:56 | [time] [KNN] impute end in 0.01s
2025-09-17 20:44:56 | [KNN] predict_mean
2025-09-17 20:44:56 | [time] [KNN] impute start
2025-09-17 20:44:56 | [time] [KNN] impute end in 0.56s
2025-09-17 20:44:56 | [KNN] predict_mean
2025-09-17 20:44:56 | [time] [KNN] impute start
2025-09-17 20:44:57 | [time] [KNN] impute end in 0.55s
2025-09-17 20:44:57 | [KNN] predict_mean
2025-09-17 20:44:57 | [time] [KNN] impute start
2025-09-17 20:44:57 | [time] [KNN] impute end in 0.55s
2025-09-17 20:44:57 | [KNN] predict_mean
2025-09-17 20:44:57 | [t

  return fn(*args_all, **kw)


2025-09-17 20:46:38 | [BASE] fold 2 | HVG=2000
2025-09-17 20:46:38 | [BASE] fold 2 model=KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:46:38 | [build_baseline] KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:46:38 | [KNN] fit n_neighbors=50 weights=uniform
2025-09-17 20:46:38 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 20:46:38 | [KNN] predict_mean
2025-09-17 20:46:38 | [time] [KNN] impute start
2025-09-17 20:46:38 | [time] [KNN] impute end in 0.01s
2025-09-17 20:46:38 | [KNN] predict_mean
2025-09-17 20:46:38 | [time] [KNN] impute start
2025-09-17 20:46:39 | [time] [KNN] impute end in 0.56s
2025-09-17 20:46:39 | [KNN] predict_mean
2025-09-17 20:46:39 | [time] [KNN] impute start
2025-09-17 20:46:39 | [time] [KNN] impute end in 0.55s
2025-09-17 20:46:39 | [KNN] predict_mean
2025-09-17 20:46:39 | [time] [KNN] impute start
2025-09-17 20:46:40 | [time] [KNN] impute end in 0.55s
2025-09-17 20:46:40 | [KNN] predict_mean
2025-09-17 20:46:40 | [t

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 20:48:20 | [BASE] fold 3 | HVG=2000
2025-09-17 20:48:20 | [BASE] fold 3 model=KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:48:20 | [build_baseline] KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:48:20 | [KNN] fit n_neighbors=50 weights=uniform
2025-09-17 20:48:20 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:48:20 | [KNN] predict_mean
2025-09-17 20:48:20 | [time] [KNN] impute start
2025-09-17 20:48:20 | [time] [KNN] impute end in 0.01s
2025-09-17 20:48:20 | [KNN] predict_mean
2025-09-17 20:48:20 | [time] [KNN] impute start
2025-09-17 20:48:21 | [time] [KNN] impute end in 0.56s
2025-09-17 20:48:21 | [KNN] predict_mean
2025-09-17 20:48:21 | [time] [KNN] impute start
2025-09-17 20:48:22 | [time] [KNN] impute end in 0.55s
2025-09-17 20:48:22 | [KNN] predict_mean
2025-09-17 20:48:22 | [time] [KNN] impute start
2025-09-17 20:48:22 | [time] [KNN] impute end in 0.56s
2025-09-17 20:48:22 | [KNN] predict_mean
2025-09-17 20:48:22 | [t

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 20:51:37 | [BASE] fold 4 | HVG=2000
2025-09-17 20:51:37 | [BASE] fold 4 model=KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:51:37 | [build_baseline] KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:51:37 | [KNN] fit n_neighbors=50 weights=uniform
2025-09-17 20:51:37 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:51:37 | [KNN] predict_mean
2025-09-17 20:51:37 | [time] [KNN] impute start
2025-09-17 20:51:37 | [time] [KNN] impute end in 0.01s
2025-09-17 20:51:37 | [KNN] predict_mean
2025-09-17 20:51:37 | [time] [KNN] impute start
2025-09-17 20:51:37 | [time] [KNN] impute end in 0.56s
2025-09-17 20:51:37 | [KNN] predict_mean
2025-09-17 20:51:37 | [time] [KNN] impute start
2025-09-17 20:51:38 | [time] [KNN] impute end in 0.55s
2025-09-17 20:51:38 | [KNN] predict_mean
2025-09-17 20:51:38 | [time] [KNN] impute start
2025-09-17 20:51:39 | [time] [KNN] impute end in 0.56s
2025-09-17 20:51:39 | [KNN] predict_mean
2025-09-17 20:51:39 | [t

  return fn(*args_all, **kw)


2025-09-17 20:55:02 | [BASE] fold 5 | HVG=2000
2025-09-17 20:55:02 | [BASE] fold 5 model=KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:55:02 | [build_baseline] KNN params={'n_neighbors': 50, 'weights': 'uniform'}
2025-09-17 20:55:02 | [KNN] fit n_neighbors=50 weights=uniform
2025-09-17 20:55:02 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 20:55:02 | [KNN] predict_mean
2025-09-17 20:55:02 | [time] [KNN] impute start
2025-09-17 20:55:02 | [time] [KNN] impute end in 0.01s
2025-09-17 20:55:02 | [KNN] predict_mean
2025-09-17 20:55:02 | [time] [KNN] impute start
2025-09-17 20:55:02 | [time] [KNN] impute end in 0.56s
2025-09-17 20:55:02 | [KNN] predict_mean
2025-09-17 20:55:02 | [time] [KNN] impute start
2025-09-17 20:55:03 | [time] [KNN] impute end in 0.55s
2025-09-17 20:55:03 | [KNN] predict_mean
2025-09-17 20:55:03 | [time] [KNN] impute start
2025-09-17 20:55:04 | [time] [KNN] impute end in 0.55s
2025-09-17 20:55:04 | [KNN] predict_mean
2025-09-17 20:55:04 | [t

[I 2025-09-17 20:55:04,163] Trial 2 finished with value: -1.1681311690940077 and parameters: {'n_neighbors': 50, 'weights': 'uniform'}. Best is trial 0 with value: -1.1681311690940077.


2025-09-17 20:55:04 | [cv_baselines_5fold] start k=5 n_hvg=2000 mode=seurat_v3
2025-09-17 20:55:04 | X_counts: dense  (447, 51777) dtype=int64
2025-09-17 20:56:09 | X_counts: zeros=66.23%, approx_int=True
2025-09-17 20:56:09 | [time] [BASE] fold 1/5 start


  return fn(*args_all, **kw)


2025-09-17 20:58:42 | [BASE] fold 1 | HVG=2000
2025-09-17 20:58:42 | [BASE] fold 1 model=KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 20:58:42 | [build_baseline] KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 20:58:42 | [KNN] fit n_neighbors=21 weights=distance
2025-09-17 20:58:42 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 20:58:42 | [KNN] predict_mean
2025-09-17 20:58:42 | [time] [KNN] impute start
2025-09-17 20:58:42 | [time] [KNN] impute end in 0.01s
2025-09-17 20:58:42 | [KNN] predict_mean
2025-09-17 20:58:42 | [time] [KNN] impute start
2025-09-17 20:58:42 | [time] [KNN] impute end in 0.58s
2025-09-17 20:58:42 | [KNN] predict_mean
2025-09-17 20:58:42 | [time] [KNN] impute start
2025-09-17 20:58:43 | [time] [KNN] impute end in 0.57s
2025-09-17 20:58:43 | [KNN] predict_mean
2025-09-17 20:58:43 | [time] [KNN] impute start
2025-09-17 20:58:43 | [time] [KNN] impute end in 0.57s
2025-09-17 20:58:43 | [KNN] predict_mean
2025-09-17 20:58:43 |

  return fn(*args_all, **kw)


2025-09-17 21:01:13 | [BASE] fold 2 | HVG=2000
2025-09-17 21:01:13 | [BASE] fold 2 model=KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:01:13 | [build_baseline] KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:01:13 | [KNN] fit n_neighbors=21 weights=distance
2025-09-17 21:01:13 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 21:01:13 | [KNN] predict_mean
2025-09-17 21:01:13 | [time] [KNN] impute start
2025-09-17 21:01:13 | [time] [KNN] impute end in 0.01s
2025-09-17 21:01:13 | [KNN] predict_mean
2025-09-17 21:01:13 | [time] [KNN] impute start
2025-09-17 21:01:13 | [time] [KNN] impute end in 0.58s
2025-09-17 21:01:13 | [KNN] predict_mean
2025-09-17 21:01:13 | [time] [KNN] impute start
2025-09-17 21:01:14 | [time] [KNN] impute end in 0.57s
2025-09-17 21:01:14 | [KNN] predict_mean
2025-09-17 21:01:14 | [time] [KNN] impute start


  + r * np.log(p) + x * np.log1p(-p))


2025-09-17 21:01:15 | [time] [KNN] impute end in 0.58s
2025-09-17 21:01:15 | [KNN] predict_mean
2025-09-17 21:01:15 | [time] [KNN] impute start
2025-09-17 21:01:15 | [time] [KNN] impute end in 0.01s
2025-09-17 21:01:15 | [KNN] predict_mean
2025-09-17 21:01:15 | [time] [KNN] impute start
2025-09-17 21:01:15 | [time] [KNN] impute end in 0.01s
2025-09-17 21:01:15 | [KNN] predict_mean
2025-09-17 21:01:15 | [time] [KNN] impute start
2025-09-17 21:01:15 | [time] [KNN] impute end in 0.00s
2025-09-17 21:01:15 | [time] [BASE] fold 2/5 end in 151.11s
2025-09-17 21:01:15 | [time] [BASE] fold 3/5 start


  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 21:03:10 | [BASE] fold 3 | HVG=2000
2025-09-17 21:03:10 | [BASE] fold 3 model=KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:03:10 | [build_baseline] KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:03:10 | [KNN] fit n_neighbors=21 weights=distance
2025-09-17 21:03:10 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:03:10 | [KNN] predict_mean
2025-09-17 21:03:10 | [time] [KNN] impute start
2025-09-17 21:03:10 | [time] [KNN] impute end in 0.01s
2025-09-17 21:03:10 | [KNN] predict_mean
2025-09-17 21:03:10 | [time] [KNN] impute start
2025-09-17 21:03:10 | [time] [KNN] impute end in 0.58s
2025-09-17 21:03:10 | [KNN] predict_mean
2025-09-17 21:03:10 | [time] [KNN] impute start
2025-09-17 21:03:11 | [time] [KNN] impute end in 0.57s
2025-09-17 21:03:11 | [KNN] predict_mean
2025-09-17 21:03:11 | [time] [KNN] impute start
2025-09-17 21:03:11 | [time] [KNN] impute end in 0.56s
2025-09-17 21:03:11 | [KNN] predict_mean
2025-09-17 21:03:11 |

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 21:05:05 | [BASE] fold 4 | HVG=2000
2025-09-17 21:05:05 | [BASE] fold 4 model=KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:05:05 | [build_baseline] KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:05:05 | [KNN] fit n_neighbors=21 weights=distance
2025-09-17 21:05:05 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:05:05 | [KNN] predict_mean
2025-09-17 21:05:05 | [time] [KNN] impute start
2025-09-17 21:05:05 | [time] [KNN] impute end in 0.01s
2025-09-17 21:05:05 | [KNN] predict_mean
2025-09-17 21:05:05 | [time] [KNN] impute start
2025-09-17 21:05:06 | [time] [KNN] impute end in 0.57s
2025-09-17 21:05:06 | [KNN] predict_mean
2025-09-17 21:05:06 | [time] [KNN] impute start
2025-09-17 21:05:06 | [time] [KNN] impute end in 0.56s
2025-09-17 21:05:06 | [KNN] predict_mean
2025-09-17 21:05:06 | [time] [KNN] impute start
2025-09-17 21:05:07 | [time] [KNN] impute end in 0.57s
2025-09-17 21:05:07 | [KNN] predict_mean
2025-09-17 21:05:07 |

  return fn(*args_all, **kw)


2025-09-17 21:07:00 | [BASE] fold 5 | HVG=2000
2025-09-17 21:07:00 | [BASE] fold 5 model=KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:07:00 | [build_baseline] KNN params={'n_neighbors': 21, 'weights': 'distance'}
2025-09-17 21:07:00 | [KNN] fit n_neighbors=21 weights=distance
2025-09-17 21:07:00 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:07:00 | [KNN] predict_mean
2025-09-17 21:07:00 | [time] [KNN] impute start
2025-09-17 21:07:00 | [time] [KNN] impute end in 0.01s
2025-09-17 21:07:00 | [KNN] predict_mean
2025-09-17 21:07:00 | [time] [KNN] impute start
2025-09-17 21:07:01 | [time] [KNN] impute end in 0.57s
2025-09-17 21:07:01 | [KNN] predict_mean
2025-09-17 21:07:01 | [time] [KNN] impute start
2025-09-17 21:07:02 | [time] [KNN] impute end in 0.56s
2025-09-17 21:07:02 | [KNN] predict_mean
2025-09-17 21:07:02 | [time] [KNN] impute start
2025-09-17 21:07:02 | [time] [KNN] impute end in 0.57s
2025-09-17 21:07:02 | [KNN] predict_mean
2025-09-17 21:07:02 |

[I 2025-09-17 21:07:02,871] Trial 3 finished with value: -1.1681311690940077 and parameters: {'n_neighbors': 21, 'weights': 'distance'}. Best is trial 0 with value: -1.1681311690940077.


2025-09-17 21:07:03 | [cv_baselines_5fold] start k=5 n_hvg=2000 mode=seurat_v3
2025-09-17 21:07:03 | X_counts: dense  (447, 51777) dtype=int64
2025-09-17 21:07:49 | X_counts: zeros=66.23%, approx_int=True
2025-09-17 21:07:49 | [time] [BASE] fold 1/5 start


  return fn(*args_all, **kw)


2025-09-17 21:09:52 | [BASE] fold 1 | HVG=2000
2025-09-17 21:09:52 | [BASE] fold 1 model=KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:09:52 | [build_baseline] KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:09:52 | [KNN] fit n_neighbors=24 weights=distance
2025-09-17 21:09:52 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 21:09:52 | [KNN] predict_mean
2025-09-17 21:09:52 | [time] [KNN] impute start
2025-09-17 21:09:52 | [time] [KNN] impute end in 0.01s
2025-09-17 21:09:52 | [KNN] predict_mean
2025-09-17 21:09:52 | [time] [KNN] impute start
2025-09-17 21:09:52 | [time] [KNN] impute end in 0.57s
2025-09-17 21:09:52 | [KNN] predict_mean
2025-09-17 21:09:52 | [time] [KNN] impute start
2025-09-17 21:09:53 | [time] [KNN] impute end in 0.57s
2025-09-17 21:09:53 | [KNN] predict_mean
2025-09-17 21:09:53 | [time] [KNN] impute start
2025-09-17 21:09:53 | [time] [KNN] impute end in 0.56s
2025-09-17 21:09:53 | [KNN] predict_mean
2025-09-17 21:09:53 |

  return fn(*args_all, **kw)


2025-09-17 21:12:00 | [BASE] fold 2 | HVG=2000
2025-09-17 21:12:00 | [BASE] fold 2 model=KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:12:00 | [build_baseline] KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:12:00 | [KNN] fit n_neighbors=24 weights=distance
2025-09-17 21:12:00 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 21:12:00 | [KNN] predict_mean
2025-09-17 21:12:00 | [time] [KNN] impute start
2025-09-17 21:12:00 | [time] [KNN] impute end in 0.01s
2025-09-17 21:12:00 | [KNN] predict_mean
2025-09-17 21:12:00 | [time] [KNN] impute start
2025-09-17 21:12:00 | [time] [KNN] impute end in 0.57s
2025-09-17 21:12:00 | [KNN] predict_mean
2025-09-17 21:12:00 | [time] [KNN] impute start
2025-09-17 21:12:01 | [time] [KNN] impute end in 0.57s
2025-09-17 21:12:01 | [KNN] predict_mean
2025-09-17 21:12:01 | [time] [KNN] impute start


  + r * np.log(p) + x * np.log1p(-p))


2025-09-17 21:12:01 | [time] [KNN] impute end in 0.57s
2025-09-17 21:12:02 | [KNN] predict_mean
2025-09-17 21:12:02 | [time] [KNN] impute start
2025-09-17 21:12:02 | [time] [KNN] impute end in 0.01s
2025-09-17 21:12:02 | [KNN] predict_mean
2025-09-17 21:12:02 | [time] [KNN] impute start
2025-09-17 21:12:02 | [time] [KNN] impute end in 0.01s
2025-09-17 21:12:02 | [KNN] predict_mean
2025-09-17 21:12:02 | [time] [KNN] impute start
2025-09-17 21:12:02 | [time] [KNN] impute end in 0.00s
2025-09-17 21:12:02 | [time] [BASE] fold 2/5 end in 128.04s
2025-09-17 21:12:02 | [time] [BASE] fold 3/5 start


  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 21:14:01 | [BASE] fold 3 | HVG=2000
2025-09-17 21:14:01 | [BASE] fold 3 model=KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:14:01 | [build_baseline] KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:14:01 | [KNN] fit n_neighbors=24 weights=distance
2025-09-17 21:14:01 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:14:01 | [KNN] predict_mean
2025-09-17 21:14:01 | [time] [KNN] impute start
2025-09-17 21:14:01 | [time] [KNN] impute end in 0.01s
2025-09-17 21:14:01 | [KNN] predict_mean
2025-09-17 21:14:01 | [time] [KNN] impute start
2025-09-17 21:14:02 | [time] [KNN] impute end in 0.59s
2025-09-17 21:14:02 | [KNN] predict_mean
2025-09-17 21:14:02 | [time] [KNN] impute start
2025-09-17 21:14:02 | [time] [KNN] impute end in 0.57s
2025-09-17 21:14:02 | [KNN] predict_mean
2025-09-17 21:14:03 | [time] [KNN] impute start
2025-09-17 21:14:03 | [time] [KNN] impute end in 0.57s
2025-09-17 21:14:03 | [KNN] predict_mean
2025-09-17 21:14:03 |

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 21:15:58 | [BASE] fold 4 | HVG=2000
2025-09-17 21:15:58 | [BASE] fold 4 model=KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:15:58 | [build_baseline] KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:15:58 | [KNN] fit n_neighbors=24 weights=distance
2025-09-17 21:15:58 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:15:58 | [KNN] predict_mean
2025-09-17 21:15:58 | [time] [KNN] impute start
2025-09-17 21:15:58 | [time] [KNN] impute end in 0.01s
2025-09-17 21:15:58 | [KNN] predict_mean
2025-09-17 21:15:58 | [time] [KNN] impute start
2025-09-17 21:15:59 | [time] [KNN] impute end in 0.58s
2025-09-17 21:15:59 | [KNN] predict_mean
2025-09-17 21:15:59 | [time] [KNN] impute start
2025-09-17 21:16:00 | [time] [KNN] impute end in 0.57s
2025-09-17 21:16:00 | [KNN] predict_mean
2025-09-17 21:16:00 | [time] [KNN] impute start
2025-09-17 21:16:00 | [time] [KNN] impute end in 0.57s
2025-09-17 21:16:00 | [KNN] predict_mean
2025-09-17 21:16:00 |

  return fn(*args_all, **kw)


2025-09-17 21:18:03 | [BASE] fold 5 | HVG=2000
2025-09-17 21:18:03 | [BASE] fold 5 model=KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:18:03 | [build_baseline] KNN params={'n_neighbors': 24, 'weights': 'distance'}
2025-09-17 21:18:03 | [KNN] fit n_neighbors=24 weights=distance
2025-09-17 21:18:03 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:18:03 | [KNN] predict_mean
2025-09-17 21:18:03 | [time] [KNN] impute start
2025-09-17 21:18:03 | [time] [KNN] impute end in 0.01s
2025-09-17 21:18:03 | [KNN] predict_mean
2025-09-17 21:18:03 | [time] [KNN] impute start
2025-09-17 21:18:03 | [time] [KNN] impute end in 0.57s
2025-09-17 21:18:03 | [KNN] predict_mean
2025-09-17 21:18:03 | [time] [KNN] impute start
2025-09-17 21:18:04 | [time] [KNN] impute end in 0.56s
2025-09-17 21:18:04 | [KNN] predict_mean
2025-09-17 21:18:04 | [time] [KNN] impute start
2025-09-17 21:18:05 | [time] [KNN] impute end in 0.56s
2025-09-17 21:18:05 | [KNN] predict_mean
2025-09-17 21:18:05 |

[I 2025-09-17 21:18:05,215] Trial 4 finished with value: -1.1681311690940077 and parameters: {'n_neighbors': 24, 'weights': 'distance'}. Best is trial 0 with value: -1.1681311690940077.


2025-09-17 21:18:05 | [cv_baselines_5fold] start k=5 n_hvg=2000 mode=seurat_v3
2025-09-17 21:18:05 | X_counts: dense  (447, 51777) dtype=int64
2025-09-17 21:18:52 | X_counts: zeros=66.23%, approx_int=True
2025-09-17 21:18:52 | [time] [BASE] fold 1/5 start


  return fn(*args_all, **kw)


2025-09-17 21:20:43 | [BASE] fold 1 | HVG=2000
2025-09-17 21:20:43 | [BASE] fold 1 model=KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:20:43 | [build_baseline] KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:20:43 | [KNN] fit n_neighbors=38 weights=uniform
2025-09-17 21:20:43 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 21:20:43 | [KNN] predict_mean
2025-09-17 21:20:43 | [time] [KNN] impute start
2025-09-17 21:20:43 | [time] [KNN] impute end in 0.01s
2025-09-17 21:20:43 | [KNN] predict_mean
2025-09-17 21:20:43 | [time] [KNN] impute start
2025-09-17 21:20:44 | [time] [KNN] impute end in 0.56s
2025-09-17 21:20:44 | [KNN] predict_mean
2025-09-17 21:20:44 | [time] [KNN] impute start
2025-09-17 21:20:45 | [time] [KNN] impute end in 0.55s
2025-09-17 21:20:45 | [KNN] predict_mean
2025-09-17 21:20:45 | [time] [KNN] impute start
2025-09-17 21:20:45 | [time] [KNN] impute end in 0.55s
2025-09-17 21:20:45 | [KNN] predict_mean
2025-09-17 21:20:45 | [t

  return fn(*args_all, **kw)


2025-09-17 21:22:29 | [BASE] fold 2 | HVG=2000
2025-09-17 21:22:29 | [BASE] fold 2 model=KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:22:29 | [build_baseline] KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:22:29 | [KNN] fit n_neighbors=38 weights=uniform
2025-09-17 21:22:29 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 21:22:29 | [KNN] predict_mean
2025-09-17 21:22:29 | [time] [KNN] impute start
2025-09-17 21:22:29 | [time] [KNN] impute end in 0.01s
2025-09-17 21:22:29 | [KNN] predict_mean
2025-09-17 21:22:29 | [time] [KNN] impute start
2025-09-17 21:22:29 | [time] [KNN] impute end in 0.56s
2025-09-17 21:22:29 | [KNN] predict_mean
2025-09-17 21:22:29 | [time] [KNN] impute start
2025-09-17 21:22:30 | [time] [KNN] impute end in 0.55s
2025-09-17 21:22:30 | [KNN] predict_mean
2025-09-17 21:22:30 | [time] [KNN] impute start
2025-09-17 21:22:30 | [time] [KNN] impute end in 0.55s
2025-09-17 21:22:30 | [KNN] predict_mean
2025-09-17 21:22:30 | [t

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 21:24:11 | [BASE] fold 3 | HVG=2000
2025-09-17 21:24:11 | [BASE] fold 3 model=KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:24:11 | [build_baseline] KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:24:11 | [KNN] fit n_neighbors=38 weights=uniform
2025-09-17 21:24:11 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:24:11 | [KNN] predict_mean
2025-09-17 21:24:11 | [time] [KNN] impute start
2025-09-17 21:24:11 | [time] [KNN] impute end in 0.01s
2025-09-17 21:24:11 | [KNN] predict_mean
2025-09-17 21:24:11 | [time] [KNN] impute start
2025-09-17 21:24:12 | [time] [KNN] impute end in 0.55s
2025-09-17 21:24:12 | [KNN] predict_mean
2025-09-17 21:24:12 | [time] [KNN] impute start
2025-09-17 21:24:13 | [time] [KNN] impute end in 0.54s
2025-09-17 21:24:13 | [KNN] predict_mean
2025-09-17 21:24:13 | [time] [KNN] impute start
2025-09-17 21:24:13 | [time] [KNN] impute end in 0.55s
2025-09-17 21:24:13 | [KNN] predict_mean
2025-09-17 21:24:13 | [t

  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


2025-09-17 21:26:13 | [BASE] fold 4 | HVG=2000
2025-09-17 21:26:13 | [BASE] fold 4 model=KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:26:13 | [build_baseline] KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:26:13 | [KNN] fit n_neighbors=38 weights=uniform
2025-09-17 21:26:13 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:26:13 | [KNN] predict_mean
2025-09-17 21:26:13 | [time] [KNN] impute start
2025-09-17 21:26:13 | [time] [KNN] impute end in 0.01s
2025-09-17 21:26:14 | [KNN] predict_mean
2025-09-17 21:26:14 | [time] [KNN] impute start
2025-09-17 21:26:14 | [time] [KNN] impute end in 0.55s
2025-09-17 21:26:14 | [KNN] predict_mean
2025-09-17 21:26:14 | [time] [KNN] impute start
2025-09-17 21:26:15 | [time] [KNN] impute end in 0.55s
2025-09-17 21:26:15 | [KNN] predict_mean
2025-09-17 21:26:15 | [time] [KNN] impute start
2025-09-17 21:26:15 | [time] [KNN] impute end in 0.55s
2025-09-17 21:26:15 | [KNN] predict_mean
2025-09-17 21:26:15 | [t

  return fn(*args_all, **kw)


2025-09-17 21:28:06 | [BASE] fold 5 | HVG=2000
2025-09-17 21:28:06 | [BASE] fold 5 model=KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:28:06 | [build_baseline] KNN params={'n_neighbors': 38, 'weights': 'uniform'}
2025-09-17 21:28:06 | [KNN] fit n_neighbors=38 weights=uniform
2025-09-17 21:28:06 | X_train: dense  (358, 2000) dtype=int64
2025-09-17 21:28:06 | [KNN] predict_mean
2025-09-17 21:28:06 | [time] [KNN] impute start
2025-09-17 21:28:06 | [time] [KNN] impute end in 0.01s
2025-09-17 21:28:06 | [KNN] predict_mean
2025-09-17 21:28:06 | [time] [KNN] impute start
2025-09-17 21:28:07 | [time] [KNN] impute end in 0.55s
2025-09-17 21:28:07 | [KNN] predict_mean
2025-09-17 21:28:07 | [time] [KNN] impute start
2025-09-17 21:28:07 | [time] [KNN] impute end in 0.54s
2025-09-17 21:28:08 | [KNN] predict_mean
2025-09-17 21:28:08 | [time] [KNN] impute start
2025-09-17 21:28:08 | [time] [KNN] impute end in 0.54s
2025-09-17 21:28:08 | [KNN] predict_mean
2025-09-17 21:28:08 | [t

[I 2025-09-17 21:28:08,672] Trial 5 finished with value: -1.1681311690940077 and parameters: {'n_neighbors': 38, 'weights': 'uniform'}. Best is trial 0 with value: -1.1681311690940077.


2025-09-17 21:28:08 | [cv_baselines_5fold] start k=5 n_hvg=2000 mode=seurat_v3
2025-09-17 21:28:08 | X_counts: dense  (447, 51777) dtype=int64
2025-09-17 21:28:42 | X_counts: zeros=66.23%, approx_int=True
2025-09-17 21:28:42 | [time] [BASE] fold 1/5 start


  return fn(*args_all, **kw)


2025-09-17 21:30:33 | [BASE] fold 1 | HVG=2000
2025-09-17 21:30:33 | [BASE] fold 1 model=KNN params={'n_neighbors': 28, 'weights': 'distance'}
2025-09-17 21:30:33 | [build_baseline] KNN params={'n_neighbors': 28, 'weights': 'distance'}
2025-09-17 21:30:33 | [KNN] fit n_neighbors=28 weights=distance
2025-09-17 21:30:33 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 21:30:33 | [KNN] predict_mean
2025-09-17 21:30:33 | [time] [KNN] impute start
2025-09-17 21:30:33 | [time] [KNN] impute end in 0.01s
2025-09-17 21:30:33 | [KNN] predict_mean
2025-09-17 21:30:33 | [time] [KNN] impute start
2025-09-17 21:30:34 | [time] [KNN] impute end in 0.58s
2025-09-17 21:30:34 | [KNN] predict_mean
2025-09-17 21:30:34 | [time] [KNN] impute start
2025-09-17 21:30:34 | [time] [KNN] impute end in 0.57s
2025-09-17 21:30:34 | [KNN] predict_mean
2025-09-17 21:30:34 | [time] [KNN] impute start
2025-09-17 21:30:35 | [time] [KNN] impute end in 0.57s
2025-09-17 21:30:35 | [KNN] predict_mean
2025-09-17 21:30:35 |

  return fn(*args_all, **kw)


2025-09-17 21:32:22 | [BASE] fold 2 | HVG=2000
2025-09-17 21:32:22 | [BASE] fold 2 model=KNN params={'n_neighbors': 28, 'weights': 'distance'}
2025-09-17 21:32:22 | [build_baseline] KNN params={'n_neighbors': 28, 'weights': 'distance'}
2025-09-17 21:32:22 | [KNN] fit n_neighbors=28 weights=distance
2025-09-17 21:32:22 | X_train: dense  (357, 2000) dtype=int64
2025-09-17 21:32:22 | [KNN] predict_mean
2025-09-17 21:32:22 | [time] [KNN] impute start
2025-09-17 21:32:22 | [time] [KNN] impute end in 0.01s
2025-09-17 21:32:22 | [KNN] predict_mean
2025-09-17 21:32:22 | [time] [KNN] impute start
2025-09-17 21:32:23 | [time] [KNN] impute end in 0.58s
2025-09-17 21:32:23 | [KNN] predict_mean
2025-09-17 21:32:23 | [time] [KNN] impute start
2025-09-17 21:32:23 | [time] [KNN] impute end in 0.57s
2025-09-17 21:32:23 | [KNN] predict_mean
2025-09-17 21:32:23 | [time] [KNN] impute start


  + r * np.log(p) + x * np.log1p(-p))


2025-09-17 21:32:24 | [time] [KNN] impute end in 0.57s
2025-09-17 21:32:24 | [KNN] predict_mean
2025-09-17 21:32:24 | [time] [KNN] impute start
2025-09-17 21:32:24 | [time] [KNN] impute end in 0.01s
2025-09-17 21:32:24 | [KNN] predict_mean
2025-09-17 21:32:24 | [time] [KNN] impute start
2025-09-17 21:32:24 | [time] [KNN] impute end in 0.01s
2025-09-17 21:32:24 | [KNN] predict_mean
2025-09-17 21:32:24 | [time] [KNN] impute start
2025-09-17 21:32:24 | [time] [KNN] impute end in 0.00s
2025-09-17 21:32:24 | [time] [BASE] fold 2/5 end in 109.09s
2025-09-17 21:32:24 | [time] [BASE] fold 3/5 start


  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  + r * np.log(p) + x * np.log1p(-p))
  return fn(*args_all, **kw)


In [None]:
# Legacy KNN workflow: (1) tune KNN hyperparameters with Optuna, then
# (2) re-run the 5-fold baseline CV once with the winning configuration and save it.
#
# NOTE(review): the Optuna trial log saved in this notebook reports the same
# objective value for different KNN settings (n_neighbors / weights), so
# "NB_ll_thin" may not discriminate between KNN hyperparameters -- confirm
# before relying on `best_params`.

# Cross-validation / HVG settings passed identically to both calls below.
shared_cv_kwargs = dict(
    k=5,
    n_hvg=2000,
    R=3,
    mask_frac=0.10,
    thinning_p=0.10,
    random_state=123,
    hvg_mode="seurat_v3",
    gene_names=gene_names,
    batches=batches,
    norm_layer=norm_layer,
    batch_key="BioProject",
    seurat_layer_name="log2_1p_CPM_original",
    labels=None,
)

# Step 1: hyperparameter search (nothing is written to disk: save_dir=None).
best_params, best_val = tune_baseline_with_optuna(
    X_counts,
    model_name="KNN",
    n_trials=40,
    metric="NB_ll_thin",
    direction="maximize",
    save_dir=None,
    **shared_cv_kwargs,
)

# Step 2: final evaluation of the single best configuration, saved to save_dir.
final_grid = {"KNN": best_params}
summary_df, details_df = cv_baselines_5fold(
    X_counts,
    model_grids=final_grid,
    save_dir="results/baselines_knn_best",
    **shared_cv_kwargs,
)


In [17]:
# Display the summary table (first value returned by cv_baselines_5fold; it has no `fold` column).
# NOTE(review): the saved output lists MAGIC rows and several KNN settings, which a
# single-config {"KNN": best_params} run would not produce -- presumably stale output
# from an earlier grid run; re-execute from a fresh kernel to confirm.
summary_df

Unnamed: 0,model,n_hvg,MAE_zero,MSE_zero,MedianL1_zero,NB_ll_zero,NB_dev_zero,MAE_thin,MSE_thin,MedianL1_thin,NB_ll_thin,NB_dev_thin,Silhouette,params
0,KNN,2000,145.182524,3973526.0,4.792379,-inf,19.890789,22.624352,232493.451225,0.34262,-1.050366,0.450777,,"{'n_neighbors': 5, 'weights': 'distance'}"
1,KNN,2000,146.401514,3977597.0,4.88,-inf,19.890562,22.624352,232493.451225,0.34262,-1.050366,0.450777,,"{'n_neighbors': 5, 'weights': 'uniform'}"
2,KNN,2000,145.935892,3813152.0,5.468255,-inf,10.073955,22.624352,232493.451225,0.34262,-1.050366,0.450777,,"{'n_neighbors': 15, 'weights': 'distance'}"
3,KNN,2000,148.85101,3923409.0,5.6,-inf,10.081122,22.624352,232493.451225,0.34262,-1.050366,0.450777,,"{'n_neighbors': 15, 'weights': 'uniform'}"
4,KNN,2000,152.806336,4116963.0,5.955009,-8.618125,8.61529,22.624352,232493.451225,0.34262,-1.050366,0.450777,,"{'n_neighbors': 30, 'weights': 'distance'}"
5,KNN,2000,157.783914,4289946.0,6.163333,-8.626906,8.632852,22.624352,232493.451225,0.34262,-1.050366,0.450777,,"{'n_neighbors': 30, 'weights': 'uniform'}"
6,MAGIC,2000,117.391397,1452627.0,6.251003,-4.460702,0.300443,7.082175,11648.230403,0.175739,-0.96715,0.284345,,"{'knn': 5, 'n_pca': 50, 't': 3}"
7,MAGIC,2000,117.419249,1452795.0,6.249631,-4.460723,0.300485,7.082792,11648.96459,0.175767,-0.967162,0.284369,,"{'knn': 5, 'n_pca': None, 't': 3}"
8,MAGIC,2000,136.522088,2423615.0,6.917324,-4.48852,0.356079,7.811972,15521.961188,0.201358,-0.980229,0.310503,,"{'knn': 10, 'n_pca': 50, 't': 3}"
9,MAGIC,2000,136.533152,2423911.0,6.91752,-4.48854,0.356119,7.812378,15523.153214,0.201368,-0.980244,0.310534,,"{'knn': 10, 'n_pca': None, 't': 3}"


In [18]:
# Display the per-fold results table (second value returned by cv_baselines_5fold;
# the saved output shows one row per fold / model / params combination).
details_df

Unnamed: 0,fold,model,params,n_hvg,MAE_zero,MSE_zero,MedianL1_zero,NB_ll_zero,NB_dev_zero,MAE_thin,MSE_thin,MedianL1_thin,NB_ll_thin,NB_dev_thin,Silhouette
0,1,MEAN,{},2000,476.273391,45578340.0,8.824463,-4.604419,0.584565,24.850731,272955.866306,0.35817,-1.075026,0.466243,
1,1,MEDIAN,{},2000,463.297857,46618700.0,5.0,-322.340513,636.056755,23.95093,285212.385313,0.0,-13.946393,26.208977,
2,1,KNN,"{'n_neighbors': 5, 'weights': 'uniform'}",2000,152.062595,2585136.0,4.733333,-13.937682,19.251094,24.850731,272955.866306,0.35817,-1.075026,0.466243,
3,1,KNN,"{'n_neighbors': 5, 'weights': 'distance'}",2000,152.073686,2693523.0,4.654197,-13.936884,19.249497,24.850731,272955.866306,0.35817,-1.075026,0.466243,
4,1,KNN,"{'n_neighbors': 15, 'weights': 'uniform'}",2000,151.858591,2588800.0,5.6,-9.686541,10.74881,24.850731,272955.866306,0.35817,-1.075026,0.466243,
5,1,KNN,"{'n_neighbors': 15, 'weights': 'distance'}",2000,149.895981,2563858.0,5.464955,-9.682484,10.740696,24.850731,272955.866306,0.35817,-1.075026,0.466243,
6,1,KNN,"{'n_neighbors': 30, 'weights': 'uniform'}",2000,161.202911,2868117.0,6.211111,-9.104153,9.584034,24.850731,272955.866306,0.35817,-1.075026,0.466243,
7,1,KNN,"{'n_neighbors': 30, 'weights': 'distance'}",2000,156.160156,2763174.0,5.888115,-9.088341,9.552409,24.850731,272955.866306,0.35817,-1.075026,0.466243,
8,1,MAGIC,"{'knn': 5, 'n_pca': None, 't': 3}",2000,122.705347,1109985.0,6.336733,-4.467122,0.30997,7.727939,13318.755574,0.185031,-0.989382,0.294955,
9,1,MAGIC,"{'knn': 5, 'n_pca': 50, 't': 3}",2000,122.698113,1109908.0,6.3376,-4.467107,0.309941,7.727283,13317.980124,0.185043,-0.989369,0.29493,
