🧩 0) Setup & Imports

In [2]:
# ===================== PARAMETERS / IMPORTS =====================
from pathlib import Path
import sys, subprocess, numpy as np, pandas as pd, joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.impute import SimpleImputer

# Project config
# The repository root is the parent of the current working directory, made absolute.
# NOTE(review): this depends on where the kernel was started — confirm the notebook
# always runs from a direct subfolder of the repo.
PROJ_ROOT = Path("../").resolve()
SRC_DIR   = PROJ_ROOT / "src"  # NOTE(review): not referenced again in the visible cells
# Put the repo root on sys.path so that `src` resolves as a package; this must run
# before the `from src.config import ...` line below.
if str(PROJ_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJ_ROOT))

from src.config import PATHS, CFG, print_summary
print_summary()

# Dims (fallbacks if CFG unset)
# `or` substitutes the fallback for any falsy configured value (None, 0, ...).
MAP_DIM     = CFG.MAP_DIM or 165
PROMPT_DIM  = CFG.PROMPT_DIM or 512
FUSED_DIM   = CFG.FUSED_DIM or (MAP_DIM + PROMPT_DIM)  # NOTE(review): unused in the visible cells
BATCH_SIZE  = CFG.BATCH_SIZE  # NOTE(review): unused in the visible cells; the MLP cell hard-codes batch_size=32

# Clean outputs for a fresh run
# NOTE(review): executes every time this cell runs; presumably removes previously
# generated artifacts — verify in src.config before re-running mid-session.
PATHS.clean_outputs()


=== CONFIG SUMMARY ===
PROJ_ROOT  : /Users/amirdonyadide/Documents/GitHub/Thesis
DATA_DIR   : /Users/amirdonyadide/Documents/GitHub/Thesis/data
INPUT_DIR  : /Users/amirdonyadide/Documents/GitHub/Thesis/data/input
OUTPUT_DIR : /Users/amirdonyadide/Documents/GitHub/Thesis/data/output
MAPS_ROOT  : /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/samples/pairs
INPUT PAT. : *_input.geojson
PROMPTS_CSV: /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/prompts.csv
PAIRS_CSV  : /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/pairs.csv
PROMPT_OUT : /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/prompt_out
MAP_OUT    : /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/map_out
TRAIN_OUT  : /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/train_out
MODEL_OUT  : /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/models
SPLIT_OUT  : /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/train_out/splits
PRM_NPZ    : /Users/amirdonyadide/Document

📚 1) Build Prompt Embeddings (USE)

In [3]:
# === PROMPT EMBEDDINGS ===
# Launch the prompt-embedding module in a child interpreter (same Python as the
# kernel), working from the project root. The argument list is assembled piece
# by piece so each CLI option sits on its own line.
cmd = [sys.executable, "-m", "src.mapvec.prompts.prompt_embeddings"]
cmd += ["--input", str(PATHS.PROMPTS_CSV)]
cmd += ["--model", str(CFG.USE_MODEL)]
cmd += ["--l2"]
cmd += ["--out_dir", str(PATHS.PROMPT_OUT)]
cmd += ["-v"]

# Echo the exact command for reproducibility, then run it.
print("CMD:", " ".join(cmd))
res = subprocess.run(cmd, cwd=str(PATHS.PROJ_ROOT))

# A non-zero exit code aborts the cell; otherwise report success.
if res.returncode == 0:
    print("‚úÖ Prompt embeddings completed.")
else:
    raise SystemExit("Prompt embedding step failed.")


CMD: /opt/anaconda3/envs/thesis/bin/python -m src.mapvec.prompts.prompt_embeddings --input /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/prompts.csv --model dan --l2 --out_dir /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/prompt_out -v


17:44:25 | DEBUG | FILE_DIR=/Users/amirdonyadide/Documents/GitHub/Thesis/src/mapvec/prompts
17:44:25 | DEBUG | PROJECT_ROOT=/Users/amirdonyadide/Documents/GitHub/Thesis
17:44:25 | DEBUG | DEFAULT_DATA_DIR=/Users/amirdonyadide/Documents/GitHub/Thesis/data
17:44:25 | INFO | DATA_DIR=/Users/amirdonyadide/Documents/GitHub/Thesis/data
17:44:25 | INFO | INPUT=/Users/amirdonyadide/Documents/GitHub/Thesis/data/input/prompts.csv
17:44:25 | INFO | OUT_DIR=/Users/amirdonyadide/Documents/GitHub/Thesis/data/output/prompt_out
17:44:25 | INFO | Reading CSV: /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/prompts.csv
17:44:25 | INFO | Loaded 500 prompts (id_col=prompt_id). Sample IDs: p001, p002, p003‚Ä¶
17:44:25 | INFO | Using local USE-dan at /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/model_dan
17:44:25 | INFO | Loading USE-dan from local path: /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/model_dan ‚Ä¶
17:44:28 | INFO | Fingerprint not found. Saved model loading will 

‚úÖ Prompt embeddings completed.


🗺️ 2) Build Map Embeddings (geometric)

In [4]:
# === MAP EMBEDDINGS ===
# Launch the map-embedding module in a child interpreter (same Python as the
# kernel), working from the project root.
cmd = [sys.executable, "-m", "src.mapvec.maps.map_embeddings"]
cmd += ["--root", str(PATHS.MAPS_ROOT)]
cmd += ["--pattern", PATHS.INPUT_MAPS_PATTERN]
cmd += ["--out_dir", str(PATHS.MAP_OUT)]
cmd += ["--norm", "fixed"]
cmd += ["--norm-wh", "400x400"]
cmd += ["-v"]

# Echo the exact command for reproducibility, then run it.
print("CMD:", " ".join(cmd))
res = subprocess.run(cmd, cwd=str(PATHS.PROJ_ROOT))

# A non-zero exit code aborts the cell; otherwise report success.
if res.returncode == 0:
    print("‚úÖ Map embeddings completed.")
else:
    raise SystemExit("Map embedding step failed.")


CMD: /opt/anaconda3/envs/thesis/bin/python -m src.mapvec.maps.map_embeddings --root /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/samples/pairs --pattern *_input.geojson --out_dir /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/map_out --norm fixed --norm-wh 400x400 -v


17:44:31 | DEBUG | PROJECT_ROOT=/Users/amirdonyadide/Documents/GitHub/Thesis
17:44:31 | DEBUG | DATA_DIR=/Users/amirdonyadide/Documents/GitHub/Thesis/data
17:44:31 | INFO | Scanning /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/samples/pairs (pattern=*_input.geojson)‚Ä¶
17:44:31 | INFO | First pass: counting polygons to normalize poly_count‚Ä¶
17:44:36 | INFO | Max polygons across dataset: 789
17:44:38 | INFO | OK  map_id=0073  -> vector[165]
17:44:39 | INFO | OK  map_id=0080  -> vector[165]
17:44:40 | INFO | OK  map_id=0093  -> vector[165]
17:44:43 | INFO | OK  map_id=0122  -> vector[165]
17:44:44 | INFO | OK  map_id=0123  -> vector[165]
17:44:45 | INFO | OK  map_id=0127  -> vector[165]
17:44:46 | INFO | OK  map_id=0158  -> vector[165]
17:44:48 | INFO | OK  map_id=0159  -> vector[165]
17:44:49 | INFO | OK  map_id=0160  -> vector[165]
17:44:50 | INFO | OK  map_id=0165  -> vector[165]
17:44:51 | INFO | OK  map_id=0167  -> vector[165]
17:44:52 | INFO | OK  map_id=0168  -> vecto

‚úÖ Map embeddings completed.


17:52:16 | INFO | OK  map_id=1757  -> vector[165]
17:52:16 | INFO | Saved 300 vectors (failed=0) to /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/map_out


🔗 3) Concatenate (pairs → fused rows)

In [5]:
# === CONCATENATION ===
# Launch the concatenation module in a child interpreter (same Python as the
# kernel), working from the project root. It is pointed at the two .npz files
# under MAP_OUT and PROMPT_OUT and at the pairs CSV.
cmd = [sys.executable, "-m", "src.mapvec.concat.concat_embeddings"]
cmd += ["--pairs", str(PATHS.PAIRS_CSV)]
cmd += ["--map_npz", str(PATHS.MAP_OUT / "maps_embeddings.npz")]
cmd += ["--prompt_npz", str(PATHS.PROMPT_OUT / "prompts_embeddings.npz")]
cmd += ["--out_dir", str(PATHS.TRAIN_OUT)]
cmd += ["--drop_dupes"]
# Optional flags, currently disabled:
#   --l2-prompt        safety net if you want L2 here as well
#   --fail_on_missing
#   --save-blocks

# Echo the exact command for reproducibility, then run it.
print("CMD:", " ".join(cmd))
res = subprocess.run(cmd, cwd=str(PATHS.PROJ_ROOT))

# A non-zero exit code aborts the cell; otherwise report success.
if res.returncode == 0:
    print("‚úÖ Concatenation completed.")
else:
    raise SystemExit("Concatenation step failed.")


CMD: /opt/anaconda3/envs/thesis/bin/python -m src.mapvec.concat.concat_embeddings --pairs /Users/amirdonyadide/Documents/GitHub/Thesis/data/input/pairs.csv --map_npz /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/map_out/maps_embeddings.npz --prompt_npz /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/prompt_out/prompts_embeddings.npz --out_dir /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/train_out --drop_dupes
‚úÖ Concatenation completed.


17:52:23 | INFO | Map  embeddings: (300, 165) from /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/map_out/maps_embeddings.npz
17:52:23 | INFO | Prompt embeddings: (500, 512) from /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/prompt_out/prompts_embeddings.npz
17:52:23 | INFO | X shape = (450, 677)  (map_dim=165, prompt_dim=512)
17:52:23 | INFO | Saved to /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/train_out in 0.02s


📥 4) Load & Basic Cleaning

In [6]:
# === LOAD FUSED DATA ===
# X holds one fused feature row per entry of the pairs table; the two are
# filtered with the same boolean mask below, so they must be row-aligned.
X = np.load(PATHS.TRAIN_OUT / "X_concat.npy")
pairs_df = pd.read_parquet(PATHS.TRAIN_OUT / "train_pairs.parquet")
print(f"Loaded X: {X.shape}, pairs: {pairs_df.shape}")
if X.shape[0] != len(pairs_df):
    raise ValueError(
        f"Row mismatch: X has {X.shape[0]} rows but pairs table has {len(pairs_df)}."
    )

OP_COL = "operator"
PARAM_COLS = ["param"]

df = pairs_df.copy()

# Build the validity mask on the RAW columns. It must be computed before the
# string normalisation below: astype(str) turns a missing operator into the
# literal text "nan", which notna() would then accept as a valid label.
mask = df[OP_COL].notna()
for c in PARAM_COLS:
    mask &= df[c].notna()

# Normalise operator labels (trim whitespace, lower-case) for the label encoder.
df[OP_COL] = df[OP_COL].astype(str).str.strip().str.lower()

# Apply the same row filter to features and metadata, then re-index the frame
# so positions in df match positions in X.
X  = X[mask.values].astype("float32", copy=False)
df = df.loc[mask].reset_index(drop=True)
print(f"After cleaning: X={X.shape}, df={df.shape}, ops={sorted(df[OP_COL].unique())}")


Loaded X: (450, 677), pairs: (450, 4)
After cleaning: X=(450, 677), df=(450, 4), ops=['aggregate', 'displace', 'select', 'simplify']


✂️ 5) Split & Targets

In [7]:
# === SPLIT ===
FIXED_CLASSES = ["simplify", "select", "aggregate", "displace"]


def _stratify_labels(frame):
    """Return the operator column for stratification, or None if it has a single value."""
    labels = frame[OP_COL]
    if labels.nunique() > 1:
        return labels
    return None


# Both splits share the seed and shuffle settings.
_split_opts = dict(random_state=CFG.SEED, shuffle=True)

# Stage 1: carve off the combined validation+test portion from the training data.
X_train, X_temp, df_train, df_temp = train_test_split(
    X, df,
    test_size=CFG.VAL_RATIO + CFG.TEST_RATIO,
    stratify=_stratify_labels(df),
    **_split_opts,
)

# Stage 2: divide the held-out portion into validation and test. The test share
# is expressed relative to the held-out portion, not to the full data set.
rel_test = CFG.TEST_RATIO / (CFG.VAL_RATIO + CFG.TEST_RATIO)
X_val, X_test, df_val, df_test = train_test_split(
    X_temp, df_temp,
    test_size=rel_test,
    stratify=_stratify_labels(df_temp),
    **_split_opts,
)

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# === TARGETS ===
# The encoder is fitted on the fixed class list rather than on the data, so the
# label-to-index mapping does not depend on which classes a split contains.
le = LabelEncoder()
le.fit(FIXED_CLASSES)

# Classification targets: integer-encoded operator per split.
y_train_cls, y_val_cls, y_test_cls = (
    le.transform(part[OP_COL]) for part in (df_train, df_val, df_test)
)

# Regression targets: the parameter column(s) as float32 matrices per split.
y_train_reg, y_val_reg, y_test_reg = (
    part[PARAM_COLS].to_numpy(dtype="float32") for part in (df_train, df_val, df_test)
)


Train: (315, 677), Val: (67, 677), Test: (68, 677)


🧼 6) Modality-Aware Preprocessing (map only)

In [8]:
# === MODALITY-AWARE PREPROCESSING ===
# Column widths of the two blocks inside each fused row: the first MAP_DIM
# columns are the map block, the next PROMPT_DIM columns are the prompt block.
# Same expressions as in the setup cell; `or` substitutes the fallback for any
# falsy configured value.
MAP_DIM     = CFG.MAP_DIM or 165       # must equal the real map-embedding width
PROMPT_DIM  = CFG.PROMPT_DIM or 512

def split_blocks(X, map_dim=None, prompt_dim=None):
    """Split a fused matrix into independent float32 map and prompt blocks.

    Parameters
    ----------
    X : array-like, shape (n_rows, >= map_dim + prompt_dim)
        Fused rows: map columns first, prompt columns immediately after.
        Columns beyond ``map_dim + prompt_dim`` are ignored.
    map_dim : int, optional
        Width of the map block. Defaults to the module-level ``MAP_DIM``.
    prompt_dim : int, optional
        Width of the prompt block. Defaults to the module-level ``PROMPT_DIM``.

    Returns
    -------
    (X_map, X_prompt) : tuple of np.ndarray
        Float32 copies, so later in-place edits never touch ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or is narrower than ``map_dim + prompt_dim``
        (slicing would otherwise silently return a truncated prompt block).
    """
    # Fall back to the notebook-level widths only when none are passed in.
    map_dim = MAP_DIM if map_dim is None else map_dim
    prompt_dim = PROMPT_DIM if prompt_dim is None else prompt_dim

    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(f"Expected a 2-D matrix, got an array with ndim={X.ndim}.")

    fused_width = map_dim + prompt_dim
    if X.shape[1] < fused_width:
        raise ValueError(
            f"Expected at least {fused_width} columns "
            f"(map_dim={map_dim} + prompt_dim={prompt_dim}), got {X.shape[1]}."
        )

    X_map    = X[:, :map_dim].astype(np.float32, copy=True)
    X_prompt = X[:, map_dim:fused_width].astype(np.float32, copy=True)
    return X_map, X_prompt

def l2_normalize_rows(A, eps=1e-12):
    """Scale every row of ``A`` to unit Euclidean length.

    Row norms are floored at ``eps`` before dividing, so an all-zero row comes
    back as zeros instead of producing a division by zero.
    """
    squared_sums = np.sum(A * A, axis=1, keepdims=True)
    row_norms = np.sqrt(squared_sums)
    safe_norms = np.maximum(row_norms, eps)
    return A / safe_norms

# split each fused matrix into its map block and prompt block
Xm_tr, Xp_tr = split_blocks(X_train)
Xm_va, Xp_va = split_blocks(X_val)
Xm_te, Xp_te = split_blocks(X_test)

# prompts: L2 only
Xp_tr = l2_normalize_rows(Xp_tr)
Xp_va = l2_normalize_rows(Xp_va)
Xp_te = l2_normalize_rows(Xp_te)

# maps: inf → NaN, so the imputer below treats non-finite entries as missing
for A in (Xm_tr, Xm_va, Xm_te):
    A[~np.isfinite(A)] = np.nan

# impute (train)
# keep_empty_features=True: by default SimpleImputer DROPS columns that are
# entirely missing at fit time, which would shrink the map block and shift the
# column layout of the fused matrix. Keeping them (filled with 0) preserves the
# width; such constant columns are then zeroed by the zero-variance mask below.
imp = SimpleImputer(strategy="median", keep_empty_features=True)
Xm_tr_imp = imp.fit_transform(Xm_tr)
Xm_va_imp = imp.transform(Xm_va)
Xm_te_imp = imp.transform(Xm_te)
assert Xm_tr_imp.shape[1] == Xm_tr.shape[1], "Imputer changed the map block width."

# clip (5–95%) using per-column thresholds computed on train only
q_lo = np.nanpercentile(Xm_tr_imp, 5, axis=0)
q_hi = np.nanpercentile(Xm_tr_imp, 95, axis=0)
def clip_to_q(A, lo, hi):
    """Clamp each column of A into its [lo, hi] range."""
    return np.clip(A, lo, hi)

Xm_tr_imp = clip_to_q(Xm_tr_imp, q_lo, q_hi)
Xm_va_imp = clip_to_q(Xm_va_imp, q_lo, q_hi)
Xm_te_imp = clip_to_q(Xm_te_imp, q_lo, q_hi)

# drop zero-variance cols on train (measured after clipping)
stds = np.nanstd(Xm_tr_imp, axis=0)
keep_mask = stds > 1e-12

# scale kept columns (train fit)
scaler = RobustScaler(with_centering=True, with_scaling=True, quantile_range=(5, 95))
Xm_tr_kept = scaler.fit_transform(Xm_tr_imp[:, keep_mask])
Xm_va_kept = scaler.transform(Xm_va_imp[:, keep_mask])
Xm_te_kept = scaler.transform(Xm_te_imp[:, keep_mask])

# rebuild full map dim (dropped cols = 0) so the fused width stays unchanged
Xm_tr_s = np.zeros_like(Xm_tr_imp, dtype=np.float32)
Xm_va_s = np.zeros_like(Xm_va_imp, dtype=np.float32)
Xm_te_s = np.zeros_like(Xm_te_imp, dtype=np.float32)
Xm_tr_s[:, keep_mask] = Xm_tr_kept.astype(np.float32)
Xm_va_s[:, keep_mask] = Xm_va_kept.astype(np.float32)
Xm_te_s[:, keep_mask] = Xm_te_kept.astype(np.float32)

# fuse back: scaled map block followed by the L2-normalised prompt block
X_train_s = np.concatenate([Xm_tr_s, Xp_tr], axis=1).astype(np.float32)
X_val_s   = np.concatenate([Xm_va_s, Xp_va], axis=1).astype(np.float32)
X_test_s  = np.concatenate([Xm_te_s, Xp_te], axis=1).astype(np.float32)

assert np.isfinite(X_train_s).all() and np.isfinite(X_val_s).all() and np.isfinite(X_test_s).all(), "Non-finite after preprocessing."
print("‚úÖ Modality-aware preprocessing complete.")

# save preprocessing bundle: every train-fitted object needed to repeat the
# exact same transform at inference time
joblib.dump({
    "imp": imp, "q_lo": q_lo, "q_hi": q_hi,
    "keep_mask": keep_mask, "scaler": scaler,
    "map_dim": MAP_DIM, "prompt_dim": PROMPT_DIM
}, PATHS.TRAIN_OUT / "preproc.joblib")


‚úÖ Modality-aware preprocessing complete.


['/Users/amirdonyadide/Documents/GitHub/Thesis/data/output/train_out/preproc.joblib']

⚖️ 7) Class Weights

In [9]:
# Class names in encoder order: position i is the label for encoded index i.
classes = [name for name in le.classes_]
n_classes = len(classes)

# "balanced" weights computed from the training labels, one per class index.
cls_w = compute_class_weight(
    class_weight="balanced",
    classes=np.arange(n_classes),
    y=y_train_cls,
)

# Per-sample weight = weight of that sample's class (vectorised lookup).
sample_w = cls_w[y_train_cls].astype("float32")
print("Class weights:", {name: weight for name, weight in zip(classes, cls_w)})


Class weights: {np.str_('aggregate'): np.float64(0.9264705882352942), np.str_('displace'): np.float64(1.0361842105263157), np.str_('select'): np.float64(0.984375), np.str_('simplify'): np.float64(1.0641891891891893)}


🧠 8) Train MLP

In [10]:
# Hyperparameters common to the primary (adam) model and the lbfgs fallback.
_mlp_shared = dict(
    hidden_layer_sizes=(128, 64),
    activation="relu",
    alpha=1e-3,
    tol=1e-4,
    random_state=CFG.SEED,
    verbose=True,
)

# Settings that apply to the adam run only (mini-batches and early stopping).
_mlp_adam_only = dict(
    solver="adam",
    learning_rate_init=3e-4,
    batch_size=32,
    max_iter=300,
    early_stopping=True,
    n_iter_no_change=15,
    validation_fraction=0.15,
)

clf = MLPClassifier(**_mlp_shared, **_mlp_adam_only)

try:
    # Primary attempt: adam with per-sample class-balancing weights.
    clf.fit(X_train_s, y_train_cls, sample_weight=sample_w)
except Exception as e:
    # NOTE(review): any exception lands here, and the fallback trains WITHOUT
    # sample weights, so class balancing is dropped. The recorded run shows
    # loss = nan from the first adam iteration; the cause is not addressed here.
    print(f"‚ö†Ô∏è Adam crashed ({e}). Falling back to lbfgs (no sample_weight).")
    clf = MLPClassifier(solver="lbfgs", max_iter=500, **_mlp_shared)
    clf.fit(X_train_s, y_train_cls)
print("‚úÖ Training done.")


Iteration 1, loss = nan
Validation score: 0.270091
Iteration 2, loss = nan
Validation score: 0.270091
Iteration 3, loss = nan
Validation score: 0.270091
Iteration 4, loss = nan
Validation score: 0.270091
Iteration 5, loss = nan
Validation score: 0.270091
Iteration 6, loss = nan
Validation score: 0.270091
Iteration 7, loss = nan
Validation score: 0.270091
Iteration 8, loss = nan
Validation score: 0.270091
Iteration 9, loss = nan
Validation score: 0.270091
Iteration 10, loss = nan
Validation score: 0.270091
Iteration 11, loss = nan
Validation score: 0.270091
Iteration 12, loss = nan
Validation score: 0.270091
Iteration 13, loss = nan
Validation score: 0.270091
Iteration 14, loss = nan
Validation score: 0.270091
Iteration 15, loss = nan
Validation score: 0.270091
Iteration 16, loss = nan
Validation score: 0.270091
Iteration 17, loss = nan
Validation score: 0.270091
Validation score did not improve more than tol=0.000100 for 15 consecutive epochs. Stopping.
‚ö†Ô∏è Adam crashed (Solver prod

  self.beta_2 * v + (1 - self.beta_2) * (grad**2)
  activations[i + 1] += self.intercepts_[i]
  ret = a @ b
  activations[i + 1] += self.intercepts_[i]
ABNORMAL: 

You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


📊 9) Evaluate

In [37]:
def eval_split(name, Xs, ys_true):
    """Score the notebook-level classifier ``clf`` on one split and print a report.

    Parameters
    ----------
    name : str
        Label used in the printed header (e.g. "VAL", "TEST").
    Xs : array-like
        Preprocessed feature matrix passed to ``clf.predict``.
    ys_true : array-like
        Integer-encoded ground-truth labels for ``Xs``.

    Returns
    -------
    y_pred : array
        Predicted class indices, in the order of ``Xs``.
    """
    y_pred = clf.predict(Xs)
    acc = accuracy_score(ys_true, y_pred)
    # zero_division=0 gives the same value as the default ("warn" scores such
    # classes as 0) but without one UndefinedMetricWarning per call.
    f1m = f1_score(ys_true, y_pred, average="macro", zero_division=0)
    print(f"\n{name}:  acc={acc:.4f}  f1_macro={f1m:.4f}")
    # Pass the label indices explicitly: without `labels`, the report infers
    # them from the data and raises if a class is absent from both ys_true and
    # y_pred, because target_names would no longer match in length.
    print(classification_report(
        ys_true, y_pred,
        labels=np.arange(len(classes)),
        target_names=classes,
        digits=3,
        zero_division=0,
    ))
    return y_pred

# Validation split: print the metrics; the returned predictions are discarded.
print("\n===== Validation =====")
_ = eval_split("VAL", X_val_s, y_val_cls)

# Test split: print the metrics and keep the predictions in `y_test_pred`.
print("\n===== Test =====")
y_test_pred = eval_split("TEST", X_test_s, y_test_cls)



===== Validation =====

VAL:  acc=0.2090  f1_macro=0.1359
              precision    recall  f1-score   support

   aggregate      0.237     0.500     0.321        18
    displace      0.000     0.000     0.000        16
      select      0.179     0.294     0.222        17
    simplify      0.000     0.000     0.000        16

    accuracy                          0.209        67
   macro avg      0.104     0.199     0.136        67
weighted avg      0.109     0.209     0.143        67


===== Test =====

TEST:  acc=0.2500  f1_macro=0.1520
              precision    recall  f1-score   support

   aggregate      0.289     0.722     0.413        18
    displace      0.000     0.000     0.000        17
      select      0.174     0.222     0.195        18
    simplify      0.000     0.000     0.000        15

    accuracy                          0.250        68
   macro avg      0.116     0.236     0.152        68
weighted avg      0.123     0.250     0.161        68



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


🔍 10) Sanity Checks

In [38]:
def check_matrix(name, X):
    """Print a quick health summary of matrix ``X`` under the heading ``name``.

    Reports shape and dtype, whether every entry is finite, the NaN-ignoring
    min/max and mean/std, and how many columns are entirely NaN or have zero
    standard deviation. Returns nothing.
    """
    header = f"\n--- {name} ---"
    print(header)
    print("shape:", X.shape, "dtype:", X.dtype)

    everything_finite = np.isfinite(X).all()
    print("finite:", everything_finite)

    lowest, highest = np.nanmin(X), np.nanmax(X)
    print("min/max:", lowest, highest)

    overall_mean, overall_std = np.nanmean(X), np.nanstd(X)
    print("mean/std:", overall_mean, overall_std)

    # Column-wise diagnostics.
    n_all_nan_cols = np.isnan(X).all(axis=0).sum()
    per_column_std = np.nanstd(X, axis=0)
    n_constant_cols = (per_column_std == 0).sum()
    print("all-NaN cols:", n_all_nan_cols, "zero-variance cols:", n_constant_cols)

# Summarise each preprocessed split in turn (train, val, test).
for _split_name, _split_matrix in (
    ("X_train_s", X_train_s),
    ("X_val_s", X_val_s),
    ("X_test_s", X_test_s),
):
    check_matrix(_split_name, _split_matrix)

# List the distinct operator labels that made it into the training split.
train_ops = sorted(set(df_train[OP_COL]))
print("classes present in train:", train_ops)



--- X_train_s ---
shape: (315, 677) dtype: float32
finite: True
min/max: -1.024782 1.0188221
mean/std: 0.0036010856 0.17772073
all-NaN cols: 0 zero-variance cols: 14

--- X_val_s ---
shape: (67, 677) dtype: float32
finite: True
min/max: -1.024782 1.0188221
mean/std: 0.005705521 0.17796075
all-NaN cols: 0 zero-variance cols: 14

--- X_test_s ---
shape: (68, 677) dtype: float32
finite: True
min/max: -1.024782 1.0188221
mean/std: 0.0046883137 0.17945407
all-NaN cols: 0 zero-variance cols: 14
classes present in train: ['aggregate', 'displace', 'select', 'simplify']


💾 11) Save Artifacts

In [39]:
# Persist the fitted classifier and its label encoder next to the
# preprocessing bundle, under the file names the inference helper loads.
for _artifact, _file_name in (
    (clf, "mlp_classifier.joblib"),
    (le, "label_encoder.joblib"),
):
    joblib.dump(_artifact, PATHS.TRAIN_OUT / _file_name)
print("‚úÖ Saved model + label encoder to:", PATHS.TRAIN_OUT)


‚úÖ Saved model + label encoder to: /Users/amirdonyadide/Documents/GitHub/Thesis/data/output/train_out


🚀 12) Inference Helper

In [40]:
def apply_preproc_and_predict(X_concat: np.ndarray,
                              preproc_path=PATHS.TRAIN_OUT / "preproc.joblib",
                              model_path=PATHS.TRAIN_OUT / "mlp_classifier.joblib",
                              le_path=PATHS.TRAIN_OUT / "label_encoder.joblib"):
    """
    Apply the saved training-time preprocessing to fused rows and classify them.

    Parameters
    ----------
    X_concat : array-like, shape (N, >= map_dim + prompt_dim) or (map_dim + prompt_dim,)
        Fused rows, map block first, then prompt block. A single 1-D row is
        accepted and treated as N = 1. Prompts can be raw; they are
        L2-normalised here. Columns past map_dim + prompt_dim are ignored.
    preproc_path, model_path, le_path : path-like
        Locations of the preprocessing bundle, the classifier and the label
        encoder saved by this notebook.

    Returns
    -------
    (labels_str, labels_idx)
        Predicted operator names and the corresponding class indices.

    Raises
    ------
    ValueError
        If the input has more than two dimensions or fewer columns than
        map_dim + prompt_dim.
    """
    # SECURITY: joblib.load unpickles arbitrary objects. Only point these paths
    # at artifacts you produced yourself.
    bundle = joblib.load(preproc_path)
    imp, q_lo, q_hi = bundle["imp"], bundle["q_lo"], bundle["q_hi"]
    keep_mask, scaler = bundle["keep_mask"], bundle["scaler"]
    map_dim, prompt_dim = bundle["map_dim"], bundle["prompt_dim"]

    # Promote a single 1-D row to shape (1, D) and validate the width up
    # front; slicing a too-narrow matrix would silently yield a short prompt
    # block and only fail later with an unrelated shape error.
    X_concat = np.atleast_2d(np.asarray(X_concat))
    if X_concat.ndim != 2:
        raise ValueError(f"X_concat must be 1-D or 2-D, got ndim={X_concat.ndim}.")
    fused_width = map_dim + prompt_dim
    if X_concat.shape[1] < fused_width:
        raise ValueError(
            f"X_concat needs at least {fused_width} columns "
            f"(map_dim={map_dim} + prompt_dim={prompt_dim}), got {X_concat.shape[1]}."
        )

    def l2_rows(A, eps=1e-12):
        """Scale each row to unit L2 norm; norms are floored at eps."""
        n = np.sqrt((A * A).sum(axis=1, keepdims=True))
        return A / np.maximum(n, eps)

    # Copies, so the caller's array is never modified in place.
    X_map    = X_concat[:, :map_dim].astype(np.float32, copy=True)
    X_prompt = X_concat[:, map_dim:fused_width].astype(np.float32, copy=True)
    X_prompt = l2_rows(X_prompt)

    # Map block: same steps as training — non-finite → NaN, impute, clip.
    X_map[~np.isfinite(X_map)] = np.nan
    X_map_imp = imp.transform(X_map)
    X_map_imp = np.clip(X_map_imp, q_lo, q_hi)

    # Scale only the columns kept at training time; dropped columns stay 0.
    X_map_std = np.zeros_like(X_map_imp, dtype=np.float32)
    X_map_std[:, keep_mask] = scaler.transform(X_map_imp[:, keep_mask]).astype(np.float32)

    X_s = np.concatenate([X_map_std, X_prompt], axis=1).astype(np.float32)

    clf_ = joblib.load(model_path)
    le_  = joblib.load(le_path)
    y_pred_idx = clf_.predict(X_s)
    y_pred_lbl = le_.inverse_transform(y_pred_idx)
    return y_pred_lbl, y_pred_idx

print("‚úÖ Pipeline ready. Use apply_preproc_and_predict(...) for inference.")


‚úÖ Pipeline ready. Use apply_preproc_and_predict(...) for inference.
