In [39]:
import os
import pandas as pd
import numpy as np
from pathlib import Path

DATA_PROCESSED = "../data/processed/tabular"
MODELS_DIR = "../models"
RESULTS_DIR = "../results/metrics"

os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

# Files produced by 06_split_data.ipynb
TRAIN_PATH = f"{DATA_PROCESSED}/train.csv"
VAL_PATH   = f"{DATA_PROCESSED}/val.csv"
TEST_PATH  = f"{DATA_PROCESSED}/test.csv"

# Load all datasets
df_train = pd.read_csv(TRAIN_PATH, low_memory=False)
df_val   = pd.read_csv(VAL_PATH, low_memory=False)
df_test  = pd.read_csv(TEST_PATH, low_memory=False)

print("Data shapes:")
print(f"Train: {df_train.shape}")
print(f"Val: {df_val.shape}")
print(f"Test: {df_test.shape}")

df_train.head(4)

Data shapes:
Train: (24545, 16)
Val: (5260, 16)
Test: (5260, 16)


Unnamed: 0,shipment_id,origin,destination,dispatch_date,delivery_date,delay_days,disruption_type,risk_score,source,lead_time_days,delay_severity,month,weekday,quarter,year,route_risk_score
0,O1000,B33,S23,2023-10-27 00:00:00,2023-10-28,0.0,,0.0,resilience,1.0,Minor,10,4,4,2023,1.0
1,O1001,B1,S20,2023-07-08 00:00:00,2023-07-09,0.0,,0.0,resilience,1.0,Minor,7,5,3,2023,1.0
2,O1002,B2,S10,2023-12-29 00:00:00,2024-01-07,7.0,Shortage,1.0,resilience,9.0,Severe,12,4,4,2023,1.0
3,O1003,B6,S10,2023-01-17 00:00:00,2023-01-20,0.0,,0.0,resilience,3.0,Moderate,1,1,1,2023,1.0


Utility: Feature/Target Auto-Detection

In [40]:
def find_col(candidates, cols, col_map=None, return_all=False, verbose=True):
    """
    Return columns from candidates that exist in cols (case-insensitive).

    Parameters:
        candidates (list): List of candidate column names.
        cols (list): List of columns to search in (lowercase).
        col_map (dict, optional): Mapping from lowercase to original column names.
        return_all (bool): If True, return all matches; else first match.
        verbose (bool): If True, prints warnings for multiple matches.

    Returns:
        str or list: Matched column name(s) or None.
    """
    matches = [c for c in candidates if c.lower() in cols]
    if not matches:
        return None
    if return_all:
        return [col_map[c.lower()] if col_map else c for c in matches]
    if len(matches) > 1 and verbose:
        print(f"⚠️ Multiple matches found: {matches}. Using the first one.")
    return col_map[matches[0].lower()] if col_map else matches[0]


# Lowercase lookup
cols = [c.lower() for c in df_train.columns]
col_map = {c.lower(): c for c in df_train.columns}

# --- Classification target candidates ---
classification_candidates = [
    "disruption_flag","is_disrupted","disrupted","risk_flag",
    "has_disruption","disruption","incident_flag","disruption_type"
]
clf_target = find_col(classification_candidates, cols, col_map=col_map)

# --- Regression target candidates ---
regression_candidates = [
    "delay_days","delivery_delay_days","delay","days_delayed",
    "delay_in_days","lead_time_delays","lead_time_days","delay_severity"
]
reg_target = find_col(regression_candidates, cols, col_map=col_map)

# --- ID & Date candidates ---
id_candidates = ["shipment_id","id","order_id","consignment_id"]
date_candidates = [
    "dispatch_date","delivery_date","ship_date","event_time",
    "timestamp","created_at","pickup_date"
]
id_col   = find_col(id_candidates, cols, col_map=col_map)
date_col = find_col(date_candidates, cols, col_map=col_map)

# --- Summary ---
print("✅ Classification target:", clf_target)
print("✅ Regression target:", reg_target)
print("✅ ID column:", id_col)
print("✅ Date column:", date_col)

# --- Guardrails ---
if clf_target is None and reg_target is None:
    raise ValueError(
        "❌ No target columns detected. "
        "Please set clf_target/reg_target manually."
    )


⚠️ Multiple matches found: ['delay_days', 'lead_time_days', 'delay_severity']. Using the first one.
⚠️ Multiple matches found: ['dispatch_date', 'delivery_date']. Using the first one.
✅ Classification target: disruption_type
✅ Regression target: delay_days
✅ ID column: shipment_id
✅ Date column: dispatch_date


Split Features

In [41]:
# ---------------------------
# Safe Drop of Non-Feature Columns
# ---------------------------
drop_cols = {id_col, date_col, clf_target, reg_target} - {None}

def safe_drop(df, drop_cols):
    """Drop columns safely if they exist in DataFrame."""
    return df.drop(columns=[c for c in drop_cols if c in df.columns], errors="ignore")

X_train_full = safe_drop(df_train, drop_cols)
X_val_full   = safe_drop(df_val, drop_cols)
X_test_full  = safe_drop(df_test, drop_cols)

# ---------------------------
# Extract Targets
# ---------------------------
def extract_target(df, target_col, name="target"):
    """Extract target column from DataFrame if exists."""
    if target_col and target_col in df.columns:
        y = df[target_col].copy()
        print(f"✅ {name} extracted: {target_col}, shape: {y.shape}")
        return y
    print(f"⚠️ {name} not found in DataFrame.")
    return None

# Classification targets
y_train_clf = extract_target(df_train, clf_target, "Classification target (train)")
y_val_clf   = extract_target(df_val, clf_target, "Classification target (val)")
y_test_clf  = extract_target(df_test, clf_target, "Classification target (test)")

# Regression targets
y_train_reg = extract_target(df_train, reg_target, "Regression target (train)")
y_val_reg   = extract_target(df_val, reg_target, "Regression target (val)")
y_test_reg  = extract_target(df_test, reg_target, "Regression target (test)")

# ---------------------------
# Detect Feature Types
# ---------------------------
num_cols = [c for c in X_train_full.columns if pd.api.types.is_numeric_dtype(X_train_full[c])]
cat_cols = [c for c in X_train_full.columns if c not in num_cols]

print("\n✅ Features prepared")
print(f"   Numerical columns ({len(num_cols)}): {num_cols[:5]}{'...' if len(num_cols) > 5 else ''}")
print(f"   Categorical columns ({len(cat_cols)}): {cat_cols[:5]}{'...' if len(cat_cols) > 5 else ''}")
print(f"   Regression target: {reg_target}")
print(f"   Classification target: {clf_target}")


✅ Classification target (train) extracted: disruption_type, shape: (24545,)
✅ Classification target (val) extracted: disruption_type, shape: (5260,)
✅ Classification target (test) extracted: disruption_type, shape: (5260,)
✅ Regression target (train) extracted: delay_days, shape: (24545,)
✅ Regression target (val) extracted: delay_days, shape: (5260,)
✅ Regression target (test) extracted: delay_days, shape: (5260,)

✅ Features prepared
   Numerical columns (7): ['risk_score', 'lead_time_days', 'month', 'weekday', 'quarter']...
   Categorical columns (5): ['origin', 'destination', 'delivery_date', 'source', 'delay_severity']
   Regression target: delay_days
   Classification target: disruption_type


Common Preprocess Pipline (Impute + Scale + One-Hot)

In [42]:
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import numpy as np
import pandas as pd

def robust_stratified_split(X, y, test_size=0.3, val_size=0.5, random_state=42):
    """
    Splits X, y into train/val/test with:
    - Imputation for missing values
    - Stratified splitting if possible
    - Falls back to random split if only one class
    """
    # ------------------------
    # Drop rows where target is NaN
    # ------------------------
    mask = y.notna()
    X, y = X.loc[mask].copy(), y.loc[mask].copy()

    # ------------------------
    # Impute missing features
    # ------------------------
    # Numeric
    num_cols = X.select_dtypes(include=["int64", "float64"]).columns
    if len(num_cols) > 0:
        num_imputer = SimpleImputer(strategy="median")
        X.loc[:, num_cols] = num_imputer.fit_transform(X[num_cols])

    # Categorical
    cat_cols = X.select_dtypes(include="object").columns
    if len(cat_cols) > 0:
        cat_imputer = SimpleImputer(strategy="most_frequent")
        X.loc[:, cat_cols] = cat_imputer.fit_transform(X[cat_cols])

    # ------------------------
    # Encode categorical target for stratification if needed
    # ------------------------
    if y.dtype == object or str(y.dtype) == 'category':
        y_strat = y.astype("category").cat.codes
    else:
        y_strat = y.copy()

    # ------------------------
    # Check if target has at least 2 classes
    # ------------------------
    unique_classes = np.unique(y_strat)
    stratify_possible = len(unique_classes) > 1

    if not stratify_possible:
        print("⚠️ Only one class present. Using random split instead of stratified.")

    # ------------------------
    # Stratified or random train/temp split
    # ------------------------
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=test_size, stratify=y_strat if stratify_possible else None,
        random_state=random_state
    )

    # ------------------------
    # Stratified or random val/test split
    # ------------------------
    y_temp_strat = y_temp.astype("category").cat.codes if (stratify_possible and y_temp.dtype == object) else y_temp
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=val_size, stratify=y_temp_strat if stratify_possible else None,
        random_state=random_state
    )

    # ------------------------
    # Log class distributions
    # ------------------------
    print("Train classes:", np.unique(y_train, return_counts=True))
    print("Val classes:", np.unique(y_val, return_counts=True))
    print("Test classes:", np.unique(y_test, return_counts=True))

    return X_train, X_val, X_test, y_train, y_val, y_test


In [43]:
# ------------------------
# Prepare features and target
# ------------------------
if clf_target in df_train.columns:
    X = df_train.drop(columns=[clf_target], errors="ignore").copy()
    y = df_train[clf_target].copy()
else:
    print(f"⚠️ Classification target '{clf_target}' not found in df_train. Skipping split.")
    X = df_train.copy()
    y = None

# ------------------------
# Perform robust stratified split if target is valid
# ------------------------
if y is not None and y.notna().any():
    try:
        X_train, X_val, X_test, y_train, y_val, y_test = robust_stratified_split(X, y)
        
        # ------------------------
        # Reset indices for convenience
        # ------------------------
        for df in [X_train, X_val, X_test, y_train, y_val, y_test]:
            df.reset_index(drop=True, inplace=True)

        print("✅ Train/Val/Test split done")
        print("Train shape:", X_train.shape, "| Val shape:", X_val.shape, "| Test shape:", X_test.shape)
    except ValueError as e:
        print(f"⚠️ Skipping split: {e}")
        X_train, X_val, X_test = X.copy(), None, None
        y_train, y_val, y_test = y.copy() if y is not None else None, None, None
else:
    print("⚠️ Skipping split: no valid target detected.")
    X_train, X_val, X_test = X.copy(), None, None
    y_train, y_val, y_test = y.copy() if y is not None else None, None, None


Train classes: (array(['Clear', 'Customs', 'Detour', 'Heavy', 'High Risk', 'Low Risk',
       'Moderate Risk', 'Shortage', 'Strike', 'Weather'], dtype=object), array([  230,   174,   241,   229, 11213,  1486,  2382,   189,   171,
         186]))
Val classes: (array(['Clear', 'Customs', 'Detour', 'Heavy', 'High Risk', 'Low Risk',
       'Moderate Risk', 'Shortage', 'Strike', 'Weather'], dtype=object), array([  49,   37,   52,   49, 2403,  319,  510,   41,   36,   40]))
Test classes: (array(['Clear', 'Customs', 'Detour', 'Heavy', 'High Risk', 'Low Risk',
       'Moderate Risk', 'Shortage', 'Strike', 'Weather'], dtype=object), array([  49,   37,   52,   49, 2403,  318,  511,   40,   37,   40]))
✅ Train/Val/Test split done
Train shape: (16501, 15) | Val shape: (3536, 15) | Test shape: (3536, 15)


In [44]:
# ---------------------------
# Consistent Preprocessing for Delay Classification
# ---------------------------
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
from collections import Counter
import pandas as pd
import numpy as np
import os
import json

clf_delay_results = {}

if reg_target is not None:
    
    # ---------------------------
    # FIT PREPROCESSOR ON FULL TRAINING DATA FIRST
    # ---------------------------
    print("🔧 Setting up consistent preprocessing...")
    
    # Column Groups (using your existing logic)
    X_cols = X_train.columns if X_train is not None else []
    
    # Special categorical
    special_cat_cols = [c for c in ["disruption_type"] if c in X_cols]
    
    # Flag columns (binary indicators)
    flag_cols = [c for c in X_cols if c.lower() in ["risk_flag","disruption_flag","incident_flag"]]
    
    # Special numeric columns
    special_num_cols = [c for c in X_cols if c.lower() in ["lead_time_days", "route_risk_score"]]
    
    # General numeric (exclude special numeric, flags, regression target)
    num_cols = [c for c in X_train.select_dtypes(include=["int64","float64"]).columns 
                if c not in special_num_cols + flag_cols + ([reg_target] if reg_target else [])]
    
    # General categorical (exclude special categorical, flags, ID/date)
    non_features = [id_col, date_col] if 'id_col' in globals() and 'date_col' in globals() else []
    cat_cols = [c for c in X_train.select_dtypes(include="object").columns 
                if c not in special_cat_cols + flag_cols + [col for col in non_features if col]]
    
    # ---------------------------
    # Create Pipelines
    # ---------------------------
    num_pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler(with_mean=False))
    ]) if num_cols else None
    
    special_num_pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="constant", fill_value=0)),
        ("scaler", StandardScaler(with_mean=False))
    ]) if special_num_cols else None
    
    special_cat_pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="constant", fill_value="none")),
        ("ohe", OneHotEncoder(handle_unknown="ignore", sparse_output=True, drop='first'))  # Add drop='first' to reduce features
    ]) if special_cat_cols else None
    
    flag_pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="constant", fill_value=0))
    ]) if flag_cols else None
    
    general_cat_pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("ohe", OneHotEncoder(handle_unknown="ignore", sparse_output=True, drop='first'))  # Add drop='first' to reduce features
    ]) if cat_cols else None
    
    # ---------------------------
    # ColumnTransformer
    # ---------------------------
    transformers = []
    if num_cols and num_pipeline: transformers.append(("num", num_pipeline, num_cols))
    if special_num_cols and special_num_pipeline: transformers.append(("special_num", special_num_pipeline, special_num_cols))
    if special_cat_cols and special_cat_pipeline: transformers.append(("special_cat", special_cat_pipeline, special_cat_cols))
    if flag_cols and flag_pipeline: transformers.append(("flags", flag_pipeline, flag_cols))
    if cat_cols and general_cat_pipeline: transformers.append(("general_cat", general_cat_pipeline, cat_cols))
    
    preprocessor = ColumnTransformer(
        transformers=transformers,
        remainder="drop",
        sparse_threshold=0.3
    )
    
    print("✅ Preprocessor configuration:")
    print(f"   Numeric columns ({len(num_cols)}): {num_cols[:3]}{'...' if len(num_cols) > 3 else ''}")
    print(f"   Special numeric columns ({len(special_num_cols)}): {special_num_cols}")
    print(f"   Special categorical columns ({len(special_cat_cols)}): {special_cat_cols}")
    print(f"   Flag columns ({len(flag_cols)}): {flag_cols}")
    print(f"   General categorical columns ({len(cat_cols)}): {cat_cols[:3]}{'...' if len(cat_cols) > 3 else ''}")
    
    # ---------------------------
    # FIT PREPROCESSOR ON FULL TRAINING DATA
    # ---------------------------
    print("🔧 Fitting preprocessor on full training data...")
    preprocessor.fit(X_train)
    
    # Transform all datasets using the fitted preprocessor
    print("🔧 Transforming datasets...")
    X_train_processed = preprocessor.transform(X_train)
    X_val_processed = preprocessor.transform(X_val)
    X_test_processed = preprocessor.transform(X_test)
    
    # Convert to dense if needed (for easier handling)
    if hasattr(X_train_processed, 'toarray'):
        X_train_processed = X_train_processed.toarray()
        X_val_processed = X_val_processed.toarray()
        X_test_processed = X_test_processed.toarray()
    
    print(f"✅ Processed dataset shapes:")
    print(f"   Training: {X_train_processed.shape}")
    print(f"   Validation: {X_val_processed.shape}")
    print(f"   Test: {X_test_processed.shape}")
    
    # Verify all datasets have the same number of features
    if not (X_train_processed.shape[1] == X_val_processed.shape[1] == X_test_processed.shape[1]):
        raise ValueError(f"Feature count mismatch after preprocessing: "
                        f"Train={X_train_processed.shape[1]}, "
                        f"Val={X_val_processed.shape[1]}, "
                        f"Test={X_test_processed.shape[1]}")

🔧 Setting up consistent preprocessing...
✅ Preprocessor configuration:
   Numeric columns (5): ['risk_score', 'month', 'weekday']...
   Special numeric columns (2): ['lead_time_days', 'route_risk_score']
   Special categorical columns (0): []
   Flag columns (0): []
   General categorical columns (5): ['origin', 'destination', 'delivery_date']...
🔧 Fitting preprocessor on full training data...
🔧 Transforming datasets...
✅ Processed dataset shapes:
   Training: (16501, 1751)
   Validation: (3536, 1751)
   Test: (3536, 1751)




Classification: Risk Prediction

In [45]:
import os
import json
import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    f1_score,
    roc_auc_score
)
from sklearn.utils.class_weight import compute_class_weight

clf_results = {}

# -------------------------------
# Proceed only if classification target exists
# -------------------------------
if clf_target is not None and y_train is not None:

    # -------------------------------
    # Merge rare classes
    # -------------------------------
    class_counts = y_train.value_counts()
    rare_classes = class_counts[class_counts < 1000].index.tolist()  # adjust threshold as needed
    if rare_classes:
        print(f"⚠️ Merging rare classes into 'Other': {rare_classes}")
        y_train_proc = y_train.replace(rare_classes, "Other")
        y_val_proc = y_val.replace(rare_classes, "Other")
        y_test_proc = y_test.replace(rare_classes, "Other")
    else:
        y_train_proc, y_val_proc, y_test_proc = y_train.copy(), y_val.copy(), y_test.copy()

    # -------------------------------
    # Detect if binary or multi-class
    # -------------------------------
    classes = np.unique(y_train_proc)
    is_binary = len(classes) == 2
    print(f"Detected target '{clf_target}' with {len(classes)} unique classes.")
    print(f"✅ {'Binary' if is_binary else 'Multi-class'} classification detected.")

    # -------------------------------
    # Compute class weights for Logistic Regression
    # -------------------------------
    class_weights = compute_class_weight("balanced", classes=classes, y=y_train_proc)
    class_weight_dict = {cls: w for cls, w in zip(classes, class_weights)}
    print(f"✅ Class weights for Logistic Regression: {class_weight_dict}")

    # -------------------------------
    # Ensure all splits have >1 class
    # -------------------------------
    skip_classification = any(len(np.unique(y)) < 2 for y in [y_train_proc, y_val_proc, y_test_proc])
    if skip_classification:
        print("⚠️ Skipping classification due to insufficient class diversity in splits.")
    else:

        # -------------------------------
        # Define pipelines
        # -------------------------------
        logreg = Pipeline([
            ("prep", preprocessor),
            ("clf", LogisticRegression(
                max_iter=5000,
                solver="lbfgs",
                multi_class="multinomial",
                class_weight=class_weight_dict
            ))
        ])

        rf = Pipeline([
            ("prep", preprocessor),
            ("clf", RandomForestClassifier(
                n_estimators=300,
                max_depth=15,
                min_samples_leaf=5,
                random_state=42,
                n_jobs=-1
            ))
        ])

        models = {"logreg": logreg, "rf": rf}

        # -------------------------------
        # Train, evaluate, and save models
        # -------------------------------
        for name, pipe in models.items():
            print(f"🔹 Training {name}...")
            pipe.fit(X_train, y_train_proc)

            # Predictions
            test_pred = pipe.predict(X_test)
            val_pred = pipe.predict(X_val)

            # Classification report & confusion matrix
            report = classification_report(y_test_proc, test_pred, output_dict=True, zero_division=0)
            cm = confusion_matrix(y_test_proc, test_pred).tolist()
            f1_macro = f1_score(y_test_proc, test_pred, average="macro")

            # Multi-class AUC
            try:
                val_pred_prob = pipe.predict_proba(X_val)
                test_pred_prob = pipe.predict_proba(X_test)
                val_auc = roc_auc_score(y_val_proc, val_pred_prob, multi_class='ovr')
                test_auc = roc_auc_score(y_test_proc, test_pred_prob, multi_class='ovr')
            except Exception as e:
                val_auc, test_auc = None, None
                print(f"⚠️ Could not compute multi-class AUC for {name}: {e}")

            clf_results[name] = {
                "f1_macro": float(f1_macro),
                "val_auc": float(val_auc) if val_auc is not None else None,
                "test_auc": float(test_auc) if test_auc is not None else None,
                "report": report,
                "cm": cm
            }

            # Save model
            os.makedirs(MODELS_DIR, exist_ok=True)
            joblib.dump(pipe, os.path.join(MODELS_DIR, f"clf_{name}.joblib"))

        # Save metrics
        os.makedirs(RESULTS_DIR, exist_ok=True)
        with open(os.path.join(RESULTS_DIR, "classification_results.json"), "w") as f:
            json.dump(clf_results, f, indent=2)

        print("✅ Classification results saved with class imbalance handling!")
        for k, v in clf_results.items():
            print(f"  {k}: F1-macro = {v['f1_macro']}, Test AUC = {v['test_auc']}")

else:
    print("⚠️ Skipping classification: no valid classification target detected or target is empty.")


⚠️ Merging rare classes into 'Other': ['Detour', 'Clear', 'Heavy', 'Shortage', 'Weather', 'Customs', 'Strike']
Detected target 'disruption_type' with 4 unique classes.
✅ Multi-class classification detected.
✅ Class weights for Logistic Regression: {'High Risk': np.float64(0.36789886738606975), 'Low Risk': np.float64(2.7760767160161506), 'Moderate Risk': np.float64(1.7318429890848026), 'Other': np.float64(2.9051056338028167)}
🔹 Training logreg...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=5000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


🔹 Training rf...




✅ Classification results saved with class imbalance handling!
  logreg: F1-macro = 0.7505935893630877, Test AUC = 0.893349486214937
  rf: F1-macro = 0.3719721145157785, Test AUC = 1.0


Delay Classification

In [46]:
# ---------------------------
# Improved Delay Classification with Model Saving
# ---------------------------
import os
import json
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import joblib  # <-- for saving models

clf_delay_results = {}
best_models = {}  # To save best models

if reg_target is not None:

    # ---------------------------
    # Bucketize delays into classes
    # ---------------------------
    def bucketize_delay(y):
        y = pd.to_numeric(y, errors="coerce")
        if pd.isna(y):
            return "On-time"
        y = max(y, 0)
        if y == 0:
            return "On-time"
        elif 1 <= y <= 3:
            return "Minor delay"
        elif 4 <= y <= 7:
            return "Moderate delay"
        else:
            return "Severe delay"

    y_train_c = y_train_reg.apply(bucketize_delay)
    y_val_c   = y_val_reg.apply(bucketize_delay)
    y_test_c  = y_test_reg.apply(bucketize_delay)

    print("🔹 Original class distribution:")
    print(y_train_c.value_counts().sort_index())

    # ---------------------------
    # Align X and y
    # ---------------------------
    def align_features_targets(X, y):
        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)
        common_idx = X.index.intersection(y.index)
        return X.loc[common_idx], y.loc[common_idx]

    X_train_aligned, y_train_aligned = align_features_targets(X_train_processed, y_train_c)
    X_val_aligned, y_val_aligned     = align_features_targets(X_val_processed, y_val_c)
    X_test_aligned, y_test_aligned   = align_features_targets(X_test_processed, y_test_c)

    # ---------------------------
    # Improved undersampling
    # ---------------------------
    def undersample_balanced(X, y, strategy='uniform', random_state=42):
        df = X.copy()
        df['target'] = y
        class_counts = y.value_counts()
        print(f"🔹 Original class counts: {dict(class_counts)}")
        
        target_size = min(3000, class_counts.min()) if strategy=='uniform' else class_counts.min()
        print(f"🔹 Target size per class: {target_size}")
        
        balanced_dfs = []
        for cls in class_counts.index:
            cls_df = df[df['target']==cls]
            if len(cls_df) > target_size:
                cls_df = resample(cls_df, replace=False, n_samples=target_size, random_state=random_state)
            balanced_dfs.append(cls_df)
        
        df_balanced = pd.concat(balanced_dfs).sample(frac=1, random_state=random_state).reset_index(drop=True)
        X_bal = df_balanced.drop(columns=['target']).values
        y_bal = df_balanced['target'].values
        return X_bal, y_bal

    X_train_balanced, y_train_balanced = undersample_balanced(X_train_aligned, y_train_aligned)
    print("✅ Class counts after undersampling:")
    print(pd.Series(y_train_balanced).value_counts().sort_index())

    # ---------------------------
    # Multi-class encoding
    # ---------------------------
    delay_le = LabelEncoder()
    y_train_enc = delay_le.fit_transform(y_train_balanced)
    y_val_enc   = delay_le.transform(y_val_aligned)
    y_test_enc  = delay_le.transform(y_test_aligned)
    print("✅ Encoded classes (multi-class):", dict(zip(delay_le.classes_, delay_le.transform(delay_le.classes_))))

    # ---------------------------
    # Binary collapse (Severe vs Not-Severe)
    # ---------------------------
    def collapse_binary(y):
        return np.where(y=="Severe delay", "Severe", "Not Severe")

    y_train_bin = collapse_binary(y_train_balanced)
    y_val_bin   = collapse_binary(y_val_aligned)
    y_test_bin  = collapse_binary(y_test_aligned)

    bin_le = LabelEncoder()
    y_train_bin_enc = bin_le.fit_transform(y_train_bin)
    y_val_bin_enc   = bin_le.transform(y_val_bin)
    y_test_bin_enc  = bin_le.transform(y_test_bin)
    print("✅ Encoded classes (binary):", dict(zip(bin_le.classes_, bin_le.transform(bin_le.classes_))))

    # ---------------------------
    # Define models
    # ---------------------------
    def get_models(num_classes):
        return {
            "logreg": LogisticRegression(max_iter=5000, solver='lbfgs', multi_class='multinomial', random_state=42),
            "rf": RandomForestClassifier(n_estimators=300, max_depth=15, min_samples_split=10,
                                         min_samples_leaf=5, random_state=42, n_jobs=-1),
            "xgb": XGBClassifier(
                objective="multi:softprob" if num_classes>2 else "binary:logistic",
                eval_metric="mlogloss",
                n_estimators=300,
                max_depth=6,
                learning_rate=0.1,
                subsample=0.8,
                colsample_bytree=0.8,
                reg_alpha=0.1,
                reg_lambda=0.1,
                random_state=42,
                n_jobs=-1,
                verbosity=0
            )
        }

    os.makedirs(RESULTS_DIR, exist_ok=True)
    MODELS_DIR = os.path.join(RESULTS_DIR, "models")
    os.makedirs(MODELS_DIR, exist_ok=True)

    # ---------------------------
    # Multi-class classification
    # ---------------------------
    print("\n=== Multi-class classification ===")
    models_mc = get_models(len(delay_le.classes_))
    for name, model in models_mc.items():
        print(f"🔹 Training {name} (multi-class)...")
        try:
            model.fit(X_train_balanced, y_train_enc)
            val_pred_enc = model.predict(X_val_aligned)
            test_pred_enc = model.predict(X_test_aligned)
            val_pred = delay_le.inverse_transform(val_pred_enc)
            test_pred = delay_le.inverse_transform(test_pred_enc)

            val_report = classification_report(y_val_aligned, val_pred, output_dict=True, zero_division=0)
            test_report = classification_report(y_test_aligned, test_pred, output_dict=True, zero_division=0)

            clf_delay_results[f"{name}_multi"] = {
                "val_report": val_report,
                "test_report": test_report,
                "confusion_matrix": confusion_matrix(y_test_aligned, test_pred, labels=delay_le.classes_).tolist(),
                "classes": list(delay_le.classes_)
            }

            # Save best model by F1
            test_f1 = test_report['macro avg']['f1-score']
            if 'multi' not in best_models or test_f1 > best_models['multi']['f1']:
                best_models['multi'] = {'name': name, 'f1': test_f1, 'accuracy': test_report['accuracy'], 'model': model}
                joblib.dump(model, os.path.join(MODELS_DIR, f"best_delay_multi_{name}.joblib"))

            print(f"   Validation accuracy: {val_report['accuracy']:.3f}")
            print(f"   Test accuracy: {test_report['accuracy']:.3f}")
        except Exception as e:
            print(f"   ❌ Error training {name}: {str(e)}")

    # ---------------------------
    # Binary classification
    # ---------------------------
    print("\n=== Binary classification (Severe vs Not Severe) ===")
    models_bin = get_models(len(bin_le.classes_))
    for name, model in models_bin.items():
        print(f"🔹 Training {name} (binary)...")
        try:
            model.fit(X_train_balanced, y_train_bin_enc)
            val_pred_enc = model.predict(X_val_aligned)
            test_pred_enc = model.predict(X_test_aligned)
            val_pred = bin_le.inverse_transform(val_pred_enc)
            test_pred = bin_le.inverse_transform(test_pred_enc)

            val_report = classification_report(y_val_bin, val_pred, output_dict=True, zero_division=0)
            test_report = classification_report(y_test_bin, test_pred, output_dict=True, zero_division=0)

            clf_delay_results[f"{name}_binary"] = {
                "val_report": val_report,
                "test_report": test_report,
                "confusion_matrix": confusion_matrix(y_test_bin, test_pred, labels=bin_le.classes_).tolist(),
                "classes": list(bin_le.classes_)
            }

            # Save best model by F1
            test_f1 = test_report['macro avg']['f1-score']
            if 'binary' not in best_models or test_f1 > best_models['binary']['f1']:
                best_models['binary'] = {'name': name, 'f1': test_f1, 'accuracy': test_report['accuracy'], 'model': model}
                joblib.dump(model, os.path.join(MODELS_DIR, f"best_delay_binary_{name}.joblib"))

            print(f"   Validation accuracy: {val_report['accuracy']:.3f}")
            print(f"   Test accuracy: {test_report['accuracy']:.3f}")
        except Exception as e:
            print(f"   ❌ Error training {name}: {str(e)}")

    # ---------------------------
    # Save results JSON
    # ---------------------------
    with open(os.path.join(RESULTS_DIR, "delay_classification_results.json"), "w") as f:
        json.dump(clf_delay_results, f, indent=2)

    # ---------------------------
    # Print summary
    # ---------------------------
    print("\n=== SUMMARY OF RESULTS ===")
    for model_type, info in best_models.items():
        print(f"🏆 Best {model_type} model: {info['name']} (F1: {info['f1']:.3f}, Acc: {info['accuracy']:.3f})")
    
    print("✅ Delay classification and model saving completed successfully!")

else:
    print("⚠️ Skipping delay classification: no regression target detected.")


🔹 Original class distribution:
delay_days
Minor delay        3348
Moderate delay     3884
On-time            5262
Severe delay      12051
Name: count, dtype: int64
🔹 Original class counts: {'Severe delay': np.int64(7641), 'On-time': np.int64(3822), 'Minor delay': np.int64(2546), 'Moderate delay': np.int64(2492)}
🔹 Target size per class: 2492
✅ Class counts after undersampling:
Minor delay       2492
Moderate delay    2492
On-time           2492
Severe delay      2492
Name: count, dtype: int64
✅ Encoded classes (multi-class): {'Minor delay': np.int64(0), 'Moderate delay': np.int64(1), 'On-time': np.int64(2), 'Severe delay': np.int64(3)}
✅ Encoded classes (binary): {np.str_('Not Severe'): np.int64(0), np.str_('Severe'): np.int64(1)}

=== Multi-class classification ===
🔹 Training logreg (multi-class)...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=5000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


   Validation accuracy: 0.188
   Test accuracy: 0.181
🔹 Training rf (multi-class)...
   Validation accuracy: 0.251
   Test accuracy: 0.236
🔹 Training xgb (multi-class)...
   Validation accuracy: 0.258
   Test accuracy: 0.260

=== Binary classification (Severe vs Not Severe) ===
🔹 Training logreg (binary)...




   Validation accuracy: 0.447
   Test accuracy: 0.459
🔹 Training rf (binary)...
   Validation accuracy: 0.447
   Test accuracy: 0.459
🔹 Training xgb (binary)...
   Validation accuracy: 0.454
   Test accuracy: 0.464

=== SUMMARY OF RESULTS ===
🏆 Best multi model: xgb (F1: 0.235, Acc: 0.260)
🏆 Best binary model: xgb (F1: 0.345, Acc: 0.464)
✅ Delay classification and model saving completed successfully!


In [47]:
# ---------------------------
# Delay Classification with SMOTE & Best Model Saving
# ---------------------------
import os
import json
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

if reg_target is not None:

    # ---------------------------
    # Bucketize delays
    # ---------------------------
    def bucketize_delay(y):
        y = pd.to_numeric(y, errors="coerce")
        if pd.isna(y) or y == 0:
            return "On-time"
        elif 1 <= y <= 3:
            return "Minor delay"
        elif 4 <= y <= 7:
            return "Moderate delay"
        else:
            return "Severe delay"

    y_train_c = y_train_reg.apply(bucketize_delay)
    y_val_c   = y_val_reg.apply(bucketize_delay)
    y_test_c  = y_test_reg.apply(bucketize_delay)

    # ---------------------------
    # Align X and y
    # ---------------------------
    def align_features_targets(X, y):
        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)
        common_idx = X.index.intersection(y.index)
        return X.loc[common_idx], y.loc[common_idx]

    X_train_aligned, y_train_aligned = align_features_targets(X_train_processed, y_train_c)
    X_val_aligned, y_val_aligned     = align_features_targets(X_val_processed, y_val_c)
    X_test_aligned, y_test_aligned   = align_features_targets(X_test_processed, y_test_c)

    # ---------------------------
    # Multi-class encoding with SMOTE
    # ---------------------------
    delay_le = LabelEncoder()
    y_train_enc = delay_le.fit_transform(y_train_aligned)

    # Apply SMOTE for multi-class
    smote_mc = SMOTE(random_state=42)
    X_train_bal, y_train_bal = smote_mc.fit_resample(X_train_aligned, y_train_enc)

    y_val_enc = delay_le.transform(y_val_aligned)
    y_test_enc = delay_le.transform(y_test_aligned)

    # Binary collapse (Severe vs Not-Severe)
    def collapse_binary(y_raw):
        return np.where(y_raw == "Severe delay", "Severe", "Not Severe")

    y_train_bin = collapse_binary(y_train_aligned)
    y_val_bin   = collapse_binary(y_val_aligned)
    y_test_bin  = collapse_binary(y_test_aligned)

    bin_le = LabelEncoder()
    y_train_bin_enc = bin_le.fit_transform(y_train_bin)
    y_val_bin_enc   = bin_le.transform(y_val_bin)
    y_test_bin_enc  = bin_le.transform(y_test_bin)

    # ---------------------------
    # Define models
    # ---------------------------
    def get_models(num_classes):
        return {
            "logreg": LogisticRegression(max_iter=5000, solver='lbfgs',
                                         multi_class='multinomial' if num_classes>2 else 'auto',
                                         random_state=42),
            "rf": RandomForestClassifier(n_estimators=200, max_depth=15,
                                         min_samples_split=10, min_samples_leaf=5,
                                         random_state=42, n_jobs=-1),
            "xgb": XGBClassifier(
                objective="multi:softprob" if num_classes > 2 else "binary:logistic",
                eval_metric="mlogloss",
                n_estimators=300,
                max_depth=6,
                learning_rate=0.1,
                subsample=0.8,
                colsample_bytree=0.8,
                reg_alpha=0.1,
                reg_lambda=0.1,
                random_state=42,
                n_jobs=-1,
                verbosity=0
            )
        }

    clf_delay_results = {}
    best_models = {}

    # ---------------------------
    # Train Multi-class models
    # ---------------------------
    print("=== Multi-class classification ===")
    models_mc = get_models(len(delay_le.classes_))
    for name, model in models_mc.items():
        print(f"🔹 Training {name} (multi-class)...")
        model.fit(X_train_bal, y_train_bal)
        val_pred_enc = model.predict(X_val_aligned)
        test_pred_enc = model.predict(X_test_aligned)
        val_pred = delay_le.inverse_transform(val_pred_enc)
        test_pred = delay_le.inverse_transform(test_pred_enc)

        val_report = classification_report(y_val_aligned, val_pred, output_dict=True, zero_division=0)
        test_report = classification_report(y_test_aligned, test_pred, output_dict=True, zero_division=0)

        clf_delay_results[f"{name}_multi"] = {
            "val_report": val_report,
            "test_report": test_report,
            "confusion_matrix": confusion_matrix(y_test_aligned, test_pred, labels=delay_le.classes_).tolist(),
            "classes": list(delay_le.classes_)
        }

        f1_test = test_report['macro avg']['f1-score']
        if "multi" not in best_models or f1_test > best_models["multi"]["f1"]:
            best_models["multi"] = {"name": name, "f1": f1_test, "model": model}

    # ---------------------------
    # Train Binary models
    # ---------------------------
    print("\n=== Binary classification (Severe vs Not Severe) ===")
    models_bin = get_models(len(bin_le.classes_))
    for name, model in models_bin.items():
        print(f"🔹 Training {name} (binary)...")
        model.fit(X_train_aligned, y_train_bin_enc)
        val_pred_enc = model.predict(X_val_aligned)
        test_pred_enc = model.predict(X_test_aligned)
        val_pred = bin_le.inverse_transform(val_pred_enc)
        test_pred = bin_le.inverse_transform(test_pred_enc)

        val_report = classification_report(y_val_bin, val_pred, output_dict=True, zero_division=0)
        test_report = classification_report(y_test_bin, test_pred, output_dict=True, zero_division=0)

        clf_delay_results[f"{name}_binary"] = {
            "val_report": val_report,
            "test_report": test_report,
            "confusion_matrix": confusion_matrix(y_test_bin, test_pred, labels=bin_le.classes_).tolist(),
            "classes": list(bin_le.classes_)
        }

        f1_test = test_report['macro avg']['f1-score']
        if "binary" not in best_models or f1_test > best_models["binary"]["f1"]:
            best_models["binary"] = {"name": name, "f1": f1_test, "model": model}

    # ---------------------------
    # Save results and best models
    # ---------------------------
    os.makedirs(RESULTS_DIR, exist_ok=True)
    with open(os.path.join(RESULTS_DIR, "delay_classification_results_smote.json"), "w") as f:
        json.dump(clf_delay_results, f, indent=2)

    os.makedirs(MODELS_DIR, exist_ok=True)
    for key, info in best_models.items():
        save_path = os.path.join(MODELS_DIR, f"best_delay_smote{key}.joblib")
        joblib.dump(info["model"], save_path)
        print(f"✅ Saved best {key} model: {save_path}")

    print("\n✅ Delay classification completed successfully!")
else:
    print("⚠️ Skipping delay classification: no regression target detected.")


=== Multi-class classification ===
🔹 Training logreg (multi-class)...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=5000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


🔹 Training rf (multi-class)...
🔹 Training xgb (multi-class)...

=== Binary classification (Severe vs Not Severe) ===
🔹 Training logreg (binary)...




🔹 Training rf (binary)...
🔹 Training xgb (binary)...
✅ Saved best multi model: ../results/metrics\models\best_delay_smotemulti.joblib
✅ Saved best binary model: ../results/metrics\models\best_delay_smotebinary.joblib

✅ Delay classification completed successfully!


Anomaly Detection (IsolationForest)

In [48]:
import os
import json
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

def run_isolation_forest(X_train, X_test, num_cols, RESULTS_DIR="results", MODELS_DIR="models", percentile=5):
    if len(num_cols) == 0:
        print("⚠️ Skipping anomaly detection: no numeric columns found.")
        return None, None

    # Pipeline for numeric columns + IsolationForest
    num_pipeline = Pipeline([
        ("scaler", StandardScaler(with_mean=False))
    ])

    iso_pipeline = Pipeline([
        ("num_only", ColumnTransformer([("num", num_pipeline, num_cols)], remainder="drop")),
        ("iso", IsolationForest(
            n_estimators=300,
            contamination="auto",
            random_state=42,
            n_jobs=-1
        ))
    ])

    # Fit pipeline
    iso_pipeline.fit(X_train)

    # Score test samples
    test_scores = iso_pipeline["iso"].score_samples(iso_pipeline["num_only"].transform(X_test))

    # Determine anomaly threshold
    anomaly_threshold = np.percentile(test_scores, percentile)
    anomalies = (test_scores <= anomaly_threshold).astype(int)

    anomaly_report = {
        "threshold_percentile": percentile,
        "threshold_value": float(anomaly_threshold),
        "anomalies_detected": int(anomalies.sum()),
        "total_test": int(len(anomalies)),
        "rate": float(anomalies.mean())
    }

    # Save results
    os.makedirs(RESULTS_DIR, exist_ok=True)
    with open(os.path.join(RESULTS_DIR, "anomaly_results.json"), "w") as f:
        json.dump(anomaly_report, f, indent=2)

    # Save trained pipeline
    os.makedirs(MODELS_DIR, exist_ok=True)
    joblib.dump({
        "pipeline": iso_pipeline,
        "threshold": anomaly_threshold
    }, os.path.join(MODELS_DIR, "anomaly_isolation_forest.joblib"))

    print("✅ Anomaly detection summary:", anomaly_report)

    # Return anomalies and report
    return anomalies, anomaly_report

# Example usage
anomalies, anomaly_report = run_isolation_forest(X_train, X_test, num_cols, RESULTS_DIR=RESULTS_DIR, MODELS_DIR=MODELS_DIR)


✅ Anomaly detection summary: {'threshold_percentile': 5, 'threshold_value': -0.5977612512504518, 'anomalies_detected': 177, 'total_test': 3536, 'rate': 0.05005656108597285}


In [50]:
# ========================================================
# 7) Feature Importance (Optional, Tree-based models)
# ========================================================
def export_rf_importance(pipeline, out_json):
    """
    Export feature importances from RandomForest model
    after preprocessing (numeric + categorical features).
    """
    if "prep" not in pipeline.named_steps:
        print("⚠️ Pipeline has no 'prep' step. Skipping feature importance.")
        return

    ct = pipeline.named_steps["prep"]

    # Find classifier step
    rf_step = [k for k in pipeline.named_steps if "clf" in k or "reg" in k]
    if not rf_step:
        print("⚠️ No RandomForest step found in pipeline. Skipping.")
        return
    rf = pipeline.named_steps[rf_step[0]]

    # ✅ Get feature names directly from the ColumnTransformer
    try:
        full_features = ct.get_feature_names_out()
    except:
        # fallback if version <1.0
        num_features = list(ct.transformers_[0][2])
        if "cat" in ct.named_transformers_:
            cat_ohe = ct.named_transformers_["cat"].named_steps.get("cat_ohe", None)
            if cat_ohe:
                cat_features = list(cat_ohe.get_feature_names_out(ct.transformers_[1][2]))
            else:
                cat_features = []
        else:
            cat_features = []
        full_features = num_features + cat_features

    importances = getattr(rf, "feature_importances_", None)
    if importances is None:
        print("⚠️ Model has no feature_importances_. Skipping.")
        return

    if len(full_features) != len(importances):
        print(f"⚠️ Mismatch: {len(full_features)} features vs {len(importances)} importances. Aligning...")
        min_len = min(len(full_features), len(importances))
        full_features = full_features[:min_len]
        importances = importances[:min_len]

    imp_df = pd.DataFrame({"feature": full_features, "importance": importances})
    imp_df = imp_df.sort_values("importance", ascending=False).head(50)

    os.makedirs(RESULTS_DIR, exist_ok=True)
    imp_df.to_json(out_json, orient="records", indent=2)

    print(f"✅ Top feature importances saved to {out_json}")
    display(imp_df.head(20))



# ========================================================
# 8) Save a Compact “Model Card”
# ========================================================
from datetime import datetime

os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)

model_card = {
    "created_at": datetime.utcnow().isoformat() + "Z",
    "train_rows": int(len(df_train)),
    "val_rows": int(len(df_val)),
    "test_rows": int(len(df_test)),
    "targets": {
        "classification": clf_target,
        "anomaly_detection": True
    },
    "models_trained": list(clf_results.keys()) + ["isolation_forest"],
    "paths": {
        "models_dir": MODELS_DIR,
        "results_dir": RESULTS_DIR
    }
}

with open(os.path.join(RESULTS_DIR, "model_card.json"), "w") as f:
    json.dump(model_card, f, indent=2)

print("✅ Model card saved.")


# ========================================================
# 9) Inference Helpers (for Backend)
# ========================================================
def load_model(path):
    """
    Load a joblib-saved model/pipeline.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Model not found: {path}")
    return joblib.load(path)


def predict_disruption(model_path, df_batch: pd.DataFrame) -> dict:
    """
    Predict disruption type (classification).
    Returns predicted classes and probabilities.
    """
    model = load_model(model_path)
    preds = model.predict(df_batch)

    probs = getattr(model, "predict_proba", None)
    if callable(probs):
        probs = model.predict_proba(df_batch)
        classes = model.classes_.tolist()
    else:
        probs = None
        classes = None

    return {"predictions": preds.tolist(), "probabilities": probs.tolist() if probs is not None else None, "classes": classes}


def score_anomaly(model_path, df_batch: pd.DataFrame, percentile: float = 5) -> dict:
    """
    Score anomalies using IsolationForest.
    Returns anomaly scores and binary anomaly flags.
    """
    pipe = load_model(model_path)
    if "num_only" not in pipe.named_steps or "iso" not in pipe.named_steps:
        raise ValueError("Pipeline must contain 'num_only' and 'iso' steps")

    X = pipe["num_only"].transform(df_batch)
    scores = pipe["iso"].score_samples(X)
    threshold = np.percentile(scores, percentile)
    flags = (scores <= threshold).astype(int)

    return {"scores": scores.tolist(), "anomaly_flags": flags.tolist(), "threshold": float(threshold)}


✅ Model card saved.
