# Support Vector Machines (Linear & Kernel)

Compare LinearSVC (with calibration) and RBF-kernel SVC. Feature scaling is critical for both models; probability calibration is also handled where needed.

In [None]:
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.compose import ColumnTransformer
from sklearn.inspection import permutation_importance, PartialDependenceDisplay
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, brier_score_loss, confusion_matrix
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import LinearSVC, SVC

# Download the course helper module next to the notebook so it can be imported below.
# (`!` is an IPython shell escape, so this line only works inside Jupyter/Colab.)
!wget -q https://raw.githubusercontent.com/Jihun-ust/ust-mail-557/main/Classification/classification_utils.py
import classification_utils as utils
# Hosted copy of the dataset; passed to pd.read_csv as a URL.
csv_path = "https://raw.githubusercontent.com/Jihun-ust/ust-mail-557/main/Classification/classification.csv"
# NOTE: silences every warning for the rest of the session, which also hides any
# solver ConvergenceWarning raised while fitting the SVMs.
warnings.filterwarnings("ignore")

# Load the event log and order it by timestamp before the chronological split.
df = pd.read_csv(csv_path, parse_dates=["ts"])
df = df.sort_values("ts")
train, test = utils.chrono_split(df, "ts", test_frac=0.2)

# Column groups, one per preprocessing treatment.
categorical_cols = ["ad_channel", "device", "region", "campaign"]
numeric_cols = [
    "spend_l7",
    "pages_per_session",
    "sessions_l30",
    "time_on_site_s",
    "pricing_views_l7",
    "past_purchases",
]
passthrough_cols = ["discount_flag", "competitor_visits"]

# Model inputs (categorical, then numeric, then passthrough) and the label column.
features = categorical_cols + numeric_cols + passthrough_cols
target = "converted"

X_train, X_test = train[features], test[features]
y_train, y_test = train[target], test[target]

# Standardize numerics, one-hot encode categoricals (unseen levels encode as all zeros),
# and forward the remaining two columns untouched.
pre = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("bin", "passthrough", passthrough_cols),
    ]
)

# LinearSVC + calibration (no native predict_proba)
# Each pipeline gets its own clone of `pre`: Pipeline.fit fits its steps in place, so
# pipelines that share one ColumnTransformer instance overwrite each other's fitted
# preprocessing whenever any of them is (re)fit.
# The most recent rows of the training window are held out for calibration and are NOT
# used to fit the SVM; calibrating on rows the model was trained on biases the
# isotonic map towards over-confident probabilities.
n_cal = min(5000, len(train) // 5)
cal_start = len(train) - n_cal
fit_part, val = train.iloc[:cal_start], train.iloc[cal_start:]
lin = Pipeline([("pre", clone(pre)), ("svc", LinearSVC(C=1.0, class_weight=None, random_state=42))])
lin.fit(fit_part[features], fit_part[target])
# cv="prefit": the already-fitted pipeline is used as-is and only the calibrator is learned.
cal_lin = CalibratedClassifierCV(lin, cv="prefit", method="isotonic")
cal_lin.fit(val[features], val[target])
probs_lin = cal_lin.predict_proba(X_test)[:,1]
_ = utils.evaluate_classifier(y_test, probs_lin, title_prefix="LinearSVC (calibrated)")

# RBF SVC (native probas if probability=True; more expensive)
rbf = Pipeline([("pre", clone(pre)), ("svc", SVC(C=1.0, kernel="rbf", gamma="scale", probability=True, random_state=42))])
rbf.fit(X_train, y_train)
probs_rbf = rbf.predict_proba(X_test)[:,1]
_ = utils.evaluate_classifier(y_test, probs_rbf, title_prefix="RBF SVC")

### High-level Report Review (Example)

Compared two Support Vector Machine (SVM) models:
   - Linear SVM: Assumes a straight-line boundary between converted vs. non-converted leads.
   - RBF SVM: Uses a more flexible boundary that can curve to capture nonlinear patterns.

Key results
   - Overall accuracy: Both models are the same (~60%). Neither kernel improves the headline number.
   - Converted leads (class 1): Both models are strong, with recall ~88–90%. That means they catch almost all of the leads who actually convert.
   - Non-converted leads (class 0): Both models struggle. Recall is only ~16–20%, meaning most non-converting leads are still being misclassified as converters.
   - Trade-off:
      - Linear kernel → slightly worse on finding positives, slightly better on catching negatives.
      - RBF kernel → slightly better on positives, but even weaker on negatives.

Interpretation
   - Both models over-favor predicting “converted”, which inflates recall for converters but misses too many non-converters.
   - This is why accuracy stalls around 60%: the model isn’t learning enough structure in the data to separate the groups cleanly.
   - The choice of kernel (linear vs. RBF) makes only a marginal difference here — the limitation is likely the data features, not the algorithm.

Quick takeaway
   - These models are good at ensuring we don’t “miss” potential converters, but they generate a lot of false positives (non-converters wrongly flagged as converters).
   - For decision-making, this means marketing/sales resources may be over-allocated to leads who won’t convert.
   - Possible improvements:
      - Enrich the features (better signals of non-conversion).
      - Rebalance the training (penalize misclassifying non-converters more heavily).
      - Tune thresholds for different use-cases (e.g., prioritize precision if minimizing wasted effort is the goal).

#### (Advanced) Time-aware hyperparameter curves (C for Linear; C & γ for RBF)
   - Uses TimeSeriesSplit to respect chronology. For group-aware validation (e.g., by campaign), swap in GroupKFold or PurgedGroupTimeSeriesSplit and pass groups=train["campaign"].
   
   - **Warning**: Running this cell on Google Colab (free tier) can take over an hour. You're welcome to try it, but to save time, faster versions are provided below.

In [None]:
# Time-aware hyperparameter curves
def time_aware_scores(pipe, X, y, Cs, gammas=None, kernel="linear", metric="average_precision", n_splits=5):
    """Score an SVM hyperparameter grid with forward-chaining (TimeSeriesSplit) validation.

    For every grid point and every fold, a fresh pipeline (a clone of the
    module-level ColumnTransformer ``pre`` plus the SVM) is fit on the training
    part of the fold and scored on the validation part that follows it.

    Parameters
    ----------
    pipe : object
        Unused. Kept so existing call sites keep working.
    X, y : pandas DataFrame / Series
        Rows in chronological order; they are indexed positionally via ``.iloc``.
    Cs : iterable
        Values of the regularisation parameter ``C`` to evaluate.
    gammas : iterable, optional
        RBF ``gamma`` values. Required when ``kernel == "rbf"``; ignored otherwise.
    kernel : {"linear", "rbf"}
        ``"linear"`` uses LinearSVC followed by isotonic calibration;
        ``"rbf"`` uses ``SVC(probability=True)``.
    metric : str
        ``"average_precision"`` scores with PR-AUC; any other value scores with ROC-AUC.
    n_splits : int
        Number of TimeSeriesSplit folds.

    Returns
    -------
    pandas.DataFrame
        One row per grid point with columns ``C``, ``gamma`` (NaN for the linear
        kernel), ``mean`` and ``std`` of the fold scores.

    Raises
    ------
    ValueError
        If ``kernel`` is not supported, or ``kernel == "rbf"`` and ``gammas`` is None.
    """
    if kernel not in ("linear", "rbf"):
        raise ValueError(f"kernel must be 'linear' or 'rbf', got {kernel!r}")
    if kernel == "rbf" and gammas is None:
        raise ValueError("gammas must be provided when kernel='rbf'")

    score_fn = average_precision_score if metric == "average_precision" else roc_auc_score
    tscv = TimeSeriesSplit(n_splits=n_splits)

    def _linear_probs(C, tr_idx, va_idx):
        # Fit on the first 80% of the training fold and calibrate on the last 20%.
        # The two parts are disjoint, so the calibrator never sees rows the SVM was fit on.
        cut = int(0.8 * len(tr_idx))
        fit_idx, cal_idx = tr_idx[:cut], tr_idx[cut:]
        base = Pipeline([("pre", clone(pre)), ("svc", LinearSVC(C=C, random_state=42))])
        base.fit(X.iloc[fit_idx], y.iloc[fit_idx])
        cal = CalibratedClassifierCV(base, cv="prefit", method="isotonic")
        cal.fit(X.iloc[cal_idx], y.iloc[cal_idx])
        return cal.predict_proba(X.iloc[va_idx])[:, 1]

    def _rbf_probs(C, g, tr_idx, va_idx):
        base = Pipeline([("pre", clone(pre)), ("svc", SVC(C=C, gamma=g, kernel="rbf", probability=True, random_state=42))])
        base.fit(X.iloc[tr_idx], y.iloc[tr_idx])
        return base.predict_proba(X.iloc[va_idx])[:, 1]

    scores = []
    for C in Cs:
        # The linear kernel has no gamma: emit a single row per C with gamma = NaN.
        for g in ([np.nan] if kernel == "linear" else gammas):
            fold_scores = []
            for tr_idx, va_idx in tscv.split(X, y):
                if kernel == "linear":
                    p = _linear_probs(C, tr_idx, va_idx)
                else:
                    p = _rbf_probs(C, g, tr_idx, va_idx)
                fold_scores.append(score_fn(y.iloc[va_idx], p))
            scores.append({
                "C": C,
                "gamma": g,
                "mean": float(np.mean(fold_scores)),
                "std": float(np.std(fold_scores)),
            })

    return pd.DataFrame(scores)

# Grids to evaluate (kept modest; widen them if the compute budget allows)
Cs = [0.05, 0.1, 0.2, 0.5, 1, 2, 5]
Gammas = ["scale", 0.05, 0.1, 0.2]

lin_grid = time_aware_scores(lin, X_train, y_train, Cs=Cs, kernel="linear", metric="average_precision")
rbf_grid = time_aware_scores(rbf, X_train, y_train, Cs=Cs, gammas=Gammas, kernel="rbf", metric="average_precision")

# LinearSVC: a single mean±sd curve over C
fig, ax = plt.subplots(figsize=(6, 4))
ax.errorbar(lin_grid["C"], lin_grid["mean"], yerr=lin_grid["std"], marker="o")
ax.set_xscale("log")
ax.set_ylim(0, 1)
ax.set_xlabel("C (log)")
ax.set_ylabel("PR-AUC (mean±sd)")
ax.set_title("LinearSVC (calibrated), Time-aware PR-AUC")
plt.tight_layout()
plt.show()

# RBF SVC: one mean±sd curve per gamma value
fig, ax = plt.subplots(figsize=(7, 4))
for gamma_value in rbf_grid["gamma"].dropna().unique():
    curve = rbf_grid.loc[rbf_grid["gamma"] == gamma_value].sort_values("C")
    ax.errorbar(curve["C"], curve["mean"], yerr=curve["std"], marker="o", label=f"gamma={gamma_value}")
ax.set_xscale("log")
ax.set_ylim(0, 1)
ax.legend()
ax.set_xlabel("C (log)")
ax.set_ylabel("PR-AUC (mean±sd)")
ax.set_title("RBF SVC, Time-aware PR-AUC")
plt.tight_layout()
plt.show()

Fast version: LinearSVC time-aware PR-AUC vs C

In [None]:
# Fast version: LinearSVC time-aware PR-AUC vs C (subsample + 3 folds + decision_function)
t0 = time.time()

# Chronology-preserving subsample (use earliest portion of the training horizon)
frac = 0.30  # 30% of train; bump up if you have time
n_sub = max(2000, int(len(X_train) * frac))
X_sub, y_sub = X_train.iloc[:n_sub], y_train.iloc[:n_sub]

Cs = [0.05, 0.1, 0.2, 0.5, 1, 2, 5]
tscv = TimeSeriesSplit(n_splits=3)

scores_mean, scores_std = [], []

for C in Cs:
    fold_scores = []
    for tr_idx, va_idx in tscv.split(X_sub, y_sub):
        # clone(pre): Pipeline.fit fits its steps in place, so fitting the shared `pre`
        # on a fold would also change the preprocessing of every other pipeline using it.
        pipe = Pipeline([("pre", clone(pre)), ("svc", LinearSVC(C=C, random_state=42))])
        pipe.fit(X_sub.iloc[tr_idx], y_sub.iloc[tr_idx])
        # Use decision_function for ranking; AP accepts real-valued scores
        s = pipe.decision_function(X_sub.iloc[va_idx])
        ap = average_precision_score(y_sub.iloc[va_idx], s)
        fold_scores.append(ap)
    scores_mean.append(np.mean(fold_scores))
    scores_std.append(np.std(fold_scores))

elapsed = time.time() - t0
print(f"[LinearSVC] Subsample n={n_sub}, folds=3, grid={Cs} → done in {elapsed:.1f}s")

# Plot
plt.figure(figsize=(6,4))
plt.errorbar(Cs, scores_mean, yerr=scores_std, marker="o")
plt.xscale("log"); plt.ylim(0,1)
plt.xlabel("C (log scale)"); plt.ylabel("PR-AUC (mean±sd)")
plt.title("LinearSVC, Time-aware PR-AUC vs C (fast)")
plt.tight_layout(); plt.show()

# Optional: pick best C, refit once, and calibrate on a held-out tail of the training window.
# The SVM is fit on everything before that tail so the isotonic calibrator only sees
# rows the model was not trained on.
best_C = Cs[int(np.argmax(scores_mean))]
print("Best C (fast search):", best_C)
n_cal = min(5000, len(train) // 5)
cal_start = len(train) - n_cal
fit_part, cal_part = train.iloc[:cal_start], train.iloc[cal_start:]
lin_fast = Pipeline([("pre", clone(pre)), ("svc", LinearSVC(C=best_C, random_state=42))]).fit(fit_part[features], fit_part[target])
cal_lin_fast = CalibratedClassifierCV(lin_fast, cv="prefit", method="isotonic")
cal_lin_fast.fit(cal_part[features], cal_part[target])
probs_lin_fast = cal_lin_fast.predict_proba(X_test)[:,1]
_ = utils.evaluate_classifier(y_test, probs_lin_fast, title_prefix=f"LinearSVC C={best_C} (fast search, calibrated)")

In [None]:
# Linear SVM — margin width vs C, support vectors vs C, accuracy vs C
t0 = time.time()

# Small, roughly log-spaced C grid (add points for denser curves)
Cs = [0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10]

margin_widths, n_support, accs = [], [], []

for C in Cs:
    # SVC(kernel="linear") exposes both coef_ (for the margin) and n_support_
    model = Pipeline(steps=[("pre", pre), ("svc", SVC(kernel="linear", C=C, random_state=42))])
    model.fit(X_train, y_train)
    fitted_svc = model.named_steps["svc"]

    # Margin width = 2 / ||w||, measured in the preprocessed feature space;
    # the tiny constant avoids a division by zero for an all-zero weight vector.
    weights = fitted_svc.coef_.ravel()
    margin_widths.append(2.0 / (np.linalg.norm(weights) + 1e-12))

    # Total number of support vectors across both classes
    n_support.append(int(fitted_svc.n_support_.sum()))

    # Accuracy of the hard predictions on the test split
    accs.append(accuracy_score(y_test, model.predict(X_test)))

elapsed = time.time() - t0
print(f"[E1a] Done in {elapsed:.1f}s across {len(Cs)} C values.")

# Optional: numeric summary
summary = pd.DataFrame(
    {
        "C": Cs,
        "margin_width": margin_widths,
        "n_support": n_support,
        "accuracy": accs,
    }
)
display(summary)

# One figure per diagnostic, all against C on a log axis
curve_specs = [
    (margin_widths, "Margin width (2/||w||)", "Linear SVM — Margin width vs C"),
    (n_support, "# Support vectors", "Linear SVM — Support vectors vs C"),
    (accs, "Accuracy (test)", "Linear SVM — Accuracy vs C"),
]
for values, y_label, title in curve_specs:
    plt.figure(figsize=(6, 4))
    plt.plot(Cs, values, marker="o")
    plt.xscale("log")
    plt.xlabel("C (log)")
    plt.ylabel(y_label)
    plt.title(title)
    plt.tight_layout()
    plt.show()

Fast version: RBF SVC time-aware PR-AUC heatmap

In [None]:
# Fast version: RBF SVC time-aware PR-AUC heatmap (subsample + 3 folds + decision_function)
t0 = time.time()

# Same chronology-preserving subsample as the fast linear search (n_sub is defined there)
X_sub = X_train.iloc[:n_sub]
y_sub = y_train.iloc[:n_sub]

Cs = [0.5, 2.0]            # very small grid
Gammas = ["scale", 0.2]    # keep tiny; swap/add values if you have budget
tscv = TimeSeriesSplit(n_splits=3)


def _rbf_cv_ap(C, g):
    """Return the mean validation PR-AUC over the time-ordered folds for one (C, gamma) pair."""
    fold_scores = []
    for tr_idx, va_idx in tscv.split(X_sub, y_sub):
        pipe = Pipeline([
            ("pre", pre),
            ("svc", SVC(C=C, kernel="rbf", gamma=g, probability=False, random_state=42)),
        ])
        pipe.fit(X_sub.iloc[tr_idx], y_sub.iloc[tr_idx])
        # Raw decision scores are enough for average precision (a ranking metric)
        fold_scores.append(
            average_precision_score(y_sub.iloc[va_idx], pipe.decision_function(X_sub.iloc[va_idx]))
        )
    return np.mean(fold_scores)


# Rows follow Cs, columns follow Gammas
heat = np.array([[_rbf_cv_ap(C, g) for g in Gammas] for C in Cs])

elapsed = time.time() - t0
print(f"[RBF SVC] Subsample n={n_sub}, folds=3, grid={len(Cs)}x{len(Gammas)} → done in {elapsed:.1f}s")

# Plot heatmap with the score written into each cell
plt.figure(figsize=(5.5,3.8))
for (row, col), cell_ap in np.ndenumerate(heat):
    plt.text(col, row, f"{cell_ap:.3f}", ha="center", va="center")
plt.imshow(heat, aspect="auto", origin="upper", vmin=0, vmax=1)
plt.colorbar(label="PR-AUC")
plt.xticks(range(len(Gammas)), [str(g) for g in Gammas])
plt.yticks(range(len(Cs)), [str(c) for c in Cs])
plt.xlabel("gamma")
plt.ylabel("C")
plt.title("RBF SVC, Time-aware PR-AUC (fast)")
plt.tight_layout()
plt.show()

# Choose best (C,gamma) and refit once on full train with probability=True (for downstream needs)
best_i, best_j = np.unravel_index(heat.argmax(), heat.shape)
best_C = Cs[best_i]
best_g = Gammas[best_j]
print("Best (C, gamma) from fast search:", best_C, best_g)

rbf_fast = Pipeline([
    ("pre", pre),
    ("svc", SVC(C=best_C, kernel="rbf", gamma=best_g, probability=True, random_state=42)),
])
rbf_fast = rbf_fast.fit(X_train, y_train)
probs_rbf_fast = rbf_fast.predict_proba(X_test)[:,1]
_ = utils.evaluate_classifier(y_test, probs_rbf_fast, title_prefix=f"RBF SVC C={best_C}, γ={best_g} (fast search)")

#### Calibration curves (pre/post) with Brier score

In [None]:
# Calibration curves & Brier score
# LinearSVC
# PRE: uncalibrated decision_function, min-max scaled into [0,1].
# This is not a probability; it is only used to show the shape of the curve.
# clone(pre): Pipeline.fit fits its steps in place, so the shared `pre` is never handed to a new pipeline.
lin_uncal = Pipeline([("pre", clone(pre)), ("svc", LinearSVC(C=1.0, random_state=42))]).fit(X_train, y_train)
dec_test = lin_uncal.decision_function(X_test)
# The small constant keeps the division defined when every score is identical
p_lin_pre = (dec_test - dec_test.min()) / (dec_test.max() - dec_test.min() + 1e-12)

# POST: use your calibrated linear
p_lin_post = probs_lin
print("Linear, Brier pre (scaled df):", round(brier_score_loss(y_test, p_lin_pre), 4))
print("Linear, Brier post (isotonic):", round(brier_score_loss(y_test, p_lin_post), 4))

pt_pre, pp_pre   = calibration_curve(y_test, p_lin_pre,  n_bins=10, strategy="quantile")
pt_post, pp_post = calibration_curve(y_test, p_lin_post, n_bins=10, strategy="quantile")

plt.figure(figsize=(6,5))
plt.plot(pp_pre, pt_pre, marker="o", label="Linear, pre")
plt.plot(pp_post, pt_post, marker="o", label="Linear, post (isotonic)")
plt.plot([0,1],[0,1],'--',lw=1,label="perfect")
plt.xlabel("Predicted"); plt.ylabel("Observed"); plt.title("Calibration, LinearSVC")
plt.legend(); plt.tight_layout(); plt.show()

# RBF SVC
# PRE: native probabilities from SVC(probability=True) use Platt scaling internally
p_rbf_pre = probs_rbf

# POST: external isotonic calibration on a validation slice.
# The slice is the tail of the training window and is excluded from the SVC fit, so the
# calibrator is learned on rows the model has not seen.
n_cal = min(5000, len(train) // 5)
cal_start = len(train) - n_cal
fit_part, val = train.iloc[:cal_start], train.iloc[cal_start:]
# probability=False: CalibratedClassifierCV calibrates the decision_function output, so the
# SVC's own (cross-validated, expensive) Platt fit would be computed and then ignored.
rbf_prefit = Pipeline([("pre", clone(pre)), ("svc", SVC(C=1.0, kernel="rbf", gamma="scale", probability=False, random_state=42))])
rbf_prefit.fit(fit_part[features], fit_part[target])
rbf_cal = CalibratedClassifierCV(rbf_prefit, cv="prefit", method="isotonic")
rbf_cal.fit(val[features], val[target])
p_rbf_post = rbf_cal.predict_proba(X_test)[:,1]

print("RBF, Brier pre (Platt):", round(brier_score_loss(y_test, p_rbf_pre), 4))
print("RBF, Brier post (isotonic):", round(brier_score_loss(y_test, p_rbf_post), 4))

pt_pre, pp_pre   = calibration_curve(y_test, p_rbf_pre,  n_bins=10, strategy="quantile")
pt_post, pp_post = calibration_curve(y_test, p_rbf_post, n_bins=10, strategy="quantile")

plt.figure(figsize=(6,5))
plt.plot(pp_pre, pt_pre, marker="o", label="RBF, pre (Platt)")
plt.plot(pp_post, pt_post, marker="o", label="RBF, post (isotonic)")
plt.plot([0,1],[0,1],'--',lw=1,label="perfect")
plt.xlabel("Predicted"); plt.ylabel("Observed"); plt.title("Calibration, RBF SVC")
plt.legend(); plt.tight_layout(); plt.show()

#### Support-vector counts (linear & RBF) and threshold sensitivity

In [None]:
# Support-vector counts & threshold sensitivity
# LinearSVC doesn't expose support vectors, so the linear count comes from the libsvm
# interface, SVC(kernel="linear"), fit on at most the first 20,000 training rows.
sub_n = min(len(X_train), 20000)
X_sub, y_sub = X_train.iloc[:sub_n], y_train.iloc[:sub_n]
# clone(pre): Pipeline.fit fits its steps in place. Fitting the shared `pre` on this
# subsample would silently replace the preprocessing inside every other pipeline that
# holds the same instance, changing their later predictions and feature names.
svc_linear = Pipeline([("pre", clone(pre)), ("svc", SVC(kernel="linear", C=1.0, probability=False, random_state=42))]).fit(X_sub, y_sub)
n_sv_linear = int(svc_linear.named_steps["svc"].n_support_.sum())

# probability=False: only n_support_ is read here, so the extra internal
# cross-validated Platt fit that probability=True triggers is not needed.
svc_rbf = Pipeline([("pre", clone(pre)), ("svc", SVC(kernel="rbf", C=1.0, gamma="scale", probability=False, random_state=42))]).fit(X_sub, y_sub)
n_sv_rbf = int(svc_rbf.named_steps["svc"].n_support_.sum())

print(f"Support vectors (linear SVC via libsvm): {n_sv_linear} / {len(X_sub)}")
print(f"Support vectors (RBF SVC):              {n_sv_rbf} / {len(X_sub)}")

# Threshold sensitivity (Expected Value curve) using calibrated linear and RBF probs
# Payoff per outcome: value of a true positive, and cost of each of the other three cells.
gain_tp, cost_fp, cost_fn, cost_tn = 100.0, 10.0, 40.0, 0.0
def ev_curve(y_true, p):
    """Return the total payoff of thresholding ``p`` at 99 cut-offs from 0.01 to 0.99.

    Parameters
    ----------
    y_true : array-like
        Binary labels; entries equal to 1 are treated as the positive class.
    p : array-like
        Scores/probabilities, one per label. A row is flagged positive when ``p >= t``.

    Returns
    -------
    (ts, evs) : tuple of numpy arrays
        ``ts`` holds the thresholds and ``evs[k]`` the value
        ``tp*gain_tp - fp*cost_fp - fn*cost_fn - tn*cost_tn`` at ``ts[k]``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``p`` do not have the same number of elements.
    """
    is_pos = np.asarray(y_true).ravel() == 1
    scores = np.asarray(p, dtype=float).ravel()
    if is_pos.shape != scores.shape:
        raise ValueError("y_true and p must have the same length")

    ts = np.linspace(0.01, 0.99, 99)
    evs = np.empty(ts.shape, dtype=float)
    for k, t in enumerate(ts):
        flagged = scores >= t
        # Count the four cells explicitly so a threshold (or a label vector) with a
        # single class present still yields all four counts instead of failing.
        tp = np.count_nonzero(flagged & is_pos)
        fp = np.count_nonzero(flagged & ~is_pos)
        fn = np.count_nonzero(~flagged & is_pos)
        tn = np.count_nonzero(~flagged & ~is_pos)
        evs[k] = tp*gain_tp - fp*cost_fp - fn*cost_fn - tn*cost_tn
    return ts, evs

# Evaluate the payoff curve for both models on the test split
ts_lin, ev_lin = ev_curve(y_test, probs_lin)
ts_rbf, ev_rbf = ev_curve(y_test, probs_rbf)

# Overlay both curves so the payoff-maximising thresholds can be compared directly
fig, ax = plt.subplots(figsize=(7, 4))
for thresholds, values, curve_label in (
    (ts_lin, ev_lin, "Linear (calibrated)"),
    (ts_rbf, ev_rbf, "RBF"),
):
    ax.plot(thresholds, values, label=curve_label)
ax.set_xlabel("Threshold")
ax.set_ylabel("Expected value")
ax.set_title("Threshold sensitivity")
ax.legend()
plt.tight_layout()
plt.show()

#### Driver summary
   - Linear: Top standardized coefficients (weights from LinearSVC after scaling/OHE)
   - RBF: Permutation importances (+ PDPs) for top features
   - PDPs are computed on the pipeline with raw column names; scikit-learn will route these through your ColumnTransformer.

In [None]:
# Driver summary (Linear vs RBF)
# Linear: top standardized coefficients
lin_fit = lin  # already fitted pipeline
# Names and weights come from the same fitted pipeline, so they line up position by position
feat_names = lin_fit.named_steps["pre"].get_feature_names_out()
coef = lin_fit.named_steps["svc"].coef_.ravel()
coef_df = pd.DataFrame({"feature": feat_names, "coef": coef}).sort_values("coef", ascending=False)
top_pos = coef_df.head(10)
top_neg = coef_df.tail(10).sort_values("coef")

print("Top +coefficients (increase conversion odds):")
display(top_pos)
print("Top -coefficients (decrease conversion odds):")
display(top_neg)

# Plain-language driver summary. Linear: use top coefficients
top_lin = coef_df.assign(absv=coef_df["coef"].abs()).sort_values("absv", ascending=False).head(4)
# Share of total |coef| carried by the top 4. The epsilon belongs in the denominator
# so an all-zero coefficient vector gives 0 instead of NaN.
share = float(top_lin["absv"].sum() / (coef_df["coef"].abs().sum() + 1e-12))
drivers = ", ".join(top_lin["feature"].tolist())
print(f"Policy summary (Linear): ~{share*100:.0f}% of the decision signal comes from {drivers}. Prioritize data quality and controls here.")

# Reverse the order so the largest positive weight is drawn at the top of the chart
plt.figure(figsize=(7,4))
plt.barh(top_pos["feature"][::-1], top_pos["coef"][::-1])
plt.title("LinearSVC, Top positive standardized coefficients")
plt.tight_layout(); plt.show()

plt.figure(figsize=(7,4))
plt.barh(top_neg["feature"], top_neg["coef"])
plt.title("LinearSVC, Top negative standardized coefficients")
plt.tight_layout(); plt.show()

# RBF: permutation importance of each raw input column, scored by PR-AUC on the test split
pi = permutation_importance(
    rbf,
    X_test,
    y_test,
    scoring="average_precision",
    n_repeats=20,
    random_state=42,
    n_jobs=-1,
)
# Importances are reported in the column order of X_test, i.e. the order of `features`
fn_pi = list(features)
pi_df = pd.DataFrame(
    {"feature": fn_pi, "mean": pi.importances_mean, "std": pi.importances_std}
).sort_values("mean", ascending=False)

# Map transformed feature names back to original columns for PDP
def to_original(feat):
    """Collapse a (possibly transformed) feature name to the raw column it came from.

    Everything up to the first ``"__"`` (a transformer prefix such as ``cat__``)
    is dropped. A name of the form ``<categorical column>_<level>`` maps to that
    categorical column, and a name starting with one of the numeric/flag columns
    maps to that column. Anything else is returned with only the prefix removed.
    """
    _, sep, tail = feat.partition("__")
    name = tail if sep else feat

    encoded_cols = ("ad_channel", "device", "region", "campaign")
    plain_cols = (
        "spend_l7", "pages_per_session", "sessions_l30", "time_on_site_s",
        "pricing_views_l7", "past_purchases", "discount_flag", "competitor_visits",
    )

    # One-hot columns carry the level after an underscore, hence the "<column>_" prefix test
    for col in encoded_cols:
        if name.startswith(f"{col}_"):
            return col

    match = next((col for col in plain_cols if name.startswith(col)), None)
    return name if match is None else match

# Aggregate importances per original column and rank them, largest first
orig_rank = (pi_df.assign(orig=pi_df["feature"].map(to_original))
                 .groupby("orig", as_index=False)["mean"].sum()
                 .sort_values("mean", ascending=False))

display(orig_rank.head(12))

# Plain-language driver summary. RBF: use aggregated permutation importances
top_rbf = orig_rank.head(4)
# Share of the summed importance carried by the top 4. The epsilon belongs in the
# denominator so a zero total gives 0 instead of NaN.
share = float(top_rbf["mean"].sum() / (orig_rank["mean"].sum() + 1e-12))
drivers = ", ".join(top_rbf["orig"].tolist())
print(f"Policy summary (RBF): ~{share*100:.0f}% of the decision signal comes from {drivers}. Prioritize data quality and controls here.")

# PDP + ICE plots for the three highest-ranked original features
top3 = orig_rank["orig"].head(3).tolist()
for f in top3:
    try:
        pdp_options = {
            "kind": "both",
            "grid_resolution": 30,
            "ice_lines_kw": {"alpha": 0.12},
        }
        PartialDependenceDisplay.from_estimator(rbf, X_test, [f], **pdp_options)
        plt.suptitle(f"RBF SVC, PDP/ICE for {f}")
        plt.tight_layout()
        plt.show()
    except Exception as e:
        # Best effort: report the failure and carry on with the next feature
        print(f"PDP failed for {f}: {e}")

In [None]:
# Plain-language driver summary
# For Linear: use top coefficients
top_lin = coef_df.assign(absv=coef_df["coef"].abs()).sort_values("absv", ascending=False).head(4)
# Share of total |coef| carried by the top 4. The epsilon belongs in the denominator
# so an all-zero coefficient vector gives 0 instead of NaN.
share = float(top_lin["absv"].sum() / (coef_df["coef"].abs().sum() + 1e-12))
drivers = ", ".join(top_lin["feature"].tolist())
print(f"Policy summary (Linear): ~{share*100:.0f}% of the decision signal comes from {drivers}. Prioritize data quality and controls here.")

In [None]:
# RBF: use aggregated permutation importances
top_rbf = orig_rank.head(4)
# Share of the summed importance carried by the top 4. The epsilon belongs in the
# denominator so a zero total gives 0 instead of NaN.
share = float(top_rbf["mean"].sum() / (orig_rank["mean"].sum() + 1e-12))
drivers = ", ".join(top_rbf["orig"].tolist())
print(f"Policy summary (RBF): ~{share*100:.0f}% of the decision signal comes from {drivers}. Prioritize data quality and controls here.")

#### Decision table (template): probability → action by season/segment with owners & SLA

In [None]:
# Decision table mapping probabilities to actions by season/segment
import numpy as np
import pandas as pd

# Copy of the test rows with helper columns: meteorological season derived from the
# timestamp month (Dec-Feb -> 1) plus both models' test-set probabilities.
test_ct = test.assign(
    season=test["ts"].dt.month % 12 // 3 + 1,  # 1=Winter,2=Spring,3=Summer,4=Fall
    p_lin=probs_lin,
    p_rbf=probs_rbf,
)

# Probability that drives the actions; here the calibrated linear model
test_ct["p"] = test_ct["p_lin"]

# Define probability bands -> actions/owners/SLA
def band(p):
    """Return the policy tier label for probability ``p``.

    Tiers are checked from the highest cut-off down and the first one that ``p``
    reaches wins. Values below every cut-off (including NaN, which fails every
    comparison) fall through to "P0: No action".
    """
    tiers = (
        (0.80, "P3: High-touch offer"),
        (0.50, "P2: Standard offer"),
        (0.30, "P1: Nurture"),
    )
    for cutoff, label in tiers:
        if p >= cutoff:
            return label
    return "P0: No action"

# Policy tier -> (owning team, service-level agreement)
policy_rows = [
    ("P3: High-touch offer", "Sales (AE)", "Respond < 24h"),
    ("P2: Standard offer", "Lifecycle Marketing", "Campaign within 72h"),
    ("P1: Nurture", "CRM / Email", "Weekly cadence"),
    ("P0: No action", "No owner", "—"),
]
owner_map = {policy: owner for policy, owner, sla in policy_rows}
sla_map = {policy: sla for policy, owner, sla in policy_rows}

# Label every test row with its tier, then look up who owns it and how fast to act
test_ct["policy"] = test_ct["p"].apply(band)
for column, lookup in (("owner", owner_map), ("SLA", sla_map)):
    test_ct[column] = test_ct["policy"].map(lookup)

# Summarize by season & a key segment (e.g., ad_channel); adjust to your needs
by_segment = test_ct.groupby(["season", "ad_channel", "policy"], as_index=False)
summary = by_segment.agg(
    n=("converted", "size"),
    avg_p=("p", "mean"),
    conv_rate=(target, "mean"),
)

# Compact "decision table": seasons and channels ascending, highest tier first within each
summary = summary.sort_values(
    by=["season", "ad_channel", "policy", "avg_p"],
    ascending=[True, True, False, False],
)

# Attach owners/SLA for each policy row
for column, lookup in (("owner", owner_map), ("SLA", sla_map)):
    summary[column] = summary["policy"].map(lookup)
display(summary.head(40))

# Optional: export to CSV for ops handoff
# summary.to_csv("svm_decision_table_by_season_segment.csv", index=False)