## Setup
- Setup: paths, environment, imports

In [1]:
# --- Setup: paths, environment, imports ---

import os
import sys
from pathlib import Path

# Ensure src/ is importable even when running from 06_interpretability/
repo_root = Path.cwd().resolve().parents[0]
src_path = repo_root / "src"
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

# Set local data path (DO NOT COMMIT dataset)
os.environ["DATA_PATH"] = "/Users/munaugas/Desktop/Thesis/adult_reconstruction.csv"
print("Using DATA_PATH:", os.environ.get("DATA_PATH"))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import shap
from scipy.stats import spearmanr

# Make sure outputs exist
os.makedirs("../results", exist_ok=True)
os.makedirs("../figures", exist_ok=True)

# Reuse perturbations from robustness stage
from thesis_pipeline.robustness.add_noise import add_gaussian_noise
from thesis_pipeline.robustness.distribution_shift import apply_simple_shift




Using DATA_PATH: /Users/munaugas/Desktop/Thesis/adult_reconstruction.csv


  from .autonotebook import tqdm as notebook_tqdm


## Pipeline and model training
- Load data, preprocess, split, and train models

In [2]:
# --- Load data, preprocess, split, and train models (same as baseline/robustness) ---

from thesis_pipeline.preprocessing.clean_data import load_data
from thesis_pipeline.preprocessing.feature_engineering import engineer_features_and_target
from thesis_pipeline.preprocessing.encode_features import encode_features
from thesis_pipeline.splitting.split_data import stratified_train_val_test_split

from thesis_pipeline.model_training.train_rf import train_random_forest
from thesis_pipeline.model_training.train_gbdt import train_gbdt
from thesis_pipeline.model_training.train_xgboost import train_xgboost

# Load raw data
df = load_data()

# Feature/target split
X_raw, y, df_with_target = engineer_features_and_target(df)

# Encoding
X, encoder, categorical_cols, numeric_cols = encode_features(X_raw)

# Splitting
splits_obj = stratified_train_val_test_split(X, y)
X_train, y_train = splits_obj.X_train, splits_obj.y_train
X_val, y_val     = splits_obj.X_val, splits_obj.y_val
X_test, y_test   = splits_obj.X_test, splits_obj.y_test

# Training (unpack outputs: model, eval_df, best_params)
rf_model, rf_eval, rf_params = train_random_forest(X_train, y_train, X_val, y_val, X_test, y_test)
gbdt_model, gbdt_eval, gbdt_params = train_gbdt(X_train, y_train, X_val, y_val, X_test, y_test)
xgb_model, xgb_eval, xgb_params = train_xgboost(X_train, y_train, X_val, y_val, X_test, y_test)

best_models = {
    "RandomForest": rf_model,
    "GBDT": gbdt_model,
    "XGBoost": xgb_model,
}

print("Models trained:", list(best_models.keys()))
print("X_train:", X_train.shape, "| X_test:", X_test.shape)



Fitting 3 folds for each of 15 candidates, totalling 45 fits
Fitting 3 folds for each of 15 candidates, totalling 45 fits
Fitting 3 folds for each of 15 candidates, totalling 45 fits
Models trained: ['RandomForest', 'GBDT', 'XGBoost']
X_train: (34671, 13) | X_test: (7430, 13)


## SHAP computation on clean data
- Sample data for SHAP computation

In [3]:
RANDOM_STATE = 42
shap_sample_size = 1000

X_train_shap = X_train.sample(n=min(shap_sample_size, len(X_train)), random_state=RANDOM_STATE).copy()
X_test_shap  = X_test.sample(n=min(shap_sample_size, len(X_test)),  random_state=RANDOM_STATE).copy()

print("SHAP sample sizes:")
print("X_train_shap:", X_train_shap.shape)
print("X_test_shap:", X_test_shap.shape)



SHAP sample sizes:
X_train_shap: (1000, 13)
X_test_shap: (1000, 13)


- Compute SHAP values (TreeExplainer) and save global summary plots

In [4]:
import re
from pathlib import Path

RANDOM_STATE = 42
shap_sample_size = min(1000, len(X_test))

X_test_shap = X_test.sample(n=shap_sample_size, random_state=RANDOM_STATE).copy()

FIG_DIR = (repo_root / "figures")
SUBGROUP_FIG_DIR = FIG_DIR / "subgroup_shap"
SUBGROUP_FIG_DIR.mkdir(exist_ok=True)
RES_DIR = (repo_root / "results")
FIG_DIR.mkdir(exist_ok=True)
RES_DIR.mkdir(exist_ok=True)

def safe_name(x: str) -> str:
    x = str(x)
    x = x.replace(" ", "")
    x = x.replace("/", "_")
    x = x.replace("+", "plus")
    x = re.sub(r"[^A-Za-z0-9_\-\.]", "_", x)
    return x

def get_standard_shap_values(explainer, X: pd.DataFrame) -> np.ndarray:
    """
    Return 2D SHAP values (n_samples, n_features) for positive class.
    Also collapses interaction-style outputs to main-effect values.
    """
    shap_vals = explainer.shap_values(X)

    # list -> take positive class
    if isinstance(shap_vals, list):
        shap_vals = shap_vals[1] if len(shap_vals) > 1 else shap_vals[0]

    # Explanation -> .values
    if hasattr(shap_vals, "values"):
        shap_vals = shap_vals.values

    shap_vals = np.asarray(shap_vals)

    # multi-output (n, p, 2) -> take class 1
    if shap_vals.ndim == 3 and shap_vals.shape[2] == 2:
        shap_vals = shap_vals[:, :, 1]

    # interaction (n, p, p) -> collapse
    if shap_vals.ndim == 3 and shap_vals.shape[1] == X.shape[1] and shap_vals.shape[2] == X.shape[1]:
        shap_vals = shap_vals.sum(axis=2)

    if shap_vals.ndim != 2:
        raise ValueError(f"Expected 2D SHAP values, got shape {shap_vals.shape}")

    return shap_vals

shap_results = {}

for model_name, model in best_models.items():
    print(f"\n=== SHAP for {model_name} (clean) ===")

    explainer = shap.TreeExplainer(model)
    shap_2d = get_standard_shap_values(explainer, X_test_shap)

    shap_results[model_name] = {
        "explainer": explainer,
        "shap_values_clean": shap_2d,
        "X_test_shap": X_test_shap,
    }

    shap.summary_plot(shap_2d, X_test_shap, show=False, max_display=10)
    plt.title(f"SHAP Summary Plot - {model_name}")

    outpath = FIG_DIR / f"shap_summary_clean_{safe_name(model_name)}.png"
    plt.savefig(outpath, dpi=300, bbox_inches="tight")
    plt.close()
    print(f"Saved: {outpath}")




=== SHAP for RandomForest (clean) ===
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_clean_RandomForest.png

=== SHAP for GBDT (clean) ===
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_clean_GBDT.png

=== SHAP for XGBoost (clean) ===
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_clean_XGBoost.png


## Global SHAP importance tables
- Export global SHAP importance tables (Top-K + full summary)

In [6]:
# %%
# --- Export global SHAP importance + summary tables as CSV + LaTeX ---

import os
import numpy as np
import pandas as pd

os.makedirs("../results", exist_ok=True)

TOP_K = 10  
OUT_DIR = "../results"

# ---------- helpers ----------
def mean_abs_shap_profile(shap_values_2d: np.ndarray, feature_cols) -> pd.Series:
    """Mean absolute SHAP value per feature."""
    vals = np.abs(shap_values_2d).mean(axis=0)
    return pd.Series(vals, index=list(feature_cols))

def to_latex_table(df: pd.DataFrame, out_tex: str, caption: str, label: str):
    df.to_latex(
        out_tex,
        index=False,
        float_format="%.3f",
        caption=caption,
        label=label,
        escape=True
    )

# ---------- 1) shap_global_importance_topk (per model, top-k) ----------
rows = []
for model_name, info in shap_results.items():
    shap_2d = info["shap_values_clean"]          # (n_samples, n_features)
    Xs = info["X_test_shap"]                      # dataframe with columns
    profile = mean_abs_shap_profile(shap_2d, Xs.columns)

    topk = profile.sort_values(ascending=False).head(TOP_K)
    for feat, val in topk.items():
        rows.append({
            "model": model_name,
            "feature": feat,
            "mean_abs_shap": float(val)
        })

shap_global_importance_topk = pd.DataFrame(rows)

out_csv = f"{OUT_DIR}/shap_global_importance_topk.csv"
shap_global_importance_topk.to_csv(out_csv, index=False)
print(f"Saved: {out_csv}")

out_tex = f"{OUT_DIR}/shap_global_importance_topk.tex"
to_latex_table(
    shap_global_importance_topk,
    out_tex,
    caption=f"Top_K{
        
    } features by mean absolute SHAP value (global importance) for each model on the clean test set.",
    label="tab:shap-global-importance-topk",
)
print(f"Saved: {out_tex}")


# ---------- 2) shap_summary_table (all features, aggregated per model) ----------
# This produces a compact table where each feature is a row and each model is a column.
# Values are mean(|SHAP|) so it reads as "global importance".

summary_frames = []
for model_name, info in shap_results.items():
    shap_2d = info["shap_values_clean"]
    Xs = info["X_test_shap"]
    profile = mean_abs_shap_profile(shap_2d, Xs.columns).rename(model_name)
    summary_frames.append(profile)

shap_summary_table = pd.concat(summary_frames, axis=1).reset_index().rename(columns={"index": "feature"})

# Optional: also add an "avg_across_models" column to rank features overall
model_cols = [c for c in shap_summary_table.columns if c != "feature"]
shap_summary_table["avg_across_models"] = shap_summary_table[model_cols].mean(axis=1)

# Sort by overall average importance (descending)
shap_summary_table = shap_summary_table.sort_values("avg_across_models", ascending=False)

# Save CSV
out_csv = f"{OUT_DIR}/shap_summary_table.csv"
shap_summary_table.to_csv(out_csv, index=False)
print(f"Saved: {out_csv}")

# Save LaTeX (rounded visually via float_format)
out_tex = f"{OUT_DIR}/shap_summary_table.tex"
to_latex_table(
    shap_summary_table,
    out_tex,
    caption="Global SHAP importance table (mean absolute SHAP values per feature).",
    label="tab:shap-summary-table",
)
print(f"Saved: {out_tex}")


SyntaxError: f-string: valid expression required before '}' (2981903130.py, line 56)

## Subgroup setup
- Build subgroup metadata (gender, race_binary, age_group) (same as LIME)

In [None]:
# Build subgroup metadata using original rows corresponding to X_test indices
meta_test = df_with_target.loc[X_test.index, ["gender", "race", "age"]].copy()

# Race: White vs Non-White
meta_test["race_binary"] = np.where(meta_test["race"] == "White", "White", "Non-White")

# Age groups as in thesis / Colab
age_bins   = [17, 30, 45, 60, 90]
age_labels = ["18-30", "31-45", "46-60", "61+"]
meta_test["age_group"] = pd.cut(meta_test["age"], bins=age_bins, labels=age_labels)

meta_test.head()


Unnamed: 0,gender,race,age,race_binary,age_group
21460,Male,White,21,White,18-30
35060,Male,Amer-Indian-Eskimo,51,Non-White,46-60
1633,Male,White,34,White,31-45
22480,Female,White,26,White,18-30
47104,Female,White,28,White,18-30


## Subgroup SHAP explanations
- Generate subgroup SHAP summary plots (by gender, race, age)

In [None]:
def plot_and_save_shap_summary_by_subgroup(
    model_name: str,
    group_col: str,
    *,
    max_display: int = 10,
    min_group_size: int = 50
):
    info = shap_results[model_name]
    shap_values = info["shap_values_clean"]   # guaranteed 2D from helper
    Xs = info["X_test_shap"]

    groups = meta_test.loc[Xs.index, group_col]

    print(f"\n=== SHAP by {group_col} for {model_name} ===")
    for group_value in groups.dropna().unique():
        mask = (groups == group_value)
        n = int(mask.sum())
        if n < min_group_size:
            print(f"Skipping {group_col}={group_value} (n={n})")
            continue

        shap.summary_plot(
            shap_values[mask.values],
            Xs.loc[mask],
            max_display=max_display,
            show=False
        )
        plt.title(f"{model_name} - SHAP ({group_col}={group_value})")

        fname = f"shap_summary_{safe_name(group_col)}_{safe_name(group_value)}_{safe_name(model_name)}.png"
        outpath = SUBGROUP_FIG_DIR / fname

        plt.savefig(outpath, dpi=300, bbox_inches="tight")
        plt.close()
        print(f"Saved: {outpath}")

for model_name in best_models.keys():
    for group_col in ["gender", "race_binary", "age_group"]:
        plot_and_save_shap_summary_by_subgroup(model_name, group_col)




=== SHAP by gender for RandomForest ===
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_gender_Male_RandomForest.png
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_gender_Female_RandomForest.png

=== SHAP by race_binary for RandomForest ===
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_race_binary_White_RandomForest.png
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_race_binary_Non-White_RandomForest.png

=== SHAP by age_group for RandomForest ===
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_age_group_18-30_RandomForest.png
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_age_group_46-60_RandomForest.png
Saved: /Users/munaugas/MSc_Data_Science_Thesis/MSc_Data_Science_Thesis/figures/shap_summary_age_group_31-45_RandomFor

- Export subgroup SHAP importance table (Top-K mean(|SHAP|))

In [None]:
# --- Export subgroup mean(|SHAP|) importance (Top-K) as CSV + LaTeX ---

import os
import numpy as np
import pandas as pd

os.makedirs("../results", exist_ok=True)

TOP_K_CSV = 10   # keep more detail in CSV
TOP_K_TEX = 3    # keep appendix short
MIN_GROUP_SIZE = 50
group_cols = ["gender", "race_binary", "age_group"]

def mean_abs_shap_profile(shap_values_2d: np.ndarray, feature_cols) -> pd.Series:
    vals = np.abs(shap_values_2d).mean(axis=0)
    return pd.Series(vals, index=list(feature_cols))

rows_csv = []
rows_tex = []

for model_name, info in shap_results.items():
    shap_2d = info["shap_values_clean"]   # (n_samples, n_features)
    Xs      = info["X_test_shap"]         # SHAP subset

    for group_col in group_cols:
        groups = meta_test.loc[Xs.index, group_col]

        for group_value in groups.dropna().unique():
            mask = (groups == group_value)
            n = int(mask.sum())
            if n < MIN_GROUP_SIZE:
                continue

            prof = mean_abs_shap_profile(shap_2d[mask.values], Xs.columns).sort_values(ascending=False)

            # --- CSV rows (Top_K_CSV) ---
            topk_csv = prof.head(TOP_K_CSV)
            for rank, (feat, val) in enumerate(topk_csv.items(), start=1):
                rows_csv.append({
                    "model": model_name,
                    "group_col": group_col,
                    "group_value": str(group_value),
                    "n_samples": n,
                    "rank": rank,
                    "feature": feat,
                    "mean_abs_shap": float(val),
                })

            # --- LaTeX rows (Top_K_TEX) ---
            topk_tex = prof.head(TOP_K_TEX)
            for rank, (feat, val) in enumerate(topk_tex.items(), start=1):
                rows_tex.append({
                    "model": model_name,
                    "group_col": group_col,
                    "group_value": str(group_value),
                    "n_samples": n,
                    "rank": rank,
                    "feature": feat,
                    "mean_abs_shap": float(val),
                })

# --- Save CSV (Top_K_CSV) ---
shap_subgroup_importance_topk_csv = pd.DataFrame(rows_csv)
out_csv = "../results/shap_subgroup_importance_topk.csv"
shap_subgroup_importance_topk_csv.to_csv(out_csv, index=False)
print(f"Saved: {out_csv}")

# --- Save LaTeX (Top_K_TEX) ---
shap_subgroup_importance_topk_tex = pd.DataFrame(rows_tex)
shap_subgroup_importance_topk_tex["mean_abs_shap"] = shap_subgroup_importance_topk_tex["mean_abs_shap"].round(3)

out_tex = "../results/shap_subgroup_importance_topk.tex"
shap_subgroup_importance_topk_tex.to_latex(
    out_tex,
    longtable=True,
    index=False,
    float_format="%.3f",
    caption=f"Top-{TOP_K_TEX} subgroup SHAP importance by mean absolute SHAP value (clean test set).",
    label="tab:shap-subgroup-importance-topk",
    escape=True,
)
print(f"Saved: {out_tex}")

print(f"CSV Top-K = {TOP_K_CSV} | LaTeX Top-K = {TOP_K_TEX}")



Saved: ../results/shap_subgroup_importance_topk.csv
Saved: ../results/shap_subgroup_importance_topk.tex
CSV Top-K = 10 | LaTeX Top-K = 3


## SHAP robustness evaluation
- Robustness design: mean(|SHAP|) profiles and stability metrics 
(helpers: mean_abs_shap_profile, shap_stability_metrics)

- Compute SHAP robustness under noise and distribution shift

In [None]:
def to_2d_shap(shap_values):
    """
    Convert SHAP output to a 2D numpy array: (n_samples, n_features).

    Handles:
    - list output (older SHAP): [class0, class1]
    - 3D output (n_samples, n_features, n_classes)
    - already-2D output
    """
    if isinstance(shap_values, list):
        # binary classification: take class 1 (positive class)
        shap_values = shap_values[1]

    shap_values = np.asarray(shap_values)

    if shap_values.ndim == 3:
        # shape: (n_samples, n_features, n_classes) -> take positive class
        shap_values = shap_values[:, :, 1]

    if shap_values.ndim != 2:
        raise ValueError(f"Expected 2D SHAP array, got shape {shap_values.shape}")

    return shap_values


def mean_abs_shap_profile(shap_values_2d: np.ndarray, feature_cols) -> pd.Series:
    """Mean absolute SHAP value per feature (global importance profile)."""
    vals = np.abs(shap_values_2d).mean(axis=0)
    return pd.Series(vals, index=list(feature_cols))


In [None]:

noise_sigmas = [0.1, 0.5, 1.0]
rows = []

def shap_stability_metrics(clean_profile: pd.Series, pert_profile: pd.Series):
    clean_profile = clean_profile.sort_index()
    pert_profile  = pert_profile.sort_index()

    rho, _ = spearmanr(clean_profile.values, pert_profile.values)

    c = clean_profile / (clean_profile.sum() + 1e-12)
    p = pert_profile  / (pert_profile.sum()  + 1e-12)
    l1 = float(np.abs(c - p).sum())
    return float(rho), l1

for model_name, model in best_models.items():
    print(f"\n=== SHAP robustness for {model_name} ===")
    explainer = shap.TreeExplainer(model)

    shap_clean_2d = get_standard_shap_values(explainer, X_test_shap)
    clean_profile = mean_abs_shap_profile(shap_clean_2d, X_test_shap.columns)

    # Noise
    for sigma in noise_sigmas:
        X_noisy = add_gaussian_noise(X_test_shap, numeric_cols, sigma=sigma, random_state=RANDOM_STATE)
        shap_noisy_2d = get_standard_shap_values(explainer, X_noisy)
        noisy_profile = mean_abs_shap_profile(shap_noisy_2d, X_test_shap.columns)

        rho, l1 = shap_stability_metrics(clean_profile, noisy_profile)
        rows.append({
            "model": model_name,
            "condition": f"gauss_sigma_{sigma}",
            "spearman_rank_corr": rho,
            "l1_distance": l1,
        })

    # Shift
    X_shifted = apply_simple_shift(X_test_shap, numeric_cols)
    shap_shift_2d = get_standard_shap_values(explainer, X_shifted)
    shift_profile = mean_abs_shap_profile(shap_shift_2d, X_test_shap.columns)

    rho, l1 = shap_stability_metrics(clean_profile, shift_profile)
    rows.append({
        "model": model_name,
        "condition": "shifted",
        "spearman_rank_corr": rho,
        "l1_distance": l1,
    })

shap_robustness_df = pd.DataFrame(rows)
shap_robustness_df



=== SHAP robustness for RandomForest ===

=== SHAP robustness for GBDT ===

=== SHAP robustness for XGBoost ===


Unnamed: 0,model,condition,spearman_rank_corr,l1_distance
0,RandomForest,gauss_sigma_0.1,1.0,0.000969
1,RandomForest,gauss_sigma_0.5,1.0,0.010143
2,RandomForest,gauss_sigma_1.0,1.0,0.01893
3,RandomForest,shifted,0.994505,0.038278
4,GBDT,gauss_sigma_0.1,1.0,0.0
5,GBDT,gauss_sigma_0.5,0.994505,0.006119
6,GBDT,gauss_sigma_1.0,0.994505,0.013553
7,GBDT,shifted,0.978022,0.078162
8,XGBoost,gauss_sigma_0.1,0.989011,0.036117
9,XGBoost,gauss_sigma_0.5,0.989011,0.036389


- Save SHAP robustness results (CSV + figures + LaTeX)

In [None]:
import os
import matplotlib.pyplot as plt

os.makedirs("../results", exist_ok=True)
os.makedirs("../figures", exist_ok=True)

# Save table referenced in Results
out_csv = "../results/shap_robustness_summary.csv"
shap_robustness_df.to_csv(out_csv, index=False)
print(f"Saved: {out_csv}")

# Order conditions for nicer plots
condition_order = ["gauss_sigma_0.1", "gauss_sigma_0.5", "gauss_sigma_1.0", "shifted"]
shap_robustness_df["condition"] = pd.Categorical(
    shap_robustness_df["condition"],
    categories=condition_order,
    ordered=True
)

df_plot = shap_robustness_df.sort_values(["model", "condition"]).copy()

# Plot Spearman rank correlation
plt.figure()
for model in df_plot["model"].unique():
    sub = df_plot[df_plot["model"] == model].sort_values("condition")
    plt.plot(sub["condition"].astype(str), sub["spearman_rank_corr"], marker="o", label=model)

plt.ylabel("Spearman rank correlation")
plt.title("SHAP robustness: feature ranking stability")
plt.xticks(rotation=30, ha="right")
plt.legend()

out_fig = "../figures/shap_robustness_spearman.png"
plt.savefig(out_fig, dpi=300, bbox_inches="tight")
plt.close()
print(f"Saved: {out_fig}")

# Plot L1 distance
plt.figure()
for model in df_plot["model"].unique():
    sub = df_plot[df_plot["model"] == model].sort_values("condition")
    plt.plot(sub["condition"].astype(str), sub["l1_distance"], marker="o", label=model)

plt.ylabel("L1 distance (normalised profiles)")
plt.title("SHAP robustness: attribution magnitude change")
plt.xticks(rotation=30, ha="right")
plt.legend()

out_fig = "../figures/shap_robustness_l1.png"
plt.savefig(out_fig, dpi=300, bbox_inches="tight")
plt.close()
print(f"Saved: {out_fig}")


Saved: ../results/shap_robustness_summary.csv
Saved: ../figures/shap_robustness_spearman.png
Saved: ../figures/shap_robustness_l1.png


In [None]:
# Export SHAP robustness results (.csv -> .tex) for Overleaf

import os

os.makedirs("../results", exist_ok=True)

shap_fmt = shap_robustness_df.copy()

# Round numeric columns to 3 decimals
num_cols = ["spearman_rank_corr", "l1_distance"]
shap_fmt[num_cols] = shap_fmt[num_cols].round(3)

out_tex = "../results/shap_robustness_summary.tex"

shap_fmt.to_latex(
    out_tex,
    index=False,
    caption="SHAP robustness summary under Gaussian noise and a structured distribution shift.",
    label="tab:shap-robustness",
    float_format="%.3f",
    escape=True,   # keep safe for underscores, etc.
)

print(f"Saved: {out_tex}")


Saved: ../results/shap_robustness_summary.tex
