In [1]:
# explain_anomalies.py
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from scipy.stats import zscore

# --------------------------
# CONFIG
# Module-level settings read by train_model() and main() below.
FEATURES_CSV = "features.csv"   # output from your feature_engineering step
OUTPUT_EXPLANATIONS = "explanations.csv"  # CSV that main() writes the combined features + explanations table to
RANDOM_STATE = 42  # seed handed to IsolationForest so repeated runs give the same model
CONTAMINATION = 0.15   # keep consistent with your model earlier
TOP_K_FEATURES = 3  # how many top-contributing features are listed per user
# --------------------------

def load_features(path):
    """Read the feature table and split off the optional ground-truth label.

    The first CSV column becomes the index. When an ``is_attack`` column is
    present it is removed from the feature matrix and returned separately as
    an integer Series; it is meant for evaluation only.

    Returns:
        ``(X, y_true)`` where ``y_true`` is ``None`` if the CSV carries no
        ``is_attack`` column.
    """
    table = pd.read_csv(path, index_col=0)
    # No label column: the whole table is the feature matrix.
    if "is_attack" not in table.columns:
        return table, None
    labels = table["is_attack"].astype(int)
    features = table.drop(columns=["is_attack"])
    return features, labels

def train_model(X):
    """Standardise the features and fit an IsolationForest on the result.

    Returns:
        ``(model, scaler, X_scaled)``: the fitted forest, the fitted
        StandardScaler, and the scaled training matrix.
    """
    # Fit the scaler first, then reuse it to produce the training matrix.
    scaler = StandardScaler().fit(X)
    X_scaled = scaler.transform(X)
    forest_params = {
        "n_estimators": 200,
        "contamination": CONTAMINATION,
        "random_state": RANDOM_STATE,
    }
    model = IsolationForest(**forest_params)
    model.fit(X_scaled)
    return model, scaler, X_scaled

def compute_scores(model, scaler, X):
    """Score every row of ``X`` and turn the model's labels into attack flags.

    Returns:
        ``(scores, y_pred, X_scaled)`` where ``scores`` comes from
        ``model.decision_function`` (larger means more normal, smaller means
        more anomalous), ``y_pred`` is 1 for rows the model labels -1
        (anomaly) and 0 otherwise, and ``X_scaled`` is the scaler's output.
    """
    X_scaled = scaler.transform(X)
    scores = model.decision_function(X_scaled)
    # predict() yields -1 for anomalies and 1 for normal points.
    is_anomaly = model.predict(X_scaled) == -1
    # Map to 1 (attack) / 0 (normal).
    y_pred = is_anomaly.astype(int)
    return scores, y_pred, X_scaled

def feature_ablation_contributions(model, scaler, X, X_scaled, feature_names):
    """
    For each feature and each user, set feature to population median and recompute score.
    Contribution = (masked_score - original_score).
    If contribution > 0, masking the feature made the point MORE normal => that feature was pushing it towards anomalous.

    Args:
        model: fitted detector exposing ``decision_function``.
        scaler: fitted transformer exposing ``transform``.
        X: unscaled feature DataFrame (one row per user).
        X_scaled: the scaled version of ``X`` used for the baseline scores.
        feature_names: feature names, in the same order as the columns of ``X``.

    Returns:
        ``(original_scores, contributions)`` where ``contributions`` has shape
        ``(n_rows, n_columns)`` and column ``j`` holds the score change caused
        by masking feature ``j``.
    """
    # original scores
    original_scores = model.decision_function(X_scaled)
    # Work in float so a fractional median is not truncated when every
    # feature column happens to be integer-typed.
    X_float = X.astype(float)
    medians = X_float.median(axis=0).values
    contributions = np.zeros((X.shape[0], X.shape[1]))
    # iterate features (only the position is needed; masking is positional)
    for j, _ in enumerate(feature_names):
        # Keep the masked data as a DataFrame so the scaler receives the same
        # column names as the unmasked X does elsewhere in this file.
        X_masked = X_float.copy()
        # set j-th column to median
        X_masked.iloc[:, j] = medians[j]
        masked_scores = model.decision_function(scaler.transform(X_masked))
        # contribution: masked - original (positive means feature contributed to anomaly)
        contributions[:, j] = masked_scores - original_scores
    return original_scores, contributions

def make_explanations(X, scores, y_pred, contributions, feature_names, y_true=None, top_k=3):
    """Build one explanation row per user from scores and ablation contributions.

    Args:
        X: unscaled feature DataFrame; its index supplies the user ids.
        scores: per-row anomaly scores (higher is more normal).
        y_pred: per-row predicted label (1 = attack, 0 = normal).
        contributions: array of shape ``(n_rows, n_features)``; a positive
            value means masking that feature made the row look more normal.
        feature_names: feature names, in the same order as the columns of
            ``X`` and ``contributions``.
        y_true: optional Series of ground-truth labels. When its index equals
            ``X.index`` it is read row by row; otherwise it is aligned to
            ``X.index`` by label.
        top_k: number of highest-contribution features to report per user.

    Returns:
        DataFrame indexed by ``user`` with columns ``anomaly_score``,
        ``predicted_attack``, ``true_attack`` (only when ``y_true`` is given),
        ``top_features`` and ``explanation``.
    """
    # population z-scores for context (per feature); kept as a plain array and
    # read by position so duplicate user ids in the index cannot turn a cell
    # lookup into a Series.
    z = np.asarray(zscore(X, nan_policy='omit'))

    if y_true is None:
        true_labels = None
    elif y_true.index.equals(X.index):
        # Same index, same order: positional access is exact and duplicate-safe.
        true_labels = y_true.to_numpy()
    else:
        # Differently ordered labels: align by user id (KeyError if one is missing).
        true_labels = y_true.loc[X.index].to_numpy()

    rows = []
    for i, uid in enumerate(X.index):
        row = {
            "user": uid,
            "anomaly_score": float(scores[i]),  # higher is more normal
            "predicted_attack": int(y_pred[i]),
        }
        if true_labels is not None:
            row["true_attack"] = int(true_labels[i])
        # (feature, contribution, zscore) for this user
        feats = [
            (fname, float(contributions[i, j]), float(z[i, j]))
            for j, fname in enumerate(feature_names)
        ]
        # largest positive contribution first; ties keep feature order
        top_feats = sorted(feats, key=lambda x: x[1], reverse=True)[:top_k]
        # human readable explanation
        explanation_lines = []
        for fname, contrib, zscore_val in top_feats:
            # positive contribution: setting the feature to the population
            # median made the user more normal => it pushed towards anomalous
            expl = f"{fname}: contribution={contrib:.4f}, z_score={zscore_val:.2f}"
            # add human context
            if abs(zscore_val) >= 2.0:
                expl += " (strong deviation)"
            elif abs(zscore_val) >= 1.0:
                expl += " (moderate deviation)"
            explanation_lines.append(expl)
        row["top_features"] = "; ".join([f"{f[0]}({f[1]:.3f})" for f in top_feats])
        row["explanation"] = " | ".join(explanation_lines)
        rows.append(row)
    return pd.DataFrame(rows).set_index("user")

def main():
    """Run the pipeline end to end: load, fit, score, explain, save, report.

    Reads FEATURES_CSV, writes the feature table plus explanation columns to
    OUTPUT_EXPLANATIONS, and prints the ten lowest-scoring rows.
    """
    # 1. load
    X, y_true = load_features(FEATURES_CSV)
    feature_names = list(X.columns)
    print("Loaded features:", feature_names)

    # 2. train an IsolationForest (reproducible); the scaled matrix is
    #    recomputed in the next step, so the one returned here is not kept
    model, scaler, _ = train_model(X)

    # 3. compute scores & predictions
    _, y_pred, X_scaled = compute_scores(model, scaler, X)

    # 4. ablation contributions
    original_scores, contributions = feature_ablation_contributions(
        model, scaler, X, X_scaled, feature_names
    )

    # 5. explanations dataframe
    explanations = make_explanations(
        X,
        original_scores,
        y_pred,
        contributions,
        feature_names,
        y_true=y_true,
        top_k=TOP_K_FEATURES,
    )

    # 6. combine for easy inspection: features first, then explanation columns
    carried_over = ["anomaly_score", "predicted_attack"]
    if y_true is not None:
        carried_over.append("true_attack")
    carried_over.extend(["top_features", "explanation"])
    combined = X.copy()
    for column in carried_over:
        combined[column] = explanations[column]

    # save
    combined.to_csv(OUTPUT_EXPLANATIONS)
    print("Wrote explanations to", OUTPUT_EXPLANATIONS)

    # pretty print summary
    pd.set_option('display.max_colwidth', 200)
    print("\n--- Analyst-friendly report (top lines) ---")
    # lowest score first, i.e. most anomalous at the top
    most_anomalous = combined.sort_values("anomaly_score").head(10)
    print(most_anomalous)
    print("\nFor each user, explanations.csv contains top contributing features and z-scores.")
    print("Interpretation rule: positive contribution means that feature pushed the score towards anomaly for that user.")
    print("Z-score indicates how far the user's value is from population mean (|z| >= 2 is strong).")

# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Loaded features: ['avg_login_hour', 'std_login_hour', 'unique_countries', 'unique_devices', 'failed_login_rate', 'resource_entropy']
Wrote explanations to explanations.csv

--- Analyst-friendly report (top lines) ---
                   avg_login_hour  std_login_hour  unique_countries  \
user                                                                  
user1@example.com        7.023256        1.299970                 2   
user4@example.com       17.023256        2.815674                 1   
user3@example.com        6.952381        0.935802                 1   
user5@example.com       12.095238        0.957882                 1   
user8@example.com       16.952381        0.730933                 1   
user7@example.com       17.142857        0.751305                 1   
user2@example.com        8.261905        0.912235                 1   
user6@example.com        7.404762        0.989198                 1   

                   unique_devices  failed_login_rate  resource_entropy  

