# Proposed Approach implementation on the sampled data
## -----------------------------------------------------------------

## -----  MLP1L -----

### MLP1L - 32

In [20]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP1L_32.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP1L_32_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                   3                   4                   5                   6                   7                   8                   9
true_label                                                                                                                                                                                                          
0                              -  66 / 1169.1 / 26.4   99 / 832.8 / 24.4  92 / 1056.1 / 24.9  79 / 1139.9 / 25.8  100 / 780.1 / 24.1  100 / 572.1 / 24.1  100 / 739.2 / 24.5  55 / 1207.0 / 25.5  100 / 795.9 / 24.6
1              98 / 966.5 / 25.9                   -  100 / 457.7 / 23.4  100 / 474.1 / 23.5   98 / 820.1 / 24.8  100 / 644.5 / 24.1  100 / 372.5 / 23.6  100 / 322.2 / 23.4  100 / 539.1 / 23.8  100 / 405.9 / 23.6
2             78 / 1121.8 / 25.5   3 / 1084.7 / 26.3                   -   76 / 930.0 / 24.6  48 / 1159.

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### MLP1L - 64

In [48]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP1L_64.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP1L_64_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                   3                   4                   5                   6                   7                   8                   9
true_label                                                                                                                                                                                                          
0                              -  72 / 1254.4 / 26.6  77 / 1084.5 / 24.9  94 / 1121.6 / 25.1  85 / 1168.9 / 25.9  98 / 1048.1 / 24.9   98 / 942.0 / 24.9  100 / 795.8 / 24.6  20 / 1236.4 / 25.4   97 / 940.6 / 24.9
1             92 / 1181.7 / 26.4                   -  100 / 642.3 / 23.8  100 / 473.0 / 23.6   87 / 978.0 / 25.2  100 / 676.1 / 24.1  100 / 552.3 / 24.0  100 / 354.4 / 23.5  100 / 749.6 / 24.4  100 / 423.9 / 23.7
2             79 / 1239.4 / 25.8  59 / 1187.9 / 26.0                   -  100 / 754.6 / 24.3  86 / 1092.

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### MLP1L - 128

In [66]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP1L_128.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP1L_128_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                   3                   4                   5                   6                   7                   8                   9
true_label                                                                                                                                                                                                          
0                              -  47 / 1306.9 / 26.7  95 / 1010.0 / 24.7   7 / 1388.4 / 25.6  52 / 1267.4 / 26.2  75 / 1193.3 / 25.1  97 / 1006.2 / 25.1  85 / 1110.6 / 25.3   7 / 1334.9 / 25.7  96 / 1066.1 / 25.2
1             87 / 1246.0 / 26.5                   -  100 / 574.8 / 23.9  100 / 593.5 / 23.9   65 / 940.5 / 25.1  91 / 1024.5 / 25.0  100 / 621.0 / 24.1  100 / 448.6 / 23.6   97 / 846.7 / 24.6  100 / 608.2 / 24.1
2             71 / 1247.8 / 25.8  11 / 1271.8 / 26.1                   -  54 / 1081.5 / 24.8  18 / 1366.

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### MLP1L - 256

In [85]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP1L_256.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP1L_256_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                   3                   4                   5                   6                   7                   8                   9
true_label                                                                                                                                                                                                          
0                              -   1 / 1519.3 / 27.0  73 / 1022.9 / 24.8   8 / 1392.4 / 25.8  34 / 1262.8 / 26.1  50 / 1169.2 / 25.1  90 / 1077.8 / 25.2  92 / 1098.2 / 25.3   1 / 1486.2 / 26.0  77 / 1193.2 / 25.5
1             98 / 1159.8 / 26.2                   -  100 / 547.9 / 23.8  100 / 840.9 / 24.4  14 / 1211.3 / 25.9  100 / 866.2 / 24.6  100 / 813.1 / 24.6  100 / 558.9 / 23.9   32 / 996.1 / 25.2  100 / 611.8 / 24.1
2             29 / 1358.4 / 26.4                   -                   -  47 / 1079.5 / 24.9   2 / 1508.

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


## -----  MLP2L -----

### MLP2L - 32

In [103]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP2L_32.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP2L_32_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                    3                   4                   5                   6                   7                   8                   9
true_label                                                                                                                                                                                                           
0                              -   8 / 1349.4 / 26.6  61 / 1023.3 / 24.9    3 / 1168.7 / 25.3  26 / 1259.7 / 26.2  28 / 1160.6 / 25.1  89 / 1053.0 / 25.2  85 / 1108.4 / 25.3  30 / 1131.9 / 25.3  47 / 1183.3 / 25.5
1             21 / 1370.0 / 26.7                   -  100 / 781.3 / 24.2  100 / 1003.3 / 24.8   98 / 830.8 / 24.8   81 / 737.5 / 24.2  100 / 946.8 / 24.9  100 / 534.7 / 23.9   99 / 608.0 / 24.0  100 / 764.0 / 24.4
2             19 / 1411.2 / 26.1   1 / 1472.8 / 27.0                   -   29 / 1202.2 / 25.2  30 / 

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### MLP2L - 64

In [123]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP2L_64.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP2L_64_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                   3                   4                   5                    6                   7                   8                   9
true_label                                                                                                                                                                                                           
0                              -  11 / 1370.7 / 26.6  44 / 1222.0 / 25.3   3 / 1384.0 / 25.7  30 / 1279.9 / 26.2  17 / 1210.8 / 25.3   51 / 1211.7 / 25.4  39 / 1265.9 / 25.7   4 / 1224.2 / 25.2  38 / 1295.4 / 25.8
1             49 / 1335.2 / 26.7                   -  100 / 958.6 / 24.6  99 / 1076.4 / 25.2  68 / 1205.7 / 25.9  94 / 1172.1 / 25.6  100 / 1130.6 / 25.3  100 / 811.1 / 24.5  92 / 1170.2 / 25.8   99 / 889.6 / 24.8
2             36 / 1279.2 / 25.9   4 / 1246.9 / 25.8                   -  57 / 1100.0 / 25.0  37 / 1

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### MLP2L - 128

In [141]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP2L_128.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP2L_128_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                   3                   4                   5                   6                   7                    8                   9
true_label                                                                                                                                                                                                           
0                              -                   -  39 / 1246.1 / 25.4                   -  18 / 1376.3 / 26.5   3 / 1492.3 / 26.0  40 / 1236.9 / 25.5   4 / 1438.1 / 25.8    1 / 1517.3 / 26.0  22 / 1362.2 / 25.9
1             93 / 1134.5 / 26.2                   -  100 / 837.3 / 24.3  100 / 980.3 / 24.8   78 / 953.7 / 25.1  97 / 1028.3 / 25.0  100 / 856.9 / 24.7   98 / 852.3 / 24.5  100 / 1017.6 / 25.1  100 / 755.4 / 24.4
2             42 / 1259.7 / 25.9                   -                   -  23 / 1297.2 / 25.4   6 / 1

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### MLP2L - 256

In [158]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_MLP2L_256.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/MLP2L_256_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1     # FGSM step size
MAX_ITERS  = 5       # FGSM iterations
BIN_STEPS  = 20      # binary-search iterations
MAX_L2     = 1500    # maximum allowed L2 magnitude in pixel space

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning,
                        message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    flat = x01.reshape(-1) if x01.ndim == 2 else x01
    return torch.from_numpy(flat[None]).float()

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

# Round and format L2 magnitude stats to 2 decimal places
mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                   0                   1                   2                   3                   4                   5                   6                   7                   8                   9
true_label                                                                                                                                                                                                          
0                              -   4 / 1400.0 / 27.0  39 / 1214.3 / 25.2   1 / 1488.3 / 26.0  36 / 1206.9 / 26.0   2 / 1395.0 / 25.5  45 / 1230.6 / 25.5  37 / 1265.7 / 25.7   8 / 1161.9 / 25.2  19 / 1292.4 / 25.8
1             82 / 1285.8 / 26.7                   -  100 / 842.8 / 24.3   97 / 872.8 / 24.4   97 / 845.5 / 24.8  93 / 1053.1 / 25.1  100 / 973.0 / 25.0  100 / 924.0 / 24.8  93 / 1028.3 / 25.1   97 / 924.2 / 24.9
2             18 / 1328.5 / 25.9                   -                   -  48 / 1227.0 / 25.4   5 / 1336.

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### CNN

In [5]:
import os
import warnings
import numpy as np
import torch
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate
from torchvision import transforms

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_CNN.pt"
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/CNN_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1
MAX_ITERS  = 5
BIN_STEPS  = 20
MAX_L2     = 1500

# ──────────────── TRANSFORM FOR CNN INPUT ─────────────────────────────────
transform_28x28 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((28, 28), interpolation=Image.BILINEAR),
    transforms.ToTensor()
])

# ───────────────────────────────────────────────────────────────────────────
def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos]  = 1.0 / (1.0 + np.exp(-z[pos]))
    ez        = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning, message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

model = torch.jit.load(MODEL_PATH, map_location="cpu").eval()

def to_model(x01: np.ndarray) -> torch.Tensor:
    if x01.ndim == 1:
        x01 = x01.reshape(28, 28)
    img = (x01 * 255).astype(np.uint8)
    tensor = transform_28x28(img)
    return tensor.unsqueeze(0).float()  # (1, 1, 28, 28)

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32),
                            s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix  = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi     = np.rint(x_pix).astype(np.int16)
    sign   = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total   = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = model(to_model(x0)).argmax().item()
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_tensor: torch.Tensor):
                query_count[0] += 1
                return model(x_tensor)

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src

                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)
                if model_query(to_model(x)).argmax().item() == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm  = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)).argmax().item() == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)).argmax().item() != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)).argmax().item())
            l2_final = np.linalg.norm(
                x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28)
            )

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L") \
                 .save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── build DataFrame & save CSV ──────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

# ─────────── enhanced summary matrix ──────────────────────────────────────
pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))





===== Targeted Attack Summary Matrix =====
target_label                  0                  1                   2                   3                   4                   5                  6                   7                   8                  9
true_label                                                                                                                                                                                                      
0                             -                  -   9 / 1398.3 / 25.7                   -   3 / 1294.0 / 26.0  11 / 1389.6 / 25.6                  -                   -  20 / 1250.3 / 25.4  2 / 1308.2 / 26.0
1                             -                  -  70 / 1309.3 / 25.8  15 / 1430.7 / 26.5  15 / 1224.5 / 25.8   1 / 1480.9 / 26.0  6 / 1340.2 / 25.7                   -  42 / 1219.9 / 25.6                  -
2                             -                  -                   -   9 / 1279.6 / 25.6                   -   1 / 152

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### RF

In [4]:
import os
import warnings
import numpy as np
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_RF.pkl"  # RF model path
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/RF_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1
MAX_ITERS  = 5
BIN_STEPS  = 20
MAX_L2     = 1500

def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
    ez = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning, message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

# Load RF model
rf_model = joblib.load(MODEL_PATH)

def to_model(x01: np.ndarray) -> np.ndarray:
    return x01.reshape(1, -1)

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32), s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi = np.rint(x_pix).astype(np.int16)
    sign = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = rf_model.predict(to_model(x0))[0]
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_arr: np.ndarray):
                query_count[0] += 1
                return rf_model.predict(x_arr)[0]

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src
                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)

                if model_query(to_model(x)) == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)) == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)) != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)))
            l2_final = np.linalg.norm(x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28))

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L").save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── Save Results & Display ───────────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))




===== Targeted Attack Summary Matrix =====
target_label                   0                  2                   3                  4                  5                   6                  7                  8                  9
true_label                                                                                                                                                                                
0                              -  19 / 964.1 / 24.6                   -  4 / 1079.6 / 25.2                  -   11 / 958.4 / 24.5                  -  13 / 878.0 / 24.8                  -
1              2 / 1039.8 / 25.5  95 / 163.5 / 23.1   75 / 319.6 / 23.5  78 / 481.0 / 24.1  70 / 423.1 / 23.6   64 / 500.8 / 24.0  41 / 379.0 / 23.6  98 / 113.4 / 23.1  2 / 1311.1 / 26.0
2              15 / 987.0 / 24.9                  -  12 / 1132.2 / 25.1   6 / 867.2 / 25.0  5 / 1067.4 / 24.8   23 / 998.7 / 25.0                  -  63 / 748.1 / 24.5  4 / 1098.0 / 25.2
3              34 / 9

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### XGBoost

In [22]:
import os
import warnings
import numpy as np
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_XGB.pkl"  # XGBoost model path
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/XGB_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1
MAX_ITERS  = 5
BIN_STEPS  = 20
MAX_L2     = 1500

def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
    ez = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning, message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

# Load XGBoost model
xgb_model: XGBClassifier = joblib.load(MODEL_PATH)

def to_model(x01: np.ndarray) -> np.ndarray:
    return x01.reshape(1, -1)

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32), s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi = np.rint(x_pix).astype(np.int16)
    sign = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = xgb_model.predict(to_model(x0))[0]
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_arr: np.ndarray):
                query_count[0] += 1
                return xgb_model.predict(x_arr)[0]

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src
                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)

                if model_query(to_model(x)) == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)) == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)) != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)))
            l2_final = np.linalg.norm(x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28))

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L").save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── Save Results & Display ───────────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))





===== Targeted Attack Summary Matrix =====
target_label                   0                   2                   3                  4                   5                   6                  7                  8                   9
true_label                                                                                                                                                                                   
0                              -   39 / 990.6 / 24.9  21 / 1014.2 / 24.7  6 / 1107.2 / 25.5    2 / 973.7 / 24.5   7 / 1138.2 / 25.1  3 / 1057.2 / 25.3  76 / 877.1 / 24.9  32 / 1150.2 / 25.5
1                              -   77 / 269.9 / 23.4   61 / 467.7 / 23.9  1 / 1321.1 / 26.0   66 / 554.1 / 24.0  29 / 1128.9 / 25.3  25 / 624.2 / 24.4  98 / 151.5 / 23.1   49 / 592.4 / 24.1
2             22 / 1169.8 / 25.5                   -   39 / 896.1 / 24.6  1 / 1054.8 / 25.0   6 / 1464.7 / 26.0   3 / 1156.2 / 25.3  1 / 1426.6 / 26.0  95 / 605.6 / 24.1   11 / 982.4 / 25.3
3     

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### SVM

In [47]:
import os
import warnings
import numpy as np
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_SVM.pkl"  # SVM model path
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/SVM_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1
MAX_ITERS  = 5
BIN_STEPS  = 20
MAX_L2     = 1500

def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
    ez = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning, message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

# Load SVM model
svc_model: SVC = joblib.load(MODEL_PATH)

def to_model(x01: np.ndarray) -> np.ndarray:
    return x01.reshape(1, -1)

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32), s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi = np.rint(x_pix).astype(np.int16)
    sign = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = svc_model.predict(to_model(x0))[0]
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_arr: np.ndarray):
                query_count[0] += 1
                return svc_model.predict(x_arr)[0]

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src
                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)

                if model_query(to_model(x)) == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)) == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)) != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)))
            l2_final = np.linalg.norm(x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28))

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L").save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── Save Results & Display ───────────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")

# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))





===== Targeted Attack Summary Matrix =====
target_label                   0                  1                   2                   3                   4                   5                   6                   7                   8                   9
true_label                                                                                                                                                                                                         
0                              -                  -  19 / 1291.6 / 25.4   6 / 1478.4 / 26.0   7 / 1283.2 / 26.0   4 / 1326.9 / 25.5  23 / 1294.0 / 25.6                   -   4 / 1248.3 / 25.5   6 / 1405.3 / 26.2
1              6 / 1443.6 / 26.8                  -  99 / 1066.2 / 24.9  99 / 1243.9 / 25.8  90 / 1123.6 / 25.5  63 / 1337.5 / 26.0  91 / 1314.9 / 25.8  97 / 1051.6 / 25.1  100 / 828.0 / 24.4  55 / 1209.9 / 26.0
2             15 / 1384.2 / 26.0                  -                   -  41 / 1239.3 / 25.3  18 / 1338.1 / 2

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


### kNN

In [64]:
import os
import warnings
import numpy as np
import joblib
import pandas as pd
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
from tabulate import tabulate

# ─────────────── PATHS ────────────────────────────────────────────────────
MODEL_PATH = r"Models and Data splits/model_kNN.pkl"  # changed model path
DATA_PKL   = r"Models and Data splits/Sampled_AllModels_train.pkl"
SUR_DIR    = r"Models and Data splits"
OUT_DIR    = "adversarial_8bit_images/kNN_train"
os.makedirs(OUT_DIR, exist_ok=True)

# ─────────── HYPER-PARAMETERS (0-1 domain) ────────────────────────────────
EPSILON    = 0.1
MAX_ITERS  = 5
BIN_STEPS  = 20
MAX_L2     = 1500

def sigmoid(z):
    z = np.asarray(z, dtype=np.float32)
    pos = z >= 0
    out = np.empty_like(z)
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
    ez = np.exp(z[~pos])
    out[~pos] = ez / (1.0 + ez)
    return out

def softmax(lgt):
    lgt = np.asarray(lgt, dtype=np.float32)
    e = np.exp(lgt - lgt.max())
    return e / e.sum()

warnings.filterwarnings("ignore", category=RuntimeWarning, message="overflow encountered")

# ─────────── DATA & MODEL ────────────────────────────────────────────────
data = joblib.load(DATA_PKL)
X, y, _ = data

if X.max() > 1.0:
    X = X.astype(np.float32) / 255.0
    warnings.warn("Data appeared in [0,255]; normalized to [0,1].")
else:
    warnings.warn("Data is already scaled to [0,1]; proceeding without change.")

# Load kNN model
knn_model = joblib.load(MODEL_PATH)

def to_model(x01: np.ndarray) -> np.ndarray:
    return x01.reshape(1, -1)

# ─────────── surrogate cache ─────────────────────────────────────────────
sur_cache = {}
def load_sur(label):
    if label not in sur_cache:
        s = joblib.load(os.path.join(SUR_DIR, f"surrogate_digit_{label}.pkl"))
        sur_cache[label] = (s.coef_.astype(np.float32), s.intercept_.astype(np.float32))
    return sur_cache[label]

def push_one_uint8(x_float01: np.ndarray, x_clean01: np.ndarray) -> np.ndarray:
    x_pix = x_float01 * 255.0
    x_orig = x_clean01 * 255.0
    xi = np.rint(x_pix).astype(np.int16)
    sign = np.sign(x_pix - x_orig).astype(np.int16)
    changed = sign != 0
    xi[changed] += sign[changed]
    return np.clip(xi, 0, 255).astype(np.uint8).reshape(28, 28)

# ─────────── stats collectors ─────────────────────────────────────────────
total_trials = 0
succ_total = 0
misclassified = 0
records = []

# ───────────────────────── TARGETED ATTACK LOOP ──────────────────────────
for source_digit in range(10):
    idxs = np.where(y == source_digit)[0][:100]

    for rank, idx in enumerate(idxs, 1):
        x0 = X[idx].copy()
        y0 = int(y[idx])

        pred0 = knn_model.predict(to_model(x0))[0]
        if pred0 != y0:
            misclassified += 1
            continue

        total_trials += 1

        for target_digit in range(10):
            if target_digit == y0:
                continue

            query_count = [0]
            def model_query(x_arr: np.ndarray):
                query_count[0] += 1
                return knn_model.predict(x_arr)[0]

            W_src, b_src = load_sur(y0)
            W_tgt, b_tgt = load_sur(target_digit)

            x = x0.copy()
            success = False

            for _ in range(MAX_ITERS):
                flat = x.reshape(-1)

                if W_src.shape[0] == 1:
                    p_src = sigmoid(W_src[0] @ flat + b_src[0])
                    grad_src = W_src[0] * (p_src - 1)
                else:
                    p_src = softmax(W_src @ flat + b_src)
                    oh_src = np.zeros_like(p_src); oh_src[y0] = 1
                    grad_src = W_src.T @ (p_src - oh_src)

                if W_tgt.shape[0] == 1:
                    p_tgt = sigmoid(W_tgt[0] @ flat + b_tgt[0])
                    grad_tgt = W_tgt[0] * p_tgt
                else:
                    p_tgt = softmax(W_tgt @ flat + b_tgt)
                    oh_tgt = np.zeros_like(p_tgt); oh_tgt[target_digit] = 1
                    grad_tgt = W_tgt.T @ (p_tgt - oh_tgt)

                grad = grad_tgt - grad_src
                x = np.clip(x + EPSILON * np.sign(grad.reshape(x.shape)), 0.0, 1.0)

                if model_query(to_model(x)) == target_digit:
                    success = True
                    break

            if not success:
                continue

            d, lo, hi, best = x - x0, 0.0, 1.0, 1.0
            for _ in range(BIN_STEPS):
                mid = (lo + hi) / 2
                xm = np.clip(x0 + mid * d, 0.0, 1.0)
                if model_query(to_model(xm)) == target_digit:
                    best, hi = mid, mid
                else:
                    lo = mid
            x_best = np.clip(x0 + best * d, 0.0, 1.0)

            delta = (x_best - x0).reshape(-1) * 255.0
            l2_raw = np.linalg.norm(delta)
            if l2_raw > MAX_L2:
                scale = MAX_L2 / l2_raw
                x_best = x0 + (x_best - x0) * scale

            x_uint8 = push_one_uint8(x_best.reshape(28,28), x0.reshape(28,28))

            if model_query(to_model(x_uint8 / 255.0)) != target_digit:
                continue

            y_adv = int(model_query(to_model(x_uint8 / 255.0)))
            l2_final = np.linalg.norm(x_uint8.astype(np.float32) - (x0 * 255.0).reshape(28,28))

            succ_total += 1
            fname = f"true{y0}_adv{y_adv}_mag{l2_final:.1f}_sample{rank}.png"
            Image.fromarray(x_uint8, mode="L").save(os.path.join(OUT_DIR, fname))

            records.append({
                'sample_idx': idx,
                'true_label': y0,
                'target_label': target_digit,
                'adv_label': y_adv,
                'success': True,
                'queries': query_count[0],
                'l2_mag': l2_final
            })

# ─────────── Save Results & Display ───────────────────────────────────────
df = pd.DataFrame(records)
csv_path = os.path.join(OUT_DIR, "targeted_attack_stats.csv")
df.to_csv(csv_path, index=False)

pivot_data = df.groupby(['true_label', 'target_label']).agg(
    success_count=('success', 'sum'),
    mean_l2=('l2_mag', 'mean'),
    mean_queries=('queries', 'mean')
).reset_index()

pivot_data['cell'] = pivot_data.apply(
    lambda row: f"{int(row.success_count)} / {row.mean_l2:.1f} / {row.mean_queries:.1f}", axis=1)

matrix = pivot_data.pivot(index="true_label", columns="target_label", values="cell").fillna("-")

print("\n===== Targeted Attack Summary Matrix =====")
print(matrix.to_string())

count_matrix = pivot_data.pivot(index="true_label", columns="target_label", values="success_count").fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(count_matrix, annot=True, fmt=".0f", cmap="YlGnBu", cbar_kws={'label': 'Success Count'})
plt.title("Targeted Attack Success Count")
plt.xlabel("Target Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "summary_success_heatmap.png"))
plt.close()

print(f"\nTotal successful attacks: {succ_total} / {total_trials * 9}")
print(f"Stats saved to CSV: {csv_path}")


# ─────────── Three Summary Tables ────────────────────────────────────────
success_counts = df.groupby(['true_label', 'target_label'])['success'].sum().unstack().fillna(0).astype(int)
print("\n===== Success Counts Table =====")
print(tabulate(success_counts, headers='keys', tablefmt='fancy_grid'))

query_stats = df.groupby(['true_label', 'target_label'])['queries'].agg(['min', 'max', 'mean']).unstack().round(1)
query_min = query_stats['min'].fillna('-')
query_max = query_stats['max'].fillna('-')
query_mean = query_stats['mean'].fillna('-')

print("\n===== Query Stats Table =====")
print("Min Queries:")
print(tabulate(query_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Queries:")
print(tabulate(query_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Queries:")
print(tabulate(query_mean, headers='keys', tablefmt='fancy_grid'))

mag_stats = df.groupby(['true_label', 'target_label'])['l2_mag'].agg(['min', 'max', 'mean']).unstack()

def format_float_table(table):
    return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')

mag_min = format_float_table(mag_stats['min'])
mag_max = format_float_table(mag_stats['max'])
mag_mean = format_float_table(mag_stats['mean'])

print("\n===== L2 Magnitude Stats Table =====")
print("Min Magnitude:")
print(tabulate(mag_min, headers='keys', tablefmt='fancy_grid'))
print("\nMax Magnitude:")
print(tabulate(mag_max, headers='keys', tablefmt='fancy_grid'))
print("\nAvg Magnitude:")
print(tabulate(mag_mean, headers='keys', tablefmt='fancy_grid'))






===== Targeted Attack Summary Matrix =====
target_label                  0                  1                  3                  4                 5                  6                  7                 8                 9
true_label                                                                                                                                                                          
2             2 / 1440.7 / 26.0  1 / 1385.8 / 27.0  1 / 1237.9 / 25.0                  -                 -                  -  2 / 1140.6 / 25.5                 -                 -
3             1 / 1502.5 / 26.0  1 / 1424.2 / 27.0                  -                  -  2 / 638.8 / 24.0                  -   1 / 524.6 / 24.0  1 / 678.4 / 24.0                 -
5             1 / 1428.3 / 26.0                  -  2 / 1462.2 / 26.0                  -                 -  2 / 1300.8 / 25.5                  -                 -                 -
6             1 / 1382.9 / 26.0                  - 

  return table.applymap(lambda x: f"{x:.2f}" if pd.notnull(x) else '-')


In [78]:
success_counts.to_clipboard()

In [80]:
query_min.to_clipboard()

In [82]:
query_max.to_clipboard()

In [84]:
query_mean.to_clipboard()

In [86]:
mag_min.to_clipboard()

In [88]:
mag_max.to_clipboard()

In [90]:
mag_mean.to_clipboard()