In [64]:
# =========================
# PARTIE 1 ‚Äî Donn√©es ‚Üí Teacher(MAT) ‚Üí Student(Distillation)
# =========================


# =========================
# Cell 0 ‚Äî Seeds (reproductibilit√©)
# =========================
import os, random, numpy as np, torch
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# =========================
# Cell 1 ‚Äî Imports & sch√©ma des colonnes
# =========================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Sch√©ma des colonnes du WDBC (Breast Cancer Wisconsin)
columns = [
    "id", "diagnosis",
    "radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
    "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean", "fractal_dimension_mean",
    "radius_se", "texture_se", "perimeter_se", "area_se", "smoothness_se",
    "compactness_se", "concavity_se", "concave_points_se", "symmetry_se", "fractal_dimension_se",
    "radius_worst", "texture_worst", "perimeter_worst", "area_worst", "smoothness_worst",
    "compactness_worst", "concavity_worst", "concave_points_worst", "symmetry_worst", "fractal_dimension_worst"
]

# =========================
# Cell 2 ‚Äî Chargement CSV & X/y
# =========================
df = pd.read_csv("wdbc.data", header=None, names=columns)

# S√©parer X (features) et y (target binaire)
X = df.drop(['id', 'diagnosis'], axis=1)
y = df['diagnosis'].map({'B': 0, 'M': 1})  # 0 = B√©nin, 1 = Malin

print("Dimensions compl√®tes :", X.shape)
print("R√©partition classes :", y.value_counts().to_dict())

# =========================
# Cell 3 ‚Äî Split stratifi√© + scaling fit-on-train
# =========================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=SEED
)

# Standardisation (fit sur TRAIN uniquement, puis transform sur TRAIN & TEST)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test  = scaler.transform(X_test)

print(f"Taille du train set : {X_train.shape[0]} √©chantillons")
print(f"Taille du test set  : {X_test.shape[0]} √©chantillons")
print("Train class counts:", y_train.value_counts().to_dict())
print("Test class counts:", y_test.value_counts().to_dict())

# =========================
# Cell 4 ‚Äî TensorDataset & DataLoaders
# =========================
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)

X_test_tensor  = torch.tensor(X_test,  dtype=torch.float32)
y_test_tensor  = torch.tensor(y_test.values, dtype=torch.long)

batch_size = 64
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(TensorDataset(X_test_tensor,  y_test_tensor),  batch_size=batch_size, shuffle=False)
print("Batch size :", batch_size)

# =========================
# Cell 5 ‚Äî Device (CPU)
# =========================
device = torch.device("cpu")
print("Device utilis√© :", device)

# =========================
# Cell 6 ‚Äî Mod√®le MLP
# =========================
class MLP(nn.Module):
    def __init__(self, input_size=30, hidden_sizes=[128, 64, 32], dropout_rate=0.5):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.bn1 = nn.BatchNorm1d(hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.bn2 = nn.BatchNorm1d(hidden_sizes[1])
        self.fc3 = nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.bn3 = nn.BatchNorm1d(hidden_sizes[2])
        self.fc4 = nn.Linear(hidden_sizes[2], 2)
        self.dropout = nn.Dropout(p=dropout_rate)
    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        return self.fc4(x)

# =========================
# Cell 7 ‚Äî TEACHER : Mixed Adversarial Training (FGSM/PGD/BIM)
# =========================
import time

teacher = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer_t = torch.optim.AdamW(teacher.parameters(), lr=1e-3, weight_decay=1e-4)
EPOCHS_T = 30

# bornes par feature (dans l'espace standardis√©)
with torch.no_grad():
    X_MIN = X_train_tensor.min(dim=0).values.to(device)  # shape: [30]
    X_MAX = X_train_tensor.max(dim=0).values.to(device)

def clamp_per_feature(x):
    return torch.max(torch.min(x, X_MAX), X_MIN)

# hyperparams MAT (tabulaire standardis√©)
EPS_TRAIN   = 0.2
ADV_FRAC    = 0.50
PGD_STEPS   = 5
PGD_ALPHA   = EPS_TRAIN / 5
BIM_STEPS   = 10
BIM_ALPHA   = EPS_TRAIN / 10
LAMBDA      = 0.50  # pond√©ration de la perte adversariale

# attaques (dans l'espace standardis√©), le TEACHER est la cible

def fgsm_teacher(x, y, eps=EPS_TRAIN):
    teacher.eval()
    x_adv = x.detach().clone().requires_grad_(True)
    loss = criterion(teacher(x_adv), y)
    teacher.zero_grad(set_to_none=True)
    loss.backward()
    x_adv = x_adv + eps * x_adv.grad.detach().sign()
    x_adv = clamp_per_feature(x_adv).detach()
    teacher.train()
    return x_adv


def pgd_teacher(x, y, eps=EPS_TRAIN, alpha=PGD_ALPHA, iters=PGD_STEPS, random_start=True):
    teacher.eval()
    x0 = x.detach()
    if random_start:
        delta0 = torch.empty_like(x0).uniform_(-eps, eps)
        x_adv = clamp_per_feature(x0 + delta0)
    else:
        x_adv = x0.clone()
    for _ in range(iters):
        x_adv.requires_grad_(True)
        loss = criterion(teacher(x_adv), y)
        teacher.zero_grad(set_to_none=True)
        loss.backward()
        x_adv = x_adv + alpha * x_adv.grad.detach().sign()
        delta = torch.clamp(x_adv - x0, min=-eps, max=eps)
        x_adv = clamp_per_feature(x0 + delta).detach()
    teacher.train()
    return x_adv


def bim_teacher(x, y, eps=EPS_TRAIN, alpha=BIM_ALPHA, iters=BIM_STEPS):
    # BIM = PGD sans random start
    return pgd_teacher(x, y, eps=eps, alpha=alpha, iters=iters, random_start=False)

@torch.no_grad()
def evaluate_teacher(model, loader):
    model.eval(); loss_sum=0.0; n=0; correct=0; probs_all=[]; y_all=[]
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb); loss = criterion(logits, yb)
        loss_sum += loss.item()*yb.size(0); n += yb.size(0)
        correct += (logits.argmax(1) == yb).sum().item()
        probs_all.append(torch.softmax(logits, dim=1)[:,1].cpu().numpy())
        y_all.append(yb.cpu().numpy())
    from numpy import concatenate as cat
    y_all = cat(y_all); probs_all = cat(probs_all)
    try:
        auc = roc_auc_score(y_all, probs_all)
    except:
        auc = float("nan")
    return (loss_sum/max(1,n), correct/max(1,n), auc)

attacks = ["fgsm","pgd","bim"]
att_idx = 0
print("\nüîß MAT training (Teacher: clean + FGSM/PGD/BIM @ eps=0.2, Œª=0.5)...")
for epoch in range(1, EPOCHS_T+1):
    teacher.train(); t0=time.time(); run_loss=0.0; n=0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        B = xb.size(0)
        k = int(round(ADV_FRAC * B))

        optimizer_t.zero_grad(set_to_none=True)

        if k > 0:
            idx_adv = torch.randperm(B, device=device)[:k]
            mask_clean = torch.ones(B, dtype=torch.bool, device=device)
            mask_clean[idx_adv] = False

            xa, ya = xb[idx_adv], yb[idx_adv]
            a = attacks[att_idx % len(attacks)]; att_idx += 1
            if a == "fgsm":
                xa = fgsm_teacher(xa, ya, eps=EPS_TRAIN)
            elif a == "pgd":
                xa = pgd_teacher(xa, ya, eps=EPS_TRAIN, alpha=PGD_ALPHA, iters=PGD_STEPS, random_start=True)
            else:
                xa = bim_teacher(xa, ya, eps=EPS_TRAIN, alpha=BIM_ALPHA, iters=BIM_STEPS)

            # m√©lange des pertes (cas g√©n√©raux & cas limites)
            loss_terms = []
            if mask_clean.any():
                logits_clean = teacher(xb[mask_clean])
                loss_clean = criterion(logits_clean, yb[mask_clean])
                loss_terms.append((1.0 - LAMBDA) * loss_clean)
            if k > 0:
                logits_adv = teacher(xa)
                loss_adv = criterion(logits_adv, ya)
                loss_terms.append(LAMBDA * loss_adv)
            loss = sum(loss_terms) if len(loss_terms) > 0 else torch.tensor(0.0, device=device)
        else:
            logits = teacher(xb)
            loss = criterion(logits, yb)

        loss.backward(); optimizer_t.step()
        run_loss += loss.item()*yb.size(0); n += yb.size(0)

    train_loss = run_loss/max(1,n)
    val_loss, val_acc, val_auc = evaluate_teacher(teacher, test_loader)
    print(f"[MAT-MLP-Œª] Epoch {epoch:02d} | train_loss={train_loss:.4f} | test_loss={val_loss:.4f} | test_acc={val_acc:.3f} | test_auc={val_auc:.3f} | {time.time()-t0:.1f}s")

teacher.eval()
print("‚úÖ Teacher (MAT) pr√™t.")

# =========================
# Cell 8 ‚Äî STUDENT : Distillation depuis le Teacher (T, Œ±)
# =========================
student = MLP().to(device)
T       = 8.0   # temp√©rature
ALPHA   = 0.7   # poids de la distillation (soft) vs CE (hard)
LR_S    = 1e-3
EPOCHS_S = 30

opt_s   = torch.optim.AdamW(student.parameters(), lr=LR_S, weight_decay=1e-4)
crit_ce = nn.CrossEntropyLoss()
kldiv   = nn.KLDivLoss(reduction="batchmean")

# figer le teacher (d√©j√† entra√Æn√©)
teacher.eval()
for p in teacher.parameters():
    p.requires_grad_(False)


def distill_loss(student_logits, teacher_logits, y_true, T=T, alpha=ALPHA):
    with torch.no_grad():
        p_teacher = torch.softmax(teacher_logits / T, dim=1)
    log_p_student = torch.log_softmax(student_logits / T, dim=1)
    loss_distill = (T*T) * kldiv(log_p_student, p_teacher)  # facteur T^2
    loss_hard    = crit_ce(student_logits, y_true)
    return alpha * loss_distill + (1.0 - alpha) * loss_hard

@torch.no_grad()
def eval_student_clean(model, loader):
    model.eval(); loss_sum=0.0; n=0; y_true, y_pred, y_prob = [], [], []

    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss = crit_ce(logits, yb)
        loss_sum += loss.item()*yb.size(0); n += yb.size(0)
        probs = torch.softmax(logits, dim=1)[:,1].cpu().numpy()
        y_prob.extend(probs)
        y_pred.extend(logits.argmax(1).cpu().numpy())
        y_true.extend(yb.cpu().numpy())
    acc  = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec  = recall_score(y_true, y_pred, zero_division=0)
    f1   = f1_score(y_true, y_pred, zero_division=0)
    try:
        auc  = roc_auc_score(y_true, y_prob)
    except:
        auc = float("nan")
    return loss_sum/max(1,n), acc, prec, rec, f1, auc

print("\nüîß Distillation (Student ‚Üê Teacher soft labels)...")
for epoch in range(1, EPOCHS_S+1):
    student.train(); t0=time.time(); run_loss=0.0; n=0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        with torch.no_grad():
            t_logits = teacher(xb)  # sorties teacher (soft)
        opt_s.zero_grad(set_to_none=True)
        s_logits = student(xb)
        loss = distill_loss(s_logits, t_logits, yb, T, ALPHA)
        loss.backward(); opt_s.step()
        run_loss += loss.item()*yb.size(0); n += yb.size(0)

    tr_loss = run_loss/max(1,n)
    te_loss, te_acc, te_prec, te_rec, te_f1, te_auc = eval_student_clean(student, test_loader)
    print(f"[Student] Ep {epoch:02d} | train_loss={tr_loss:.4f} | test_loss={te_loss:.4f} | acc={te_acc:.3f} | prec={te_prec:.3f} | rec={te_rec:.3f} | f1={te_f1:.3f} | auc={te_auc:.3f} | {time.time()-t0:.1f}s")

student.eval()
print("‚úÖ Student pr√™t (fin de la Partie 1).")



Dimensions compl√®tes : (569, 30)
R√©partition classes : {0: 357, 1: 212}
Taille du train set : 398 √©chantillons
Taille du test set  : 171 √©chantillons
Train class counts: {0: 250, 1: 148}
Test class counts: {0: 107, 1: 64}
Batch size : 64
Device utilis√© : cpu

üîß MAT training (Teacher: clean + FGSM/PGD/BIM @ eps=0.2, Œª=0.5)...
[MAT-MLP-Œª] Epoch 01 | train_loss=0.6989 | test_loss=0.5520 | test_acc=0.936 | test_auc=0.993 | 0.1s
[MAT-MLP-Œª] Epoch 02 | train_loss=0.5715 | test_loss=0.3838 | test_acc=0.942 | test_auc=0.996 | 0.1s
[MAT-MLP-Œª] Epoch 03 | train_loss=0.5085 | test_loss=0.2893 | test_acc=0.947 | test_auc=0.998 | 0.1s
[MAT-MLP-Œª] Epoch 04 | train_loss=0.4383 | test_loss=0.2494 | test_acc=0.953 | test_auc=0.998 | 0.1s
[MAT-MLP-Œª] Epoch 05 | train_loss=0.4003 | test_loss=0.2164 | test_acc=0.953 | test_auc=0.998 | 0.1s
[MAT-MLP-Œª] Epoch 06 | train_loss=0.3623 | test_loss=0.1979 | test_acc=0.953 | test_auc=0.998 | 0.1s
[MAT-MLP-Œª] Epoch 07 | train_loss=0.3380 | test_los

In [65]:
# =========================================
# PARTIE 1 ‚Äî ADD-ON : √âvaluation du STUDENT sur clean & attaques ART
# =========================================


import numpy as np, torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# ---- Sanity checks
student.eval()
assert 'X_test_tensor' in globals() and 'y_test_tensor' in globals(), "Veuillez ex√©cuter la Partie 1 avant."

# ---- ART wrapper pour MLP tabulaire standardis√©
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescent, BasicIterativeMethod, CarliniL2Method

BATCH_EVAL = 64

_dummy_opt = torch.optim.SGD(student.parameters(), lr=0.0)  # optimiseur factice requis par ART
art_clf = PyTorchClassifier(
    model=student,
    loss=crit_ce,
    optimizer=_dummy_opt,
    input_shape=(30,),
    nb_classes=2,
    clip_values=(-5.0, 5.0),
    preprocessing=None,
    device_type="cpu"
)

# ---- Donn√©es numpy pour ART
X_test_np = X_test_tensor.numpy().astype(np.float32)
y_test_np = y_test_tensor.numpy().astype(np.int64)

# ---- utilitaire d'√©valuation

def print_metrics(tag, preds, y_true):
    yhat = preds.argmax(axis=1)
    ppos = preds[:, 1]
    acc  = accuracy_score(y_true, yhat)
    prec = precision_score(y_true, yhat, zero_division=0)
    rec  = recall_score(y_true, yhat, zero_division=0)
    f1   = f1_score(y_true, yhat, zero_division=0)
    try:
        auc = roc_auc_score(y_true, ppos)
    except:
        auc = float('nan')
    print(f"{tag:35s} | acc={acc:.4f} prec={prec:.4f} rec={rec:.4f} f1={f1:.4f} auc={auc:.4f}")

# ---- Baseline CLEAN
preds_clean = art_clf.predict(X_test_np)
print_metrics("CLEAN (Student via ART)", preds_clean, y_test_np)

# ---- FGSM : eps = {0.1, 0.2, 0.3}
for eps in [0.1, 0.2, 0.3]:
    atk = FastGradientMethod(art_clf, eps=eps, batch_size=BATCH_EVAL)
    X_adv = atk.generate(x=X_test_np)
    preds = art_clf.predict(X_adv)
    print_metrics(f"FGSM eps={eps}", preds, y_test_np)

# ---- PGD (L_inf): eps = {0.1, 0.2, 0.3}, eps_step = 0.01, it=20
for eps in [0.1, 0.2, 0.3]:
    atk = ProjectedGradientDescent(
        art_clf, eps=eps, eps_step=0.01, max_iter=20, norm=np.inf,
        targeted=False, num_random_init=0, batch_size=BATCH_EVAL
    )
    X_adv = atk.generate(x=X_test_np)
    preds = art_clf.predict(X_adv)
    print_metrics(f"PGD eps={eps} step=0.01 it=20", preds, y_test_np)

# ---- BIM : eps = {0.1, 0.2, 0.3}, eps_step = 0.01, it=10
for eps in [0.1, 0.2, 0.3]:
    atk = BasicIterativeMethod(
        art_clf, eps=eps, eps_step=0.01, max_iter=10, batch_size=BATCH_EVAL
    )
    X_adv = atk.generate(x=X_test_np)
    preds = art_clf.predict(X_adv)
    print_metrics(f"BIM  eps={eps} step=0.01 it=10", preds, y_test_np)

# ---- C&W L2 (FAST)
cw_fast = CarliniL2Method(
    classifier=art_clf,
    targeted=False,
    confidence=0.0,
    learning_rate=0.02,
    max_iter=75,
    binary_search_steps=1,
    initial_const=0.3,
    batch_size=BATCH_EVAL,
)
X_cw_fast = cw_fast.generate(x=X_test_np)
preds_cw_fast = art_clf.predict(X_cw_fast)
print_metrics("C&W-L2  FAST (c0=0.3,it=75,bs=1,lr=0.02)", preds_cw_fast, y_test_np)

# ---- C&W L2 (STRONG)
cw_strong = CarliniL2Method(
    classifier=art_clf,
    targeted=False,
    confidence=0.0,
    learning_rate=0.01,
    max_iter=500,
    binary_search_steps=7,
    initial_const=0.01,
    batch_size=BATCH_EVAL,
)
X_cw_strong = cw_strong.generate(x=X_test_np)
preds_cw_strong = art_clf.predict(X_cw_strong)
print_metrics("C&W-L2 STRONG (c0=0.01,it=500,bs=7,lr=0.01)", preds_cw_strong, y_test_np)


CLEAN (Student via ART)             | acc=0.9591 prec=1.0000 rec=0.8906 f1=0.9421 auc=0.9982
FGSM eps=0.1                        | acc=0.9591 prec=0.9672 rec=0.9219 f1=0.9440 auc=0.9956
FGSM eps=0.2                        | acc=0.9357 prec=0.9492 rec=0.8750 f1=0.9106 auc=0.9784
FGSM eps=0.3                        | acc=0.8480 prec=0.8167 rec=0.7656 f1=0.7903 auc=0.9230


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

PGD eps=0.1 step=0.01 it=20         | acc=0.9591 prec=0.9672 rec=0.9219 f1=0.9440 auc=0.9956


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

PGD eps=0.2 step=0.01 it=20         | acc=0.9357 prec=0.9492 rec=0.8750 f1=0.9106 auc=0.9756


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

PGD eps=0.3 step=0.01 it=20         | acc=0.9357 prec=0.9492 rec=0.8750 f1=0.9106 auc=0.9756


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

BIM  eps=0.1 step=0.01 it=10        | acc=0.9591 prec=0.9672 rec=0.9219 f1=0.9440 auc=0.9956


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

BIM  eps=0.2 step=0.01 it=10        | acc=0.9591 prec=0.9672 rec=0.9219 f1=0.9440 auc=0.9956


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

BIM  eps=0.3 step=0.01 it=10        | acc=0.9591 prec=0.9672 rec=0.9219 f1=0.9440 auc=0.9956


C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]

C&W-L2  FAST (c0=0.3,it=75,bs=1,lr=0.02) | acc=0.9532 prec=0.9667 rec=0.9062 f1=0.9355 auc=0.9966


C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]

C&W-L2 STRONG (c0=0.01,it=500,bs=7,lr=0.01) | acc=0.9415 prec=0.9500 rec=0.8906 f1=0.9194 auc=0.9962


In [68]:
# =========================
# PARTIE 2 ‚Äî Attaques (ART) ‚Üí D√©tecteur ‚Üí Pipeline global
# =========================
# Cette seconde moiti√© couvre :
#  - Wrapper ART sur le STUDENT
#  - G√©n√©ration des ensembles adversariaux (TRAIN/TEST) avec plusieurs attaques
#  - Extraction d'embreddings du STUDENT et z-score
#  - Entra√Ænement du d√©tecteur binaire + calibration du seuil œÑ (contr√¥le FPR)
#  - Pipeline global : d√©tection (rejeter/laisser passer) ‚Üí classification par STUDENT
#    + KPIs d√©taill√©s & breakdown par type d'attaque

# =========================================
# Bloc 7 ‚Äî Conversion donn√©es en NumPy pour ART
# =========================================
import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, roc_curve
)
from torch.utils.data import DataLoader, TensorDataset

# Si vous avez red√©marr√© le runtime, r√©-ex√©cutez la Partie 1 avant.

X_clean_train_np = X_train.astype(np.float32)
y_clean_train_np = y_train.values.astype(np.int64)
X_clean_test_np  = X_test.astype(np.float32)
y_clean_test_np  = y_test.values.astype(np.int64)

print("Shapes (clean) ‚Üí train:", X_clean_train_np.shape, "test:", X_clean_test_np.shape)

# =========================================
# Bloc 8 ‚Äî Attaques ART (tabulaires) sur le STUDENT
# =========================================
# !pip -q install adversarial-robustness-toolbox==1.17.1
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import (
    FastGradientMethod, ProjectedGradientDescent, BasicIterativeMethod, CarliniL2Method
)

BATCH_EVAL = 64

# ‚Üí IMPORTANT : on attaque le STUDENT (distill√©)
student.eval()
_dummy_opt = torch.optim.SGD(student.parameters(), lr=0.0)  # optimiseur factice requis par ART

art_classifier_mlp = PyTorchClassifier(
    model=student,
    loss=torch.nn.CrossEntropyLoss(),
    optimizer=_dummy_opt,
    input_shape=(30,),
    nb_classes=2,
    clip_values=(-5.0, 5.0),      # donn√©es standardis√©es
    preprocessing=None,
    device_type="cpu"
)

ATTACK_GRID_TRAIN = {
    "FGSM": {"eps_list": [0.2]},
    "PGD" : {"eps_list": [0.2], "steps": 10, "step_frac": 0.25},
    "BIM" : {"eps_list": [0.2], "steps": 7,  "step_frac": 0.10},
}
ATTACK_GRID_TEST = {
    "FGSM": {"eps_list": [0.1, 0.2, 0.3]},
    "PGD" : {"eps_list": [0.1, 0.2], "steps": 20, "step_frac": 0.25},
    "BIM" : {"eps_list": [0.1, 0.2], "steps": 10, "step_frac": 0.10},
    "CW"  : {"initial_const": [0.1, 0.3]},
}

def generate_adv_set(art_clf, X_np, y_np, attack_name, **kwargs):
    if attack_name == "FGSM":
        outs, ys, tags = [], [], []
        for eps in kwargs["eps_list"]:
            atk = FastGradientMethod(estimator=art_clf, eps=eps, batch_size=BATCH_EVAL)
            adv = atk.generate(X_np)
            outs.append(adv); ys.append(y_np); tags += [f"FGSM@{eps:.5f}"] * len(y_np)
        return np.concatenate(outs, 0), np.concatenate(ys, 0), np.array(tags)
    if attack_name == "PGD":
        outs, ys, tags = [], [], []
        for eps in kwargs["eps_list"]:
            step = eps * kwargs.get("step_frac", 0.25)
            atk = ProjectedGradientDescent(
                estimator=art_clf, eps=eps, eps_step=step,
                max_iter=kwargs.get("steps", 40), targeted=False,
                num_random_init=1, batch_size=BATCH_EVAL
            )
            adv = atk.generate(X_np)
            outs.append(adv); ys.append(y_np); tags += [f"PGD@{eps:.5f}"] * len(y_np)
        return np.concatenate(outs, 0), np.concatenate(ys, 0), np.array(tags)
    if attack_name == "BIM":
        outs, ys, tags = [], [], []
        for eps in kwargs["eps_list"]:
            step = eps * kwargs.get("step_frac", 0.10)
            atk = BasicIterativeMethod(
                estimator=art_clf, eps=eps, eps_step=step,
                max_iter=kwargs.get("steps", 10), targeted=False,
                batch_size=BATCH_EVAL
            )
            adv = atk.generate(X_np)
            outs.append(adv); ys.append(y_np); tags += [f"BIM@{eps:.5f}"] * len(y_np)
        return np.concatenate(outs, 0), np.concatenate(ys, 0), np.array(tags)
    if attack_name == "CW":
        outs, ys, tags = [], [], []
        for c0 in kwargs["initial_const"]:
            atk = CarliniL2Method(
                classifier=art_clf, initial_const=c0,
                max_iter=20, learning_rate=0.01,
                targeted=False, batch_size=BATCH_EVAL
            )
            adv = atk.generate(X_np)
            outs.append(adv); ys.append(y_np); tags += [f"CW@{c0:.2f}"] * len(y_np)
        return np.concatenate(outs, 0), np.concatenate(ys, 0), np.array(tags)
    raise ValueError("Attack inconnue:", attack_name)


def build_mixed_adv(art_clf, X_np, y_np, grid):
    XX, yy, src = [], [], []
    for name, cfg in grid.items():
        Xa, ya, tags = generate_adv_set(art_clf, X_np, y_np, name, **cfg)
        XX.append(Xa); yy.append(ya); src.append(tags)
    return np.concatenate(XX, 0), np.concatenate(yy, 0), np.concatenate(src, 0)

print("‚ö° G√©n√©ration adversaires pour TRAIN (student)‚Ä¶")
X_adv_train_np, y_adv_train_np, src_train = build_mixed_adv(
    art_classifier_mlp, X_clean_train_np, y_clean_train_np, ATTACK_GRID_TRAIN
)
print("‚ö° G√©n√©ration adversaires pour TEST (student)‚Ä¶")
X_adv_test_np,  y_adv_test_np,  src_test  = build_mixed_adv(
    art_classifier_mlp, X_clean_test_np,  y_clean_test_np,  ATTACK_GRID_TEST
)
print("Adversaires (student) ‚Üí train:", X_adv_train_np.shape, "test:", X_adv_test_np.shape)

# =========================================
# Bloc 9 ‚Äî D√©tecteur : embeddings du STUDENT + MLP binaire
# =========================================
@torch.no_grad()
def extract_mlp_embeddings(backbone, X_np, bs=256):
    backbone.eval(); embs = []
    for i in range(0, len(X_np), bs):
        xb = torch.from_numpy(X_np[i:i+bs]).float().to(device)
        # chemin latent : fc1/bn1 ‚Üí fc2/bn2 ‚Üí fc3/bn3 ‚Üí ReLU (sans fc4)
        x = F.relu(backbone.bn1(backbone.fc1(xb)))
        x = F.relu(backbone.bn2(backbone.fc2(x)))
        x = F.relu(backbone.bn3(backbone.fc3(x)))
        embs.append(x.cpu().numpy())
    return np.concatenate(embs, axis=0).astype(np.float32)

@torch.no_grad()
def predict_mlp_classes(backbone, X_np, bs=256):
    backbone.eval(); preds, probs = [], []
    for i in range(0, len(X_np), bs):
        xb = torch.from_numpy(X_np[i:i+bs]).float().to(device)
        logits = backbone(xb)
        pb = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
        yh = logits.argmax(1).cpu().numpy()
        probs.append(pb); preds.append(yh)
    return np.concatenate(preds), np.concatenate(probs)

# 1) Embeddings (clean/adv ; train/test) ‚Äî sur le STUDENT
Xemb_clean_tr = extract_mlp_embeddings(student, X_clean_train_np)
Xemb_adv_tr   = extract_mlp_embeddings(student, X_adv_train_np)
Xemb_clean_te = extract_mlp_embeddings(student, X_clean_test_np)
Xemb_adv_te   = extract_mlp_embeddings(student, X_adv_test_np)

# 2) Z-score (Œº,œÉ sur clean-train)
mu = Xemb_clean_tr.mean(axis=0, keepdims=True)
sigma = Xemb_clean_tr.std(axis=0, keepdims=True) + 1e-6

def zscore(X):
    return (X - mu) / sigma

Xdet_tr = np.vstack([zscore(Xemb_clean_tr), zscore(Xemb_adv_tr)]).astype(np.float32, copy=False)
ydet_tr = np.concatenate([
    np.zeros(len(Xemb_clean_tr), dtype=np.int64),
    np.ones(len(Xemb_adv_tr),   dtype=np.int64)
])

Xdet_te = np.vstack([zscore(Xemb_clean_te), zscore(Xemb_adv_te)]).astype(np.float32, copy=False)
ydet_te = np.concatenate([
    np.zeros(len(Xemb_clean_te), dtype=np.int64),
    np.ones(len(Xemb_adv_te),   dtype=np.int64)
])

# 3) D√©tecteur binaire
class DetectorMLP(nn.Module):
    def __init__(self, in_dim, h1=256, h2=128, p=0.2):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, h1), nn.BatchNorm1d(h1), nn.ReLU(inplace=True), nn.Dropout(p),
            nn.Linear(h1, h2),     nn.BatchNorm1d(h2), nn.ReLU(inplace=True), nn.Dropout(p),
            nn.Linear(h2, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

import torch.nn as nn

det_in = Xdet_tr.shape[1]
detector_mlp = DetectorMLP(det_in, h1=256, h2=64, p=0.3).to(device)

# Balance automatique des classes (positif = adversarial)
n_pos = float((ydet_tr == 1).sum())
n_neg = float((ydet_tr == 0).sum())
pos_weight_val = n_neg / max(1.0, n_pos)
pos_weight = torch.tensor(pos_weight_val, device=device)
print(f"pos_weight (BCE) = {pos_weight.item():.3f}  [n_neg={n_neg:.0f}, n_pos={n_pos:.0f}]")

det_crit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
det_opt  = torch.optim.AdamW(detector_mlp.parameters(), lr=2e-3, weight_decay=1e-4)
EPOCHS_DET = 35


def as_loader_feats(X, y, bs=256, shuffle=False):
    ds = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).float())
    return DataLoader(ds, batch_size=bs, shuffle=shuffle, num_workers=0)


det_tr_dl = as_loader_feats(Xdet_tr, ydet_tr, bs=256, shuffle=True)
det_te_dl = as_loader_feats(Xdet_te, ydet_te, bs=512, shuffle=False)


def train_detector_epoch(model, loader, opt, crit):
    model.train(); loss_sum=0.0; n=0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad(set_to_none=True)
        logits = model(xb)
        loss = crit(logits, yb)
        loss.backward(); opt.step()
        loss_sum += loss.item()*xb.size(0); n += xb.size(0)
    return loss_sum/max(1, n)

@torch.no_grad()
def eval_detector(model, loader, thr=0.5):
    model.eval(); ys, yh, yp = [], [], []
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        prob = torch.sigmoid(model(xb))
        ys.append(yb.cpu().numpy()); yp.append(prob.cpu().numpy())
        yh.append((prob > thr).float().cpu().numpy())
    y_true = np.concatenate(ys).astype(int).ravel()
    y_prob = np.concatenate(yp).astype(float).ravel()
    y_hat  = np.concatenate(yh).astype(int).ravel()
    acc  = accuracy_score(y_true, y_hat)
    prec = precision_score(y_true, y_hat, zero_division=0)
    rec  = recall_score(y_true, y_hat, zero_division=0)
    f1   = f1_score(y_true, y_hat, zero_division=0)
    try:
        auc = roc_auc_score(y_true, y_prob)
    except:
        auc = float("nan")
    cm = confusion_matrix(y_true, y_hat)
    return acc, prec, rec, f1, auc, cm

@torch.no_grad()
def infer_logits(model, X, bs=512):
    model.eval(); outs = []
    for i in range(0, len(X), bs):
        xb = torch.from_numpy(X[i:i+bs]).float().to(device)
        outs.append(model(xb).cpu().numpy())
    return np.concatenate(outs, axis=0)

print("\nüîß Entra√Ænement du d√©tecteur (embeddings student)‚Ä¶")
best_f1, best_state = -1.0, None
for ep in range(1, EPOCHS_DET+1):
    tr_loss = train_detector_epoch(detector_mlp, det_tr_dl, det_opt, det_crit)
    acc, prec, rec, f1, auc, cm = eval_detector(detector_mlp, det_te_dl, thr=0.5)
    print(f"[DET][{ep:02d}/{EPOCHS_DET}] loss_tr={tr_loss:.4f} | acc={acc:.3f} prec={prec:.3f} rec={rec:.3f} f1={f1:.3f} auc={auc:.3f}")
    if f1 > best_f1:
        best_f1 = f1
        best_state = {k: v.detach().cpu().clone() for k, v in detector_mlp.state_dict().items()}
if best_state is not None:
    detector_mlp.load_state_dict({k: v.to(device) for k, v in best_state.items()})

# --- Calibration du seuil œÑ (FPR-cible et F1-optimal)
logits_te = infer_logits(detector_mlp, Xdet_te)
prob_te = 1.0 / (1.0 + np.exp(-logits_te))
auc_raw = roc_auc_score(ydet_te.astype(int), prob_te.astype(float))
if auc_raw < 0.5:  # r√©orientation si mod√®le invers√©
    prob_te = 1.0 - prob_te
    auc_raw = 1.0 - auc_raw
print(f"AUC (orient√©e positivement) = {auc_raw:.3f}")

thr_grid = np.unique(prob_te)
from sklearn.metrics import f1_score as _f1
f1_vals  = [_f1(ydet_te.astype(int), (prob_te >= t).astype(int), zero_division=0) for t in thr_grid]
tau_f1   = float(thr_grid[int(np.argmax(f1_vals))])

fpr, tpr, thr = roc_curve(ydet_te.astype(int), prob_te.astype(float))
TARGET_FPR = 0.18  # ‚âà 18% faux rejets max sur propre
mask = fpr <= TARGET_FPR
if mask.any():
    idx = np.argmax(tpr[mask])
    tau_fpr = float(thr[mask][idx])
else:
    tau_fpr = float(thr[np.argmin(fpr)])

TAU = tau_fpr
print(f"‚úÖ Seuils: tau_f1={tau_f1:.3f} | tau_fpr@{int(TARGET_FPR*100)}%={tau_fpr:.3f} ‚Üí utilis√©: TAU={TAU:.3f}")


# =========================================
# Bloc 10 ‚Äî Pipeline global (d√©tection + classification student)
# =========================================
X_test_global = np.vstack([X_clean_test_np, X_adv_test_np])
y_is_adv      = np.concatenate([
    np.zeros(len(X_clean_test_np), dtype=np.int64),
    np.ones(len(X_adv_test_np),   dtype=np.int64)
])
y_true_cls    = np.concatenate([y_clean_test_np, y_adv_test_np])

# Embeddings & d√©tection (z-score identique)
Xemb_global   = extract_mlp_embeddings(student, X_test_global)
Xemb_global_z = (Xemb_global - mu) / sigma

with torch.no_grad():
    det_logits = []
    for i in range(0, len(Xemb_global_z), 512):
        xb = torch.from_numpy(Xemb_global_z[i:i+512]).float().to(device)
        det_logits.append(detector_mlp(xb).cpu().numpy())
    det_logits = np.concatenate(det_logits, axis=0)

det_prob = 1.0 / (1.0 + np.exp(-det_logits))
det_pred = (det_prob > TAU).astype(int)  # 1 = adversarial (rejet), 0 = propre (accept√©)

# Classification finale par le STUDENT pour les entr√©es accept√©es
accepted_mask = (det_pred == 0)
X_accepted    = X_test_global[accepted_mask]
y_true_acc    = y_true_cls[accepted_mask]
y_is_adv_acc  = y_is_adv[accepted_mask]

yhat_acc, _   = predict_mlp_classes(student, X_accepted)

# KPIs
n_adv_total       = int((y_is_adv == 1).sum())
n_adv_blocked     = int(((y_is_adv == 1) & (det_pred == 1)).sum())
pct_adv_blocked   = 100.0 * n_adv_blocked / max(1, n_adv_total)

clean_acc_mask    = (y_is_adv_acc == 0)
n_clean_accepted  = int(clean_acc_mask.sum())
n_clean_correct   = int((yhat_acc[clean_acc_mask] == y_true_acc[clean_acc_mask]).sum()) if n_clean_accepted > 0 else 0
pct_clean_correct = 100.0 * n_clean_correct / max(1, n_clean_accepted)

n_clean_total     = int((y_is_adv == 0).sum())
n_clean_blocked   = int(((y_is_adv == 0) & (det_pred == 1)).sum())
pct_false_rejects = 100.0 * n_clean_blocked / max(1, n_clean_total)

# Erreur du student parmi les adversariales ACCEPT√âES
mask_adv_acc = (y_is_adv_acc == 1)
n_adv_acc = int(mask_adv_acc.sum())
n_adv_err = int((yhat_acc[mask_adv_acc] != y_true_acc[mask_adv_acc]).sum())
adv_err_rate = 100.0 * n_adv_err / max(1, n_adv_acc)

print("\nüîé Pipeline global (TEST √©tendu, student)")
print(f"‚Ä¢ % adversariales bloqu√©es               : {pct_adv_blocked:.2f}% ({n_adv_blocked}/{n_adv_total})")
print(f"‚Ä¢ % donn√©es propres correctement class√©es: {pct_clean_correct:.2f}% ({n_clean_correct}/{max(1,n_clean_accepted)})")
print(f"‚Ä¢ % faux rejets (propres bloqu√©es)       : {pct_false_rejects:.2f}% ({n_clean_blocked}/{n_clean_total})")
print(f"‚Ä¢ Student error sur adversariales accept√©es : {adv_err_rate:.2f}% ({n_adv_err}/{max(1,n_adv_acc)})")

# Breakdown par type d'attaque (sur la partie adversariale du TEST)
print("\nüìå Breakdown par type d'attaque (TEST adversarial):")
start_adv = len(X_clean_test_np)
det_pred_adv = det_pred[start_adv:]
for tag in np.unique(src_test):
    m = (src_test == tag)
    n_tot = int(m.sum())
    n_blk = int((det_pred_adv[m] == 1).sum())
    print(f"- {tag:>10s}: bloqu√©es {n_blk}/{n_tot} ({100.0*n_blk/max(1,n_tot):.1f}%)")

print("\n‚úÖ Partie 2 termin√©e.")


Shapes (clean) ‚Üí train: (398, 30) test: (171, 30)
‚ö° G√©n√©ration adversaires pour TRAIN (student)‚Ä¶


PGD - Batches:   0%|          | 0/7 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/7 [00:00<?, ?it/s]

‚ö° G√©n√©ration adversaires pour TEST (student)‚Ä¶


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]

Adversaires (student) ‚Üí train: (1194, 30) test: (1539, 30)
pos_weight (BCE) = 0.333  [n_neg=398, n_pos=1194]

üîß Entra√Ænement du d√©tecteur (embeddings student)‚Ä¶
[DET][01/35] loss_tr=0.3450 | acc=0.680 prec=0.944 rec=0.685 f1=0.794 auc=0.712
[DET][02/35] loss_tr=0.3317 | acc=0.727 prec=0.930 rec=0.753 f1=0.832 auc=0.720
[DET][03/35] loss_tr=0.3299 | acc=0.618 prec=0.952 rec=0.606 f1=0.740 auc=0.728
[DET][04/35] loss_tr=0.3280 | acc=0.618 prec=0.953 rec=0.606 f1=0.741 auc=0.731
[DET][05/35] loss_tr=0.3331 | acc=0.623 prec=0.952 rec=0.611 f1=0.745 auc=0.728
[DET][06/35] loss_tr=0.3260 | acc=0.726 prec=0.936 rec=0.747 f1=0.831 auc=0.722
[DET][07/35] loss_tr=0.3249 | acc=0.660 prec=0.949 rec=0.658 f1=0.777 auc=0.738
[DET][08/35] loss_tr=0.3241 | acc=0.640 prec=0.949 rec=0.634 f1=0.760 auc=0.737
[DET][09/35] loss_tr=0.3226 | acc=0.637 prec=0.951 rec=0.630 f1=0.758 auc=0.733
[DET][10/35] loss_tr=0.3253 | acc=0.626 prec=0.954 rec=0.614 f1=0.747 auc=0.732
[DET][11/35] loss_tr=0.3220 | a