# Seeds

In [53]:
# Cell 0 — Seeds (minimal)
import os, random, numpy as np, torch
SEED = 42

# Seed the Python and NumPy global generators with the same value ...
for seed_fn in (random.seed, np.random.seed):
    seed_fn(SEED)
# ... and the PyTorch default generator last, so the cell still ends on the
# same expression as before.
torch.manual_seed(SEED)


<torch._C.Generator at 0x786a3d74fd70>

# Chargement et prétraitement des données

Imports & schéma de colonnes

In [54]:
# =========================
# Cell 1 — Imports & colonnes
# =========================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import TensorDataset, DataLoader

# Column schema of the WDBC (Breast Cancer Wisconsin) file: an id, the diagnosis
# label, then each measurement repeated for the three statistics, in that order.
_WDBC_MEASUREMENTS = (
    "radius", "texture", "perimeter", "area", "smoothness",
    "compactness", "concavity", "concave_points", "symmetry", "fractal_dimension",
)
_WDBC_STATISTICS = ("mean", "se", "worst")

columns = ["id", "diagnosis"] + [
    f"{measurement}_{statistic}"
    for statistic in _WDBC_STATISTICS
    for measurement in _WDBC_MEASUREMENTS
]


Chargement + préparation X/y

In [55]:
# =========================
# Cell 2 — Load the CSV and build X / y
# =========================
# The file is read without a header row; column names come from `columns`.
df = pd.read_csv("wdbc.data", header=None, names=columns)

# Split into X (features) and y (binary target): 0 = benign, 1 = malignant.
X = df.drop(['id', 'diagnosis'], axis=1)
y = df['diagnosis'].map({'B': 0, 'M': 1})

# Series.map leaves NaN for every label missing from the mapping; fail here
# with a clear message instead of later, when y is converted to a long tensor.
if y.isna().any():
    unexpected = sorted(df.loc[y.isna(), 'diagnosis'].astype(str).unique())
    raise ValueError(f"Unexpected diagnosis labels in wdbc.data: {unexpected}")

print("Dimensions complètes :", X.shape)
print("Répartition classes :", y.value_counts().to_dict())


Dimensions complètes : (569, 30)
Répartition classes : {0: 357, 1: 212}


Split STRATIFIÉ + Scaling fit-on-train

In [56]:
# =========================
# Cell 3 — Stratified split + scaling fitted on the training split
# =========================
# Split BEFORE fitting the scaler so no statistic of the test rows leaks into it.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Standardisation: fit on TRAIN only, then apply the same transform to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

print(f"Taille du train set : {X_train.shape[0]} échantillons")
print(f"Taille du test set  : {X_test.shape[0]} échantillons")


Taille du train set : 398 échantillons
Taille du test set  : 171 échantillons


TensorDataset & DataLoaders

In [57]:
# =========================
# Cell 4 — TensorDataset & DataLoaders
# =========================
def _as_tensor_pair(features, labels):
    """Return (float32 feature tensor, long label tensor) built from one split."""
    return (
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(labels.values, dtype=torch.long),
    )


# Conversion to PyTorch tensors
X_train_tensor, y_train_tensor = _as_tensor_pair(X_train, y_train)
X_test_tensor, y_test_tensor = _as_tensor_pair(X_test, y_test)

# Datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# DataLoaders: only the training loader is shuffled.
batch_size = 64
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

print("Batch size :", batch_size)


Batch size : 64


Device (CPU)

In [58]:
# =========================
# Cell 5 — Device (pinned to CPU)
# =========================
import torch
# Models and batches in the cells below are moved to this device via .to(device).
# It is hard-coded to CPU, so a GPU is not used even when one is available.
device = torch.device("cpu")
print("Device utilisé :", device)


Device utilisé : cpu


# Définition du modèle MLP

In [59]:
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    """Three-hidden-layer perceptron with BatchNorm, ReLU and Dropout.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout; the final
    Linear layer returns raw logits (no softmax).

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Sequence whose first three entries give the widths of
            the three hidden layers. The default is a tuple so that no mutable
            object is shared between calls.
        dropout_rate: Drop probability of the single Dropout module that is
            reused after every hidden stage.
        num_classes: Width of the output layer (number of logits). Defaults to
            2, the value that was previously hard-coded.
    """

    def __init__(self, input_size=30, hidden_sizes=(128, 64, 32), dropout_rate=0.5, num_classes=2):
        super().__init__()

        # Layers are created in the same order and under the same attribute
        # names as before, so random initialisation and state_dict keys match.
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.bn1 = nn.BatchNorm1d(hidden_sizes[0])

        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.bn2 = nn.BatchNorm1d(hidden_sizes[1])

        self.fc3 = nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.bn3 = nn.BatchNorm1d(hidden_sizes[2])

        self.fc4 = nn.Linear(hidden_sizes[2], num_classes)

        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        """Return the logits for a batch `x` whose last dimension is `input_size`."""
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)

        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)

        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)

        return self.fc4(x)



# Entraînement du modèle

In [60]:
# Pinned to the same ART release as the adversarial-evaluation cell further down;
# %pip installs into the environment of the running kernel.
%pip install -q adversarial-robustness-toolbox==1.17.1



In [61]:
# =========================
# Bloc A — Entraînement TEACHER (CE standard, CPU)
# =========================
import time, copy, torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Teacher network (same architecture as the student), its loss and its optimiser.
teacher = MLP().to(device)
crit_ce = nn.CrossEntropyLoss()
opt_t = torch.optim.AdamW(teacher.parameters(), weight_decay=1e-4, lr=1e-3)

# Training budget and early-stopping bookkeeping.
EPOCHS_T = 30
PATIENCE_T = 5
best_val_t = float("inf")
no_impr_t = 0
# Snapshot of the initial weights; the training loop overwrites it with better ones.
best_state_t = copy.deepcopy(teacher.state_dict())

def evaluate_loss_acc(model, loader, criterion=None, eval_device=None):
    """Return (mean loss, accuracy) of `model` over every sample in `loader`.

    Args:
        model: Module mapping a feature batch to class logits.
        loader: Iterable of (features, labels) batches.
        criterion: Loss called as ``criterion(logits, labels)``. Defaults to
            the notebook-level ``crit_ce``.
        eval_device: Device the batches are moved to. Defaults to the
            notebook-level ``device``.

    Returns:
        Tuple ``(loss, accuracy)``. The loss is the sample-weighted mean of
        the per-batch losses. Both values are 0.0 when the loader is empty.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    if criterion is None:
        criterion = crit_ce
    if eval_device is None:
        eval_device = device

    model.eval()
    loss_sum, correct, n = 0.0, 0, 0
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(eval_device), yb.to(eval_device)
            logits = model(xb)
            batch_len = yb.size(0)
            # Weight each batch loss by its size so a short final batch is not over-counted.
            loss_sum += criterion(logits, yb).item() * batch_len
            n += batch_len
            correct += (logits.argmax(1) == yb).sum().item()
    # max(1, n) guards the division when the loader yields nothing.
    return loss_sum / max(1, n), correct / max(1, n)

for epoch in range(1, EPOCHS_T + 1):
    teacher.train()
    t0 = time.time()
    run_loss = 0.0
    n = 0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        opt_t.zero_grad(set_to_none=True)
        logits = teacher(xb)
        loss = crit_ce(logits, yb)
        loss.backward()
        opt_t.step()
        # Sample-weighted running sum, so the epoch loss is a per-sample mean.
        run_loss += loss.item() * yb.size(0)
        n += yb.size(0)

    tr_loss = run_loss / max(1, n)
    # No dedicated val_loader, so test_loader drives early stopping (pragmatic).
    # NOTE(review): the kept checkpoint is therefore selected on the test split,
    # which makes test metrics reported later optimistic.
    val_loss, val_acc = evaluate_loss_acc(teacher, test_loader)
    print(f"[Teacher] Ep {epoch:02d} | train_loss={tr_loss:.4f} | val_loss={val_loss:.4f} | "
          f"val_acc={val_acc:.3f} | {time.time()-t0:.1f}s")

    # An epoch counts as an improvement only if the loss drops by more than 1e-4.
    improved = val_loss < best_val_t - 1e-4
    if not improved:
        no_impr_t += 1
        if no_impr_t >= PATIENCE_T:
            print("Early stopping (teacher).")
            break
        continue

    best_val_t = val_loss
    no_impr_t = 0
    best_state_t = copy.deepcopy(teacher.state_dict())

# Restore the best weights seen and leave the teacher in inference mode.
teacher.load_state_dict(best_state_t)
teacher.eval()
print("✅ Teacher prêt.")


[Teacher] Ep 01 | train_loss=0.6982 | val_loss=0.6282 | val_acc=0.924 | 0.0s
[Teacher] Ep 02 | train_loss=0.5740 | val_loss=0.5238 | val_acc=0.942 | 0.0s
[Teacher] Ep 03 | train_loss=0.4910 | val_loss=0.4292 | val_acc=0.947 | 0.0s
[Teacher] Ep 04 | train_loss=0.4341 | val_loss=0.3728 | val_acc=0.942 | 0.0s
[Teacher] Ep 05 | train_loss=0.3863 | val_loss=0.3134 | val_acc=0.942 | 0.1s
[Teacher] Ep 06 | train_loss=0.3451 | val_loss=0.2736 | val_acc=0.953 | 0.1s
[Teacher] Ep 07 | train_loss=0.3321 | val_loss=0.2621 | val_acc=0.936 | 0.0s
[Teacher] Ep 08 | train_loss=0.2906 | val_loss=0.2352 | val_acc=0.936 | 0.1s
[Teacher] Ep 09 | train_loss=0.2632 | val_loss=0.2052 | val_acc=0.953 | 0.0s
[Teacher] Ep 10 | train_loss=0.2396 | val_loss=0.1900 | val_acc=0.947 | 0.0s
[Teacher] Ep 11 | train_loss=0.2073 | val_loss=0.1711 | val_acc=0.947 | 0.0s
[Teacher] Ep 12 | train_loss=0.1983 | val_loss=0.1581 | val_acc=0.953 | 0.0s
[Teacher] Ep 13 | train_loss=0.1873 | val_loss=0.1416 | val_acc=0.959 | 0.0s

In [62]:
# =========================
# Bloc B — Entraînement STUDENT par distillation (T, α)
# =========================
import copy
import torch.nn as nn

student = MLP().to(device)

# Distillation hyper-parameters (to be tuned)
T = 8.0           # temperature (classic range: 4–20)
ALPHA = 0.7       # weight of the distillation term (0.5–0.8)
LR_S = 1e-3
EPOCHS_S = 30
PATIENCE_S = 5

kldiv = nn.KLDivLoss(reduction="batchmean")   # KL(p_teacher || p_student)
opt_s = torch.optim.AdamW(student.parameters(), weight_decay=1e-4, lr=LR_S)

# Freeze the teacher: inference mode, and no gradients for any of its parameters.
teacher.eval()
teacher.requires_grad_(False)

def distill_loss(student_logits, teacher_logits, y_true, T=T, alpha=ALPHA):
    """Blend a temperature-softened KL term with hard-label cross-entropy.

    Args:
        student_logits: Raw student outputs; gradients flow through them.
        teacher_logits: Raw teacher outputs; their softmax is taken under no_grad.
        y_true: Ground-truth class indices for the hard-label term.
        T: Temperature dividing both sets of logits. The default is the value
            of the notebook-level ``T`` when this function is defined.
        alpha: Weight of the distillation term; the hard term gets ``1 - alpha``.
            The default is the notebook-level ``ALPHA`` at definition time.

    Returns:
        ``alpha * T^2 * kldiv(student, teacher) + (1 - alpha) * crit_ce(student, y_true)``.
    """
    soft_student = torch.log_softmax(student_logits / T, dim=1)
    with torch.no_grad():
        soft_teacher = torch.softmax(teacher_logits / T, dim=1)
    # T^2 factor on the softened term.
    soft_term = kldiv(soft_student, soft_teacher) * (T * T)
    hard_term = crit_ce(student_logits, y_true)
    return alpha * soft_term + (1.0 - alpha) * hard_term

def eval_student(model, loader):
    """Return (mean cross-entropy loss, accuracy) of `model` over `loader`.

    This used to be a statement-for-statement copy of ``evaluate_loss_acc``
    from the teacher cell. It now delegates to that helper so the two
    evaluations cannot drift apart. As before, the model is left in eval mode.
    """
    return evaluate_loss_acc(model, loader)

# Early-stopping bookkeeping, starting from a snapshot of the initial weights.
best_val_s = float("inf")
no_impr_s = 0
best_state_s = copy.deepcopy(student.state_dict())

for epoch in range(1, EPOCHS_S + 1):
    student.train()
    t0 = time.time()
    run_loss = 0.0
    n = 0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        # Soft targets from the teacher; no graph is built for them.
        with torch.no_grad():
            t_logits = teacher(xb)

        opt_s.zero_grad(set_to_none=True)
        s_logits = student(xb)
        loss = distill_loss(s_logits, t_logits, yb, T, ALPHA)
        loss.backward()
        opt_s.step()

        run_loss += loss.item() * yb.size(0)
        n += yb.size(0)

    tr_loss = run_loss / max(1, n)
    # train_loss is the distillation objective, while val_loss is plain
    # cross-entropy on test_loader, so the two are not on the same scale.
    val_loss, val_acc = eval_student(student, test_loader)
    print(f"[Student] Ep {epoch:02d} | train_loss={tr_loss:.4f} | val_loss={val_loss:.4f} "
          f"| val_acc={val_acc:.3f} | {time.time()-t0:.1f}s")

    # An epoch counts as an improvement only if the loss drops by more than 1e-4.
    improved = val_loss < best_val_s - 1e-4
    if not improved:
        no_impr_s += 1
        if no_impr_s >= PATIENCE_S:
            print("Early stopping (student).")
            break
        continue

    best_val_s = val_loss
    no_impr_s = 0
    best_state_s = copy.deepcopy(student.state_dict())

# Restore the best weights seen and leave the student in inference mode.
student.load_state_dict(best_state_s)
student.eval()
print("✅ Student prêt.")


[Student] Ep 01 | train_loss=1.7481 | val_loss=0.6634 | val_acc=0.386 | 0.0s
[Student] Ep 02 | train_loss=1.3922 | val_loss=0.5366 | val_acc=0.906 | 0.0s
[Student] Ep 03 | train_loss=1.2031 | val_loss=0.4021 | val_acc=0.953 | 0.0s
[Student] Ep 04 | train_loss=1.0356 | val_loss=0.3144 | val_acc=0.953 | 0.0s
[Student] Ep 05 | train_loss=0.8745 | val_loss=0.2594 | val_acc=0.959 | 0.0s
[Student] Ep 06 | train_loss=0.7632 | val_loss=0.2196 | val_acc=0.959 | 0.0s
[Student] Ep 07 | train_loss=0.6857 | val_loss=0.1969 | val_acc=0.965 | 0.0s
[Student] Ep 08 | train_loss=0.5691 | val_loss=0.1716 | val_acc=0.965 | 0.0s
[Student] Ep 09 | train_loss=0.4981 | val_loss=0.1506 | val_acc=0.977 | 0.1s
[Student] Ep 10 | train_loss=0.4686 | val_loss=0.1418 | val_acc=0.977 | 0.1s
[Student] Ep 11 | train_loss=0.4725 | val_loss=0.1365 | val_acc=0.965 | 0.1s
[Student] Ep 12 | train_loss=0.3653 | val_loss=0.1236 | val_acc=0.971 | 0.1s
[Student] Ep 13 | train_loss=0.3276 | val_loss=0.1108 | val_acc=0.982 | 0.1s

# Évaluation du modèle sur données propres et adversariales

In [63]:
# =========================
# Block C — Clean evaluation of the STUDENT (positive = class 1 = malignant)
# =========================
y_true, y_pred, y_prob = [], [], []
student.eval()
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        logits = student(xb)
        # Column 1 of the softmax is the probability of class 1 (malignant).
        prob_pos = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
        y_true.extend(yb.numpy())
        y_pred.extend(logits.argmax(1).cpu().numpy())
        y_prob.extend(prob_pos)

# Threshold metrics use the argmax predictions; AUC uses the class-1 probability.
print("DEF(Distillation-MLP) Test | "
      f"acc:{accuracy_score(y_true, y_pred):.4f} | "
      f"prec:{precision_score(y_true, y_pred, zero_division=0):.4f} | "
      f"rec:{recall_score(y_true, y_pred, zero_division=0):.4f} | "
      f"f1:{f1_score(y_true, y_pred, zero_division=0):.4f} | "
      f"auc:{roc_auc_score(y_true, y_prob):.4f}")


DEF(Distillation-MLP) Test | acc:0.9825 | prec:0.9841 | rec:0.9688 | f1:0.9764 | auc:0.9978


In [65]:
# =========================
# (Option) Bloc D — Évaluation adversariale (FGSM/PGD/BIM) avec ART
# =========================
!pip -q install adversarial-robustness-toolbox==1.17.1
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescent, BasicIterativeMethod
import numpy as np

# Wrap the STUDENT (final model) in an ART classifier.
# Per-feature clip bounds are taken in the standardised space the model is fed with.
# NOTE(review): the bounds come from the training split only; test rows lying
# outside them would presumably be clipped by ART on top of the eps budget — confirm.
train_min = X_train_tensor.min(dim=0)
train_max = X_train_tensor.max(dim=0)
clip_min = train_min.values.numpy()
clip_max = train_max.values.numpy()

art_clf = PyTorchClassifier(
    model=student,
    loss=crit_ce,
    optimizer=torch.optim.Adam(student.parameters(), lr=1e-3),
    nb_classes=2,
    input_shape=(30,),
    clip_values=(clip_min, clip_max),
    device_type="cpu",
)

# NumPy views of the test split, used as attack inputs and as reference labels.
X_test_np, y_test_np = X_test_tensor.numpy(), y_test_tensor.numpy()

def eval_adv(X_adv, y_true, name):
    """Score the notebook-level `student` on `X_adv` and print its metrics.

    Args:
        X_adv: Array-like of inputs (e.g. adversarial examples), converted to
            a float32 tensor.
        y_true: Reference labels aligned with the rows of `X_adv`.
        name: Label printed in front of the metrics.

    Prints accuracy, precision, recall, F1 and ROC-AUC; returns nothing.
    """
    # The MLP contains Dropout and BatchNorm, whose outputs differ between
    # train() and eval(). Set the mode here rather than relying on whichever
    # cell ran last.
    student.eval()
    # Same device handling as the other evaluation code in the notebook.
    Xa = torch.tensor(X_adv, dtype=torch.float32).to(device)
    with torch.no_grad():
        logits = student(Xa)
        probs  = torch.softmax(logits, dim=1)[:,1].cpu().numpy()
        preds  = logits.argmax(1).cpu().numpy()
    print(f"\n[name={name}] "
          f"acc={accuracy_score(y_true, preds):.4f} "
          f"prec={precision_score(y_true, preds, zero_division=0):.4f} "
          f"rec={recall_score(y_true, preds, zero_division=0):.4f} "
          f"f1={f1_score(y_true, preds, zero_division=0):.4f} "
          f"auc={roc_auc_score(y_true, probs):.4f}")

# Simple grid of perturbation budgets (in the standardised feature space)
EPS_LIST = [0.1, 0.2, 0.3]

# FGSM: a single step of size eps.
for eps in EPS_LIST:
    fgsm = FastGradientMethod(art_clf, eps=eps)
    X_fgsm = fgsm.generate(x=X_test_np)
    eval_adv(X_fgsm, y_test_np, f"FGSM eps={eps}")

# PGD: 20 iterations of step eps/10, L-inf norm, untargeted, no random restart.
for eps in EPS_LIST:
    pgd = ProjectedGradientDescent(
        art_clf,
        norm=np.inf,
        eps=eps,
        eps_step=eps/10,
        max_iter=20,
        targeted=False,
        num_random_init=0,
    )
    X_pgd = pgd.generate(x=X_test_np)
    eval_adv(X_pgd, y_test_np, f"PGD eps={eps} step={eps/10} it=20")

# BIM: 10 iterations of step eps/10.
for eps in EPS_LIST:
    bim = BasicIterativeMethod(art_clf, eps=eps, eps_step=eps/10, max_iter=10)
    X_bim = bim.generate(x=X_test_np)
    eval_adv(X_bim, y_test_np, f"BIM eps={eps} step={eps/10} it=10")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/1.7 MB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━[0m [32m1.4/1.7 MB[0m [31m20.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25h
[name=FGSM eps=0.1] acc=0.9532 prec=0.9828 rec=0.8906 f1=0.9344 auc=0.9950

[name=FGSM eps=0.2] acc=0.8830 prec=0.8438 rec=0.8438 f1=0.8438 auc=0.9619

[name=FGSM eps=0.3] acc=0.8012 prec=0.7344 rec=0.7344 f1=0.7344 auc=0.8769


PGD - Batches:   0%|          | 0/6 [00:00<?, ?it/s]


[name=PGD eps=0.1 step=0.01 it=20] acc=0.9532 prec=0.9828 rec=0.8906 f1=0.9344 auc=0.9947


PGD - Batches:   0%|          | 0/6 [00:00<?, ?it/s]


[name=PGD eps=0.2 step=0.02 it=20] acc=0.8713 prec=0.8182 rec=0.8438 f1=0.8308 auc=0.9546


PGD - Batches:   0%|          | 0/6 [00:00<?, ?it/s]


[name=PGD eps=0.3 step=0.03 it=20] acc=0.7778 prec=0.6912 rec=0.7344 f1=0.7121 auc=0.8490


PGD - Batches:   0%|          | 0/6 [00:00<?, ?it/s]


[name=BIM eps=0.1 step=0.01 it=10] acc=0.9532 prec=0.9828 rec=0.8906 f1=0.9344 auc=0.9949


PGD - Batches:   0%|          | 0/6 [00:00<?, ?it/s]


[name=BIM eps=0.2 step=0.02 it=10] acc=0.8772 prec=0.8308 rec=0.8438 f1=0.8372 auc=0.9588


PGD - Batches:   0%|          | 0/6 [00:00<?, ?it/s]


[name=BIM eps=0.3 step=0.03 it=10] acc=0.7836 prec=0.7015 rec=0.7344 f1=0.7176 auc=0.8640


In [66]:
# =========================
# (Option) Bloc D' — C&W-L2 (mêmes réglages que baseline/MLP & MAT-MLP)
# =========================
from art.attacks.evasion import CarliniL2Method



# Settings shared by both C&W-L2 configurations.
cw_shared = dict(classifier=art_clf, targeted=False, confidence=0.0, batch_size=64)

# --- C&W fast: one binary-search step, 75 iterations ---
cw_fast = CarliniL2Method(
    learning_rate=0.02,
    max_iter=75,
    binary_search_steps=1,
    initial_const=0.3,
    **cw_shared,
)
X_cw_fast = cw_fast.generate(x=X_test_np)
eval_adv(X_cw_fast, y_test_np, "C&W-L2 FAST (conf=0, c0=0.3, it=75, bsearch=1, lr=0.02)")

# --- C&W strong: seven binary-search steps, 500 iterations ---
cw_strong = CarliniL2Method(
    learning_rate=0.01,
    max_iter=500,
    binary_search_steps=7,
    initial_const=0.01,
    **cw_shared,
)
X_cw_strong = cw_strong.generate(x=X_test_np)
eval_adv(X_cw_strong, y_test_np, "C&W-L2 STRONG (conf=0, c0=0.01, it=500, bsearch=7, lr=0.01)")


C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]


[name=C&W-L2 FAST (conf=0, c0=0.3, it=75, bsearch=1, lr=0.02)] acc=0.9240 prec=0.9180 rec=0.8750 f1=0.8960 auc=0.9939


C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]


[name=C&W-L2 STRONG (conf=0, c0=0.01, it=500, bsearch=7, lr=0.01)] acc=0.8363 prec=0.7500 rec=0.8438 f1=0.7941 auc=0.9683
