# Seeds

In [90]:
# Cell 0 — Seeds (minimal)
import os, random, numpy as np, torch
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


<torch._C.Generator at 0x7e608260fe70>

# chargement et pretraitement des donnees

Imports & schéma de colonnes

In [91]:
# =========================
# Cell 1 — Imports & colonnes
# =========================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import TensorDataset, DataLoader

# Schéma des colonnes du WDBC (Breast Cancer Wisconsin)
columns = [
    "id", "diagnosis",
    "radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
    "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean", "fractal_dimension_mean",
    "radius_se", "texture_se", "perimeter_se", "area_se", "smoothness_se",
    "compactness_se", "concavity_se", "concave_points_se", "symmetry_se", "fractal_dimension_se",
    "radius_worst", "texture_worst", "perimeter_worst", "area_worst", "smoothness_worst",
    "compactness_worst", "concavity_worst", "concave_points_worst", "symmetry_worst", "fractal_dimension_worst"
]


Chargement + préparation X/y

In [92]:
# =========================
# Cell 2 — Chargement CSV & X/y
# =========================
df = pd.read_csv("wdbc.data", header=None, names=columns)

# Séparer X (features) et y (target binaire)
X = df.drop(['id', 'diagnosis'], axis=1)
y = df['diagnosis'].map({'B': 0, 'M': 1})  # 0 = Bénin, 1 = Malin

print("Dimensions complètes :", X.shape)
print("Répartition classes :", y.value_counts().to_dict())


Dimensions complètes : (569, 30)
Répartition classes : {0: 357, 1: 212}


Split STRATIFIÉ + Scaling fit-on-train

In [93]:
# =========================
# Cell 3 — Split stratifié + scaling fit-on-train
# =========================
#  split AVANT le fit du scaler pour éviter la fuite d'information
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Standardisation (fit sur TRAIN uniquement, puis transform sur TRAIN & TEST)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test  = scaler.transform(X_test)

print(f"Taille du train set : {X_train.shape[0]} échantillons")
print(f"Taille du test set  : {X_test.shape[0]} échantillons")


Taille du train set : 398 échantillons
Taille du test set  : 171 échantillons


TensorDataset & DataLoaders

In [94]:
# =========================
# Cell 4 — TensorDataset & DataLoaders
# =========================
# Conversion en tenseurs PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)

X_test_tensor  = torch.tensor(X_test,  dtype=torch.float32)
y_test_tensor  = torch.tensor(y_test.values, dtype=torch.long)

# Datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset  = TensorDataset(X_test_tensor,  y_test_tensor)

# DataLoaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False)

print("Batch size :", batch_size)


Batch size : 64


Device (CPU)

In [95]:
# =========================
# Cell 5 — Device (CPU fixé)
# =========================
import torch
device = torch.device("cpu")
print("Device utilisé :", device)


Device utilisé : cpu


# definition de model MLP

In [96]:
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, input_size=30, hidden_sizes=[128, 64, 32], dropout_rate=0.5):
        super(MLP, self).__init__()

        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.bn1 = nn.BatchNorm1d(hidden_sizes[0])

        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.bn2 = nn.BatchNorm1d(hidden_sizes[1])

        self.fc3 = nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.bn3 = nn.BatchNorm1d(hidden_sizes[2])

        self.fc4 = nn.Linear(hidden_sizes[2], 2)

        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)

        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)

        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)

        return self.fc4(x)



# entrainement de model et evaluation de model sur donnees propres

In [97]:
!pip install adversarial-robustness-toolbox



In [98]:
# =========================================
# Random Noise Injection (RNI) — MLP tabulaire (WDBC), CPU-only
# =========================================
import time, copy, torch, numpy as np
import torch.nn as nn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# ---- Modèle (nouvelle instance dédiée RNI)
model_rni = MLP().to(device)

criterion = nn.CrossEntropyLoss()
optimizer_rni = torch.optim.AdamW(model_rni.parameters(), lr=1e-3, weight_decay=1e-4)
EPOCHS, PATIENCE = 30, 5

best_val, no_impr = float("inf"), 0
best_state = copy.deepcopy(model_rni.state_dict())

# ---- Bornes par feature (dans l'espace standardisé)
with torch.no_grad():
    X_MIN = X_train_tensor.min(dim=0).values.to(device)  # (30,)
    X_MAX = X_train_tensor.max(dim=0).values.to(device)

def clamp_per_feature(x):
    return torch.max(torch.min(x, X_MAX), X_MIN)

# ---- Hyperparamètres du bruit (standardisé)
SIGMA_STD     = 0.15   # écart-type du bruit gaussien (tabulaire standardisé)
LAMBDA_NOISE  = 0.30   # pondération de la perte bruitée

def add_gaussian_noise_std(x, sigma=SIGMA_STD):
    z = torch.randn_like(x) * sigma
    return clamp_per_feature(x + z)

# ---- Eval légère (loss/acc) pour suivi d'entraînement
@torch.no_grad()
def eval_loss_acc(model, loader):
    model.eval(); loss_sum, n, correct = 0.0, 0, 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss = criterion(logits, yb)
        loss_sum += loss.item() * yb.size(0); n += yb.size(0)
        correct += (logits.argmax(1) == yb).sum().item()
    return loss_sum/max(1,n), correct/max(1,n)

# ---- Choix du loader de validation (si pas de val_loader, on utilise test_loader)
try:
    val_loader
except NameError:
    val_loader = test_loader

# ---- Entraînement (mix clean + noisy)
for epoch in range(1, EPOCHS+1):
    model_rni.train(); t0 = time.time()
    run_loss, n = 0.0, 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        xb_noisy = add_gaussian_noise_std(xb, SIGMA_STD)

        optimizer_rni.zero_grad(set_to_none=True)
        logits_clean = model_rni(xb)
        logits_noisy = model_rni(xb_noisy)

        loss_clean = criterion(logits_clean, yb)
        loss_noisy = criterion(logits_noisy, yb)
        loss = (1.0 - LAMBDA_NOISE) * loss_clean + LAMBDA_NOISE * loss_noisy

        loss.backward(); optimizer_rni.step()
        run_loss += loss.item() * yb.size(0); n += yb.size(0)

    train_loss = run_loss / max(1,n)
    val_loss, val_acc = eval_loss_acc(model_rni, val_loader)
    print(f"[RNI-MLP] Epoch {epoch:02d} | train_loss={train_loss:.4f} | "
          f"val_loss={val_loss:.4f} | val_acc={val_acc:.4f} | {time.time()-t0:.1f}s")

    if val_loss < best_val - 1e-4:
        best_val = val_loss; no_impr = 0
        best_state = copy.deepcopy(model_rni.state_dict())
    else:
        no_impr += 1
        if no_impr >= PATIENCE:
            print("Early stopping."); break

# ---- Évaluation CLEAN (métriques complètes)
model_rni.load_state_dict(best_state); model_rni.eval()
y_true, y_pred, y_prob = [], [], []
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        logits = model_rni(xb)
        prob1 = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
        y_prob.extend(prob1)
        y_pred.extend(logits.argmax(1).cpu().numpy())
        y_true.extend(yb.numpy())

print("DEF(RNI-MLP) Test | acc:{:.4f} | prec:{:.4f} | rec:{:.4f} | f1:{:.4f} | auc:{:.4f}".format(
    accuracy_score(y_true, y_pred),
    precision_score(y_true, y_pred, pos_label=1, zero_division=0),
    recall_score(y_true, y_pred,    pos_label=1, zero_division=0),
    f1_score(y_true, y_pred,        pos_label=1, zero_division=0),
    roc_auc_score(y_true, y_prob),
))


[RNI-MLP] Epoch 01 | train_loss=0.6869 | val_loss=0.5751 | val_acc=0.9240 | 0.0s
[RNI-MLP] Epoch 02 | train_loss=0.5513 | val_loss=0.4322 | val_acc=0.9532 | 0.0s
[RNI-MLP] Epoch 03 | train_loss=0.4578 | val_loss=0.3571 | val_acc=0.9415 | 0.0s
[RNI-MLP] Epoch 04 | train_loss=0.3998 | val_loss=0.3175 | val_acc=0.9474 | 0.0s
[RNI-MLP] Epoch 05 | train_loss=0.3691 | val_loss=0.2722 | val_acc=0.9474 | 0.1s
[RNI-MLP] Epoch 06 | train_loss=0.3281 | val_loss=0.2549 | val_acc=0.9532 | 0.1s
[RNI-MLP] Epoch 07 | train_loss=0.2893 | val_loss=0.2421 | val_acc=0.9474 | 0.1s
[RNI-MLP] Epoch 08 | train_loss=0.2759 | val_loss=0.2197 | val_acc=0.9415 | 0.0s
[RNI-MLP] Epoch 09 | train_loss=0.2454 | val_loss=0.1909 | val_acc=0.9591 | 0.0s
[RNI-MLP] Epoch 10 | train_loss=0.2245 | val_loss=0.1766 | val_acc=0.9532 | 0.0s
[RNI-MLP] Epoch 11 | train_loss=0.2091 | val_loss=0.1473 | val_acc=0.9591 | 0.0s
[RNI-MLP] Epoch 12 | train_loss=0.1947 | val_loss=0.1395 | val_acc=0.9591 | 0.0s
[RNI-MLP] Epoch 13 | train_l

# evaluation de model sur donnees adv

In [99]:
# =========================================
# Attaques ART (FGSM/PGD/BIM/C&W) — MLP tabulaire (standardisé), CPU-only
# =========================================
!pip -q install adversarial-robustness-toolbox==1.17.1
import numpy as np, torch
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescent, BasicIterativeMethod, CarliniL2Method

# Données numpy dans l'espace standardisé (déjà scalé)
X_test_np = X_test_tensor.numpy().astype(np.float32)
y_test_np = y_test_tensor.numpy().astype(np.int64)

# Clip par feature cohérent avec ton train (per-feature bounds)
clip_min = X_train_tensor.min(dim=0).values.cpu().numpy()
clip_max = X_train_tensor.max(dim=0).values.cpu().numpy()

# Classifier ART (CPU-only)
_dummy_opt = torch.optim.SGD(model_rni.parameters(), lr=0.0)
art_clf = PyTorchClassifier(
    model=model_rni,
    loss=nn.CrossEntropyLoss(),
    optimizer=_dummy_opt,
    input_shape=(30,),
    nb_classes=2,
    clip_values=(clip_min, clip_max),
    preprocessing=None,
    device_type="cpu",
)

# Éval utilitaire (tabulaire standardisé)
def eval_adv_np(X_adv, y_true, name):
    with torch.no_grad():
        xt = torch.from_numpy(X_adv).float().to(device)
        logits = model_rni(xt)
        probs1 = torch.softmax(logits, dim=1)[:,1].cpu().numpy()
        yhat   = logits.argmax(1).cpu().numpy()
    print(f"[name={name}] "
          f"acc={accuracy_score(y_true, yhat):.4f} "
          f"prec={precision_score(y_true, yhat, zero_division=0):.4f} "
          f"rec={recall_score(y_true, yhat, zero_division=0):.4f} "
          f"f1={f1_score(y_true, yhat, zero_division=0):.4f} "
          f"auc={roc_auc_score(y_true, probs1):.4f}")

# Grille epsilon (même style que CNN)
EPS_LIST = [0.1, 0.2, 0.3]
BATCH_EVAL = 64

# FGSM
for eps in EPS_LIST:
    atk = FastGradientMethod(art_clf, eps=eps, batch_size=BATCH_EVAL)
    X_fgsm = atk.generate(x=X_test_np)
    eval_adv_np(X_fgsm, y_test_np, f"FGSM eps={eps}")

# PGD (L_inf)
for eps in EPS_LIST:
    atk = ProjectedGradientDescent(
        art_clf, eps=eps, eps_step=eps/10, max_iter=20, norm=np.inf,
        targeted=False, num_random_init=0, batch_size=BATCH_EVAL
    )
    X_pgd = atk.generate(x=X_test_np)
    eval_adv_np(X_pgd, y_test_np, f"PGD eps={eps} step={eps/10:.02f} it=20")

# BIM
for eps in EPS_LIST:
    atk = BasicIterativeMethod(
        art_clf, eps=eps, eps_step=eps/10, max_iter=10, batch_size=BATCH_EVAL
    )
    X_bim = atk.generate(x=X_test_np)
    eval_adv_np(X_bim, y_test_np, f"BIM eps={eps} step={eps/10:.02f} it=10")

# C&W-L2 (FAST et STRONG)
cw_fast = CarliniL2Method(
    classifier=art_clf, targeted=False, confidence=0.0,
    learning_rate=0.02, max_iter=75, binary_search_steps=1, initial_const=0.3, batch_size=BATCH_EVAL
)
X_cw_fast = cw_fast.generate(x=X_test_np)
eval_adv_np(X_cw_fast, y_test_np, "C&W-L2 FAST (c0=0.3,it=75,bs=1,lr=0.02)")

cw_strong = CarliniL2Method(
    classifier=art_clf, targeted=False, confidence=0.0,
    learning_rate=0.01, max_iter=500, binary_search_steps=7, initial_const=0.01, batch_size=BATCH_EVAL
)
X_cw_strong = cw_strong.generate(x=X_test_np)
eval_adv_np(X_cw_strong, y_test_np, "C&W-L2 STRONG (c0=0.01,it=500,bs=7,lr=0.01)")


[name=FGSM eps=0.1] acc=0.9240 prec=0.9180 rec=0.8750 f1=0.8960 auc=0.9895
[name=FGSM eps=0.2] acc=0.8304 prec=0.7778 rec=0.7656 f1=0.7717 auc=0.9216
[name=FGSM eps=0.3] acc=0.7310 prec=0.6324 rec=0.6719 f1=0.6515 auc=0.7824


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

[name=PGD eps=0.1 step=0.01 it=20] acc=0.9240 prec=0.9180 rec=0.8750 f1=0.8960 auc=0.9893


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

[name=PGD eps=0.2 step=0.02 it=20] acc=0.8246 prec=0.7656 rec=0.7656 f1=0.7656 auc=0.9083


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

[name=PGD eps=0.3 step=0.03 it=20] acc=0.6842 prec=0.5694 rec=0.6406 f1=0.6029 auc=0.7503


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

[name=BIM eps=0.1 step=0.01 it=10] acc=0.9240 prec=0.9180 rec=0.8750 f1=0.8960 auc=0.9893


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

[name=BIM eps=0.2 step=0.02 it=10] acc=0.8246 prec=0.7656 rec=0.7656 f1=0.7656 auc=0.9144


PGD - Batches:   0%|          | 0/3 [00:00<?, ?it/s]

[name=BIM eps=0.3 step=0.03 it=10] acc=0.6959 prec=0.5833 rec=0.6562 f1=0.6176 auc=0.7647


C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]

[name=C&W-L2 FAST (c0=0.3,it=75,bs=1,lr=0.02)] acc=0.8713 prec=0.8088 rec=0.8594 f1=0.8333 auc=0.9817


C&W L_2:   0%|          | 0/3 [00:00<?, ?it/s]

[name=C&W-L2 STRONG (c0=0.01,it=500,bs=7,lr=0.01)] acc=0.7135 prec=0.6000 rec=0.7031 f1=0.6475 auc=0.9140
