# 3️ Notebook 03 – Préparation des données

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image

from sklearn.metrics import roc_auc_score, accuracy_score

# Chemins projet
PROJECT_ROOT = Path(r"C:\Users\othni\Projects\mvtec_ad")
DATA_CSV = PROJECT_ROOT / "experiments" / "image_level_df.csv"
MODELS_DIR = PROJECT_ROOT / "models"
MODELS_DIR.mkdir(exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [2]:
df = pd.read_csv(DATA_CSV)

print(df.head())
print("\nfinal_split counts :")
print(df["final_split"].value_counts())
print("\nlabel counts (global) :")
print(df["label"].value_counts())

                                                path category  split  label  \
0  C:\Users\othni\Projects\mvtec_ad\data\bottle\t...   bottle  train      0   
1  C:\Users\othni\Projects\mvtec_ad\data\bottle\t...   bottle  train      0   
2  C:\Users\othni\Projects\mvtec_ad\data\bottle\t...   bottle  train      0   
3  C:\Users\othni\Projects\mvtec_ad\data\bottle\t...   bottle  train      0   
4  C:\Users\othni\Projects\mvtec_ad\data\bottle\t...   bottle  train      0   

  defect_type final_split  
0        good       train  
1        good       train  
2        good       train  
3        good       train  
4        good       train  

final_split counts :
final_split
train    3629
test      869
val       856
Name: count, dtype: int64

label counts (global) :
label
0    4096
1    1258
Name: count, dtype: int64


In [3]:
class MVTecImageLevelDataset(Dataset):
    def __init__(self, df_split, transform=None):
        self.df = df_split.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = Path(row["path"])
        label = int(row["label"])

        img = Image.open(img_path).convert("RGB")
        if self.transform is not None:
            img = self.transform(img)

        return img, torch.tensor(label, dtype=torch.float32)

# Transforms & DataLoaders

In [14]:
from sklearn.model_selection import train_test_split

# -----------------------------
# 1) Transforms type ImageNet
# -----------------------------
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

eval_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# --------------------------------------
# 2) Re-split global pour le classifieur
#    (avec des 0 et des 1 dans chaque split)
# --------------------------------------
df_all = df.copy()

# 70% train, 15% val, 15% test, stratifié sur le label
train_df, temp_df = train_test_split(
    df_all,
    test_size=0.30,          # 30% pour val+test
    random_state=42,
    stratify=df_all["label"]
)

val_df, test_df = train_test_split(
    temp_df,
    test_size=0.50,          # moitié-moitié -> 15% / 15%
    random_state=42,
    stratify=temp_df["label"]
)

df_train = train_df.reset_index(drop=True)
df_val   = val_df.reset_index(drop=True)
df_test  = test_df.reset_index(drop=True)

print("Taille train :", len(df_train))
print("Taille val   :", len(df_val))
print("Taille test  :", len(df_test))

print("\nRépartition des labels :")
print("Train:\n", df_train["label"].value_counts())
print("\nVal:\n", df_val["label"].value_counts())
print("\nTest:\n", df_test["label"].value_counts())

# --------------------------------------
# 3) Datasets & DataLoaders
# --------------------------------------
train_ds = MVTecImageLevelDataset(df_train, transform=train_transform)
val_ds   = MVTecImageLevelDataset(df_val,   transform=eval_transform)
test_ds  = MVTecImageLevelDataset(df_test,  transform=eval_transform)

BATCH_SIZE = 32  # à ajuster si besoin

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

Taille train : 3747
Taille val   : 803
Taille test  : 804

Répartition des labels :
Train:
 label
0    2867
1     880
Name: count, dtype: int64

Val:
 label
0    614
1    189
Name: count, dtype: int64

Test:
 label
0    615
1    189
Name: count, dtype: int64


# 4️ Définir le modèle ResNet binaire

## Modèle + loss pondérée

In [15]:
import torch
from torch import nn, optim
from torchvision import models
import numpy as np
from sklearn.metrics import roc_auc_score

# Modèle ResNet-18 pré-entraîné ImageNet
base_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Remplacer la dernière couche (1000 classes) par 1 logit pour classification binaire
in_features = base_model.fc.in_features
base_model.fc = nn.Linear(in_features, 1)

model = base_model.to(device)
model


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [16]:
# Calcul du poids de la classe positive (défauts) pour gérer le déséquilibre
n_pos = (df_train["label"] == 1).sum()
n_neg = (df_train["label"] == 0).sum()
pos_weight_value = n_neg / n_pos
pos_weight = torch.tensor([pos_weight_value], dtype=torch.float32, device=device)

print("Nombre de normaux (0) :", n_neg)
print("Nombre de défauts (1) :", n_pos)
print("pos_weight =", pos_weight_value)

# Loss binaire avec logit + poids sur la classe positive
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Optimiseur
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

# Nombre d'époques (tu pourras augmenter ensuite)
NUM_EPOCHS = 10


Nombre de normaux (0) : 2867
Nombre de défauts (1) : 880
pos_weight = 3.2579545454545453


In [17]:
def run_epoch(model, loader, optimizer=None):
    """
    Si optimizer est None -> mode évaluation (pas de backprop).
    Retourne : loss_moyenne, y_true (numpy), y_scores (numpy, logits).
    """
    is_train = optimizer is not None
    model.train(is_train)

    all_losses = []
    all_labels = []
    all_scores = []

    for imgs, labels in loader:
        imgs = imgs.to(device)
        labels = labels.to(device)  # (B,)

        logits = model(imgs).squeeze(1)  # (B,)
        loss = criterion(logits, labels)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        all_losses.append(loss.item())
        all_labels.append(labels.detach().cpu().numpy())
        all_scores.append(logits.detach().cpu().numpy())

    all_labels = np.concatenate(all_labels)
    all_scores = np.concatenate(all_scores)

    avg_loss = float(np.mean(all_losses))
    return avg_loss, all_labels, all_scores


In [18]:
best_val_auc = 0.0
best_model_path = MODELS_DIR / "resnet18_image_level_best.pt"

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{NUM_EPOCHS}")
    
    # --- Train ---
    train_loss, train_y, train_scores = run_epoch(model, train_loader, optimizer=optimizer)
    train_probs = torch.sigmoid(torch.tensor(train_scores)).numpy()
    train_auc = roc_auc_score(train_y, train_probs)

    # --- Validation ---
    val_loss, val_y, val_scores = run_epoch(model, val_loader, optimizer=None)
    val_probs = torch.sigmoid(torch.tensor(val_scores)).numpy()
    val_auc = roc_auc_score(val_y, val_probs)

    print(f"  Train loss: {train_loss:.4f} | AUC: {train_auc:.4f}")
    print(f"  Val   loss: {val_loss:.4f} | AUC: {val_auc:.4f}")

    # Sauvegarde du meilleur modèle (selon AUC val)
    if val_auc > best_val_auc:
        best_val_auc = val_auc
        torch.save(model.state_dict(), best_model_path)
        print(f"  -> Nouveau meilleur modèle sauvegardé: {best_model_path}")



Epoch 1/10
  Train loss: 0.8105 | AUC: 0.8092
  Val   loss: 0.7550 | AUC: 0.8664
  -> Nouveau meilleur modèle sauvegardé: C:\Users\othni\Projects\mvtec_ad\models\resnet18_image_level_best.pt

Epoch 2/10
  Train loss: 0.5823 | AUC: 0.9089
  Val   loss: 0.5143 | AUC: 0.9310
  -> Nouveau meilleur modèle sauvegardé: C:\Users\othni\Projects\mvtec_ad\models\resnet18_image_level_best.pt

Epoch 3/10
  Train loss: 0.5039 | AUC: 0.9348
  Val   loss: 0.4566 | AUC: 0.9425
  -> Nouveau meilleur modèle sauvegardé: C:\Users\othni\Projects\mvtec_ad\models\resnet18_image_level_best.pt

Epoch 4/10
  Train loss: 0.3759 | AUC: 0.9631
  Val   loss: 0.4789 | AUC: 0.9489
  -> Nouveau meilleur modèle sauvegardé: C:\Users\othni\Projects\mvtec_ad\models\resnet18_image_level_best.pt

Epoch 5/10
  Train loss: 0.3777 | AUC: 0.9625
  Val   loss: 0.3942 | AUC: 0.9617
  -> Nouveau meilleur modèle sauvegardé: C:\Users\othni\Projects\mvtec_ad\models\resnet18_image_level_best.pt

Epoch 6/10
  Train loss: 0.3115 | AUC: 

In [19]:
# Recréer un modèle ResNet-18 avec la même architecture
best_model = models.resnet18(weights=None)
in_features = best_model.fc.in_features
best_model.fc = nn.Linear(in_features, 1)

# Charger les poids sauvegardés
best_model.load_state_dict(torch.load(best_model_path, map_location=device))
best_model = best_model.to(device)

# Évaluation sur le test
test_loss, test_y, test_scores = run_epoch(best_model, test_loader, optimizer=None)
test_probs = torch.sigmoid(torch.tensor(test_scores)).numpy()
test_auc = roc_auc_score(test_y, test_probs)

print(f"Test loss: {test_loss:.4f} | Test AUC: {test_auc:.4f}")
print("Nombre d'images test :", len(test_y))

Test loss: 0.3293 | Test AUC: 0.9793
Nombre d'images test : 804


In [20]:
from pathlib import Path

splits_dir = PROJECT_ROOT / "experiments"
splits_dir.mkdir(exist_ok=True)

train_csv_path = splits_dir / "image_level_train_split.csv"
val_csv_path   = splits_dir / "image_level_val_split.csv"
test_csv_path  = splits_dir / "image_level_test_split.csv"

df_train.to_csv(train_csv_path, index=False)
df_val.to_csv(val_csv_path, index=False)
df_test.to_csv(test_csv_path, index=False)

train_csv_path, val_csv_path, test_csv_path

(WindowsPath('C:/Users/othni/Projects/mvtec_ad/experiments/image_level_train_split.csv'),
 WindowsPath('C:/Users/othni/Projects/mvtec_ad/experiments/image_level_val_split.csv'),
 WindowsPath('C:/Users/othni/Projects/mvtec_ad/experiments/image_level_test_split.csv'))