In [6]:

# 2. Dézipper dans l'espace local de la machine virtuelle (C'est le secret de la vitesse)
# Remplacez 'NomDuFichierTelecharge.zip' par le vrai nom s'il diffère après le téléchargement
!unzip -q "/content/drive/MyDrive/PHOTOCL.zip" -d "/content/PHOTOCL"

print("Dataset prêt dans /content/PHOTOCL !")

Dataset prêt dans /content/PHOTOCL !


In [7]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Configuration du Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Chemin vers votre dataset décompressé
DATA_DIR = "/content/PHOTOCL/dataset_original"  # <--- Modifiez ceci avec votre chemin réel

Using device: cuda


In [8]:
class PhotoCLBinaryDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Définition des classes binaires
        # Photo = 1, Autres = 0
        self.class_mapping = {
            'Photo': 1,
            'Painting': 0,
            'Schematics': 0,
            'Sketch': 0,
            'Text': 0
        }

        # Parcours des dossiers
        for class_name, label in self.class_mapping.items():
            class_path = os.path.join(root_dir, class_name)
            if not os.path.exists(class_path):
                continue

            files = [os.path.join(class_path, f) for f in os.listdir(class_path)
                     if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

            self.image_paths.extend(files)
            self.labels.extend([label] * len(files))

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # Chargement image (RGB)
        try:
            image = Image.open(img_path).convert("RGB")
        except:
            # Gestion basique d'erreur : retourne une image noire si fichier corrompu
            image = Image.new('RGB', (224, 224))

        if self.transform:
            image = self.transform(image)

        return image, label

# 1. Définir les transformations spécifiques au ViT
# ViT attend une normalisation ImageNet spécifique
vit_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # ViT patch size requires fixed input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# 2. Charger le dataset complet
full_dataset = PhotoCLBinaryDataset(DATA_DIR, transform=vit_transforms)

# 3. Stratified Split (80% Train, 20% Val)
# On utilise les labels pour s'assurer que la répartition 0/1 est la même dans train et val
train_idx, val_idx = train_test_split(
    list(range(len(full_dataset))),
    test_size=0.2,
    stratify=full_dataset.labels,
    random_state=42
)

train_dataset = Subset(full_dataset, train_idx)
val_dataset = Subset(full_dataset, val_idx)

# 4. DataLoaders
BATCH_SIZE = 32 # Ajustez selon votre VRAM (16 ou 32 pour ViT)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print(f"Train size: {len(train_dataset)}, Val size: {len(val_dataset)}")

Train size: 33119, Val size: 8280


In [None]:
def get_vit_model(num_classes=2):
    print("Loading Pretrained ViT-B/16...")
    # Chargement des poids par défaut (ImageNet)
    weights = models.ViT_B_16_Weights.DEFAULT
    model = models.vit_b_16(weights=weights)

    # Freeze des couches de base (Feature Extractor)
    # Cela permet d'entraîner plus vite et évite l'overfitting sur un petit dataset
    for param in model.parameters():
        param.requires_grad = False

    # Remplacement de la tête de classification (The "heads" block in torchvision ViT)
    # Input dim of ViT-B/16 head is 768
    model.heads = nn.Sequential(
        nn.Linear(768, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, num_classes) # Sortie : 2 classes (Photo vs Autre)
    )

    return model

model = get_vit_model().to(device)

Loading Pretrained ViT-B/16...
Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth


100%|██████████| 330M/330M [00:02<00:00, 165MB/s]


In [None]:
from tqdm import tqdm
criterion = nn.CrossEntropyLoss()
# On optimise seulement les paramètres de la nouvelle tête (heads)
optimizer = optim.AdamW(model.heads.parameters(), lr=1e-3, weight_decay=1e-4)

num_epochs = 5  # ViT converge vite en transfer learning
history = {'train_loss': [], 'val_acc': []}
print("Starting training...")

for epoch in range(num_epochs):
    # --- Training Phase ---
    model.train()
    running_loss = 0.0

    # Wrap train_loader with tqdm
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]", unit="batch")

    for inputs, labels in train_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Update progress bar with current batch loss
        train_bar.set_postfix(loss=loss.item())

    avg_loss = running_loss / len(train_loader)
    history['train_loss'].append(avg_loss)

    # --- Validation Phase ---
    model.eval()
    correct = 0
    total = 0

    # Wrap val_loader with tqdm
    val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]", unit="batch", leave=False)

    with torch.no_grad():
        for inputs, labels in val_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Optional: show rolling accuracy in val bar
            val_bar.set_postfix(acc=f"{(100 * correct / total):.2f}%")

    val_acc = 100 * correct / total
    history['val_acc'].append(val_acc)

    print(f"Summary Epoch {epoch+1}: Loss: {avg_loss:.4f}, Val Accuracy: {val_acc:.2f}%")

print("Training finished.")

Starting training...


Epoch 1/5 [Train]: 100%|██████████| 1035/1035 [08:24<00:00,  2.05batch/s, loss=0.00311]


Summary Epoch 1: Loss: 0.0450, Val Accuracy: 98.91%


Epoch 2/5 [Train]: 100%|██████████| 1035/1035 [08:23<00:00,  2.06batch/s, loss=0.000802]


Summary Epoch 2: Loss: 0.0179, Val Accuracy: 99.01%


Epoch 3/5 [Train]: 100%|██████████| 1035/1035 [08:18<00:00,  2.07batch/s, loss=0.0247]


Summary Epoch 3: Loss: 0.0087, Val Accuracy: 99.03%


Epoch 4/5 [Train]: 100%|██████████| 1035/1035 [08:20<00:00,  2.07batch/s, loss=5.77e-7]


Summary Epoch 4: Loss: 0.0065, Val Accuracy: 99.07%


Epoch 5/5 [Train]: 100%|██████████| 1035/1035 [08:20<00:00,  2.07batch/s, loss=9.19e-7]
                                                                                 

Summary Epoch 5: Loss: 0.0068, Val Accuracy: 99.17%
Training finished.




In [9]:
def evaluate_model(model, loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Calcul des métriques
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted') # Weighted pour gérer le déséquilibre si présent

    print(f"Final Accuracy: {acc:.4f}")
    print(f"Final F1-Score: {f1:.4f}")
    print("\nClassification Report:\n")
    # Target names: 0 = Autres, 1 = Photo
    print(classification_report(y_true, y_pred, target_names=['Others', 'Photo']))

    return y_true, y_pred

# Lancer l'évaluation finale
true_labels, predictions = evaluate_model(model, val_loader)

# Sauvegarder le modèle pour la Phase 2
torch.save(model.state_dict(), "vit_photocl_classifier.pth")
print("Modèle sauvegardé : vit_photocl_classifier.pth")

NameError: name 'model' is not defined

In [10]:
def get_resnet_model(num_classes=2):
    print("Loading Pretrained ResNet-50...")
    # Chargement des poids par défaut
    weights = models.ResNet50_Weights.DEFAULT
    model = models.resnet50(weights=weights)

    # Freeze des couches de base
    for param in model.parameters():
        param.requires_grad = False

    # Remplacement de la couche finale (FC)
    # ResNet50 a 2048 features en entrée de la couche fully connected
    in_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, num_classes)
    )
    return model

def get_efficientnet_model(num_classes=2):
    print("Loading Pretrained EfficientNet-B0...")
    # Chargement des poids par défaut
    weights = models.EfficientNet_B0_Weights.DEFAULT
    model = models.efficientnet_b0(weights=weights)

    # Freeze des couches de base
    for param in model.parameters():
        param.requires_grad = False

    # Remplacement du classifier
    # EfficientNet a une structure 'classifier' spécifique.
    # La couche Dropout est souvent déjà présente (index 0), on change la Linear (index 1)
    in_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(in_features, num_classes)

    return model

In [11]:
# === CELLULE RESNET ===
import torch.optim as optim
from tqdm import tqdm
import time

print(f"{'='*20} Démarrage ResNet50 {'='*20}")

# 1. Définition du modèle ResNet50
print("Chargement de ResNet50 pré-entraîné...")
weights = models.ResNet50_Weights.DEFAULT
model_resnet = models.resnet50(weights=weights)

# Freeze des couches (on ne touche pas aux features extraites)
for param in model_resnet.parameters():
    param.requires_grad = False

# Remplacement de la tête de classification (Fully Connected)
# ResNet50 sort 2048 features avant la couche finale
in_features = model_resnet.fc.in_features
model_resnet.fc = nn.Sequential(
    nn.Linear(in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 2)  # 2 classes : Photo vs Autre
)

model_resnet = model_resnet.to(device)

# 2. Configuration Entraînement
criterion = nn.CrossEntropyLoss()
# On optimise uniquement la nouvelle tête (.fc)
optimizer = optim.AdamW(model_resnet.fc.parameters(), lr=1e-3, weight_decay=1e-4)
num_epochs = 5

train_loss_history_resnet = []
val_acc_history_resnet = []

# 3. Boucle d'entraînement
for epoch in range(num_epochs):
    start_time = time.time()

    # --- Train ---
    model_resnet.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"[ResNet] Epoch {epoch+1}/{num_epochs} [Train]", leave=False, colour='green')

    for inputs, labels in train_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model_resnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    epoch_loss = running_loss / len(train_loader)
    train_loss_history_resnet.append(epoch_loss)

    # --- Validation ---
    model_resnet.eval()
    correct = 0
    total = 0
    val_bar = tqdm(val_loader, desc=f"[ResNet] Epoch {epoch+1}/{num_epochs} [Val  ]", leave=False, colour='blue')

    with torch.no_grad():
        for inputs, labels in val_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model_resnet(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            val_bar.set_postfix(acc=f"{(100 * correct / total):.2f}%")

    epoch_acc = 100 * correct / total
    val_acc_history_resnet.append(epoch_acc)

    print(f"Epoch {epoch+1} | Loss: {epoch_loss:.4f} | Val Acc: {epoch_acc:.2f}% | Time: {time.time() - start_time:.1f}s")

# 4. Sauvegarde et Évaluation Rapide
torch.save(model_resnet.state_dict(), "resnet_photocl.pth")
print("\nModèle sauvegardé : resnet_photocl.pth")

print("Évaluation finale ResNet...")
evaluate_model(model_resnet, val_loader)

# Libérer la mémoire
del model_resnet
torch.cuda.empty_cache()

Chargement de ResNet50 pré-entraîné...




Epoch 1 | Loss: 0.0708 | Val Acc: 98.70% | Time: 484.5s




Epoch 2 | Loss: 0.0452 | Val Acc: 98.57% | Time: 478.7s




Epoch 3 | Loss: 0.0349 | Val Acc: 98.88% | Time: 477.2s




Epoch 4 | Loss: 0.0288 | Val Acc: 98.79% | Time: 475.2s




Epoch 5 | Loss: 0.0258 | Val Acc: 99.00% | Time: 476.9s

Modèle sauvegardé : resnet_photocl.pth
Évaluation finale ResNet...
Final Accuracy: 0.9900
Final F1-Score: 0.9900

Classification Report:

              precision    recall  f1-score   support

      Others       0.99      0.99      0.99      6281
       Photo       0.98      0.98      0.98      1999

    accuracy                           0.99      8280
   macro avg       0.99      0.99      0.99      8280
weighted avg       0.99      0.99      0.99      8280



In [12]:
# === CELLULE EFFICIENTNET ===
import torch.optim as optim
from tqdm import tqdm
import time

print(f"{'='*20} Démarrage EfficientNet-B0 {'='*20}")

# 1. Définition du modèle EfficientNet
print("Chargement de EfficientNet-B0 pré-entraîné...")
weights = models.EfficientNet_B0_Weights.DEFAULT
model_eff = models.efficientnet_b0(weights=weights)

# Freeze des couches
for param in model_eff.parameters():
    param.requires_grad = False

# Remplacement de la tête de classification
# EfficientNet a un bloc 'classifier'. Index 1 est la couche linéaire.
in_features = model_eff.classifier[1].in_features
model_eff.classifier[1] = nn.Linear(in_features, 2) # 2 classes

model_eff = model_eff.to(device)

# 2. Configuration Entraînement
criterion = nn.CrossEntropyLoss()
# On optimise uniquement le classifier
optimizer = optim.AdamW(model_eff.classifier.parameters(), lr=1e-3, weight_decay=1e-4)
num_epochs = 5

train_loss_history_eff = []
val_acc_history_eff = []

# 3. Boucle d'entraînement
for epoch in range(num_epochs):
    start_time = time.time()

    # --- Train ---
    model_eff.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"[EfficientNet] Epoch {epoch+1}/{num_epochs} [Train]", leave=False, colour='magenta')

    for inputs, labels in train_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model_eff(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        train_bar.set_postfix(loss=f"{loss.item():.4f}")

    epoch_loss = running_loss / len(train_loader)
    train_loss_history_eff.append(epoch_loss)

    # --- Validation ---
    model_eff.eval()
    correct = 0
    total = 0
    val_bar = tqdm(val_loader, desc=f"[EfficientNet] Epoch {epoch+1}/{num_epochs} [Val  ]", leave=False, colour='cyan')

    with torch.no_grad():
        for inputs, labels in val_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model_eff(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            val_bar.set_postfix(acc=f"{(100 * correct / total):.2f}%")

    epoch_acc = 100 * correct / total
    val_acc_history_eff.append(epoch_acc)

    print(f"Epoch {epoch+1} | Loss: {epoch_loss:.4f} | Val Acc: {epoch_acc:.2f}% | Time: {time.time() - start_time:.1f}s")

# 4. Sauvegarde et Évaluation Rapide
torch.save(model_eff.state_dict(), "efficientnet_photocl.pth")
print("\nModèle sauvegardé : efficientnet_photocl.pth")

print("Évaluation finale EfficientNet...")
evaluate_model(model_eff, val_loader)

# Libérer la mémoire
del model_eff
torch.cuda.empty_cache()

Chargement de EfficientNet-B0 pré-entraîné...
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████| 20.5M/20.5M [00:00<00:00, 153MB/s]


Epoch 1 | Loss: 0.1802 | Val Acc: 96.17% | Time: 449.7s




Epoch 2 | Loss: 0.1329 | Val Acc: 96.64% | Time: 442.9s




Epoch 3 | Loss: 0.1299 | Val Acc: 96.76% | Time: 445.2s




Epoch 4 | Loss: 0.1234 | Val Acc: 96.86% | Time: 453.4s




Epoch 5 | Loss: 0.1173 | Val Acc: 97.03% | Time: 442.2s

Modèle sauvegardé : efficientnet_photocl.pth
Évaluation finale EfficientNet...
Final Accuracy: 0.9703
Final F1-Score: 0.9703

Classification Report:

              precision    recall  f1-score   support

      Others       0.98      0.98      0.98      6281
       Photo       0.94      0.94      0.94      1999

    accuracy                           0.97      8280
   macro avg       0.96      0.96      0.96      8280
weighted avg       0.97      0.97      0.97      8280

