In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# NOTE(review): kagglehub.login() presumably asks for Kaggle credentials
# interactively (TODO confirm); it has to run before the dataset_download
# calls in the next cell so that private datasets can be fetched.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
#
# Each call returns the location of one downloaded dataset.
# NOTE(review): none of the *_path variables assigned here is read by the
# later cells of this notebook; the dataset objects are built from hard-coded
# "/kaggle/input/..." directories instead. Outside Kaggle those directories
# presumably have to be derived from these variables -- verify before running.

xhlulu_140k_real_and_fake_faces_path = kagglehub.dataset_download('xhlulu/140k-real-and-fake-faces')
sokhnaballytour_test_stylegan_path = kagglehub.dataset_download('sokhnaballytour/test-stylegan')
sokhnaballytour_test_progan_path = kagglehub.dataset_download('sokhnaballytour/test-progan')
sokhnaballytour_val_dataset_path = kagglehub.dataset_download('sokhnaballytour/val-dataset')
sokhnaballytour_train_dataset_path = kagglehub.dataset_download('sokhnaballytour/train-dataset')
sokhnaballytour_data_iris_path = kagglehub.dataset_download('sokhnaballytour/data-iris')

print('Data source import complete.')


In [None]:
import dlib
import cv2
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Adam
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torch.utils.data import Dataset
from PIL import Image
import os
from torchvision import transforms

class AttentionModule(nn.Module):
    """Channel-preserving attention block.

    Two branches read the same input:

    * ``trunk_branch``: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, all with
      ``in_channels`` channels.
    * ``soft_mask_branch``: a 1x1 bottleneck (``in_channels // 2``) followed by
      a 1x1 expansion, BN and a sigmoid, giving a gate in (0, 1).

    The output is ``trunk * (1 + mask)``, so the gate can only amplify the
    trunk features, never suppress them below their original value.

    Args:
        in_channels: number of channels of the input (and output) feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        bottleneck = in_channels // 2

        # Layers are created in the same order as they are registered so that
        # parameter initialisation and state_dict keys stay stable.
        trunk_layers = [
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(in_channels),
        ]
        self.trunk_branch = nn.Sequential(*trunk_layers)

        mask_layers = [
            nn.Conv2d(in_channels, bottleneck, kernel_size=1),
            nn.BatchNorm2d(bottleneck),
            nn.ReLU(),
            nn.Conv2d(bottleneck, in_channels, kernel_size=1),
            nn.BatchNorm2d(in_channels),
            nn.Sigmoid(),
        ]
        self.soft_mask_branch = nn.Sequential(*mask_layers)

    def forward(self, x):
        """Return the trunk features scaled by ``1 + soft mask``."""
        features = self.trunk_branch(x)
        gate = self.soft_mask_branch(x)
        return features * (1 + gate)

class ResidualAttentionNetwork(nn.Module):
    """Small three-stage CNN with an attention module after every stage.

    Layout: conv stem (3 -> 16) + max-pool, then two stride-2 convolutions
    (16 -> 32 -> 64); each stage is followed by an ``AttentionModule``.
    Global average pooling and a linear layer produce ``num_classes`` outputs.

    ``forward`` returns **raw logits**. Every consumer in this notebook
    expects logits: the training criterion is ``nn.BCEWithLogitsLoss`` and the
    evaluation helpers call ``torch.sigmoid`` on the model output. Applying a
    sigmoid here as well squashed the signal twice, confining the loss input
    to (0, 1) and the evaluated "probabilities" to roughly (0.5, 0.73).
    Use ``predict_proba`` when probabilities are needed directly (for example
    with ``nn.BCELoss``).

    Args:
        num_classes: size of the output layer (1 for binary classification).
    """

    def __init__(self, num_classes=1):
        super(ResidualAttentionNetwork, self).__init__()
        # Input stem
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Attention modules, one per stage
        self.attention1 = AttentionModule(16)  # 16 input channels
        self.attention2 = AttentionModule(32)  # 32 input channels
        self.attention3 = AttentionModule(64)  # 64 input channels

        # Stride-2 convolutions that widen the channels between stages
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.bn3 = nn.BatchNorm2d(64)

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)
        # Kept as an attribute for code that accesses `model.sigmoid`; it is
        # only applied by `predict_proba`, never by `forward`.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.attention1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.attention2(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.attention3(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # No sigmoid here: the loss and the evaluation code apply it themselves.
        return self.fc(x)

    def predict_proba(self, x):
        """Return sigmoid probabilities for callers that do not want logits."""
        return self.sigmoid(self.forward(x))


def wmw_auc_loss(y_pred, y_true, gamma=0.4, p=2):
    """Wilcoxon-Mann-Whitney style surrogate for (1 - AUC).

    Every (positive, negative) score pair whose difference is smaller than
    ``gamma`` is penalised by ``(gamma - diff) ** p``; pairs separated by at
    least ``gamma`` contribute zero. The result is the mean over all pairs.

    Args:
        y_pred: predicted scores, indexable by the boolean masks built from
            ``y_true``.
        y_true: labels; entries equal to 1 are positives, equal to 0 negatives.
        gamma: required margin between a positive and a negative score.
        p: exponent applied to the margin violation (intended for p >= 1).

    Returns:
        A scalar tensor attached to ``y_pred``'s autograd graph. When the
        batch contains no positive or no negative sample the value is 0.
    """
    positive = y_pred[y_true == 1]
    negative = y_pred[y_true == 0]
    if positive.numel() == 0 or negative.numel() == 0:
        # No pair can be formed. Return a zero that still depends on y_pred
        # so that calling .backward() on this loss alone does not fail.
        return (y_pred * 0.0).sum()

    diff = positive.unsqueeze(1) - negative.unsqueeze(0)
    # Clamp before the power: evaluating a fractional power on the negative
    # entries and masking afterwards (torch.where) yields NaN gradients.
    violation = torch.clamp(gamma - diff, min=0)
    return (violation ** p).mean()

def combined_loss(y_pred, y_true, alpha=0.4):
    """Weighted sum of binary cross-entropy and the WMW AUC surrogate.

    Args:
        y_pred: predicted probabilities (binary cross-entropy is computed on
            them directly, without a sigmoid).
        y_true: target tensor, passed unchanged to both loss terms.
        alpha: weight of the cross-entropy term; the AUC term gets
            ``1 - alpha``.

    Returns:
        ``alpha * BCE + (1 - alpha) * wmw_auc_loss``.
    """
    cross_entropy_term = F.binary_cross_entropy(y_pred, y_true)
    ranking_term = wmw_auc_loss(y_pred, y_true)
    return alpha * cross_entropy_term + (1 - alpha) * ranking_term


def init_weights(m):
    """Initialise a ``Conv2d`` module in place; leave other modules untouched.

    Intended for ``model.apply(init_weights)``: the weight gets Kaiming-normal
    initialisation (fan_out, ReLU gain) and the bias, when present, is zeroed.
    """
    if not isinstance(m, nn.Conv2d):
        return
    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)


In [None]:
from torch.utils.data import DataLoader, Subset, Dataset
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import roc_curve
import numpy as np
from PIL import Image
import os
from tqdm import tqdm
import torch
import torch.nn as nn
from torchvision import transforms
import pandas as pd
from torchmetrics import CohenKappa
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    matthews_corrcoef,
    f1_score,
    confusion_matrix,
    cohen_kappa_score,
    log_loss,
    recall_score,
    precision_score,
)

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Hyperparameters
learning_rate = 0.001  # passed to the Adam optimizer as lr
batch_size = 128       # used by the train, validation and test DataLoaders
num_epochs = 20        # number of passes of the training loop
# NOTE(review): `alpha` is not read by any code visible in this notebook
# (combined_loss has its own `alpha` default) -- presumably a leftover.
alpha = 0.4

class IrisDataset(Dataset):
    """Binary image dataset built from a "real" and a "fake" directory.

    Files found directly in ``real_iris_dir`` get label 0, files found in
    ``fake_iris_dir`` get label 1. Samples are ordered real-first, each group
    sorted by file name so that indices are reproducible across runs and
    machines (``os.listdir`` gives no ordering guarantee).

    Args:
        real_iris_dir: directory holding the real images.
        fake_iris_dir: directory holding the generated images.
        transform: optional callable applied to the RGB ``PIL.Image``.

    Attributes:
        real_images / fake_images: file paths per class.
        all_images: ``real_images + fake_images``.
        labels: list of ints aligned with ``all_images``.
    """

    def __init__(self, real_iris_dir, fake_iris_dir, transform=None):
        self.real_images = self._list_files(real_iris_dir)
        self.fake_images = self._list_files(fake_iris_dir)
        self.all_images = self.real_images + self.fake_images
        self.labels = [0] * len(self.real_images) + [1] * len(self.fake_images)
        self.transform = transform

    @staticmethod
    def _list_files(directory):
        """Return the sorted paths of the regular files directly in ``directory``."""
        candidates = (os.path.join(directory, name) for name in sorted(os.listdir(directory)))
        # Sub-directories cannot be opened as images, so they are skipped
        # here instead of failing later inside __getitem__.
        return [path for path in candidates if os.path.isfile(path)]

    def __len__(self):
        return len(self.all_images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is RGB."""
        img_path = self.all_images[idx]
        # convert() returns an independent, fully loaded image, so the file
        # handle can be closed right away instead of lingering until GC.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        label = self.labels[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label

def evaluate_model(model, data_loader, device, threshold=0.5):
    """Run ``model`` over ``data_loader`` and collect labels and predictions.

    The model output is passed through ``torch.sigmoid``, i.e. it is treated
    as one raw logit per sample. A model whose ``forward`` already applies a
    sigmoid would be squashed twice here.

    Args:
        model: module called as ``model(images)``; put into eval mode.
        data_loader: iterable of ``(images, labels)`` batches.
        device: device the images are moved to before the forward pass.
        threshold: probabilities ``>= threshold`` are predicted as class 1.

    Returns:
        Tuple ``(labels, preds, probs)`` of 1-D NumPy arrays with one entry
        per sample, in loader order.
    """
    model.eval()
    all_preds = []
    all_probs = []
    all_labels = []

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            outputs = model(images)
            # reshape(-1) instead of squeeze(): squeeze() turns a batch of a
            # single sample into a 0-d tensor, which list.extend cannot iterate.
            probs = torch.sigmoid(outputs).reshape(-1)
            preds = (probs >= threshold).float()

            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy().reshape(-1))

    return np.array(all_labels), np.array(all_preds), np.array(all_probs)

def find_optimal_threshold(labels, probs):
    """Pick the ROC threshold that maximises ``TPR - FPR`` (Youden's J).

    Args:
        labels: ground-truth binary labels.
        probs: predicted scores for the positive class.

    Returns:
        The entry of ``roc_curve``'s threshold array at the index where
        ``tpr - fpr`` is largest (first such index on ties).

    NOTE(review): if the first ROC point wins, the returned value is whatever
    sentinel ``roc_curve`` puts at index 0 -- confirm callers can handle it.
    """
    false_pos_rate, true_pos_rate, candidates = roc_curve(labels, probs)
    youden_j = true_pos_rate - false_pos_rate
    best = np.argmax(youden_j)
    return candidates[best]

# Define the preprocessing: resize every image to 64x64, convert it to a
# tensor and normalise each channel with the given mean/std constants
# (these match the commonly used ImageNet statistics).
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the datasets. Directory "0_real" provides label 0 and "1_fake" label 1
# (see IrisDataset). The test split is the "test-progan" dataset; the
# "test-stylegan" download is not used by this cell.
train_dataset = IrisDataset(real_iris_dir="/kaggle/input/train-dataset/train_dataset/0_real",
                           fake_iris_dir="/kaggle/input/train-dataset/train_dataset/1_fake",
                           transform=transform)
val_dataset = IrisDataset(real_iris_dir="/kaggle/input/val-dataset/val_dataset/0_real",
                         fake_iris_dir="/kaggle/input/val-dataset/val_dataset/1_fake",
                         transform=transform)
test_dataset = IrisDataset(real_iris_dir="/kaggle/input/test-progan/progan/person/0_real",
                          fake_iris_dir="/kaggle/input/test-progan/progan/person/1_fake",
                          transform=transform)

# Compute per-class weights from the training labels ('balanced' mode);
# the result is indexed by class, i.e. [weight_real, weight_fake].
class_weights = compute_class_weight('balanced',
                                   classes=np.unique(train_dataset.labels),
                                   y=train_dataset.labels)
class_weights = torch.FloatTensor(class_weights).to(device)

# Build the data loaders; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Same device selection as in the configuration section above (re-derived so
# this part can be re-run on its own).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResidualAttentionNetwork().to(device)
model.apply(init_weights)

# Adam with L2 regularisation (weight decay)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

# Class-weighted criterion. `pos_weight` multiplies only the positive-class
# term, so it must be the positive weight *relative to* the negative one:
# with 'balanced' weights w_c = n / (2 * n_c), w_1 / w_0 == n_negative / n_positive,
# which is the ratio recommended for BCEWithLogitsLoss. Passing w_1 alone is
# only correct when the classes are already balanced (w_0 == w_1 == 1).
# NOTE: BCEWithLogitsLoss applies the sigmoid itself, so the model output fed
# to it must be raw logits.
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights[1] / class_weights[0])

# One dict of metrics per epoch is appended here
metrics_history = []

# Training loop: one optimisation pass over train_loader per epoch, followed
# by a validation pass whose metrics are appended to metrics_history.
for epoch in range(num_epochs):
    model.train()
    train_loss = 0  # running sum of the per-batch losses for this epoch

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # Drop the trailing singleton dimension so the outputs line up with
        # the labels, which are cast to float for the BCE criterion.
        loss = criterion(outputs.squeeze(-1), labels.float())

        loss.backward()
        # Clip the global gradient norm to 1.0 before the optimizer step
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        train_loss += loss.item()

    # Evaluate on the validation set.
    # NOTE(review): the 3-value unpacking matches the evaluate_model defined
    # earlier in this cell; a later cell redefines evaluate_model with a
    # 4-value return, so re-running this loop after that cell fails here.
    val_labels, val_preds, val_probs = evaluate_model(model, val_loader, device)

    # Find the threshold maximising TPR - FPR on the validation data
    optimal_threshold = find_optimal_threshold(val_labels, val_probs)

    # Recompute the hard predictions with that threshold. The threshold is
    # tuned and scored on the same validation data, so the thresholded
    # metrics below are optimistic.
    val_preds = (val_probs >= optimal_threshold).astype(float)

    # Compute and record the metrics of this epoch
    epoch_metrics = {
        'epoch': epoch + 1,
        'train_loss': train_loss/len(train_loader),
        'optimal_threshold': optimal_threshold,
        'accuracy': accuracy_score(val_labels, val_preds),
        'auc': roc_auc_score(val_labels, val_probs),
        'precision': precision_score(val_labels, val_preds),
        'recall': recall_score(val_labels, val_preds),
        'f1': f1_score(val_labels, val_preds),
        'mcc': matthews_corrcoef(val_labels, val_preds)
    }

    metrics_history.append(epoch_metrics)

    print(f"\nEpoch {epoch+1}")
    print(f"Training Loss: {epoch_metrics['train_loss']:.4f}")
    print(f"Optimal threshold: {optimal_threshold:.4f}")
    print(f"Validation Metrics:")
    print(f"Accuracy: {epoch_metrics['accuracy']:.4f}")
    print(f"AUC: {epoch_metrics['auc']:.4f}")
    print(f"F1: {epoch_metrics['f1']:.4f}")

    # Rewrite the CSV with the full history after every epoch, so the file is
    # up to date even if training is interrupted
    pd.DataFrame(metrics_history).to_csv('training_metrics.csv', index=False)

    torch.cuda.empty_cache()

# Final evaluation on the test set.
# `model` is in its state after the last epoch and `optimal_threshold` is the
# value left over from the last iteration of the training loop (no
# best-epoch selection is performed).
test_labels, test_preds, test_probs = evaluate_model(model, test_loader, device, threshold=optimal_threshold)
final_metrics = {
    'accuracy': accuracy_score(test_labels, test_preds),
    'auc': roc_auc_score(test_labels, test_probs),
    'precision': precision_score(test_labels, test_preds),
    'recall': recall_score(test_labels, test_preds),
    'f1': f1_score(test_labels, test_preds),
    'mcc': matthews_corrcoef(test_labels, test_preds)
}

print("\nMétriques finales sur l'ensemble de test:")
for metric, value in final_metrics.items():
    print(f"{metric}: {value:.4f}")

# Save the final metrics as a one-row CSV
pd.DataFrame([final_metrics]).to_csv('final_test_metrics.csv', index=False)

Epoch 1/20: 100%|██████████| 1407/1407 [10:46<00:00,  2.18it/s]



Epoch 1
Training Loss: 0.6926
Optimal threshold: 0.5161
Validation Metrics:
Accuracy: 0.5915
AUC: 0.6125
F1: 0.5909


Epoch 2/20: 100%|██████████| 1407/1407 [05:50<00:00,  4.02it/s]



Epoch 2
Training Loss: 0.6839
Optimal threshold: 0.5072
Validation Metrics:
Accuracy: 0.5920
AUC: 0.6235
F1: 0.5772


Epoch 3/20: 100%|██████████| 1407/1407 [05:27<00:00,  4.30it/s]



Epoch 3
Training Loss: 0.6765
Optimal threshold: 0.5213
Validation Metrics:
Accuracy: 0.6120
AUC: 0.6530
F1: 0.5274


Epoch 4/20: 100%|██████████| 1407/1407 [05:15<00:00,  4.46it/s]



Epoch 4
Training Loss: 0.6681
Optimal threshold: 0.5148
Validation Metrics:
Accuracy: 0.6420
AUC: 0.6780
F1: 0.6175


Epoch 5/20: 100%|██████████| 1407/1407 [05:13<00:00,  4.48it/s]



Epoch 5
Training Loss: 0.6607
Optimal threshold: 0.5342
Validation Metrics:
Accuracy: 0.6815
AUC: 0.7439
F1: 0.6826


Epoch 6/20: 100%|██████████| 1407/1407 [05:11<00:00,  4.51it/s]



Epoch 6
Training Loss: 0.6549
Optimal threshold: 0.5217
Validation Metrics:
Accuracy: 0.6960
AUC: 0.7586
F1: 0.6800


Epoch 7/20: 100%|██████████| 1407/1407 [05:13<00:00,  4.49it/s]



Epoch 7
Training Loss: 0.6502
Optimal threshold: 0.5383
Validation Metrics:
Accuracy: 0.6765
AUC: 0.7342
F1: 0.6416


Epoch 8/20: 100%|██████████| 1407/1407 [05:13<00:00,  4.49it/s]



Epoch 8
Training Loss: 0.6467
Optimal threshold: 0.5257
Validation Metrics:
Accuracy: 0.7085
AUC: 0.7719
F1: 0.7282


Epoch 9/20: 100%|██████████| 1407/1407 [05:13<00:00,  4.48it/s]



Epoch 9
Training Loss: 0.6422
Optimal threshold: 0.5047
Validation Metrics:
Accuracy: 0.6605
AUC: 0.7007
F1: 0.6520


Epoch 10/20: 100%|██████████| 1407/1407 [05:15<00:00,  4.46it/s]



Epoch 10
Training Loss: 0.6382
Optimal threshold: 0.5121
Validation Metrics:
Accuracy: 0.7205
AUC: 0.7926
F1: 0.7201


Epoch 11/20: 100%|██████████| 1407/1407 [05:14<00:00,  4.47it/s]



Epoch 11
Training Loss: 0.6320
Optimal threshold: 0.5923
Validation Metrics:
Accuracy: 0.7370
AUC: 0.8085
F1: 0.7454


Epoch 12/20: 100%|██████████| 1407/1407 [05:14<00:00,  4.47it/s]



Epoch 12
Training Loss: 0.6294
Optimal threshold: 0.5284
Validation Metrics:
Accuracy: 0.7710
AUC: 0.8448
F1: 0.7744


Epoch 13/20: 100%|██████████| 1407/1407 [05:12<00:00,  4.51it/s]



Epoch 13
Training Loss: 0.6261
Optimal threshold: 0.5305
Validation Metrics:
Accuracy: 0.7705
AUC: 0.8484
F1: 0.7784


Epoch 14/20: 100%|██████████| 1407/1407 [05:15<00:00,  4.46it/s]



Epoch 14
Training Loss: 0.6222
Optimal threshold: 0.5073
Validation Metrics:
Accuracy: 0.7505
AUC: 0.8299
F1: 0.7600


Epoch 15/20: 100%|██████████| 1407/1407 [05:12<00:00,  4.51it/s]



Epoch 15
Training Loss: 0.6182
Optimal threshold: 0.5596
Validation Metrics:
Accuracy: 0.7830
AUC: 0.8601
F1: 0.7858


Epoch 16/20: 100%|██████████| 1407/1407 [05:11<00:00,  4.52it/s]



Epoch 16
Training Loss: 0.6146
Optimal threshold: 0.5147
Validation Metrics:
Accuracy: 0.7865
AUC: 0.8617
F1: 0.7975


Epoch 17/20: 100%|██████████| 1407/1407 [05:13<00:00,  4.48it/s]



Epoch 17
Training Loss: 0.6096
Optimal threshold: 0.5104
Validation Metrics:
Accuracy: 0.8055
AUC: 0.8787
F1: 0.8164


Epoch 18/20: 100%|██████████| 1407/1407 [05:12<00:00,  4.51it/s]



Epoch 18
Training Loss: 0.6049
Optimal threshold: 0.5147
Validation Metrics:
Accuracy: 0.8080
AUC: 0.8858
F1: 0.8116


Epoch 19/20: 100%|██████████| 1407/1407 [05:15<00:00,  4.46it/s]



Epoch 19
Training Loss: 0.6022
Optimal threshold: 0.6001
Validation Metrics:
Accuracy: 0.8120
AUC: 0.8900
F1: 0.8103


Epoch 20/20: 100%|██████████| 1407/1407 [05:14<00:00,  4.47it/s]



Epoch 20
Training Loss: 0.5977
Optimal threshold: 0.6078
Validation Metrics:
Accuracy: 0.7795
AUC: 0.8611
F1: 0.7690

Métriques finales sur l'ensemble de test:
accuracy: 0.7550
auc: 0.8543
precision: 0.8148
recall: 0.6600
f1: 0.7293
mcc: 0.5195


In [None]:
def evaluate_model(model, data_loader, device, threshold=0.5):
    """Run ``model`` over ``data_loader``; collect predictions and log loss.

    The model output is passed through ``torch.sigmoid``, i.e. it is treated
    as one raw logit per sample. A model whose ``forward`` already applies a
    sigmoid would be squashed twice here.

    Args:
        model: module called as ``model(images)``; put into eval mode.
        data_loader: iterable of ``(images, labels)`` batches.
        device: device the images and labels are moved to.
        threshold: probabilities ``>= threshold`` are predicted as class 1.

    Returns:
        Tuple ``(labels, preds, probs, log_loss)``: three 1-D NumPy arrays
        with one entry per sample plus the sample-weighted mean binary log
        loss (``nan`` when the loader yields no samples).
    """
    model.eval()
    all_preds = []
    all_probs = []
    all_labels = []
    total_log_loss = 0.0
    n_samples = 0
    epsilon = 1e-7  # keeps log() away from 0 and 1

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device).reshape(-1)
            outputs = model(images)
            # reshape(-1) instead of squeeze(): squeeze() turns a batch of a
            # single sample into a 0-d tensor, which list.extend cannot iterate.
            probs = torch.sigmoid(outputs).reshape(-1)
            preds = (probs >= threshold).float()

            # Binary log loss of this batch
            targets = labels.float()
            probs_clipped = torch.clamp(probs, epsilon, 1 - epsilon)
            batch_log_loss = -torch.mean(
                targets * torch.log(probs_clipped) +
                (1 - targets) * torch.log(1 - probs_clipped)
            )

            # Weight the batch mean by the batch size so that a smaller last
            # batch does not skew the overall average.
            total_log_loss += batch_log_loss.item() * len(labels)
            n_samples += len(labels)

            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    mean_log_loss = total_log_loss / n_samples if n_samples > 0 else float("nan")
    return np.array(all_labels), np.array(all_preds), np.array(all_probs), mean_log_loss

def calculate_specificity(y_true, y_pred):
    """Return the true-negative rate ``TN / (TN + FP)``.

    Args:
        y_true: array of ground-truth labels (0 = negative).
        y_pred: array of predicted labels, same shape as ``y_true``.

    Returns:
        The specificity, or 0 when ``y_true`` contains no sample that is
        predicted as 0 or 1 among the negatives.
    """
    is_negative = (y_true == 0)
    true_neg = np.sum((y_pred == 0) & is_negative)
    false_pos = np.sum((y_pred == 1) & is_negative)
    negatives_seen = true_neg + false_pos
    if negatives_seen > 0:
        return true_neg / negatives_seen
    return 0


# At the end, for the final evaluation, replace the earlier code with:
# Final evaluation on the test set, using the evaluate_model defined above
# (which also returns the log loss). This repeats the final evaluation of the
# training cell with specificity, Cohen's kappa and log loss added; it relies
# on `model`, `test_loader`, `device` and `optimal_threshold` left in the
# kernel by that cell.
test_labels, test_preds, test_probs, test_log_loss = evaluate_model(
    model, test_loader, device, threshold=optimal_threshold
)
final_metrics = {
    'accuracy': accuracy_score(test_labels, test_preds),
    'auc': roc_auc_score(test_labels, test_probs),
    'precision': precision_score(test_labels, test_preds),
    'recall': recall_score(test_labels, test_preds),
    'specificity': calculate_specificity(test_labels, test_preds),
    'f1': f1_score(test_labels, test_preds),
    'mcc': matthews_corrcoef(test_labels, test_preds),
    'kappa': cohen_kappa_score(test_labels, test_preds),
    'log_loss': test_log_loss
}

print("\nMétriques finales sur l'ensemble de test:")
for metric, value in final_metrics.items():
    print(f"{metric}: {value:.4f}")

# Save the final metrics (overwrites the CSV written by the training cell)
pd.DataFrame([final_metrics]).to_csv('final_test_metrics.csv', index=False)


Métriques finales sur l'ensemble de test:
accuracy: 0.7550
auc: 0.8543
precision: 0.8148
recall: 0.6600
specificity: 0.8500
f1: 0.7293
mcc: 0.5195
kappa: 0.5100
log_loss: 0.6118
