In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

dagnelies_deepfake_faces_path = kagglehub.dataset_download('dagnelies/deepfake-faces')
xhlulu_140k_real_and_fake_faces_path = kagglehub.dataset_download('xhlulu/140k-real-and-fake-faces')
sokhnaballytour_test_progan_path = kagglehub.dataset_download('sokhnaballytour/test-progan')
sokhnaballytour_val_dataset_path = kagglehub.dataset_download('sokhnaballytour/val-dataset')
sokhnaballytour_train_dataset_path = kagglehub.dataset_download('sokhnaballytour/train-dataset')

print('Data source import complete.')


In [None]:
!pip install torch torchvision timm


Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)
  Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)
  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-n

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import (roc_auc_score, accuracy_score, f1_score, precision_score,
                             recall_score, matthews_corrcoef, cohen_kappa_score, log_loss, confusion_matrix)
import timm
from tqdm import tqdm

In [None]:
# -----------------------------
# 1. Backbone Split: RepVGG-A0 (split into blocks)
# -----------------------------
def get_repvgg_blocks():
    base = timm.create_model('repvgg_a0', pretrained=True)
    stages = list(base.stages.children())
    blocks = [
        base.stem,
        stages[0],
        stages[1],
        stages[2],
        stages[3],
        nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(1)
        )
    ]
    classifier = nn.Linear(1280, 1)
    return nn.ModuleList(blocks), classifier

# -----------------------------
# 2. X-Transfer Architecture (Block Alternation)
# -----------------------------
class XTransfer(nn.Module):
    def __init__(self):
        super().__init__()
        self.master_blocks, self.master_head = get_repvgg_blocks()
        self.aux_blocks, _ = get_repvgg_blocks()

    def route_blocks(self, x, start='aux'):
        out = x
        for i in range(len(self.master_blocks)):
            if i % 2 == 0:
                out = self.aux_blocks[i](out) if start == 'aux' else self.master_blocks[i](out)
            else:
                out = self.master_blocks[i](out) if start == 'aux' else self.aux_blocks[i](out)
        out = self.master_head(out)
        return torch.sigmoid(out)

    def forward_master(self, x):
        out = x
        for block in self.master_blocks:
            out = block(out)
        out = self.master_head(out)
        return torch.sigmoid(out)

# -----------------------------
# 3. AUC Loss (WMW approximation)
# -----------------------------
def auc_loss(y_true, y_score, gamma=0.16, p=2.0):
    y_true = y_true.view(-1)
    y_score = y_score.view(-1)
    pos = y_score[y_true == 1]
    neg = y_score[y_true == 0]
    if len(pos) == 0 or len(neg) == 0:
        return torch.tensor(0.0, device=y_score.device)
    diffs = pos.view(-1, 1) - neg.view(1, -1)
    losses = torch.pow(torch.clamp(gamma - diffs, min=0), p)
    return losses.mean()

# -----------------------------
# 4. Loss function
# -----------------------------
def compute_loss(model, x, y, beta=0.6, s=0.01):
    out1 = model.route_blocks(x, start='aux')
    out2 = model.route_blocks(x, start='master')
    out3 = model.forward_master(x)

    BCE = nn.BCELoss()
    L1 = BCE(out1, y)
    L2 = BCE(out2, y)
    L3 = BCE(out3, y)
    LAUC = auc_loss(y, out3)

    alpha = 2 * L3.item() / (L1.item() + L2.item() + 1e-8)
    reg = torch.norm(model.master_head.weight, 2)

    loss = alpha * (L1 + L2) + beta * L3 + (1 - beta) * LAUC + s * reg
    return loss

# -----------------------------
# 5. Transforms & Loaders
# -----------------------------
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(90),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
    transforms.GaussianBlur(3),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

def load_separated_source(train_path, val_path, test_path, batch_size=32):
    train_dataset = datasets.ImageFolder(train_path, transform=train_transform)
    val_dataset = datasets.ImageFolder(val_path, transform=test_transform)
    test_dataset = datasets.ImageFolder(test_path, transform=test_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, val_loader, test_loader

def load_data(domain_dir, batch_size=32):
    train_dataset = datasets.ImageFolder(os.path.join(domain_dir, 'train'), transform=train_transform)
    valid_dataset = datasets.ImageFolder(os.path.join(domain_dir, 'valid'), transform=test_transform)
    test_dataset = datasets.ImageFolder(os.path.join(domain_dir, 'test'), transform=test_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, valid_loader, test_loader

# -----------------------------
# 6. Phase 1 - Train on Source Domain
# -----------------------------
def train_on_source(model, train_loader, valid_loader, device, num_epochs=10, lr=0.002):
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        loop = tqdm(train_loader, desc=f"[Source] Epoch {epoch+1}/{num_epochs}")

        for x, y in loop:
            x, y = x.to(device), y.to(device).float().unsqueeze(1)
            optimizer.zero_grad()
            out = model.forward_master(x)
            loss = F.binary_cross_entropy(out, y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            correct += (out.round().cpu() == y.cpu()).sum().item()
            total += y.size(0)

        scheduler.step()
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {correct/total:.4f}")

# -----------------------------
# 7. Phase 2 - Transfer to Target Domain with X-Transfer
# -----------------------------
def transfer_to_target(model, train_loader, valid_loader, device, num_epochs=10):
    optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        loop = tqdm(train_loader, desc=f"[Transfer] Epoch {epoch+1}/{num_epochs}")

        for x, y in loop:
            x, y = x.to(device), y.to(device).float().unsqueeze(1)
            optimizer.zero_grad()
            loss = compute_loss(model, x, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            out = model.forward_master(x)
            correct += (out.round().cpu() == y.cpu()).sum().item()
            total += y.size(0)

        scheduler.step()
        print(f"Transfer Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {correct/total:.4f}")

# -----------------------------
# 8. Evaluation finale
# -----------------------------
def evaluate_model(model, test_loader, device):
    model.eval()
    y_true, y_pred_probs = [], []
    with torch.no_grad():
        for x_test, y_test in test_loader:
            x_test = x_test.to(device)
            preds = model.forward_master(x_test).cpu().numpy()
            y_true.extend(y_test.numpy())
            y_pred_probs.extend(preds)

    y_pred = [1 if p > 0.5 else 0 for p in y_pred_probs]

    auc = roc_auc_score(y_true, y_pred_probs)
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    kappa = cohen_kappa_score(y_true, y_pred)
    logloss = log_loss(y_true, y_pred_probs)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    specificity = tn / (tn + fp)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"AUC: {auc:.4f}")
    print(f"MCC: {mcc:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Cohen's Kappa: {kappa:.4f}")
    print(f"Log Loss: {logloss:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"Precision: {precision:.4f}")


In [None]:
# -----------------------------
# 5. Transforms & Loaders
# -----------------------------
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(90),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
    transforms.GaussianBlur(3),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

def load_data(train_dir, valid_dir, test_dir, batch_size=32):
    train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
    valid_dataset = datasets.ImageFolder(valid_dir, transform=test_transform)
    test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, valid_loader, test_loader

# -----------------------------
# 6. Training Pipeline
# -----------------------------
def train_xtransfer(model, train_loader, valid_loader, device, num_epochs=10, lr=0.002, beta=0.6, s=0.01):
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

        for x, y in loop:
            x, y = x.to(device), y.to(device).float().unsqueeze(1)

            optimizer.zero_grad()
            loss = compute_loss(model, x, y, beta=beta, s=s)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            preds = model.forward_master(x).round()
            correct += (preds.cpu() == y.cpu()).sum().item()
            total += y.size(0)

        scheduler.step()
        train_acc = correct / total
        avg_loss = running_loss / len(train_loader)

        # Validation
        model.eval()
        val_correct = 0
        val_total = 0
        y_true, y_pred = [], []
        val_loss = 0.0

        with torch.no_grad():
            for x_val, y_val in valid_loader:
                x_val, y_val = x_val.to(device), y_val.to(device).float().unsqueeze(1)
                outputs = model.forward_master(x_val)
                loss = F.binary_cross_entropy(outputs, y_val)
                val_loss += loss.item()
                preds = outputs.round()
                val_correct += (preds.cpu() == y_val.cpu()).sum().item()
                val_total += y_val.size(0)
                y_true.extend(y_val.cpu().numpy())
                y_pred.extend(outputs.cpu().numpy())

        val_acc = val_correct / val_total
        val_auc = roc_auc_score(y_true, y_pred)
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, Train Accuracy: {train_acc:.4f}, "
              f"Valid Loss: {val_loss/len(valid_loader):.4f}, Valid Accuracy: {val_acc:.4f}, Valid ROC-AUC: {val_auc:.4f}")

# -----------------------------
# 7. Final Evaluation
# -----------------------------
def evaluate(model, test_loader, device):
    model.eval()
    y_true, y_pred_probs = [], []
    with torch.no_grad():
        for x_test, y_test in test_loader:
            x_test = x_test.to(device)
            preds = model.forward_master(x_test).cpu().numpy()
            y_true.extend(y_test.numpy())
            y_pred_probs.extend(preds)

    y_pred = [1 if p > 0.5 else 0 for p in y_pred_probs]

    auc = roc_auc_score(y_true, y_pred_probs)
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    kappa = cohen_kappa_score(y_true, y_pred)
    logloss = log_loss(y_true, y_pred_probs)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    specificity = tn / (tn + fp)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"AUC: {auc:.4f}")
    print(f"MCC: {mcc:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Cohen's Kappa: {kappa:.4f}")
    print(f"Log Loss: {logloss:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"Precision: {precision:.4f}")



In [None]:
# -----------------------------
# 8. Exécution
# -----------------------------
device = 'cuda' if torch.cuda.is_available() else 'cpu'

train_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/train'
valid_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/valid'
test_dir  = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/test'

train_loader, valid_loader, test_loader = load_data(train_dir, valid_dir, test_dir)

In [None]:
model = XTransfer()
train_xtransfer(model, train_loader, valid_loader, device, num_epochs=10)
evaluate(model, test_loader, device)

model.safetensors:   0%|          | 0.00/36.6M [00:00<?, ?B/s]

Epoch 1/10: 100%|██████████| 3125/3125 [49:51<00:00,  1.04it/s]


Epoch 1/10, Train Loss: 1.2399, Train Accuracy: 0.8559, Valid Loss: 0.4247, Valid Accuracy: 0.8030, Valid ROC-AUC: 0.9367


Epoch 2/10: 100%|██████████| 3125/3125 [39:44<00:00,  1.31it/s]


Epoch 2/10, Train Loss: 0.8477, Train Accuracy: 0.9436, Valid Loss: 0.6320, Valid Accuracy: 0.7345, Valid ROC-AUC: 0.9609


Epoch 3/10: 100%|██████████| 3125/3125 [39:32<00:00,  1.32it/s]


Epoch 3/10, Train Loss: 0.6867, Train Accuracy: 0.9622, Valid Loss: 0.3394, Valid Accuracy: 0.8591, Valid ROC-AUC: 0.9721


Epoch 4/10: 100%|██████████| 3125/3125 [39:19<00:00,  1.32it/s]


Epoch 4/10, Train Loss: 0.5992, Train Accuracy: 0.9662, Valid Loss: 0.2688, Valid Accuracy: 0.8918, Valid ROC-AUC: 0.9702


Epoch 5/10: 100%|██████████| 3125/3125 [38:35<00:00,  1.35it/s]


Epoch 5/10, Train Loss: 0.5386, Train Accuracy: 0.9664, Valid Loss: 0.5487, Valid Accuracy: 0.7820, Valid ROC-AUC: 0.9791


Epoch 6/10: 100%|██████████| 3125/3125 [38:12<00:00,  1.36it/s]


Epoch 6/10, Train Loss: 0.4960, Train Accuracy: 0.9625, Valid Loss: 0.3977, Valid Accuracy: 0.8435, Valid ROC-AUC: 0.9798


Epoch 7/10: 100%|██████████| 3125/3125 [37:49<00:00,  1.38it/s]


Epoch 7/10, Train Loss: 0.4632, Train Accuracy: 0.9561, Valid Loss: 0.8283, Valid Accuracy: 0.7220, Valid ROC-AUC: 0.9806


Epoch 8/10: 100%|██████████| 3125/3125 [37:53<00:00,  1.37it/s]


Epoch 8/10, Train Loss: 0.4443, Train Accuracy: 0.9483, Valid Loss: 0.4266, Valid Accuracy: 0.8337, Valid ROC-AUC: 0.9829


Epoch 9/10: 100%|██████████| 3125/3125 [37:52<00:00,  1.37it/s]


Epoch 9/10, Train Loss: 0.4366, Train Accuracy: 0.9422, Valid Loss: 0.5105, Valid Accuracy: 0.8105, Valid ROC-AUC: 0.9830


Epoch 10/10: 100%|██████████| 3125/3125 [37:51<00:00,  1.38it/s]


Epoch 10/10, Train Loss: 0.4239, Train Accuracy: 0.9394, Valid Loss: 0.3889, Valid Accuracy: 0.8472, Valid ROC-AUC: 0.9831
Accuracy: 0.8545
AUC: 0.9828
MCC: 0.7371
F1-score: 0.8720
Specificity: 0.7175
Cohen's Kappa: 0.7089
Log Loss: 0.3784
Recall: 0.9914
Precision: 0.7782


# Wang

In [None]:
# -----------------------------
# 8. Exécution
# -----------------------------
device = 'cuda' if torch.cuda.is_available() else 'cpu'

train_dir = '/kaggle/input/train-dataset/train_dataset'
valid_dir = '/kaggle/input/val-dataset/val_dataset'
test_dir  = '/kaggle/input/test-progan/progan/person'

train_loader, valid_loader, test_loader = load_data(train_dir, valid_dir, test_dir)

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader, Subset
import numpy as np
from collections import defaultdict

train_dir = '/kaggle/input/train-dataset/train_dataset'
valid_dir = '/kaggle/input/val-dataset/val_dataset'
test_dir  = '/kaggle/input/test-progan/progan/person'

def load_data(train_dir, valid_dir, test_dir, batch_size=32, train_ratio=1.0, seed=42):
    # Chargement complet des datasets
    train_dataset_full = datasets.ImageFolder(train_dir, transform=train_transform)
    valid_dataset = datasets.ImageFolder(valid_dir, transform=test_transform)
    test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)

    if train_ratio < 1.0:
        # Organiser les indices par classe
        class_indices = defaultdict(list)
        for idx, (_, label) in enumerate(train_dataset_full.samples):
            class_indices[label].append(idx)

        # Équilibrage et sous-échantillonnage
        np.random.seed(seed)
        selected_indices = []

        for label, indices in class_indices.items():
            np.random.shuffle(indices)
            subset_size = int(len(indices) * train_ratio)
            selected_indices.extend(indices[:subset_size])

        # Créer le subset équilibré
        train_dataset = Subset(train_dataset_full, selected_indices)
    else:
        train_dataset = train_dataset_full

    # Loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, valid_loader, test_loader

train_loader, valid_loader, test_loader = load_data(
    train_dir,
    valid_dir,
    test_dir,
    batch_size=32,
    train_ratio=0.4  # Utilise 30% du dataset d'entraînement
)


In [None]:
model = XTransfer()
train_xtransfer(model, train_loader, valid_loader, device, num_epochs=10)
evaluate(model, test_loader, device)

Epoch 1/10: 100%|██████████| 2251/2251 [39:28<00:00,  1.05s/it]


Epoch 1/10, Train Loss: 1.3261, Train Accuracy: 0.8463, Valid Loss: 0.4086, Valid Accuracy: 0.8025, Valid ROC-AUC: 0.9286


Epoch 2/10: 100%|██████████| 2251/2251 [32:01<00:00,  1.17it/s]


Epoch 2/10, Train Loss: 0.8028, Train Accuracy: 0.9549, Valid Loss: 1.2478, Valid Accuracy: 0.6525, Valid ROC-AUC: 0.9063


Epoch 3/10: 100%|██████████| 2251/2251 [31:55<00:00,  1.18it/s]


Epoch 3/10, Train Loss: 0.5832, Train Accuracy: 0.9758, Valid Loss: 0.6789, Valid Accuracy: 0.7535, Valid ROC-AUC: 0.9607


Epoch 4/10: 100%|██████████| 2251/2251 [31:50<00:00,  1.18it/s]


Epoch 4/10, Train Loss: 0.4704, Train Accuracy: 0.9814, Valid Loss: 0.3143, Valid Accuracy: 0.8765, Valid ROC-AUC: 0.9765


Epoch 5/10: 100%|██████████| 2251/2251 [31:50<00:00,  1.18it/s]


Epoch 5/10, Train Loss: 0.3907, Train Accuracy: 0.9835, Valid Loss: 0.4955, Valid Accuracy: 0.8365, Valid ROC-AUC: 0.9779


Epoch 6/10: 100%|██████████| 2251/2251 [31:50<00:00,  1.18it/s]


Epoch 6/10, Train Loss: 0.3539, Train Accuracy: 0.9805, Valid Loss: 0.4497, Valid Accuracy: 0.8525, Valid ROC-AUC: 0.9764


Epoch 7/10: 100%|██████████| 2251/2251 [32:00<00:00,  1.17it/s]


Epoch 7/10, Train Loss: 0.3145, Train Accuracy: 0.9763, Valid Loss: 0.6192, Valid Accuracy: 0.8150, Valid ROC-AUC: 0.9792


Epoch 8/10: 100%|██████████| 2251/2251 [32:02<00:00,  1.17it/s]


Epoch 8/10, Train Loss: 0.2934, Train Accuracy: 0.9715, Valid Loss: 0.4382, Valid Accuracy: 0.8565, Valid ROC-AUC: 0.9823


Epoch 9/10: 100%|██████████| 2251/2251 [32:08<00:00,  1.17it/s]


Epoch 9/10, Train Loss: 0.2905, Train Accuracy: 0.9644, Valid Loss: 0.7989, Valid Accuracy: 0.7785, Valid ROC-AUC: 0.9748


Epoch 10/10: 100%|██████████| 2251/2251 [32:01<00:00,  1.17it/s]


Epoch 10/10, Train Loss: 0.2752, Train Accuracy: 0.9619, Valid Loss: 0.5398, Valid Accuracy: 0.8270, Valid ROC-AUC: 0.9804
Accuracy: 0.7750
AUC: 0.9628
MCC: 0.6092
F1-score: 0.7134
Specificity: 0.9900
Cohen's Kappa: 0.5500
Log Loss: 0.8366
Recall: 0.5600
Precision: 0.9825


# Deepfake Faces

In [None]:
import os
import sys
import sklearn
import tensorflow as tf

import cv2
import pandas as pd
import numpy as np

import plotly.graph_objs as go
from plotly.offline import iplot
from matplotlib import pyplot as plt

def get_data():
    return pd.read_csv('/kaggle/input/deepfake-faces/metadata.csv')

meta=get_data()

real_df = meta[meta["label"] == "REAL"]
fake_df = meta[meta["label"] == "FAKE"]
sample_size = 16293

real_df = real_df.sample(sample_size, random_state=42)
fake_df = fake_df.sample(sample_size, random_state=42)

sample_meta = pd.concat([real_df, fake_df])

from sklearn.model_selection import train_test_split

Train_set, Test_set = train_test_split(sample_meta,test_size=0.2,random_state=42,stratify=sample_meta['label'])
Train_set, Val_set  = train_test_split(Train_set,test_size=0.3,random_state=42,stratify=Train_set['label'])

def retreive_dataset(set_name):
    images,labels=[],[]
    for (img, imclass) in zip(set_name['videoname'], set_name['label']):
        # Construct the image path using os.path.join
        image_path = os.path.join('/kaggle/input/deepfake-faces/faces_224', img[:-4] + '.jpg')

        # Check if the image file exists before attempting to load it
        if os.path.exists(image_path):
            image = cv2.imread(image_path)

            # Check if the image was loaded successfully
            if image is not None:
                images.append(image)
                if(imclass=='FAKE'):
                    labels.append(1)
                else:
                    labels.append(0)
            else:
                print(f"Warning: Could not load image at path: {image_path}")
        else:
            print(f"Warning: Image file does not exist at path: {image_path}")

    return np.array(images),np.array(labels)

X_train,y_train=retreive_dataset(Train_set)
X_val,y_val=retreive_dataset(Val_set)
X_test,y_test=retreive_dataset(Test_set)

2025-04-14 17:09:18.720944: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744650558.963690      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744650559.041934      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:

from torch.utils.data import Dataset, DataLoader
# Dataset personnalisé pour les images de deepfake
class DeepfakeDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        # Conversion BGR -> RGB car OpenCV charge en BGR mais PyTorch attend RGB
        self.images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in images]
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


# Créer les dataloaders
train_dataset = DeepfakeDataset(X_train, y_train, transform=train_transform)
val_dataset = DeepfakeDataset(X_val, y_val, transform=test_transform)
test_dataset = DeepfakeDataset(X_test, y_test, transform=test_transform)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)
valid_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)


In [None]:
model = XTransfer()
train_xtransfer(model, train_loader, valid_loader, device, num_epochs=10)
evaluate(model, test_loader, device)

Epoch 1/10: 100%|██████████| 570/570 [03:51<00:00,  2.46it/s]


Epoch 1/10, Train Loss: 1.6318, Train Accuracy: 0.7475, Valid Loss: 0.5663, Valid Accuracy: 0.7144, Valid ROC-AUC: 0.7808


Epoch 2/10: 100%|██████████| 570/570 [03:50<00:00,  2.47it/s]


Epoch 2/10, Train Loss: 1.4079, Train Accuracy: 0.8235, Valid Loss: 0.5107, Valid Accuracy: 0.7503, Valid ROC-AUC: 0.8282


Epoch 3/10: 100%|██████████| 570/570 [03:50<00:00,  2.47it/s]


Epoch 3/10, Train Loss: 1.2110, Train Accuracy: 0.8739, Valid Loss: 0.4909, Valid Accuracy: 0.7635, Valid ROC-AUC: 0.8465


Epoch 4/10: 100%|██████████| 570/570 [03:51<00:00,  2.47it/s]


Epoch 4/10, Train Loss: 1.0304, Train Accuracy: 0.9104, Valid Loss: 0.4658, Valid Accuracy: 0.7794, Valid ROC-AUC: 0.8626


Epoch 5/10: 100%|██████████| 570/570 [03:49<00:00,  2.48it/s]


Epoch 5/10, Train Loss: 0.8794, Train Accuracy: 0.9275, Valid Loss: 0.4629, Valid Accuracy: 0.7807, Valid ROC-AUC: 0.8674


Epoch 6/10: 100%|██████████| 570/570 [03:49<00:00,  2.48it/s]


Epoch 6/10, Train Loss: 0.7444, Train Accuracy: 0.9388, Valid Loss: 0.4796, Valid Accuracy: 0.7842, Valid ROC-AUC: 0.8705


Epoch 7/10: 100%|██████████| 570/570 [03:50<00:00,  2.47it/s]


Epoch 7/10, Train Loss: 0.6451, Train Accuracy: 0.9441, Valid Loss: 0.4655, Valid Accuracy: 0.7941, Valid ROC-AUC: 0.8770


Epoch 8/10: 100%|██████████| 570/570 [03:51<00:00,  2.47it/s]


Epoch 8/10, Train Loss: 0.5674, Train Accuracy: 0.9429, Valid Loss: 0.4678, Valid Accuracy: 0.7976, Valid ROC-AUC: 0.8771


Epoch 9/10: 100%|██████████| 570/570 [03:51<00:00,  2.47it/s]


Epoch 9/10, Train Loss: 0.5294, Train Accuracy: 0.9390, Valid Loss: 0.4758, Valid Accuracy: 0.7934, Valid ROC-AUC: 0.8768


Epoch 10/10: 100%|██████████| 570/570 [03:51<00:00,  2.47it/s]


Epoch 10/10, Train Loss: 0.5037, Train Accuracy: 0.9371, Valid Loss: 0.4764, Valid Accuracy: 0.7944, Valid ROC-AUC: 0.8769
Accuracy: 0.7959
AUC: 0.8824
MCC: 0.5924
F1-score: 0.8001
Specificity: 0.7754
Cohen's Kappa: 0.5919
Log Loss: 0.4652
Recall: 0.8165
Precision: 0.7843
