In [1]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
# Fetch the 'xdxd003/ff-c23' dataset through kagglehub and keep the value the
# call returns (presumably the local directory of the download - confirm
# against the kagglehub docs).
# NOTE(review): the configuration cell hard-codes DATASET_ROOT under
# /kaggle/input/ and never reads `xdxd003_ff_c23_path`; confirm both point at
# the same files in the environment you run in.
import kagglehub
xdxd003_ff_c23_path = kagglehub.dataset_download('xdxd003/ff-c23')

print('Data source import complete.')


Using Colab cache for faster access to the 'ff-c23' dataset.
Data source import complete.


In [2]:
# 1. Uninstall the mismatched versions
# %pip uninstall -y torch torchvision torchaudio torch_xla

# 2. Reinstall the TPU-compatible PyTorch (compatible with Colab/Kaggle TPUs)
# %pip install torch_xla[tpu] torch torchvision

# 3. Reinstall facenet-pytorch WITHOUT dependencies (so it doesn't break PyTorch again)
%pip install facenet-pytorch opencv-python --no-deps

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Downloading facenet_pytorch-2.6.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: facenet-pytorch, opencv-python
Successfully installed facenet-pytorch-2.6.0 opencv-python-4.12.0.88


In [3]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image, ImageOps
from transformers import AutoModelForImageClassification, AutoImageProcessor
from facenet_pytorch import MTCNN
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from torch.utils.data import Dataset, DataLoader

# =================CONFIGURATION=================
DATASET_ROOT = "/kaggle/input/ff-c23/FaceForensics++_C23/"
CSV_FOLDER = os.path.join(DATASET_ROOT, "csv")

MODEL_ID = "sakshamkr1/deitfake-v2"

# High-Accuracy Settings
FRAMES_PER_VIDEO = 15
BATCH_SIZE = 1
NUM_WORKERS = 8
MARGIN = 1.3  # Critical: Capture blending boundaries
ENABLE_TTA = True # Robustness: Average predictions of original + flipped image

# If True, only validates on the Official FF++ Test Split (IDs 860-999)
USE_OFFICIAL_TEST_SPLIT = True

# Manipulations to test. Must match CSV filenames (e.g., 'Deepfakes.csv')
TARGET_MANIPULATIONS = ['Deepfakes', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures']
# ===============================================

# Check Device: prefer a TPU when torch_xla is importable and can hand out a
# device, otherwise fall back to CUDA, then CPU.
try:
    import torch_xla.core.xla_model as xm
    DEVICE = xm.xla_device()
    print(f"--- Running on TPU: {DEVICE} ---")
except Exception:
    # `Exception` (not a bare `except`) so that Ctrl-C / kernel interrupts are
    # not mistaken for "no TPU available".
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"--- Running on Device: {DEVICE} ---")

class FFPPDataset(Dataset):
    """Video-level dataset: each item is one video turned into face crops.

    For every video a fixed number of frames is sampled, one face per frame is
    located with MTCNN, cropped with a margin, resized to 224x224 and passed
    through the image processor. The module-level settings ``MARGIN`` and
    ``ENABLE_TTA`` control the crop margin and horizontal-flip augmentation.

    Args:
        video_paths: Sequence of video file paths.
        labels: Sequence of integer labels aligned with ``video_paths``.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            whose result is indexed with ``"pixel_values"``.
        frames_per_video: Number of frame positions sampled per video.
        mtcnn: Detector exposing ``detect(list_of_pil_images)``.
    """

    def __init__(self, video_paths, labels, processor, frames_per_video=10, mtcnn=None):
        self.video_paths = video_paths
        self.labels = labels
        self.processor = processor
        self.frames_per_video = frames_per_video
        self.mtcnn = mtcnn

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.video_paths)

    def extract_faces_high_res(self, video_path):
        """
        High-Accuracy Extraction:
        1. Reads full resolution frames.
        2. Detects faces without downscaling.
        3. Applies a MARGIN-times margin to capture blending artifacts.

        Returns:
            A list of 224x224 PIL face crops, one per sampled frame in which a
            face was found. The list is empty when the video cannot be opened,
            reports no frames, yields no readable frame, or detection fails.
        """
        cap = cv2.VideoCapture(video_path)
        frames_pil = []
        try:
            if not cap.isOpened():
                return []
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total_frames <= 0:
                return []

            # Evenly spaced frame positions across the whole clip.
            frame_indices = np.linspace(0, total_frames - 1, self.frames_per_video, dtype=int)

            for idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ret, frame = cap.read()
                if not ret:
                    continue

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames_pil.append(Image.fromarray(frame_rgb))
        finally:
            # Always free the decoder handle, including on the early returns
            # above and if decoding raises.
            cap.release()

        if not frames_pil:
            return []

        # Batch Detection. Best effort: a detector failure drops this video
        # instead of aborting the whole run, but interrupts still propagate.
        try:
            boxes_list, _ = self.mtcnn.detect(frames_pil)
        except Exception:
            return []

        final_faces = []
        for frame_img, boxes in zip(frames_pil, boxes_list):
            if boxes is None:
                continue

            # First returned box; the detector is expected to be configured
            # with select_largest=True so this is the largest face.
            x1, y1, x2, y2 = boxes[0]

            # --- APPLY MARGIN --- grow the box around its centre, then clamp
            # it to the frame.
            w = x2 - x1
            h = y2 - y1
            cx = x1 + w / 2
            cy = y1 + h / 2

            new_w = w * MARGIN
            new_h = h * MARGIN

            left = max(0, cx - new_w / 2)
            top = max(0, cy - new_h / 2)
            right = min(frame_img.width, cx + new_w / 2)
            bottom = min(frame_img.height, cy + new_h / 2)

            # Round to whole pixels (the same rounding Pillow's crop applies
            # internally) so an empty/inverted box can be detected up front
            # rather than raising inside a DataLoader worker.
            left, top, right, bottom = (int(round(v)) for v in (left, top, right, bottom))
            if right <= left or bottom <= top:
                continue

            face = frame_img.crop((left, top, right, bottom))
            face = face.resize((224, 224), Image.Resampling.BILINEAR)
            final_faces.append(face)

        return final_faces

    def __getitem__(self, idx):
        """Return the processed face stack for video ``idx``.

        Returns:
            ``None`` when the file is missing or no face could be extracted;
            otherwise a dict with ``"pixel_values"`` (processor output for all
            crops, plus their mirrored copies when ``ENABLE_TTA`` is set),
            ``"label"`` (long tensor) and ``"video_path"``.
        """
        video_path = self.video_paths[idx]
        label = self.labels[idx]

        if not os.path.exists(video_path):
            return None

        faces = self.extract_faces_high_res(video_path)
        if not faces:
            return None

        # --- Test Time Augmentation (TTA) Logic ---
        if ENABLE_TTA:
            # Originals first, then their horizontally flipped versions.
            images = faces + [ImageOps.mirror(f) for f in faces]
        else:
            images = faces
        inputs = self.processor(images=images, return_tensors="pt")

        return {
            "pixel_values": inputs["pixel_values"],
            "label": torch.tensor(label, dtype=torch.long),
            "video_path": video_path
        }

def collate_fn(batch):
    """Strip failed (``None``) samples from a batch.

    Returns the surviving samples as a plain list, or ``None`` when nothing
    survives so the caller can skip the batch.
    """
    survivors = list(filter(lambda sample: sample is not None, batch))
    return survivors or None

def is_test_video(filename):
    """Filter for Test Split (IDs 860-999).

    The ID is the integer before the first underscore of the file stem, e.g.
    ``"900_123.mp4"`` -> 900 and ``"860.mp4"`` -> 860.

    NOTE(review): this treats every ID >= 860 as test data. Confirm that this
    range matches the split you intend to report on; the FaceForensics++
    repository is believed to publish its splits as explicit ID lists.

    Args:
        filename: Path or file name of a video.

    Returns:
        True when the leading ID is >= 860. Also True when no ID can be parsed
        (non-numeric stem or a non-path value such as NaN), so unparseable
        entries are kept rather than dropped.
    """
    try:
        stem = os.path.splitext(os.path.basename(filename))[0]
        video_id = int(stem.split('_')[0])
    except (TypeError, ValueError):
        # Only "cannot parse" errors keep the entry; anything else (including
        # KeyboardInterrupt) propagates.
        return True
    return video_id >= 860

def _read_video_paths(csv_path):
    """Return the full video paths listed in one CSV's 'File Path' column.

    Entries are joined onto DATASET_ROOT. When USE_OFFICIAL_TEST_SPLIT is set,
    only entries accepted by is_test_video() are kept.
    """
    listing = pd.read_csv(csv_path)
    kept = []
    # Iterate the column directly; no need to materialise a Series per row.
    for rel_path in listing['File Path']:
        if USE_OFFICIAL_TEST_SPLIT and not is_test_video(rel_path):
            continue
        kept.append(os.path.join(DATASET_ROOT, rel_path))
    return kept


def load_paths_from_csv():
    """Collect the evaluation videos and their labels from the dataset CSVs.

    Reads ``original.csv`` for real videos and ``<manipulation>.csv`` for each
    entry of TARGET_MANIPULATIONS, all inside CSV_FOLDER. Missing CSV files are
    reported and skipped.

    Returns:
        ``(paths, labels)``: real videos first, then fakes, with labels
        0 = Real and 1 = Fake aligned to ``paths``.
    """
    real_paths = []
    fake_paths = []

    print(f"--- Loading paths from CSVs in {CSV_FOLDER} ---")

    # 1. Load REAL videos (original.csv)
    orig_csv_path = os.path.join(CSV_FOLDER, "original.csv")
    if os.path.exists(orig_csv_path):
        real_paths.extend(_read_video_paths(orig_csv_path))
    else:
        print("!! WARNING: original.csv not found!")

    # 2. Load FAKE videos
    for manip in TARGET_MANIPULATIONS:
        csv_path = os.path.join(CSV_FOLDER, f"{manip}.csv")
        if os.path.exists(csv_path):
            fake_paths.extend(_read_video_paths(csv_path))
        else:
            print(f"Skipping {manip} (CSV not found)")

    print(f"Loaded {len(real_paths)} Real and {len(fake_paths)} Fake videos from CSVs.")

    paths = real_paths + fake_paths
    # Standard Metrics: 0=Real, 1=Fake
    labels = [0] * len(real_paths) + [1] * len(fake_paths)
    return paths, labels

  DEVICE = xm.xla_device()


--- Running on TPU: xla:0 ---


In [6]:

def _find_fake_class_index(id2label, default=0):
    """Return the class index whose label name contains "fake".

    Falls back to ``default`` when the mapping is missing, or when zero or
    several labels match, so an unexpected label scheme keeps the previous
    hard-coded behaviour (index 0).
    """
    matches = []
    for idx, name in (id2label or {}).items():
        if "fake" not in str(name).lower():
            continue
        try:
            matches.append(int(idx))
        except (TypeError, ValueError):
            continue
    return matches[0] if len(matches) == 1 else default


def main():
    """Score every selected video with the classifier and print metrics.

    Steps: load the processor/model for MODEL_ID, build the MTCNN detector,
    collect video paths from the CSVs, run the model on each video's face
    crops, average the per-crop "fake" probability into one video score, write
    ``ffpp_results_high_acc.csv`` and print accuracy, AUC and a classification
    report (0 = Real, 1 = Fake, threshold 0.5).

    Note that accuracy is sensitive to class balance; the loader prints the
    real/fake counts so the number can be read in context.
    """
    print(f"--- Loading DeitFake: {MODEL_ID} ---")
    processor = AutoImageProcessor.from_pretrained(MODEL_ID, use_fast=True)
    model = AutoModelForImageClassification.from_pretrained(MODEL_ID)
    model.to(DEVICE)
    model.eval()

    print(f"Model ID2LABEL: {model.config.id2label}")

    # Which logit column means "Fake" is read from the model config instead of
    # being assumed; index 0 remains the fallback.
    fake_index = _find_fake_class_index(model.config.id2label)
    print(f"Using class index {fake_index} as the Fake probability")

    print("--- Init MTCNN (High Accuracy) ---")
    # Face detection runs on CPU when the classifier is on an XLA device.
    mtcnn_device = torch.device("cpu") if "xla" in str(DEVICE) else DEVICE
    mtcnn = MTCNN(
        keep_all=False,
        select_largest=True,
        device=mtcnn_device,
        thresholds=[0.6, 0.7, 0.7]
    )

    # Use CSV loader
    video_paths, labels = load_paths_from_csv()

    if not video_paths:
        print("No videos found! Check CSV_FOLDER path.")
        return

    dataset = FFPPDataset(video_paths, labels, processor, FRAMES_PER_VIDEO, mtcnn)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn, num_workers=NUM_WORKERS)

    print(f"--- Starting Validation (TTA Enabled: {ENABLE_TTA}) ---")

    y_true = []
    y_scores = []
    results = []

    with torch.no_grad():
        for batch in tqdm(dataloader):
            if batch is None:
                continue
            # collate_fn returns a list of per-video dicts; score each one so
            # nothing is dropped when BATCH_SIZE > 1.
            for data in batch:
                pixel_values = data["pixel_values"].to(DEVICE)
                label = data["label"].item()
                path = data["video_path"]

                outputs = model(pixel_values)
                probs = torch.softmax(outputs.logits, dim=1)

                # Validation target: 1 = Fake, 0 = Real. The video score is the
                # mean "Fake" probability over all crops of the video (including
                # the mirrored copies when TTA is enabled).
                fake_prob = probs[:, fake_index].mean().item()

                y_true.append(label)
                y_scores.append(fake_prob)
                results.append({"video": os.path.basename(path), "label": label, "score": fake_prob})

    # Videos that returned None from the dataset never reach the loop above;
    # surface how many were lost so the metrics are not silently computed on a
    # subset.
    skipped = len(dataset) - len(results)
    if skipped:
        print(f"!! WARNING: {skipped} of {len(dataset)} videos were skipped "
              "(missing file, unreadable video, or no face detected).")

    if not results:
        print("No videos could be scored; nothing to evaluate.")
        return

    # Save results
    df_res = pd.DataFrame(results)
    df_res.to_csv("ffpp_results_high_acc.csv", index=False)

    y_pred_binary = (np.array(y_scores) > 0.5).astype(int)
    acc = accuracy_score(y_true, y_pred_binary)
    # AUC is undefined when only one class was scored.
    auc = roc_auc_score(y_true, y_scores) if len(set(y_true)) > 1 else float("nan")

    print("\n" + "="*30)
    print(f"ACCURACY: {acc:.4f}")
    print(f"AUC:      {auc:.4f}")
    print("="*30)
    # labels=[0, 1] keeps the report aligned with target_names even if one
    # class is absent from the scored videos.
    print(classification_report(y_true, y_pred_binary, labels=[0, 1],
                                target_names=["Real", "Fake"], zero_division=0))


In [7]:
# Entry point: run the full evaluation when this cell is executed.
# The guard keeps main() from running if this code is imported as a module.
if __name__ == "__main__":
    main()

--- Loading DeitFake: sakshamkr1/deitfake-v2 ---
Model ID2LABEL: {0: 'Fake', 1: 'Real'}
--- Init MTCNN (High Accuracy) ---
--- Loading paths from CSVs in /kaggle/input/ff-c23/FaceForensics++_C23/csv ---
Loaded 140 Real and 700 Fake videos from CSVs.
--- Starting Validation (TTA Enabled: True) ---


100%|██████████| 840/840 [37:46<00:00,  2.70s/it]


ACCURACY: 0.7190
AUC:      0.5240
              precision    recall  f1-score   support

        Real       0.16      0.16      0.16       140
        Fake       0.83      0.83      0.83       700

    accuracy                           0.72       840
   macro avg       0.49      0.49      0.49       840
weighted avg       0.72      0.72      0.72       840




