In [None]:
# =========================================================
# STEP 0: INSTALL
# =========================================================
# Quietly install the third-party packages this notebook relies on.
# NOTE(review): albumentations and scikit-learn are not imported by any cell
# visible here — confirm they are still required.
!pip install -q timm retina-face opencv-python albumentations tqdm scikit-learn

# =========================================================
# STEP 1: DOWNLOAD FACEFORENSICS++
# =========================================================
# Fetch the FaceForensics++ download helper script into the working directory.
!wget -q https://kaldir.vc.in.tum.de/faceforensics_download_v4.py
# Comment out the script's `_ = input('')` line in place — presumably an
# interactive confirmation prompt that would stall a notebook run.
!sed -i "s/_ = input('')/# _ = input('')/g" faceforensics_download_v4.py

# REAL: fetch the "original" dataset into /kaggle/working/ffpp.
# Flag meanings are assumed from their names (-d dataset, -c compression,
# -t file type, -n number of videos) — TODO confirm against the script's help.
!python faceforensics_download_v4.py /kaggle/working/ffpp \
 -d original \
 -c c23 \
 -t videos \
 -n 200 \
 --server EU2

# FAKE: same invocation for the "Deepfakes" dataset, into the same output root.
!python faceforensics_download_v4.py /kaggle/working/ffpp \
 -d Deepfakes \
 -c c23 \
 -t videos \
 -n 200 \
 --server EU2

# =========================================================
# STEP 2: IMPORTS
# =========================================================
import os, cv2, torch, timm
import numpy as np
import torch.nn as nn
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from retinaface import RetinaFace
from torchvision import transforms

# =========================================================
# STEP 3: CONFIG
# =========================================================
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Side length of the saved face crops, mini-batch size, number of epochs.
IMG_SIZE, BATCH_SIZE, EPOCHS = 224, 16, 3

# Per-class source video directories.
FAKE_VIDEOS = "/kaggle/working/ffpp/manipulated_sequences/Deepfakes/c23/videos"
REAL_VIDEOS = "/kaggle/working/ffpp/original_sequences/youtube/c23/videos"

# Root directory for the extracted face crops; created up front.
FACE_DATA = "/kaggle/working/faces"
os.makedirs(FACE_DATA, exist_ok=True)

# =========================================================
# STEP 4: FACE ALIGNMENT
# =========================================================
def align_and_crop(img, facial_area, landmarks):
    """Rotate ``img`` so the eyes are level, then crop and resize the face.

    Args:
        img: Image array indexed as ``img[row, col]`` (e.g. a frame returned
            by ``cv2.VideoCapture.read``).
        facial_area: ``(x1, y1, x2, y2)`` box; values are cast to ``int``.
        landmarks: Mapping with ``"left_eye"`` and ``"right_eye"`` entries,
            each an ``(x, y)`` pair.

    Returns:
        The ``IMG_SIZE`` x ``IMG_SIZE`` crop, or ``None`` when the box has no
        area inside the image.
    """
    h, w = img.shape[:2]
    # Plain Python ints: box values may arrive as NumPy scalars, which some
    # OpenCV builds refuse as a rotation center.
    x1, y1, x2, y2 = (int(v) for v in facial_area)
    # Rotate about the center of the detector's (unclamped) box.
    center = ((x1 + x2) / 2.0, (y1 + y2) / 2.0)

    # Clamp to the image: a negative start index would silently wrap around
    # in the slice below instead of cropping at the border.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    if x2 <= x1 or y2 <= y1:
        return None

    # Order the eyes by image x so dx >= 0. If the labels follow the subject's
    # perspective (the retina-face README example has "right_eye" to the left
    # of "left_eye" — NOTE(review): confirm for the installed version), the
    # unordered difference gives an angle near 180 degrees and the face comes
    # out upside down.
    eye_a, eye_b = sorted(
        (landmarks["left_eye"], landmarks["right_eye"]), key=lambda p: p[0]
    )
    dx = float(eye_b[0] - eye_a[0])
    dy = float(eye_b[1] - eye_a[1])
    angle = float(np.degrees(np.arctan2(dy, dx)))

    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    aligned = cv2.warpAffine(img, M, (w, h))
    face = aligned[y1:y2, x1:x2]

    return cv2.resize(face, (IMG_SIZE, IMG_SIZE))

# =========================================================
# STEP 5: FACE EXTRACTION
# =========================================================
def extract_faces(video_path, out_dir, every_n=10):
    """Save an aligned crop of the largest face from every ``every_n``-th frame.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        out_dir: Directory for the crops (created if missing); files are
            named ``00000.jpg``, ``00001.jpg``, ...
        every_n: Sampling stride in frames; must be >= 1.

    Returns:
        Number of crops actually written.

    Raises:
        ValueError: If ``every_n`` is smaller than 1.
    """
    if every_n < 1:
        raise ValueError(f"every_n must be >= 1, got {every_n}")

    def _box_area(det):
        bx1, by1, bx2, by2 = det["facial_area"]
        return (bx2 - bx1) * (by2 - by1)

    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    saved = 0
    try:
        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if idx % every_n == 0:
                faces = RetinaFace.detect_faces(frame)
                # Only a non-empty dict carries detections; anything else is
                # treated as "no face in this frame".
                if isinstance(faces, dict) and faces:
                    face = max(faces.values(), key=_box_area)
                    crop = align_and_crop(
                        frame, face["facial_area"], face["landmarks"]
                    )
                    # Count a crop only when it exists and was really written,
                    # so file numbering stays gap-free.
                    if crop is not None and cv2.imwrite(
                        f"{out_dir}/{saved:05d}.jpg", crop
                    ):
                        saved += 1
            idx += 1
    finally:
        # Release the decoder even if detection or writing raised.
        cap.release()

    return saved

# =========================================================
# STEP 6: PROCESS VIDEOS
# =========================================================
# For each class, turn every source video into a directory of face crops at
# FACE_DATA/<class>/<video stem>/.
for cls, src_dir in [("real", REAL_VIDEOS), ("fake", FAKE_VIDEOS)]:
    dst_root = f"{FACE_DATA}/{cls}"
    os.makedirs(dst_root, exist_ok=True)

    # Sorted for a reproducible processing order; anything that is not an
    # .mp4 file is skipped rather than handed to the video decoder.
    videos = sorted(
        name for name in os.listdir(src_dir) if name.lower().endswith(".mp4")
    )
    for vid in tqdm(videos, desc=f"Extracting {cls} faces"):
        # splitext drops only the trailing extension; str.replace would also
        # remove ".mp4" occurring elsewhere in the name.
        stem = os.path.splitext(vid)[0]
        extract_faces(f"{src_dir}/{vid}", f"{dst_root}/{stem}")

# =========================================================
# STEP 7: DATASET
# =========================================================
class FaceDataset(Dataset):
    """Face crops laid out as ``root/{real,fake}/<video>/<image>``.

    ``real`` images get label 0 and ``fake`` images label 1. Entries are
    collected in sorted order so sample indices are reproducible, and
    anything that is not a directory of ``.jpg``/``.jpeg``/``.png`` files is
    skipped instead of crashing the scan or a later image load.

    Args:
        root: Directory containing the ``real`` and ``fake`` sub-directories.

    Raises:
        FileNotFoundError: If ``root/real`` or ``root/fake`` does not exist
            (propagated from ``os.listdir``).
    """

    # File extensions accepted as images during the scan.
    IMAGE_EXTS = (".jpg", ".jpeg", ".png")

    def __init__(self, root):
        # List of (image_path, label) pairs.
        self.data = self._scan(root)

        # Tensor conversion followed by mean 0.5 / std 0.5 per channel.
        # NOTE(review): the pretrained backbone may expect different
        # normalization statistics — confirm against its pretrained config.
        self.tf = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])

    @classmethod
    def _scan(cls, root):
        """Return the sorted ``(path, label)`` list found under ``root``."""
        found = []
        for label, cls_name in enumerate(["real", "fake"]):
            for vid in sorted(os.listdir(f"{root}/{cls_name}")):
                vid_dir = f"{root}/{cls_name}/{vid}"
                if not os.path.isdir(vid_dir):
                    continue  # stray file next to the per-video directories
                for img in sorted(os.listdir(vid_dir)):
                    if img.lower().endswith(cls.IMAGE_EXTS):
                        found.append((f"{vid_dir}/{img}", label))
        return found

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        path, label = self.data[i]
        # Context manager closes the file handle as soon as pixels are loaded.
        with Image.open(path) as img:
            rgb = img.convert("RGB")
        return self.tf(rgb), label

# =========================================================
# STEP 8: SWIN MODEL
# =========================================================
class SwinDF(nn.Module):
    """Pretrained Swin-Base backbone followed by a two-output linear layer.

    The backbone is created through timm with ``num_classes=0`` and its
    ``num_features`` attribute sets the input width of ``fc``.
    """

    def __init__(self):
        super().__init__()
        feature_extractor = timm.create_model(
            "swin_base_patch4_window7_224",
            num_classes=0,
            pretrained=True,
        )
        self.backbone = feature_extractor
        self.fc = nn.Linear(
            in_features=feature_extractor.num_features, out_features=2
        )

    def forward(self, x):
        """Return the two-class logits for the input batch ``x``."""
        features = self.backbone(x)
        logits = self.fc(features)
        return logits

# =========================================================
# STEP 9: TRAIN
# =========================================================
dataset = FaceDataset(FACE_DATA)
# Fail early with an actionable message: an empty dataset would otherwise
# surface later as a sampler error or a division by zero in the accuracy print.
if len(dataset) == 0:
    raise RuntimeError(
        f"No face crops found under {FACE_DATA}; run the extraction step first"
    )
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

model = SwinDF().to(DEVICE)
opt = torch.optim.AdamW(model.parameters(), lr=2e-4)
loss_fn = nn.CrossEntropyLoss()

for ep in range(EPOCHS):
    correct = total = 0
    model.train()

    for x, y in tqdm(loader, desc=f"Epoch {ep+1}/{EPOCHS}"):
        x, y = x.to(DEVICE), y.to(DEVICE)
        opt.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()
        opt.step()

        # Running accuracy over the training batches of this epoch
        # (not a held-out score).
        correct += (out.argmax(1) == y).sum().item()
        total += y.size(0)

    print(f"Epoch {ep+1} Accuracy: {correct/total:.4f}")

# The original literal was double-encoded UTF-8 (mojibake); restored here.
print("✅ DONE — Face-based Swin deepfake detector trained")


In [None]:
# =========================================================
# STEP 0: INSTALL DEPENDENCIES
# =========================================================
# Quietly install the third-party packages this cell relies on.
# NOTE(review): albumentations and scikit-learn are not imported in the
# visible code — confirm they are still required.
!pip install -q timm retina-face opencv-python albumentations tqdm scikit-learn

# =========================================================
# STEP 1: DOWNLOAD FACEFORENSICS++
# =========================================================
# Fetch the FaceForensics++ download helper script into the working directory.
!wget -q https://kaldir.vc.in.tum.de/faceforensics_download_v4.py
# Comment out the script's `_ = input('')` line in place — presumably an
# interactive confirmation prompt that would stall a notebook run.
!sed -i "s/_ = input('')/# _ = input('')/g" faceforensics_download_v4.py

# REAL videos
# NOTE(review): no download command follows in this cell; the later steps
# read from /kaggle/working/ffpp, presumably populated by an earlier run —
# confirm before executing this cell on a fresh session.


# =========================================================
# STEP 2: IMPORTS
# =========================================================
import os, cv2, torch, timm
import numpy as np
import torch.nn as nn
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from retinaface import RetinaFace
from torchvision import transforms

# =========================================================
# STEP 3: CONFIG
# =========================================================
# Prefer the GPU whenever PyTorch reports one.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Side length of the saved face crops and mini-batch size.
IMG_SIZE, BATCH_SIZE = 224, 16
EPOCHS = 3  # kept low for now; increase later for better accuracy

# Downloaded videos live under RAW_DATA, one directory per class.
RAW_DATA = "/kaggle/working/ffpp"
REAL_DIR = f"{RAW_DATA}/original_sequences/youtube/c23/videos"
FAKE_DIR = f"{RAW_DATA}/manipulated_sequences/Deepfakes/c23/videos"

# Root directory for the extracted face crops; created up front.
FACE_DATA = "/kaggle/working/faces"
os.makedirs(FACE_DATA, exist_ok=True)

# =========================================================
# STEP 4: FACE ALIGNMENT (FIXED)
# =========================================================
def align_and_crop(img, facial_area, landmarks):
    """Level the eyes by rotating ``img``, then crop and resize the face box.

    Args:
        img: Image array indexed as ``img[row, col]``.
        facial_area: ``(x1, y1, x2, y2)`` box; values are cast to ``int``.
        landmarks: Mapping with ``"left_eye"`` and ``"right_eye"`` entries,
            each an ``(x, y)`` pair.

    Returns:
        The ``IMG_SIZE`` x ``IMG_SIZE`` crop, or ``None`` when the box has no
        area inside the image.
    """
    h, w = img.shape[:2]
    x1, y1, x2, y2 = map(int, facial_area)

    # Rotation pivot: center of the detector's (unclamped) box.
    cx = int((x1 + x2) / 2)
    cy = int((y1 + y2) / 2)
    center = (cx, cy)

    # Clamp to the image so a negative start index cannot wrap around in the
    # slice below.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    if x2 <= x1 or y2 <= y1:
        return None

    # Measure the eye line from the image-left eye to the image-right eye.
    # With subject-perspective labels (the retina-face README example has
    # "right_eye" left of "left_eye" — NOTE(review): confirm for the installed
    # version) the raw right-minus-left difference has dx < 0 and the
    # resulting ~180 degree angle flips the face upside down.
    eye_a, eye_b = sorted(
        (landmarks["left_eye"], landmarks["right_eye"]), key=lambda p: p[0]
    )
    dx = float(eye_b[0] - eye_a[0])
    dy = float(eye_b[1] - eye_a[1])
    angle = float(np.degrees(np.arctan2(dy, dx)))

    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    aligned = cv2.warpAffine(img, M, (w, h))

    face = aligned[y1:y2, x1:x2]
    if face.size == 0:
        return None

    return cv2.resize(face, (IMG_SIZE, IMG_SIZE))

# =========================================================
# STEP 5: FACE EXTRACTION
# =========================================================
def extract_faces(video_path, out_dir, every_n=10):
    """Write an aligned crop of the largest face in every ``every_n``-th frame.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        out_dir: Directory for the crops (created if missing); files are
            named ``00000.jpg``, ``00001.jpg``, ...
        every_n: Sampling stride in frames; must be >= 1.

    Returns:
        Number of crops actually written.

    Raises:
        ValueError: If ``every_n`` is smaller than 1.
    """
    if every_n < 1:
        raise ValueError(f"every_n must be >= 1, got {every_n}")

    def _box_area(det):
        bx1, by1, bx2, by2 = det["facial_area"]
        return (bx2 - bx1) * (by2 - by1)

    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    saved = 0
    try:
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % every_n == 0:
                faces = RetinaFace.detect_faces(frame)
                # Only a non-empty dict carries detections; anything else is
                # treated as "no face in this frame".
                if isinstance(faces, dict) and faces:
                    face = max(faces.values(), key=_box_area)
                    crop = align_and_crop(
                        frame, face["facial_area"], face["landmarks"]
                    )
                    # Advance the counter only after a successful write so
                    # file numbering stays gap-free.
                    if crop is not None and cv2.imwrite(
                        f"{out_dir}/{saved:05d}.jpg", crop
                    ):
                        saved += 1

            frame_idx += 1
    finally:
        # Release the decoder even if detection or writing raised.
        cap.release()

    return saved

# =========================================================
# STEP 6: PROCESS VIDEOS -> FACE DATASET
# =========================================================
# For each class, turn every source video into a directory of face crops at
# FACE_DATA/<class>/<video stem>/.
for cls, src_dir in [("real", REAL_DIR), ("fake", FAKE_DIR)]:
    dst_root = f"{FACE_DATA}/{cls}"
    os.makedirs(dst_root, exist_ok=True)

    # Sorted for a reproducible processing order; entries that are not .mp4
    # files are skipped rather than handed to the video decoder.
    videos = sorted(
        name for name in os.listdir(src_dir) if name.lower().endswith(".mp4")
    )
    for vid in tqdm(videos, desc=f"Extracting {cls} faces"):
        # splitext removes only the trailing extension, unlike str.replace.
        stem = os.path.splitext(vid)[0]
        extract_faces(f"{src_dir}/{vid}", f"{dst_root}/{stem}")

# =========================================================
# STEP 7: DATASET
# =========================================================
class FaceDataset(Dataset):
    """Face crops laid out as ``root/{real,fake}/<video>/<image>``.

    ``real`` images get label 0 and ``fake`` images label 1. The scan is
    sorted so sample indices are reproducible, and entries that are not
    directories of ``.jpg``/``.jpeg``/``.png`` files are skipped instead of
    crashing the scan or a later image load.

    Args:
        root: Directory containing the ``real`` and ``fake`` sub-directories.

    Raises:
        FileNotFoundError: If ``root/real`` or ``root/fake`` does not exist
            (propagated from ``os.listdir``).
    """

    # File extensions accepted as images during the scan.
    IMAGE_EXTS = (".jpg", ".jpeg", ".png")

    def __init__(self, root):
        # List of (image_path, label) pairs.
        self.samples = self._scan(root)

        # Tensor conversion followed by mean 0.5 / std 0.5 per channel.
        # NOTE(review): the pretrained backbone may expect different
        # normalization statistics — confirm against its pretrained config.
        self.tf = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])

    @classmethod
    def _scan(cls, root):
        """Return the sorted ``(path, label)`` list found under ``root``."""
        found = []
        for label, cls_name in enumerate(["real", "fake"]):
            for vid in sorted(os.listdir(f"{root}/{cls_name}")):
                vid_dir = f"{root}/{cls_name}/{vid}"
                if not os.path.isdir(vid_dir):
                    continue  # stray file next to the per-video directories
                for img in sorted(os.listdir(vid_dir)):
                    if img.lower().endswith(cls.IMAGE_EXTS):
                        found.append((f"{vid_dir}/{img}", label))
        return found

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, i):
        path, label = self.samples[i]
        # Context manager closes the file handle as soon as pixels are loaded.
        with Image.open(path) as img:
            rgb = img.convert("RGB")
        return self.tf(rgb), label

# =========================================================
# STEP 8: SWIN TRANSFORMER
# =========================================================
class SwinDeepfake(nn.Module):
    """Pretrained Swin-Base backbone with a two-output linear ``head``.

    The backbone is created through timm with ``num_classes=0`` and its
    ``num_features`` attribute sets the input width of ``head``.
    """

    def __init__(self):
        super().__init__()
        encoder = timm.create_model(
            "swin_base_patch4_window7_224",
            num_classes=0,
            pretrained=True,
        )
        self.backbone = encoder
        self.head = nn.Linear(in_features=encoder.num_features, out_features=2)

    def forward(self, x):
        """Return the two-class logits for the input batch ``x``."""
        embedding = self.backbone(x)
        return self.head(embedding)

# =========================================================
# STEP 9: TRAIN
# =========================================================
dataset = FaceDataset(FACE_DATA)
# Fail early with an actionable message: an empty dataset would otherwise
# surface later as a sampler error or a division by zero in the accuracy print.
if len(dataset) == 0:
    raise RuntimeError(
        f"No face crops found under {FACE_DATA}; run the extraction step first"
    )
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

model = SwinDeepfake().to(DEVICE)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
criterion = nn.CrossEntropyLoss()

for epoch in range(EPOCHS):
    model.train()
    total, correct = 0, 0

    for x, y in tqdm(loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        x, y = x.to(DEVICE), y.to(DEVICE)

        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        # Running accuracy over the training batches of this epoch
        # (not a held-out score).
        correct += (out.argmax(1) == y).sum().item()
        total += y.size(0)

    print(f"Epoch {epoch+1} Accuracy: {correct/total:.4f}")

# The original literal was double-encoded UTF-8 (mojibake); restored here.
print("✅ TRAINING COMPLETE — SWIN TRANSFORMER DEEPFAKE MODEL READY")


In [None]:
class FaceDataset(Dataset):
    """Face crops laid out as ``root/{real,fake}/<video>/<image>``.

    ``real`` images get label 0 and ``fake`` images label 1. The scan is
    sorted so sample indices are reproducible, and entries that are not
    directories of ``.jpg``/``.jpeg``/``.png`` files are skipped instead of
    crashing the scan or a later image load.

    Args:
        root: Directory containing the ``real`` and ``fake`` sub-directories.

    Raises:
        FileNotFoundError: If ``root/real`` or ``root/fake`` does not exist
            (propagated from ``os.listdir``).
    """

    # File extensions accepted as images during the scan.
    IMAGE_EXTS = (".jpg", ".jpeg", ".png")

    def __init__(self, root):
        # List of (image_path, label) pairs.
        self.samples = self._scan(root)

        # Tensor conversion followed by mean 0.5 / std 0.5 per channel.
        self.tf = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])

    @classmethod
    def _scan(cls, root):
        """Return the sorted ``(path, label)`` list found under ``root``."""
        found = []
        for label, cls_name in enumerate(["real", "fake"]):
            for vid in sorted(os.listdir(f"{root}/{cls_name}")):
                vid_dir = f"{root}/{cls_name}/{vid}"
                if not os.path.isdir(vid_dir):
                    continue  # stray file next to the per-video directories
                for img in sorted(os.listdir(vid_dir)):
                    if img.lower().endswith(cls.IMAGE_EXTS):
                        found.append((f"{vid_dir}/{img}", label))
        return found

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        # Context manager closes the file handle as soon as pixels are loaded.
        with Image.open(path) as img:
            rgb = img.convert("RGB")
        return self.tf(rgb), label

In [None]:
class SwinDF(nn.Module):
    """Pretrained Swin-Tiny backbone with a two-output linear ``head``.

    The backbone is created through timm with ``num_classes=0`` and its
    ``num_features`` attribute sets the input width of ``head``.
    """

    def __init__(self):
        super().__init__()
        encoder = timm.create_model(
            "swin_tiny_patch4_window7_224",
            num_classes=0,
            pretrained=True,
        )
        self.backbone = encoder
        self.head = nn.Linear(in_features=encoder.num_features, out_features=2)

    def forward(self, x):
        """Return the two-class logits for the input batch ``x``."""
        return self.head(self.backbone(x))

In [None]:
# Build the classifier and move it to the selected device.
model = SwinDF()
model = model.to(DEVICE)

# Train only the linear head: gradients are switched off for every backbone
# parameter and (explicitly) on for every head parameter.
model.backbone.requires_grad_(False)
model.head.requires_grad_(True)

In [None]:
# List the current contents of /kaggle/working.
!ls /kaggle/working

In [None]:
# Recursively archive the extracted face crops into faces_backup.zip
# (written to the current working directory).
!zip -r faces_backup.zip /kaggle/working/faces

In [None]:
# =========================================================
# STEP 0: INSTALL
# =========================================================
# Quietly install the third-party packages this cell relies on.
# NOTE(review): albumentations is not imported in the visible code of this
# cell — confirm it is still required.
!pip install -q timm retina-face opencv-python tqdm albumentations

# =========================================================
# STEP 1: IMPORTS
# =========================================================
import os, cv2, torch, timm
import numpy as np
import torch.nn as nn
from tqdm import tqdm
from PIL import Image
from retinaface import RetinaFace
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# =========================================================
# STEP 2: CONFIG
# =========================================================
# Prefer the GPU whenever PyTorch reports one.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Side length of the saved face crops and number of training epochs.
IMG_SIZE, EPOCHS = 224, 3
BATCH_SIZE = 8  # kept small; the original author marked it safe for Kaggle GPU
FRAMES_PER_VIDEO = 10  # sampling target used by the extraction step

# Downloaded videos are read from RAW_ROOT; crops are written under FACE_ROOT.
RAW_ROOT = "/kaggle/working/ffpp"
FACE_ROOT = "/kaggle/working/faces"
os.makedirs(FACE_ROOT, exist_ok=True)

# =========================================================
# STEP 3: SAFE FACE CROP (NO ROTATION)
# =========================================================
def crop_face(frame, facial_area):
    """Cut the face box out of ``frame`` and resize it to ``IMG_SIZE`` square.

    Args:
        frame: Three-dimensional image array indexed as ``frame[row, col]``.
        facial_area: ``(x1, y1, x2, y2)`` box; it is clipped to the frame.

    Returns:
        The resized crop, or ``None`` when the clipped box is empty.
    """
    height, width, _ = frame.shape
    left, top, right, bottom = facial_area

    # Clip the box to the frame borders.
    left, top = max(0, left), max(0, top)
    right, bottom = min(width, right), min(height, bottom)

    # Rows are indexed by y and columns by x.
    region = frame[top:bottom, left:right]
    if region.size != 0:
        return cv2.resize(region, (IMG_SIZE, IMG_SIZE))
    return None

# =========================================================
# STEP 4: EXTRACT FACES FROM VIDEOS
# =========================================================
def extract_faces(video_path, out_dir):
    """Save up to ``FRAMES_PER_VIDEO`` face crops sampled across one video.

    Frames are sampled every ``total // FRAMES_PER_VIDEO`` frames (at least
    every frame); the largest detected face of each sampled frame is cropped
    with ``crop_face`` and written as ``0.jpg``, ``1.jpg``, ...

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        out_dir: Directory for the crops (created if missing).

    Returns:
        Number of crops actually written.
    """
    def _box_area(det):
        bx1, by1, bx2, by2 = det["facial_area"]
        return (bx2 - bx1) * (by2 - by1)

    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    saved = 0
    try:
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        step = max(total // FRAMES_PER_VIDEO, 1)

        idx = 0
        # Stop at the target: the integer stride alone can select more than
        # FRAMES_PER_VIDEO frames (e.g. 19 frames -> step 1), and a frame
        # count reported as 0 would otherwise run detection on every frame.
        while saved < FRAMES_PER_VIDEO:
            ret, frame = cap.read()
            if not ret:
                break

            if idx % step == 0:
                faces = RetinaFace.detect_faces(frame)
                # Only a non-empty dict carries detections.
                if isinstance(faces, dict) and faces:
                    face = max(faces.values(), key=_box_area)
                    crop = crop_face(frame, face["facial_area"])
                    # Count a crop only when it was really written.
                    if crop is not None and cv2.imwrite(
                        f"{out_dir}/{saved}.jpg", crop
                    ):
                        saved += 1
            idx += 1
    finally:
        # Release the decoder even if detection or writing raised.
        cap.release()

    return saved

# =========================================================
# STEP 5: PROCESS FF++
# =========================================================
# Source video directory for each class.
paths = {
    "real": f"{RAW_ROOT}/original_sequences/youtube/c23/videos",
    "fake": f"{RAW_ROOT}/manipulated_sequences/Deepfakes/c23/videos"
}

# For each class, turn every source video into a directory of face crops at
# FACE_ROOT/<class>/<video stem>/.
for cls in ["real", "fake"]:
    dst = f"{FACE_ROOT}/{cls}"
    os.makedirs(dst, exist_ok=True)

    # Sorted for a reproducible processing order; entries that are not .mp4
    # files are skipped rather than handed to the video decoder.
    videos = sorted(
        name for name in os.listdir(paths[cls]) if name.lower().endswith(".mp4")
    )
    for vid in tqdm(videos, desc=f"Extracting {cls}"):
        # splitext removes only the trailing extension, unlike str.replace.
        stem = os.path.splitext(vid)[0]
        extract_faces(f"{paths[cls]}/{vid}", f"{dst}/{stem}")

# The original literal was double-encoded UTF-8 (mojibake); restored here.
print("✅ FACE DATASET READY")

# =========================================================
# STEP 6: DATASET
# =========================================================
class FaceDataset(Dataset):
    """Face crops laid out as ``root/{real,fake}/<video>/<image>``.

    ``real`` images get label 0 and ``fake`` images label 1. The scan is
    sorted so sample indices are reproducible, and entries that are not
    directories of ``.jpg``/``.jpeg``/``.png`` files are skipped instead of
    crashing the scan or a later image load.

    Args:
        root: Directory containing the ``real`` and ``fake`` sub-directories.

    Raises:
        FileNotFoundError: If ``root/real`` or ``root/fake`` does not exist
            (propagated from ``os.listdir``).
    """

    # File extensions accepted as images during the scan.
    IMAGE_EXTS = (".jpg", ".jpeg", ".png")

    def __init__(self, root):
        # List of (image_path, label) pairs.
        self.samples = self._scan(root)

        # Tensor conversion followed by mean 0.5 / std 0.5 per channel.
        # NOTE(review): the pretrained backbone may expect different
        # normalization statistics — confirm against its pretrained config.
        self.tf = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])

    @classmethod
    def _scan(cls, root):
        """Return the sorted ``(path, label)`` list found under ``root``."""
        found = []
        for label, cls_name in enumerate(["real", "fake"]):
            for vid in sorted(os.listdir(f"{root}/{cls_name}")):
                vid_dir = f"{root}/{cls_name}/{vid}"
                if not os.path.isdir(vid_dir):
                    continue  # stray file next to the per-video directories
                for img in sorted(os.listdir(vid_dir)):
                    if img.lower().endswith(cls.IMAGE_EXTS):
                        found.append((f"{vid_dir}/{img}", label))
        return found

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, i):
        path, label = self.samples[i]
        # Context manager closes the file handle as soon as pixels are loaded.
        with Image.open(path) as img:
            rgb = img.convert("RGB")
        return self.tf(rgb), label

dataset = FaceDataset(FACE_ROOT)
# Fail early with an actionable message: an empty dataset would otherwise
# surface later as a sampler error or a division by zero during training.
if len(dataset) == 0:
    raise RuntimeError(
        f"No face crops found under {FACE_ROOT}; run the extraction step first"
    )
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# =========================================================
# STEP 7: SWIN-TINY (NO OOM)
# =========================================================
class SwinDF(nn.Module):
    """Pretrained Swin-Tiny backbone with a two-output linear ``head``.

    The backbone is created through timm with ``num_classes=0`` and its
    ``num_features`` attribute sets the input width of ``head``.
    """

    def __init__(self):
        super().__init__()
        feature_extractor = timm.create_model(
            "swin_tiny_patch4_window7_224",
            num_classes=0,
            pretrained=True,
        )
        self.backbone = feature_extractor
        self.head = nn.Linear(
            in_features=feature_extractor.num_features, out_features=2
        )

    def forward(self, x):
        """Return the two-class logits for the input batch ``x``."""
        features = self.backbone(x)
        logits = self.head(features)
        return logits

model = SwinDF().to(DEVICE)

# Freeze backbone for stability + memory: only the linear head is optimized.
for p in model.backbone.parameters():
    p.requires_grad = False

optimizer = torch.optim.AdamW(model.head.parameters(), lr=2e-4)
criterion = nn.CrossEntropyLoss()

# =========================================================
# STEP 8: TRAIN
# =========================================================
for epoch in range(EPOCHS):
    model.train()
    # model.train() switches every sub-module to training mode, including the
    # frozen backbone. Put the backbone back in eval mode so any stochastic
    # layers it contains stay deterministic while only the head learns.
    model.backbone.eval()
    correct = total = 0

    for x, y in tqdm(loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        x, y = x.to(DEVICE), y.to(DEVICE)

        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        # Running accuracy over the training batches of this epoch
        # (not a held-out score).
        correct += (out.argmax(1) == y).sum().item()
        total += y.size(0)

    print(f"Epoch {epoch+1} Accuracy: {correct/total:.4f}")

# The original literal was double-encoded UTF-8 (mojibake); restored here.
print("🎯 TRAINING COMPLETE")
