In [31]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install timm facenet-pytorch opencv-python-headless




INSTALL LIBRARIES

In [None]:
import os
import glob
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from facenet_pytorch import MTCNN
import timm
from tqdm import tqdm

INITIALISE FACE DETECTOR (MTCNN)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mtcnn = MTCNN(image_size=224, margin=20, device=device)

DEFINE FACE FRAME EXTRACTOR

In [None]:
def extract_faces(video_path, max_frames=16):
    cap = cv2.VideoCapture(video_path)
    faces = []
    while len(faces) < max_frames and cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        face = mtcnn(frame)
        if face is not None:
            faces.append(face)
    cap.release()
    if len(faces) == 0:
        return None
    return torch.stack(faces)


EfficientViT Encoder

In [None]:
class EfficientViTEmbedder(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = timm.create_model('efficientvit_b1.r224_in1k', pretrained=True, features_only=True)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        x = self.model(x)[-1]
        x = self.pool(x).squeeze(-1).squeeze(-1)
        return x


TEMPORAL ConvNet Classifier

In [None]:
class DeepfakeClassifier(nn.Module):
    def __init__(self, embed_dim=256):
        super().__init__()
        self.temporal = nn.Sequential(
            nn.Conv1d(embed_dim, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1)
        )
        self.fc = nn.Linear(256, 1)

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.temporal(x).squeeze(-1)
        return torch.sigmoid(self.fc(x)).squeeze(1)



DATASET LOADER

In [None]:
class DeepfakeDataset(Dataset):
    def __init__(self, folder, max_frames=16):
        self.samples = []
        for label, sub in enumerate(['real', 'fake']):
            files = glob.glob(os.path.join(folder, sub, '*.mp4'))
            self.samples += [(f, label) for f in files]
        self.max_frames = max_frames

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        faces = extract_faces(path, self.max_frames)
        if faces is None:
            return self.__getitem__((idx + 1) % len(self.samples))
        return faces, torch.tensor(label, dtype=torch.float32)

    def __len__(self):
        return len(self.samples)


TRAINING

In [None]:
embedder = EfficientViTEmbedder().to(device).eval()
classifier = DeepfakeClassifier().to(device)

train_path = "/content/drive/MyDrive/deepfake_dataset/deepfake_dataset/test"
train_data = DeepfakeDataset(train_path)
train_loader = DataLoader(train_data, batch_size=2, shuffle=True)

loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=1e-4)


TRAINING LOOP

In [35]:
for epoch in range(8):
    for frames, labels in tqdm(train_loader):
        B, T, C, H, W = frames.shape
        frames = frames.view(B*T, C, H, W).to(device)

        with torch.no_grad():
            feats = embedder(frames)

        feats = feats.view(B, T, -1)
        preds = classifier(feats)
        loss = loss_fn(preds, labels.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")


100%|██████████| 20/20 [01:04<00:00,  3.21s/it]


Epoch 1, Loss: 0.2240


100%|██████████| 20/20 [01:03<00:00,  3.20s/it]


Epoch 2, Loss: 0.4184


100%|██████████| 20/20 [01:02<00:00,  3.13s/it]


Epoch 3, Loss: 0.2183


100%|██████████| 20/20 [01:02<00:00,  3.13s/it]


Epoch 4, Loss: 0.0968


100%|██████████| 20/20 [01:02<00:00,  3.14s/it]


Epoch 5, Loss: 0.0338


100%|██████████| 20/20 [01:02<00:00,  3.14s/it]


Epoch 6, Loss: 0.0386


100%|██████████| 20/20 [01:02<00:00,  3.14s/it]


Epoch 7, Loss: 0.0330


100%|██████████| 20/20 [01:03<00:00,  3.18s/it]

Epoch 8, Loss: 0.1504





TEST ON DATASET + EVALUATION

In [36]:
test_path = "/content/drive/MyDrive/deepfake_dataset/deepfake_dataset/test"
test_data = DeepfakeDataset(test_path)
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

def evaluate_model(embedder, classifier, dataloader):
    classifier.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for frames, labels in tqdm(dataloader):
            B, T, C, H, W = frames.shape
            frames = frames.view(B*T, C, H, W).to(device)

            feats = embedder(frames)
            feats = feats.view(B, T, -1)

            preds = classifier(feats)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    y_pred_bin = [1 if p > 0.45 else 0 for p in y_pred]

    acc = accuracy_score(y_true, y_pred_bin)
    auc = roc_auc_score(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred_bin)

    print(f"\n Accuracy: {acc:.4f}")
    print(f" AUC Score: {auc:.4f}")
    print(f" Confusion Matrix:\n{cm}")

evaluate_model(embedder, classifier, test_loader)


100%|██████████| 40/40 [01:03<00:00,  1.58s/it]


 Accuracy: 1.0000
 AUC Score: 1.0000
 Confusion Matrix:
[[20  0]
 [ 0 20]]



