In [14]:
import torch
import torch.nn as nn
from torchvision import transforms, models

import cv2
import numpy as np

In [3]:
class ConvNeXtModel(nn.Module):
    """ConvNeXt-Tiny backbone with a one-unit head followed by a sigmoid.

    The last layer of the torchvision classifier is swapped for a
    ``Linear(in_features, 1)`` layer, and ``forward`` squashes that single
    output through a sigmoid, so each sample yields one value in (0, 1).

    Args:
        pretrained: When True (the default, matching the previous behaviour),
            the backbone is initialised with torchvision's ImageNet-1K
            weights. Pass False to skip that download when the weights are
            going to be overwritten by ``load_state_dict`` anyway.
    """

    def __init__(self, pretrained: bool = True):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the `weights=` enum
        # is the supported way to request the same ImageNet-1K checkpoint.
        weights = models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1 if pretrained else None
        self.model = models.convnext_tiny(weights=weights)
        # Replace the final classification layer with a single-output layer.
        old_head = self.model.classifier[2]
        self.model.classifier[2] = nn.Linear(old_head.in_features, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the backbone output for the batch ``x``.

        Assumes ``x`` is a float image batch shaped (N, 3, H, W) — TODO confirm
        against the preprocessing used by callers.
        """
        return self.sigmoid(self.model(x))

In [8]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Bare expression so the notebook displays which device was picked.
device

device(type='cuda')

In [10]:
# Build the network, then restore the saved parameters from the checkpoint.
CHECKPOINT_PATH = "models/model_evolution_artifacts/best_convnext_model.pth"

model = ConvNeXtModel()
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code; it also removes the
# FutureWarning that torch.load emits when the flag is left unset.
# Assumes the file holds a plain state_dict (it is fed to load_state_dict).
state_dict = torch.load(CHECKPOINT_PATH, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Inference mode for layers that behave differently in training.
model.eval()
# Last expression: moves the model to `device` and displays its structure.
model.to(device)

  model.load_state_dict(torch.load("models/model_evolution_artifacts/best_convnext_model.pth", map_location = device))


ConvNeXtModel(
  (model): ConvNeXt(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
      )
      (1): Sequential(
        (0): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
            (3): Linear(in_features=96, out_features=384, bias=True)
            (4): GELU(approximate='none')
            (5): Linear(in_features=384, out_features=96, bias=True)
            (6): Permute()
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,)

In [15]:
# Per-frame preprocessing: resize to 128x128, then convert the image to a tensor.
# NOTE(review): no normalisation step is applied — confirm this matches training.
preprocess_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocess_steps)

In [16]:
# Open the input clip. cv2.VideoCapture does not raise when the file is missing
# or unreadable, so fail loudly here instead of letting later cells do nothing.
VIDEO_PATH = "01_02__outside_talking_still_laughing__YVGY8LOK.mp4"
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    raise IOError(f"Could not open video source: {VIDEO_PATH}")

In [17]:
# Classify the clip frame by frame and show the annotated video; press 'q' to stop.
# The try/finally guarantees the capture and the preview window are released even
# when the loop is interrupted (e.g. kernel interrupt) or a frame fails to process.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame returned: end of the stream or a read failure.
            break

        # Convert BGR (OpenCV) to RGB (PyTorch)
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_pil = transforms.functional.to_pil_image(img)
        # unsqueeze(0) adds the batch dimension the model expects.
        input_tensor = transform(img_pil).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(input_tensor)
            prob = output.item()
        # NOTE(review): assumes the positive class (score > 0.5) means "Fake" —
        # confirm against the label encoding used during training.
        label = "Fake" if prob > 0.5 else "Real"

        # Draw the result onto the frame
        cv2.putText(frame, f"{label} ({prob:.2f})", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
        cv2.imshow("DeepFake Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()