In [1]:
import torch
import torch.nn as nn
from torchvision import transforms, models
import cv2
import numpy as np
from PIL import Image

In [2]:
class ConvNeXtModel(nn.Module):
    """ConvNeXt-Tiny backbone with a single-output head followed by a sigmoid.

    The last layer of the stock torchvision classifier (index 2) is replaced
    by ``nn.Linear(in_features, 1)``, and ``forward`` passes that single
    output through a sigmoid, so each input yields one value in (0, 1).

    Args:
        pretrained: When True (the default, matching the previous behavior)
            the backbone is initialised from the ImageNet-1K weights that the
            deprecated ``pretrained=True`` flag used to select. Pass False to
            skip the download when a checkpoint is loaded right afterwards.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        # `pretrained=` is deprecated in torchvision; the `weights=` enum is
        # its replacement and avoids the deprecation warning.
        weights = models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1 if pretrained else None
        self.model = models.convnext_tiny(weights=weights)
        # Swap the final classification layer for a one-unit head.
        original_head = self.model.classifier[2]
        self.model.classifier[2] = nn.Linear(original_head.in_features, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the backbone output for the batch ``x``.

        ``x`` is presumably a float image batch shaped (N, 3, H, W) -- TODO
        confirm against the training pipeline.
        """
        return self.sigmoid(self.model(x))

In [3]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ConvNeXtModel()
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code; it also silences the
# FutureWarning that torch.load emits when the flag is left unset.
model.load_state_dict(
    torch.load(
        "models/model_evolution_artifacts_final/best_convnext_model.pth",
        map_location=device,
        weights_only=True,
    )
)
# Inference mode: disables training-only behavior such as stochastic depth.
model.eval()
model.to(device)

  model.load_state_dict(torch.load("models/model_evolution_artifacts_final/best_convnext_model.pth", map_location=device))


ConvNeXtModel(
  (model): ConvNeXt(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
      )
      (1): Sequential(
        (0): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
            (3): Linear(in_features=96, out_features=384, bias=True)
            (4): GELU(approximate='none')
            (5): Linear(in_features=384, out_features=96, bias=True)
            (6): Permute()
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,)

In [4]:
# Preprocessing applied to every PIL image before it reaches the model:
# resize to 128x128, then convert to a tensor.
# NOTE(review): there is no Normalize step here -- confirm this matches the
# preprocessing used when the checkpoint was trained.
transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
    ]
)

In [16]:
# Face detector: OpenCV DNN network built from a TensorFlow model file and its
# text graph description. Both paths are relative to the working directory.
face_model = "opencv_face_detector_uint8.pb"
face_proto = "opencv_face_detector.pbtxt"
net = cv2.dnn.readNetFromTensorflow(face_model, face_proto)

In [17]:
# Open the input video. VideoCapture does not raise when the file is missing
# or cannot be decoded, so fail loudly here instead of letting the processing
# loop silently do nothing.
cap = cv2.VideoCapture("001_870.mp4")
if not cap.isOpened():
    raise IOError("Could not open video file: 001_870.mp4")

In [18]:
# Per-frame pipeline: detect faces, classify each face crop with the model,
# draw the result on the frame and show it. Press 'q' in the window to stop.
# The try/finally guarantees the capture is released and the window closed
# even if a frame raises or the kernel is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or read failure.
            break

        # Face detection
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], swapRB=False, crop=False)
        net.setInput(blob)
        detections = net.forward()

        # Crops are taken from an untouched copy so that boxes and labels
        # drawn for earlier faces never end up inside a later face crop.
        clean_frame = frame.copy()

        found_face = False
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence <= 0.5:
                continue

            # Detector coordinates are scaled by the frame size to get pixels.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            # Plain Python ints: OpenCV drawing calls can reject NumPy scalars.
            x1, y1, x2, y2 = (int(v) for v in box.astype("int"))
            x1, y1, x2, y2 = max(0, x1), max(0, y1), min(w, x2), min(h, y2)

            # Crop with a 20 px margin around the box, clamped to the frame.
            face_img = clean_frame[max(0, y1-20):min(h, y2+20), max(0, x1-20):min(w, x2+20)]
            if face_img.size == 0:
                # Degenerate box; it does not count as a detected face.
                continue
            found_face = True

            # Convert the crop for the model (BGR -> RGB -> PIL -> tensor batch of 1)
            img_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            img_pil = Image.fromarray(img_rgb)
            input_tensor = transform(img_pil).unsqueeze(0).to(device)

            # Predict
            with torch.no_grad():
                output = model(input_tensor)
                prob = output.item()
                label = "Fake" if prob < 0.5 else "Real"

            # Draw the result; keep the label inside the frame when the box
            # touches the top edge.
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
            cv2.putText(frame, f"{label} ({prob:.2f})", (x1, max(y1-10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,255), 2)

        if not found_face:
            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)

        cv2.imshow("DeepFake Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

In [8]:
img_path = "data/real_vs_fake_part2/valid/Real/real_9813.jpg"

# Read and preprocess the image. The context manager closes the file handle
# as soon as the pixel data has been converted to RGB.
with Image.open(img_path) as img_file:
    image = img_file.convert("RGB")
input_tensor = transform(image).unsqueeze(0).to(device)  # add the batch dimension

# Predict
with torch.no_grad():
    output = model(input_tensor)
    prob = output.item()
    label = "Fake" if prob < 0.5 else "Real"
    print(f"Prediction: {label} ({prob:.8f})")

# Show the image with the result. Reuse the already-decoded image instead of
# reading the file again: cv2.imread returns None on failure rather than
# raising, and this keeps the displayed picture identical to the model input.
img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
cv2.putText(img_cv, f"{label} ({prob:.2f})", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
try:
    cv2.imshow("DeepFake Detection", img_cv)
    cv2.waitKey(0)
finally:
    # Always close the window, even if display raises or is interrupted.
    cv2.destroyAllWindows()

Prediction: Real (0.80430073)
