In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install mtcnn

Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m65.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lz4, mtcnn
Successfully installed lz4-4.4.4 mtcnn-1.0.0


In [None]:
import cv2
import torch
from torchvision import models
from torch import nn
import numpy as np
from torchvision import transforms
from PIL import Image
from mtcnn import MTCNN

In [None]:
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 1)
trained_params = torch.load('/content/drive/MyDrive/ColabNotebooks/deepfake_CNN/pretrainedCheckpoints/model_epoch_28.pth')
model.load_state_dict(trained_params['model_state_dict'])
model.state_dict()



OrderedDict([('conv1.weight',
              tensor([[[[-1.2138e-02, -5.7461e-03, -4.3455e-04,  ...,  5.8699e-02,
                          1.9575e-02, -1.0502e-02],
                        [ 1.0863e-02,  1.1773e-02, -1.0584e-01,  ..., -2.6617e-01,
                         -1.2383e-01,  8.6264e-03],
                        [-7.7315e-03,  5.9830e-02,  2.9802e-01,  ...,  5.2393e-01,
                          2.6060e-01,  6.7247e-02],
                        ...,
                        [-2.7782e-02,  1.6023e-02,  7.2669e-02,  ..., -3.3264e-01,
                         -4.1967e-01, -2.5610e-01],
                        [ 3.0566e-02,  4.0920e-02,  6.3075e-02,  ...,  4.1390e-01,
                          3.9359e-01,  1.6722e-01],
                        [-1.3386e-02, -2.7137e-03, -2.3091e-02,  ..., -1.4877e-01,
                         -8.0399e-02, -2.9249e-03]],
              
                       [[-1.4973e-02, -2.7695e-02, -3.4985e-02,  ...,  3.2008e-02,
                          2.8520

In [None]:
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]
)
detector = MTCNN()

In [None]:
def preprocess_video(path):
  cap = cv2.VideoCapture(path)

  if not cap.isOpened():
    return -1

  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
  frame_indices = np.linspace(0, total_frames - 1, num=6, dtype=int)

  frames = []
  for idx in frame_indices:
    cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
    ret, frame = cap.read()
    if ret:
        frames.append(frame)
  cap.release()
  return frames

In [None]:
def extract_faces(frames, detector):
    face_list = []

    for frame in frames:
        # Convert to RGB for MTCNN
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = detector.detect_faces(rgb_frame)

        if faces:
            # Pick the largest face (by area)
            largest_face = max(faces, key=lambda x: x['box'][2] * x['box'][3])
            x, y, w, h = largest_face['box']
            x, y = max(0, x), max(0, y)  # clamp to non-negative
            face_crop = frame[y:y + h, x:x + w]

            if face_crop.size != 0:
                face_resized = cv2.resize(face_crop, (224, 224))
                face_list.append(face_resized)
                continue

        # If no face or failed crop
        face_list.append(None)

    return face_list


In [None]:
def predict_video(video_path):
    frames = preprocess_video(video_path)
    faces = extract_faces(frames, detector)
    probs = []

    for face in faces:
        face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(face_rgb)
        x = transform(pil_image).unsqueeze(0)  # shape [1, 3, 224, 224]
        with torch.no_grad():
            output = model(x)
            prob = torch.sigmoid(output)
            probs.append(prob)

    print(probs)
    avg_prob = torch.stack(probs).mean(dim=0)  # shape [1, num_classes]
    confidence = avg_prob.item()
    label = "REAL" if confidence > 0.5 else "FAKE"

    return label, confidence

In [None]:
label, confidence = predict_video('/content/drive/MyDrive/me.mp4')
print(f"Label: {label}, Confidence: {confidence}")

[tensor([[0.1660]]), tensor([[0.1225]]), tensor([[0.1167]]), tensor([[0.1447]]), tensor([[0.1233]])]
Label: FAKE, Confidence: 0.13462544977664948
