# Prueba del modelo con mediapipe

Este notebook de Python contiene el código para probar el modelo que determina si un video es un deepfake o no.

In [2]:
import torch
import torch.nn as nn
import torchvision.models as models

class DeepfakeDetector(nn.Module):
    """Sequence classifier combining per-frame CNN features with landmark features.

    Each frame is embedded by an EfficientNet-B0 backbone whose classifier head is
    replaced by an identity. The embedding is concatenated with a small landmark
    vector, the resulting sequence is run through a bidirectional LSTM, and the
    output at the last time step is mapped to a single sigmoid score.

    Args:
        landmark_dim: Number of landmark features appended to every frame
            embedding. Defaults to 5, which together with the 1280-dim embedding
            gives the original LSTM input size of 1285.
    """

    def __init__(self, landmark_dim: int = 5):
        super().__init__()
        self.cnn = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
        self.cnn.classifier = nn.Identity()  # drop the final classification layer
        self.embedding_dim = 1280
        self.landmark_dim = landmark_dim
        # Not used by forward(), which accepts whatever T the input carries;
        # kept as an attribute for callers that read it.
        self.sequence_length = 16

        # Input size is derived from its two parts instead of a hard-coded 1285.
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim + self.landmark_dim,
            hidden_size=128,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        # 256 = 2 directions * hidden_size 128.
        self.fc = nn.Sequential(
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x_imgs, x_lmks):
        """Score a batch of frame sequences.

        Args:
            x_imgs: Tensor of shape (B, T, C, H, W) with the frames.
            x_lmks: Tensor of shape (B, T, landmark_dim) with per-frame landmark
                features.

        Returns:
            Tensor of shape (B,) with sigmoid outputs in [0, 1].
        """
        B, T, C, H, W = x_imgs.shape
        # reshape (not view) so non-contiguous inputs, e.g. permuted batches, work.
        frames = x_imgs.reshape(B * T, C, H, W)
        features = self.cnn(frames)                      # (B*T, embedding_dim)
        features = features.reshape(B, T, -1)            # (B, T, embedding_dim)
        combined = torch.cat([features, x_lmks], dim=2)  # (B, T, embedding_dim + landmark_dim)
        out, _ = self.lstm(combined)
        # NOTE(review): at the last time step the backward direction has only
        # processed the final frame. Kept as-is because existing checkpoints were
        # presumably trained with this readout - confirm before changing.
        out = out[:, -1, :]
        return self.fc(out).squeeze(1)


In [3]:
import os
import cv2
import torch
import numpy as np
from torchvision import transforms
import mediapipe as mp

def _extract_landmark_vector(landmarks, frame_shape):
    """Build the 5-value landmark feature vector for one frame.

    Args:
        landmarks: Face-mesh result exposing a ``landmark`` sequence whose items
            have ``x`` and ``y`` attributes.
        frame_shape: ``(height, width, channels)`` of the source frame.

    Returns:
        ``np.float32`` array of shape (5,): left-eye x, right-eye x, nose y,
        mouth-left y, mouth-right y, with x values divided by the frame width and
        y values divided by the frame height.
    """
    h, w, _ = frame_shape
    points = landmarks.landmark
    # NOTE(review): the crop helper multiplies these same coordinates by the frame
    # size, so they are already relative; dividing again makes the features very
    # small. Kept unchanged because the saved checkpoint was presumably trained on
    # exactly these values - confirm against the training code before changing.
    return np.array([
        points[33].x / w,    # left eye
        points[263].x / w,   # right eye
        points[1].y / h,     # nose
        points[61].y / h,    # mouth, left corner
        points[291].y / h,   # mouth, right corner
    ], dtype=np.float32)


def _crop_face_from_landmarks(landmarks, frame, image_size):
    """Crop the landmark bounding box (plus a 20% margin) and resize it.

    Args:
        landmarks: Face-mesh result exposing a ``landmark`` sequence.
        frame: Image array of shape (height, width, channels).
        image_size: Target size passed to ``cv2.resize``.

    Returns:
        The resized face crop.

    Raises:
        ValueError: If the clamped bounding box is empty.
    """
    h, w, _ = frame.shape
    xs = [lm.x for lm in landmarks.landmark]
    ys = [lm.y for lm in landmarks.landmark]
    min_x, max_x = int(min(xs) * w), int(max(xs) * w)
    min_y, max_y = int(min(ys) * h), int(max(ys) * h)
    margin_x = int((max_x - min_x) * 0.2)
    margin_y = int((max_y - min_y) * 0.2)
    # Clamp the expanded box to the frame.
    x1 = max(min_x - margin_x, 0)
    y1 = max(min_y - margin_y, 0)
    x2 = min(max_x + margin_x, w)
    y2 = min(max_y + margin_y, h)
    face_crop = frame[y1:y2, x1:x2]
    if face_crop.size == 0:
        raise ValueError("empty face crop")
    return cv2.resize(face_crop, image_size)


def predict_deepfake(video_path, model_path="mediapipe_model.pth", evidence_path="evidencia.jpg"):
    """Run the deepfake detector on a video and print the verdict.

    Up to 25 evenly spaced frames are probed until 16 frames with a detected face
    have been collected. The first face crop is written to ``evidence_path``.
    The result is printed; the function always returns ``None``.

    Args:
        video_path: Path of the video to analyse.
        model_path: Checkpoint loaded into ``DeepfakeDetector`` via
            ``load_state_dict``.
        evidence_path: Where the first face crop is saved.
    """
    # === CONFIGURATION ===
    sequence_length = 16
    candidate_frames = 25
    image_size = (256, 256)

    images, landmarks_list = [], []
    evidencia_guardada = False

    # === PROCESS VIDEO ===
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"No se pudo abrir el video: {video_path}")
            return
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # linspace(0, -1, ...) would otherwise produce negative frame indices.
            print(f"El video no contiene frames legibles: {video_path}")
            return
        frame_indices = np.linspace(0, total_frames - 1, candidate_frames).astype(int)

        to_tensor = transforms.ToTensor()
        # The context manager closes the face mesh even if processing fails.
        with mp.solutions.face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1,
                                             refine_landmarks=True,
                                             min_detection_confidence=0.5) as face_mesh:
            for idx in frame_indices:
                if len(images) >= sequence_length:
                    break
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
                ret, frame = cap.read()
                if not ret:
                    continue
                results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if not results.multi_face_landmarks:
                    continue
                try:
                    lmks = results.multi_face_landmarks[0]
                    # NOTE(review): the crop comes from the BGR frame, so the
                    # tensor keeps BGR channel order. Left unchanged; presumably
                    # this matches how the checkpoint was trained - confirm.
                    cropped = _crop_face_from_landmarks(lmks, frame, image_size)
                    lmk_vector = _extract_landmark_vector(lmks, frame.shape)

                    if not evidencia_guardada:
                        cv2.imwrite(evidence_path, cropped)
                        evidencia_guardada = True

                    img_tensor = to_tensor(cropped)
                    lmk_tensor = torch.tensor(lmk_vector, dtype=torch.float32)
                except Exception as exc:
                    # Best effort: skip frames that cannot be processed, but do not
                    # swallow KeyboardInterrupt/SystemExit as a bare except would.
                    print(f"Frame {int(idx)} descartado: {exc}")
                    continue
                # Append only after both tensors exist so the lists stay aligned.
                images.append(img_tensor)
                landmarks_list.append(lmk_tensor)
    finally:
        cap.release()

    if len(images) < sequence_length:
        print(f"⚠️ Solo se obtuvieron {len(images)} frames válidos. No se puede hacer inferencia.")
        return

    # === BUILD TENSORS ===
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    x_imgs = torch.stack(images[:sequence_length]).unsqueeze(0).to(device)
    x_lmks = torch.stack(landmarks_list[:sequence_length]).unsqueeze(0).to(device)

    # === LOAD MODEL AND PREDICT ===
    model = DeepfakeDetector().to(device)
    # NOTE(review): torch.load unpickles the file; only load checkpoints from a
    # trusted source.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    with torch.no_grad():
        prob = model(x_imgs, x_lmks).item()
    label = "FAKE" if prob > 0.5 else "REAL"
    print(f"\nResultado: {label}  |  Probabilidad: {prob:.4f} | Evidencia guardada como {evidence_path}")



In [16]:
# Video to verify. NOTE(review): absolute, machine-specific path; adjust it
# before running this notebook on another machine.
VIDEO_PATH = "C:/Users/Hermanos/Desktop/Proyecto Deepfake/verificacion_video.mp4"
predict_deepfake(VIDEO_PATH)



Resultado: REAL  |  Probabilidad: 0.4681 | Evidencia guardada como evidencia.jpg
