<a href="https://colab.research.google.com/github/KaifAhmad1/deepfake/blob/main/Deepfake_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the notebook's dependencies into the running kernel's environment (-q = quiet).
# NOTE(review): no versions are pinned, so a fresh install may resolve to different
# releases than the ones this notebook was developed against — consider pinning.
# NOTE(review): the notebook imports `skimage`, but scikit-image is not listed below;
# presumably it is preinstalled or pulled in transitively — verify on a clean runtime.
%pip install -q torch opencv-python librosa numpy face-recognition
# LLM serving/orchestration, media handling and DTW packages imported by the notebook.
%pip install -q vllm transformers mediapipe scipy pillow tqdm pydantic moviepy langchain_community langgraph dtw-python
# Notebook UI widgets and nested event-loop support.
%pip install -q ipywidgets nest_asyncio

In [2]:
import os
import torch
import cv2
import numpy as np
import librosa
import asyncio
import json
import re
import gc
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Any, Tuple, Optional

import mediapipe as mp
from pydantic import BaseModel, Field, PrivateAttr

# Imports for LLMs and chain operations
from langchain_community.llms import VLLM, VLLMOpenAI
from langchain_core.language_models.llms import LLM
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langgraph.graph import StateGraph, END

from moviepy.editor import VideoFileClip

import nest_asyncio
nest_asyncio.apply()

import ipywidgets as widgets
from IPython.display import display, clear_output

# Additional imports for image and video quality metrics
from skimage.metrics import structural_similarity as ssim

# For lip-sync DTW computation
from dtw import dtw

# For face recognition
import face_recognition

  if event.key is 'enter':



Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



In [3]:
# Export the CUDA-related environment flags in one step, then ask PyTorch to
# release any GPU memory it is holding in its cache.
# NOTE(review): torch is already imported at this point; presumably these flags
# are still honoured because they are read at CUDA initialisation — confirm.
os.environ.update(
    {
        "CUDA_LAUNCH_BLOCKING": "1",
        "TORCH_USE_CUDA_DSA": "1",
    }
)
torch.cuda.empty_cache()

In [4]:
# Baseline keyword settings shared by every vLLM model descriptor; individual
# models layer their own overrides on top of these.
COMMON_PARAMS = dict(
    task="generate",
    max_model_len=4096,
    dtype="half",
    gpu_memory_utilization=0.85,
    cpu_offload_gb=8,
    enforce_eager=True,
    trust_remote_code=True,
)

def init_vllm_model(name: str, model_id: str, **overrides):
    """Describe a vLLM model as a plain dict; no model is loaded here.

    Args:
        name: Short label for the model.
        model_id: Model identifier string stored in the descriptor.
        **overrides: Settings that replace or extend ``COMMON_PARAMS``.

    Returns:
        A dict with keys ``"name"``, ``"model_id"`` and ``"params"``, where
        ``"params"`` is a fresh copy of ``COMMON_PARAMS`` with the overrides
        applied on top.
    """
    # Copy first so the shared defaults are never mutated by per-model overrides.
    merged = dict(COMMON_PARAMS)
    merged.update(overrides)
    print(f"[DEBUG] Initializing model '{name}' with id '{model_id}' with params: {merged}")
    return dict(name=name, model_id=model_id, params=merged)

def init_groq_model(name: str, model_id: str):
    """Describe a Groq-hosted model as a plain dict; no network call is made.

    Args:
        name: Short label for the model.
        model_id: Model identifier string stored in the descriptor.

    Returns:
        A dict with keys ``"name"``, ``"model_id"`` and ``"api_key"``. The key
        is read from the ``GROQ_API_KEY`` environment variable; when that
        variable is unset the placeholder ``"your_groq_api_key"`` is used.
    """
    descriptor = {"name": name, "model_id": model_id}
    print(f"[DEBUG] Loading model '{name}' with id '{model_id}' using API key.")
    # The key value itself is never printed, only stored in the descriptor.
    descriptor["api_key"] = os.getenv("GROQ_API_KEY", "your_groq_api_key")
    return descriptor

# Stand-in for the Groq native SDK: it answers every prompt with a canned reply.
class GroqLLM:
    """Simulated Groq client that holds a model descriptor.

    No request is sent anywhere; ``call_as_llm`` always returns the same
    fixed text regardless of the prompt.
    """

    def __init__(self, model_data):
        """Keep the descriptor passed in as ``model_data`` on the instance."""
        self.model_data = model_data

    def call_as_llm(self, prompt: str) -> str:
        """Log the prompt and return the fixed simulated response.

        Args:
            prompt: Text that would be sent to the model.

        Returns:
            The constant string ``"Score: 0.75\\nAnomalies: []"``.
        """
        print("[DEBUG] GroqLLM called with prompt: {}".format(prompt))
        # Simulated response; replace with actual Groq SDK call if needed.
        canned_reply = "Score: 0.75\nAnomalies: []"
        return canned_reply

# GroqLLMWrapper adapts GroqLLM to the LangChain LLM interface.
class GroqLLMWrapper(LLM):
    """LangChain ``LLM`` subclass that delegates text generation to a ``GroqLLM``.

    The wrapped client is stored as a pydantic private attribute so it is not
    treated as a model field.
    """

    _groq_llm: GroqLLM = PrivateAttr()

    def __init__(self, groq_llm: GroqLLM, **kwargs):
        """Initialise the LangChain base class, then attach the wrapped client.

        Args:
            groq_llm: The ``GroqLLM`` instance that will answer prompts.
            **kwargs: Forwarded unchanged to the ``LLM`` base-class constructor.
        """
        super().__init__(**kwargs)
        # Private attributes are assigned after the base initialiser has run.
        self._groq_llm = groq_llm

    @property
    def _llm_type(self) -> str:
        """Identifier string LangChain uses to label this LLM implementation."""
        return "groq_llm"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> str:
        """Answer ``prompt`` by delegating to the wrapped ``GroqLLM``.

        The signature accepts ``run_manager`` and arbitrary keyword arguments
        because the LangChain base class forwards them to ``_call``; without
        them, any extra generation keyword would raise ``TypeError``.

        Args:
            prompt: Text to send to the wrapped client.
            stop: Accepted for interface compatibility; not applied here.
            run_manager: Accepted for interface compatibility; unused.
            **kwargs: Extra generation options; accepted and ignored.

        Returns:
            The string returned by ``GroqLLM.call_as_llm``.
        """
        return self._groq_llm.call_as_llm(prompt)

print("[DEBUG] Initializing models...")


def _wrap_groq(name: str, model_id: str) -> GroqLLMWrapper:
    """Build a Groq descriptor and wrap it as a LangChain-compatible LLM."""
    return GroqLLMWrapper(GroqLLM(init_groq_model(name, model_id)))


def _llama4_wrappers() -> list:
    """Return freshly built scout and maverick wrappers, in that order."""
    return [
        _wrap_groq("groq_llama_scout", "meta-llama/llama-4-scout-17b-16e-instruct"),
        _wrap_groq("groq_llama_maverick", "meta-llama/llama-4-maverick-17b-128e-instruct"),
    ]


# Registry of models per modality. Entries are a mix of plain descriptor dicts
# (from init_vllm_model / init_groq_model) and GroqLLMWrapper instances.
# NOTE(review): "groq_audio_model" and "groq_vision_model" are stored as raw
# descriptor dicts rather than wrappers — confirm consumers handle both shapes.
models = {
    "video": [
        init_vllm_model("llava_next_video", "llava-hf/LLaVA-NeXT-Video-7B-hf", tensor_parallel_size=2, max_tokens=1024),
        init_vllm_model("videomae", "MCG-NJU/videomae-large-static", tensor_parallel_size=2),
        *_llama4_wrappers(),
    ],
    "audio": [
        init_vllm_model("wav2vec2", "facebook/wav2vec2-large-robust-ft-swbd-300h", tensor_parallel_size=1),
        init_vllm_model("whisper", "openai/whisper-large-v3", tensor_parallel_size=2),
        init_groq_model("groq_audio_model", "whisper-large-v3-turbo"),
        *_llama4_wrappers(),
    ],
    "image": [
        init_vllm_model("llava_image", "llava-hf/llava-onevision-qwen2-7b-ov-hf", tensor_parallel_size=2),
        init_vllm_model("clip", "openai/clip-vit-large-patch14", tensor_parallel_size=1),
        init_groq_model("groq_vision_model", "llama-3.2-90b-vision-preview"),
        *_llama4_wrappers(),
    ],
    "text": [
        _wrap_groq("groq_text_model", "llama-3.3-70b-versatile"),
        *_llama4_wrappers(),
    ],
}

[DEBUG] Initializing models...
[DEBUG] Initializing model 'llava_next_video' with id 'llava-hf/LLaVA-NeXT-Video-7B-hf' with params: {'task': 'generate', 'max_model_len': 4096, 'dtype': 'half', 'gpu_memory_utilization': 0.85, 'cpu_offload_gb': 8, 'enforce_eager': True, 'trust_remote_code': True, 'tensor_parallel_size': 2, 'max_tokens': 1024}
[DEBUG] Initializing model 'videomae' with id 'MCG-NJU/videomae-large-static' with params: {'task': 'generate', 'max_model_len': 4096, 'dtype': 'half', 'gpu_memory_utilization': 0.85, 'cpu_offload_gb': 8, 'enforce_eager': True, 'trust_remote_code': True, 'tensor_parallel_size': 2}
[DEBUG] Loading model 'groq_llama_scout' with id 'meta-llama/llama-4-scout-17b-16e-instruct' using API key.
[DEBUG] Loading model 'groq_llama_maverick' with id 'meta-llama/llama-4-maverick-17b-128e-instruct' using API key.
[DEBUG] Initializing model 'wav2vec2' with id 'facebook/wav2vec2-large-robust-ft-swbd-300h' with params: {'task': 'generate', 'max_model_len': 4096, 'dt