<a href="https://colab.research.google.com/github/KaifAhmad1/deepfake/blob/main/audio_deepfake_detection_enahced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

####  **Audio Deepfake Detection, Fake Calls, Spoofing, Fraud Calls and Voice Cloning Analysis for Defensive Forensics**
This notebook provides a comprehensive forensic analysis pipeline for audio files, focused on detecting signs of deepfakes, spoofing, and manipulation. It integrates several analysis techniques: signal processing, feature extraction, traditional ML/DSP-based detection methods, SpeechBrain models (stubbed for demonstration), and state-of-the-art multimodal LLMs via vLLM and Groq.


In [1]:
!pip install -q numpy librosa soundfile matplotlib IPython webrtcvad pydub noisereduce pyAudioAnalysis speechbrain langchain openai langgraph transformers vllm requests ipywidgets audiomentations hmmlearn eyed3 langchain_community praat-parselmouth webrtcvad groq

In [6]:
import os
import json
import asyncio
import time
import sys
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import torch
import librosa
import librosa.display
import matplotlib.pyplot as plt
import nest_asyncio
import ipywidgets as widgets
import webrtcvad
import noisereduce as nr
import parselmouth
from pydub import AudioSegment
from moviepy.editor import VideoFileClip
from transformers import AutoTokenizer
from vllm import LLM, EngineArgs, SamplingParams
import IPython.display as ipd
from IPython.display import display, clear_output, HTML, Image, Markdown

# --- Optional Dependency Handling & Imports ---
# Each optional package is probed once; a module-level HAS_* boolean records
# whether the import succeeded, and a warning is printed when it did not.
try:
    import soundfile as sf
except ImportError:
    HAS_SOUNDFILE = False
    print("[WARN] soundfile library not found (`pip install soundfile`). Some operations might be slower or fail.")
else:
    HAS_SOUNDFILE = True

try:
    import pyloudnorm as pyln
except ImportError:
    HAS_PYLOUDNORM = False
    print("[WARN] pyloudnorm library not found (`pip install pyloudnorm`). Loudness normalization disabled.")
else:
    HAS_PYLOUDNORM = True

try:
    from scipy import signal
except ImportError:
    HAS_SCIPY = False
    print("[WARN] scipy library not found (`pip install scipy`). De-humming feature disabled.")
else:
    HAS_SCIPY = True

try:
    import seaborn as sns
except ImportError:
    HAS_SEABORN = False
    print("[WARN] seaborn library not found (`pip install seaborn`). Enhanced plots disabled.")
else:
    HAS_SEABORN = True

# --- SpeechBrain & LLM Integrations ---
# SpeakerRecognition is a hard requirement: a failed import stops the cell.
from speechbrain.inference.speaker import SpeakerRecognition

try:
    from speechbrain.augment import AddNoise
except ImportError:
    AddNoise = None

# Resolve each optional SpeechBrain class on its own. Importing both names in a
# single statement meant that one missing name set *both* to None.
try:
    # Current location, alongside the `speechbrain.inference` import above.
    from speechbrain.inference.classifiers import EncoderClassifier
except ImportError:
    try:
        # Older releases expose the class under `speechbrain.pretrained`.
        from speechbrain.pretrained import EncoderClassifier
    except ImportError:
        EncoderClassifier = None

try:
    from speechbrain.pretrained import LanguageIdentification
except ImportError:
    LanguageIdentification = None

if EncoderClassifier is None or LanguageIdentification is None:
    print("[WARN] SpeechBrain pretrained models not fully available. Some features might be limited.")

try:
    from groq import Groq, AsyncGroq
    HAS_GROQ = True
except ImportError:
    print("[WARN] Groq library not installed (`pip install groq`). Groq report generation disabled.")
    HAS_GROQ = False
    # Define both client names so later references fail on a None check
    # rather than with a NameError.
    Groq = None
    AsyncGroq = None

# --- Event loop ---
# Patch asyncio so that event loops can be nested (nest_asyncio's purpose);
# needed when coroutines are driven from an environment that already runs a loop.
nest_asyncio.apply()

[WARN] pyloudnorm library not found (`pip install pyloudnorm`). Loudness normalization disabled.
[WARN] SpeechBrain pretrained models not fully available. Some features might be limited.


In [7]:
# --- Configuration & Constants ---
# Pipeline-wide tunables, looked up by upper-case setting name.
GENERAL_PIPELINE_SETTINGS = dict(
    TARGET_SR=16000,
    VAD_AGGRESSIVENESS=2,
    # Used as the worker count of the module-level thread pool; falls back to 4
    # when os.cpu_count() cannot determine the CPU count.
    MAX_CONCURRENT_TASKS=os.cpu_count() or 4,
    # Minimum level that print_message() emits.
    PRINT_LEVEL="INFO",
    LOUDNESS_TARGET_LUFS=-23.0,
    # Only on when both pyloudnorm and soundfile were importable.
    ENABLE_LOUDNESS_NORMALIZATION=HAS_PYLOUDNORM and HAS_SOUNDFILE,
    ENABLE_NOISE_REDUCTION=True,
    # Only on when scipy was importable.
    ENABLE_DEHUMMING=HAS_SCIPY,
    MAX_VLLM_TOKENS=350,
    VLLM_TEMPERATURE=0.1,
    GROQ_MODEL="llama3-70b-8192",
    GROQ_TEMPERATURE=0.1,
    VLLM_MODELS_TO_RUN=["qwen2_audio", "ultravox"],
)

# Model source identifiers, keyed by the role each model plays in the pipeline.
MODEL_PATHS = dict(
    SPKREC_MODEL_SOURCE="speechbrain/spkrec-ecapa-voxceleb",
    ANTISPOOF_MODEL_SOURCE="speechbrain/anti-spoofing-ecapa-voxceleb",
    LANGID_MODEL_SOURCE="speechbrain/lang-id-commonlanguage_ecapa",
    EMOTION_MODEL_SOURCE="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
)

# --- Resource Management ---
# Shared thread pool that run_sync_in_executor() submits blocking calls to.
executor = ThreadPoolExecutor(
    max_workers=GENERAL_PIPELINE_SETTINGS["MAX_CONCURRENT_TASKS"],
    thread_name_prefix='ForensicWorker',
)
# Module-level registry for vLLM engines; starts out empty.
vllm_engines = dict()

# --- Utility Functions ---
def print_message(level, message):
    """Print a timestamped log line if ``level`` meets the configured threshold.

    The threshold is ``GENERAL_PIPELINE_SETTINGS["PRINT_LEVEL"]``. Level names
    that are not DEBUG/INFO/WARN/ERROR are ranked like INFO, both for the
    message and for the threshold.

    Args:
        level: Severity label shown in the output, e.g. ``"WARN"``.
        message: Text to print after the timestamp and level tag.
    """
    severity_rank = {"DEBUG": 0, "INFO": 1, "WARN": 2, "ERROR": 3}
    message_rank = severity_rank.get(level, 1)
    threshold = severity_rank.get(GENERAL_PIPELINE_SETTINGS["PRINT_LEVEL"], 1)
    if message_rank < threshold:
        return
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"{stamp} [{level:<5}] {message}")

def get_file_extension(file_path):
    """Return the lower-cased extension of ``file_path``, including the dot.

    Returns an empty string when ``os.path.splitext`` finds no extension.
    """
    _, extension = os.path.splitext(file_path)
    return extension.lower()

def is_video_file(ext):
    """Return True when ``ext`` is one of the recognised video extensions.

    The comparison ignores case, so ``".MP4"`` is accepted as well as the
    lower-cased form produced by ``get_file_extension``.

    Args:
        ext: File extension including the leading dot, e.g. ``".mp4"``.

    Returns:
        bool: True for .mp4, .avi, .mov, .mkv and .webm; False for anything
        else, including non-string values such as ``None``.
    """
    video_extensions = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
    # Guard on type first so non-string input keeps returning False instead of
    # raising on .lower().
    return isinstance(ext, str) and ext.lower() in video_extensions

async def run_sync_in_executor(func, *args, **kwargs):
    """Run a blocking callable on the module-level ``executor`` and await its result.

    Args:
        func: Synchronous callable to run off the event loop.
        *args: Positional arguments passed to ``func``.
        **kwargs: Keyword arguments passed to ``func``.

    Returns:
        Whatever ``func`` returns. An exception raised by ``func`` propagates
        to the awaiting caller.
    """
    loop = asyncio.get_running_loop()
    if kwargs:
        # loop.run_in_executor only forwards positional arguments, so keyword
        # arguments are bound in a zero-argument wrapper first.
        def _bound_call():
            return func(*args, **kwargs)

        return await loop.run_in_executor(executor, _bound_call)
    return await loop.run_in_executor(executor, func, *args)

def set_device_for_engine():
    """Return ``"cuda"`` when PyTorch reports CUDA as available, else ``"cpu"``."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

# --- Data Model ---
class ForensicReport:
    """Container for the results of one forensic analysis of an audio file.

    All fields are passed as optional keyword arguments. A field that is not
    supplied takes the default shown in ``__init__``; keyword arguments that do
    not match a field are ignored.
    """

    def __init__(self, **kwargs):
        """Populate the report fields from ``kwargs``, applying defaults."""
        self.file_path = kwargs.get("file_path")
        # The defaults below mark a report that was never filled in.
        self.verdict = kwargs.get("verdict", "Error: Report not generated")
        self.mean_risk_score = kwargs.get("mean_risk_score", -1.0)
        self.confidence = kwargs.get("confidence", 0.0)
        self.all_model_scores = kwargs.get("all_model_scores", {})
        self.all_anomalies = kwargs.get("all_anomalies", [])
        self.groq_summary = kwargs.get("groq_summary", "N/A")
        self.vllm_outputs = kwargs.get("vllm_outputs", {})
        self.features = kwargs.get("features", {})
        self.metrics = kwargs.get("metrics", {})
        self.speaker_info = kwargs.get("speaker_info", {})
        self.quality_info = kwargs.get("quality_info", {})
        self.loudness_info = kwargs.get("loudness_info", {})
        self.compression_info = kwargs.get("compression_info", {})
        self.reverb_info = kwargs.get("reverb_info", {})
        self.edit_detection_info = kwargs.get("edit_detection_info", {})
        self.plots = kwargs.get("plots", {})
        self.processing_times = kwargs.get("processing_times", {})
        # Naive UTC timestamp in ISO-8601 form, taken at construction time.
        self.timestamp = kwargs.get("timestamp", datetime.utcnow().isoformat())

    def json(self, indent=2):
        """Return the report's attributes as a JSON string.

        Args:
            indent: Indentation passed through to ``json.dumps``.
        """
        serializable_data = self._make_serializable(self.__dict__)
        return json.dumps(serializable_data, indent=indent)

    def _make_serializable(self, data):
        """Recursively convert ``data`` into types ``json.dumps`` accepts.

        Conversions:
            * dict -> dict with converted values (keys are left untouched)
            * list / tuple / set -> list with converted items
            * numpy array -> nested list, with every element converted
            * numpy bool / integer -> ``bool`` / ``int``
            * Python or numpy float -> ``float``; NaN and +/-Inf become ``None``
              because they are not valid JSON
            * Python or numpy complex -> ``{'real': ..., 'imag': ...}``
            * ``np.void`` -> ``None``

        Anything else is returned unchanged.

        The numpy checks use the abstract scalar classes (``np.integer``,
        ``np.floating``, ``np.complexfloating``) rather than aliases such as
        ``np.float_`` and ``np.complex_``, which NumPy 2.0 removed.
        """
        if isinstance(data, dict):
            return {key: self._make_serializable(value) for key, value in data.items()}
        if isinstance(data, (list, tuple, set)):
            return [self._make_serializable(item) for item in data]
        if isinstance(data, np.ndarray):
            # tolist() yields plain Python values; recurse so NaN/Inf and
            # complex elements get the same treatment as scalars.
            return self._make_serializable(data.tolist())
        if isinstance(data, np.bool_):
            return bool(data)
        if isinstance(data, np.integer):
            return int(data)
        if isinstance(data, (float, np.floating)):
            number = float(data)
            return number if np.isfinite(number) else None
        if isinstance(data, (complex, np.complexfloating)):
            # The parts of a numpy complex are numpy floats, so convert them too.
            return {
                'real': self._make_serializable(data.real),
                'imag': self._make_serializable(data.imag),
            }
        if isinstance(data, np.void):
            return None
        return data