<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Deepfake_and_Manipulated_Media_Analysis_R%26D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Deepfake Detection and Manipulated Media Analysis Using a Multi-Agent System and a Compound AI Approach**

In [1]:
!pip install -q langchain langchain-community langgraph torch transformers opencv-python librosa numpy face-recognition dlib mediapipe scipy pillow tqdm pydantic

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.2/138.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.7/44.7 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone


In [13]:
# Import additional required packages
from typing import Dict, List, Any, Tuple, Optional
import torch
import numpy as np
import cv2
import librosa
import face_recognition
import mediapipe as mp
from pydantic import BaseModel, Field
from datetime import datetime
import json
from transformers import (
    AutoProcessor, AutoModelForVideoClassification, AutoModelForAudioClassification,
    CLIPProcessor, CLIPModel, Blip2Processor, Blip2ForConditionalGeneration,
    VideoMAEFeatureExtractor, VideoMAEForVideoClassification, WhisperProcessor,
    WhisperForAudioClassification, LayoutLMv3Processor, LayoutLMv3ForSequenceClassification,
    OwlViTProcessor, OwlViTForObjectDetection, InstructBlipProcessor,
    InstructBlipForConditionalGeneration, ImageGPTForCausalImageModeling,
    TimesformerForVideoClassification, ASTForAudioClassification, Wav2Vec2ForSequenceClassification,
    LlavaForConditionalGeneration, LlavaProcessor
)
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langgraph.graph import Graph, END

In [14]:
# Enhanced Pydantic Models
class AnalysisMetrics(BaseModel):
    """Result record returned by a single analysis agent.

    Carries the agent's numeric score, the anomaly messages it raised, a
    coarse confidence label, the name of the detection method and an optional
    timestamp. The score is described as lying in [0, 1] but no range
    validation is declared on the field.
    """

    # Required fields are marked with an explicit `default=...`.
    score: float = Field(default=..., description="Confidence score between 0 and 1")
    # Each instance gets its own fresh list.
    anomalies: List[str] = Field(description="Detected anomalies", default_factory=list)
    confidence_level: str = Field(default=..., description="Low/Medium/High confidence assessment")
    detection_method: str = Field(default=..., description="Method used for detection")
    # Optional; None when the producer supplies no timestamp.
    timestamp: Optional[float] = Field(default=None, description="Timestamp of detection")

class DetailedModalityAnalysis(BaseModel):
    """Bundle of per-modality results, one AnalysisMetrics record per modality.

    All nine fields are declared without defaults, so each must be supplied
    when the model is constructed.
    """
    # Field names match the modality keys used when the report is assembled.
    visual: AnalysisMetrics
    temporal: AnalysisMetrics
    facial: AnalysisMetrics
    audio: AnalysisMetrics
    semantic: AnalysisMetrics
    behavioral: AnalysisMetrics
    emotional: AnalysisMetrics
    contextual: AnalysisMetrics
    biometric: AnalysisMetrics

class ForensicEvidence(BaseModel):
    """A single evidence item attached to a forensic report.

    Attributes are documented inline. All fields except ``timestamp`` are
    required.
    """
    # Category of the evidence (the report builder stores the modality name here).
    type: str
    # Human-readable description of what was found.
    description: str
    # Score associated with the finding.
    confidence: float
    # Free-text description of where in the media the finding applies.
    location: str
    # Explicit ``None`` default: without it the field is optional under
    # pydantic v1 but *required* under pydantic v2. The default makes both
    # versions behave the same and matches ``AnalysisMetrics.timestamp``.
    timestamp: Optional[float] = None
    # Arbitrary extra data backing the finding.
    supporting_data: Dict[str, Any]

class EnhancedForensicReport(BaseModel):
    """Top-level forensic report produced at the end of the pipeline.

    Combines file metadata, the per-modality analysis, the derived evidence
    items, the verdict and scores, and bookkeeping such as the chain of
    custody. Only ``timestamp`` has a default; every other field is required.
    """
    case_id: str = Field(..., description="Unique identifier for the analysis case")
    # Defaults to the creation time via datetime.now (naive, local time).
    timestamp: datetime = Field(default_factory=datetime.now)
    file_metadata: Dict[str, Any]
    analysis_results: DetailedModalityAnalysis
    evidence_items: List[ForensicEvidence]
    final_verdict: str
    confidence_score: float
    risk_assessment: str
    tampering_probability: float
    method_signatures: List[str]
    artifacts_detected: List[str]
    chain_of_custody: List[Dict[str, Any]]
    recommendations: List[str]
    technical_notes: List[str]

In [15]:
def setup_enhanced_models():
    """Load every model, processor and LLM client used by the pipeline.

    Returns:
        A dict with two keys:
            "device": the torch device selected here (CUDA when available,
                otherwise CPU).
            "models": a registry keyed by short name, with two nested dicts
                under "processors" and "llms".

    Note:
        The device is only selected and returned; nothing in this function
        moves a model onto it.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Checkpoints that are loaded twice (once as model, once as processor).
    clip_ckpt = "openai/clip-vit-large-patch14"
    owlvit_ckpt = "google/owlvit-base-patch32"
    instructblip_ckpt = "Salesforce/instructblip-vicuna-7b"
    llava_ckpt = "llava-hf/llava-1.5-13b-hf"
    whisper_ckpt = "openai/whisper-large-v3"

    # Confidence threshold shared by all MediaPipe solutions below.
    mp_confidence = 0.7

    registry = {}

    # Video analysis
    registry["videomae"] = VideoMAEForVideoClassification.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")
    registry["timesformer"] = TimesformerForVideoClassification.from_pretrained("facebook/timesformer-base-finetuned-k400")

    # Vision models
    registry["clip"] = CLIPModel.from_pretrained(clip_ckpt)
    registry["owlvit"] = OwlViTForObjectDetection.from_pretrained(owlvit_ckpt)
    registry["instructblip"] = InstructBlipForConditionalGeneration.from_pretrained(instructblip_ckpt)
    registry["llava"] = LlavaForConditionalGeneration.from_pretrained(llava_ckpt)

    # Audio models
    registry["whisper"] = WhisperForAudioClassification.from_pretrained(whisper_ckpt)
    registry["wav2vec2"] = Wav2Vec2ForSequenceClassification.from_pretrained("facebook/wav2vec2-base")
    registry["ast"] = ASTForAudioClassification.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")

    # Face / body analysis (MediaPipe)
    registry["face_detector"] = mp.solutions.face_detection.FaceDetection(
        min_detection_confidence=mp_confidence
    )
    registry["face_mesh"] = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        min_detection_confidence=mp_confidence,
        min_tracking_confidence=mp_confidence,
    )
    registry["pose_estimator"] = mp.solutions.pose.Pose(
        static_image_mode=False,
        min_detection_confidence=mp_confidence,
        min_tracking_confidence=mp_confidence,
    )

    # Processors paired with the models above
    processors = {}
    processors["clip"] = CLIPProcessor.from_pretrained(clip_ckpt)
    processors["owlvit"] = OwlViTProcessor.from_pretrained(owlvit_ckpt)
    processors["instructblip"] = InstructBlipProcessor.from_pretrained(instructblip_ckpt)
    processors["whisper"] = WhisperProcessor.from_pretrained(whisper_ckpt)
    processors["llava"] = LlavaProcessor.from_pretrained(llava_ckpt)
    registry["processors"] = processors

    # LLM clients
    # NOTE(review): the "claude" entry is a ChatOpenAI client configured with
    # the model name "claude-3-opus" -- confirm the configured endpoint
    # actually serves that model.
    llm_temperature = 0.2
    llms = {}
    llms["gpt4"] = ChatOpenAI(model="gpt-4", temperature=llm_temperature)
    llms["claude"] = ChatOpenAI(model="claude-3-opus", temperature=llm_temperature)
    registry["llms"] = llms

    return {"device": device, "models": registry}

In [16]:
def enhanced_preprocess_video(video_path: str) -> Dict[str, Any]:
    """Decode a video and extract frames, per-frame quality metrics, optical flow and audio features.

    Args:
        video_path: Path to the video file; passed to both ``cv2.VideoCapture``
            and ``librosa.load``.

    Returns:
        A dict with keys:
            "frames": list of RGB frames (one per decoded frame).
            "audio": dict of raw samples and librosa features, or ``None``
                when audio extraction fails.
            "metadata": container properties reported by OpenCV.
            "frame_quality": list of per-frame metric dicts.
            "optical_flow": Farneback flow between consecutive frames
                (one entry fewer than the number of frames).

    Raises:
        IOError: if OpenCV cannot open ``video_path``.
    """
    frames = []
    frame_quality_metrics = []
    optical_flow_data = []
    audio_data = None

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Fail with a clear message instead of the ZeroDivisionError the
            # duration computation would otherwise raise on a 0 fps reading.
            raise IOError(f"Could not open video file: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # Mask to 32 bits so a negative/invalid FOURCC cannot overflow to_bytes.
        fourcc = int(cap.get(cv2.CAP_PROP_FOURCC)) & 0xFFFFFFFF

        metadata = {
            "fps": fps,
            "frame_count": int(frame_count),
            "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            # Some containers report 0 fps; avoid dividing by zero.
            "duration": float(frame_count) / float(fps) if fps and fps > 0 else 0.0,
            # errors="replace" keeps odd FOURCC bytes from raising UnicodeDecodeError.
            "codec": fourcc.to_bytes(4, byteorder='little').decode(errors="replace"),
            "bitrate": cap.get(cv2.CAP_PROP_BITRATE)
        }

        # Grayscale version of the previous frame, reused for optical flow so
        # each frame is converted only once.
        prev_gray = None

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Per-frame quality metrics (computed on the BGR frame as decoded).
            pixel_std = np.std(frame)
            pixel_mean = np.mean(frame)
            frame_metrics = {
                "blur": cv2.Laplacian(frame, cv2.CV_64F).var(),
                "noise": pixel_std,
                "brightness": pixel_mean,
                "contrast": pixel_std / pixel_mean if pixel_mean != 0 else 0,
                # countNonZero returns 0 for an edge-free frame, whereas
                # findNonZero returns None there and len(None) raises TypeError.
                "edges": cv2.countNonZero(cv2.Canny(frame, 100, 200))
            }
            frame_quality_metrics.append(frame_metrics)

            # Dense optical flow between the previous and the current frame.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_gray is not None:
                flow = cv2.calcOpticalFlowFarneback(
                    prev_gray, gray,
                    None, 0.5, 3, 15, 3, 5, 1.2, 0
                )
                optical_flow_data.append(flow)
            prev_gray = gray
    finally:
        # Always release the capture handle, even if decoding raised.
        cap.release()

    # Audio extraction is best-effort: on any failure the error is printed and
    # "audio" is returned as None.
    try:
        y, sr = librosa.load(video_path)

        audio_data = {
            "raw": y,
            "sr": sr,
            "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40),
            "spectral_contrast": librosa.feature.spectral_contrast(y=y, sr=sr),
            "chroma": librosa.feature.chroma_stft(y=y, sr=sr),
            "onset_env": librosa.onset.onset_strength(y=y, sr=sr),
            "tempo": librosa.beat.tempo(y=y, sr=sr)[0],
            "harmony": librosa.effects.harmonic(y),
            "percussive": librosa.effects.percussive(y),
            "spectral_rolloff": librosa.feature.spectral_rolloff(y=y, sr=sr),
            "zero_crossing_rate": librosa.feature.zero_crossing_rate(y),
            "spectral_bandwidth": librosa.feature.spectral_bandwidth(y=y, sr=sr)
        }
    except Exception as e:
        print(f"Audio extraction error: {e}")
        audio_data = None

    return {
        "frames": frames,
        "audio": audio_data,
        "metadata": metadata,
        "frame_quality": frame_quality_metrics,
        "optical_flow": optical_flow_data
    }

In [17]:
def emotional_coherence_agent(
    frames: List[np.ndarray],
    face_mesh: mp.solutions.face_mesh.FaceMesh,
    models: Dict[str, Any]
) -> AnalysisMetrics:
    """Analyze emotional consistency and authenticity.

    Args:
        frames: Video frames, processed in order.
        face_mesh: MediaPipe FaceMesh instance used to find facial landmarks.
        models: Model registry; this function reads ``models["processors"]["clip"]``
            and ``models["clip"]``.

    Returns:
        AnalysisMetrics whose score is the mean of the sub-scores that could
        be computed (0.0 when none could), with
        ``detection_method="emotional_analysis"``.
    """
    anomalies = []
    scores = []
    sample_stride = 30  # CLIP is run on every 30th frame

    # Facial expression analysis.
    # expression_sequence holds expressions for frames where a face was found
    # (in order); expression_by_frame maps the *original frame index* to the
    # same expression so sampled frames can be paired with the right entry.
    expression_sequence = []
    expression_by_frame = {}
    for frame_idx, frame in enumerate(frames):
        results = face_mesh.process(frame)
        if results.multi_face_landmarks:
            landmarks = results.multi_face_landmarks[0]

            # Extract key facial features
            features = extract_facial_features(landmarks)
            expression = analyze_expression(features)
            expression_sequence.append(expression)
            expression_by_frame[frame_idx] = expression

    if expression_sequence:
        # Analyze expression transitions
        transition_score = analyze_expression_transitions(expression_sequence)
        scores.append(transition_score)

        if transition_score < 0.6:
            anomalies.append("Unnatural expression transitions detected")

        # Analyze micro-expressions
        micro_expr_score = analyze_micro_expressions(expression_sequence)
        scores.append(micro_expr_score)

        if micro_expr_score < 0.5:
            anomalies.append("Suspicious micro-expression patterns detected")

    # Use CLIP for emotion-context consistency
    clip_processor = models["processors"]["clip"]
    clip_model = models["clip"]

    emotion_scores = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for frame_idx in range(0, len(frames), sample_stride):
            inputs = clip_processor(
                images=frames[frame_idx],
                return_tensors="pt",
                text=["happy face", "sad face", "neutral face"],
                padding=True,  # keep the text batch rectangular regardless of token counts
            )
            outputs = clip_model(**inputs)

            # Pair the sampled frame with the expression detected on that same
            # frame (None when no face was found there). Indexing
            # expression_sequence with the sample counter would pick an
            # unrelated frame's expression.
            emotion_consistency = analyze_emotion_context_consistency(
                outputs, expression_by_frame.get(frame_idx)
            )
            emotion_scores.append(emotion_consistency)

    if emotion_scores:
        avg_emotion_score = np.mean(emotion_scores)
        scores.append(avg_emotion_score)

        if avg_emotion_score < 0.6:
            anomalies.append("Emotion-context inconsistency detected")

    final_score = np.mean(scores) if scores else 0.0
    confidence_level = "High" if final_score > 0.8 else "Medium" if final_score > 0.6 else "Low"

    return AnalysisMetrics(
        score=float(final_score),
        anomalies=anomalies,
        confidence_level=confidence_level,
        detection_method="emotional_analysis",
        timestamp=datetime.now().timestamp()
    )

def contextual_consistency_agent(
    video_data: Dict[str, Any],
    models: Dict[str, Any]
) -> AnalysisMetrics:
    """Analyze contextual consistency using multimodal understanding.

    Args:
        video_data: Preprocessed video dict; only ``video_data["frames"]`` is read.
        models: Model registry; this function reads ``models["processors"]["llava"]``,
            ``models["llava"]``, ``models["processors"]["clip"]`` and ``models["clip"]``.

    Returns:
        AnalysisMetrics whose score is the mean of all LLaVA response scores
        and CLIP frame-to-frame similarities (0.0 when there are none), with
        ``detection_method="contextual_analysis"``.
    """
    llava_processor = models["processors"]["llava"]
    # The registry is flat (the same dict provides models["clip"] below), so
    # the model lives at models["llava"]; models["models"]["llava"] raised
    # KeyError with the registry the pipeline passes in.
    llava_model = models["llava"]

    anomalies = []
    scores = []

    # Sample frames for analysis
    sampled_frames = video_data["frames"][::30]  # Every 30th frame

    # Define context analysis prompts
    # NOTE(review): LLaVA-1.5 checkpoints normally expect a chat-style prompt
    # containing an image placeholder token -- confirm these raw prompts are
    # accepted by the processor version in use.
    context_prompts = [
        "Describe any inconsistencies in the physical environment, lighting, or objects in this scene.",
        "Analyze the relationship between the subject's actions and the environment.",
        "Identify any temporal or spatial discontinuities in this scene."
    ]

    # Inference only: disable autograd for both model passes.
    with torch.no_grad():
        for frame in sampled_frames:
            for prompt in context_prompts:
                inputs = llava_processor(images=frame, text=prompt, return_tensors="pt")
                outputs = llava_model.generate(**inputs)
                response = llava_processor.decode(outputs[0], skip_special_tokens=True)

                # Analysis based on LLaVA's response
                analysis_score = analyze_contextual_response(response)
                scores.append(analysis_score)

                if analysis_score < 0.6:
                    anomalies.append(f"Contextual inconsistency detected: {response}")

        # Use CLIP for scene consistency analysis
        clip_processor = models["processors"]["clip"]
        clip_model = models["clip"]

        previous_scene_embedding = None
        for frame in sampled_frames:
            inputs = clip_processor(images=frame, return_tensors="pt")
            scene_embedding = clip_model.get_image_features(**inputs)

            if previous_scene_embedding is not None:
                # Similarity between consecutive sampled frames.
                consistency_score = torch.cosine_similarity(
                    previous_scene_embedding, scene_embedding
                ).item()
                scores.append(consistency_score)

                if consistency_score < 0.7:
                    anomalies.append("Scene consistency break detected")

            previous_scene_embedding = scene_embedding

    final_score = np.mean(scores) if scores else 0.0
    confidence_level = "High" if final_score > 0.8 else "Medium" if final_score > 0.6 else "Low"

    return AnalysisMetrics(
        score=float(final_score),
        anomalies=anomalies,
        confidence_level=confidence_level,
        detection_method="contextual_analysis",
        timestamp=datetime.now().timestamp()
    )

def biometric_analysis_agent(
    frames: List[np.ndarray],
    models: Dict[str, Any]
) -> AnalysisMetrics:
    """Advanced biometric analysis for deepfake detection.

    Args:
        frames: Video frames, processed in order.
        models: Model registry; only ``models["face_mesh"]`` is read.

    Returns:
        AnalysisMetrics whose score is the mean of the proportion, texture,
        symmetry and blink sub-scores that were computed, with
        ``detection_method="biometric_analysis"``.
    """
    face_mesh = models["face_mesh"]
    anomalies = []
    scores = []

    # Facial landmarks analysis.
    # face_frames is kept parallel to landmark_sequences: entry k of each
    # comes from the same frame. Frames without a detected face are skipped
    # in both.
    landmark_sequences = []
    face_frames = []
    for frame in frames:
        results = face_mesh.process(frame)
        if results.multi_face_landmarks:
            landmarks = np.array([[lm.x, lm.y, lm.z] for lm in results.multi_face_landmarks[0].landmark])
            landmark_sequences.append(landmarks)
            face_frames.append(frame)

    if landmark_sequences:
        # Analyze facial proportions
        proportion_score = analyze_facial_proportions(landmark_sequences)
        scores.append(proportion_score)

        if proportion_score < 0.6:
            anomalies.append("Inconsistent facial proportions detected")

        # Analyze skin texture. Pass only the frames that produced landmarks:
        # the texture analysis pairs frames and landmarks positionally, so
        # passing all frames would mismatch them as soon as one frame has no
        # detected face.
        texture_score = analyze_skin_texture(face_frames, landmark_sequences)
        scores.append(texture_score)

        if texture_score < 0.5:
            anomalies.append("Artificial skin texture patterns detected")

        # Analyze facial symmetry
        symmetry_score = analyze_facial_symmetry(landmark_sequences)
        scores.append(symmetry_score)

        if symmetry_score < 0.6:
            anomalies.append("Abnormal facial symmetry detected")

    # Eye blinking analysis (runs even when no landmarks were found).
    # NOTE(review): presumably analyze_eye_blinking tolerates an empty
    # sequence -- confirm against its definition.
    blink_patterns = analyze_eye_blinking(landmark_sequences)
    scores.append(blink_patterns["score"])
    if blink_patterns["anomalies"]:
        anomalies.extend(blink_patterns["anomalies"])

    final_score = np.mean(scores) if scores else 0.0
    confidence_level = "High" if final_score > 0.8 else "Medium" if final_score > 0.6 else "Low"

    return AnalysisMetrics(
        score=float(final_score),
        anomalies=anomalies,
        confidence_level=confidence_level,
        detection_method="biometric_analysis",
        timestamp=datetime.now().timestamp()
    )

def analyze_facial_proportions(landmark_sequences: List[np.ndarray]) -> float:
    """Analyze consistency of facial proportions over time.

    For each landmark array, three distance ratios are computed between fixed
    landmark index pairs and scored by ``evaluate_human_proportions``; the
    per-frame scores are averaged.

    Args:
        landmark_sequences: One landmark array per frame, indexable by
            landmark index (indices up to 454 are used).

    Returns:
        Mean of the per-frame proportion scores, or 0.0 when
        ``landmark_sequences`` is empty (instead of the NaN that averaging an
        empty list would give).
    """
    # Nothing to score: return the same "no evidence" value the agents use.
    if len(landmark_sequences) == 0:
        return 0.0

    # Landmark index pairs: each ratio is |a0 - a1| / |b0 - b1|.
    # NOTE(review): the trailing descriptions are the original author's
    # labels; confirm that the index pairs really measure those quantities.
    proportion_points = {
        "eye_distance_ratio": ([33, 133], [362, 263]),  # Inter-eye distance to face width
        "nose_mouth_ratio": ([4, 14], [61, 291]),      # Nose to mouth height ratio
        "face_ratio": ([10, 152], [234, 454])         # Face height to width ratio
    }

    proportion_scores = []
    for landmarks in landmark_sequences:
        frame_proportions = {}
        for ratio_name, (p1, p2) in proportion_points.items():
            dist1 = np.linalg.norm(landmarks[p1[0]] - landmarks[p1[1]])
            dist2 = np.linalg.norm(landmarks[p2[0]] - landmarks[p2[1]])
            # A degenerate (zero-length) denominator yields a ratio of 0.
            frame_proportions[ratio_name] = dist1 / dist2 if dist2 != 0 else 0

        # Compare with known human proportions
        proportion_scores.append(evaluate_human_proportions(frame_proportions))

    return float(np.mean(proportion_scores))

def analyze_skin_texture(
    frames: List[np.ndarray],
    landmark_sequences: List[np.ndarray]
) -> float:
    """Analyze skin texture for artificial patterns.

    Frames and landmark arrays are paired positionally (``zip``), so entry k
    of ``frames`` must be the frame that produced entry k of
    ``landmark_sequences``; surplus entries of the longer list are ignored.

    Args:
        frames: Frames to analyze.
        landmark_sequences: Landmark arrays, one per frame.

    Returns:
        Mean of the per-frame texture scores, or 0.0 when there is no
        frame/landmark pair to score (instead of the NaN that averaging an
        empty list would give).
    """
    texture_scores = []

    for frame, landmarks in zip(frames, landmark_sequences):
        # Extract skin regions using landmarks
        skin_mask = create_skin_mask(frame, landmarks)
        skin_region = cv2.bitwise_and(frame, frame, mask=skin_mask)

        # Calculate texture features
        texture_features = {
            "glcm": calculate_glcm_features(skin_region),
            "lbp": calculate_lbp_features(skin_region),
            "frequency": analyze_frequency_distribution(skin_region)
        }

        # Score texture naturalness
        texture_scores.append(evaluate_texture_naturalness(texture_features))

    # Guard against empty input: np.mean([]) would return NaN with a warning.
    if not texture_scores:
        return 0.0

    return float(np.mean(texture_scores))

In [18]:
def semantic_coherence_agent(
    video_data: Dict[str, Any],
    models: Dict[str, Any]
) -> AnalysisMetrics:
    """Advanced semantic analysis using multiple LLMs.

    Each of two analysis prompts is sent to both configured LLM clients
    (GPT-4 first, then Claude); scores and anomalies are extracted from every
    response and aggregated.

    Args:
        video_data: Preprocessed video dict, passed to ``prepare_multimodal_context``.
        models: Model registry; this function reads ``models["llms"]["gpt4"]``
            and ``models["llms"]["claude"]``.

    Returns:
        AnalysisMetrics whose score is the mean of all extracted scores
        (0.0 when there are none) and whose anomalies are de-duplicated in
        first-seen order, with ``detection_method="semantic_analysis"``.
    """
    llm_gpt4 = models["llms"]["gpt4"]
    llm_claude = models["llms"]["claude"]

    anomalies = []
    scores = []

    # Define analysis prompts
    analysis_prompts = [
        ChatPromptTemplate.from_messages([
            ("system", """Analyze this video segment for semantic inconsistencies:
            1. Check for logical continuity in actions and events
            2. Verify natural cause-effect relationships
            3. Assess spatiotemporal coherence
            4. Evaluate behavioral authenticity
            5. Analyze contextual alignment

            Format output as:
            - Score (0-1)
            - List of specific anomalies found
            - Confidence level"""),
            ("human", "{context}")
        ]),
        ChatPromptTemplate.from_messages([
            ("system", """Perform deep semantic analysis of the video:
            1. Identify any narrative breaks or logical gaps
            2. Check for physical law violations
            3. Assess human behavior naturality
            4. Evaluate environmental consistency
            5. Analyze temporal flow

            Provide structured analysis with confidence scores."""),
            ("human", "{context}")
        ])
    ]

    # Prepare context with multimodal features
    context = prepare_multimodal_context(video_data)

    # Run analysis with multiple LLMs (order per prompt: GPT-4, then Claude).
    llm_clients = (llm_gpt4, llm_claude)
    for prompt in analysis_prompts:
        # format_messages keeps the system/human roles; prompt.format() would
        # flatten the template into a single string, which the chat model's
        # deprecated __call__ does not accept as a message list.
        messages = prompt.format_messages(context=context)
        for llm in llm_clients:
            result = llm.invoke(messages)
            scores.append(extract_score_from_llm_response(result))
            anomalies.extend(extract_anomalies_from_llm_response(result))

    final_score = np.mean(scores) if scores else 0.0
    confidence_level = "High" if final_score > 0.8 else "Medium" if final_score > 0.6 else "Low"

    return AnalysisMetrics(
        score=float(final_score),
        # Remove duplicates while keeping first-seen order (deterministic output).
        anomalies=list(dict.fromkeys(anomalies)),
        confidence_level=confidence_level,
        detection_method="semantic_analysis",
        timestamp=datetime.now().timestamp()
    )

def generate_enhanced_forensic_report(
    analysis_results: Dict[str, AnalysisMetrics],
    video_data: Dict[str, Any]
) -> EnhancedForensicReport:
    """Generate comprehensive forensic report with detailed analysis.

    Args:
        analysis_results: Per-modality results. Must contain the nine keys
            "visual", "temporal", "facial", "audio", "semantic", "behavioral",
            "emotional", "contextual" and "biometric".
        video_data: Preprocessed video dict; ``video_data["metadata"]`` is
            copied into the report and the whole dict is passed to the
            technical-notes and hashing helpers.

    Returns:
        A populated EnhancedForensicReport. The confidence score is the
        weighted sum of the modality scores, clamped to [0, 1].

    Raises:
        KeyError: if a required modality is missing from ``analysis_results``.
    """
    # One timestamp for the case id, the report and the custody entry, so all
    # three agree.
    now = datetime.now()

    # Create unique case ID
    case_id = f"DFD-{now.strftime('%Y%m%d-%H%M%S')}"

    # Compile modality analysis
    modality_analysis = DetailedModalityAnalysis(
        visual=analysis_results["visual"],
        temporal=analysis_results["temporal"],
        facial=analysis_results["facial"],
        audio=analysis_results["audio"],
        semantic=analysis_results["semantic"],
        behavioral=analysis_results["behavioral"],
        emotional=analysis_results["emotional"],
        contextual=analysis_results["contextual"],
        biometric=analysis_results["biometric"]
    )

    # Calculate weighted final score (weights sum to 1.0)
    weights = {
        "visual": 0.15,
        "temporal": 0.12,
        "facial": 0.15,
        "audio": 0.10,
        "semantic": 0.12,
        "behavioral": 0.12,
        "emotional": 0.08,
        "contextual": 0.08,
        "biometric": 0.08
    }

    weighted_sum = sum(
        analysis_results[key].score * weights[key]
        for key in weights
    )
    # Clamp to [0, 1]: float rounding or out-of-range modality scores could
    # otherwise give a negative tampering probability. A NaN sum also ends up
    # at 0.0 here, i.e. is treated as the most suspicious outcome.
    final_score = min(1.0, max(0.0, float(weighted_sum)))

    # Compile evidence items: one per anomaly reported by any modality.
    evidence_items = []
    for modality, result in analysis_results.items():
        for anomaly in result.anomalies:
            evidence_items.append(ForensicEvidence(
                type=modality,
                description=anomaly,
                confidence=result.score,
                location="Throughout video" if modality in ["semantic", "contextual"] else "Multiple frames",
                timestamp=result.timestamp,
                supporting_data={"detection_method": result.detection_method}
            ))

    # Determine verdict and risk assessment
    verdict = "AUTHENTIC" if final_score > 0.7 else "MANIPULATED"
    tampering_probability = 1.0 - final_score

    # Threshold chain instead of half-open interval lookup: the interval form
    # matched nothing for a score of exactly 1.0 and raised StopIteration.
    if final_score >= 0.8:
        risk_assessment = "Low Risk - High confidence in authenticity"
    elif final_score >= 0.6:
        risk_assessment = "Medium Risk - Some suspicious patterns detected"
    else:
        risk_assessment = "High Risk - Strong indicators of manipulation"

    # Compile method signatures
    method_signatures = identify_manipulation_signatures(analysis_results)

    # Generate technical notes
    technical_notes = generate_technical_notes(analysis_results, video_data)

    # Create chain of custody
    chain_of_custody = [{
        "timestamp": now,
        "action": "Initial Analysis",
        "tools_used": [
            "DeepFake Detection Pipeline v2.0",
            "Multiple AI Models",
            "Forensic Analysis Tools"
        ],
        "hash": calculate_file_hash(video_data)
    }]

    return EnhancedForensicReport(
        case_id=case_id,
        timestamp=now,
        file_metadata=video_data["metadata"],
        analysis_results=modality_analysis,
        evidence_items=evidence_items,
        final_verdict=verdict,
        confidence_score=float(final_score),
        risk_assessment=risk_assessment,
        tampering_probability=float(tampering_probability),
        method_signatures=method_signatures,
        # Distinct evidence types in first-seen order (deterministic output).
        artifacts_detected=list(dict.fromkeys(item.type for item in evidence_items)),
        chain_of_custody=chain_of_custody,
        recommendations=generate_recommendations(final_score, evidence_items),
        technical_notes=technical_notes
    )

def run_enhanced_detection(video_path: str, verbose: bool = False) -> EnhancedForensicReport:
    """Run the full detection pipeline on one video and return its report.

    Steps, in order: load models, preprocess the video, run the nine analysis
    agents, build the forensic report.

    Args:
        video_path: Path of the video to analyze.
        verbose: When True, a progress line is printed after each stage.

    Returns:
        The EnhancedForensicReport built from all agent results.

    Raises:
        Exception: any error from a stage is printed and then re-raised unchanged.
    """
    def _progress(message):
        # Progress output is opt-in.
        if verbose:
            print(message)

    try:
        # Stage 1: environment
        env = setup_enhanced_models()
        _progress("Initialized environment and models")

        # Stage 2: preprocessing
        video_data = enhanced_preprocess_video(video_path)
        _progress("Completed video preprocessing")

        # Stage 3: agents, executed one after another in report order
        frames = video_data["frames"]
        registry = env["models"]
        device = env["device"]

        analysis_results = {}
        analysis_results["visual"] = advanced_visual_analysis_agent(frames, registry, device)
        analysis_results["temporal"] = temporal_coherence_agent(
            frames, video_data.get("optical_flow", [])
        )
        analysis_results["facial"] = facial_analysis_agent(frames, registry["face_detector"])
        analysis_results["audio"] = advanced_audio_analysis_agent(
            video_data["audio"], registry, device
        )
        analysis_results["semantic"] = semantic_coherence_agent(video_data, registry)
        analysis_results["behavioral"] = behavioral_analysis_agent(video_data)
        analysis_results["emotional"] = emotional_coherence_agent(
            frames, registry["face_mesh"], registry
        )
        analysis_results["contextual"] = contextual_consistency_agent(video_data, registry)
        analysis_results["biometric"] = biometric_analysis_agent(frames, registry)
        _progress("Completed all analysis agents")

        # Stage 4: report
        report = generate_enhanced_forensic_report(analysis_results, video_data)
        _progress("Generated forensic report")

        return report

    except Exception as exc:
        # Surface the failure to the notebook user, then propagate it.
        print(f"Error in deepfake detection pipeline: {exc!s}")
        raise

In [None]:
if __name__ == "__main__":
    # Placeholder path: point this at a real video before running the cell.
    video_path = "path/to/your/video.mp4"
    forensic_report = run_enhanced_detection(video_path, verbose=True)

    # pydantic v2 deprecates .dict() in favour of .model_dump(); fall back to
    # .dict() so the cell still works on pydantic v1.
    report_dict = (
        forensic_report.model_dump()
        if hasattr(forensic_report, "model_dump")
        else forensic_report.dict()
    )

    # Print report in formatted JSON (default=str serializes datetimes and
    # other non-JSON values).
    print("\nDeepfake Detection Forensic Report:")
    print(json.dumps(report_dict, indent=2, default=str))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/22.9k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/22.7k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/486M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.52k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.42k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/613M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/104k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/9.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/77.2k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/2.02G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]