<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Deepfake_and_Manipulated_Media_Analysis_R%26D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Deepfake Detection and Manipulated Media Analysis Using a Multi-Agent System and a Compound AI Approach**

In [6]:
# Installation
%pip install -q langchain langchain-community langgraph torch transformers opencv-python librosa numpy

In [7]:
# Required imports
import json
import os
from typing import Any, Dict, List, Optional

import cv2
import librosa
import numpy as np
import torch
from langchain.tools import tool
from langchain_community.chat_models import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langgraph.graph import Graph, END
from transformers import (
    AutoProcessor,
    AutoModelForVideoClassification,
    AutoModelForAudioClassification,
    CLIPProcessor,
    CLIPModel
)

In [8]:
# Environment setup
def setup_environment():
    """Seed placeholder API keys and pick the torch compute device.

    ``OPENAI_API_KEY`` and ``ANTHROPIC_API_KEY`` are set to the placeholder
    string ``"your-api-key"`` only when they are absent from the environment,
    so credentials exported before this call are preserved.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` is true,
        otherwise ``cpu``.
    """
    for key_name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY"):
        # setdefault rather than assignment: never clobber a real key that
        # the user already configured.
        os.environ.setdefault(key_name, "your-api-key")

    # Configure device
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
# Preprocessing functions
def preprocess_video(video_path: str) -> Dict[str, Any]:
    """Decode all frames and the audio track of a video file.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``, ``librosa.load`` and
            ``extract_metadata``.

    Returns:
        A dict with three keys:
        ``"frames"``   - list of frames in the order ``cap.read()`` produced them,
        ``"audio"``    - the ``(y, sr)`` pair returned by ``librosa.load``,
        ``"metadata"`` - the result of ``extract_metadata(video_path)``.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # NOTE(review): every decoded frame is kept in memory; long videos may
        # need sampling -- confirm expected input sizes.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Release the capture handle even if decoding raises part-way through.
        cap.release()

    # Extract audio
    y, sr = librosa.load(video_path)

    return {
        "frames": frames,
        "audio": (y, sr),
        "metadata": extract_metadata(video_path)
    }

def extract_metadata(file_path: str) -> Dict[str, Any]:
    """Collect file-system metadata for a media file.

    Args:
        file_path: Path handed to ``os.stat``.

    Returns:
        ``{"file_info": <os.stat result>}`` on success, or
        ``{"error": <exception text>}`` when the lookup raises.
    """
    try:
        stat_info = os.stat(file_path)
    except Exception as exc:
        # Best effort: report the failure in the result instead of raising.
        return {"error": str(exc)}
    # Add more metadata extraction as needed
    return {"file_info": stat_info}

In [10]:
# Visual Analysis Agents
def spatial_inconsistency_agent(frames: List[np.ndarray]) -> Dict[str, float]:
    """Score frames with a pretrained video classifier and average the result.

    Each frame is run through the ``microsoft/xclip-base-patch32`` checkpoint
    separately; the softmax probability of class index 0 is collected per
    frame and the mean over all frames is returned.

    Args:
        frames: Frames to score, one array per frame.

    Returns:
        ``{"spatial_confidence": <mean class-0 probability>}``. For an empty
        ``frames`` the value is NaN (the mean of no values), returned without
        loading the model.
    """
    # len() instead of truthiness so an ndarray of frames is also accepted.
    if len(frames) == 0:
        return {"spatial_confidence": float("nan")}

    # NOTE(review): confirm this checkpoint is loadable through
    # AutoModelForVideoClassification and that class index 0 is the class of
    # interest -- neither is established by this file.
    model = AutoModelForVideoClassification.from_pretrained("microsoft/xclip-base-patch32")
    processor = AutoProcessor.from_pretrained("microsoft/xclip-base-patch32")

    scores = []
    # Inference only: without no_grad every forward pass records an autograd
    # graph, which wastes memory across a long frame list.
    with torch.no_grad():
        for frame in frames:
            # NOTE(review): frames read via cv2 are presumably BGR; verify the
            # channel order the processor expects.
            inputs = processor(images=frame, return_tensors="pt")
            probs = model(**inputs).logits.softmax(dim=1)
            # Index [0, 0] explicitly: calling .item() on the whole row fails
            # as soon as the model has more than one class.
            scores.append(probs[0, 0].item())

    return {"spatial_confidence": float(np.mean(scores))}

def temporal_coherence_agent(frames: List[np.ndarray]) -> Dict[str, float]:
    """Report a temporal-coherence score for a frame sequence.

    Currently a stub: ``frames`` is not inspected and a fixed score is
    returned under the ``"temporal_confidence"`` key.
    """
    # Implementation using TimeSformer or similar
    placeholder_score = 0.85  # Placeholder
    return {"temporal_confidence": placeholder_score}

In [11]:
# Audio Analysis Agents
def audio_analysis_agent(audio_data: tuple) -> Dict[str, float]:
    """Score an audio track for signs of manipulation.

    Args:
        audio_data: A ``(samples, sample_rate)`` pair; it is unpacked into
            exactly two values.

    Returns:
        ``{"audio_confidence": 0.75}`` -- a fixed placeholder. The mel
        spectrogram is computed but does not yet influence the score.
    """
    samples, sample_rate = audio_data

    # Spectral analysis (result is not consumed yet)
    mel_spectrogram = librosa.feature.melspectrogram(y=samples, sr=sample_rate)

    # Implement more sophisticated audio analysis here
    placeholder_score = 0.75  # Placeholder
    return {"audio_confidence": placeholder_score}

In [12]:
# Semantic Analysis Agents
def setup_llm_agent():
    """Build the chat model and prompt used for semantic analysis.

    Returns:
        A ``(llm, prompt)`` pair: a ``ChatOpenAI`` instance for the
        ``gpt-4-vision-preview`` model and a two-message chat prompt whose
        human message is filled from the ``input`` variable.
    """
    chat_model = ChatOpenAI(model="gpt-4-vision-preview")
    message_templates = [
        ("system", "Analyze the following media for signs of manipulation."),
        ("human", "{input}"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(message_templates)
    return chat_model, chat_prompt

@tool
def semantic_analysis_agent(input_data: Dict[str, Any]) -> Dict[str, float]:
    """Perform semantic analysis using LLMs"""
    # The docstring above is deliberately left unchanged: the @tool decorator
    # uses it as the tool description, so it is runtime text.
    #
    # Sends str(input_data) through prompt -> LLM -> JSON parser and returns
    # {"semantic_confidence": <float>}. The value is the parsed "confidence"
    # field, or 0.0 when the field is missing, the parsed JSON is not an
    # object, or the field is not convertible to float.
    llm, prompt = setup_llm_agent()

    chain = prompt | llm | JsonOutputParser()
    # NOTE(review): str(input_data) stringifies whatever the caller passes;
    # verify the payload size is acceptable for the model.
    result = chain.invoke({"input": str(input_data)})

    # The parser yields whatever JSON value the model produced, which is not
    # guaranteed to be an object.
    raw_confidence = result.get("confidence", 0.0) if isinstance(result, dict) else 0.0
    try:
        confidence = float(raw_confidence)
    except (TypeError, ValueError):
        # Same fallback the original used for a missing key.
        confidence = 0.0

    return {"semantic_confidence": confidence}

In [13]:
# Integration and Decision Making
def feature_fusion(
    results: List[Dict[str, float]],
    weights: Optional[Dict[str, float]] = None,
) -> Dict[str, float]:
    """Fuse per-agent confidence scores into one weighted average.

    Every entry whose key contains the substring ``"confidence"`` takes part
    in the average; all other entries are ignored.

    Args:
        results: One dict per agent, mapping score names to values.
        weights: Optional mapping from score name to weight. Scores without
            an entry, and all scores when ``weights`` is omitted, weigh 1.0.

    Returns:
        ``{"final_confidence": <weighted average as float>}``.

    Raises:
        ValueError: If ``results`` contains no confidence entry at all.
        ZeroDivisionError: Raised by ``np.average`` if the applied weights
            sum to zero.
    """
    key_weights = weights or {}
    confidences = []
    applied_weights = []

    for result in results:
        for key, value in result.items():
            if "confidence" in key:
                confidences.append(value)
                # Can be adjusted based on agent reliability
                applied_weights.append(key_weights.get(key, 1.0))

    if not confidences:
        # Fail with a clear message instead of numpy's opaque
        # "Weights sum to zero" ZeroDivisionError.
        raise ValueError("feature_fusion received no confidence values to fuse")

    weighted_avg = np.average(confidences, weights=applied_weights)
    return {"final_confidence": float(weighted_avg)}

def make_decision(fusion_result: Dict[str, float], threshold: float = 0.7) -> Dict[str, Any]:
    """Turn the fused confidence into a fake/not-fake verdict.

    Args:
        fusion_result: Dict holding the fused score under
            ``"final_confidence"``.
        threshold: Scores strictly below this value are flagged as fake.
            Defaults to 0.7.

    Returns:
        A dict with ``"is_fake"`` (bool), ``"confidence"`` (the input score)
        and ``"explanation"`` (a formatted summary string).

    Raises:
        KeyError: If ``"final_confidence"`` is missing from ``fusion_result``.
    """
    confidence = fusion_result["final_confidence"]

    # Written as "not >=" rather than "<" so that a NaN score (every
    # comparison with NaN is False) is flagged as fake instead of silently
    # passing as authentic. bool() keeps the value JSON-serializable even if
    # a numpy scalar is passed in.
    is_fake = bool(not (confidence >= threshold))

    return {
        "is_fake": is_fake,
        "confidence": confidence,
        "explanation": f"Detection confidence: {confidence:.2f}"
    }

In [14]:
# Main Pipeline
def create_detection_graph():
    """Create LangGraph workflow

    Builds an uncompiled ``StateGraph`` with this topology::

        preprocess -> visual_analysis   \
        preprocess -> audio_analysis     >-> final_decision -> END
        preprocess -> semantic_analysis /

    The shared state starts as ``{"input_path": <path>}``; each node returns
    a partial update that is merged into the state under its own key.

    Returns:
        The uncompiled graph. Call ``.compile()`` on it and then
        ``.invoke({"input_path": ...})``; the final state carries the verdict
        under the ``"decision"`` key.
    """
    # Local imports so this block does not rely on the file-level import list.
    from typing import TypedDict

    from langgraph.graph import StateGraph

    class DetectionState(TypedDict, total=False):
        # Keys written by the nodes below; total=False because the state is
        # filled in incrementally as the graph runs.
        input_path: str
        video_data: Dict[str, Any]
        visual_results: Dict[str, Dict[str, float]]
        audio_results: Dict[str, float]
        semantic_results: Dict[str, float]
        decision: Dict[str, Any]
        end: bool

    def preprocess(state):
        video_data = preprocess_video(state["input_path"])
        return {"video_data": video_data}

    def visual_analysis(state):
        frames = state["video_data"]["frames"]
        spatial_results = spatial_inconsistency_agent(frames)
        temporal_results = temporal_coherence_agent(frames)
        return {
            "visual_results": {
                "spatial": spatial_results,
                "temporal": temporal_results
            }
        }

    def audio_analysis(state):
        audio_data = state["video_data"]["audio"]
        results = audio_analysis_agent(audio_data)
        return {"audio_results": results}

    def semantic_analysis(state):
        # semantic_analysis_agent is wrapped by @tool, so it is invoked with
        # a dict keyed by its parameter name rather than called positionally.
        results = semantic_analysis_agent.invoke({"input_data": state["video_data"]})
        return {"semantic_results": results}

    def decision(state):
        all_results = [
            state["visual_results"]["spatial"],
            state["visual_results"]["temporal"],
            state["audio_results"],
            state["semantic_results"]
        ]

        fusion_result = feature_fusion(all_results)
        final_decision = make_decision(fusion_result)
        return {"decision": final_decision, "end": True}

    workflow = StateGraph(DetectionState)

    # Nodes are registered with add_node; the graph object has no decorator
    # for this.
    workflow.add_node("preprocess", preprocess)
    workflow.add_node("visual_analysis", visual_analysis)
    workflow.add_node("audio_analysis", audio_analysis)
    workflow.add_node("semantic_analysis", semantic_analysis)
    # Registered as "final_decision" because a node name may not collide with
    # the "decision" state key that callers read from the result.
    workflow.add_node("final_decision", decision)

    # Define workflow
    workflow.set_entry_point("preprocess")
    workflow.add_edge("preprocess", "visual_analysis")
    workflow.add_edge("preprocess", "audio_analysis")
    workflow.add_edge("preprocess", "semantic_analysis")
    # List form: run the decision node once, after all three branches finish.
    workflow.add_edge(
        ["visual_analysis", "audio_analysis", "semantic_analysis"],
        "final_decision",
    )
    workflow.add_edge("final_decision", END)

    return workflow

In [15]:
# Main execution function
def detect_deepfake(video_path: str) -> Dict[str, Any]:
    """Main function to run deepfake detection

    Args:
        video_path: Path of the video to analyze; passed to the graph as the
            ``"input_path"`` entry of the initial state.

    Returns:
        The value stored under ``"decision"`` in the graph's result.
    """
    # Setup (called for its environment side effects; the returned device is
    # not used by the pipeline yet)
    setup_environment()

    # Create and compile workflow: a LangGraph graph must be compiled into a
    # runnable before it can be executed.
    workflow = create_detection_graph()
    app = workflow.compile()

    # Run detection
    config = {"input_path": video_path}
    result = app.invoke(config)

    return result["decision"]

In [None]:
# Example usage
# Runs the full pipeline on one video and prints the verdict as indented JSON.
if __name__ == "__main__":
    # Placeholder path -- point this at a real video file before running.
    video_path = "path/to/your/video.mp4"
    result = detect_deepfake(video_path)
    print(json.dumps(result, indent=2))