<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Face_Similarity_Matching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Reverse Image Search System for Defensive Forensics**

This notebook implements an end-to-end forensic image-analysis pipeline with the following features:
1. Enhanced model loading with multiple fallback strategies.
2. Advanced asynchronous image processing including OCR, EXIF extraction, and error level analysis.
3. Multi-agent reverse image search across multiple simulated sources (Private DB, Twitter, Reddit, Instagram, OSINT, TinEye, Google Images, Additional Sources).
4. Embedding generation with CLIP, BLIP-2, and DINOv2, combined into a single weighted consensus embedding.
5. Detailed forensic analysis using advanced prompt templates and multi-step LLM workflows.
6. Face detection, clustering, and similarity analysis with visualization.
7. Graph-based link analysis for search results with NetworkX.
8. Deduplication using perceptual image hashes.
9. Robust error handling and extensive print statements for logging.
10. Results saving including face images and network graph image.

In [1]:
!pip install -q torch transformers langchain langgraph numpy pillow requests vllm aiohttp opencv-python-headless networkx matplotlib nest_asyncio groq pytesseract

In [2]:
import asyncio
import time
import json
import copy
import getpass
from io import BytesIO
import os
import sys
from datetime import datetime
from typing import Dict, List, Tuple, Union, Any, Optional

import numpy as np
import cv2
from PIL import Image, ImageEnhance, ImageFilter, ExifTags, ImageChops
import matplotlib.pyplot as plt
import networkx as nx
import aiohttp
import torch
import requests

# LLM orchestration with Groq and LangChain
from groq import Groq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Multi-agent workflow using LangGraph
from langgraph.graph import StateGraph, END

# Patch asyncio for environments like Jupyter or nested loops
import nest_asyncio
nest_asyncio.apply()

# vLLM for optimizing model inference
from vllm import LLM, SamplingParams

# Optional OCR dependency: remember whether pytesseract could be imported so
# OCR code can check this flag instead of attempting the import again.
try:
    import pytesseract
except ImportError:
    OCR_AVAILABLE = False
else:
    OCR_AVAILABLE = True

INFO 03-19 06:45:28 [__init__.py:256] Automatically detected platform cuda.


In [3]:
#############################################
# 1. INITIALIZATION AND MODEL LOADING FUNCTIONS
#############################################
def get_api_keys() -> Tuple[str, str, str]:
    """
    Collect the Groq and Google credentials used by the pipeline.

    Each value is taken from its environment variable (GROQ_API_KEY,
    GOOGLE_CSE_ID, GOOGLE_API_KEY). When a variable is unset or empty, the
    user is prompted for the value with hidden input instead.

    Returns:
        Tuple of (groq_api_key, google_cse_id, google_api_key).
    """
    def _from_env_or_prompt(variable: str, prompt_text: str) -> str:
        stored = os.environ.get(variable)
        if stored:
            return stored
        return getpass.getpass(prompt_text)

    groq_api_key = _from_env_or_prompt("GROQ_API_KEY", "Enter your GROQ API Key: ")
    google_cse_id = _from_env_or_prompt("GOOGLE_CSE_ID", "Enter your Google CSE ID: ")
    google_api_key = _from_env_or_prompt("GOOGLE_API_KEY", "Enter your Google API Key: ")
    return groq_api_key, google_cse_id, google_api_key


def load_models(device: Optional[str] = None) -> dict:
    """
    Load the CLIP, BLIP-2 and DINOv2 models, degrading through three strategies.

    Strategies, tried in order:
      1. vLLM engines for all three checkpoints (processors are stored as None).
      2. Hugging Face ``transformers`` models and processors moved to ``device``.
      3. OpenCV feature detectors (SIFT when available, ORB, BRISK).

    When a strategy fails part-way, everything it already stored is discarded
    before the next strategy runs, so the returned dictionary never mixes
    entries produced by different strategies.

    Args:
        device: Torch device string. Defaults to "cuda" when CUDA is available,
            otherwise "cpu".

    Returns:
        Dictionary of loaded objects. It always contains "device"; the other
        keys depend on which strategy succeeded.
    """
    print("Loading models...")
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    models = {}
    vllm_checkpoints = (
        ("clip", "CLIP", "openai/clip-vit-large-patch14"),
        ("blip", "BLIP-2", "Salesforce/blip2-opt-2.7b"),
        ("dinov2", "DINOv2", "facebook/dinov2-base"),
    )
    try:
        # Attempt to load with vLLM.
        # NOTE(review): each engine requests 70% of GPU memory and the stored
        # processors are None, while the embedding helpers in this file call
        # the processor and `get_image_features` - confirm this path is usable.
        for key, label, checkpoint in vllm_checkpoints:
            models[f"{key}_model"] = LLM(
                model=checkpoint,
                tensor_parallel_size=1,
                gpu_memory_utilization=0.7,
                dtype="float16"
            )
            models[f"{key}_processor"] = None
            print(f"{label} (vLLM) model loaded successfully.")
    except Exception as e:
        print(f"vLLM loading failed: {e}")
        print("Falling back to direct transformer imports...")
        # Drop engines from the failed attempt so they are neither returned
        # nor kept alive alongside the replacement models.
        models.clear()
        try:
            from transformers import CLIPProcessor, CLIPModel, AutoProcessor, AutoModel
            clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
            clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
            models["clip_model"] = clip_model.to(device)
            models["clip_processor"] = clip_processor
            print("CLIP (Direct) model loaded successfully.")

            # Half precision is only used on GPU; on CPU the weights stay in
            # float32 because half-precision CPU kernels are slow or missing.
            blip_dtype = torch.float16 if str(device).startswith("cuda") else torch.float32
            blip_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
            blip_model = AutoModel.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=blip_dtype)
            models["blip_model"] = blip_model.to(device)
            models["blip_processor"] = blip_processor
            print("BLIP-2 (Direct) model loaded successfully.")

            dinov2_model = AutoModel.from_pretrained("facebook/dinov2-base")
            dinov2_processor = AutoProcessor.from_pretrained("facebook/dinov2-base")
            models["dinov2_model"] = dinov2_model.to(device)
            models["dinov2_processor"] = dinov2_processor
            print("DINOv2 (Direct) model loaded successfully.")
        except Exception as e2:
            print(f"Direct imports failed: {e2}")
            print("Using OpenCV-based feature detectors as a last resort.")
            # Discard any transformer models loaded before the failure.
            models.clear()
            models["feature_detector"] = cv2.SIFT_create() if hasattr(cv2, 'SIFT_create') else None
            models["orb_detector"] = cv2.ORB_create()
            models["brisk_detector"] = cv2.BRISK_create()
    models["device"] = device
    print(f"Model initialization complete on device: {device}")
    return models


def initialize_llm(api_key: str) -> Tuple[Any, SamplingParams]:
    """
    Create the Groq client and the default sampling settings.

    Args:
        api_key: Groq API key passed to the client constructor.

    Returns:
        Tuple of (Groq client, SamplingParams with temperature 0.7,
        top_p 0.9 and max_tokens 1024).
    """
    groq_client = Groq(api_key=api_key)
    generation_settings = SamplingParams(temperature=0.7, top_p=0.9, max_tokens=1024)
    return groq_client, generation_settings

In [8]:
#############################################
# 2. IMAGE PROCESSING FUNCTIONS
#############################################
async def fetch_image_async(image_url: str) -> bytes:
    """
    Download the raw bytes of an image over HTTP.

    Args:
        image_url: URL to request with a GET.

    Returns:
        The response body.

    Raises:
        ValueError: If the response status is anything other than 200.
    """
    async with aiohttp.ClientSession() as http_session, http_session.get(image_url) as reply:
        if reply.status == 200:
            return await reply.read()
        raise ValueError(f"Failed to fetch image: {reply.status}")


def load_local_image(image_path: str) -> bytes:
    """
    Read a file from disk in binary mode and return its full contents.

    Args:
        image_path: Path of the file to read.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    return raw_bytes


async def preprocess_image_async(image_source: Union[str, bytes], enhance: bool = True) -> Image.Image:
    """
    Load an image and prepare it for analysis.

    Args:
        image_source: Raw image bytes, or a string. A string starting with
            "http" is downloaded; any other string is read as a local path.
        enhance: When true, contrast is multiplied by 1.5 and a sharpen
            filter is applied.

    Returns:
        An RGB image whose longest side is at most 1024 pixels.
    """
    if not isinstance(image_source, str):
        raw_bytes = image_source
    elif image_source.startswith("http"):
        raw_bytes = await fetch_image_async(image_source)
    else:
        raw_bytes = load_local_image(image_source)

    picture = Image.open(BytesIO(raw_bytes)).convert("RGB")

    if enhance:
        picture = ImageEnhance.Contrast(picture).enhance(1.5)
        picture = picture.filter(ImageFilter.SHARPEN)

    # thumbnail() shrinks in place and keeps the aspect ratio.
    longest_side = max(picture.size)
    if longest_side > 1024:
        picture.thumbnail((1024, 1024), Image.LANCZOS)
    return picture


def _exif_value_for_report(value: Any) -> Any:
    """Convert one EXIF value into something ``json.dump`` can serialise."""
    if isinstance(value, bytes):
        return "Binary data"
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, dict):
        return {key: _exif_value_for_report(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_exif_value_for_report(item) for item in value]
    try:
        # Rational EXIF values are not JSON serialisable; use their float value.
        return float(value)
    except (TypeError, ValueError, ZeroDivisionError):
        return str(value)


def extract_exif_data(image: Image.Image) -> dict:
    """
    Extract EXIF metadata from an image.

    The private ``_getexif`` reader is used when the image object provides it.
    Images produced by operations such as ``convert()`` are plain
    ``Image.Image`` objects without that method, so the public ``getexif()``
    API is used for them (merged with the Exif sub-IFD when available).

    Args:
        image: PIL image to inspect.

    Returns:
        Mapping of tag name (or numeric tag id when the name is unknown) to
        a JSON-serialisable value. Binary values and the MakerNote tag are
        replaced by the string "Binary data". Empty when the image has no
        EXIF data or when reading it fails.
    """
    exif_data = {}
    try:
        legacy_reader = getattr(image, "_getexif", None)
        if callable(legacy_reader):
            raw_tags = legacy_reader() or {}
        else:
            exif = image.getexif()
            raw_tags = dict(exif)
            read_ifd = getattr(exif, "get_ifd", None)
            if callable(read_ifd):
                # 0x8769 is the pointer tag of the Exif sub-IFD.
                raw_tags.update(read_ifd(0x8769))
    except Exception as e:
        # Metadata is best-effort: report the problem and continue without it.
        print(f"EXIF extraction failed: {e}")
        return exif_data

    for tag_id, val in raw_tags.items():
        tag = ExifTags.TAGS.get(tag_id, tag_id)
        exif_data[tag] = "Binary data" if tag == "MakerNote" else _exif_value_for_report(val)
    return exif_data


def perform_ocr(image: Image.Image) -> str:
    """
    Extract text from an image with pytesseract.

    The OCR runs on a copy of the image whose contrast is doubled and which
    is converted to grayscale.

    Returns:
        The recognised text with surrounding whitespace removed, a fixed
        notice when pytesseract is not installed, or an "OCR error: ..."
        message when recognition raises.
    """
    if not OCR_AVAILABLE:
        return "OCR not available. Install pytesseract."
    try:
        boosted = ImageEnhance.Contrast(image.copy()).enhance(2.0)
        grayscale = boosted.convert("L")
        recognised = pytesseract.image_to_string(grayscale)
        return recognised.strip()
    except Exception as e:
        return f"OCR error: {e}"


def detect_image_manipulation(image: Image.Image) -> dict:
    """
    Detect potential image manipulation using Error Level Analysis (ELA).

    The image is re-encoded in memory as a quality-90 JPEG and compared
    pixel by pixel with the original; the largest per-channel difference
    drives the result.

    Args:
        image: PIL image in any mode. It is converted to RGB first, so
            palette, grayscale and alpha images can be JPEG-encoded and
            compared band by band.

    Returns:
        Dictionary with:
          - "max_diff": largest absolute channel difference.
          - "manipulation_score": max_diff / 40, capped at 1.0.
          - "likely_manipulated": True when max_diff exceeds 20.
    """
    # JPEG cannot store alpha/palette data, and the comparison below needs
    # both images in the same mode.
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")

    buffer = BytesIO()
    rgb_image.save(buffer, format="JPEG", quality=90)
    buffer.seek(0)
    recompressed = Image.open(buffer).convert("RGB")

    ela_image = ImageChops.difference(rgb_image, recompressed)
    # getextrema() yields one (min, max) pair per band for an RGB image.
    max_diff = max(band_max for _, band_max in ela_image.getextrema())
    return {
        "max_diff": max_diff,
        "manipulation_score": min(max_diff / 40.0, 1.0),
        "likely_manipulated": max_diff > 20
    }


def detect_faces(image: Image.Image, cascade_file: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Locate faces with an OpenCV Haar cascade and return them as crops.

    Args:
        image: PIL image; the code indexes three axes, so a multi-channel
            image is expected.
        cascade_file: Optional path to a cascade XML file. When missing or
            not found on disk, OpenCV's bundled frontal-face cascade is used.

    Returns:
        One dictionary per detection with "image" (the face crop resized to
        224x224) and "coords" (x, y, w, h) in the input image.
    """
    bgr_pixels = np.array(image)[:, :, ::-1].copy()  # RGB -> BGR

    if cascade_file and os.path.exists(cascade_file):
        cascade_path = cascade_file
    else:
        cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    classifier = cv2.CascadeClassifier(cascade_path)

    gray = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)
    detections = classifier.detectMultiScale(gray, 1.1, 4)
    if len(detections) == 0:
        # Second pass with a finer scale step and fewer required neighbours.
        detections = classifier.detectMultiScale(gray, 1.05, 3)

    return [
        {
            "image": image.crop((x, y, x + w, y + h)).resize((224, 224), Image.LANCZOS),
            "coords": (x, y, w, h),
        }
        for (x, y, w, h) in detections
    ]


def save_faces(faces: List[Dict[str, Any]], output_dir: str = "faces_output") -> None:
    """
    Write each detected face crop to ``output_dir`` as a JPEG file.

    The directory is created when it does not exist. Files are named
    ``face_<timestamp>_<n>.jpg`` with ``n`` starting at 1.

    Args:
        faces: Dictionaries whose "image" entry has a ``save`` method.
        output_dir: Destination directory.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    for number, face in enumerate(faces, start=1):
        destination = os.path.join(output_dir, f"face_{stamp}_{number}.jpg")
        face["image"].save(destination)
    print(f"Saved {len(faces)} face image(s) in {output_dir}")


def image_hash(image: Image.Image, hash_size: int = 8) -> str:
    """
    Compute an average-based perceptual hash of an image.

    The image is shrunk to ``hash_size`` x ``hash_size`` grayscale pixels;
    each pixel contributes a 1 bit when it is brighter than the mean and a
    0 bit otherwise.

    Returns:
        The bits as a hexadecimal string, zero-padded to
        ``hash_size**2 // 4`` characters.
    """
    shrunk = image.resize((hash_size, hash_size), Image.LANCZOS).convert("L")
    values = list(shrunk.getdata())
    mean_value = sum(values) / len(values)
    bit_chars = ["1" if value > mean_value else "0" for value in values]
    as_number = int("".join(bit_chars), 2)
    hex_width = hash_size**2 // 4
    return format(as_number, "x").zfill(hex_width)

In [5]:
#############################################
# 3. EMBEDDING & DESCRIPTION FUNCTIONS
#############################################
async def generate_clip_embedding(image: Image.Image, models: dict) -> np.ndarray:
    """
    Embed an image with the CLIP model and scale the result to unit length.

    Args:
        image: PIL image to embed.
        models: Dictionary providing "clip_processor", "clip_model" and "device".

    Returns:
        The image features as a NumPy array, divided by their norm unless
        the norm is zero.
    """
    batch = models["clip_processor"](images=image, return_tensors="pt").to(models["device"])
    with torch.no_grad():
        features = models["clip_model"].get_image_features(**batch)
    vector = features.cpu().numpy()
    length = np.linalg.norm(vector)
    if length > 0:
        return vector / length
    return vector

async def generate_blip_embedding(image: Image.Image, models: dict) -> np.ndarray:
    """
    Generate a unit-length image embedding with the BLIP-2 vision encoder.

    Args:
        image: PIL image to embed.
        models: Dictionary providing "blip_processor", "blip_model" and "device".

    Returns:
        Float32 NumPy array, divided by its norm unless the norm is zero.
    """
    blip_model = models["blip_model"]
    inputs = models["blip_processor"](images=image, return_tensors="pt")

    # The processor emits float32 pixel values; cast them to the model's
    # parameter dtype (the loader may use float16) to avoid a dtype mismatch.
    model_dtype = getattr(blip_model, "dtype", None)
    if model_dtype is not None:
        inputs = inputs.to(models["device"], model_dtype)
    else:
        inputs = inputs.to(models["device"])

    with torch.no_grad():
        outputs = blip_model.get_image_features(**inputs)

    # Depending on the transformers version/class this is either a tensor or
    # a vision-model output (object or tuple); use the pooled vector then.
    if not isinstance(outputs, torch.Tensor):
        pooled = getattr(outputs, "pooler_output", None)
        outputs = pooled if pooled is not None else outputs[1]

    # Normalise in float32 so half-precision features do not lose accuracy.
    embedding = outputs.float().cpu().numpy()
    norm = np.linalg.norm(embedding)
    return embedding / norm if norm > 0 else embedding

async def generate_dinov2_embedding(image: Image.Image, models: dict) -> np.ndarray:
    """
    Embed an image with DINOv2 using the first token of the last hidden state.

    Args:
        image: PIL image to embed.
        models: Dictionary providing "dinov2_processor", "dinov2_model" and "device".

    Returns:
        The token vector as a NumPy array, divided by its norm unless the
        norm is zero.
    """
    batch = models["dinov2_processor"](images=image, return_tensors="pt").to(models["device"])
    with torch.no_grad():
        hidden_states = models["dinov2_model"](**batch).last_hidden_state
        first_token = hidden_states[:, 0]
    vector = first_token.cpu().numpy()
    length = np.linalg.norm(vector)
    if length > 0:
        return vector / length
    return vector

async def generate_image_description(image: Image.Image, models: dict, prompt=None) -> str:
    """
    Generate a detailed description of the image using BLIP-2.
    Focuses on identifiable people, objects, and locations.

    Args:
        image: PIL Image
        models: Dictionary with "blip_processor", "blip_model" and "device"
        prompt: Optional custom prompt; a default description prompt is used
            when omitted

    Returns:
        The first decoded sequence with surrounding whitespace removed.
    """
    if prompt is None:
        prompt = "Describe this image in detail with focus on identifiable people, objects, and locations:"

    processor = models["blip_processor"]
    blip_model = models["blip_model"]
    inputs = processor(images=image, text=prompt, return_tensors="pt")

    # Cast floating-point inputs to the model's parameter dtype: the
    # processor emits float32 pixel values while the loader may use float16.
    model_dtype = getattr(blip_model, "dtype", None)
    if model_dtype is not None:
        inputs = inputs.to(models["device"], model_dtype)
    else:
        inputs = inputs.to(models["device"])

    # NOTE(review): generate() presumably needs a generation-capable BLIP-2
    # class; the loader in this file uses AutoModel - confirm.
    with torch.no_grad():
        outputs = blip_model.generate(**inputs, max_new_tokens=100)
    description = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return description.strip()

async def extract_classical_features(image: Image.Image) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """
    Compute ORB and, when possible, SIFT features on the grayscale image.

    Returns:
        (keypoints, descriptors) of one detector: SIFT when it found strictly
        more keypoints than ORB, otherwise ORB. If only one detector produced
        descriptors, that one is returned; if neither did, (None, None).
    """
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)

    # ORB, limited to 1000 features
    orb_keypoints, orb_descriptors = cv2.ORB_create(nfeatures=1000).detectAndCompute(gray, None)

    # SIFT is optional: any failure leaves both values as None
    sift_keypoints = sift_descriptors = None
    try:
        sift_keypoints, sift_descriptors = cv2.SIFT_create().detectAndCompute(gray, None)
    except Exception:
        pass

    orb_result = (orb_keypoints, orb_descriptors)
    sift_result = (sift_keypoints, sift_descriptors)

    if orb_descriptors is None:
        return (None, None) if sift_descriptors is None else sift_result
    if sift_descriptors is None:
        return orb_result
    return sift_result if len(sift_keypoints) > len(orb_keypoints) else orb_result

def get_histogram_features(image: Image.Image) -> dict:
    """
    Build normalized 256-bin histograms for the first three image channels.

    Args:
        image: PIL Image

    Returns:
        Dictionary with "histogram_r", "histogram_g" and "histogram_b", each
        a list of 256 values taken from channels 0, 1 and 2 respectively.
    """
    pixels = np.array(image)

    channel_keys = (("histogram_r", 0), ("histogram_g", 1), ("histogram_b", 2))
    features = {}
    for key, channel in channel_keys:
        counts = cv2.calcHist([pixels], [channel], None, [256], [0, 256])
        # normalize() writes into `counts` and returns it
        features[key] = cv2.normalize(counts, counts).flatten().tolist()
    return features

def compute_cosine_similarity(embedding1: np.ndarray, embedding2: np.ndarray) -> float:
    """
    Return the cosine of the angle between two embeddings.

    Both arrays are flattened for the dot product, so they must hold the
    same number of elements. The result is 0.0 when either embedding has
    zero norm.
    """
    numerator = np.dot(embedding1.flatten(), embedding2.flatten())
    magnitude_a = np.linalg.norm(embedding1)
    magnitude_b = np.linalg.norm(embedding2)
    if not (magnitude_a != 0 and magnitude_b != 0):
        return 0.0
    cosine = numerator / (magnitude_a * magnitude_b)
    return float(cosine)

async def generate_consensus_embedding(clip_emb, blip_emb, dinov2_emb) -> np.ndarray:
    """
    Combine the CLIP, BLIP-2 and DINOv2 embeddings into one unit-length vector.

    Each embedding is flattened and scaled to unit length, truncated to the
    shortest of the three lengths, and merged with a weighted average
    (CLIP 0.4, BLIP-2 0.3, DINOv2 0.3). An embedding with zero norm is left
    as zeros rather than divided, so it cannot introduce NaN values.

    Args:
        clip_emb: CLIP embedding (array-like).
        blip_emb: BLIP-2 embedding (array-like).
        dinov2_emb: DINOv2 embedding (array-like).

    Returns:
        1-D array of the shortest input length, divided by its norm unless
        the norm is zero.
    """
    # Weight the embeddings (these weights can be adjusted)
    weights = np.array([0.4, 0.3, 0.3])  # CLIP, BLIP, DINOv2

    unit_vectors = []
    for emb in (clip_emb, blip_emb, dinov2_emb):
        flat = np.asarray(emb).flatten()
        norm = np.linalg.norm(flat)
        unit_vectors.append(flat / norm if norm > 0 else flat)

    # The models may emit different dimensionalities; keep the shared prefix.
    # NOTE(review): truncation pairs coordinates from unrelated embedding
    # spaces - confirm this is the intended way to align them.
    min_dim = min(vector.shape[0] for vector in unit_vectors)
    stacked = np.array([vector[:min_dim] for vector in unit_vectors])

    consensus = np.average(stacked, axis=0, weights=weights)

    norm = np.linalg.norm(consensus)
    return consensus / norm if norm > 0 else consensus

In [9]:
#############################################
# 4. SEARCH ENGINE FUNCTIONS (Simulated Async Calls)
#############################################
async def search_private_db(embedding: np.ndarray, description: str) -> list:
    """
    Simulated private-database search.

    Waits 0.5 seconds and returns two fixed records, each carrying the query
    embedding as a list. ``description`` is not used.
    """
    await asyncio.sleep(0.5)
    fixed_hits = (
        ("Person_123", 0.91, "2023-10-15"),
        ("Person_456", 0.85, "2023-09-22"),
    )
    return [
        {
            "source": "Private DB",
            "match": match_id,
            "score": score,
            "metadata": {"date": date},
            "embedding": embedding.tolist(),
        }
        for match_id, score, date in fixed_hits
    ]


async def search_twitter(embedding: np.ndarray, description: str) -> list:
    """
    Simulated Twitter search.

    Waits 0.7 seconds and returns one fixed record whose metadata includes
    the first five whitespace-separated words of ``description``.
    """
    await asyncio.sleep(0.7)
    leading_words = description.split()[:5]
    record = {
        "source": "Twitter",
        "match": "Tweet_Image_456",
        "score": 0.87,
        "metadata": {
            "username": "@user123",
            "posted": "2023-11-01",
            "keywords": leading_words,
        },
        "embedding": embedding.tolist(),
    }
    return [record]


async def search_reddit(embedding: np.ndarray, description: str) -> list:
    """
    Simulated Reddit search.

    Waits 0.6 seconds and returns one fixed record carrying the query
    embedding as a list. ``description`` is not used.
    """
    await asyncio.sleep(0.6)
    record = {
        "source": "Reddit",
        "match": "Reddit_Post_321",
        "score": 0.89,
        "metadata": {"subreddit": "r/pics", "posted": "2023-10-25"},
        "embedding": embedding.tolist(),
    }
    return [record]


async def search_instagram(embedding: np.ndarray, description: str) -> list:
    """
    Simulated Instagram search.

    Waits 0.8 seconds and returns one fixed record carrying the query
    embedding as a list. ``description`` is not used.
    """
    await asyncio.sleep(0.8)
    record = {
        "source": "Instagram",
        "match": "Insta_Post_654",
        "score": 0.88,
        "metadata": {"username": "user456", "posted": "2023-11-12", "location": "New York"},
        "embedding": embedding.tolist(),
    }
    return [record]


async def search_osint_sources(embedding: np.ndarray, description: str) -> list:
    """
    Simulated OSINT search.

    Waits 1.0 second and returns two fixed records, each carrying the query
    embedding as a list. ``description`` is not used.
    """
    await asyncio.sleep(1.0)
    fixed_hits = (
        ("DarkWeb_Post_999", 0.83, {"forum": "anonymous_forum", "date": "2023-09-10"}),
        ("Telegram_Group_123", 0.79, {"group": "public_channel_xyz", "date": "2023-10-30"}),
    )
    return [
        {
            "source": "OSINT",
            "match": match_id,
            "score": score,
            "metadata": metadata,
            "embedding": embedding.tolist(),
        }
        for match_id, score, metadata in fixed_hits
    ]


async def search_tineye(image: Image.Image) -> list:
    """
    Simulated TinEye search.

    Encodes the image as JPEG into an in-memory buffer (the buffer is not
    sent anywhere), waits 1.2 seconds and returns one fixed record without
    an embedding.
    """
    jpeg_buffer = BytesIO()
    image.save(jpeg_buffer, format="JPEG")
    jpeg_buffer.seek(0)
    await asyncio.sleep(1.2)
    record = {
        "source": "TinEye",
        "match": "Website_ABC",
        "score": 0.92,
        "metadata": {"domain": "example.com", "first_crawled": "2023-08-15"},
        "embedding": None,
    }
    return [record]


async def search_google_images(image: Image.Image, description: str, api_key: str, cse_id: str) -> list:
    """
    Simulated Google Images search.

    Waits 1.0 second and returns one fixed record without an embedding.
    None of the arguments are used.
    """
    await asyncio.sleep(1.0)
    record = {
        "source": "Google Images",
        "match": "News_Site_XYZ",
        "score": 0.86,
        "metadata": {
            "url": "https://example-news.com/article123",
            "title": "Example article related to the image",
        },
        "embedding": None,
    }
    return [record]


async def search_additional_sources(embedding: np.ndarray, description: str) -> list:
    """
    Simulated search of additional sources (Facebook and LinkedIn).

    Waits 0.9 seconds and returns two fixed records, each carrying the query
    embedding as a list. ``description`` is not used.
    """
    await asyncio.sleep(0.9)
    fixed_hits = (
        ("Facebook", "FB_Post_123", 0.81, {"user": "john.doe", "posted": "2023-10-05"}),
        ("LinkedIn", "LinkedIn_Profile_456", 0.78, {"profile": "jane-smith", "updated": "2023-11-10"}),
    )
    return [
        {
            "source": source,
            "match": match_id,
            "score": score,
            "metadata": metadata,
            "embedding": embedding.tolist(),
        }
        for source, match_id, score, metadata in fixed_hits
    ]


async def merge_search_results(*results: list) -> list:
    """
    Flatten several result lists into one list ordered by descending score.

    Records are identified by their (source, match) pair and only the first
    occurrence of each pair is kept. A NumPy embedding on a kept record is
    replaced in place by a plain list. Records without a "score" sort as 0.
    """
    unique_hits = []
    seen_keys = set()
    for hit in (item for group in results for item in group):
        key = (hit["source"], hit["match"])
        if key in seen_keys:
            continue
        seen_keys.add(key)
        vector = hit.get("embedding")
        if isinstance(vector, np.ndarray):
            hit["embedding"] = vector.tolist()
        unique_hits.append(hit)
    unique_hits.sort(key=lambda hit: hit.get("score", 0), reverse=True)
    return unique_hits

In [10]:
#############################################
# 5. ADVANCED FORENSIC ANALYSIS AND PROMPTING
#############################################
async def analyze_results_with_llm(results: list, image_description: str, client: Any) -> str:
    """
    Ask the LLM for a forensic report on the search results.

    Args:
        results: Search result records; their "embedding" entries are left
            out of the prompt.
        image_description: Text description of the analysed image.
        client: Client exposing ``chat.completions.create``.

    Returns:
        The content of the first completion choice, stripped of surrounding
        whitespace.
    """
    sanitized_results = [
        {field: value for field, value in record.items() if field != "embedding"}
        for record in results
    ]

    prompt = f"""
You are an expert forensic analyst. Analyze the following reverse image search results.

Image Description:
{image_description}

Search Results (in JSON):
{json.dumps(sanitized_results, indent=2)}

Provide a detailed multi-section report addressing:
- Cross-referencing of entities and duplication checks.
- Temporal and geographic correlations.
- Reliability of sources and potential biases.
- Image similarity analysis and forensic clues.
- Recommendations for further investigation.

Return your structured analysis report.
    """

    conversation = [
        {"role": "system", "content": "You are a forensic image analysis expert with multi-agent capabilities."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
        temperature=0.1
    )
    report_text = completion.choices[0].message.content
    return report_text.strip()


async def threat_assessment(analysis: str, client: Any) -> dict:
    """
    Ask the LLM to rate the threat implied by a forensic analysis.

    The reply is expected to contain a JSON object; the text between the
    first "{" and the last "}" is parsed. When the reply cannot be read or
    parsed, the error is printed and a default assessment (threat level 5,
    category "unknown") is returned.

    Args:
        analysis: Forensic analysis text to evaluate.
        client: Client exposing ``chat.completions.create``.

    Returns:
        The parsed JSON value, or the default assessment dictionary.
    """
    prompt = f"""
Given the forensic analysis below, evaluate the potential threats and vulnerabilities.

Forensic Analysis:
{analysis}

Return a JSON object with:
- "threat_level": integer from 0 to 10
- "categories": a list of threat categories (e.g., ["identity_theft", "privacy_breach"])
- "reasoning": a brief explanation
- "recommended_actions": a list of suggested actions

Return only valid JSON.
    """
    conversation = [
        {"role": "system", "content": "You are a threat assessment expert specialized in forensic image analysis."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
        temperature=0.1
    )
    try:
        reply_text = response.choices[0].message.content.strip()
        first_brace = reply_text.find("{")
        after_last_brace = reply_text.rfind("}") + 1
        if first_brace < 0 or after_last_brace <= first_brace:
            raise ValueError("JSON not found in output")
        return json.loads(reply_text[first_brace:after_last_brace])
    except Exception as e:
        print(f"Threat assessment parsing error: {e}")
        return {
            "threat_level": 5,
            "categories": ["unknown"],
            "reasoning": "Error parsing LLM results, manual review recommended.",
            "recommended_actions": ["Manual analysis"]
        }


async def graph_link_analysis(results: list) -> plt.Figure:
    """
    Draw the search results as a network graph.

    Every result becomes a node labelled "<source>: <match>" with its score
    as an attribute. Two nodes are linked when the text before the first
    ":" in their labels is the same.

    Returns:
        The current matplotlib figure containing the drawing.
    """
    graph = nx.Graph()
    for hit in results:
        graph.add_node(f"{hit['source']}: {hit['match']}", score=hit.get("score", 0))

    labels = list(graph.nodes())
    for index, first_label in enumerate(labels):
        first_source = first_label.split(":")[0]
        for second_label in labels[index + 1:]:
            if first_source == second_label.split(":")[0]:
                graph.add_edge(first_label, second_label)

    # Fixed seed keeps the layout reproducible between runs.
    layout = nx.spring_layout(graph, seed=42)
    plt.figure(figsize=(10, 8))
    nx.draw(
        graph,
        layout,
        with_labels=True,
        node_color="lightblue",
        node_size=1500,
        font_size=10,
        edge_color="grey",
    )
    plt.title("Detected Forensic Search Results Network")
    return plt.gcf()

In [11]:
#############################################
# 6. FACE SIMILARITY AND CLUSTERING ANALYSIS
#############################################
async def compute_face_similarities(faces: List[Dict[str, Any]], models: dict) -> Dict[Tuple[int, int], float]:
    """
    Compare every pair of detected faces using their CLIP embeddings.

    Args:
        faces: Dictionaries whose "image" entry is the face image to embed.
        models: Model dictionary passed to ``generate_clip_embedding``.

    Returns:
        Mapping of (i, j) with i < j to the cosine similarity of faces i and
        j. A small constant is added to the denominator to avoid division
        by zero.
    """
    vectors = [await generate_clip_embedding(face["image"], models) for face in faces]

    pair_scores = {}
    for i, first in enumerate(vectors):
        for j in range(i + 1, len(vectors)):
            second = vectors[j]
            numerator = np.dot(first.flatten(), second.flatten())
            denominator = np.linalg.norm(first) * np.linalg.norm(second) + 1e-10
            pair_scores[(i, j)] = float(numerator / denominator)
    return pair_scores

In [None]:
#############################################
# 7. MAIN PIPELINE AND MULTI-AGENT WORKFLOW
#############################################
async def main():
    """
    Run the full forensic pipeline on the image named on the command line.

    Steps: read API keys and create the LLM client, load models, preprocess
    the image, run face detection / OCR / ELA / hashing, generate embeddings
    and a description, run the simulated searches, request the LLM analysis
    and threat assessment, compare faces, and save the network graph (PNG)
    and the final report (JSON) in the current directory.

    Exits with status 1 when no image URL or path argument is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python defensive_forensics.py <image_url_or_local_path>")
        sys.exit(1)
    image_source = sys.argv[1]

    # Step 1: Initialize API keys and LLM client
    groq_api_key, google_cse_id, google_api_key = get_api_keys()
    client, sampling_params = initialize_llm(groq_api_key)

    # Step 2: Load models
    models = load_models()

    # Step 3: Preprocess the input image
    print("Preprocessing image...")
    image = await preprocess_image_async(image_source, enhance=True)
    exif = extract_exif_data(image)
    print("EXIF Data:", exif)

    # Step 4: Detect faces, perform OCR, ELA, and compute perceptual image hash
    faces = detect_faces(image)
    if faces:
        save_faces(faces)
    ocr_text = perform_ocr(image)
    manipulation = detect_image_manipulation(image)
    img_hash = image_hash(image)
    print("OCR Text:", ocr_text)
    print("Manipulation Results:", manipulation)
    print("Image Hash:", img_hash)

    # Step 5: Generate embeddings and description
    clip_emb = await generate_clip_embedding(image, models)
    blip_emb = await generate_blip_embedding(image, models)
    dinov2_emb = await generate_dinov2_embedding(image, models)
    consensus_emb = await generate_consensus_embedding(clip_emb, blip_emb, dinov2_emb)
    description = await generate_image_description(image, models)
    print("Image Description:", description)

    # Step 6: Execute asynchronous reverse image search tasks
    search_tasks = await asyncio.gather(
        search_private_db(consensus_emb, description),
        search_twitter(consensus_emb, description),
        search_reddit(consensus_emb, description),
        search_instagram(consensus_emb, description),
        search_osint_sources(consensus_emb, description),
        search_tineye(image),
        search_google_images(image, description, google_api_key, google_cse_id),
        search_additional_sources(consensus_emb, description)
    )
    merged_results = await merge_search_results(*search_tasks)
    print("Merged Search Results:\n", json.dumps(merged_results, indent=2))

    # Step 7: Advanced forensic analysis using LLM
    analysis_report = await analyze_results_with_llm(merged_results, description, client)
    print("Forensic Analysis Report:\n", analysis_report)

    threat_report = await threat_assessment(analysis_report, client)
    print("Threat Assessment Report:\n", json.dumps(threat_report, indent=2))

    # Step 8: Face similarity analysis if multiple faces detected
    face_similarities = {}
    if len(faces) > 1:
        face_similarities = await compute_face_similarities(faces, models)
        print("Face Similarities:", face_similarities)

    # Step 9: Graph visualization of search results
    fig = await graph_link_analysis(merged_results)
    graph_filename = f"forensic_network_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
    fig.savefig(graph_filename)
    # Release the figure once it is on disk so repeated runs do not pile up
    # open matplotlib figures.
    plt.close(fig)
    print(f"Network graph saved as {graph_filename}")

    # Step 10: Save final forensic report to file
    report_file = f"forensic_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    # JSON object keys cannot be tuples, so each (i, j) face pair is written
    # as the string "i-j".
    serializable_similarities = {
        f"{i}-{j}": score for (i, j), score in face_similarities.items()
    }
    final_report = {
        "exif": exif,
        "ocr_text": ocr_text,
        "manipulation": manipulation,
        "image_hash": img_hash,
        "description": description,
        "search_results": merged_results,
        "analysis_report": analysis_report,
        "threat_report": threat_report,
        "face_similarities": serializable_similarities,
        "timestamp": datetime.now().isoformat()
    }
    with open(report_file, "w") as f:
        json.dump(final_report, f, indent=2)
    print(f"Final forensic report saved to {report_file}")

    print("Forensic processing complete.")


if __name__ == "__main__":
    asyncio.run(main())