<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Face_Similarity_Matching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Reverse Image Search System for Defensive Forensics**

This is an end-to-end forensic image analysis pipeline with rich features:
1. Enhanced model loading with multiple fallback strategies.
2. Advanced asynchronous image processing including OCR, EXIF extraction, and error level analysis.
3. Multi-agent reverse image search across multiple simulated sources (Private DB, Twitter, Reddit, Instagram, OSINT, TinEye, Google Images, Telegram, and additional sources such as Facebook).
4. Comprehensive embedding generation and consensus across CLIP, BLIP-2, and DINOv2.
5. Detailed forensic analysis using advanced prompt templates and multi-step LLM workflows.
6. Face detection, clustering, and similarity analysis with visualization.
7. Graph-based link analysis for search results with NetworkX.
8. Deduplication using perceptual image hashes.
9. Robust error handling and extensive print statements for logging.
10. Saving of results, including face images and the network graph image.

In [1]:
!pip install -q torch transformers langchain langgraph numpy pillow requests vllm aiohttp opencv-python-headless networkx matplotlib nest_asyncio groq pytesseract gradio

In [2]:
import asyncio
import os
import json
import time
import getpass
from io import BytesIO
from datetime import datetime
from typing import Any, Tuple, Optional

import cv2
import numpy as np
import torch
import matplotlib.pyplot as plt
import networkx as nx
import aiohttp
import gradio as gr
from PIL import Image, ImageEnhance, ImageFilter, ExifTags, ImageChops

# Import vLLM and Groq related classes
from vllm import LLM, SamplingParams
from groq import Groq

# Import LangGraph for multi-agent orchestration
from langgraph.graph import StateGraph, END

# Startup banner: timestamp the moment this cell runs.
print(f"[DEBUG] [{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Starting forensic pipeline.")
# Report whether torch can see a CUDA device ("cuda") or not ("cpu").
print(f"[DEBUG] Platform: {'cuda' if torch.cuda.is_available() else 'cpu'}")

INFO 03-25 09:32:21 [__init__.py:256] Automatically detected platform cuda.
[DEBUG] [2025-03-25 09:32:24] Starting forensic pipeline.
[DEBUG] Platform: cuda


In [9]:
#############################################
# 1. INITIALIZATION AND MODEL LOADING FUNCTIONS
#############################################
def get_api_keys() -> Tuple[str, str, str]:
    """
    Collect the Groq and Google credentials used by the pipeline.

    Each value is read from its environment variable when that variable holds
    a non-empty string; otherwise the user is asked for it through a hidden
    ``getpass`` prompt.

    Returns:
        ``(groq_api_key, google_cse_id, google_api_key)``.
    """
    def _resolve(env_name: str, prompt_text: str) -> str:
        # Unset and empty variables both fall through to the prompt.
        value = os.environ.get(env_name)
        if not value:
            value = getpass.getpass(prompt_text)
        return value

    groq_api_key = _resolve("GROQ_API_KEY", "Enter your GROQ API Key: ")
    google_cse_id = _resolve("GOOGLE_CSE_ID", "Enter your Google CSE ID: ")
    google_api_key = _resolve("GOOGLE_API_KEY", "Enter your Google API Key: ")
    print("[DEBUG] API keys acquired.")
    return groq_api_key, google_cse_id, google_api_key

def load_models(device: Optional[str] = None) -> dict:
    """
    Build the model registry, trying three strategies in order.

    1. Construct a vLLM ``LLM`` for each of CLIP, BLIP-2 and DINOv2; the
       matching ``*_processor`` entries are set to ``None``.
    2. If anything in step 1 raises, load the same checkpoints through
       ``transformers`` together with their processors and move the models to
       ``device``.
    3. If step 2 raises as well, register OpenCV SIFT (when available), ORB
       and BRISK detectors.

    Entries written by a strategy before it failed are left in the registry.

    Args:
        device: Target device string; when ``None`` it is "cuda" if torch
            reports CUDA as available, else "cpu".

    Returns:
        Dict of whatever was loaded, plus a ``"device"`` entry.
    """
    print("[DEBUG] Starting model loading...")
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    models = {}

    # (registry key prefix, checkpoint id, name used in the log line)
    vllm_specs = (
        ("clip", "openai/clip-vit-large-patch14", "CLIP"),
        ("blip", "Salesforce/blip2-opt-2.7b", "BLIP-2"),
        ("dinov2", "facebook/dinov2-base", "DINOv2"),
    )
    try:
        # Attempt loading models using vLLM for high performance inference
        print("[DEBUG] Trying to load models using vLLM...")
        for prefix, checkpoint, label in vllm_specs:
            engine = LLM(
                model=checkpoint,
                tensor_parallel_size=1,
                gpu_memory_utilization=0.7,
                dtype="float16"
            )
            models[f"{prefix}_model"] = engine
            models[f"{prefix}_processor"] = None  # Assume processor integrated
            print(f"[DEBUG] Loaded {label} model using vLLM.")
    except Exception as e:
        print(f"[ERROR] vLLM model loading failed: {e}")
        print("[DEBUG] Falling back to direct transformer imports...")
        try:
            from transformers import CLIPProcessor, CLIPModel, AutoProcessor, AutoModel

            clip_id = "openai/clip-vit-large-patch14"
            clip_net = CLIPModel.from_pretrained(clip_id)
            clip_proc = CLIPProcessor.from_pretrained(clip_id)
            models["clip_model"] = clip_net.to(device)
            models["clip_processor"] = clip_proc
            print("[DEBUG] Loaded CLIP model using Direct import.")

            blip_id = "Salesforce/blip2-opt-2.7b"
            blip_proc = AutoProcessor.from_pretrained(blip_id)
            blip_net = AutoModel.from_pretrained(blip_id, torch_dtype=torch.float16)
            models["blip_model"] = blip_net.to(device)
            models["blip_processor"] = blip_proc
            print("[DEBUG] Loaded BLIP-2 model using Direct import.")

            dino_id = "facebook/dinov2-base"
            dino_net = AutoModel.from_pretrained(dino_id)
            dino_proc = AutoProcessor.from_pretrained(dino_id)
            models["dinov2_model"] = dino_net.to(device)
            models["dinov2_processor"] = dino_proc
            print("[DEBUG] Loaded DINOv2 model using Direct import.")
        except Exception as e2:
            print(f"[ERROR] Direct import loading failed: {e2}")
            print("[DEBUG] Using OpenCV-based feature detectors as a last resort.")
            if hasattr(cv2, 'SIFT_create'):
                models["feature_detector"] = cv2.SIFT_create()
            else:
                models["feature_detector"] = None
            models["orb_detector"] = cv2.ORB_create()
            models["brisk_detector"] = cv2.BRISK_create()

    models["device"] = device
    print(f"[DEBUG] Model initialization complete on device: {device}")
    return models

def initialize_llm(api_key: str) -> Tuple[Any, SamplingParams]:
    """
    Create a Groq client together with a fixed set of sampling parameters.

    Args:
        api_key: Key passed to the ``Groq`` constructor.

    Returns:
        ``(client, sampling_params)`` where the parameters are
        temperature 0.7, top_p 0.9 and max_tokens 1024.
    """
    print("[DEBUG] Initializing LLM client with Groq...")
    groq_client = Groq(api_key=api_key)
    generation_settings = {
        "temperature": 0.7,
        "top_p": 0.9,
        "max_tokens": 1024,
    }
    params = SamplingParams(**generation_settings)
    print("[DEBUG] LLM client initialized.")
    return groq_client, params

In [10]:
#############################################
# 2. UTILITY & PREPROCESSING FUNCTIONS
#############################################
def extract_exif_data(image: Image.Image) -> dict:
    """
    Read EXIF metadata from an image into a plain dict keyed by tag name.

    Uses the public ``Image.getexif()`` API rather than the private
    ``_getexif()``: the private method only exists on some format plugin
    classes, so calling it on an image produced by ``convert()`` raised
    ``AttributeError`` and always produced an empty result.

    Args:
        image: Any PIL image.

    Returns:
        Mapping of tag name (or the numeric tag id when it has no known name)
        to value. Byte values and the ``MakerNote`` tag are replaced with the
        string "Binary data". Empty when the image has no EXIF data or
        extraction fails.
    """
    exif_data = {}
    print("[DEBUG] Extracting EXIF data...")
    try:
        exif = image.getexif()
        raw_tags = dict(exif)
        # getexif() exposes only the base IFD. Merge the Exif sub-IFD (0x8769)
        # so tags such as DateTimeOriginal are reported, and expand the GPS
        # sub-IFD (0x8825) into a dict instead of leaving it as a bare offset.
        raw_tags.update(exif.get_ifd(0x8769))
        gps_tags = exif.get_ifd(0x8825)
        if gps_tags:
            raw_tags[0x8825] = dict(gps_tags)
        for tag_id, val in raw_tags.items():
            tag = ExifTags.TAGS.get(tag_id, tag_id)
            exif_data[tag] = "Binary data" if isinstance(val, bytes) or tag == "MakerNote" else val
        print(f"[DEBUG] EXIF data extracted: {exif_data}")
    except Exception as e:
        print(f"[ERROR] EXIF extraction failed: {e}")
    return exif_data

def perform_ocr(image: Image.Image) -> str:
    """
    Run Tesseract OCR on a contrast-boosted, greyscale copy of the image.

    Args:
        image: Source image; it is copied, never modified.

    Returns:
        The recognised text with surrounding whitespace stripped, or a
        message string when pytesseract is missing or OCR raises.
    """
    print("[DEBUG] Starting OCR processing...")
    try:
        import pytesseract

        # Double the contrast, then drop to single-channel luminance.
        boosted = ImageEnhance.Contrast(image.copy()).enhance(2.0)
        greyscale = boosted.convert("L")
        recognised = pytesseract.image_to_string(greyscale)
        text = recognised.strip()
        print(f"[DEBUG] OCR result: {text}")
        return text
    except ImportError:
        print("[ERROR] pytesseract not installed.")
        return "OCR not available. Please install pytesseract."
    except Exception as e:
        print(f"[ERROR] OCR error: {e}")
        return f"OCR error: {e}"

def detect_image_manipulation(image: Image.Image) -> dict:
    """
    Estimate tampering with a simple error level analysis (ELA).

    The image is re-encoded as JPEG at quality 90 and compared pixel-wise
    with itself; the largest per-channel difference drives the score.

    The input is normalised to RGB first. Without that, RGBA/P inputs cannot
    be written as JPEG, and single-band inputs make ``getextrema()`` return a
    flat ``(min, max)`` pair that the per-band maximum cannot index.

    Args:
        image: Any PIL image.

    Returns:
        ``{"max_diff", "manipulation_score", "likely_manipulated"}`` on
        success, where the score is ``max_diff / 40`` capped at 1.0 and the
        flag is ``max_diff > 20``; ``{"error": message}`` on failure.
    """
    print("[DEBUG] Detecting image manipulation (ELA)...")
    try:
        rgb_image = image if image.mode == "RGB" else image.convert("RGB")
        with BytesIO() as buffer:
            rgb_image.save(buffer, format="JPEG", quality=90)
            buffer.seek(0)
            # convert() forces the pixel data to load, so the decoded copy
            # stays valid after the buffer is closed.
            recompressed = Image.open(buffer).convert("RGB")
        ela_image = ImageChops.difference(rgb_image, recompressed)
        # One (min, max) pair per band; keep the largest maximum.
        max_diff = max(band_max for _, band_max in ela_image.getextrema())
        manipulation_result = {
            "max_diff": max_diff,
            "manipulation_score": min(max_diff / 40.0, 1.0),
            "likely_manipulated": max_diff > 20
        }
        print(f"[DEBUG] ELA result: {manipulation_result}")
        return manipulation_result
    except Exception as e:
        print(f"[ERROR] Error in ELA: {e}")
        return {"error": str(e)}

def image_hash(image: Image.Image, hash_size: int = 8) -> str:
    """
    Compute an average-hash style fingerprint of the image.

    The image is shrunk to ``hash_size`` x ``hash_size`` greyscale pixels;
    each pixel contributes a 1 bit when it is brighter than the mean.

    Args:
        image: Source image.
        hash_size: Side length of the reduced image.

    Returns:
        Lower-case hex string zero-padded to ``hash_size**2 // 4`` digits,
        or "" when anything raises.
    """
    print("[DEBUG] Computing image perceptual hash...")
    try:
        reduced = image.resize((hash_size, hash_size), Image.LANCZOS)
        luminance = list(reduced.convert("L").getdata())
        mean_level = sum(luminance) / len(luminance)
        flags = ["1" if level > mean_level else "0" for level in luminance]
        as_int = int("".join(flags), 2)
        digits = hash_size**2 // 4
        hash_result = format(as_int, "x").zfill(digits)
        print(f"[DEBUG] Image hash: {hash_result}")
        return hash_result
    except Exception as e:
        print(f"[ERROR] Image hash error: {e}")
        return ""

async def preprocess_image_async(image_source: bytes, enhance: bool = True) -> Image.Image:
    """
    Decode raw image bytes into an RGB image ready for analysis.

    Args:
        image_source: Encoded image bytes.
        enhance: When true, raise contrast by a factor of 1.5 and apply the
            SHARPEN filter.

    Returns:
        The RGB image, shrunk in place so neither side exceeds 1024 pixels.
    """
    print("[DEBUG] Preprocessing image asynchronously...")
    decoded = Image.open(BytesIO(image_source))
    image = decoded.convert("RGB")
    if enhance:
        print("[DEBUG] Enhancing image contrast and sharpness...")
        image = ImageEnhance.Contrast(image).enhance(1.5).filter(ImageFilter.SHARPEN)
    longest_side = max(image.size)
    if longest_side > 1024:
        print("[DEBUG] Resizing image to max 1024 pixels.")
        image.thumbnail((1024, 1024), Image.LANCZOS)
    print("[DEBUG] Preprocessing complete.")
    return image

In [11]:
#############################################
# 2A. FACE DETECTION & CLUSTERING FUNCTIONS
#############################################
def detect_faces(image: Image.Image) -> list:
    """
    Locate frontal faces with OpenCV's bundled Haar cascade.

    Args:
        image: Image whose array form is treated as RGB.

    Returns:
        List of ``[x, y, w, h]`` boxes; empty when nothing is found or
        detection raises.
    """
    print("[DEBUG] Detecting faces in image...")
    try:
        rgb_pixels = np.array(image)
        gray = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
        classifier = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        detections = classifier.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
        if len(detections) > 0:
            face_list = detections.tolist()
        else:
            face_list = []
        print(f"[DEBUG] Detected {len(face_list)} face(s): {face_list}")
        return face_list
    except Exception as e:
        print(f"[ERROR] Face detection failed: {e}")
        return []

def cluster_faces(faces: list) -> dict:
    """
    Bucket face boxes by size as a stand-in for real clustering.

    The size of a box is the integer mean of its width and height:
    below 50 is "small", below 100 is "medium", anything else is "large".

    Args:
        faces: Iterable of ``(x, y, w, h)`` boxes.

    Returns:
        Mapping of bucket label to the boxes in it. If a box cannot be
        processed, the error is logged and the buckets built so far are
        returned.
    """
    print("[DEBUG] Clustering detected faces...")
    clusters = {}
    try:
        for box in faces:
            _x, _y, width, height = box
            size = (width + height) // 2
            if size < 50:
                label = "small"
            elif size < 100:
                label = "medium"
            else:
                label = "large"
            if label not in clusters:
                clusters[label] = []
            clusters[label].append(box)
        print(f"[DEBUG] Faces clustered into groups: {clusters}")
    except Exception as e:
        print(f"[ERROR] Face clustering error: {e}")
    return clusters

In [13]:
#############################################
# 3. MULTI-AGENT FORENSIC PIPELINE FUNCTIONS
#############################################
async def generate_embedding(image: Image.Image, models: dict) -> np.ndarray:
    """
    Generate a consensus embedding from the CLIP, BLIP-2 and DINOv2 models.

    Each model output is flattened and scaled to unit length, the three
    vectors are truncated to the shortest one, combined as a weighted average
    (0.4 CLIP, 0.3 BLIP-2, 0.3 DINOv2) and the result is normalised again.

    Args:
        image: Image handed directly to each model.
        models: Registry providing "clip_model", "blip_model" and
            "dinov2_model".

    Returns:
        The consensus vector, or an empty array when any step raises (the
        exception is logged, not propagated).
    """
    print("[DEBUG] Generating image embeddings...")

    def _unit_vector(tensor) -> np.ndarray:
        # Flatten, then scale to unit length; an all-zero vector is returned
        # unchanged rather than turned into NaNs.
        vec = tensor.cpu().numpy().flatten()
        length = np.linalg.norm(vec)
        return vec / length if length > 0 else vec

    try:
        emb_clip = models["clip_model"].get_image_features(image)
        emb_blip = models["blip_model"].get_image_features(image)
        # Index 0 along the second axis of the DINOv2 output.
        emb_dinov2 = models["dinov2_model"](image)[:, 0]
        vectors = [_unit_vector(t) for t in (emb_clip, emb_blip, emb_dinov2)]
        weights = np.array([0.4, 0.3, 0.3])
        # The models emit different widths; keep only the shared leading part.
        min_dim = min(v.shape[0] for v in vectors)
        stacked = np.array([v[:min_dim] for v in vectors])
        consensus = np.average(stacked, axis=0, weights=weights)
        norm = np.linalg.norm(consensus)
        consensus_final = consensus / norm if norm > 0 else consensus
        print("[DEBUG] Image embedding generated successfully.")
        return consensus_final
    except Exception as e:
        # Was missing the f-prefix, so the log showed a literal "{e}".
        print(f"[ERROR] Embedding generation error: {e}")
        return np.array([])

async def generate_image_description(image: Image.Image, models: dict) -> str:
    """
    Ask the BLIP-2 model for a forensic-style caption of the image.

    Args:
        image: Image to describe.
        models: Registry providing "blip_processor" and "blip_model"; the
            inputs are moved to ``models["device"]`` when that entry is set.

    Returns:
        The stripped caption, or "Description generation failed." when any
        step raises.
    """
    print("[DEBUG] Generating image description...")
    prompt = (
        "You are an expert forensic analyst tasked with reverse image search for defensive forensics. "
        "Describe this image in detail, focusing on identifiable subjects, objects, environmental context, "
        "and potential forensic clues such as manipulation or concealed features. Use precise, technical language."
    )
    try:
        processor = models["blip_processor"]
        inputs = processor(images=image, text=prompt, return_tensors="pt")
        if models.get("device"):
            inputs = inputs.to(models["device"])
        generated = models["blip_model"].generate(**inputs, max_new_tokens=100)
        decoded = models["blip_processor"].batch_decode(generated, skip_special_tokens=True)
        description = decoded[0].strip()
        print(f"[DEBUG] Image description: {description}")
        return description
    except Exception as e:
        print(f"[ERROR] Description generation error: {e}")
        return "Description generation failed."

In [14]:
# ------------------------------------------------
# Simulated Search Agents for Reverse Image Search
# ------------------------------------------------
async def search_private_db(embedding: np.ndarray, description: str) -> list:
    """
    Simulated private-database lookup.

    Neither argument is used; after a 0.5 s pause one hard-coded hit is
    returned.
    """
    print("[DEBUG] Searching Private DB...")
    await asyncio.sleep(0.5)
    case_metadata = {"date": "2023-10-15", "notes": "High confidence forensic match"}
    hit = {
        "source": "Private DB",
        "match": "Case_Person_A123",
        "score": 0.91,
        "metadata": case_metadata,
    }
    return [hit]

async def search_twitter(embedding: np.ndarray, description: str) -> list:
    """
    Simulated Twitter lookup.

    The embedding is unused. After a 0.7 s pause one hard-coded hit is
    returned whose metadata carries the first five whitespace-separated
    words of ``description`` as keywords.
    """
    print("[DEBUG] Searching Twitter...")
    await asyncio.sleep(0.7)
    leading_words = description.split()[:5]
    tweet_metadata = {
        "username": "@forensic_expert",
        "posted": "2023-11-01",
        "keywords": leading_words,
    }
    hit = {
        "source": "Twitter",
        "match": "Tweet_Image_456",
        "score": 0.87,
        "metadata": tweet_metadata,
    }
    return [hit]

async def search_reddit(embedding: np.ndarray, description: str) -> list:
    """
    Simulated Reddit lookup.

    Neither argument is used; after a 0.6 s pause one hard-coded hit is
    returned.
    """
    print("[DEBUG] Searching Reddit...")
    await asyncio.sleep(0.6)
    post_metadata = {"subreddit": "r/forensics", "posted": "2023-10-25"}
    hit = {
        "source": "Reddit",
        "match": "Reddit_Post_789",
        "score": 0.89,
        "metadata": post_metadata,
    }
    return [hit]

async def search_instagram(embedding: np.ndarray, description: str) -> list:
    """
    Simulated Instagram lookup.

    Neither argument is used; after a 0.8 s pause one hard-coded hit is
    returned.
    """
    print("[DEBUG] Searching Instagram...")
    await asyncio.sleep(0.8)
    post_metadata = {
        "username": "intel_forensics",
        "posted": "2023-11-12",
        "location": "New York",
    }
    hit = {
        "source": "Instagram",
        "match": "Insta_Post_654",
        "score": 0.88,
        "metadata": post_metadata,
    }
    return [hit]

async def search_osint_sources(embedding: np.ndarray, description: str) -> list:
    """
    Simulated OSINT lookup.

    Neither argument is used; after a 1.0 s pause one hard-coded hit is
    returned.
    """
    print("[DEBUG] Searching OSINT sources...")
    await asyncio.sleep(1.0)
    report_metadata = {"forum": "deep_web_forum", "date": "2023-09-10"}
    hit = {
        "source": "OSINT",
        "match": "OSINT_Report_101",
        "score": 0.83,
        "metadata": report_metadata,
    }
    return [hit]

async def search_google_images(dummy_image: Image.Image, description: str, api_key: str, cse_id: str) -> list:
    """
    Simulated Google Images lookup.

    None of the arguments are used; after a 1.0 s pause one hard-coded hit
    is returned.
    """
    print("[DEBUG] Searching Google Images...")
    await asyncio.sleep(1.0)
    article_metadata = {
        "url": "https://news.example.com/article123",
        "title": "Breaking news related to forensic evidence",
    }
    hit = {
        "source": "Google Images",
        "match": "News_Site_XYZ",
        "score": 0.86,
        "metadata": article_metadata,
    }
    return [hit]

async def search_additional_sources(embedding: np.ndarray, description: str) -> list:
    """
    Simulated lookup of additional sources (currently Facebook only).

    Neither argument is used; after a 0.9 s pause one hard-coded hit is
    returned.
    """
    print("[DEBUG] Searching additional sources (Facebook)...")
    await asyncio.sleep(0.9)
    post_metadata = {"user": "forensic.page", "posted": "2023-10-05"}
    hit = {
        "source": "Facebook",
        "match": "FB_Post_321",
        "score": 0.81,
        "metadata": post_metadata,
    }
    return [hit]

async def search_telegram(embedding: np.ndarray, description: str) -> list:
    """
    Simulated Telegram lookup.

    Neither argument is used; after a 0.7 s pause one hard-coded hit is
    returned.
    """
    print("[DEBUG] Searching Telegram...")
    await asyncio.sleep(0.7)
    message_metadata = {"username": "@telegram_forensics", "date": "2023-11-15"}
    hit = {
        "source": "Telegram",
        "match": "Telegram_Message_987",
        "score": 0.85,
        "metadata": message_metadata,
    }
    return [hit]

async def search_tineye(embedding: np.ndarray, description: str) -> list:
    """
    Simulated TinEye lookup.

    Neither argument is used; after a 0.8 s pause one hard-coded hit is
    returned.
    """
    print("[DEBUG] Searching TinEye...")
    await asyncio.sleep(0.8)
    match_metadata = {"notes": "Simulated TinEye match", "retrieved": "2023-12-01"}
    hit = {
        "source": "TinEye",
        "match": "TinEye_Match_555",
        "score": 0.89,
        "metadata": match_metadata,
    }
    return [hit]

async def merge_search_results(*results: list) -> list:
    """
    Flatten per-source result lists into one de-duplicated, ranked list.

    Two hits are duplicates when they share the same ``(source, match)``
    pair; the first one seen is kept.

    Args:
        *results: Any number of lists of hit dicts, each hit carrying
            "source" and "match" keys.

    Returns:
        Unique hits ordered by "score" descending (a missing score counts
        as 0).
    """
    print("[DEBUG] Merging search results from various sources...")

    def _score_of(hit: dict):
        return hit.get("score", 0)

    first_seen = {}
    for source_hits in results:
        for hit in source_hits:
            identity = (hit["source"], hit["match"])
            # setdefault keeps the earlier hit when the identity repeats.
            first_seen.setdefault(identity, hit)
    merged_sorted = list(first_seen.values())
    merged_sorted.sort(key=_score_of, reverse=True)
    print(f"[DEBUG] Merged Results: {merged_sorted}")
    return merged_sorted

In [16]:
# ------------------------------------------------
# Image Similarity Matching Agent
# ------------------------------------------------
async def image_similarity_agent(context: dict) -> dict:
    """
    Attach simulated similarity scores to the pipeline context.

    Args:
        context: Pipeline state; it is mutated in place.

    Returns:
        The same ``context`` with "similarity_matches" set to two fixed
        case scores.
    """
    print("[DEBUG] Running image similarity matching...")
    # Placeholder values standing in for a comparison with known cases.
    simulated_matches = dict(
        match_with_Case_001=0.92,
        match_with_Case_002=0.87,
    )
    context["similarity_matches"] = simulated_matches
    print(f"[DEBUG] Similarity matches: {simulated_matches}")
    return context

# ------------------------------------------------
# Integration with Anthropic MCP via Groq-enhanced LLM (vLLM)
# ------------------------------------------------
async def call_anthropic_mcp(prompt: str, client: Any, sampling_params: SamplingParams) -> str:
    """
    Simulated LLM call.

    None of the arguments are used: the function waits 1.0 s and returns a
    fixed response string.
    """
    print("[DEBUG] Calling Anthropic MCP via Groq-enhanced LLM...")
    await asyncio.sleep(1.0)
    response_parts = [
        "Anthropic MCP Response: Analysis indicates strong evidence of tampering with converging data points from multiple public sources. ",
        "Recommend further manual verification and targeted investigative follow-up.",
    ]
    simulated_response = "".join(response_parts)
    print("[DEBUG] Anthropic MCP response received.")
    return simulated_response

In [17]:
# ------------------------------------------------
# LangGraph Multi-Agent System
# ------------------------------------------------
async def forensic_search_agent(context: dict) -> dict:
    """
    Query every search source concurrently and store the merged hits.

    Args:
        context: Pipeline state providing "description", "embedding" and
            "image"; it is mutated in place.

    Returns:
        The same ``context`` with "search_results" set to the merged list.
    """
    print("[DEBUG] Running forensic search agent...")
    description = context["description"]
    embedding = context["embedding"]
    lookups = [
        search_private_db(embedding, description),
        search_twitter(embedding, description),
        search_reddit(embedding, description),
        search_instagram(embedding, description),
        search_osint_sources(embedding, description),
        search_google_images(
            context["image"],
            description,
            os.environ.get("GOOGLE_API_KEY", ""),
            os.environ.get("GOOGLE_CSE_ID", ""),
        ),
        search_additional_sources(embedding, description),
        search_telegram(embedding, description),
        search_tineye(embedding, description),
    ]
    # gather() returns one result list per lookup, in the order given.
    per_source_hits = await asyncio.gather(*lookups)
    context["search_results"] = await merge_search_results(*per_source_hits)
    print("[DEBUG] Forensic search agent complete.")
    return context

async def forensic_analysis_agent(context: dict) -> dict:
    """
    Produce the forensic analysis report from the description and search hits.

    Builds the analysis prompt, initialises the Groq client and stores the
    response from ``call_anthropic_mcp`` in the report.

    Args:
        context: Pipeline state providing "description" and, optionally,
            "search_results"; it is mutated in place.

    Returns:
        The same ``context`` with "analysis_report" set.
    """
    print("[DEBUG] Running forensic analysis agent...")
    prompt = (
        "You are a forensic analyst expert. Based on the following image description and reverse image search results:\n"
        f"Image Description: {context['description']}\n"
        f"Search Results: {json.dumps(context.get('search_results', []), indent=2)}\n\n"
        "Provide a detailed forensic analysis that cross-references the data, highlights inter-source similarities, "
        "and identifies potential evidence of manipulation. Include technical recommendations for further investigation."
    )
    # Only the Groq key is needed here. Reading it from the environment first
    # avoids get_api_keys() blocking on getpass prompts for Google credentials
    # that this agent would discard anyway.
    groq_api_key = os.environ.get("GROQ_API_KEY") or get_api_keys()[0]
    llm_client, sampling_params = initialize_llm(groq_api_key)
    anthropic_response = await call_anthropic_mcp(prompt, llm_client, sampling_params)
    analysis_report = {
        "analysis": "Forensic analysis enhanced by Anthropic MCP via Groq.",
        "anthropic_details": anthropic_response,
        "recommendations": "Review overlapping evidence and verify image integrity with deep metadata analysis.",
        "prompt_used": prompt
    }
    context["analysis_report"] = analysis_report
    print("[DEBUG] Forensic analysis agent complete.")
    return context

async def threat_assessment_agent(context: dict) -> dict:
    """
    Attach a fixed threat assessment to the pipeline context.

    A prompt embedding the analysis report is assembled, but it is not sent
    anywhere; the stored report is hard-coded.

    Args:
        context: Pipeline state, optionally holding "analysis_report"; it is
            mutated in place.

    Returns:
        The same ``context`` with "threat_report" set.
    """
    print("[DEBUG] Running threat assessment agent...")
    report_json = json.dumps(context.get('analysis_report', {}), indent=2)
    # NOTE: currently unused; kept as the template for a future LLM call.
    prompt = "".join([
        "As a threat assessment specialist, evaluate the following forensic analysis for potential vulnerabilities and risks:\n",
        f"{report_json}\n\n",
        "Return a JSON object with the following keys:\n",
        '- "threat_level": integer (0: none, 10: critical)\n',
        '- "categories": list of threat categories\n',
        '- "reasoning": brief explanation\n',
        '- "recommended_actions": list of suggested actions',
    ])
    threat_report = dict(
        threat_level=7,
        categories=["privacy_breach", "identity_theft"],
        reasoning="Multiple corroborative forensic matches indicate potential misuse of personal data.",
        recommended_actions=["Initiate detailed monitoring", "Review and harden data security policies"],
    )
    context["threat_report"] = threat_report
    print(f"[DEBUG] Threat assessment: {threat_report}")
    return context

def _build_forensic_graph():
    """
    Assemble and compile the agent workflow: search -> analysis -> similarity -> threat.
    """
    from typing import TypedDict  # local import: only this helper needs it

    class ForensicState(TypedDict, total=False):
        # Keys the agents read or write, declared so the graph state carries
        # them from node to node.
        image: Any
        exif: dict
        ocr_text: str
        manipulation: dict
        image_hash: str
        embedding: Any
        description: str
        faces: list
        face_clusters: dict
        search_results: list
        analysis_report: dict
        similarity_matches: dict
        threat_report: dict

    graph = StateGraph(ForensicState)
    graph.add_node("search", forensic_search_agent)
    graph.add_node("analysis", forensic_analysis_agent)
    graph.add_node("similarity", image_similarity_agent)
    graph.add_node("threat", threat_assessment_agent)
    graph.set_entry_point("search")
    graph.add_edge("search", "analysis")
    graph.add_edge("analysis", "similarity")
    graph.add_edge("similarity", "threat")
    graph.add_edge("threat", END)
    return graph.compile()


async def run_multiagent_pipeline(image_bytes: bytes) -> dict:
    """
    Run the full forensic pipeline on one encoded image.

    Steps: preprocess the image, extract forensic features (EXIF, OCR, ELA,
    perceptual hash, faces), load the models and compute the embedding and
    description, run the four agents through a LangGraph workflow, and render
    the search-result network graph.

    Args:
        image_bytes: Encoded image bytes.

    Returns:
        Report dict containing the extracted features, the agents' outputs,
        the network graph filename, the processing time and a timestamp.

    Raises:
        Whatever image decoding or the agents raise; errors are not caught
        here.
    """
    start_time = time.time()
    print("[DEBUG] Running multi-agent pipeline...")
    # Step 1: Preprocess the image and extract forensic features.
    image = await preprocess_image_async(image_bytes, enhance=True)
    # EXIF is read from the original bytes: the preprocessed image has been
    # converted and filtered, and need not carry the source metadata.
    with Image.open(BytesIO(image_bytes)) as original:
        exif = extract_exif_data(original)
    ocr_text = perform_ocr(image)
    manipulation = detect_image_manipulation(image)
    img_hash = image_hash(image)

    # Detect faces and cluster them.
    faces = detect_faces(image)
    face_clusters = cluster_faces(faces)

    # Step 2: Load models and generate embedding and detailed image description.
    # NOTE(review): models are reloaded on every call; consider caching.
    models = load_models()
    embedding = await generate_embedding(image, models)
    description = await generate_image_description(image, models)

    # Build initial context for multi-agent processing.
    context = {
        "image": image,
        "exif": exif,
        "ocr_text": ocr_text,
        "manipulation": manipulation,
        "image_hash": img_hash,
        "embedding": embedding,
        "description": description,
        "faces": faces,
        "face_clusters": face_clusters
    }

    print("[DEBUG] Initial context set up, starting LangGraph state pipeline...")
    # The previous code called StateGraph("name"), add_state, set_transition
    # and run, none of which exist in LangGraph. Use the documented
    # node/edge/compile/ainvoke API instead.
    workflow = _build_forensic_graph()
    context = await workflow.ainvoke(context)

    # Build network graph visualization from search results.
    graph_filename = build_network_graph(context.get("search_results", []))

    final_report = {
        "exif": context["exif"],
        "ocr_text": context["ocr_text"],
        "manipulation": context["manipulation"],
        "image_hash": context["image_hash"],
        "description": context["description"],
        "faces": context["faces"],
        "face_clusters": context["face_clusters"],
        "search_results": context.get("search_results", []),
        "analysis_report": context.get("analysis_report", {}),
        "similarity_matches": context.get("similarity_matches", {}),
        "threat_report": context.get("threat_report", {}),
        "network_graph_image": graph_filename,
        "processing_time": f"{time.time() - start_time:.2f} seconds",
        "timestamp": datetime.now().isoformat()
    }
    print("[DEBUG] Multi-agent pipeline complete.")
    return final_report

def build_network_graph(search_results: list) -> str:
    """
    Draw the search hits as a graph and save it as a PNG.

    Each hit becomes a node labelled "<source>: <match>"; two nodes are
    linked when the text before the first colon of their labels is equal.

    Args:
        search_results: Hit dicts carrying "source", "match" and optionally
            "score".

    Returns:
        Name of the PNG written to the current working directory, stamped
        with the current time to the second.
    """
    print("[DEBUG] Building network graph from search results...")
    graph = nx.Graph()
    for hit in search_results:
        label = f"{hit['source']}: {hit['match']}"
        graph.add_node(label, score=hit.get("score", 0))

    labels = list(graph.nodes())
    for position, left in enumerate(labels):
        left_source = left.split(":")[0]
        # Only look at later labels so each unordered pair is visited once.
        for right in labels[position + 1:]:
            if left_source == right.split(":")[0]:
                graph.add_edge(left, right)

    layout = nx.spring_layout(graph, seed=42)
    plt.figure(figsize=(6, 4))
    nx.draw(
        graph,
        layout,
        with_labels=True,
        node_color="lightblue",
        node_size=800,
        font_size=8,
        edge_color="grey",
    )
    plt.title("Forensic Search Results Network")
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    graph_filename = f"forensic_network_{stamp}.png"
    plt.savefig(graph_filename)
    plt.close()
    print(f"[DEBUG] Network graph saved as {graph_filename}")
    return graph_filename

In [18]:
#############################################
# 4. GRADIO UI SETUP
#############################################
def process_image_ui(image) -> dict:
    """
    Gradio callback: run the forensic pipeline on an uploaded image.

    Args:
        image: Image array supplied by the Gradio image input, or ``None``
            when nothing was uploaded.

    Returns:
        The pipeline's report dict, or ``{"error": message}`` when no image
        was supplied or any step fails.
    """
    print("[DEBUG] Processing image via Gradio UI...")
    if image is None:
        # Submitting with no upload used to crash on ``None.astype``.
        print("[ERROR] Error processing image: No image provided.")
        return {"error": "No image provided."}
    try:
        pil_image = Image.fromarray(image.astype('uint8'))
        if pil_image.mode != "RGB":
            # JPEG cannot store alpha or palette data (e.g. RGBA uploads).
            pil_image = pil_image.convert("RGB")
        with BytesIO() as buffered:
            pil_image.save(buffered, format="JPEG")
            image_bytes = buffered.getvalue()
        result = asyncio.run(run_multiagent_pipeline(image_bytes))
        print("[DEBUG] Pipeline successfully processed the image.")
        return result
    except Exception as e:
        print(f"[ERROR] Error processing image: {e}")
        return {"error": str(e)}

# Gradio interface: a single image upload (delivered to the callback as a
# numpy array) whose result is rendered as JSON.
demo = gr.Interface(
    fn=process_image_ui,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=gr.JSON(label="Forensic Analysis Report"),
    title="Enhanced Reverse Image Search & Similarity Matching for Defensive Forensics with vLLM & Groq",
    description=(
        "Upload an image to run a multi-agent forensic analysis that leverages reverse image search, image similarity matching, "
        "and enhanced forensic analysis via Anthropic MCP integrated with Groq and vLLM. The pipeline utilizes a LangGraph-based "
        "multi-agent workflow with tailored prompts, face detection and clustering, and robust error handling with extensive debugging."
    )
)

# Launch only when executed as the main module, not on import.
if __name__ == "__main__":
    print("[DEBUG] Launching Gradio demo interface...")
    # share=True asks Gradio for a public share URL in addition to the local server.
    demo.launch(share=True)

[DEBUG] Launching Gradio demo interface...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3be722b043a14dd08c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
