<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Face_Similarity_Matching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Reverse Image Search System for Defensive Forensics**

In [3]:
!pip install -q torch transformers langchain langgraph numpy pillow requests vllm aiohttp opencv-python-headless networkx matplotlib nest_asyncio groq

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/121.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.9/121.9 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
import asyncio
import time
import json
import copy
import nest_asyncio
import getpass
from io import BytesIO
import os

import numpy as np
import cv2
from PIL import Image, ImageEnhance, ImageFilter
import matplotlib.pyplot as plt
import networkx as nx
import aiohttp
import torch

# Transformers for image embeddings and description generation
from transformers import CLIPProcessor, CLIPModel, AutoProcessor, AutoModel

# LLM orchestration with Groq and LangChain
from groq import Groq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Multi-agent workflow using LangGraph
from langgraph.graph import StateGraph, END

# Patch asyncio for environments with an existing event loop
nest_asyncio.apply()

In [6]:
#############################################
# 1. API KEYS & MODEL INITIALIZATION
#############################################

GROQ_API_KEY = getpass.getpass("Enter your GROQ API Key: ")
GOOGLE_CSE_ID = getpass.getpass("Enter your Google CSE ID: ")
GOOGLE_API_KEY = getpass.getpass("Enter your Google API Key: ")

Enter your GROQ API Key: ··········
Enter your Google CSE ID: ··········
Enter your Google API Key: ··········


In [7]:
print("Loading models...")

# Initialize CLIP model for image embeddings
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Initialize BLIP-2 model for image semantic understanding and description generation
blip_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
blip_model = AutoModel.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)

# Initialize DINOv2 model for robust feature extraction
dinov2_model = AutoModel.from_pretrained("facebook/dinov2-base")
dinov2_processor = AutoProcessor.from_pretrained("facebook/dinov2-base")

device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model = clip_model.to(device)
blip_model = blip_model.to(device)
dinov2_model = dinov2_model.to(device)

print(f"Models loaded on {device}")

# Initialize Groq client and LLM for forensic analysis
client = Groq(api_key=GROQ_API_KEY)
llama_llm = client.chat.completions.create(
    messages=[{"role": "user", "content": "Initialize the forensic assistant."}],
    model="llama-3.3-70b-versatile",
    temperature=0.1
)

Loading models...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.52k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/68.0 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/882 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.56M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/122k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/10.0G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/436 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Models loaded on cuda


In [8]:
#############################################
# 2. IMAGE PROCESSING FUNCTIONS
#############################################
async def fetch_image_async(image_url: str) -> bytes:
    """Asynchronously fetch an image from a URL."""
    async with aiohttp.ClientSession() as session:
        async with session.get(image_url) as response:
            if response.status != 200:
                raise ValueError(f"Failed to fetch image. HTTP status code: {response.status}")
            return await response.read()

async def preprocess_image_async(image_url_or_bytes: str or bytes) -> Image.Image:
    """
    Download and preprocess the image asynchronously.
    Enhances contrast, sharpens, and returns a cleaned-up RGB PIL image.
    """
    if isinstance(image_url_or_bytes, str):
        image_bytes = await fetch_image_async(image_url_or_bytes)
    else:
        image_bytes = image_url_or_bytes

    image = Image.open(BytesIO(image_bytes)).convert("RGB")
    # Enhance contrast and sharpen the image
    enhancer = ImageEnhance.Contrast(image)
    image = enhancer.enhance(1.5)
    image = image.filter(ImageFilter.SHARPEN)
    # Resize image if any dimension exceeds 1024 pixels
    if max(image.size) > 1024:
        image.thumbnail((1024, 1024), Image.LANCZOS)
    return image

def detect_faces(image: Image.Image) -> list:
    """
    Detect faces in the image using Haar Cascade from OpenCV.
    Returns a list of cropped face images as PIL Image objects.
    """
    img_cv = np.array(image)
    img_cv = img_cv[:, :, ::-1].copy()  # Convert from RGB to BGR format
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    face_images = []
    for (x, y, w, h) in faces:
        face_img = image.crop((x, y, x+w, y+h))
        face_img = face_img.resize((224, 224), Image.LANCZOS)
        face_images.append(face_img)
    return face_images

def save_faces(faces: list, output_dir="faces_output"):
    """
    Save detected face images to the output directory.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for idx, face in enumerate(faces):
        face.save(os.path.join(output_dir, f"face_{idx+1}.jpg"))
    print(f"Saved {len(faces)} face image(s) in '{output_dir}'.")

In [9]:
#############################################
# 3. EMBEDDING & DESCRIPTION FUNCTIONS
#############################################
async def generate_clip_embedding(image: Image.Image) -> np.ndarray:
    """Generate an image embedding using the CLIP model."""
    inputs = clip_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = clip_model.get_image_features(**inputs)
    embedding = outputs.cpu().numpy()
    norm = np.linalg.norm(embedding)
    return embedding / norm if norm > 0 else embedding

async def generate_blip_embedding(image: Image.Image) -> np.ndarray:
    """Generate an image embedding using the BLIP-2 model for semantic representation."""
    inputs = blip_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = blip_model.get_image_features(**inputs)
    embedding = outputs.cpu().numpy()
    norm = np.linalg.norm(embedding)
    return embedding / norm if norm > 0 else embedding

async def generate_dinov2_embedding(image: Image.Image) -> np.ndarray:
    """Generate an image embedding using the DINOv2 model (CLS token)."""
    inputs = dinov2_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = dinov2_model(**inputs).last_hidden_state[:, 0]
    embedding = outputs.cpu().numpy()
    norm = np.linalg.norm(embedding)
    return embedding / norm if norm > 0 else embedding

async def generate_image_description(image: Image.Image) -> str:
    """
    Generate a detailed description of the image using BLIP-2.
    The description focuses on identifiable people, objects, and locations.
    """
    prompt = "Describe this image in detail with focus on identifiable people, objects, and locations:"
    inputs = blip_processor(images=image, text=prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = blip_model.generate(**inputs, max_new_tokens=100)
    description = blip_processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return description.strip()

async def extract_classical_features(image: Image.Image) -> np.ndarray:
    """
    Extract classical image features using ORB and optionally SIFT.
    Returns the set of descriptors chosen based on the number of keypoints.
    """
    image_np = np.array(image)
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    orb = cv2.ORB_create(nfeatures=1000)
    keypoints_orb, descriptors_orb = orb.detectAndCompute(gray, None)
    try:
        sift = cv2.SIFT_create()
        keypoints_sift, descriptors_sift = sift.detectAndCompute(gray, None)
    except Exception:
        descriptors_sift = np.array([])
    if descriptors_orb is None and len(descriptors_sift) == 0:
        return np.array([])
    elif descriptors_orb is None:
        return descriptors_sift
    elif len(descriptors_sift) == 0:
        return descriptors_orb
    else:
        return descriptors_sift if len(keypoints_sift) > len(keypoints_orb) else descriptors_orb

def compute_cosine_similarity(embedding1: np.ndarray, embedding2: np.ndarray) -> float:
    """
    Compute cosine similarity between two image embeddings.
    Returns a float between -1 and 1 (1 indicates identical embeddings).
    """
    dot_product = np.dot(embedding1.flatten(), embedding2.flatten())
    norm1 = np.linalg.norm(embedding1)
    norm2 = np.linalg.norm(embedding2)
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return float(dot_product / (norm1 * norm2))

async def generate_consensus_embedding(clip_emb, blip_emb, dinov2_emb) -> np.ndarray:
    """
    Generate a consensus embedding by averaging the CLIP, BLIP, and DINOv2 embeddings.
    """
    embeddings = [clip_emb, blip_emb, dinov2_emb]
    consensus = np.mean(np.array(embeddings), axis=0)
    norm = np.linalg.norm(consensus)
    return consensus / norm if norm > 0 else consensus

In [10]:
#############################################
# 4. SEARCH ENGINE FUNCTIONS (SIMULATED)
#############################################
async def search_private_db(embedding: np.ndarray, description: str) -> list:
    """
    Simulate a search in a private database using image embeddings.
    Returns a list of matching entries with metadata and an embedding placeholder.
    """
    await asyncio.sleep(0.5)
    return [
        {"source": "Private DB", "match": "Person_123", "score": 0.91,
         "metadata": {"date": "2023-10-15"}, "embedding": embedding},
        {"source": "Private DB", "match": "Person_456", "score": 0.85,
         "metadata": {"date": "2023-09-22"}, "embedding": embedding}
    ]

async def search_twitter(embedding: np.ndarray, description: str) -> list:
    """
    Simulate a Twitter search using keywords from the image description.
    Returns matching tweet details with an embedding placeholder.
    """
    await asyncio.sleep(0.7)
    keywords = description.split()[:5]
    return [
        {"source": "Twitter", "match": "Tweet_Image_456", "score": 0.87,
         "metadata": {"username": "@user123", "posted": "2023-11-01", "keywords": keywords},
         "embedding": embedding}
    ]

async def search_reddit(embedding: np.ndarray, description: str) -> list:
    """Simulate a Reddit search and return matching posts with an embedding placeholder."""
    await asyncio.sleep(0.6)
    return [
        {"source": "Reddit", "match": "Reddit_Post_321", "score": 0.89,
         "metadata": {"subreddit": "r/pics", "posted": "2023-10-25"},
         "embedding": embedding}
    ]

async def search_instagram(embedding: np.ndarray, description: str) -> list:
    """Simulate an Instagram search and return matching posts with an embedding placeholder."""
    await asyncio.sleep(0.8)
    return [
        {"source": "Instagram", "match": "Insta_Post_654", "score": 0.88,
         "metadata": {"username": "user456", "posted": "2023-11-12", "location": "New York"},
         "embedding": embedding}
    ]

async def search_osint_sources(embedding: np.ndarray, description: str) -> list:
    """Simulate an OSINT search across multiple sources, returning matching posts with embedding placeholders."""
    await asyncio.sleep(1.0)
    return [
        {"source": "OSINT", "match": "DarkWeb_Post_999", "score": 0.83,
         "metadata": {"forum": "anonymous_forum", "date": "2023-09-10"},
         "embedding": embedding},
        {"source": "OSINT", "match": "Telegram_Group_123", "score": 0.79,
         "metadata": {"group": "public_channel_xyz", "date": "2023-10-30"},
         "embedding": embedding}
    ]

async def search_tineye(image: Image.Image) -> list:
    """Simulate a TinEye reverse image search and return results with a None embedding."""
    img_byte_arr = BytesIO()
    image.save(img_byte_arr, format='JPEG')
    img_byte_arr.seek(0)
    await asyncio.sleep(1.2)
    return [
        {"source": "TinEye", "match": "Website_ABC", "score": 0.92,
         "metadata": {"domain": "example.com", "first_crawled": "2023-08-15"},
         "embedding": None}
    ]

async def search_google_images(image: Image.Image, description: str) -> list:
    """Simulate a Google Images search and return matching results with a None embedding."""
    search_terms = " ".join(description.split()[:7])
    await asyncio.sleep(1.0)
    return [
        {"source": "Google Images", "match": "News_Site_XYZ", "score": 0.86,
         "metadata": {"url": "https://example-news.com/article123", "title": "Example article related to the image"},
         "embedding": None}
    ]

async def merge_search_results(*results: list) -> list:
    """Merge search results from all sources and sort them by their score (descending)."""
    merged = []
    for result_list in results:
        merged.extend(result_list)
    return sorted(merged, key=lambda x: x.get('score', 0), reverse=True)

In [11]:
#############################################
# 5. ANALYSIS & THREAT ASSESSMENT FUNCTIONS
#############################################
async def analyze_results_with_llm(results: list, image_description: str) -> str:
    """
    Analyze the reverse image search results and computed similarity scores using an LLM.
    Generates a structured forensic assessment report.
    """
    prompt_template = """
You are a forensic analyst. Analyze the following reverse image search results.

IMAGE DESCRIPTION:
{image_description}

SEARCH RESULTS (in JSON):
{search_results}

Provide a structured forensic assessment that includes:
1. Cross-referencing of entities.
2. Temporal and geographic correlations.
3. Evaluation of source reliability.
4. Insights on image similarity between the current image and search results.
5. Overall forensic insights about the image origin.

Format your response as a clear multi-section report.
    """
    prompt = PromptTemplate(
        input_variables=["image_description", "search_results"],
        template=prompt_template
    )
    chain = LLMChain(llm=llama_llm, prompt=prompt)
    response = await chain.arun(
        image_description=image_description,
        search_results=json.dumps(results, indent=2)
    )
    return response.strip()

async def threat_assessment(analysis: str) -> dict:
    """
    Assess potential threats or forensic concerns based on the analysis.
    Returns a JSON object containing threat level, categories, reasoning, and recommended actions.
    """
    prompt_template = """
Based on the following forensic image analysis, assess potential threats or concerns.

{analysis}

Return a JSON object with the keys:
- "threat_level": integer (0-10)
- "categories": list of strings (e.g., ["identity_theft", "privacy_breach"])
- "reasoning": a brief explanation
- "recommended_actions": a list of recommended actions

Return valid JSON only.
    """
    prompt = PromptTemplate(
        input_variables=["analysis"],
        template=prompt_template
    )
    chain = LLMChain(llm=llama_llm, prompt=prompt)
    response = await chain.arun(analysis=analysis)
    try:
        return json.loads(response.strip())
    except json.JSONDecodeError:
        return {
            "threat_level": 5,
            "categories": ["unknown"],
            "reasoning": "LLM response parsing failed. Check the forensic analysis manually.",
            "recommended_actions": ["Review analysis manually."]
        }

async def graph_link_analysis(results: list) -> plt.Figure:
    """
    Create a network graph visualization of the search results and similarity scores.
    Nodes represent sources, matches, and metadata.
    """
    G = nx.Graph()
    for result in results:
        source = result['source']
        match = result['match']
        score = result['score']
        G.add_node(source, type='source')
        G.add_node(match, type='match')
        G.add_edge(source, match, weight=score)
        if 'metadata' in result:
            for key, value in result['metadata'].items():
                # Only add metadata if value is a string to ensure hashability.
                if isinstance(value, str):
                    meta_node = f"{key}:{value}"
                    G.add_node(meta_node, type='metadata')
                    G.add_edge(match, meta_node, weight=1.0)
    pos = nx.spring_layout(G, seed=42)
    node_colors = []
    for node in G.nodes():
        node_type = G.nodes[node].get('type', 'unknown')
        if node_type == 'source':
            node_colors.append('lightblue')
        elif node_type == 'match':
            node_colors.append('lightgreen')
        else:
            node_colors.append('lightgray')
    plt.figure(figsize=(12, 8))
    nx.draw(G, pos, with_labels=True, node_color=node_colors,
            edge_color='gray', font_size=8, node_size=800, alpha=0.8)
    edge_labels = {(u, v): f"{d['weight']:.2f}" for u, v, d in G.edges(data=True) if 'weight' in d}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=7)
    plt.title("Graph & Link Analysis of Search Results")
    return plt.gcf()

In [12]:
#############################################
# 6. MULTI-AGENT WORKFLOW (LANGGRAPH)
#############################################
async def build_search_system(image_url: str) -> dict:
    """
    Execute an end-to-end multi-agent workflow:
    1. Preprocess the image (download, clean, face detection, description generation).
    2. Generate image embeddings using CLIP, BLIP-2, DINOv2 and extract classical features.
    3. Compute a consensus embedding.
    4. Simulate reverse image searches across multiple sources.
    5. Compute image similarity between the current image and the search result embeddings.
    6. Analyze results using an LLM for forensic insights and threat assessment.
    7. Save detected faces locally.
    8. Generate a final forensic report.
    """
    state_schema = {
        "image": None,
        "image_description": "",
        "faces": [],
        "clip_embedding": None,
        "blip_embedding": None,
        "dinov2_embedding": None,
        "consensus_embedding": None,
        "classical_features": None,
        "search_results": {},
        "merged_results": [],
        "analysis": "",
        "threat_assessment": {},
        "graph": None,
        "final_report": ""
    }

    workflow = StateGraph(state_schema)

    # Node 1: Preprocessing
    async def preprocess_node(state):
        print("Preprocessing image...")
        image = await preprocess_image_async(image_url)
        faces = detect_faces(image)
        description = await generate_image_description(image)
        print("Image Description (first 100 chars):", description[:100])
        print(f"Detected {len(faces)} face(s)")
        return {**state, "image": image,
                "faces": faces,
                "image_description": description}

    # Node 2: Embedding Generation
    async def embedding_node(state):
        print("Generating image embeddings...")
        image = state["image"]
        clip_emb, blip_emb, dinov2_emb, classical_features = await asyncio.gather(
            generate_clip_embedding(image),
            generate_blip_embedding(image),
            generate_dinov2_embedding(image),
            extract_classical_features(image)
        )
        return {**state,
                "clip_embedding": clip_emb,
                "blip_embedding": blip_emb,
                "dinov2_embedding": dinov2_emb,
                "classical_features": classical_features}

    # Node 3: Compute Consensus Embedding
    async def consensus_node(state):
        print("Computing consensus embedding from multiple models...")
        consensus_emb = await generate_consensus_embedding(state["clip_embedding"],
                                                           state["blip_embedding"],
                                                           state["dinov2_embedding"])
        return {**state, "consensus_embedding": consensus_emb}

    # Node 4: Search Engine and Similarity Computation
    async def search_node(state):
        print("Searching across multiple sources...")
        consensus_emb = state["consensus_embedding"]
        description = state["image_description"]
        image = state["image"]
        private_results, twitter_results, reddit_results, instagram_results, osint_results, tineye_results, google_results = await asyncio.gather(
            search_private_db(consensus_emb, description),
            search_twitter(consensus_emb, description),
            search_reddit(consensus_emb, description),
            search_instagram(consensus_emb, description),
            search_osint_sources(consensus_emb, description),
            search_tineye(image),
            search_google_images(image, description)
        )
        search_results = {
            "private_db": private_results,
            "twitter": twitter_results,
            "reddit": reddit_results,
            "instagram": instagram_results,
            "osint": osint_results,
            "tineye": tineye_results,
            "google": google_results
        }
        merged = await merge_search_results(
            private_results, twitter_results, reddit_results, instagram_results,
            osint_results, tineye_results, google_results
        )
        print(f"Found {len(merged)} merged result(s)")
        # Compute similarity for results that have an embedding (if available)
        for result in merged:
            if result.get("embedding") is not None:
                similarity = compute_cosine_similarity(consensus_emb, result["embedding"])
                result["similarity"] = similarity
            else:
                result["similarity"] = None
        return {**state,
                "search_results": search_results,
                "merged_results": merged}

    # Node 5: Analysis and Threat Assessment
    async def analysis_node(state):
        print("Analyzing results with LLM and assessing threat...")
        analysis = await analyze_results_with_llm(state["merged_results"], state["image_description"])
        threat_info = await threat_assessment(analysis)
        graph_fig = await graph_link_analysis(state["merged_results"])
        return {**state,
                "analysis": analysis,
                "threat_assessment": threat_info,
                "graph": graph_fig}

    # Node 6: Face Analysis (New Feature: save faces locally)
    async def face_analysis_node(state):
        print("Performing face analysis (saving detected faces)...")
        faces = state.get("faces", [])
        if faces:
            save_faces(faces)
        else:
            print("No faces to save.")
        return state

    # Node 7: Report Generation
    async def report_node(state):
        print("Generating final forensic report...")
        # Create a simplified state for the final report to avoid unhashable/non-serializable types.
        clean_state = {
            "image_description": state.get("image_description", ""),
            "num_faces": len(state.get("faces", [])),
            "num_merged_results": len(state.get("merged_results", [])),
            "analysis": state.get("analysis", ""),
            "threat_assessment": state.get("threat_assessment", {}),
            "top_results": state.get("merged_results", [])[:5]
        }
        report = f"""
==================== FORENSIC REPORT ====================

Image Description:
{clean_state["image_description"]}

Key Findings:
- Total Merged Matches: {clean_state["num_merged_results"]}
- Faces Detected: {clean_state["num_faces"]}
- Threat Level: {clean_state["threat_assessment"].get("threat_level", "Unknown")}/10

Detailed Analysis:
{clean_state["analysis"]}

Threat Assessment:
- Level: {clean_state["threat_assessment"].get("threat_level", "Unknown")}/10
- Categories: {', '.join(clean_state["threat_assessment"].get("categories", ["Unknown"]))}
- Reasoning: {clean_state["threat_assessment"].get("reasoning", "Not available")}
- Recommended Actions: {', '.join(clean_state["threat_assessment"].get("recommended_actions", ["None specified"]))}

Top 5 Search Results with Similarity Scores:
{json.dumps(clean_state["top_results"], indent=2)}

===========================================================
        """
        return {**state, "final_report": report}

    # Build workflow graph
    workflow.add_node("preprocess", preprocess_node)
    workflow.add_node("embed", embedding_node)
    workflow.add_node("consensus", consensus_node)
    workflow.add_node("search", search_node)
    workflow.add_node("analyze", analysis_node)
    workflow.add_node("face_analysis", face_analysis_node)
    workflow.add_node("report", report_node)

    workflow.add_edge("preprocess", "embed")
    workflow.add_edge("embed", "consensus")
    workflow.add_edge("consensus", "search")
    workflow.add_edge("search", "analyze")
    workflow.add_edge("analyze", "face_analysis")
    workflow.add_edge("face_analysis", "report")
    workflow.add_edge("report", END)

    workflow.set_entry_point("preprocess")

    print("Starting multi-agent reverse image search workflow...")
    result = await workflow.ainvoke({})
    print("Workflow completed!")
    return result

In [13]:
#############################################
# 7. MAIN FUNCTION
#############################################
async def main(image_url: str) -> dict:
    """
    Main function to run the reverse image similarity and forensic analysis system.
    """
    try:
        start_time = time.time()
        print(f"Processing image from URL: {image_url}")
        result = await build_search_system(image_url)
        elapsed = time.time() - start_time
        print(f"Total Processing Time: {elapsed:.2f} seconds")
        print(result["final_report"])
        # To display the generated graph, uncomment the following line:
        # plt.show()
        return result
    except Exception as e:
        print("Error during processing:", str(e))
        return {"error": str(e)}

if __name__ == "__main__":
    # Replace with a valid image URL for testing in production
    test_image_url = "https://example.com/test_image.jpg"
    asyncio.run(main(test_image_url))

Processing image from URL: https://example.com/test_image.jpg
Error during processing: unhashable type: 'dict'
