<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Face_Similarity_Matching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Reverse Image Search System for Defensive Forensics**

In [1]:
!pip install -q torch transformers langchain langgraph numpy pillow requests vllm aiohttp opencv-python-headless networkx matplotlib nest_asyncio groq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import asyncio
import time
import json
import copy
import nest_asyncio
import getpass
from io import BytesIO
import os
import hashlib
from datetime import datetime
from typing import Dict, List, Tuple, Union, Any, Optional

import numpy as np
import cv2
from PIL import Image, ImageEnhance, ImageFilter, ExifTags
import matplotlib.pyplot as plt
import networkx as nx
import aiohttp
import torch
import requests

# Transformers for image embeddings and description generation
from transformers import CLIPProcessor, CLIPModel, AutoProcessor, AutoModel

# LLM orchestration with Groq and LangChain
from groq import Groq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Multi-agent workflow using LangGraph
from langgraph.graph import StateGraph, END

# Optional OCR feature: pytesseract is imported on a best-effort basis.
# OCR_AVAILABLE records whether the import succeeded; perform_ocr() checks it
# before touching the module, and `pytesseract` is bound to None on failure so
# the name always exists.
try:
    import pytesseract
    OCR_AVAILABLE = True
except ImportError:
    pytesseract = None
    OCR_AVAILABLE = False

# Patch asyncio for environments with an existing event loop
# (NOTE(review): presumably needed because the notebook kernel already runs one).
nest_asyncio.apply()

In [3]:
#############################################
# 1. API KEYS & MODEL INITIALIZATION
#############################################
def get_api_keys():
    """Return the Groq, Google CSE and Google API credentials.

    Each value is taken from its environment variable; when that variable is
    unset or empty, the user is asked for the value through a hidden prompt.

    Returns:
        Tuple ``(groq_api_key, google_cse_id, google_api_key)``.
    """
    # (environment variable, interactive prompt) in the order the values are returned.
    credential_specs = (
        ("GROQ_API_KEY", "Enter your GROQ API Key: "),
        ("GOOGLE_CSE_ID", "Enter your Google CSE ID: "),
        ("GOOGLE_API_KEY", "Enter your Google API Key: "),
    )
    resolved = [
        os.environ.get(variable) or getpass.getpass(question)
        for variable, question in credential_specs
    ]
    return tuple(resolved)

def load_models(device=None):
    """Load and initialize all required models.

    Loads the CLIP, BLIP-2 and DINOv2 checkpoints together with their
    processors and moves the three models to ``device``.

    Args:
        device: Target device (e.g. ``"cuda"`` or ``"cpu"``). When ``None``,
            CUDA is used if ``torch.cuda.is_available()``, otherwise the CPU.

    Returns:
        Dictionary with the keys ``clip_model``, ``clip_processor``,
        ``blip_model``, ``blip_processor``, ``dinov2_model``,
        ``dinov2_processor`` and ``device``.
    """
    print("Loading models...")
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # Half precision is only requested on CUDA. On the CPU the model is kept
    # in float32 so it matches the float32 tensors the processors emit and
    # does not depend on CPU half-precision kernel support.
    blip_dtype = torch.float16 if str(device).startswith("cuda") else torch.float32

    # Initialize CLIP model for image embeddings
    clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

    # Initialize BLIP-2 model for image semantic understanding and description generation
    # NOTE(review): generate_image_description() calls .generate() on this
    # model; confirm that the class AutoModel resolves to for this checkpoint
    # supports caption generation.
    blip_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
    blip_model = AutoModel.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=blip_dtype)

    # Initialize DINOv2 model for robust feature extraction
    dinov2_model = AutoModel.from_pretrained("facebook/dinov2-base")
    dinov2_processor = AutoProcessor.from_pretrained("facebook/dinov2-base")

    # Move models to device
    clip_model = clip_model.to(device)
    blip_model = blip_model.to(device)
    dinov2_model = dinov2_model.to(device)

    print(f"Models loaded on {device}")
    return {
        "clip_model": clip_model,
        "clip_processor": clip_processor,
        "blip_model": blip_model,
        "blip_processor": blip_processor,
        "dinov2_model": dinov2_model,
        "dinov2_processor": dinov2_processor,
        "device": device
    }

def initialize_llm(api_key):
    """Build and return a Groq client authenticated with ``api_key``."""
    return Groq(api_key=api_key)

In [4]:
#############################################
# 2. IMAGE PROCESSING FUNCTIONS
#############################################
async def fetch_image_async(image_url: str) -> bytes:
    """Download ``image_url`` with aiohttp and return the response body.

    Args:
        image_url: Address of the image to download.

    Returns:
        The raw bytes of the response body.

    Raises:
        ValueError: If the server answers with any status other than 200.
    """
    async with aiohttp.ClientSession() as http, http.get(image_url) as reply:
        status = reply.status
        if status == 200:
            return await reply.read()
        raise ValueError(f"Failed to fetch image. HTTP status code: {status}")

def load_local_image(image_path: str) -> bytes:
    """Read the file at ``image_path`` in binary mode and return its bytes."""
    with open(image_path, mode="rb") as handle:
        payload = handle.read()
    return payload

async def preprocess_image_async(image_source: Union[str, bytes], enhance: bool = True) -> Image.Image:
    """
    Load an image from a URL, a local path or raw bytes and prepare it.

    The image is decoded and converted to RGB. With ``enhance`` set, contrast
    is boosted by a factor of 1.5 and a sharpening filter is applied. Images
    whose longer side exceeds 1024 pixels are shrunk in place to fit within
    1024x1024.

    Args:
        image_source: ``http://``/``https://`` URL, local file path, or the
            encoded image bytes themselves
        enhance: Apply the contrast/sharpen step when True

    Returns:
        Processed RGB PIL Image
    """
    # Resolve the source to encoded bytes: bytes pass through, strings are
    # either fetched over HTTP(S) or read from disk.
    if not isinstance(image_source, str):
        raw = image_source
    elif image_source.startswith(('http://', 'https://')):
        raw = await fetch_image_async(image_source)
    else:
        raw = load_local_image(image_source)

    picture = Image.open(BytesIO(raw)).convert("RGB")

    if enhance:
        boosted = ImageEnhance.Contrast(picture).enhance(1.5)
        picture = boosted.filter(ImageFilter.SHARPEN)

    # Cap the longer side at 1024 pixels.
    longest_side = max(picture.size)
    if longest_side > 1024:
        picture.thumbnail((1024, 1024), Image.LANCZOS)

    return picture

def detect_faces(image: Image.Image, cascade_file=None) -> list:
    """
    Detect faces in the image using a Haar cascade from OpenCV.

    Args:
        image: PIL Image
        cascade_file: Optional custom cascade file path. It is used only when
            the path exists; otherwise OpenCV's bundled
            ``haarcascade_frontalface_default.xml`` is loaded.

    Returns:
        List with one dictionary per detected face:
            "image": the face cropped from ``image`` and resized to 224x224
            "coords": ``(x, y, w, h)`` of the detection as built-in ints
    """
    # The cascade works on a single-channel image. Converting to RGB first
    # lets grayscale/palette/alpha inputs through instead of failing on the
    # channel axis.
    rgb = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    if cascade_file and os.path.exists(cascade_file):
        cascade_path = cascade_file
    else:
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(cascade_path)

    # First pass: scale factor 1.1, at least 4 neighbours.
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # If no faces found, try more permissive parameters.
    if len(faces) == 0:
        faces = face_cascade.detectMultiScale(gray, 1.05, 3)

    face_images = []
    for box in faces:
        # Cast to built-in ints so the coordinates stay JSON-serializable
        # whatever integer type OpenCV hands back.
        x, y, w, h = (int(value) for value in box)
        face_img = image.crop((x, y, x + w, y + h))
        face_img = face_img.resize((224, 224), Image.LANCZOS)
        face_images.append({
            "image": face_img,
            "coords": (x, y, w, h)
        })

    return face_images

def extract_exif_data(image: Image.Image) -> dict:
    """
    Collect the EXIF tags of an image into a plain dictionary.

    Tag ids are translated to names through ``ExifTags.TAGS`` (unknown ids are
    kept as-is). Byte values and the ``MakerNote`` tag are replaced with the
    placeholder string "Binary data".

    Args:
        image: PIL Image

    Returns:
        Dictionary with EXIF data; empty when the image exposes none
    """
    metadata = {}
    try:
        raw = image._getexif()
        entries = raw.items() if raw else ()
        for tag_id, value in entries:
            name = ExifTags.TAGS.get(tag_id, tag_id)
            is_binary = isinstance(value, bytes) or name == 'MakerNote'
            metadata[name] = "Binary data" if is_binary else value
    except (AttributeError, KeyError, IndexError):
        # Best effort: keep whatever was gathered before the failure.
        pass

    return metadata

def perform_ocr(image: Image.Image) -> str:
    """
    Run OCR over the image when pytesseract could be imported.

    A copy of the image gets its contrast doubled and is converted to
    grayscale before being handed to ``pytesseract.image_to_string``.

    Args:
        image: PIL Image

    Returns:
        The recognized text with surrounding whitespace removed, or a message
        string when OCR is unavailable or fails
    """
    if not OCR_AVAILABLE:
        return "OCR not available. Install pytesseract to enable this feature."

    try:
        # Work on a copy: boost contrast, then drop to single-channel grayscale.
        boosted = ImageEnhance.Contrast(image.copy()).enhance(2.0)
        prepared = boosted.convert('L')

        recognized = pytesseract.image_to_string(prepared)
        return recognized.strip()
    except Exception as e:
        return f"OCR error: {str(e)}"

def save_faces(faces: list, output_dir="faces_output"):
    """
    Save detected face images to the output directory.

    Files are named ``face_<YYYYmmdd_HHMMSS>_<n>.jpg`` with ``n`` starting at
    1; the timestamp is taken once per call.

    Args:
        faces: List of dictionaries whose "image" entry is the face image to
            write (anything exposing ``save(path)``)
        output_dir: Directory to save faces; created, including parents, when
            it does not exist yet
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(output_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    for idx, face_data in enumerate(faces, start=1):
        face_file = os.path.join(output_dir, f"face_{timestamp}_{idx}.jpg")
        face_data["image"].save(face_file)

    print(f"Saved {len(faces)} face image(s) in '{output_dir}'.")

def image_hash(image: Image.Image, hash_size=8) -> str:
    """
    Compute an average hash of the image for deduplication.

    The image is shrunk to ``hash_size`` x ``hash_size`` grayscale pixels and
    each pixel contributes one bit: 1 when it is brighter than the mean of all
    pixels, 0 otherwise.

    Args:
        image: PIL Image
        hash_size: Side length of the reduced image; the hash carries
            ``hash_size ** 2`` bits

    Returns:
        Zero-padded lowercase hex string of the hash
    """
    # Resize and convert to grayscale
    img = image.resize((hash_size, hash_size), Image.LANCZOS).convert('L')
    pixels = list(img.getdata())

    # Calculate average pixel value
    avg = sum(pixels) / len(pixels)

    # Create hash: 1 if pixel > avg, 0 otherwise
    bits = ''.join('1' if pixel > avg else '0' for pixel in pixels)

    # Round the hex width up so a bit count that is not a multiple of 4 (odd
    # hash_size) still yields a fixed-length string.
    hex_width = (len(bits) + 3) // 4
    return f"{int(bits, 2):0{hex_width}x}"

def detect_image_manipulation(image: Image.Image) -> dict:
    """
    Detect potential image manipulation using ELA (Error Level Analysis).

    The image is re-encoded as JPEG at quality 90 and compared pixel-wise with
    the input; the largest per-channel difference drives the assessment.

    Args:
        image: PIL Image

    Returns:
        Dictionary with manipulation assessment:
            "max_diff": largest per-channel difference found
            "manipulation_score": ``max_diff / 40`` capped at 1.0
            "likely_manipulated": True when ``max_diff`` exceeds 20
    """
    # ImageChops is not part of the module-level PIL imports, so bring it in here.
    from PIL import ImageChops

    # Work in RGB so the JPEG encoder accepts the image and both operands of
    # the difference share one mode.
    rgb_image = image.convert('RGB')

    # Save image with a specific quality level
    buffer = BytesIO()
    rgb_image.save(buffer, format='JPEG', quality=90)
    buffer.seek(0)
    resaved = Image.open(buffer).convert('RGB')

    # Calculate the difference; getextrema() yields one (min, max) pair per band.
    ela_image = ImageChops.difference(rgb_image, resaved)
    max_diff = max(band_max for _, band_max in ela_image.getextrema())

    return {
        "max_diff": max_diff,
        "manipulation_score": min(max_diff / 40.0, 1.0),  # Scale from 0-1
        "likely_manipulated": max_diff > 20
    }

In [5]:
#############################################
# 3. EMBEDDING & DESCRIPTION FUNCTIONS
#############################################
async def generate_clip_embedding(image: Image.Image, models: dict) -> np.ndarray:
    """Return the CLIP image features of ``image`` scaled to unit L2 norm.

    Args:
        image: PIL Image
        models: Dictionary providing "clip_processor", "clip_model" and "device"

    Returns:
        NumPy array of the features; left unscaled when its norm is zero
    """
    batch = models["clip_processor"](images=image, return_tensors="pt").to(models["device"])
    with torch.no_grad():
        features = models["clip_model"].get_image_features(**batch)
    vector = features.cpu().numpy()
    magnitude = np.linalg.norm(vector)
    if magnitude > 0:
        return vector / magnitude
    return vector

async def generate_blip_embedding(image: Image.Image, models: dict) -> np.ndarray:
    """Generate an image embedding using BLIP-2 model for semantic representation.

    Args:
        image: PIL Image
        models: Dictionary providing "blip_processor", "blip_model" and "device"

    Returns:
        NumPy float32 array of the image features scaled to unit L2 norm;
        left unscaled when its norm is zero
    """
    blip_model = models["blip_model"]

    # Cast the pixel values to the model's own dtype: the model may have been
    # loaded in float16 while the processor emits float32 tensors.
    inputs = models["blip_processor"](images=image, return_tensors="pt").to(
        models["device"], blip_model.dtype
    )
    with torch.no_grad():
        outputs = blip_model.get_image_features(**inputs)

    # NOTE(review): get_image_features may hand back a model-output object
    # rather than a bare tensor, depending on the model class/version; the
    # pooled vision output is used in that case — confirm against the
    # installed transformers release.
    if not isinstance(outputs, torch.Tensor):
        outputs = outputs.pooler_output

    # Convert to float32 before leaving torch so the norm is not computed in
    # half precision.
    embedding = outputs.float().cpu().numpy()
    norm = np.linalg.norm(embedding)
    return embedding / norm if norm > 0 else embedding

async def generate_dinov2_embedding(image: Image.Image, models: dict) -> np.ndarray:
    """Return the DINOv2 embedding of ``image`` scaled to unit L2 norm.

    The embedding is the first token of the model's ``last_hidden_state``
    (the CLS position).

    Args:
        image: PIL Image
        models: Dictionary providing "dinov2_processor", "dinov2_model" and "device"

    Returns:
        NumPy array of the embedding; left unscaled when its norm is zero
    """
    batch = models["dinov2_processor"](images=image, return_tensors="pt").to(models["device"])
    with torch.no_grad():
        hidden = models["dinov2_model"](**batch).last_hidden_state
        first_token = hidden[:, 0]
    vector = first_token.cpu().numpy()
    magnitude = np.linalg.norm(vector)
    if magnitude > 0:
        return vector / magnitude
    return vector

async def generate_image_description(image: Image.Image, models: dict, prompt=None) -> str:
    """
    Generate a detailed description of the image using BLIP-2.
    Focuses on identifiable people, objects, and locations.

    Args:
        image: PIL Image
        models: Dictionary with loaded models; "blip_processor", "blip_model"
            and "device" are used
        prompt: Optional custom prompt; a default asking for people, objects
            and locations is used when None

    Returns:
        The first decoded sequence with surrounding whitespace removed
    """
    if prompt is None:
        prompt = "Describe this image in detail with focus on identifiable people, objects, and locations:"

    blip_model = models["blip_model"]

    # Match the floating-point inputs to the model's dtype (the model may be
    # loaded in float16 while the processor emits float32 pixel values).
    # NOTE(review): relies on the processor output's .to() leaving integer
    # token ids untouched, as in the transformers BLIP-2 usage examples.
    inputs = models["blip_processor"](images=image, text=prompt, return_tensors="pt").to(
        models["device"], blip_model.dtype
    )
    with torch.no_grad():
        outputs = blip_model.generate(**inputs, max_new_tokens=100)
    description = models["blip_processor"].batch_decode(outputs, skip_special_tokens=True)[0]
    return description.strip()

async def extract_classical_features(image: Image.Image) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """
    Compute ORB (up to 1000 features) and, when it works, SIFT keypoints and
    descriptors on the grayscale image and return one of the two sets.

    SIFT wins only when it produced descriptors and either ORB produced none
    or SIFT found strictly more keypoints. ``(None, None)`` is returned when
    neither detector produced descriptors.
    """
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)

    # ORB
    orb_keypoints, orb_descriptors = cv2.ORB_create(nfeatures=1000).detectAndCompute(gray, None)

    # SIFT is optional: any failure leaves it out of the comparison.
    sift_keypoints = None
    sift_descriptors = None
    try:
        sift_keypoints, sift_descriptors = cv2.SIFT_create().detectAndCompute(gray, None)
    except Exception:
        pass

    have_orb = orb_descriptors is not None
    have_sift = sift_descriptors is not None

    if have_sift and (not have_orb or len(sift_keypoints) > len(orb_keypoints)):
        return sift_keypoints, sift_descriptors
    if have_orb:
        return orb_keypoints, orb_descriptors
    return None, None

def get_histogram_features(image: Image.Image) -> dict:
    """
    Build a normalized 256-bin histogram for each of the first three channels.

    Args:
        image: PIL Image

    Returns:
        Dictionary with the keys "histogram_r", "histogram_g" and
        "histogram_b", each a flat list of bin values
    """
    pixels = np.array(image)
    channel_keys = ("histogram_r", "histogram_g", "histogram_b")

    features = {}
    for channel_index, key in enumerate(channel_keys):
        histogram = cv2.calcHist([pixels], [channel_index], None, [256], [0, 256])
        # Normalized in place, then flattened to a plain list.
        normalized = cv2.normalize(histogram, histogram).flatten()
        features[key] = normalized.tolist()

    return features

def compute_cosine_similarity(embedding1: np.ndarray, embedding2: np.ndarray) -> float:
    """
    Cosine similarity of two embeddings, compared as flattened vectors.

    Returns a float between -1 and 1 (1 indicates identical direction), or
    0.0 when either embedding has zero norm.
    """
    numerator = np.dot(embedding1.flatten(), embedding2.flatten())
    magnitudes = (np.linalg.norm(embedding1), np.linalg.norm(embedding2))
    if any(magnitude == 0 for magnitude in magnitudes):
        return 0.0
    first, second = magnitudes
    return float(numerator / (first * second))

async def generate_consensus_embedding(clip_emb, blip_emb, dinov2_emb) -> np.ndarray:
    """
    Generate a consensus embedding as a weighted average of the CLIP, BLIP-2
    and DINOv2 embeddings (weights 0.4 / 0.3 / 0.3).

    Each embedding is flattened and scaled to unit L2 norm; an all-zero
    embedding is left as zeros instead of being divided by its zero norm.
    The vectors are then cut to the length of the shortest one before
    averaging.

    Args:
        clip_emb: CLIP embedding (array-like)
        blip_emb: BLIP-2 embedding (array-like)
        dinov2_emb: DINOv2 embedding (array-like)

    Returns:
        1-D NumPy array scaled to unit L2 norm (all zeros when the weighted
        average itself is zero)
    """
    # Weight the embeddings (these weights can be adjusted)
    weights = np.array([0.4, 0.3, 0.3])  # CLIP, BLIP, DINOv2

    def _unit(vector):
        # Flatten and normalize; the guard keeps a zero vector from turning into NaNs.
        flat = np.asarray(vector, dtype=np.float64).flatten()
        magnitude = np.linalg.norm(flat)
        return flat / magnitude if magnitude > 0 else flat

    embeddings = [_unit(clip_emb), _unit(blip_emb), _unit(dinov2_emb)]

    # The models emit different widths, so truncate to the shortest vector.
    # NOTE(review): truncation averages components from unrelated feature
    # spaces; kept as-is to preserve existing results.
    min_dim = min(e.shape[0] for e in embeddings)
    resized_embeddings = [e[:min_dim] for e in embeddings]

    # Weighted average
    consensus = np.average(np.array(resized_embeddings), axis=0, weights=weights)

    # Normalize
    norm = np.linalg.norm(consensus)
    return consensus / norm if norm > 0 else consensus

In [6]:
#############################################
# 4. SEARCH ENGINE FUNCTIONS
#############################################
async def search_private_db(embedding: np.ndarray, description: str) -> list:
    """
    Placeholder private-database search returning two fixed matches.

    Args:
        embedding: Image embedding; echoed back (as a list) in every result
        description: Image description (not used by the placeholder)

    Returns:
        List of matching results
    """
    # For production, replace this with an actual database search
    await asyncio.sleep(0.5)

    # (match id, score, record date) of the canned hits
    canned_hits = (
        ("Person_123", 0.91, "2023-10-15"),
        ("Person_456", 0.85, "2023-09-22"),
    )
    return [
        {
            "source": "Private DB",
            "match": match_id,
            "score": score,
            "metadata": {"date": record_date},
            "embedding": embedding.tolist()
        }
        for match_id, score, record_date in canned_hits
    ]

async def search_twitter(embedding: np.ndarray, description: str) -> list:
    """
    Placeholder Twitter search returning one fixed match.

    The first five whitespace-separated words of the description are reported
    as the keywords of the match.

    Args:
        embedding: Image embedding; echoed back (as a list) in the result
        description: Image description

    Returns:
        List of matching results
    """
    # For production, replace with actual Twitter API calls
    await asyncio.sleep(0.7)

    leading_words = description.split()[:5]
    tweet_details = {
        "username": "@user123",
        "posted": "2023-11-01",
        "keywords": leading_words
    }
    hit = {
        "source": "Twitter",
        "match": "Tweet_Image_456",
        "score": 0.87,
        "metadata": tweet_details,
        "embedding": embedding.tolist()
    }
    return [hit]

async def search_reddit(embedding: np.ndarray, description: str) -> list:
    """
    Placeholder Reddit search returning one fixed match.

    Args:
        embedding: Image embedding; echoed back (as a list) in the result
        description: Image description (not used by the placeholder)

    Returns:
        List of matching results
    """
    # For production, replace with actual Reddit API calls
    await asyncio.sleep(0.6)

    hit = {
        "source": "Reddit",
        "match": "Reddit_Post_321",
        "score": 0.89,
        "metadata": {"subreddit": "r/pics", "posted": "2023-10-25"},
        "embedding": embedding.tolist()
    }
    return [hit]

async def search_instagram(embedding: np.ndarray, description: str) -> list:
    """
    Placeholder Instagram search returning one fixed match.

    Args:
        embedding: Image embedding; echoed back (as a list) in the result
        description: Image description (not used by the placeholder)

    Returns:
        List of matching results
    """
    # For production, replace with actual Instagram API calls
    await asyncio.sleep(0.8)

    hit = {
        "source": "Instagram",
        "match": "Insta_Post_654",
        "score": 0.88,
        "metadata": {"username": "user456", "posted": "2023-11-12", "location": "New York"},
        "embedding": embedding.tolist()
    }
    return [hit]

async def search_osint_sources(embedding: np.ndarray, description: str) -> list:
    """
    Placeholder OSINT search returning two fixed matches.

    Args:
        embedding: Image embedding; echoed back (as a list) in every result
        description: Image description (not used by the placeholder)

    Returns:
        List of matching results
    """
    # For production, implement real OSINT APIs
    await asyncio.sleep(1.0)

    # (match id, score, metadata) of the canned hits
    canned_hits = (
        ("DarkWeb_Post_999", 0.83, {"forum": "anonymous_forum", "date": "2023-09-10"}),
        ("Telegram_Group_123", 0.79, {"group": "public_channel_xyz", "date": "2023-10-30"}),
    )
    return [
        {
            "source": "OSINT",
            "match": match_id,
            "score": score,
            "metadata": details,
            "embedding": embedding.tolist()
        }
        for match_id, score, details in canned_hits
    ]

async def search_tineye(image: Image.Image) -> list:
    """
    Placeholder TinEye reverse image search returning one fixed match.

    The image is encoded to an in-memory JPEG first (the payload a real
    request would presumably upload), but the bytes are not sent anywhere.

    Args:
        image: PIL Image

    Returns:
        List of matching results
    """
    # For production, implement actual TinEye API
    jpeg_buffer = BytesIO()
    image.save(jpeg_buffer, format='JPEG')
    jpeg_buffer.seek(0)
    await asyncio.sleep(1.2)

    hit = {
        "source": "TinEye",
        "match": "Website_ABC",
        "score": 0.92,
        "metadata": {"domain": "example.com", "first_crawled": "2023-08-15"},
        "embedding": None
    }
    return [hit]

async def search_google_images(image: Image.Image, description: str, api_key: str, cse_id: str) -> list:
    """
    Placeholder Google Images (CSE) search returning one fixed match.

    A query is derived from the first seven words of the description, and the
    credentials are checked, but no request is issued yet.

    Args:
        image: PIL Image
        description: Image description
        api_key: Google API key
        cse_id: Google CSE ID

    Returns:
        List of matching results
    """
    # For production, implement actual Google API calls
    query = " ".join(description.split()[:7])
    await asyncio.sleep(1.0)

    # With both credentials present the Custom Search API would be called here.
    credentials_present = api_key and cse_id
    if credentials_present:
        try:
            # Actual API call would go here
            pass
        except Exception as e:
            print(f"Google API error: {str(e)}")

    article = {
        "url": "https://example-news.com/article123",
        "title": "Example article related to the image"
    }
    hit = {
        "source": "Google Images",
        "match": "News_Site_XYZ",
        "score": 0.86,
        "metadata": article,
        "embedding": None
    }
    return [hit]

async def search_additional_sources(embedding: np.ndarray, description: str) -> list:
    """
    Placeholder search over additional sources (Facebook, LinkedIn)
    returning one fixed match for each.

    Args:
        embedding: Image embedding; echoed back (as a list) in every result
        description: Image description (not used by the placeholder)

    Returns:
        List of matching results
    """
    # For production, implement actual API calls
    await asyncio.sleep(0.9)

    # (source, match id, score, metadata) of the canned hits
    canned_hits = (
        ("Facebook", "FB_Post_123", 0.81, {"user": "john.doe", "posted": "2023-10-05"}),
        ("LinkedIn", "LinkedIn_Profile_456", 0.78, {"profile": "jane-smith", "updated": "2023-11-10"}),
    )
    return [
        {
            "source": origin,
            "match": match_id,
            "score": score,
            "metadata": details,
            "embedding": embedding.tolist()
        }
        for origin, match_id, score, details in canned_hits
    ]

async def merge_search_results(*results: list) -> list:
    """
    Combine the result lists of all sources into one ranked list.

    Results sharing the same ``(source, match)`` pair are collapsed to the
    first one encountered. NumPy embeddings on kept results are replaced, in
    place, by plain lists. The output is ordered by ``score`` (missing scores
    count as 0), highest first.

    Args:
        *results: Lists of search results

    Returns:
        Merged and sorted list of results
    """
    # Insertion-ordered map from (source, match) to the first result seen for it.
    unique_hits = {}

    for batch in results:
        for hit in batch:
            identity = (hit['source'], hit['match'])
            if identity in unique_hits:
                continue

            vector = hit.get('embedding')
            if isinstance(vector, np.ndarray):
                hit['embedding'] = vector.tolist()
            unique_hits[identity] = hit

    ranked = list(unique_hits.values())
    ranked.sort(key=lambda hit: hit.get('score', 0), reverse=True)
    return ranked

In [7]:
#############################################
# 5. ANALYSIS & THREAT ASSESSMENT FUNCTIONS
#############################################
async def analyze_results_with_llm(results: list, image_description: str, client) -> str:
    """
    Ask the LLM for a forensic report on the reverse image search results.

    The results are serialized to JSON without their 'embedding' entries and
    sent, together with the image description, as a single chat completion
    request.

    Args:
        results: Search results
        image_description: Image description
        client: LLM client exposing ``chat.completions.create``

    Returns:
        Forensic analysis report (the reply text with surrounding whitespace removed)
    """
    # Drop the embeddings so the prompt stays small.
    clean_results = [
        {field: value for field, value in hit.items() if field != 'embedding'}
        for hit in results
    ]

    prompt_template = """
You are a forensic analyst. Analyze the following reverse image search results.

IMAGE DESCRIPTION:
{image_description}

SEARCH RESULTS (in JSON):
{search_results}

Provide a structured forensic assessment that includes:
1. Cross-referencing of entities.
2. Temporal and geographic correlations.
3. Evaluation of source reliability.
4. Insights on image similarity between the current image and search results.
5. Overall forensic insights about the image origin.

Format your response as a clear multi-section report.
    """

    user_prompt = prompt_template.format(
        image_description=image_description,
        search_results=json.dumps(clean_results, indent=2)
    )
    conversation = [
        {"role": "system", "content": "You are a forensic image analyst providing detailed assessments."},
        {"role": "user", "content": user_prompt}
    ]

    # Use LLM to analyze
    response = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
        temperature=0.1
    )

    report = response.choices[0].message.content
    return report.strip()

async def threat_assessment(analysis: str, client) -> dict:
    """
    Ask the LLM to rate the threats implied by a forensic analysis.

    The reply is expected to contain one JSON object; everything from the
    first '{' to the last '}' is parsed. When no such span exists or it is
    not valid JSON, the error is printed and a fixed fallback assessment
    (threat level 5, category "unknown") is returned.

    Args:
        analysis: Forensic analysis text
        client: LLM client exposing ``chat.completions.create``

    Returns:
        Threat assessment dictionary
    """
    prompt_template = """
Based on the following forensic image analysis, assess potential threats or concerns.

{analysis}

Return a JSON object with the keys:
- "threat_level": integer (0-10)
- "categories": list of strings (e.g., ["identity_theft", "privacy_breach"])
- "reasoning": a brief explanation
- "recommended_actions": a list of recommended actions

Return valid JSON only.
    """

    conversation = [
        {"role": "system", "content": "You are a threat assessment analyst providing structured evaluations."},
        {"role": "user", "content": prompt_template.format(analysis=analysis)}
    ]
    response = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
        temperature=0.1
    )

    try:
        reply_text = response.choices[0].message.content.strip()

        # Tolerate text around the JSON: keep the span from the first '{' to the last '}'.
        opening = reply_text.find('{')
        closing = reply_text.rfind('}')
        if opening < 0 or closing < opening:
            raise ValueError("No JSON found in response")

        return json.loads(reply_text[opening:closing + 1])

    except (json.JSONDecodeError, ValueError) as e:
        print(f"Error parsing threat assessment: {str(e)}")
        return {
            "threat_level": 5,
            "categories": ["unknown"],
            "reasoning": "LLM response parsing failed. Check the forensic analysis manually.",
            "recommended_actions": ["Review analysis manually."]
        }

async def graph_link_analysis(results: list) -> plt.Figure:
    """
    Create a network graph visualization of search results.

    Args:
        results: List of search results

    Returns:

SyntaxError: incomplete input (<ipython-input-7-37e4a3877899>, line 114)