In [None]:
!pip install qdrant-client sentence-transformers pillow



In [None]:
from qdrant_client import QdrantClient, models
import torch
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import matplotlib.pyplot as plt
import requests
from io import BytesIO


In [None]:
import matplotlib.pyplot as plt
import requests
from io import BytesIO
import os
from getpass import getpass

# SECURITY: the API key must never live in the notebook. It is read from the
# QDRANT_API_KEY environment variable, falling back to an interactive prompt.
# NOTE(review): a key was previously committed in this cell; it should be
# treated as leaked and rotated in the Qdrant Cloud console.
QDRANT_URL = os.environ.get(
    "QDRANT_URL",
    "https://656ff6c0-88ba-4b7c-ab69-0b1f23796f3f.europe-west3-0.gcp.cloud.qdrant.io:6333",
)
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") or getpass("Qdrant API key: ")

# Shared client used by every search helper below.
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)


In [None]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor (tokenizer + image preprocessing) and the model weights come
# from the same CLIP checkpoint; only the model is moved to `device`.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
model = model.to(device)


In [None]:
def embed_text(text):
    """Embed a single text string with the CLIP text encoder.

    Args:
        text: The string to embed.

    Returns:
        The first (only) row of ``model.get_text_features`` as a numpy array
        on the CPU.
    """
    # CLIP's text encoder has a fixed context length. Without truncation an
    # over-long query fails inside the model instead of being embedded.
    inputs = processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    ).to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        emb = model.get_text_features(**inputs)
    return emb[0].cpu().numpy()


In [None]:
def embed_image(image_path):
    """Embed a single image with the CLIP image encoder.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (typically a path).

    Returns:
        The first (only) row of ``model.get_image_features`` as a numpy array
        on the CPU.
    """
    # Use a context manager so the underlying file handle is closed promptly;
    # convert() returns a fully loaded copy that outlives the `with` block.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        emb = model.get_image_features(**inputs)
    return emb[0].cpu().numpy()


In [None]:
def cosine_sim(a, b):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        a: First vector (numpy array or sequence of numbers).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm (the ratio is undefined there and would otherwise be NaN).
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid 0/0 -> NaN (and a RuntimeWarning) for degenerate vectors.
        return 0.0
    return np.dot(a, b) / denom

def mmr(query_vec, doc_vecs, k=5, lambda_=0.6):
    """Select up to ``k`` documents by Maximal Marginal Relevance.

    Each round picks the remaining document that maximises
    ``lambda_ * sim(query, doc) - (1 - lambda_) * max_sim(doc, already selected)``
    using cosine similarity. For the first pick the redundancy term is 0.

    Args:
        query_vec: 1-D query vector.
        doc_vecs: Sequence of document vectors, each the same length as
            ``query_vec``.
        k: Maximum number of documents to select.
        lambda_: Trade-off weight; higher favours relevance, lower favours
            diversity.

    Returns:
        List of integer indices into ``doc_vecs`` in selection order. Empty
        when ``k <= 0`` or ``doc_vecs`` is empty.
    """
    if k <= 0 or len(doc_vecs) == 0:
        return []

    docs = np.asarray(doc_vecs, dtype=np.float64)
    query = np.asarray(query_vec, dtype=np.float64)

    # Normalise once so every cosine similarity is a plain dot product.
    # Zero-norm vectors are left as all-zeros (similarity 0) rather than
    # producing NaN, which would otherwise poison the arg-max.
    doc_norms = np.linalg.norm(docs, axis=1)
    doc_norms[doc_norms == 0] = 1.0
    docs_unit = docs / doc_norms[:, None]
    query_norm = np.linalg.norm(query)
    query_unit = query / query_norm if query_norm else query

    query_sims = docs_unit @ query_unit

    selected = []
    available = np.ones(len(docs), dtype=bool)
    # Running max similarity of each document to anything selected so far;
    # None until the first pick so negative similarities are not clamped to 0.
    redundancy = None

    while len(selected) < k and available.any():
        scores = lambda_ * query_sims
        if redundancy is not None:
            scores = scores - (1 - lambda_) * redundancy
        scores[~available] = -np.inf

        # argmax returns the first maximum, i.e. ties go to the lower index.
        best = int(np.argmax(scores))
        selected.append(best)
        available[best] = False

        # Update the running maximum with one matrix-vector product instead
        # of recomputing every (candidate, selected) pair each round.
        sims_to_best = docs_unit @ docs_unit[best]
        redundancy = sims_to_best if redundancy is None else np.maximum(redundancy, sims_to_best)

    return selected


In [None]:
def fast_search_mmr(query_vector, limit=5, use_mmr=True,
                    collection_name="tech_products_fast", mmr_pool_size=30):
    """Query Qdrant and optionally re-rank the hits with MMR for diversity.

    Args:
        query_vector: Query embedding passed to ``client.query_points``.
        limit: Number of results to return.
        use_mmr: When True, over-fetch candidates (with their vectors) and
            pick ``limit`` of them with ``mmr``; when False, return the top
            ``limit`` hits as ranked by Qdrant.
        collection_name: Qdrant collection to search.
        mmr_pool_size: Minimum number of candidates fetched for MMR.

    Returns:
        A list of dicts with keys ``"score"`` (formatted to 4 decimals),
        ``"title"``, ``"price"`` (prefixed with ``$``) and ``"link"`` (the
        payload's ``image_online`` value).
    """
    # Over-fetch so MMR has something to diversify, but never fetch fewer
    # than `limit`: a fixed pool would silently cap results for large limits.
    fetch_limit = max(mmr_pool_size, limit) if use_mmr else limit

    results = client.query_points(
        collection_name=collection_name,
        query=query_vector,
        limit=fetch_limit,
        with_vectors=use_mmr,  # vectors are only needed for the MMR re-rank
        with_payload=True,
    )
    points = results.points

    # Apply MMR for diversity if enabled and there is anything to re-rank.
    if use_mmr and points:
        doc_vectors = [p.vector for p in points]
        selected_idxs = mmr(query_vector, doc_vectors, k=limit)
        points = [points[i] for i in selected_idxs]
    else:
        points = points[:limit]

    # Format results for the display function.
    return [
        {
            "score": f"{res.score:.4f}",
            "title": res.payload.get("title"),
            "price": f"${res.payload.get('price')}",
            "link": res.payload.get("image_online"),  # image URL for display
        }
        for res in points
    ]

**Text Search Speed**

In [None]:
import time
import statistics
import numpy as np
from IPython.display import display, HTML



# Colour palette for the text-search report. These are module-level names
# looked up when the report is rendered, and a later cell in this notebook
# reassigns THEME_PRIMARY / THEME_BG / TEXT_COLOR.
SUCCESS_COLOR = "#00d2d3"  # highlight for the headline latency metric
TEXT_COLOR = "#f5f6fa"     # body text
THEME_BG = "#1e272e"       # card background
THEME_PRIMARY = "#6c5ce7"  # main accent (purple/blue)

def professional_benchmark_report(search_func, queries, iterations=50, limit=6, warmup=5):
    """Benchmark ``search_func`` on text queries and render an HTML report.

    For each query the text is embedded once (outside the timed region),
    ``search_func`` is called ``warmup`` times untimed, then ``iterations``
    times with wall-clock timing. Per-query and global mean / p95 latencies
    are shown via ``IPython.display``.

    Args:
        search_func: Callable invoked as ``search_func(query_vec, limit=...)``.
        queries: Iterable of query strings.
        iterations: Timed calls per query; must be at least 1.
        limit: ``limit`` value forwarded to ``search_func``.
        warmup: Untimed calls per query before measuring.

    Returns:
        None. The report is displayed as a side effect.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    from html import escape

    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    queries = list(queries)
    if not queries:
        # statistics.mean() raises on empty data; nothing to report anyway.
        print("No queries supplied; nothing to benchmark.")
        return

    report_data = []
    all_latencies = []

    for query_text in queries:
        # 1. PRE-SEARCH: Get embedding (not counted in search speed)
        query_vec = embed_text(query_text)

        # 2. WARM-UP: untimed calls before measuring
        for _ in range(warmup):
            search_func(query_vec, limit=limit)

        # 3. ACTUAL TEST: latency in milliseconds per call
        query_latencies = []
        for _ in range(iterations):
            start = time.perf_counter()
            search_func(query_vec, limit=limit)
            query_latencies.append((time.perf_counter() - start) * 1000)

        all_latencies.extend(query_latencies)

        # 4. STORE RESULTS
        report_data.append({
            "query": query_text,
            "avg": statistics.mean(query_latencies),
            "p95": np.percentile(query_latencies, 95)
        })

    # Global metrics over every timed call.
    total_avg = statistics.mean(all_latencies)
    total_p95 = np.percentile(all_latencies, 95)
    # Sequential single-client throughput implied by the mean latency.
    rps = 1000 / total_avg

    # Query text is escaped so characters like < or & cannot break the markup.
    rows_html = "".join(
        f"<tr style='border-bottom: 1px solid rgba(255,255,255,0.05);'> <td style='padding: 12px 15px;'>{escape(str(d['query']))}</td> <td style='text-align: center;'>{d['avg']:.2f}</td> <td style='text-align: center; font-weight: bold; color: {THEME_PRIMARY};'>{d['p95']:.2f}</td> </tr>"
        for d in report_data
    )

    # 5. GENERATE HTML DISPLAY
    html_template = f"""
    <div style="font-family: 'Segoe UI', sans-serif; background: {THEME_BG}; color: {TEXT_COLOR}; padding: 30px; border-radius: 15px; max-width: 900px; box-shadow: 0 10px 30px rgba(0,0,0,0.5);">
        <h2 style="margin-top: 0; color: {THEME_PRIMARY}; border-bottom: 2px solid {THEME_PRIMARY}; padding-bottom: 10px;">Search Engine Performance (8k Dataset)</h2>

        <div style="display: flex; justify-content: space-between; margin: 25px 0;">
            <div style="background: rgba(255,255,255,0.05); padding: 20px; border-radius: 10px; width: 30%; text-align: center;">
                <b style="font-size: 14px; opacity: 0.7;">AVG LATENCY</b><br>
                <span style="font-size: 24px; font-weight: bold; color: {SUCCESS_COLOR};">{total_avg:.2f} ms</span>
            </div>
            <div style="background: rgba(255,255,255,0.05); padding: 20px; border-radius: 10px; width: 30%; text-align: center; border: 1px solid {THEME_PRIMARY};">
                <b style="font-size: 14px; opacity: 0.7;">P95 (RELIABILITY)</b><br>
                <span style="font-size: 24px; font-weight: bold; color: {THEME_PRIMARY};">{total_p95:.2f} ms</span>
            </div>
            <div style="background: rgba(255,255,255,0.05); padding: 20px; border-radius: 10px; width: 30%; text-align: center;">
                <b style="font-size: 14px; opacity: 0.7;">THROUGHPUT</b><br>
                <span style="font-size: 24px; font-weight: bold; color: #ff9f43;">{rps:.1f} QPS</span>
            </div>
        </div>

        <table style="width: 100%; border-collapse: collapse; background: rgba(0,0,0,0.2); border-radius: 10px; overflow: hidden;">
            <thead>
                <tr style="background: {THEME_PRIMARY}; color: white; text-align: left;">
                    <th style="padding: 15px;">Target Query</th>
                    <th style="padding: 15px; text-align: center;">Average (ms)</th>
                    <th style="padding: 15px; text-align: center;">p95 (ms)</th>
                </tr>
            </thead>
            <tbody>
                {rows_html}
            </tbody>
        </table>
        <p style="font-size: 12px; margin-top: 20px; opacity: 0.5; text-align: center;">Benchmarks conducted on Cloud Qdrant Collection | Dataset Size: ~8,000</p>

         <p style="font-size: 12px; margin-top: 20px; opacity: 0.5; text-align: center;"> Note: Latency includes Cloud Round-Trip Time (RTT) and MMR diversification compute.</p>
    </div>
    """

    display(HTML(html_template))

# Run the text-search benchmark over a fixed set of product queries.
test_queries = [
    "gaming headset",
    "wireless mouse",
    "mechanical keyboard",
    "4k monitor",
    "noise cancelling headphones",
    "ergonomic office chair",
    "usb-c docking station",
    "external ssd 1tb",
    "streaming webcam 1080p",
    "rgb led strip",
]
professional_benchmark_report(search_func=fast_search_mmr, queries=test_queries)

Target Query,Average (ms),p95 (ms)
gaming headset,53.3,64.59
wireless mouse,64.88,80.76
mechanical keyboard,53.79,61.82
4k monitor,51.19,58.73
noise cancelling headphones,52.1,60.41
ergonomic office chair,61.56,76.02
usb-c docking station,56.39,70.96
external ssd 1tb,58.7,82.09
streaming webcam 1080p,51.87,58.28
rgb led strip,62.28,74.48


**Image Search Speed**

In [None]:
import os
import requests
from urllib.parse import urlparse

def download_and_update_paths(image_list):
    """Download each URL in ``image_list`` and replace its contents with local paths.

    Files are written to the ``test_images_cache`` directory as
    ``tech_sample_<index><ext>``. Downloads are best-effort: a failed URL is
    reported and skipped.

    Args:
        image_list: List of image URLs. Mutated in place: on return it holds
            only the local file paths of the successful downloads.

    Returns:
        None.
    """
    local_dir = "test_images_cache"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(local_dir, exist_ok=True)

    # Browser-like User-Agent; some hosts reject the default requests one.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    valid_paths = []
    print(f"--- Starting Download of {len(image_list)} images ---")

    for i, url in enumerate(image_list):
        try:
            # Keep the URL's extension when it looks sane, otherwise assume JPEG.
            ext = os.path.splitext(urlparse(url).path)[1]
            if not ext or len(ext) > 5:
                ext = ".jpg"
            filename = f"tech_sample_{i}{ext}"
            filepath = os.path.join(local_dir, filename)

            response = requests.get(url, headers=headers, timeout=15, allow_redirects=True)
            if response.status_code != 200:
                # Previously skipped silently; report it so gaps are explained.
                print(f"❌ [{i}] Failed (HTTP {response.status_code}): {url}")
                continue

            with open(filepath, 'wb') as f:
                f.write(response.content)
            valid_paths.append(filepath)
            print(f"✅ [{i}] Success: {filename}")
        except Exception as e:
            # Best-effort: report the real error (this branch used to print
            # "Success" and could reference an unbound `filename`).
            print(f"❌ [{i}] Failed ({e}): {url}")

    # Update the original list with ONLY the successful local paths
    image_list.clear()
    image_list.extend(valid_paths)
    print("--- Finished.")




# Sample image URLs for the image-search benchmark, grouped by product type.
# NOTE: download_and_update_paths() mutates this list in place, so after the
# call below it holds local file paths (successful downloads only), not URLs.
test_images = [
    # --- Audio & Headsets ---
    "https://images.unsplash.com/photo-1505740420928-5e560c06d30e?w=800",
    "https://images.unsplash.com/photo-1583394838336-397577f9605e?w=800",
    "https://images.unsplash.com/photo-1484704849700-f032a568e944?w=800",
    "https://images.unsplash.com/photo-1546435770-a3e426bf472b?w=800",
    "https://images.unsplash.com/photo-1590658268037-6bf12165a8df?w=800",

    # --- Mobile & Cases ---
    "https://images.unsplash.com/photo-1512499617640-c74ae3a79d37?w=800",
    "https://images.unsplash.com/photo-1592890288564-76628a30a657?w=800",
    "https://images.unsplash.com/photo-1586105251261-72a756497a11?w=800",
    "https://images.unsplash.com/photo-1574944966950-8164c242e6bc?w=800",
    "https://images.unsplash.com/photo-1603313011101-31c726a54881?w=800",

    # --- Gaming & Peripherals ---
    "https://images.unsplash.com/photo-1527864550417-7fd91fc51a46?w=800",
    "https://images.unsplash.com/photo-1511467687858-23d96c32e4ae?w=800",
    "https://images.unsplash.com/photo-1542751371-adc38448a05e?w=800",
    "https://images.unsplash.com/photo-1612287230202-1ff1d85d1bdf?w=800",
    "https://images.unsplash.com/photo-1593305841991-05c297ba4575?w=800",

    # --- Laptops & Storage ---
    "https://images.unsplash.com/photo-1496181133206-80ce9b88a853?w=800",
    "https://images.unsplash.com/photo-1517336714731-489689fd1ca8?w=800",
    "https://images.unsplash.com/photo-1544006659-f0b21f04cb1d?w=800",
    "https://images.unsplash.com/photo-1562975078-0a69f08c557f?w=800",
    "https://images.unsplash.com/photo-1531297484001-80022131f5a1?w=800",

    # --- Wearables & Misc ---
    "https://images.unsplash.com/photo-1523275335684-37898b6baf30?w=800",
    "https://images.unsplash.com/photo-1544244015-0df4b3ffc6b0?w=800",
    "https://images.unsplash.com/photo-1558346490-a72e53ae2d4f?w=800",
    "https://images.unsplash.com/photo-1504274066654-fa291bb5240d?w=800",
    "https://images.unsplash.com/photo-1510282335153-be68b12298a1?w=800"
]

# Fetch the images into the local cache and swap the URLs for file paths.
download_and_update_paths(test_images)

--- Starting Download of 25 images ---
‚úÖ [0] Success: tech_sample_0.jpg
‚úÖ [2] Success: tech_sample_2.jpg
‚úÖ [3] Success: tech_sample_3.jpg
‚úÖ [4] Success: tech_sample_4.jpg
‚úÖ [5] Success: tech_sample_5.jpg
‚úÖ [6] Success: tech_sample_6.jpg
‚úÖ [7] Success: tech_sample_7.jpg
‚úÖ [10] Success: tech_sample_10.jpg
‚úÖ [11] Success: tech_sample_11.jpg
‚úÖ [12] Success: tech_sample_12.jpg
‚úÖ [13] Success: tech_sample_13.jpg
‚úÖ [14] Success: tech_sample_14.jpg
‚úÖ [15] Success: tech_sample_15.jpg
‚úÖ [16] Success: tech_sample_16.jpg
‚úÖ [19] Success: tech_sample_19.jpg
‚úÖ [20] Success: tech_sample_20.jpg
‚úÖ [21] Success: tech_sample_21.jpg
‚úÖ [22] Success: tech_sample_22.jpg
--- Finished.


In [None]:
import time
import statistics
import numpy as np
from IPython.display import display, HTML



# Colour palette for the image-search report ("Deep Sea Professional").
# THEME_PRIMARY, THEME_BG and TEXT_COLOR reuse names assigned by the earlier
# text-report theme cell, so running this cell replaces those values.
CARD_BG = "rgba(255, 255, 255, 0.05)"  # translucent metric-card fill
TEXT_COLOR = "#ccd6f6"     # off-white text
THEME_BG = "#0a192f"       # navy background
THEME_PRIMARY = "#00d2d3"  # cyan accent

def benchmark_image_search(search_func, image_paths, iterations=30, limit=6, warmup=5):
    """Benchmark ``search_func`` on image queries and render an HTML report.

    For each image the embedding is computed once (outside the timed region),
    ``search_func`` is called ``warmup`` times untimed, then ``iterations``
    times with wall-clock timing. An image that fails at any step is reported
    and skipped.

    Args:
        search_func: Callable invoked as ``search_func(query_vec, limit=...)``.
        image_paths: Iterable of image paths accepted by ``embed_image``.
        iterations: Timed calls per image; must be at least 1.
        limit: ``limit`` value forwarded to ``search_func``.
        warmup: Untimed calls per image before measuring.

    Returns:
        None. The report is displayed as a side effect.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    from html import escape

    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    report_data = []
    all_latencies = []

    print("🚀 Starting Professional Image Search Benchmark...")

    for img_path in image_paths:
        try:
            # 1. PRE-SEARCH: Generate Image Embedding (not timed)
            query_vec = embed_image(img_path)

            # 2. WARM-UP: untimed calls before measuring
            for _ in range(warmup):
                search_func(query_vec, limit=limit)

            # 3. ACTUAL TEST: latency in milliseconds per call
            query_latencies = []
            for _ in range(iterations):
                start = time.perf_counter()
                search_func(query_vec, limit=limit)
                query_latencies.append((time.perf_counter() - start) * 1000)

            all_latencies.extend(query_latencies)

            report_data.append({
                "image": img_path,
                "avg": statistics.mean(query_latencies),
                "p95": np.percentile(query_latencies, 95)
            })
        except Exception as e:
            # Best-effort: one bad image must not abort the whole benchmark.
            print(f"⚠️ Could not test {img_path}: {e}")

    if not all_latencies:
        # Empty input or every image failed: statistics.mean() would raise.
        print("⚠️ No image could be benchmarked; nothing to report.")
        return

    # Global metrics over every timed call.
    total_avg = statistics.mean(all_latencies)
    total_p95 = np.percentile(all_latencies, 95)
    # Sequential single-client throughput implied by the mean latency.
    rps = 1000 / total_avg

    # Paths are escaped so characters like < or & cannot break the markup.
    rows_html = "".join(
        f"<tr style='border-bottom: 1px solid rgba(255,255,255,0.05);'> <td style='padding: 12px 15px; font-family: monospace;'>{escape(str(d['image']))}</td> <td style='text-align: center;'>{d['avg']:.1f}</td> <td style='text-align: center; font-weight: bold; color: {THEME_PRIMARY};'>{d['p95']:.1f}</td> </tr>"
        for d in report_data
    )

    # 4. GENERATE HTML REPORT
    html_template = f"""
    <div style="font-family: 'Segoe UI', sans-serif; background: {THEME_BG}; color: {TEXT_COLOR}; padding: 40px; border-radius: 20px; max-width: 900px; box-shadow: 0 20px 50px rgba(0,0,0,0.5); border: 1px solid rgba(0, 210, 211, 0.2);">
        <h2 style="margin-top: 0; color: {THEME_PRIMARY}; text-transform: uppercase; letter-spacing: 2px;">Image Search Performance Report</h2>
        <p style="opacity: 0.6; font-size: 14px; margin-bottom: 25px;">Visual Search Analytics • MMR Diversity Enabled • 8k Product Dataset</p>

        <div style="display: flex; justify-content: space-between; margin-bottom: 35px;">
            <div style="background: {CARD_BG}; padding: 20px; border-radius: 12px; width: 30%; text-align: center; border-left: 4px solid {THEME_PRIMARY};">
                <b style="font-size: 12px; display: block; margin-bottom: 5px; opacity: 0.7;">AVG LATENCY</b>
                <span style="font-size: 24px; font-weight: bold; color: #fff;">{total_avg:.2f} ms</span>
            </div>
            <div style="background: {CARD_BG}; padding: 20px; border-radius: 12px; width: 30%; text-align: center; border-left: 4px solid #ff9f43;">
                <b style="font-size: 12px; display: block; margin-bottom: 5px; opacity: 0.7;">P95 STABILITY</b>
                <span style="font-size: 24px; font-weight: bold; color: #ff9f43;">{total_p95:.2f} ms</span>
            </div>
            <div style="background: {CARD_BG}; padding: 20px; border-radius: 12px; width: 30%; text-align: center; border-left: 4px solid #54a0ff;">
                <b style="font-size: 12px; display: block; margin-bottom: 5px; opacity: 0.7;">THROUGHPUT</b>
                <span style="font-size: 24px; font-weight: bold; color: #54a0ff;">{rps:.1f} RPS</span>
            </div>
        </div>

        <table style="width: 100%; border-collapse: collapse;">
            <thead>
                <tr style="border-bottom: 2px solid rgba(0, 210, 211, 0.3); color: {THEME_PRIMARY};">
                    <th style="padding: 15px; text-align: left;">Source Image File</th>
                    <th style="padding: 15px; text-align: center;">Average (ms)</th>
                    <th style="padding: 15px; text-align: center;">P95 (ms)</th>
                </tr>
            </thead>
            <tbody>
                {rows_html}
            </tbody>
        </table>

        <p style="font-size: 12px; margin-top: 20px; opacity: 0.5; text-align: center;">Benchmarks conducted on Cloud Qdrant Collection | Dataset Size: ~8,000</p>

         <p style="font-size: 12px; margin-top: 20px; opacity: 0.5; text-align: center;"> Note: Latency includes Cloud Round-Trip Time (RTT) and MMR diversification compute.</p>

    </div>
    """
    display(HTML(html_template))

# Run the image-search benchmark over the locally cached sample images.
benchmark_image_search(search_func=fast_search_mmr, image_paths=test_images)

üöÄ Starting Professional Image Search Benchmark...


Source Image File,Average (ms),P95 (ms)
test_images_cache/tech_sample_0.jpg,53.2,64.3
test_images_cache/tech_sample_2.jpg,54.7,65.0
test_images_cache/tech_sample_3.jpg,62.9,75.7
test_images_cache/tech_sample_4.jpg,51.0,58.3
test_images_cache/tech_sample_5.jpg,51.2,56.4
test_images_cache/tech_sample_6.jpg,52.5,62.7
test_images_cache/tech_sample_7.jpg,58.2,72.0
test_images_cache/tech_sample_10.jpg,67.5,81.1
test_images_cache/tech_sample_11.jpg,51.8,58.4
test_images_cache/tech_sample_12.jpg,52.4,61.9
