# Building a Multimodal Search Engine

Learn how to build an image search engine using CLIP: index a folder of images, then retrieve the best matches for a natural-language text query.

In [None]:
# Setup
import sys
# Extend the module search path so the imports below can be resolved.
# NOTE: '../src' is a relative path, so it is resolved against the current
# working directory -- run this notebook from its own folder.
sys.path.append('../src')

# Project modules (presumably located in ../src -- verify against the repo layout).
from multimodal_search import MultimodalSearchEngine
from utils import download_samples
from config import IMAGES_DIR
# Third-party libraries used to load and display the result images.
import matplotlib.pyplot as plt
from PIL import Image

print("Libraries loaded!")

## 1. Download Sample Images

In [None]:
# Fetch the sample images used by the rest of this notebook.
download_samples()

import os

# Collect every *.jpg file followed by every *.png file found directly in
# IMAGES_DIR (assumes IMAGES_DIR is a pathlib.Path-like object).
images = [*IMAGES_DIR.glob("*.jpg"), *IMAGES_DIR.glob("*.png")]

# Report how many files were found, then list them one per line.
print(f"\nFound {len(images)} images:")
for img in images:
    print(f"  - {img.name}")

## 2. Create and Index Search Engine

In [None]:
# Create search engine
# The instance is kept in a notebook-level variable because the cells below
# (including search_and_display) read it as a global.
search_engine = MultimodalSearchEngine()

# Index all images
# Hands the whole images directory to the engine; which files it picks up is
# decided inside index_images (not visible here).
search_engine.index_images(IMAGES_DIR)

# image_paths is used here, and again in section 5, as the count of indexed images.
print(f"\nIndexed {len(search_engine.image_paths)} images successfully!")

## 3. Interactive Search

In [None]:
def search_and_display(query, top_k=3):
    """Search the indexed images for ``query`` and plot the best matches.

    Uses the notebook-level ``search_engine`` and shows one panel per result,
    titled with the file name and its similarity score.

    Args:
        query: Text query forwarded to ``search_engine.search``.
        top_k: Maximum number of results to request and display.

    Returns:
        None. A matplotlib figure is shown, or a short message is printed
        when there is nothing to display.
    """
    results = search_engine.search(query, top_k=top_k)

    # Never create more panels than there are results (or than were asked for).
    num_panels = min(top_k, len(results))
    if num_panels < 1:
        # plt.subplots(1, 0) raises ValueError, so bail out with a clear message.
        print(f'No results to display for query: "{query}"')
        return

    # squeeze=False always yields a 2-D array of Axes, even for a single
    # panel. The original only wrapped the bare Axes when top_k == 1, which
    # broke when top_k > 1 but only one result came back.
    fig, axes_grid = plt.subplots(
        1, num_panels, figsize=(15, 5), squeeze=False
    )

    for ax, (path, score) in zip(axes_grid[0], results):
        # imshow converts the image to an array right away, so the file can
        # be closed as soon as the call returns.
        with Image.open(path) as img:
            ax.imshow(img)
        ax.set_title(f"{path.name}\nScore: {score:.3f}")
        ax.axis('off')

    plt.suptitle(f'Query: "{query}"', fontsize=16)
    plt.tight_layout()
    plt.show()

# Example searches
# A handful of free-text queries covering subjects, textures, colours and scenes.
queries = [
    "a colorful bird",
    "cute furry animal",
    "warm sunset colors",
    "urban cityscape"
]

# Show one figure per query, each with up to three matches.
for query in queries:
    search_and_display(query, top_k=3)

## 4. Custom Query

In [None]:
# Try your own query!
# Edit the string below and re-run this cell to search with different text.
custom_query = "something orange"  # Change this to your query
# Displays up to three matches for the custom query.
search_and_display(custom_query, top_k=3)

## 5. Analyze Similarity Scores

In [None]:
# Score every indexed image against one query by requesting as many results
# as there are indexed images.
query = "animal"
results = search_engine.search(query, top_k=len(search_engine.image_paths))

# Split the (path, score) pairs into bar labels and bar heights.
names = [path.name for path, _ in results]
scores = [score for _, score in results]

# One bar per image, in the order the engine returned them.
positions = range(len(scores))

plt.figure(figsize=(10, 6))
plt.bar(positions, scores)
plt.xticks(positions, names, rotation=45, ha='right')
plt.ylabel('Similarity Score')
plt.title(f'Similarity Scores for Query: "{query}"')
plt.tight_layout()
plt.show()