# Asset Search

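Visualize and search image datasets: run a text query over one dataset (or all of them), then refine the results by searching for images similar to a chosen hit.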

In [1]:

from backend.search_manager import SearchManager
from backend.config import ROOT_IMAGE_DIRECTORY
from IPython.display import display, HTML

# Function to generate HTML for displaying images in a grid
def display_image_grid(thumbnails, columns=5):
    """Render thumbnails as an index-labelled HTML grid in the notebook output.

    Each image is shown under its 0-based position in ``thumbnails``, so a
    result can be referred to by the number displayed above it.

    Args:
        thumbnails: Iterable of image sources; each one is placed in the
            ``src`` attribute of an ``<img>`` tag.
        columns: Number of images per row; must be at least 1.

    Raises:
        ValueError: If ``columns`` is smaller than 1.
    """
    # Local import keeps this cell self-contained; the name `html` itself is
    # not bound here, so it cannot clash with other notebook variables.
    from html import escape

    # Reject 0 (division by zero below) and negatives (invalid flex-basis).
    if columns < 1:
        raise ValueError(f"columns must be >= 1, got {columns}")

    cell_width = 100 / columns  # loop-invariant flex-basis, in percent

    # Collect fragments and join once instead of growing a string with +=.
    parts = ['<div style="display: flex; flex-wrap: wrap; justify-content: center;">']
    for i, thumbnail in enumerate(thumbnails):
        # Escape quotes/ampersands so unusual file names cannot break the attribute.
        src = escape(str(thumbnail), quote=True)
        parts.append(f'<div style="flex: 1 0 {cell_width}%; box-sizing: border-box; padding: 5px;">')
        parts.append(f'<p style="text-align: center;">{i}</p>')
        parts.append(f'<img src="{src}" style="width: 100%; height: auto;" />')
        parts.append('</div>')
    parts.append('</div>')
    display(HTML(''.join(parts)))


In [2]:
# Initialize search manager
# This single instance is reused by the cells below to start the image server,
# list datasets, run similarity searches and fetch thumbnails.
search_manager = SearchManager()

In [3]:
# Start HTTP server to serve images
# `base_url` is built from the same `port` the server is started with and is
# later passed to get_thumbnails() as `base_url`.
# NOTE(review): presumably thumbnail URLs are formed from `base_url` - confirm in backend.
# The call is the last expression of the cell, so its return value is shown as the cell output.

port = 8000
base_url = f"http://127.0.0.1:{port}/"
search_manager.start_http_server(ROOT_IMAGE_DIRECTORY, port)

8000

In [4]:
# List the datasets known to the search manager.
# One of the printed names (or "All") is what `selected_dataset` is set to when searching.
available_datasets = search_manager.get_available_datasets()
print("Available Datasets:", available_datasets)

Available Datasets: ['10cats', 'CoolPics5K', 'Paintings', 'NASA', 'PomologicalWatercolors', 'Magnum', 'Words', 'Vintage Magazines', 'Paris en Photo', 'JB', 'Vintage Industrial', 'Stamps', 'jbuhler', 'Vintage Paperback', 'Tax Forms', 'Watches', 'Vintage Group Pictures']


In [5]:
# Choose "All" to search across every image, otherwise use a dataset name.
# Embeddings are cached, so the first search takes longer than later ones.

selected_dataset = "All"
print("Selected Dataset:", selected_dataset)

# Text query and the number of neighbours to retrieve
query = "dog"
n_neighbors = 20
results = search_manager.perform_similarity_search(query, selected_dataset, n_neighbors)

# Turn the hits into thumbnails; only the first element of each result is passed on
thumbnails, uris = search_manager.get_thumbnails(
    selected_dataset,
    search_results=[hit[0] for hit in results],
    base_url=base_url,
    base_path=ROOT_IMAGE_DIRECTORY,
)

# Show the hits as a five-column grid
display_image_grid(thumbnails, columns=5)

Selected Dataset: All
Performing text-based search...


In [6]:
# Refine the search: use one hit of the text search as the query ("anchor").
# `search_id` is an index into `results`; display_image_grid labels each
# thumbnail with its position, so pick the number shown above the wanted image.
# NOTE(review): assumes get_thumbnails() keeps the order of `results` - confirm.
n_neighbors = 40

search_id = 7

# Same exception type as plain indexing, but with a message that says what to change.
if not -len(results) <= search_id < len(results):
    raise IndexError(
        f"search_id={search_id} is out of range: the text search returned {len(results)} results"
    )

anchor = results[search_id][0]

# Keep the text-search `results` intact: writing the new hits to a separate
# name makes this cell idempotent, so re-running it always anchors on the
# text search instead of on its own previous output.
anchor_results = search_manager.perform_similarity_search(anchor, selected_dataset, n_neighbors)

# Get thumbnails for the anchor-based search results
thumbnails, uris = search_manager.get_thumbnails(
    selected_dataset,
    search_results=[result[0] for result in anchor_results],
    base_url=base_url,
    base_path=ROOT_IMAGE_DIRECTORY,
)

# Display thumbnails as a grid
display_image_grid(thumbnails, columns=10)