In [1]:
from pathlib import Path
import numpy as np 
import pandas as pd 

# Dataset release to read; it selects the sub-directory under the dataset root.
dataset_version = 'lite'
unsplash_dataset_path = Path("../ML_data/unsplash-dataset").joinpath(dataset_version)
features_path = unsplash_dataset_path.joinpath("features")

# Photo metadata: tab-separated, first line holds the column names.
photos = pd.read_csv(unsplash_dataset_path.joinpath("photos.tsv000"), sep='\t', header=0)

# Precomputed feature matrix plus the photo ids stored alongside it.
# presumably row i of photo_features belongs to photo_ids[i] - later cells
# index photo_ids with feature-row indices; verify against the feature export.
photo_features = np.load(features_path.joinpath("features.npy"))
photo_ids = list(pd.read_csv(features_path.joinpath("photo_ids.csv"))["photo_id"])

In [2]:
# Peek at the first metadata row to see which columns photos.tsv000 provides.
photos.iloc[0]

photo_id                                                                2Q8zDWkj0Yw
photo_url                                   https://unsplash.com/photos/2Q8zDWkj0Yw
photo_image_url                   https://images.unsplash.com/photo-141520117961...
photo_submitted_at                                       2014-11-05 15:26:26.678711
photo_featured                                                                    t
photo_width                                                                    4192
photo_height                                                                   2794
photo_aspect_ratio                                                              1.5
photo_description                                                               NaN
photographer_username                                                 lanceanderson
photographer_first_name                                                       Lance
photographer_last_name                                                     A

In [3]:
import clip
import torch

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load returns the model together with its image preprocessing transform.
model, preprocess = clip.load("ViT-B/32", device=device)
# Backward-compatible alias: the transform was originally bound under this
# misspelled name, so keep it available for any code that still refers to it.
prepocess = preprocess

In [4]:
search_query = "Two dogs playing in the snow"
# Inference only: no gradients are needed to embed the query text.
with torch.no_grad():
    query_tokens = clip.tokenize(search_query).to(device)
    text_encoded = model.encode_text(query_tokens)
    # Scale the embedding to unit length along the feature axis (in place).
    text_encoded /= text_encoded.norm(dim=-1, keepdim=True)

In [5]:
# Move the query embedding to host memory so it can be combined with the
# NumPy feature matrix.
text_features = text_encoded.cpu().numpy()
# One dot-product score per row of photo_features.
similarities = list(np.squeeze(text_features @ photo_features.T, axis=0))
# (score, feature-row index) pairs, highest score first; the sort is stable,
# so equal scores keep ascending index order.
best_photos = [(score, row) for row, score in enumerate(similarities)]
best_photos.sort(key=lambda pair: pair[0], reverse=True)

In [6]:
# Top-ranked match as a (similarity, row index into photo_features) tuple.
best_photos[0]

(0.3162, 8220)

In [7]:
from IPython.display import Image
from IPython.core.display import HTML
from html import escape  # stdlib; used to embed dataset text safely in HTML

# Show the three best matches for the query, each followed by its Unsplash
# attribution line. Slicing (rather than range(3)) tolerates fewer than
# three ranked photos.
for similarity, idx in best_photos[:3]:
    photo_id = photo_ids[idx]
    photo_data = photos[photos["photo_id"] == photo_id].iloc[0]
    display(Image(url=photo_data["photo_image_url"]+"?w=640"))

    # Empty TSV cells are loaded by pandas as NaN; drop missing name parts
    # instead of rendering the literal text "nan".
    name_parts = (photo_data["photographer_first_name"], photo_data["photographer_last_name"])
    photographer = escape(" ".join(str(part) for part in name_parts if pd.notna(part)))
    username = escape(str(photo_data["photographer_username"]))

    display(HTML(
        f'Photo by <a href="https://unsplash.com/@{username}'
        '?utm_source=NaturalLanguageImageSearch&utm_medium=referral">'
        f'{photographer}</a> on '
        '<a href="https://unsplash.com/?utm_source=NaturalLanguageImageSearch&utm_medium=referral">Unsplash</a>'
        f', similarity={similarity}'
    ))
    print()










In [8]:
def search_image(search_query, N=3):
    """Display the photos that best match a natural-language query.

    The query is tokenized and encoded with the CLIP text encoder, scaled to
    unit length, and scored against every row of the precomputed
    ``photo_features`` matrix with a dot product. The best matches are then
    rendered in descending score order: the image itself followed by an
    Unsplash attribution line that includes the similarity score.

    Args:
        search_query: Text handed to ``clip.tokenize``.
        N: Maximum number of results to show. Values larger than the number
            of photos are clamped to it; values <= 0 show nothing.

    Returns:
        None. Results are rendered through ``display``.

    Raises:
        IndexError: If a ranked photo id has no row in ``photos``.
    """
    from html import escape  # function-scope import keeps the cell self-contained

    # Inference only: no gradients are needed to embed the query text.
    with torch.no_grad():
        query_tokens = clip.tokenize(search_query).to(device)
        text_encoded = model.encode_text(query_tokens)
        text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
    text_features = text_encoded.cpu().numpy()

    # One score per photo; stays a NumPy vector so ranking is vectorised.
    similarities = (text_features @ photo_features.T).squeeze(0)

    # Clamp the request so asking for more results than exist cannot raise.
    top_n = max(0, min(int(N), similarities.shape[0]))
    # Stable sort on the negated scores gives descending order while keeping
    # equal scores in ascending index order.
    ranked = np.argsort(-similarities, kind="stable")[:top_n]

    for idx in ranked:
        similarity = similarities[idx]
        photo_id = photo_ids[idx]
        photo_data = photos[photos["photo_id"] == photo_id].iloc[0]
        display(Image(url=photo_data["photo_image_url"]+"?w=640"))

        # Empty TSV cells are loaded by pandas as NaN; drop missing name
        # parts instead of rendering the literal text "nan".
        name_parts = (photo_data["photographer_first_name"], photo_data["photographer_last_name"])
        photographer = escape(" ".join(str(part) for part in name_parts if pd.notna(part)))
        username = escape(str(photo_data["photographer_username"]))

        display(HTML(
            f'Photo by <a href="https://unsplash.com/@{username}'
            '?utm_source=NaturalLanguageImageSearch&utm_medium=referral">'
            f'{photographer}</a> on '
            '<a href="https://unsplash.com/?utm_source=NaturalLanguageImageSearch&utm_medium=referral">Unsplash</a>'
            f', similarity={similarity}'
        ))
        print()

In [9]:
search_image("A cat play on the table", N=4)











