In [14]:
# **Natural-Language Image Search on Unsplash Photos with CLIP**

In [16]:
# Setup Environment
#!pip install git+https://github.com/openai/CLIP.git
#!pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html

In [18]:
import clip
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

# Load the CLIP "ViT-B/32" model and its preprocessing function onto that device
model, preprocess = clip.load("ViT-B/32", device=device)

In [20]:
# Show which device was selected above
device

'cpu'

In [24]:
# Download the precomputed data
from pathlib import Path
from urllib.request import urlretrieve

# Base URL of the GitHub release that hosts the precomputed files
RELEASE_URL = "https://github.com/haltakov/natural-language-image-search/releases/download/1.0.0"

# Create a folder for the precomputed features.
# exist_ok=True keeps this cell re-runnable: a plain `mkdir` reports an error
# when the folder is already there, and the shell command is platform-specific.
dataset_dir = Path("unsplash-dataset")
dataset_dir.mkdir(exist_ok=True)

# Download each file from GitHub Releases unless it is already present
for file_name in ("photo_ids.csv", "features.npy"):
  target = dataset_dir / file_name
  if not target.exists():
    # Fetch into a temporary name first, then rename, so an interrupted
    # transfer is not mistaken for a complete file on the next run
    partial = target.with_name(target.name + ".part")
    urlretrieve(f"{RELEASE_URL}/{file_name}", partial)
    partial.replace(target)
  

A subdirectory or file unsplash-dataset already exists.


In [26]:

import pandas as pd
import numpy as np

# Read the photo ID table and keep its 'photo_id' column as a plain list
photo_ids = list(pd.read_csv("unsplash-dataset/photo_ids.csv")['photo_id'])

# Read the precomputed feature vectors and wrap them in a tensor
photo_features = torch.from_numpy(np.load("unsplash-dataset/features.npy"))

# On the CPU the features are cast to float32; on any other device the stored dtype is kept
if device == "cpu":
  photo_features = photo_features.float()
photo_features = photo_features.to(device)

# Report how many photos were loaded
print(f"Photos loaded: {len(photo_ids)}")

Photos loaded: 1981161


In [None]:
# **Define Functions**

In [28]:
def encode_search_query(search_query):
  """Encode a text query with CLIP and return its normalized feature tensor.

  The query is tokenized, moved to `device`, and passed through the model's
  text encoder with gradient tracking disabled. The result is divided by its
  norm along the last dimension.
  """
  tokens = clip.tokenize(search_query).to(device)

  with torch.no_grad():
    query_features = model.encode_text(tokens)
    # Scale to unit length along the feature dimension
    query_features /= query_features.norm(dim=-1, keepdim=True)

  return query_features

In [30]:
def find_best_matches(text_features, photo_features, photo_ids, results_count=3):
  """Return the IDs of the photos most similar to the query features.

  Args:
    text_features: query features; the code transposes this and squeezes
      dimension 1 of the product, so a single query row is expected.
    photo_features: one feature row per photo, aligned with `photo_ids`.
    photo_ids: sequence of photo IDs, indexed like the rows of `photo_features`.
    results_count: maximum number of IDs to return.

  Returns:
    A list of at most `results_count` photo IDs, most similar first.
    An empty list is returned when `results_count` is zero or negative.
  """
  # Similarity between the query and each photo (dot product; this equals the
  # cosine similarity when both sides are unit-normalized)
  similarities = (photo_features @ text_features.T).squeeze(1)

  # Never request more results than there are photos
  k = min(int(results_count), similarities.shape[0])
  if k <= 0:
    return []

  # Select only the k highest scores instead of sorting every photo
  top_idx = torch.topk(similarities, k).indices

  # Return the photo IDs of the best matches (plain ints for list indexing)
  return [photo_ids[i] for i in top_idx.tolist()]

In [56]:
from IPython.display import Image
from IPython.core.display import HTML

def display_photo(photo_id):
  """Show an Unsplash photo, an attribution line, and the photo's ID."""
  # Download URL of the photo, requesting a width of 320px
  image = Image(url=f"https://unsplash.com/photos/{photo_id}/download?w=320")
  attribution = HTML('Photo on Unsplash ')

  # Render the picture first, then the attribution text
  display(image)
  display(attribution)

  # Print the ID followed by a blank line to separate consecutive photos
  id_line = "Photo ID :" + photo_id
  print(id_line)
  print()

In [32]:
# from IPython.display import Image
# from IPython.core.display import HTML
# from urllib.request import urlopen
# import json
# # Display a photo from Unsplash given its ID. This function needs to call the Unsplash API to get the URL of the photo
# # and some metadata about the photographer.
# def display_photo(photo_id):
#   # Proxy for the Unsplash API 
#   unsplash_api_url = f"https://haltakov.net/unsplash-proxy/{photo_id}"
  
#   # Alternatively, you can use your own Unsplash developer account with this code
#   # unsplash_api_url = f"https://api.unsplash.com/photos/{photo_id}?client_id=YOUR_UNSPLASH_ACCESS_KEY"
  
#   # Fetch the photo metadata from the Unsplash API
#   photo_data = json.loads(urlopen(unsplash_api_url).read().decode("utf-8"))

#   # Get the URL of the photo resized to have a width of 320px
#   photo_image_url = photo_data["urls"]["raw"] + "&w=320"

#   # Display the photo
#   display(Image(url=photo_image_url))

#   # Display the attribution text
#   display(HTML(f'Photo by {photo_data["user"]["name"]} on Unsplash'))
#   print()

In [58]:
def search_unslash(search_query, photo_features, photo_ids, results_count=3):
  """Run a text search over the photo features and display the best matches.

  The query is encoded with `encode_search_query`, ranked against
  `photo_features` with `find_best_matches`, and every returned photo ID is
  shown with `display_photo`.
  """
  query_features = encode_search_query(search_query)
  matches = find_best_matches(query_features, photo_features, photo_ids, results_count)

  # Show the matches in the order they were returned
  for match_id in matches:
    display_photo(match_id)

In [60]:
# Example query: show the three best matches
search_query = "Two birds flying above the water"
search_unslash(search_query, photo_features, photo_ids, results_count=3)

Photo ID :MtCh1hgGkiU



Photo ID :Xkhy9_N9Wgo



Photo ID :qWlZ-8VNJWM



In [64]:
# Example query: show the three best matches
search_query = "Boy who wins a swiming medal"
search_unslash(search_query, photo_features, photo_ids, results_count=3)

Photo ID :Iep8SMdpe6I



Photo ID :CBXJ6ljSDzM



Photo ID :ZB1OH9hihjE



In [None]:
# **Combine Text and Photo Search Queries**
# The idea here is to do a text search for a photo and then modify the search query by adding another photo
# to it, in order to transfer some of that photo's features to the search.
# This works by adding the features of the photo to the features of the text query.
# The photo features are multiplied by a weight to reduce their influence, so that the text query remains the main source.
# The results are somewhat sensitive to the prompt...

In [48]:
def search_by_text_and_photo(query_text, query_photo_id, photo_weight=0.5):
  """Search with a text query blended with the features of an existing photo.

  Displays three things in order: the best match for the text query alone,
  the photo used as the query, and the best match for the combined query.
  Reads the notebook-level `photo_ids` and `photo_features`.

  Args:
    query_text: text of the search query.
    query_photo_id: ID of a photo that is present in `photo_ids`.
    photo_weight: factor applied to the photo features before they are added
      to the text features.

  Raises:
    ValueError: if `query_photo_id` is not in `photo_ids`.
  """
  # Encode the search query (once; reused for the text-only search below)
  text_features = encode_search_query(query_text)

  # Find the feature vector for the specified photo ID
  try:
    query_photo_index = photo_ids.index(query_photo_id)
  except ValueError:
    raise ValueError(f"Unknown photo ID: {query_photo_id!r}") from None
  query_photo_features = photo_features[query_photo_index]

  # Combine the text and photo queries and normalize again.
  # The addition creates a new tensor, so text_features is left untouched.
  search_features = text_features + query_photo_features * photo_weight
  search_features /= search_features.norm(dim=-1, keepdim=True)

  # Find the best match for the combined query.
  # NOTE(review): the query photo is itself part of photo_features, so it can
  # come back as the best match - consider excluding it if that is unwanted.
  best_photo_ids = find_best_matches(search_features, photo_features, photo_ids, 1)

  # Display the results
  print("Text search result")
  for photo_id in find_best_matches(text_features, photo_features, photo_ids, 1):
    display_photo(photo_id)

  print("Photo query")
  display(Image(url=f"https://unsplash.com/photos/{query_photo_id}/download?w=320"))

  print("Result for text query + photo query")
  display_photo(best_photo_ids[0])

In [68]:
# Blend the text query with the features of an existing photo
search_by_text_and_photo(query_text="Seagull flying", query_photo_id="qWlZ-8VNJWM")

Test search result


Photo ID :DbGDTKm9SRs

Photo query


Result for text query + photo query


Photo ID :qWlZ-8VNJWM



In [66]:
# Blend the text query with the features of an existing photo
search_by_text_and_photo(query_text="Sydney Opera house", query_photo_id="HSsOC5nqurA")

Test search result


Photo ID :QP-l1uE19iI

Photo query


Result for text query + photo query


Photo ID :KCEUHBb4wjc

