# Reverse Search Engine
Given an image, the goal is to quickly retrieve the top-k most similar images from a database.

In [None]:
import torch
import torchvision
import torchvision.models as models
from PIL import Image

# Preprocessing applied to every image before it is fed to the network:
# resize, centre-crop to 224x224, convert to a tensor, then normalise each
# channel with the mean/std below (presumably the ImageNet statistics the
# pretrained weights were trained with -- confirm against torchvision docs).
transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Pretrained ResNet-18 in evaluation mode; eval() returns the module itself,
# so loading and switching modes can be chained.
model = models.resnet18(pretrained=True).eval()

# The layer whose output is captured as the image embedding
layer = model.avgpool


def get_vector(image):
    """Return the embedding of ``image`` taken from the hooked ``layer``.

    The image is preprocessed with the module-level ``transforms`` pipeline,
    pushed through ``model`` once, and the output of ``layer`` (the 'avgpool'
    layer, output size 512) is captured with a temporary forward hook.

    Args:
        image: A PIL image. Images that are not in RGB mode (grayscale,
            palette, RGBA, ...) are converted to RGB first, because the
            normalisation step is defined for exactly three channels. Any
            other input type is handed to ``transforms`` unchanged.

    Returns:
        torch.Tensor: 1-D tensor of length 512 with the flattened layer output.
    """
    # Normalise the channel layout so three-channel preprocessing always applies
    if isinstance(image, Image.Image) and image.mode != 'RGB':
        image = image.convert('RGB')

    # Create a PyTorch tensor with the transformed image
    t_img = transforms(image)
    # Buffer that will receive the feature vector
    my_embedding = torch.zeros(512)

    # Forward hook: copy the layer output (flattened to 1-D) into the buffer
    def copy_data(m, i, o):
        my_embedding.copy_(o.flatten())

    # Attach the hook to the selected layer
    h = layer.register_forward_hook(copy_data)
    try:
        # Inference only: no gradients needed; unsqueeze adds the batch dimension
        with torch.no_grad():
            model(t_img.unsqueeze(0))
    finally:
        # Always detach the hook, even if the forward pass raised; otherwise a
        # stale hook would stay registered on the shared layer.
        h.remove()
    return my_embedding

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this notebook
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Article metadata, one row per article
df = pd.read_csv('data.csv')
# File name of each article's image: "0" + article_id + ".jpg"
df['article_id_path'] = '0' + df['article_id'].astype(str) + '.jpg'

In [None]:
import os
def get_vector_for_image(article_id_path, product_type_name, data_dir='/content/drive/MyDrive/data/'):
    """Load one article image from disk and return its embedding.

    The image is expected at ``<data_dir>/<product_type_name>/<article_id_path>``.

    Args:
        article_id_path: File name of the image (e.g. ``'0625189002.jpg'``).
        product_type_name: Name of the sub-directory that holds the image.
        data_dir: Root directory of the image collection. Defaults to the
            Google Drive location used by this notebook.

    Returns:
        The feature vector produced by ``get_vector`` for the image.
    """
    img_path = os.path.join(data_dir, product_type_name, article_id_path)
    # Image.open is lazy and keeps the file open; the context manager makes
    # sure the handle is released once the embedding has been computed.
    with Image.open(img_path) as img:
        # Force three channels so grayscale / palette / RGBA files are handled too
        return get_vector(img.convert('RGB'))
  
# Compute one embedding per row: each row's image file name and product type
# are passed to get_vector_for_image, and the result is stored in 'Embeddings'.
df['Embeddings'] = df.apply(lambda x: get_vector_for_image(x.article_id_path,x.product_type_name), axis = 1)

In [None]:
# Save the whole frame (metadata plus the 'Embeddings' column) as a pickle for future processing
df.to_pickle('Embeddings-1000.pkl')

In [None]:
!pip install annoy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting annoy
  Downloading annoy-1.17.0.tar.gz (646 kB)
[K     |████████████████████████████████| 646 kB 5.3 MB/s 
[?25hBuilding wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... [?25l[?25hdone
  Created wheel for annoy: filename=annoy-1.17.0-cp37-cp37m-linux_x86_64.whl size=391576 sha256=ddd8d0a91ba9e05a546e2a96a19b5dac741f0093d76fd523fbbb095ca56de729
  Stored in directory: /root/.cache/pip/wheels/4f/e8/1e/7cc9ebbfa87a3b9f8ba79408d4d31831d67eea918b679a4c07
Successfully built annoy
Installing collected packages: annoy
Successfully installed annoy-1.17.0


In [None]:
from annoy import AnnoyIndex
# Embedding dimensionality, read from the first row by position so it does not
# depend on the frame having a 0 label in its index.
f = len(df['Embeddings'].iloc[0])
t = AnnoyIndex(f, metric='euclidean')

# Hyper-parameter: more trees give more accurate neighbours, at the cost of a
# larger index and a longer build.
ntree = 500
# Item ids are row positions (0..n-1), which is what the lookup code uses with df.iloc
for i, vector in enumerate(df['Embeddings']):
    # Hand Annoy plain Python floats when the vector is a tensor/array
    values = vector.tolist() if hasattr(vector, 'tolist') else vector
    t.add_item(i, values)
t.build(ntree);

In [None]:
# Display the frame, which now carries the article_id_path and Embeddings columns
df

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc,article_id_path,Embeddings
0,625189002,625189,SB Sebastian rain jkt,262,Jacket,Garment Upper body,1010016,Solid,73,Dark Blue,...,Children Sizes 92-140,4,Baby/Children,45,Kids Outerwear,1007,Outdoor,"Rain jacket in windproof, waterproof functiona...",0625189002.jpg,"[tensor(0.9268), tensor(2.1596), tensor(0.3360..."
1,801905013,801905,Tom Chino Shorts (TVP),274,Shorts,Garment Lower body,1010001,All over pattern,13,Beige,...,Children Sizes 92-140,4,Baby/Children,46,Kids Boy,1025,Shorts,Knee-length shorts in washed cotton twill with...,0801905013.jpg,"[tensor(0.9887), tensor(0.4870), tensor(0.), t..."
2,637922006,637922,Brian Tuxedo Coat,262,Jacket,Garment Upper body,1010016,Solid,9,Black,...,Ladieswear,1,Ladieswear,19,Womens Jackets,1007,Outdoor,Fitted coat in woven fabric with a tuxedo-styl...,0637922006.jpg,"[tensor(0.3852), tensor(0.3776), tensor(0.1682..."
3,698111001,698111,Solene shorts,274,Shorts,Garment Lower body,1010018,Treatment,71,Light Blue,...,Children Sizes 134-170,4,Baby/Children,77,Young Girl,1005,Jersey Fancy,Shorts in stretch jersey with a denim look mad...,0698111001.jpg,"[tensor(1.3565), tensor(0.2290), tensor(0.0044..."
4,636469001,636469,kourtny shorts,274,Shorts,Garment Lower body,1010016,Solid,9,Black,...,Divided,2,Divided,53,Divided Collection,1025,Shorts,"Short, high-waisted twill shorts in a cotton b...",0636469001.jpg,"[tensor(1.0613), tensor(0.0666), tensor(0.0711..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,729948001,729948,OC Rachel Skirt,275,Skirt,Garment Lower body,1010016,Solid,71,Light Blue,...,Divided,2,Divided,50,Divided Projects,1009,Trousers,Knee-length skirt in a cotton weave with brode...,0729948001.jpg,"[tensor(1.4451), tensor(1.6120), tensor(0.0660..."
996,791445002,791445,Fargo tunic,258,Blouse,Garment Upper body,1010016,Solid,31,Light Orange,...,Ladieswear,1,Ladieswear,8,Mama,1010,Blouses,"Tunic in an airy weave with a V-neck, yoke wit...",0791445002.jpg,"[tensor(0.4417), tensor(0.1949), tensor(0.0401..."
997,658459001,658459,Jogger lined 79,272,Trousers,Garment Lower body,1010023,Denim,73,Dark Blue,...,Children Sizes 92-140,4,Baby/Children,46,Kids Boy,1016,Trousers Denim,Joggers in washed denim with an elasticated dr...,0658459001.jpg,"[tensor(0.3145), tensor(1.6253), tensor(0.0438..."
998,741417001,741417,SPEED Kasuma 3-p,272,Trousers,Garment Lower body,1010016,Solid,73,Dark Blue,...,Baby Sizes 50-98,4,Baby/Children,41,Baby Boy,1006,Woven/Jersey/Knitted mix Baby,Trousers in cotton twill with an adjustable el...,0741417001.jpg,"[tensor(0.0369), tensor(0.2694), tensor(0.2917..."


In [None]:
import time
def get_similar_images_annoy(img_index, n_neighbors=5):
    """Find the rows most similar to a given row using the Annoy index ``t``.

    Prints the elapsed lookup time in milliseconds.

    Args:
        img_index: Row position in ``df`` (and item id in the Annoy index)
            of the query image.
        n_neighbors: Number of neighbours to request from the index. The
            query item itself can be among them (it is the first row in the
            recorded output of this notebook).

    Returns:
        tuple: ``(article_id, product_type_name, neighbours)`` where the first
        two describe the query row and ``neighbours`` is the slice of ``df``
        holding the rows returned by the index, in the order Annoy gave them.
    """
    # perf_counter is the monotonic, high-resolution clock meant for intervals
    start = time.perf_counter()
    # Select by column name rather than by column position, so the lookup
    # keeps working if columns are added or reordered.
    base_row = df.iloc[img_index]
    base_img_id = base_row['article_id']
    base_label = base_row['product_type_name']
    similar_img_ids = t.get_nns_by_item(img_index, n_neighbors)
    end = time.perf_counter()
    print(f'{(end - start) * 1000} ms')
    return base_img_id, base_label, df.iloc[similar_img_ids]

In [None]:
# Example query: look up the neighbours of the item at row position 0
base_image, base_label, similar_images_df = get_similar_images_annoy(0)

4.379510879516602 ms


In [None]:
# Rows of df returned by the Annoy lookup for the example query
similar_images_df

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc,article_id_path,Embeddings
0,625189002,625189,SB Sebastian rain jkt,262,Jacket,Garment Upper body,1010016,Solid,73,Dark Blue,...,Children Sizes 92-140,4,Baby/Children,45,Kids Outerwear,1007,Outdoor,"Rain jacket in windproof, waterproof functiona...",0625189002.jpg,"[tensor(0.9268), tensor(2.1596), tensor(0.3360..."
870,564472001,564472,BB Nick varsity speed,262,Jacket,Garment Upper body,1010016,Solid,9,Black,...,Children Sizes 134-170,4,Baby/Children,45,Kids Outerwear,1007,Outdoor,Baseball jacket in woven fabric with a sheen. ...,0564472001.jpg,"[tensor(1.0939), tensor(1.4100), tensor(0.1328..."
981,803662001,803662,CARTER PU SHIRT,259,Shirt,Garment Upper body,1010016,Solid,9,Black,...,Divided,2,Divided,53,Divided Collection,1010,Blouses,Straight-cut shirt in imitation leather with a...,0803662001.jpg,"[tensor(0.9985), tensor(2.7406), tensor(0.4034..."
639,477163001,477163,Shaun (1),308,Hoodie,Garment Upper body,1010016,Solid,9,Black,...,Menswear,3,Menswear,31,Mens Outerwear,1007,Outdoor,"Windbreaker in woven fabric with a mesh-lined,...",0477163001.jpg,"[tensor(0.7147), tensor(2.5043), tensor(0.4261..."
323,569974001,569974,DONT USE ROLAND HOOD,308,Hoodie,Garment Upper body,1010016,Solid,9,Black,...,Menswear,3,Menswear,26,Men Underwear,1002,Jersey Basic,Top in sweatshirt fabric with a lined drawstri...,0569974001.jpg,"[tensor(1.3733), tensor(2.4662), tensor(0.0945..."
