In [1]:
# Install FAISS (CPU build) into the kernel's environment if it is missing.
# 1.7.4 is the version shown in this cell's recorded output.
# %pip install faiss-cpu==1.7.4

Collecting faiss-cpu
  Using cached faiss_cpu-1.7.4-cp39-cp39-macosx_11_0_arm64.whl (2.7 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4


In [1]:
import faiss
import numpy as np
import pandas as pd


## Create an index

In [2]:
# Load the precomputed image embeddings and the item metadata.
image_embeddings = np.load("image_embeddings.npy")
# dtype=str keeps every column as text, so article ids are compared as strings.
df = pd.read_csv("H&M_items.csv", dtype=str)

print(image_embeddings.shape)
# Only the last expression of a cell is displayed, so the preview has to come
# last; in the middle of the cell its result was silently discarded.
df.head()

(22729, 512)


In [4]:
# Row label(s) in df of the item whose article_id is '1160705014'
df.loc[df["article_id"].isin(['1160705014'])].index.tolist()

[0]

In [32]:
# Build a flat inner-product index over all image embeddings.
# The dimensionality is read from the data rather than hard-coded, so the index
# cannot silently disagree with the embeddings file.
# NOTE(review): inner product only equals cosine similarity if the embeddings
# are L2-normalised — confirm how image_embeddings.npy was produced.
d = image_embeddings.shape[1]
index = faiss.IndexFlatIP(d)
# Vectors are added as float32; the query cells below cast to float32 as well.
index.add(image_embeddings.astype('float32'))

In [33]:
# Persist the index twice: once next to this notebook and once in the
# API's data directory.
for index_path in ("index.faiss", "../api/data/index.faiss"):
    faiss.write_index(index, index_path)

In [None]:
# Load the index from the "index.faiss" file in the working directory.
# This rebinds `index`, replacing whatever index object was in memory before.
index = faiss.read_index("index.faiss")

## Search index

In [29]:
# Embedding of row 100 as a float32 batch of one query, shape (1, 512)
image_embeddings[100].astype('float32').reshape((1, 512))

array([[ 3.68975438e-02,  1.14244267e-01, -2.92608049e-02,
         9.45846271e-03,  2.19252948e-02, -5.53652039e-03,
        -2.85253432e-02,  3.22286785e-02,  2.14120615e-02,
        -2.76712310e-02, -2.19240151e-02, -3.25648561e-02,
        -6.19078754e-03,  3.10807265e-02, -3.25602014e-03,
         7.83951022e-03, -1.04843549e-01,  4.88605127e-02,
         8.18401575e-03,  1.74060743e-02,  1.77355446e-02,
        -4.80111092e-02,  1.05488529e-04,  2.79149786e-02,
        -4.61458117e-02, -9.72578526e-02,  2.78171655e-02,
         3.61112393e-02, -2.13668067e-02,  1.41728427e-02,
        -7.63800815e-02,  1.66786052e-02, -8.42903461e-03,
        -4.57269400e-02,  1.56714469e-02,  2.74010114e-02,
         5.84865510e-02,  6.51501119e-02, -3.06518236e-03,
        -2.84532607e-02,  1.50905189e-03,  1.22335320e-02,
         5.23754656e-02, -2.65549719e-02, -1.49119012e-02,
        -1.92571636e-02, -9.64634866e-03, -3.89591381e-02,
         2.94671059e-02, -3.57601903e-02,  6.83422294e-0

In [28]:
# Filtered nearest-neighbour search for the item stored at row 100.
# `params` is optional: when given, its selector restricts the search to the
# listed ids (IDSelectorBatch and IDSelectorArray can both be used here).
k = 4  # number of nearest neighbors

D, I = index.search(
    image_embeddings[100].astype('float32').reshape((1, 512)),
    k,
    params=faiss.SearchParameters(
        sel=faiss.IDSelectorBatch([0, 35177, 73844, 100, 1001, 1230, 122])
    ),
)
print(I)


[[ 100    0  122 1001]]


In [10]:
# Get embeddings of a certain item from the index
embeddings = index.reconstruct_batch([100, 100]) # get vector at index 100
print(embeddings)
print(embeddings.shape)

[[ 0.03689754  0.11424427 -0.0292608  ... -0.04034693  0.01257371
   0.03626109]
 [ 0.03689754  0.11424427 -0.0292608  ... -0.04034693  0.01257371
   0.03626109]]
(2, 512)


In [11]:
def get_embeddings_by_id(ids: list, items=None, faiss_index=None):
    """
    Return the stored embeddings of the items whose article_id is in `ids`.

    Used when two or more items are chosen and their vectors are needed.

        params: ids: list of article ids; each one is compared as a string
                items: DataFrame with an `article_id` column whose row labels
                       are the ids used in the index; defaults to the
                       notebook-level `df`
                faiss_index: object exposing `reconstruct_batch`; defaults to
                             the notebook-level `index`

        returns: array with one row per matching item, in the row order of
                 `items` (not the order of `ids`); ids that match nothing are
                 skipped, so the result can have zero rows
    """
    # Fall back to the notebook globals so existing one-argument calls still work.
    items = df if items is None else items
    faiss_index = index if faiss_index is None else faiss_index

    # A membership test replaces the string-built query expression: it accepts
    # an empty list and ids containing quotes, both of which broke the query.
    wanted = {str(article_id) for article_id in ids}
    matches = items["article_id"].isin(wanted).to_numpy()
    row_labels = items.index[matches]

    return faiss_index.reconstruct_batch(row_labels.to_numpy(dtype="int64"))

In [12]:
# Example call with two article ids.
# NOTE(review): the recorded output is an empty (0, 512) array, i.e. no row
# matched — presumably these ids are absent from df; verify against the CSV.
ids = ["0429322001", "0429322007"]

get_embeddings_by_id(ids)

array([], shape=(0, 512), dtype=float32)