In [1]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the sentence-embedding model once; the cells below call model.encode()
# on both the corpus and the query.
model = SentenceTransformer(
    'sentence-transformers/all-MiniLM-L6-v2'
)

In [3]:
def create_index_add_embedding(embeddings):
    """Build a flat (exact) L2 FAISS index and add ``embeddings`` to it.

    Args:
        embeddings: 2-D array-like with one row per vector. It is converted
            to a C-contiguous float32 array before being indexed.

    Returns:
        The populated ``faiss.IndexFlatL2``. Vectors are added in row order.

    Side effects:
        Prints the number of vectors stored in the index.
    """
    # FAISS operates on C-contiguous float32 data. search_faiss already casts
    # the query to float32, so do the same for the corpus here instead of
    # relying on the caller (or on the installed faiss wrapper) to do it.
    embeddings = np.ascontiguousarray(embeddings, dtype='float32')

    embedding_dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(embedding_dimension)
    index.add(embeddings)
    print(f"Number of vectors in FAISS index: {index.ntotal}")
    return index

def search_faiss(index, query, top_k, document):
    """Embed ``query`` and print its ``top_k`` nearest entries from ``index``.

    Args:
        index: FAISS index to search.
        query: Query text; encoded with the module-level ``model``.
        top_k: Number of nearest neighbours to request from FAISS.
        document: Sequence looked up with the indices FAISS returns, so it
            must be in the same order as the vectors added to ``index``.

    Returns:
        None. The raw distances, the raw indices and each matched document
        are printed.
    """
    # encode() works on batches, so the single query is wrapped in a list;
    # the result is cast to float32, which is what FAISS searches with.
    query_embedding = model.encode([query])
    query_embedding = np.array(query_embedding).astype('float32')

    # Ask FAISS for the top_k nearest neighbours of the query vector.
    distances, indices = index.search(query_embedding, top_k)

    # Print the raw search results
    print(f"Distances: {distances}")
    print(f"Indices: {indices}")

    # Retrieve and print the most relevant documents
    for idx in indices[0]:
        # FAISS fills unused result slots with -1 when the index holds fewer
        # than top_k vectors. Without this guard document[-1] would print the
        # last document as if it were a genuine match.
        if idx < 0:
            continue
        print(f"Relevant Document: {document[idx]}")
    

In [4]:
# Toy corpus; search_faiss() looks results up in this list by position.
document = [
    "ankush is a boy, aadhaar number is 12324242, he likes pan pizza",
    "ankush is a boy pan number is 232525 and needs to have tablet named dopamicnine",
]

# Embed every entry, then index the resulting vectors.
embeddings = model.encode(document)
faiss_index = create_index_add_embedding(embeddings)

Number of vectors in FAISS index: 2


In [6]:
# Sample lookup: print the single closest document to the question.
query = "who is the boy ?"
top_k = 1
search_faiss(
    index=faiss_index,
    query=query,
    top_k=top_k,
    document=document,
)

Distances: [[1.3244383]]
Indices: [[0]]
Relevant Document: ankush is a boy, aadhaar number is 12324242, he likes pan pizza
