In [1]:
from sentence_transformers import SentenceTransformer

# Load the "all-MiniLM-L6-v2" embedding model by name.
# `model` is reused by the later cells to encode both the corpus and the query.
model = SentenceTransformer("all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialize the small demo corpus of sentences to be embedded
sentences = [
    "Birds fly over the rainbow",
    "Why do birds suddenly appear",
    "A fast brown animal leaps over a sleepy canine",
    "The lazy dog lies under the shady tree",
    "The quick brown fox jumps over the lazy dog",
]

# Encode the whole corpus in a single call: each sentence is converted into
# a vector that captures its semantic meaning
sentence_embeddings = model.encode(sentences)

# Uncomment to inspect the raw vectors:
# print(f"Vector Embeddings:\n {sentence_embeddings}")

In [3]:
import faiss

# `sentence_embeddings` holds one row per sentence and one column per
# embedding feature; show its overall shape first.
print(f"Shape of the vector: {sentence_embeddings.shape}")

# Dimension of the vectors: the number of features (attributes) encoded in
# each embedding. The index must be created for exactly this dimension.
dim = sentence_embeddings.shape[1]

# Create a flat index.
# IndexFlatL2 uses the L2 (Euclidean) distance to measure the similarity
# between vectors: at search time it compares the query vector against every
# vector loaded into the index. It's simple and very accurate, but not the
# fastest option.
index = faiss.IndexFlatL2(dim)

# Load the sentence vectors into the index
index.add(sentence_embeddings)


Shape of the vector: (5, 384)


In [4]:
query = "A dark fox jumping over dogs"

# Number of nearest neighbours to retrieve for the query
top_k = 3

# Convert the query into a high-dimensional vector (capturing semantic meaning).
# The query is wrapped in a list so the result is a 2D array with one row,
# which is the layout `index.search` expects.
query_vector = model.encode([query])

# Perform the search.
# Returns two numpy arrays: D (distances) and I (indices).
# D is a 2D array of shape (n_queries, k) that contains the distances of the
# nearest neighbours; each row holds the distances of the k nearest neighbours
# of the corresponding query vector. For IndexFlatL2 these are squared L2
# distances, so smaller means more similar.
# I is a 2D array of shape (n_queries, k) that contains the indices of the
# nearest neighbours; each row holds the indices of the k nearest neighbours
# of the corresponding query vector.
D, I = index.search(query_vector, k=top_k)  # Search for the top_k most similar sentences

print("Query:", query)
print("Most similar sentences in our index:")
for i in I[0]:
    # Faiss pads the result with -1 when the index holds fewer than k vectors.
    # Without this guard, sentences[-1] would silently print the last sentence.
    if i < 0:
        continue
    print("-", sentences[i])


Query: A dark fox jumping over dogs
Most similar sentences in our index:
- The quick brown fox jumps over the lazy dog
- A fast brown animal leaps over a sleepy canine
- Birds fly over the rainbow
