In [1]:
# ==========================================
# FAISS Mastery Notebook: Beginner → Advanced
# ==========================================

#%pip install faiss-cpu
#%pip install sentence-transformers

# -------------------------------
# 1. Imports and Setup
# -------------------------------
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer


In [2]:
# -------------------------------
# 2. Generate Sample Vectors
# -------------------------------
# Problem size: each vector has `d` components and the database holds `nb`
# of them. Both names are read again by the index-building cells.
d, nb = 64, 1000

# Seed NumPy's global generator so the two draws below are repeatable.
np.random.seed(42)

# Draw order is significant for reproducibility: database first, query second.
db_shape = (nb, d)
vectors = np.random.random(size=db_shape).astype(np.float32)   # database vectors

# A batch containing exactly one query vector.
query_shape = (1, d)
xq = np.random.random(size=query_shape).astype(np.float32)


In [3]:
# -------------------------------
# 3. Exact Search: IndexFlatL2
# -------------------------------
# Number of neighbours to retrieve; the later search cells reuse this `k`.
k = 5

# Flat index using L2 distance, filled with every database vector.
index_flat = faiss.IndexFlatL2(d)
index_flat.add(vectors)
print("Total vectors in Flat Index:", index_flat.ntotal)

# search() hands back a (distances, indices) pair for the query batch;
# only the indices are printed in this cell.
flat_result = index_flat.search(xq, k)
distances, indices = flat_result
print("\n[Flat Index] Top-5 Neighbors")
print("Indices:", indices)

Total vectors in Flat Index: 1000

[Flat Index] Top-5 Neighbors
Indices: [[502 606 234  85 162]]


In [4]:
# -------------------------------
# 4. IVF (Inverted File Index) - ANN
# -------------------------------
# `nlist` is the number of clusters the database is partitioned into.
nlist = 50

# The IVF index is built on top of a flat L2 index acting as its quantizer.
quantizer = faiss.IndexFlatL2(d)
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist)

# Train on the database vectors first, then add those same vectors.
index_ivf.train(vectors)
index_ivf.add(vectors)

# Number of clusters visited per query.
index_ivf.nprobe = 10

ivf_result = index_ivf.search(xq, k)
distances, indices = ivf_result
print("\n[IVF Index] Top-5 Neighbors")
print("Indices:", indices)
print("Distances:", distances)



[IVF Index] Top-5 Neighbors
Indices: [[502 606 162 205 466]]
Distances: [[6.099058  6.650052  6.9527254 7.1432023 7.1819143]]


In [5]:
# -------------------------------
# 5. IVF + PQ (Product Quantization)
# -------------------------------
m = 8       # number of sub-vectors
nbits = 8   # bits per sub-vector

# Give this index its own coarse quantizer instead of reusing the `quantizer`
# object that already belongs to (and was trained by) the IVF-Flat index.
# Sharing one quantizer between two indexes couples them: this cell would
# silently depend on the earlier cell's training, and retraining either index
# could disturb the other. The `quantizer` name itself is left untouched.
quantizer_pq = faiss.IndexFlatL2(d)
index_ivfpq = faiss.IndexIVFPQ(quantizer_pq, d, nlist, m, nbits)

# NOTE(review): with nbits = 8 each sub-quantizer learns 2**8 = 256 codes from
# only nb = 1000 training vectors; FAISS may warn that this is too few
# training points -- confirm whether a larger training set is wanted.
index_ivfpq.train(vectors)
index_ivfpq.add(vectors)
index_ivfpq.nprobe = 10     # clusters to search

distances, indices = index_ivfpq.search(xq, k)
print("\n[IVF+PQ Index] Top-5 Neighbors")
print("Indices:", indices)
print("Distances:", distances)



[IVF+PQ Index] Top-5 Neighbors
Indices: [[466 749 502 606 379]]
Distances: [[5.6911244 6.2623863 6.339216  6.6623445 6.7479343]]


In [6]:
# -------------------------------
# 6. HNSW Index (Graph-based ANN)
# -------------------------------
# Second constructor argument of IndexHNSWFlat ("32 neighbors").
hnsw_neighbors = 32
index_hnsw = faiss.IndexHNSWFlat(d, hnsw_neighbors)

# No train() call is made for this index; vectors are added directly.
index_hnsw.add(vectors)

hnsw_result = index_hnsw.search(xq, k)
distances, indices = hnsw_result
print("\n[HNSW Index] Top-5 Neighbors")
print("Indices:", indices)
print("Distances:", distances)



[HNSW Index] Top-5 Neighbors
Indices: [[502 606 234  85 162]]
Distances: [[6.099058  6.650052  6.803897  6.9097624 6.9527254]]


In [7]:
# -------------------------------
# 7. Cosine Similarity Search
# -------------------------------
# faiss.normalize_L2 rewrites its argument in place. Normalizing `vectors`
# and `xq` directly would clobber the raw data that the L2 cells above were
# built from, so re-running any of them afterwards would give different
# results. Work on copies and leave the originals untouched.
vectors_unit = vectors.copy()
xq_unit = xq.copy()
faiss.normalize_L2(vectors_unit)
faiss.normalize_L2(xq_unit)

# On unit-length vectors the inner product equals the cosine similarity,
# so the values printed as "Distances" are similarities (higher = closer).
index_cos = faiss.IndexFlatIP(d)  # Inner product
index_cos.add(vectors_unit)
distances, indices = index_cos.search(xq_unit, k)
print("\n[Cosine Similarity via Inner Product] Top-5 Neighbors")
print("Indices:", indices)
print("Distances:", distances)


[Cosine Similarity via Inner Product] Top-5 Neighbors
Indices: [[502 785 162 234 597]]
Distances: [[0.84342945 0.8393954  0.83852947 0.836298   0.83546114]]


In [8]:
# -------------------------------
# 8. Semantic Search Project
# -------------------------------
# Sample documents
documents = [
    "FAISS is a library for similarity search.",
    "Vector databases store embeddings for search.",
    "HNSW is a graph-based search algorithm.",
    "Product Quantization compresses vectors.",
    "SentenceTransformers can create embeddings from text."
]

# Create embeddings (one row per document), cast to float32 for FAISS
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(documents, convert_to_numpy=True)
embeddings = embeddings.astype('float32')

# Normalize in place so that inner product behaves as cosine similarity
faiss.normalize_L2(embeddings)

# Create FAISS index sized to the embedding width
index_semantic = faiss.IndexFlatIP(embeddings.shape[1])
index_semantic.add(embeddings)

# User query, embedded and normalized the same way as the documents
query = ["How to perform vector search using FAISS?"]
query_emb = model.encode(query, convert_to_numpy=True).astype('float32')
faiss.normalize_L2(query_emb)

D, I = index_semantic.search(query_emb, k=3)
print("\n[Semantic Search Results]")
for idx in I[0]:
    # FAISS fills missing neighbours with label -1 when the index holds fewer
    # than k vectors; documents[-1] would then silently print the last
    # document as if it were a hit, so skip those placeholder labels.
    if idx < 0:
        continue
    print("-", documents[idx])



[Semantic Search Results]
- FAISS is a library for similarity search.
- Vector databases store embeddings for search.
- Product Quantization compresses vectors.


In [9]:
# -------------------------------
# 9. Save & Load Index
# -------------------------------
# Round-trip the flat index through a file in the current working directory.
index_path = "faiss_flat.index"
faiss.write_index(index_flat, index_path)

# Read it back and report how many vectors the reloaded index holds.
loaded_index = faiss.read_index(index_path)
print("\nLoaded index has", loaded_index.ntotal, "vectors")




Loaded index has 1000 vectors
