# FAISS Basics

Introduction to FAISS for efficient vector similarity search.

**Learning objectives:**
- Create and populate FAISS indexes
- Understand IndexFlatIP vs IndexFlatL2
- Perform basic search operations
- Save and load indexes

In [None]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import time

## 1. Why FAISS?

Let's compare NumPy and FAISS on a realistically sized synthetic dataset.

In [None]:
# Generate synthetic dataset
n_vectors = 2_000_000
dimension = 384

# Random embeddings (simulate real embeddings).
# Sample directly in float32: np.random.randn() always produces float64, so
# the original cast created a temporary twice the size of the final corpus.
rng = np.random.default_rng()
corpus_embeddings = rng.standard_normal((n_vectors, dimension), dtype=np.float32)

# Normalize each row to unit length (important for cosine similarity).
# Dividing in place avoids allocating a second corpus-sized array.
norms = np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)
corpus_embeddings /= norms

print(f"Corpus: {corpus_embeddings.shape}")
print(f"Data type: {corpus_embeddings.dtype}")
print(f"Memory: {corpus_embeddings.nbytes / 1e6:.1f} MB")

In [None]:
# Draw one random query row of shape (1, dimension) and scale it to unit
# length, matching the normalization applied to the corpus
query = np.random.randn(1, dimension).astype('float32')
query /= np.linalg.norm(query)

print(f"Query: {query.shape}")

### NumPy Approach

In [None]:
# NumPy search: inner product of the query with every corpus vector, then a
# full sort of the scores to pick the ten highest (best first).
# perf_counter() is the clock intended for measuring short intervals;
# time.time() is a wall clock that may be coarse or adjusted mid-measurement.
start = time.perf_counter()
similarities = np.dot(corpus_embeddings, query.T).flatten()
top_10_indices = np.argsort(similarities)[-10:][::-1]
numpy_time = time.perf_counter() - start

print(f"NumPy search time: {numpy_time*1000:.2f}ms")
print(f"Top-10 indices: {top_10_indices}")
print(f"Top-10 scores: {similarities[top_10_indices]}")

### FAISS Approach

In [None]:
# Build a flat inner-product index over the normalized corpus
# (inner product is the right metric for normalized vectors)
index = faiss.IndexFlatIP(dimension)
index.add(corpus_embeddings)

# Report how many vectors the index holds and which index class it is
index_type_name = type(index).__name__
print(f"Index size: {index.ntotal} vectors")
print(f"Index type: {index_type_name}")

In [None]:
# FAISS search: top-10 neighbours of the query in a single call.
# perf_counter() is the clock intended for measuring short intervals; with a
# coarse wall clock a fast search could read as 0 and break the speedup
# division below.
start = time.perf_counter()
distances, indices = index.search(query, k=10)
faiss_time = time.perf_counter() - start

print(f"FAISS search time: {faiss_time*1000:.2f}ms")
print(f"Speedup: {numpy_time/faiss_time:.1f}x")
print(f"\nTop-10 indices: {indices[0]}")
print(f"Top-10 scores: {distances[0]}")

**Key observation:** FAISS is typically 5-20x faster than this NumPy baseline, even though both perform exact search! The exact speedup depends on your hardware.

## 2. IndexFlatIP vs IndexFlatL2

Two metrics for similarity:
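- **IndexFlatIP**: Inner Product (dot product) - for normalized vectors, where it equals cosine similarity (higher = more similar)
- **IndexFlatL2**: L2 Distance (Euclidean) - when magnitude matters (lower = more similar; FAISS reports the squared distance)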

In [None]:
# Three unit-length 3D vectors, each kept as a (1, 3) float32 row so that it
# can be passed to FAISS directly as a single query
vec1 = np.array([[1.0, 0.0, 0.0]], dtype=np.float32)
vec2 = np.array([[0.8, 0.6, 0.0]], dtype=np.float32)  # Similar direction to vec1
vec3 = np.array([[0.0, 1.0, 0.0]], dtype=np.float32)  # Orthogonal to vec1

# Stack the rows into one (3, 3) matrix
vectors = np.concatenate((vec1, vec2, vec3), axis=0)
print("Vectors:")
print(vectors)

In [None]:
# IndexFlatIP (Inner Product): index the three sample vectors
index_ip = faiss.IndexFlatIP(3)
index_ip.add(vectors)

# Use vec1 as the query and ask for all three stored vectors back
D_ip, I_ip = index_ip.search(vec1, 3)

print("IndexFlatIP (Inner Product):")
print(f"Distances (higher = more similar): {D_ip[0]}")
print(f"Indices: {I_ip[0]}")
print("Most similar: vec1 (itself), then vec2 (similar direction)")

In [None]:
# IndexFlatL2 (Euclidean Distance): index the same three sample vectors
index_l2 = faiss.IndexFlatL2(3)
index_l2.add(vectors)

# Same probe as for the inner-product index: vec1 against all three vectors.
# NOTE: FAISS reports squared L2 distances for this index type.
D_l2, I_l2 = index_l2.search(vec1, 3)

print("IndexFlatL2 (Euclidean):")
print(f"Distances (lower = more similar): {D_l2[0]}")
print(f"Indices: {I_l2[0]}")
print("Same ordering as IP for normalized vectors")

## 3. Working with Real Embeddings

Use sentence-transformers to generate embeddings, then search with FAISS.

In [None]:
# Load the sentence-embedding model and report its output dimensionality
model = SentenceTransformer('all-MiniLM-L6-v2')
embedding_dim = model.get_sentence_embedding_dimension()
print(f"Model dimension: {embedding_dim}")

In [None]:
# Sample documents: short support-ticket style sentences, grouped by topic
# and then joined into the single list that gets indexed
account_documents = [
    "How to reset my password?",
    "Cannot login after password change",
    "Forgot my username and password",
    "Account locked after failed login attempts",
]
billing_documents = [
    "Payment declined by bank",
    "Invoice not received in email",
    "Refund processing time",
    "How to update billing address",
    "Cannot download invoice PDF",
    "Subscription cancellation process",
]
documents = account_documents + billing_documents

print(f"Total documents: {len(documents)}")

In [None]:
# Encode every document in one call.
# normalize_embeddings=True applies L2 normalization for IndexFlatIP;
# the result is then converted to float32, which FAISS requires.
encode_options = {"normalize_embeddings": True, "show_progress_bar": True}
embeddings = model.encode(documents, **encode_options).astype('float32')

print(f"Embeddings shape: {embeddings.shape}")
print(f"Embeddings dtype: {embeddings.dtype}")

In [None]:
# Build the document index; its dimensionality comes from the embedding
# matrix. Note that `dimension` and `index` from section 1 are rebound here.
dimension = embeddings.shape[-1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

n_indexed = index.ntotal
print(f"Index contains {n_indexed} vectors")

In [None]:
# Search with text query
query_text = "I can't remember my password"

# Encode the query with the same settings as the documents
# (normalized, float32) so that scores are comparable
query_embedding = model.encode(
    query_text,
    normalize_embeddings=True
).astype('float32')

# Ensure 2D shape (n_queries, dimension), as FAISS search expects
query_embedding = query_embedding.reshape(1, -1)

# Search
k = 3
distances, indices = index.search(query_embedding, k)

print(f"Query: {query_text}\n")
# Label follows k instead of repeating the literal 3
print(f"Top {k} results:")
for rank, (score, idx) in enumerate(zip(distances[0], indices[0]), start=1):
    if idx < 0:
        # FAISS pads the result with id -1 when fewer than k vectors are
        # available; documents[-1] would silently print the wrong document.
        continue
    print(f"{rank}. [{score:.3f}] {documents[idx]}")

## 4. Batch Search

Search multiple queries at once for efficiency.

In [None]:
# Several text queries to be answered in one batch
queries = ["password reset", "billing problem", "account locked"]

# Encode the whole batch at once, then convert to float32 for FAISS
query_embeddings = model.encode(queries, normalize_embeddings=True)
query_embeddings = query_embeddings.astype('float32')

print(f"Query batch shape: {query_embeddings.shape}")

In [None]:
# Batch search: one call returns a row of scores (D) and a row of ids (I)
# for each query in the batch
D, I = index.search(query_embeddings, k=2)

print("Batch search results:\n")
# The loop variable is deliberately not named `query`: that name holds the
# query vector from section 1, and rebinding it to a string would break
# re-running those earlier cells.
for q_text, scores, ids in zip(queries, D, I):
    print(f"Query: {q_text}")
    for score, idx in zip(scores, ids):
        print(f"  [{score:.3f}] {documents[idx]}")
    print()

## 5. Save and Load Index

Persist index to disk for reuse.

In [None]:
import os

# Write the index to disk
faiss.write_index(index, "my_index.faiss")
print("Index saved to my_index.faiss")

# Report the size of the written file in KB
size_bytes = os.path.getsize("my_index.faiss")
size_kb = size_bytes / 1024
print(f"File size: {size_kb:.1f} KB")

In [None]:
# Read the index back from disk
loaded_index = faiss.read_index("my_index.faiss")

# Show the vector count and dimensionality of the restored index
n_loaded, dim_loaded = loaded_index.ntotal, loaded_index.d
print(f"Loaded index size: {n_loaded}")
print(f"Dimension: {dim_loaded}")

In [None]:
# Verify it works: repeat the earlier text-query search on the loaded index
D_loaded, I_loaded = loaded_index.search(query_embedding, k=3)

print("Search with loaded index:")
for rank, (score, idx) in enumerate(zip(D_loaded[0], I_loaded[0]), start=1):
    print(f"{rank}. [{score:.3f}] {documents[idx]}")

# Should be identical to original results: the same neighbours AND the same
# scores (`indices` / `distances` come from the text-query search cell)
assert np.array_equal(I_loaded, indices), "Results should match!"
assert np.allclose(D_loaded, distances), "Scores should match!"
print("\n✓ Results match original index")

## 6. Common Pitfalls

Avoid these mistakes when using FAISS.

In [None]:
# Run this demo on a scratch index with the same dimensionality as `index`.
# Adding random vectors to `index` itself would leave it holding ids that
# have no entry in `documents`, breaking the earlier search cells on re-run.
scratch_index = faiss.IndexFlatIP(index.d)

# ❌ WRONG: Using float64
bad_embeddings = np.random.randn(10, scratch_index.d)  # float64
try:
    scratch_index.add(bad_embeddings)
except Exception as e:
    print(f"Error with float64: {e}")
else:
    # NOTE(review): recent FAISS Python wrappers appear to cast float64 input
    # to float32 instead of raising - confirm for the installed version.
    print(
        "No error raised: float64 input was accepted "
        f"(index size: {scratch_index.ntotal}); pass float32 to avoid a hidden conversion"
    )

# ✓ CORRECT: Use float32
good_embeddings = np.random.randn(10, scratch_index.d).astype('float32')
scratch_index.add(good_embeddings)
print(f"\n✓ Added with float32. Index size: {scratch_index.ntotal}")

In [None]:
# ❌ WRONG: 1D query
# The vector length comes from the index (index.d) rather than a hard-coded
# 384, so the demo keeps working if a model with another dimension is used.
query_1d = np.random.randn(index.d).astype('float32')
try:
    # Only the failing call sits inside the try block
    index.search(query_1d, k=5)
except Exception as e:
    print(f"Error with 1D query: {type(e).__name__}")

# ✓ CORRECT: 2D query (n_queries, dimension)
query_2d = np.random.randn(1, index.d).astype('float32')
D, I = index.search(query_2d, k=5)
print("\n✓ Search with 2D query succeeded")

## Summary

Key takeaways:
1. FAISS is typically 5-20x faster than a naive NumPy search (exact numbers depend on hardware)
2. Use `IndexFlatIP` for normalized embeddings (cosine similarity)
3. Always use `float32` and 2D arrays `(n, dimension)`
4. Batch queries for better efficiency
5. Save indexes to disk for reuse

**Next:** Learn about approximate search with IVF and HNSW indexes!