In [2]:
import torch
# Report whether this PyTorch install can use a GPU, and which CUDA
# version torch reports (the run recorded below shows None when CUDA
# is unavailable).
cuda_ready = torch.cuda.is_available()
cuda_build = torch.version.cuda

print("CUDA available:", cuda_ready)
print("CUDA version:", cuda_build)

CUDA available: False
CUDA version: None


In [None]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from time import time

# ----------------------- #
# 1. Setup
# ----------------------- #
nlist = 10                # Requested number of clusters (IVF); capped to corpus size below
nprobe = 5                # Clusters to search at query time
top_k = 3                 # Top K results to return

# ----------------------- #
# 2. Real Sentences
# ----------------------- #
sentences = [
    "The capital of France is Paris.",
    "Dogs are man's best friend.",
    "Machine learning is a subset of artificial intelligence.",
    "Mount Everest is the tallest mountain in the world.",
    "The Mona Lisa was painted by Leonardo da Vinci.",
    "Photosynthesis is the process by which plants make their food.",
    "Python is a popular programming language.",
    "The Great Wall of China is visible from space.",
    "Albert Einstein developed the theory of relativity.",
    "Soccer is the most popular sport in the world."
]

# ----------------------- #
# 3. Embed Sentences
# ----------------------- #
model = SentenceTransformer('all-MiniLM-L6-v2')
vectors = model.encode(sentences, convert_to_numpy=True).astype('float32')

# Take the vector dimension from the embeddings themselves (384 for
# all-MiniLM-L6-v2) so the index can never disagree with the model.
dimension = vectors.shape[1]

# ----------------------- #
# 4. Build FAISS IVF Index (CPU)
# ----------------------- #
# IVF training runs k-means, which needs at least as many training
# vectors as centroids; cap nlist so a shorter sentence list still trains.
# NOTE: with a corpus this tiny the clustering is for demonstration only.
nlist = min(nlist, len(sentences))

quantizer = faiss.IndexFlatL2(dimension)  # base index used for clustering
index = faiss.IndexIVFFlat(quantizer, dimension, nlist, faiss.METRIC_L2)

index.train(vectors)       # train IVF clustering
index.add(vectors)         # add sentence embeddings
index.nprobe = nprobe

# Row i of the index must correspond to sentences[i] for search() to
# map ids back to text, so make sure nothing was dropped.
assert index.ntotal == len(sentences), "index size does not match corpus size"

print("✅ FAISS IVF index created with real sentences.")

# ----------------------- #
# 5. Search Function
# ----------------------- #
def search(query_text, top_k=3):
    """Return the indexed sentences closest to ``query_text``.

    The query is embedded with the module-level ``model``, looked up in
    the module-level FAISS ``index``, and the returned ids are mapped
    back onto the module-level ``sentences`` list.

    Args:
        query_text: Text to embed and search for.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(sentence, distance)`` tuples in the order the index
        returned them. The list can be shorter than ``top_k`` when the
        probed clusters contain fewer vectors than requested.
    """
    query_vec = model.encode([query_text], convert_to_numpy=True).astype('float32')
    distances, ids = index.search(query_vec, top_k)

    # FAISS fills unfilled result slots with id -1. Indexing sentences
    # with -1 would silently return the *last* sentence, so skip them.
    return [
        (sentences[idx], float(dist))
        for dist, idx in zip(distances[0], ids[0])
        if idx >= 0
    ]

# ----------------------- #
# 6. Try a Query
# ----------------------- #
query = "Who painted the Mona Lisa?"
# Pass the configured top_k explicitly; otherwise search() falls back to
# its own default and the setting from the setup section has no effect.
results = search(query, top_k=top_k)

print(f"\n🔍 Results for query: '{query}'")
# Number the hits from 1 for display.
for rank, (text, dist) in enumerate(results, start=1):
    print(f"{rank}. {text} (distance: {dist:.4f})")


✅ FAISS IVF index created with real sentences.

🔍 Results for query: 'Who painted the Mona Lisa?'
1. The Mona Lisa was painted by Leonardo da Vinci. (distance: 0.2339)
2. The Great Wall of China is visible from space. (distance: 1.5942)
3. The capital of France is Paris. (distance: 1.7541)
