In [1]:
# notebooks/embeddings_test.py
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import os

# 1) Load the sentence-embedding model.
# "all-MiniLM-L6-v2" is a compact checkpoint chosen for being small and fast.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

# 2) Toy corpus used to exercise the pipeline end to end.
# Swap this list for a real dataset later; list order defines each document's
# row position in the embedding matrix.
documents = [
    # AI / ML themed sentences
    "Artificial intelligence is transforming the world.",
    "Machine learning enables systems to learn from data.",
    "Neural networks are a subset of machine learning.",
    # Sentences on other topics
    "The capital of France is Paris.",
    "Python is a popular programming language.",
]

# 3) Embed the corpus as a NumPy matrix (one row per entry in `documents`).
vectors = model.encode(documents, convert_to_numpy=True)
print("Embeddings shape:", vectors.shape)

# 4) Build the FAISS index.
# Rescale every row to unit L2 norm first (normalize_L2 works in place), so
# the inner product computed by IndexFlatIP is the cosine similarity.
faiss.normalize_L2(vectors)
dimension = vectors.shape[-1]  # embedding width = last axis of the matrix
index = faiss.IndexFlatIP(dimension)
index.add(vectors)
print("FAISS ntotal:", index.ntotal)

# 5) Query: embed the question the same way as the corpus and print the
# top-k most similar documents with their similarity scores.
query = "What is AI?"
q_vec = model.encode([query], convert_to_numpy=True)
# The query must be unit-normalised like the indexed vectors, otherwise the
# inner-product score is not a cosine similarity.
faiss.normalize_L2(q_vec)
# Never ask for more neighbours than the index holds: FAISS pads missing
# results with label -1, and documents[-1] would then silently print the
# last document with a meaningless score.
k = min(3, index.ntotal)
distances, indices = index.search(q_vec, k)
print("\nQuery:", query)
# Row 0 holds the results for the single query; ids and scores are aligned.
for rank, (idx, score) in enumerate(zip(indices[0], distances[0])):
    print(f"Rank {rank+1}: {documents[idx]} (score: {score:.4f})")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embeddings shape: (5, 384)
FAISS ntotal: 5

Query: What is AI?
Rank 1: Artificial intelligence is transforming the world. (score: 0.5704)
Rank 2: Neural networks are a subset of machine learning. (score: 0.4554)
Rank 3: Machine learning enables systems to learn from data. (score: 0.3607)
