In [3]:
!pip install numpy faiss-cpu tqdm




In [6]:
import numpy as np
import faiss                         # pip install faiss-cpu
from tqdm.auto import tqdm

import os  # needed below to create the output directory for the index file

# --- Paths -------------------------------------------------------------------
EMBED_FILE = "./Embeddings/poi_embeddings.npy"
INDEX_FILE = "Indexes/poi_ivfpq.index"

# --- Index parameters (passed positionally to faiss.IndexIVFPQ below) --------
D       = 768     # embedding dimensionality the index is built for
NLIST   = 128     # number of inverted lists (coarse clusters)
M_PQ    = 16      # number of product-quantizer sub-vectors
BITS    = 8       # bits per product-quantizer code
TRAIN_BATCH = 10000   # max number of vectors sampled for training
ADD_BATCH   = 10000   # vectors added per progress-bar step
SEED        = 42      # fixes the training sample so the index is reproducible

# Load as C-contiguous float32, which is the layout the FAISS calls below need.
emb = np.ascontiguousarray(np.load(EMBED_FILE), dtype='float32')
if emb.ndim != 2 or emb.shape[1] != D:
    # Fail early with a readable message instead of a FAISS assertion later on.
    raise ValueError(f"Expected embeddings of shape (N, {D}), got {emb.shape}")
N = emb.shape[0]
print(f"Loaded {N} embeddings of dim {D}")

# Normalise rows in place: on unit vectors the inner product is the cosine.
faiss.normalize_L2(emb)

quantizer = faiss.IndexFlatIP(D)
# The metric is given to the constructor so the index is configured for inner
# product from the start, instead of overwriting metric_type afterwards.
index_ivfpq = faiss.IndexIVFPQ(
    quantizer, D, NLIST, M_PQ, BITS, faiss.METRIC_INNER_PRODUCT
)

print("Training IVFPQ…")
rng = np.random.default_rng(SEED)
idx = rng.choice(N, size=min(TRAIN_BATCH, N), replace=False)
index_ivfpq.train(emb[idx])

print("Adding embeddings…")
# Added in slices only so tqdm can report progress.
for start in tqdm(range(0, N, ADD_BATCH), desc="Indexing"):
    end = min(start + ADD_BATCH, N)
    index_ivfpq.add(emb[start:end])
print("Total indexed:", index_ivfpq.ntotal)

# write_index does not create missing directories, so make sure it exists.
os.makedirs(os.path.dirname(INDEX_FILE) or ".", exist_ok=True)
faiss.write_index(index_ivfpq, INDEX_FILE)
print("Saved IVFPQ index to", INDEX_FILE)

def search(query_vector: np.ndarray, k: int = 10):
    """Look up the ``k`` nearest entries of ``index_ivfpq`` for one query vector.

    The input is copied to float32, reshaped to a single row of width ``D``
    and L2-normalised before the lookup, so the caller's array is not modified.

    Returns:
        A tuple ``(ids, scores)`` holding the first (only) row of the id and
        score arrays returned by ``index_ivfpq.search``.
    """
    row = np.reshape(query_vector.astype('float32'), (1, D))
    faiss.normalize_L2(row)  # in place, on the float32 copy
    scores, ids = index_ivfpq.search(row, k)
    return ids[0], scores[0]


Loaded 50463 embeddings of dim 768
Training IVFPQ…
Adding embeddings…


Indexing:   0%|          | 0/6 [00:00<?, ?it/s]

Total indexed: 50463
Saved IVFPQ index to ./poi_ivfpq.index


In [26]:
import os
# Set the OpenMP / MKL / OpenBLAS thread-count variables to 4. This is done
# before numpy and faiss are imported further down in this cell.
os.environ.update({
    "OMP_NUM_THREADS": "4",
    "MKL_NUM_THREADS": "4",
    "OPENBLAS_NUM_THREADS": "4",
})

import numpy as np
import faiss
from functools import lru_cache
from sentence_transformers import SentenceTransformer

# --- Retrieval settings ------------------------------------------------------
INDEX_FILE = "Indexes/poi_ivfpq.index"
EMBED_MODEL = "sentence-transformers/LaBSE"
DEVICE = "cpu"
NLIST_PROBE = 10  # value assigned to index.nprobe
TOP_K = 10        # number of neighbours requested per query

# Read the saved index from disk and set its nprobe.
index = faiss.read_index(INDEX_FILE)
index.nprobe = NLIST_PROBE

# Sentence encoder used to embed query strings.
embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)

@lru_cache(maxsize=512)
def encode_query(query: str) -> np.ndarray:
    """Embed ``query`` with ``embedder`` and L2-normalise the result.

    The string is passed to the encoder as a one-element list. Results are
    memoised for up to 512 distinct strings, so a repeated query returns the
    same cached array object; callers should treat it as read-only.
    """
    embedding = embedder.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(embedding)  # normalises in place
    return embedding

def search_pois(query: str):
    """Embed ``query`` and fetch its ``TOP_K`` nearest entries from ``index``.

    Returns:
        A tuple ``(ids, scores)`` taken from the first row of the arrays that
        ``index.search`` returns.
    """
    scores, ids = index.search(encode_query(query), TOP_K)
    return ids[0], scores[0]

if __name__ == "__main__":
    # Demo: run a single example query and print the raw ids and scores that
    # search_pois returns.
    ids, dists = search_pois("I want to read some books ")
    print("IDs:", ids)
    print("Dists:", dists)


IDs: [32138 19502 18757 11667  6705 10811 36148  9844 35060 11065]
Dists: [0.27594292 0.27594292 0.27589625 0.25854674 0.24007857 0.23536855
 0.23396839 0.23356201 0.23309447 0.23309447]


In [1]:
import faiss, numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from functools import lru_cache

# --- Retrieval settings ------------------------------------------------------
INDEX_FILE = "Indexes/poi_ivfpq.index"
META_FILE = "./Dataset/poi_dataset_enriched_incremental.csv"
EMBED_MODEL = "sentence-transformers/LaBSE"
DEVICE = "cpu"
NLIST_PROBE = 50  # value assigned to index.nprobe
TOP_N = 10        # default number of hits returned by search_in_city
EXPAND_K = 1000   # default number of candidates fetched before city filtering

# Read the saved index from disk and set its nprobe.
index = faiss.read_index(INDEX_FILE)
index.nprobe = NLIST_PROBE

# Every column is read as a string; the "id" column becomes the row index.
df = pd.read_csv(META_FILE, dtype=str).set_index("id")

# Sentence encoder used to embed query strings.
embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
@lru_cache(maxsize=512)
def encode_query(q: str):
    """Embed ``q`` with ``embedder`` and L2-normalise the result.

    The string is passed to the encoder as a one-element list. Results are
    memoised for up to 512 distinct strings, so a repeated query returns the
    same cached array object; callers should treat it as read-only.
    """
    embedding = embedder.encode([q], convert_to_numpy=True)
    faiss.normalize_L2(embedding)  # normalises in place
    return embedding

def search_in_city(query: str, city: str, top_n=TOP_N, expand_k=EXPAND_K):
    """Return up to ``top_n`` nearest POIs for ``query`` whose city equals ``city``.

    ``expand_k`` candidates are fetched from ``index`` and then filtered by the
    ``city`` column of ``df``. The ids returned by the index are used as
    positional row numbers into ``df``, so this assumes ``df`` rows are in the
    same order in which the vectors were added to the index (TODO confirm).

    Args:
        query: Free-text query, embedded via ``encode_query``.
        city: Value compared for equality with ``row["city"]``.
        top_n: Maximum number of hits to return; values <= 0 give an empty list.
        expand_k: Number of candidates requested from the index before filtering.

    Returns:
        A list of dicts with keys ``id``, ``name``, ``type``, ``score``,
        ``lat``, ``lon`` and ``description``, in the order the index returned
        the candidates.
    """
    if top_n <= 0:
        # The limit is only checked after appending, so without this guard a
        # non-positive top_n would still return one hit.
        return []

    qv = encode_query(query)
    scores, positions = index.search(qv, expand_k)

    results = []
    for score, pos in zip(scores[0], positions[0]):
        pos = int(pos)
        if pos < 0:
            # FAISS fills unused result slots with id -1; indexing df with it
            # would silently pick the last metadata row.
            continue
        # Direct positional lookup; also safe when the "id" index has duplicates.
        row = df.iloc[pos]
        if row["city"] != city:
            continue
        results.append({
            "id": df.index[pos],
            "name": row["name"],
            "type": row["type"],
            "score": float(score),
            "lat": float(row["lat"]),
            "lon": float(row["lon"]),
            "description": row["enriched_description"]
        })
        if len(results) >= top_n:
            break
    return results

if __name__ == "__main__":
    # Demo: search for one example query restricted to a single city, using
    # the default top_n / expand_k of search_in_city.
    hits = search_in_city(
        query="Want to watch film",
        city="Москва"
    )
    # Print one "name (type), score" header per hit, followed by its
    # description and a blank separator line.
    for poi in hits:
        print(f"{poi['name']} ({poi['type']}), score={poi['score']:.3f}")
        print("  ", poi["description"])
        print()


К. А. Тимирязеву (node), score=0.364
   In the heart of Moscow's historic center stands a poignant reminder of the city's rich past – the memorial to K.A. Timiryazev, a renowned agronomist and botanist who played a crucial role in Russia's agricultural development. This historical site is a tranquil oasis, nestled between bustling streets and towering skyscrapers, offering visitors a moment to reflect on the contributions of this pioneering scientist.

As you approach the memorial, you'll notice the well-maintained garden surrounding it, with carefully tended flowers and trees that seem to whisper stories of the past. The memorial itself is a simple yet elegant structure, featuring a statue of Timiryazev in his scholarly attire, surrounded by plaques detailing his life and achievements. It's a place where history meets nature, creating an atmosphere

Столичный продукт (node), score=0.361
   In the heart of the bustling city, hidden among towering skyscrapers and neon lights, lies a coz