## Generate Embeddings

In [None]:
import ast
from pathlib import Path

import pandas as pd

# Input CSV holding the sentences to embed (the embedding step reads its `sentence` column).
SENTENCES_FILE = Path("sentences.csv")

# Fail with a real exception: calling exit() inside a notebook asks the kernel
# to shut down instead of showing a readable error in the cell output.
if not SENTENCES_FILE.exists():
    raise FileNotFoundError(f"{SENTENCES_FILE} file not found.")

dataset = pd.read_csv(SENTENCES_FILE)
dataset.tail(2)

Generate embeddings through the OpenAI client library, but served by a free, locally running Ollama model.

In [None]:
from openai import OpenAI
import numpy as np

# On-disk cache for the sentences together with their embeddings.
EMBEDDED_SENTENCES_FILE = Path("embedded_sentences.csv")
# OpenAI-compatible endpoint of the local Ollama server.
OLLAMA_URL = "http://localhost:11434/v1"

# The OpenAI client insists on an API key; Ollama itself does not need one,
# so a placeholder value is passed.
client = OpenAI(api_key="ollama", base_url=OLLAMA_URL)

def get_embeddings(sentence: str, model: str = "nomic-embed-text:latest") -> list[float]:
    """Return the embedding vector of ``sentence``.

    Args:
        sentence: Text to embed.
        model: Name of the embedding model requested from the server behind
            ``client``. Defaults to the model this notebook was written for;
            queries and indexed sentences must be embedded with the same model
            to be comparable.

    Returns:
        The embedding of the first (and only) item in the response.
    """
    response = client.embeddings.create(model=model, input=sentence)
    return response.data[0].embedding

# Embedding every sentence means one request per row, so the result is cached
# on disk and reused on later runs.
if EMBEDDED_SENTENCES_FILE.exists():
    dataset = pd.read_csv(EMBEDDED_SENTENCES_FILE)
    # The CSV stores each vector as the text of a Python list. literal_eval
    # parses that literal without executing arbitrary code the way eval() would.
    dataset["embedding"] = dataset["embedding"].apply(ast.literal_eval)
else:
    dataset["embedding"] = dataset["sentence"].apply(get_embeddings)
    # Written while the column still holds plain lists, so each cell is
    # serialised as a complete list literal that can be parsed back above.
    dataset.to_csv(EMBEDDED_SENTENCES_FILE, index=False)

# Same in-memory representation on both branches: one NumPy vector per row.
dataset["embedding"] = dataset["embedding"].apply(np.array)

dataset.head(3)

In [None]:
# Give every row a sequential, 1-based identifier.
row_count = len(dataset)
dataset["id"] = range(1, row_count + 1)
dataset.head(5)

Check the dimensionality of a single embedding vector.

In [None]:
# Length of the first stored vector; used below as the dimension of the FAISS indexes.
first_embedding = dataset["embedding"].iloc[0]
embedding_dimension = len(first_embedding)
embedding_dimension

### FAISS - Facebook library for efficient similarity search and clustering of dense vectors.

Check [Faiss Indexes](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) for more information.

In [None]:
# Stack the per-row vectors into a single matrix (one row per sentence).
# FAISS works on float32 data, so convert once here instead of handing it a
# float64 array on every train/add call.
embeddings = np.array(dataset.embedding.to_list(), dtype=np.float32)
embeddings.shape

In [115]:
# Embed the search phrase with the same function used for the sentences, so
# the query and the indexed vectors are directly comparable.
query = "I love soccer"
query_embedding = get_embeddings(sentence=query)

## IndexFlatL2 - Exact search using L2 (Euclidean) distance
> The more documents you have, the slower it gets.

In [None]:
from faiss import IndexFlatL2, IndexIVFFlat, IndexIVFPQ

# Exact (brute-force) L2 index sized to the embedding dimension.
index_l2 = IndexFlatL2(embedding_dimension)
# Displayed to show whether the index needs a training step before vectors are added.
index_l2.is_trained

In [None]:
# Empty the index first so that re-running this cell does not store the same
# vectors twice; duplicates would make search return positions past the end
# of `dataset`.
index_l2.reset()
index_l2.add(embeddings)
index_l2.ntotal

In [None]:
# FAISS expects a 2-D batch of queries, hence the extra leading axis.
_, document_index = index_l2.search(np.expand_dims(query_embedding, axis=0), k=5)
# FAISS pads the result with -1 when fewer than k neighbours exist. Drop the
# padding, otherwise iloc[-1] would silently return the last row of `dataset`.
hits = document_index[0][document_index[0] >= 0]
dataset.iloc[hits]

## IndexIVFFlat - Inverted file with exact post-verification

### Voronoi Diagram

Any point within a cell of the Voronoi diagram is closest to the centroid (center) associated with that cell.

A Voronoi diagram partitions space into regions based on the distance to a specific set of points, known as centers or generators.

This algorithm allows you to efficiently find the nearest point to your search criteria without having to compare all possible embeddings—only the closest region needs to be considered.

![](./images/ivf.png)

In [None]:
# Coarse quantizer: a flat L2 index used to assign each vector to a cell.
quantizer = IndexFlatL2(embedding_dimension)
# Number of Voronoi cells the vectors are partitioned into.
n_centroids = 20
index_ivf = IndexIVFFlat(quantizer, embedding_dimension, n_centroids)
index_ivf.is_trained

In [None]:
# Train the IVF index on the embeddings before any vectors are added to it.
index_ivf.train(embeddings)
# Displayed to confirm the training step took effect.
index_ivf.is_trained

In [None]:
# Remove previously stored vectors (training is kept) so that re-running this
# cell does not add the same embeddings twice; duplicates would make search
# return positions past the end of `dataset`.
index_ivf.reset()
index_ivf.add(embeddings)
index_ivf.ntotal

In [None]:
# FAISS expects a 2-D batch of queries, hence the extra leading axis.
_, document_index = index_ivf.search(np.expand_dims(query_embedding, axis=0), k=5)
# Only the probed cell(s) are scanned, so fewer than k neighbours may be found.
# FAISS then pads the result with -1, which iloc would silently map to the
# last row of `dataset`. Drop the padding.
hits = document_index[0][document_index[0] >= 0]
dataset.iloc[hits]

> Using `nprobe` we can extend the search to also look into the cells neighboring the closest one.

In [None]:
# Scan the 5 cells closest to the query instead of only the nearest one.
index_ivf.nprobe = 5
_, document_index = index_ivf.search(np.expand_dims(query_embedding, axis=0), k=5)
# FAISS pads the result with -1 when the probed cells hold fewer than k
# vectors. Drop the padding, otherwise iloc[-1] would silently return the
# last row of `dataset`.
hits = document_index[0][document_index[0] >= 0]
dataset.iloc[hits]

# Final optimization
## IndexIVFPQ - IVF + Product Quantizer (PQ)

![IndexIVFPQ - IVF + Product Quantizer (PQ)](./images/ivf-pq.png)

In [None]:
# Number of sub-vectors each embedding is split into by the product quantizer;
# embedding_dimension must be divisible by it.
code_size = 8
# Bits used to encode each sub-vector (2**4 = 16 centroids per sub-quantizer).
bits_per_centroid = 4

# Use a dedicated coarse quantizer: the `quantizer` object built for the
# IVFFlat index is owned and trained by that index, and sharing it would make
# this index depend on (and interfere with) the other index's state.
quantizer_pq = IndexFlatL2(embedding_dimension)
index_ifv_pq = IndexIVFPQ(quantizer_pq, embedding_dimension, n_centroids, code_size, bits_per_centroid)

index_ifv_pq.is_trained

In [None]:
# Start from an empty index so that re-running this cell neither stores the
# embeddings twice nor leaves codes produced by an earlier training run.
index_ifv_pq.reset()
index_ifv_pq.train(embeddings)
index_ifv_pq.add(embeddings)
index_ifv_pq.ntotal

In [None]:
# Scan the 5 cells closest to the query instead of only the nearest one.
index_ifv_pq.nprobe = 5
_, document_index = index_ifv_pq.search(np.expand_dims(query_embedding, axis=0), k=5)
# FAISS pads the result with -1 when the probed cells hold fewer than k
# vectors. Drop the padding, otherwise iloc[-1] would silently return the
# last row of `dataset`.
hits = document_index[0][document_index[0] >= 0]
dataset.iloc[hits]